Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions examples/rescue_me.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[
    ["WR_set_driver", {
        "webdriver_name": "chrome",
        "options": [
            "--autoplay-policy=no-user-gesture-required",
            "--disable-blink-features=AutomationControlled"
        ]
    }],
    ["WR_to_url", {"url": "https://music.youtube.com/watch?v=jajHOxvEbXk"}],
    ["WR_sleep", {"seconds": 5}],
    ["WR_execute_script", {"script": "const v=document.querySelector('video');if(!v)return 'no-video';v.muted=false;v.volume=1.0;const p=v.play();if(p&&p.catch)p.catch(()=>{});return v.paused?'paused':'playing';"}],
    ["WR_sleep", {"seconds": 90}],
    ["WR_quit_all"]
]
78 changes: 78 additions & 0 deletions examples/rescue_me.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Demo: open YouTube Music and play OneRepublic — Rescue Me at 100% volume.

Run from the repo root:

python examples/rescue_me.py

The script launches Chrome with ``--autoplay-policy=no-user-gesture-required``
(otherwise Chrome's autoplay policy rejects an unmuted ``video.play()`` that
was not triggered by a real user gesture),
navigates to the song's watch URL on music.youtube.com, then forces the
HTMLMediaElement's ``volume`` to 1.0 and calls ``play()`` from JS.
"""
from __future__ import annotations

import sys
import time

from je_web_runner import webdriver_wrapper_instance


# Watch-page URL that ``main`` navigates to.
RESCUE_ME_URL = "https://music.youtube.com/watch?v=jajHOxvEbXk"
# Number of seconds ``main`` sleeps after forcing playback, before quitting.
LISTEN_SECONDS = 90


# JavaScript handed to ``driver.execute_script``: unmute the page's <video>,
# set its volume to 1.0, call ``play()`` and report the resulting state as
# 'no-video', 'paused' or 'playing'.
#
# Selenium runs the snippet as the *body* of an anonymous function, so the
# IIFE's value only reaches Python through an explicit top-level ``return``;
# without it ``execute_script`` always yields ``None`` and the caller can
# never observe ``'playing'``.
_FORCE_PLAY_JS = """
return (() => {
    const video = document.querySelector('video');
    if (!video) { return 'no-video'; }
    video.muted = false;
    video.volume = 1.0;
    const promise = video.play();
    if (promise && typeof promise.catch === 'function') {
        // Swallow the rejection; the paused/playing state is reported below.
        promise.catch(() => {});
    }
    return video.paused ? 'paused' : 'playing';
})();
"""


def _force_play(driver) -> None:
    """Re-run the force-play script until it reports ``playing``.

    At most eight attempts are made; every attempt that does not report
    ``playing`` is followed by a one-second pause.
    """
    attempts_left = 8
    while attempts_left > 0:
        state = driver.execute_script(_FORCE_PLAY_JS)
        if state == "playing":
            return
        time.sleep(1)
        attempts_left -= 1


def main() -> int:
    """Start Chrome, play the track for ``LISTEN_SECONDS`` seconds, then quit.

    Returns:
        ``0`` when the whole session ran without raising, ``1`` when Chrome
        could not be started or when navigation / playback raised. In both
        failure cases the reason is printed to stderr.
    """
    # ``--mute-audio`` is intentionally not passed: Chrome treats it as a
    # presence-only switch, so even ``--mute-audio=false`` silences output,
    # which defeats a demo that is supposed to be audible.
    chrome_args = [
        "--autoplay-policy=no-user-gesture-required",
        "--disable-blink-features=AutomationControlled",
    ]
    try:
        webdriver_wrapper_instance.set_driver("chrome", options=chrome_args)
    except Exception as error:  # pylint: disable=broad-except
        print(f"rescue_me: cannot start chrome ({error!r})", file=sys.stderr)
        return 1

    driver = webdriver_wrapper_instance.current_webdriver
    try:
        webdriver_wrapper_instance.to_url(RESCUE_ME_URL)
        # Fixed pause after navigation before the <video> element is poked.
        time.sleep(5)
        _force_play(driver)
        time.sleep(LISTEN_SECONDS)
    except Exception as error:  # pylint: disable=broad-except
        print(f"rescue_me: playback failed ({error!r})", file=sys.stderr)
        return 1
    finally:
        # Runs on success and on failure so the browser is always asked to
        # close; the pending return value is preserved.
        try:
            webdriver_wrapper_instance.quit()
        except Exception:  # pylint: disable=broad-except  # nosec B110 — best-effort cleanup; quit failures aren't actionable here
            pass
    return 0


# Script entry point: propagate main()'s status as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
6 changes: 5 additions & 1 deletion je_web_runner/mcp_server/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""WebRunner MCP server: expose WR_* actions over the Model Context Protocol."""
from je_web_runner.mcp_server.browser_tools import build_browser_tools
from je_web_runner.mcp_server.server import (
McpServer,
McpServerError,
build_default_tools,
serve_stdio,
)

# Names exported by ``from je_web_runner.mcp_server import *``.
__all__ = [
    "McpServer",
    "McpServerError",
    "build_default_tools",
    "build_browser_tools",
    "serve_stdio",
]
119 changes: 119 additions & 0 deletions je_web_runner/mcp_server/browser_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
MCP tools that drive a real browser via the WebRunner executor.

The executor already maps ~200 ``WR_*`` strings to callables (Selenium,
Playwright, reporting, …); these tools simply hand a JSON-RPC ``arguments``
payload through to ``execute_action`` / ``execute_files``.

Two hazards are handled here so the rest of the protocol stays clean:

* ``execute_action`` prints each record to stdout. The MCP server speaks
JSON-RPC over stdout, so stray prints corrupt the wire. We redirect stdout
into a buffer for the duration of the call and surface it as ``stdout`` in
the result.
* Action return values may contain WebDriver / WebElement instances that
``json.dumps`` cannot serialise. ``_serialize_value`` reduces those to
``repr()`` strings before the server's encoder sees them.
"""
from __future__ import annotations

import io
from contextlib import redirect_stdout
from typing import Any, Dict, List

from je_web_runner.mcp_server.server import McpServerError, Tool


def _serialize_value(value: Any) -> Any:
    """Reduce *value* to something built only from JSON-friendly types.

    Dicts are rebuilt with ``str`` keys, lists and tuples become lists, and
    both are processed recursively. ``None``, ``bool``, ``int``, ``float`` and
    ``str`` pass through untouched; anything else is replaced by its
    ``repr()``.
    """
    if isinstance(value, dict):
        return {str(name): _serialize_value(inner) for name, inner in value.items()}
    if isinstance(value, (list, tuple)):
        return list(map(_serialize_value, value))
    if value is None or isinstance(value, (bool, int, float, str)):
        return value
    return repr(value)


def _serialize_record(record: Dict[Any, Any]) -> Dict[str, Any]:
    """Return a copy of *record* with ``str`` keys and serialised values.

    Each value goes through ``_serialize_value``; each key through ``str``.
    """
    serialised: Dict[str, Any] = {}
    for key, value in record.items():
        serialised[str(key)] = _serialize_value(value)
    return serialised


def _tool_run_actions(arguments: Dict[str, Any]) -> Any:
    """Run an in-memory action list through the executor.

    Args:
        arguments: Tool-call payload; ``arguments["actions"]`` must be a list.

    Returns:
        ``{"stdout": <text printed during the run>, "record": <serialised
        result of execute_action>}``.

    Raises:
        McpServerError: If ``actions`` is missing or is not a list.
    """
    actions = arguments.get("actions")
    if not isinstance(actions, list):
        raise McpServerError("'actions' must be a list of [name, params] entries")
    # Imported lazily, and only once the payload is known to be well-formed,
    # so a malformed call is rejected without loading the executor.
    from je_web_runner.utils.executor.action_executor import execute_action
    # Anything printed while the actions run is captured here instead of
    # reaching the real stdout, and handed back to the caller as text.
    buffer = io.StringIO()
    with redirect_stdout(buffer):
        record = execute_action(actions)
    return {"stdout": buffer.getvalue(), "record": _serialize_record(record)}


def _tool_run_action_files(arguments: Dict[str, Any]) -> Any:
    """Run one or more action files through the executor.

    Args:
        arguments: Tool-call payload; ``arguments["files"]`` must be a list of
            string paths.

    Returns:
        ``{"stdout": <text printed during the run>, "records": [<serialised
        record>, ...]}`` with one entry per item returned by
        ``execute_files``.

    Raises:
        McpServerError: If ``files`` is missing, is not a list, or contains a
            non-string entry.
    """
    files = arguments.get("files")
    if not isinstance(files, list):
        raise McpServerError("'files' must be a list of file paths")
    if not all(isinstance(path, str) for path in files):
        raise McpServerError("each entry in 'files' must be a string path")
    # Imported lazily, and only once the payload is known to be well-formed,
    # so a malformed call is rejected without loading the executor.
    from je_web_runner.utils.executor.action_executor import execute_files
    # Anything printed while the files run is captured here instead of
    # reaching the real stdout, and handed back to the caller as text.
    buffer = io.StringIO()
    with redirect_stdout(buffer):
        results = execute_files(files)
    return {
        "stdout": buffer.getvalue(),
        "records": [_serialize_record(record) for record in results],
    }


def _tool_list_commands(_arguments: Dict[str, Any]) -> Any:
    """Return the sorted ``WR_``-prefixed names found in ``executor.event_dict``.

    The tool-call payload is accepted for signature parity and ignored.
    """
    from je_web_runner.utils.executor.action_executor import executor
    wr_names = [name for name in executor.event_dict if name.startswith("WR_")]
    wr_names.sort()
    return wr_names


def build_browser_tools() -> List[Tool]:
    """Assemble the MCP tools that hand work to the WebRunner executor.

    Returns:
        Three ``Tool`` objects, in this order: ``webrunner_run_actions``,
        ``webrunner_run_action_files`` and ``webrunner_list_commands``.
    """
    run_actions = Tool(
        name="webrunner_run_actions",
        description=(
            "Execute a WebRunner action list against a real browser. Each"
            " entry is [command_name, params] where params is a dict of"
            " kwargs or a list of positional args. Common commands:"
            " WR_get_webdriver_manager, WR_to_url, WR_send_keys,"
            " WR_click_element, WR_pw_launch, WR_pw_to_url, WR_quit."
            " Returns {'stdout': str, 'record': {action_repr: result}}."
        ),
        input_schema={
            "type": "object",
            "properties": {"actions": {"type": "array"}},
            "required": ["actions"],
        },
        handler=_tool_run_actions,
    )

    run_action_files = Tool(
        name="webrunner_run_action_files",
        description=(
            "Read one or more JSON action files from disk and execute"
            " them sequentially against a real browser. Returns"
            " {'stdout': str, 'records': [<per-file record>]}."
        ),
        input_schema={
            "type": "object",
            "properties": {
                "files": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["files"],
        },
        handler=_tool_run_action_files,
    )

    list_commands = Tool(
        name="webrunner_list_commands",
        description=(
            "Return every WR_* command currently registered in the"
            " executor, so a caller can discover the action surface"
            " before composing webrunner_run_actions payloads."
        ),
        input_schema={"type": "object", "properties": {}},
        handler=_tool_list_commands,
    )

    return [run_actions, run_action_files, list_commands]
6 changes: 6 additions & 0 deletions je_web_runner/mcp_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,9 +618,15 @@ def build_default_tools() -> List[Tool]:


def make_default_server() -> McpServer:
    """Create an ``McpServer`` with the default tools and the browser tools.

    Default tools are registered first, browser tools second.
    """
    # Deferred import: keeps ``server`` importable by modules that only need
    # the protocol skeleton, without pulling in the full executor (and with
    # it Selenium / Playwright).
    from je_web_runner.mcp_server.browser_tools import build_browser_tools
    server = McpServer()
    for make_tools in (build_default_tools, build_browser_tools):
        for tool in make_tools():
            server.register(tool)
    return server


Expand Down
49 changes: 49 additions & 0 deletions test/unit_test/test_mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ def test_full_default_tool_surface(self):
"webrunner_score_action_locators",
})

def test_browser_tools_registered_in_default_server(self):
    # The default server is expected to carry the browser execution tools
    # alongside the default ones, so MCP clients can drive Selenium /
    # Playwright through WR_* actions.
    registered = make_default_server().tools
    expected_names = [
        "webrunner_run_actions",
        "webrunner_run_action_files",
        "webrunner_list_commands",
    ]
    for tool_name in expected_names:
        self.assertIn(tool_name, registered)


class TestNewTools(unittest.TestCase):

Expand Down Expand Up @@ -234,5 +245,43 @@ def test_score_action_locators(self):
self.assertIn("score", body)


class TestBrowserTools(unittest.TestCase):
    """Executor-backed tools, exercised without ever launching a browser."""

    def setUp(self):
        self.server = make_default_server()

    def _call(self, name, arguments):
        # Build a ``tools/call`` request and pass it to the server's handler.
        request = {
            "id": 1,
            "method": "tools/call",
            "params": {"name": name, "arguments": arguments},
        }
        return self.server.handle(request)

    def test_run_actions_rejects_non_list(self):
        response = self._call("webrunner_run_actions", {"actions": "nope"})
        self.assertTrue(response["result"]["isError"])

    def test_run_action_files_rejects_non_string_paths(self):
        response = self._call("webrunner_run_action_files", {"files": [123]})
        self.assertTrue(response["result"]["isError"])

    def test_list_commands_returns_wr_surface(self):
        response = self._call("webrunner_list_commands", {})
        text = response["result"]["content"][0]["text"]
        for command in ("WR_to_url", "WR_quit"):
            self.assertIn(command, text)

    def test_run_actions_captures_stdout_and_executes_safe_command(self):
        # A zero-second WR_sleep goes through the executor without needing a
        # browser, which is enough to check the wiring end to end.
        payload = {"actions": [["WR_sleep", {"seconds": 0}]]}
        response = self._call("webrunner_run_actions", payload)
        self.assertFalse(response["result"]["isError"])
        text = response["result"]["content"][0]["text"]
        for fragment in ('"stdout"', '"record"', "WR_sleep"):
            self.assertIn(fragment, text)


# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
unittest.main()
Loading