Compare commits

..

4 Commits

Author SHA1 Message Date
Adrià Arrufat
1770dc03e3 refactor: move timeout and busy logic to Runner 2026-04-02 08:06:50 +02:00
Adrià Arrufat
1854627b69 mcp: final protocol cleanup after removing screenshot tool
- Removed unused ImageContent from protocol.
- Simplified CallToolResult back to only support TextContent.
- Cleaned up CallToolResult usages in tools.zig.
2026-04-01 15:00:55 +02:00
Adrià Arrufat
fffa8b6d4b mcp/cdp: fix inactivity timeout
- Fixed CDP inactivity timeout by resetting it when the browser is busy (loading or executing macrotasks).
- Removed the placeholder screenshot tool.
- Refactored MCP tool schemas to constants to avoid duplication.
2026-04-01 14:37:40 +02:00
Adrià Arrufat
58fc60d669 mcp: improve navigation reliability and add CDP support
- Configurable navigation timeouts and wait strategies in MCP tools.
- Default navigation timeout increased from 2s to 10s.
- Added navigate, eval, and screenshot MCP tools.
- Supported running a CDP server alongside MCP using --cdp-port.
- Fixed various startup crashes when running CDP in MCP mode.
- Hardened MCP server error handling.
2026-04-01 12:41:56 +02:00
7 changed files with 186 additions and 613 deletions

View File

@@ -160,6 +160,7 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
/// Returns the CDP timeout for the active mode.
///
/// `.serve`: the configured `timeout` multiplied by 1000 (presumably a
/// seconds-to-milliseconds conversion — confirm against the option's
/// definition), capped so the result never exceeds 604_800_000.
/// `.mcp`: a fixed 10000.
/// Any other mode is declared unreachable; callers must not ask for a CDP
/// timeout outside of those two modes.
pub fn cdpTimeout(self: *const Config) usize {
    switch (self.mode) {
        .serve => |serve_opts| {
            // Largest accepted pre-scaling value; anything above is clamped.
            const max_timeout = 604_800;
            if (serve_opts.timeout > max_timeout) {
                return max_timeout * 1000;
            }
            return @as(usize, serve_opts.timeout) * 1000;
        },
        // Default timeout for MCP-CDP
        .mcp => return 10000,
        else => unreachable,
    }
}
@@ -167,6 +168,7 @@ pub fn cdpTimeout(self: *const Config) usize {
/// Returns the port configured for the active mode.
///
/// `.serve` yields its `port` option. `.mcp` yields `cdp_port` when one was
/// provided and 0 otherwise. Any other mode is declared unreachable.
pub fn port(self: *const Config) u16 {
    switch (self.mode) {
        .serve => |serve_opts| return serve_opts.port,
        .mcp => |mcp_opts| {
            if (mcp_opts.cdp_port) |configured| {
                return configured;
            }
            return 0;
        },
        else => unreachable,
    }
}
@@ -174,6 +176,7 @@ pub fn port(self: *const Config) u16 {
/// Returns the host name to advertise for the active mode.
///
/// `.serve` prefers the explicit `advertise_host` option and falls back to
/// `host` when it is null. `.mcp` always yields "127.0.0.1". Any other mode
/// is declared unreachable.
pub fn advertiseHost(self: *const Config) []const u8 {
    switch (self.mode) {
        .serve => |serve_opts| {
            if (serve_opts.advertise_host) |explicit_host| {
                return explicit_host;
            }
            return serve_opts.host;
        },
        .mcp => return "127.0.0.1",
        else => unreachable,
    }
}
@@ -192,6 +195,7 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
/// Returns the connection limit for the active mode: the
/// `cdp_max_connections` option in `.serve` mode, a fixed 16 in `.mcp` mode.
/// Any other mode is declared unreachable.
pub fn maxConnections(self: *const Config) u16 {
    switch (self.mode) {
        .serve => |serve_opts| return serve_opts.cdp_max_connections,
        .mcp => return 16,
        else => unreachable,
    }
}
@@ -199,6 +203,7 @@ pub fn maxConnections(self: *const Config) u16 {
/// Returns the pending-connection limit for the active mode: the
/// `cdp_max_pending_connections` option in `.serve` mode, a fixed 128 in
/// `.mcp` mode. Any other mode is declared unreachable.
pub fn maxPendingConnections(self: *const Config) u31 {
    switch (self.mode) {
        .serve => |serve_opts| return serve_opts.cdp_max_pending_connections,
        .mcp => return 128,
        else => unreachable,
    }
}
@@ -224,6 +229,7 @@ pub const Serve = struct {
/// Options carried by the `.mcp` mode.
pub const Mcp = struct {
// Options shared with the other modes (filled in by the common argument parser).
common: Common = .{},
// MCP version to use; `.default` unless overridden.
version: mcp.Version = .default,
// Optional CDP port, set from `--cdp-port` / `--cdp_port`; null when the flag is absent.
cdp_port: ?u16 = null,
};
pub const DumpFormat = enum {
@@ -677,6 +683,19 @@ fn parseMcpArgs(
continue;
}
if (std.mem.eql(u8, "--cdp-port", opt) or std.mem.eql(u8, "--cdp_port", opt)) {
const str = args.next() orelse {
log.fatal(.mcp, "missing argument value", .{ .arg = opt });
return error.InvalidArgument;
};
result.cdp_port = std.fmt.parseInt(u16, str, 10) catch |err| {
log.fatal(.mcp, "invalid argument value", .{ .arg = opt, .err = err });
return error.InvalidArgument;
};
continue;
}
if (try parseCommonArg(allocator, opt, args, &result.common)) {
continue;
}

View File

@@ -297,13 +297,12 @@ pub const Client = struct {
}
var cdp = &self.mode.cdp;
var last_message = milliTimestamp(.monotonic);
var ms_remaining = self.ws.timeout_ms;
const timeout_ms = self.ws.timeout_ms;
while (true) {
const result = cdp.pageWait(ms_remaining) catch |wait_err| switch (wait_err) {
const result = cdp.pageWait(timeout_ms) catch |wait_err| switch (wait_err) {
error.NoPage => {
const status = http.tick(ms_remaining) catch |err| {
const status = http.tick(timeout_ms) catch |err| {
log.err(.app, "http tick", .{ .err = err });
return;
};
@@ -314,10 +313,12 @@ pub const Client = struct {
if (self.readSocket() == false) {
return;
}
last_message = milliTimestamp(.monotonic);
ms_remaining = self.ws.timeout_ms;
continue;
},
error.Timeout => {
log.info(.app, "CDP timeout", .{});
return;
},
else => return wait_err,
};
@@ -326,19 +327,8 @@ pub const Client = struct {
if (self.readSocket() == false) {
return;
}
last_message = milliTimestamp(.monotonic);
ms_remaining = self.ws.timeout_ms;
},
.done => {
const now = milliTimestamp(.monotonic);
const elapsed = now - last_message;
if (elapsed >= ms_remaining) {
log.info(.app, "CDP timeout", .{});
return;
}
ms_remaining -= @intCast(elapsed);
last_message = now;
},
.done => unreachable,
}
}
}

View File

@@ -94,7 +94,18 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
const ms_elapsed = timer.lap() / 1_000_000;
if (ms_elapsed >= ms_remaining) {
return .done;
// Don't timeout if there's still active work (HTTP requests,
// intercepted requests, background JS tasks, or pending macrotasks).
if (self.http_client.active > 0 or self.http_client.intercepted > 0) {
ms_remaining = opts.ms;
continue;
}
const browser = self.session.browser;
if (browser.hasBackgroundTasks() or browser.msToNextMacrotask() != null) {
ms_remaining = opts.ms;
continue;
}
return error.Timeout;
}
ms_remaining -= @intCast(ms_elapsed);
if (next_ms > 0) {
@@ -237,7 +248,16 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
page._parse_state = .{ .raw_done = @errorName(err) };
return err;
},
.raw_done => return .done,
.raw_done => {
if (comptime is_cdp) {
const http_result = try http_client.tick(@intCast(opts.ms));
if (http_result == .cdp_socket) {
return .cdp_socket;
}
return .{ .ok = 0 };
}
return .done;
},
}
}

View File

@@ -22,23 +22,10 @@ const DOMNode = @import("webapi/Node.zig");
const Element = @import("webapi/Element.zig");
const Event = @import("webapi/Event.zig");
const MouseEvent = @import("webapi/event/MouseEvent.zig");
const KeyboardEvent = @import("webapi/event/KeyboardEvent.zig");
const Page = @import("Page.zig");
const Session = @import("Session.zig");
const Selector = @import("webapi/selector/Selector.zig");
/// Dispatches a trusted, bubbling `input` event followed by a trusted,
/// bubbling `change` event on `el`.
///
/// Failing to create either event propagates the error. A failing dispatch
/// is only logged, so the second event is still attempted after the first
/// one fails to dispatch.
fn dispatchInputAndChangeEvents(el: *Element, page: *Page) !void {
    const input_event: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), input_event) catch |dispatch_err| {
        lp.log.err(.app, "dispatch input event failed", .{ .err = dispatch_err });
    };

    const change_event: *Event = try .initTrusted(comptime .wrap("change"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), change_event) catch |dispatch_err| {
        lp.log.err(.app, "dispatch change event failed", .{ .err = dispatch_err });
    };
}
pub fn click(node: *DOMNode, page: *Page) !void {
const el = node.is(Element) orelse return error.InvalidNodeType;
@@ -56,107 +43,9 @@ pub fn click(node: *DOMNode, page: *Page) !void {
};
}
/// Simulates hovering over `node` by dispatching a trusted `mouseover`
/// event and then a trusted `mouseenter` event on it.
///
/// Returns `error.InvalidNodeType` when `node` is not an `Element`, and
/// `error.ActionFailed` (after logging the cause) when either dispatch
/// fails. Errors from creating the events propagate unchanged.
pub fn hover(node: *DOMNode, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;

    const over: *MouseEvent = try .initTrusted(comptime .wrap("mouseover"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), over.asEvent()) catch |dispatch_err| {
        lp.log.err(.app, "hover mouseover failed", .{ .err = dispatch_err });
        return error.ActionFailed;
    };

    // mouseenter is created with only `composed` set (no bubbles/cancelable).
    const enter: *MouseEvent = try .initTrusted(comptime .wrap("mouseenter"), .{
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), enter.asEvent()) catch |dispatch_err| {
        lp.log.err(.app, "hover mouseenter failed", .{ .err = dispatch_err });
        return error.ActionFailed;
    };
}
/// Simulates a key press by dispatching a trusted `keydown` event followed
/// by a trusted `keyup` event, both carrying `key`.
///
/// The events target `node` when one is given (it must be an `Element`,
/// otherwise `error.InvalidNodeType` is returned) and the page's document
/// node when `node` is null. A failing dispatch is logged and reported as
/// `error.ActionFailed`. Errors from creating the events propagate unchanged.
pub fn press(node: ?*DOMNode, key: []const u8, page: *Page) !void {
    const target = blk: {
        const given = node orelse break :blk page.document.asNode().asEventTarget();
        const element = given.is(Element) orelse return error.InvalidNodeType;
        break :blk element.asEventTarget();
    };

    const down: *KeyboardEvent = try .initTrusted(comptime .wrap("keydown"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
        .key = key,
    }, page);
    page._event_manager.dispatch(target, down.asEvent()) catch |dispatch_err| {
        lp.log.err(.app, "press keydown failed", .{ .err = dispatch_err });
        return error.ActionFailed;
    };

    const up: *KeyboardEvent = try .initTrusted(comptime .wrap("keyup"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
        .key = key,
    }, page);
    page._event_manager.dispatch(target, up.asEvent()) catch |dispatch_err| {
        lp.log.err(.app, "press keyup failed", .{ .err = dispatch_err });
        return error.ActionFailed;
    };
}
/// Sets the value of a `<select>` element and then dispatches `input` and
/// `change` events on it.
///
/// Returns `error.InvalidNodeType` when `node` is not an `Element` or not a
/// select element, and `error.ActionFailed` (after logging the cause) when
/// setting the value fails.
pub fn selectOption(node: *DOMNode, value: []const u8, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;
    const select_el = element.is(Element.Html.Select) orelse return error.InvalidNodeType;

    select_el.setValue(value, page) catch |set_err| {
        lp.log.err(.app, "select setValue failed", .{ .err = set_err });
        return error.ActionFailed;
    };

    try dispatchInputAndChangeEvents(element, page);
}
/// Sets the checked state of a checkbox or radio input, then dispatches a
/// trusted `click` event followed by `input` and `change` events.
///
/// Returns `error.InvalidNodeType` when `node` is not an `Element`, not an
/// input, or an input whose type is neither checkbox nor radio. Returns
/// `error.ActionFailed` (after logging the cause) when updating the checked
/// state fails. A failing `click` dispatch is only logged.
pub fn setChecked(node: *DOMNode, checked: bool, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;
    const input_el = element.is(Element.Html.Input) orelse return error.InvalidNodeType;

    const is_toggle = input_el._input_type == .checkbox or input_el._input_type == .radio;
    if (!is_toggle) {
        return error.InvalidNodeType;
    }

    input_el.setChecked(checked, page) catch |set_err| {
        lp.log.err(.app, "setChecked failed", .{ .err = set_err });
        return error.ActionFailed;
    };

    // Match browser event order: click fires first, then input and change.
    const click_evt: *MouseEvent = try .initTrusted(comptime .wrap("click"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), click_evt.asEvent()) catch |dispatch_err| {
        lp.log.err(.app, "dispatch click event failed", .{ .err = dispatch_err });
    };

    try dispatchInputAndChangeEvents(element, page);
}
pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
const el = node.is(Element) orelse return error.InvalidNodeType;
el.focus(page) catch |err| {
lp.log.err(.app, "fill focus failed", .{ .err = err });
};
if (el.is(Element.Html.Input)) |input| {
input.setValue(text, page) catch |err| {
lp.log.err(.app, "fill input failed", .{ .err = err });
@@ -176,7 +65,15 @@ pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
return error.InvalidNodeType;
}
try dispatchInputAndChangeEvents(el, page);
const input_evt: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page);
page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
lp.log.err(.app, "dispatch input event failed", .{ .err = err });
};
const change_evt: *Event = try .initTrusted(comptime .wrap("change"), .{ .bubbles = true }, page);
page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
lp.log.err(.app, "dispatch change event failed", .{ .err = err });
};
}
pub fn scroll(node: ?*DOMNode, x: ?i32, y: ?i32, page: *Page) !void {

View File

@@ -10,20 +10,5 @@
<div id="scrollbox" style="width: 100px; height: 100px; overflow: scroll;" onscroll="window.scrolled = true;">
<div style="height: 500px;">Long content</div>
</div>
<div id="hoverTarget" onmouseover="window.hovered = true;">Hover Me</div>
<input id="keyTarget" onkeydown="window.keyPressed = event.key;" onkeyup="window.keyReleased = event.key;">
<select id="sel2" onchange="window.sel2Changed = this.value">
<option value="a">Alpha</option>
<option value="b">Beta</option>
<option value="c">Gamma</option>
</select>
<input id="chk" type="checkbox">
<input id="rad" type="radio" name="group1">
<script>
document.getElementById('chk').addEventListener('click', function() { window.chkClicked = true; });
document.getElementById('chk').addEventListener('change', function() { window.chkChanged = true; });
document.getElementById('rad').addEventListener('click', function() { window.radClicked = true; });
document.getElementById('rad').addEventListener('change', function() { window.radChanged = true; });
</script>
</body>
</html>

View File

@@ -144,11 +144,22 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
app.network.run();
},
.mcp => {
.mcp => |opts| {
log.info(.mcp, "starting server", .{});
log.opts.format = .logfmt;
var cdp_server: ?*lp.Server = null;
if (opts.cdp_port) |port| {
const address = std.net.Address.parseIp("127.0.0.1", port) catch |err| {
log.fatal(.mcp, "invalid cdp address", .{ .err = err, .port = port });
return;
};
cdp_server = try lp.Server.init(app, address);
try sighandler.on(lp.Server.shutdown, .{cdp_server.?});
}
defer if (cdp_server) |s| s.deinit();
var worker_thread = try std.Thread.spawn(.{}, mcpThread, .{ allocator, app });
defer worker_thread.join();

View File

@@ -9,57 +9,72 @@ const protocol = @import("protocol.zig");
const Server = @import("Server.zig");
const CDPNode = @import("../cdp/Node.zig");
// Minified JSON input schema for the navigation tools: a required `url`
// plus optional `timeout` (milliseconds) and `waitUntil` wait strategy.
// Defined once so every tool entry that takes these arguments can share it.
const goto_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "The URL to navigate to, must be a valid URL." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ },
\\ "required": ["url"]
\\}
);
// Minified JSON input schema for tools that operate on the current page but
// may optionally navigate first: every property (`url`, `timeout`,
// `waitUntil`) is optional, so there is no "required" list.
const url_params_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before processing." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ }
\\}
);
// Minified JSON input schema for the script-evaluation tools: a required
// `script` plus the optional navigation arguments (`url`, `timeout`,
// `waitUntil`).
const evaluate_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "script": { "type": "string" },
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ },
\\ "required": ["script"]
\\}
);
pub const tool_list = [_]protocol.Tool{
.{
.name = "goto",
.description = "Navigate to a specified URL and load the page in memory so it can be reused later for info extraction.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "The URL to navigate to, must be a valid URL." }
\\ },
\\ "required": ["url"]
\\}
),
.inputSchema = goto_schema,
},
.{
.name = "navigate",
.description = "Alias for goto. Navigate to a specified URL and load the page in memory.",
.inputSchema = goto_schema,
},
.{
.name = "markdown",
.description = "Get the page content in markdown format. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching markdown." }
\\ }
\\}
),
.inputSchema = url_params_schema,
},
.{
.name = "links",
.description = "Extract all links in the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting links." }
\\ }
\\}
),
.inputSchema = url_params_schema,
},
.{
.name = "evaluate",
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "script": { "type": "string" },
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." }
\\ },
\\ "required": ["script"]
\\}
),
.inputSchema = evaluate_schema,
},
.{
.name = "eval",
.description = "Alias for evaluate. Evaluate JavaScript in the current page context.",
.inputSchema = evaluate_schema,
},
.{
.name = "semantic_tree",
@@ -69,6 +84,8 @@ pub const tool_list = [_]protocol.Tool{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID to get the tree for a specific element instead of the document root." },
\\ "maxDepth": { "type": "integer", "description": "Optional maximum depth of the tree to return. Useful for exploring high-level structure first." }
\\ }
@@ -91,38 +108,17 @@ pub const tool_list = [_]protocol.Tool{
.{
.name = "interactiveElements",
.description = "Extract interactive elements from the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting interactive elements." }
\\ }
\\}
),
.inputSchema = url_params_schema,
},
.{
.name = "structuredData",
.description = "Extract structured data (like JSON-LD, OpenGraph, etc) from the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting structured data." }
\\ }
\\}
),
.inputSchema = url_params_schema,
},
.{
.name = "detectForms",
.description = "Detect all forms on the page and return their structure including fields, types, and required status. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before detecting forms." }
\\ }
\\}
),
.inputSchema = url_params_schema,
},
.{
.name = "click",
@@ -179,74 +175,6 @@ pub const tool_list = [_]protocol.Tool{
\\}
),
},
.{
.name = "hover",
.description = "Hover over an element, triggering mouseover and mouseenter events. Useful for menus, tooltips, and hover states.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to hover over." }
\\ },
\\ "required": ["backendNodeId"]
\\}
),
},
.{
.name = "press",
.description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "key": { "type": "string", "description": "The key to press (e.g. 'Enter', 'Tab', 'a')." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document." }
\\ },
\\ "required": ["key"]
\\}
),
},
.{
.name = "selectOption",
.description = "Select an option in a <select> dropdown element by its value. Dispatches input and change events.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the <select> element." },
\\ "value": { "type": "string", "description": "The value of the option to select." }
\\ },
\\ "required": ["backendNodeId", "value"]
\\}
),
},
.{
.name = "setChecked",
.description = "Check or uncheck a checkbox or radio button. Dispatches input, change, and click events.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the checkbox or radio input element." },
\\ "checked": { "type": "boolean", "description": "Whether to check (true) or uncheck (false) the element." }
\\ },
\\ "required": ["backendNodeId", "checked"]
\\}
),
},
.{
.name = "findElement",
.description = "Find interactive elements by role and/or accessible name. Returns matching elements with their backend node IDs. Useful for locating specific elements without parsing the full semantic tree.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "role": { "type": "string", "description": "Optional ARIA role to match (e.g. 'button', 'link', 'textbox', 'checkbox')." },
\\ "name": { "type": "string", "description": "Optional accessible name substring to match (case-insensitive)." }
\\ }
\\}
),
},
};
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -257,15 +185,21 @@ pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
// Arguments accepted by the navigation tool handler.
const GotoParams = struct {
// URL to navigate to (required).
url: [:0]const u8,
// Optional navigation timeout; null means the caller did not supply one.
timeout: ?u32 = null,
// Optional wait strategy; null means the caller did not supply one.
waitUntil: ?lp.Config.WaitUntil = null,
};
// Arguments accepted by tools that work on a page and may optionally
// navigate first; every field is optional.
const UrlParams = struct {
// URL to navigate to before running the tool; null when not supplied.
url: ?[:0]const u8 = null,
// Optional navigation timeout; null means the caller did not supply one.
timeout: ?u32 = null,
// Optional wait strategy; null means the caller did not supply one.
waitUntil: ?lp.Config.WaitUntil = null,
};
// Arguments accepted by the script-evaluation tool handler.
const EvaluateParams = struct {
// JavaScript source to evaluate (required).
script: [:0]const u8,
// URL to navigate to before evaluating; null when not supplied.
url: ?[:0]const u8 = null,
// Optional navigation timeout; null means the caller did not supply one.
timeout: ?u32 = null,
// Optional wait strategy; null means the caller did not supply one.
waitUntil: ?lp.Config.WaitUntil = null,
};
const ToolStreamingText = struct {
@@ -342,16 +276,12 @@ const ToolAction = enum {
structuredData,
detectForms,
evaluate,
eval,
semantic_tree,
click,
fill,
scroll,
waitForSelector,
hover,
press,
selectOption,
setChecked,
findElement,
};
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -364,16 +294,12 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "structuredData", .structuredData },
.{ "detectForms", .detectForms },
.{ "evaluate", .evaluate },
.{ "eval", .eval },
.{ "semantic_tree", .semantic_tree },
.{ "click", .click },
.{ "fill", .fill },
.{ "scroll", .scroll },
.{ "waitForSelector", .waitForSelector },
.{ "hover", .hover },
.{ "press", .press },
.{ "selectOption", .selectOption },
.{ "setChecked", .setChecked },
.{ "findElement", .findElement },
});
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -402,23 +328,18 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments),
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
.detectForms => try handleDetectForms(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.eval, .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
.click => try handleClick(server, arena, req.id.?, call_params.arguments),
.fill => try handleFill(server, arena, req.id.?, call_params.arguments),
.scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
.waitForSelector => try handleWaitForSelector(server, arena, req.id.?, call_params.arguments),
.hover => try handleHover(server, arena, req.id.?, call_params.arguments),
.press => try handlePress(server, arena, req.id.?, call_params.arguments),
.selectOption => try handleSelectOption(server, arena, req.id.?, call_params.arguments),
.setChecked => try handleSetChecked(server, arena, req.id.?, call_params.arguments),
.findElement => try handleFindElement(server, arena, req.id.?, call_params.arguments),
}
}
fn handleGoto(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgs(GotoParams, arena, arguments, server, id, "goto");
try performGoto(server, args.url, id);
try performGoto(server, args.url, id, args.timeout, args.waitUntil);
const content = [_]protocol.TextContent([]const u8){.{ .text = "Navigated successfully." }};
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
@@ -426,7 +347,7 @@ fn handleGoto(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arg
fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .page = page, .action = .markdown },
@@ -438,7 +359,7 @@ fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .page = page, .action = .links },
@@ -453,9 +374,11 @@ fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Va
url: ?[:0]const u8 = null,
backendNodeId: ?u32 = null,
maxDepth: ?u32 = null,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
};
const args = try parseArgsOrDefault(TreeParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{
@@ -477,9 +400,17 @@ fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Val
backendNodeId: CDPNode.Id,
};
const args = try parseArgs(Params, arena, arguments, server, id, "nodeDetails");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
const details = lp.SemanticTree.getNodeDetails(arena, resolved.node, &server.node_registry, resolved.page) catch {
_ = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
const page = server.session.currentPage().?;
const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &server.node_registry, page) catch {
return server.sendError(id, .InternalError, "Failed to get node details");
};
@@ -492,7 +423,7 @@ fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Val
fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const elements = lp.interactive.collectInteractiveElements(page.document.asNode(), arena, page) catch |err| {
log.err(.mcp, "elements collection failed", .{ .err = err });
@@ -513,7 +444,7 @@ fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.
fn handleStructuredData(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const data = lp.structured_data.collectStructuredData(page.document.asNode(), arena, page) catch |err| {
log.err(.mcp, "struct data collection failed", .{ .err = err });
@@ -528,7 +459,7 @@ fn handleStructuredData(server: *Server, arena: std.mem.Allocator, id: std.json.
fn handleDetectForms(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const forms_data = lp.forms.collectForms(arena, page.document.asNode(), page) catch |err| {
log.err(.mcp, "form collection failed", .{ .err = err });
@@ -549,7 +480,7 @@ fn handleDetectForms(server: *Server, arena: std.mem.Allocator, id: std.json.Val
fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgs(EvaluateParams, arena, arguments, server, id, "evaluate");
const page = try ensurePage(server, id, args.url);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
var ls: js.Local.Scope = undefined;
page.js.localScope(&ls);
@@ -579,19 +510,26 @@ fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
backendNodeId: CDPNode.Id,
};
const args = try parseArgs(ClickParams, arena, arguments, server, id, "click");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
lp.actions.click(resolved.node, resolved.page) catch |err| {
const page = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
lp.actions.click(node.dom, page) catch |err| {
if (err == error.InvalidNodeType) {
return server.sendError(id, .InvalidParams, "Node is not an HTML element");
}
return server.sendError(id, .InternalError, "Failed to click element");
};
const page_title = resolved.page.getTitle() catch null;
const page_title = page.getTitle() catch null;
const result_text = try std.fmt.allocPrint(arena, "Clicked element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
args.backendNodeId,
resolved.page.url,
page.url,
page_title orelse "(none)",
});
const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -604,20 +542,27 @@ fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arg
text: []const u8,
};
const args = try parseArgs(FillParams, arena, arguments, server, id, "fill");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
lp.actions.fill(resolved.node, args.text, resolved.page) catch |err| {
const page = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
lp.actions.fill(node.dom, args.text, page) catch |err| {
if (err == error.InvalidNodeType) {
return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
}
return server.sendError(id, .InternalError, "Failed to fill element");
};
const page_title = resolved.page.getTitle() catch null;
const page_title = page.getTitle() catch null;
const result_text = try std.fmt.allocPrint(arena, "Filled element (backendNodeId: {d}) with \"{s}\". Page url: {s}, title: {s}", .{
args.backendNodeId,
args.text,
resolved.page.url,
page.url,
page_title orelse "(none)",
});
const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -691,192 +636,9 @@ fn handleWaitForSelector(server: *Server, arena: std.mem.Allocator, id: std.json
return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// Handles the `hover` tool call.
///
/// Parses the required `backendNodeId`, resolves it to a node and its page,
/// performs the hover action, and replies with a text result that includes
/// the page url and title. An `InvalidNodeType` failure from the action is
/// reported to the client as `InvalidParams`; any other action failure is
/// reported as `InternalError`.
fn handleHover(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const HoverParams = struct {
        backendNodeId: CDPNode.Id,
    };
    const args = try parseArgs(HoverParams, arena, arguments, server, id, "hover");
    const target = try resolveNodeAndPage(server, id, args.backendNodeId);

    lp.actions.hover(target.node, target.page) catch |err| switch (err) {
        error.InvalidNodeType => return server.sendError(id, .InvalidParams, "Node is not an HTML element"),
        else => return server.sendError(id, .InternalError, "Failed to hover element"),
    };

    // A title lookup failure is not fatal; the reply falls back to "(none)".
    const title = target.page.getTitle() catch null;
    const message = try std.fmt.allocPrint(arena, "Hovered element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
        args.backendNodeId,
        target.page.url,
        title orelse "(none)",
    });

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `press` tool handler.
///
/// Parses a required `key` and an optional `backendNodeId` out of `arguments`.
/// When a node id is given it is looked up in `server.node_registry`; when it
/// is omitted, `null` is passed to `lp.actions.press` as the target node.
/// On success the reply is a single text content block that mentions the key,
/// the page url and the page title ("(none)" when the title is null or reading
/// it fails).
///
/// Reported errors: `.PageNotLoaded` when the session has no current page,
/// `.InvalidParams` when the node id is unknown or the action returns
/// `error.InvalidNodeType`, `.InternalError` for any other action error.
fn handlePress(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        key: []const u8,
        backendNodeId: ?CDPNode.Id = null,
    };

    const params = try parseArgs(Params, arena, arguments, server, id, "press");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    // The node is optional for this tool, so the registry is consulted only
    // when the caller actually supplied an id.
    const key_target: ?*DOMNode = if (params.backendNodeId) |node_id| blk: {
        const entry = server.node_registry.lookup_by_id.get(node_id) orelse {
            return server.sendError(id, .InvalidParams, "Node not found");
        };
        break :blk entry.dom;
    } else null;

    lp.actions.press(key_target, params.key, page) catch |err| {
        return if (err == error.InvalidNodeType)
            server.sendError(id, .InvalidParams, "Node is not an HTML element")
        else
            server.sendError(id, .InternalError, "Failed to press key");
    };

    // A failing title lookup is treated the same as a missing title.
    const title = (page.getTitle() catch null) orelse "(none)";
    const message = try std.fmt.allocPrint(
        arena,
        "Pressed key '{s}'. Page url: {s}, title: {s}",
        .{ params.key, page.url, title },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `selectOption` tool handler.
///
/// Parses `backendNodeId` and `value` out of `arguments`, resolves the node
/// and current page through `resolveNodeAndPage`, and runs
/// `lp.actions.selectOption` with the requested value. On success the reply is
/// a single text content block that mentions the value, the node id, the page
/// url and the page title ("(none)" when the title is null or reading it
/// fails).
///
/// `error.InvalidNodeType` from the action is reported to the client as
/// `.InvalidParams`; every other action error is reported as `.InternalError`.
fn handleSelectOption(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        backendNodeId: CDPNode.Id,
        value: []const u8,
    };

    const params = try parseArgs(Params, arena, arguments, server, id, "selectOption");
    const target = try resolveNodeAndPage(server, id, params.backendNodeId);

    lp.actions.selectOption(target.node, params.value, target.page) catch |err| {
        return if (err == error.InvalidNodeType)
            server.sendError(id, .InvalidParams, "Node is not a <select> element")
        else
            server.sendError(id, .InternalError, "Failed to select option");
    };

    // A failing title lookup is treated the same as a missing title.
    const title = (target.page.getTitle() catch null) orelse "(none)";
    const message = try std.fmt.allocPrint(
        arena,
        "Selected option '{s}' (backendNodeId: {d}). Page url: {s}, title: {s}",
        .{ params.value, params.backendNodeId, target.page.url, title },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `setChecked` tool handler.
///
/// Parses `backendNodeId` and the boolean `checked` out of `arguments`,
/// resolves the node and current page through `resolveNodeAndPage`, and runs
/// `lp.actions.setChecked`. On success the reply is a single text content
/// block that mentions the node id, the word "checked" or "unchecked", the
/// page url and the page title ("(none)" when the title is null or reading it
/// fails).
///
/// `error.InvalidNodeType` from the action is reported to the client as
/// `.InvalidParams`; every other action error is reported as `.InternalError`.
fn handleSetChecked(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        backendNodeId: CDPNode.Id,
        checked: bool,
    };

    const params = try parseArgs(Params, arena, arguments, server, id, "setChecked");
    const target = try resolveNodeAndPage(server, id, params.backendNodeId);

    lp.actions.setChecked(target.node, params.checked, target.page) catch |err| {
        return if (err == error.InvalidNodeType)
            server.sendError(id, .InvalidParams, "Node is not a checkbox or radio input")
        else
            server.sendError(id, .InternalError, "Failed to set checked state");
    };

    // Human-readable form of the requested state for the reply text.
    const state_label = if (params.checked) "checked" else "unchecked";
    // A failing title lookup is treated the same as a missing title.
    const title = (target.page.getTitle() catch null) orelse "(none)";
    const message = try std.fmt.allocPrint(
        arena,
        "Set element (backendNodeId: {d}) to {s}. Page url: {s}, title: {s}",
        .{ params.backendNodeId, state_label, target.page.url, title },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `findElement` tool handler.
///
/// Accepts optional `role` and `name` filters; at least one must be present,
/// otherwise `.InvalidParams` is reported. Interactive elements are collected
/// from the current page's document and kept when:
///   - `role` (if given) equals the element's role, ignoring ASCII case, and
///   - `name` (if given) occurs in the element's name per `containsIgnoreCase`.
/// An element that lacks a role/name never matches a filter on that field.
///
/// The surviving elements are registered in `server.node_registry` and then
/// serialized as JSON into a single text content block.
///
/// Reported errors: `.PageNotLoaded` when the session has no current page,
/// `.InternalError` when collection or registration fails (both are logged).
fn handleFindElement(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        role: ?[]const u8 = null,
        name: ?[]const u8 = null,
    };

    const filter = try parseArgsOrDefault(Params, arena, arguments, server, id);

    const has_criteria = filter.role != null or filter.name != null;
    if (!has_criteria) {
        return server.sendError(id, .InvalidParams, "At least one of 'role' or 'name' must be provided");
    }

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    const candidates = lp.interactive.collectInteractiveElements(page.document.asNode(), arena, page) catch |err| {
        log.err(.mcp, "elements collection failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to collect interactive elements");
    };

    var selected: std.ArrayList(lp.interactive.InteractiveElement) = .empty;
    for (candidates) |candidate| {
        // Role filter: exact match, ASCII case-insensitive.
        if (filter.role) |wanted_role| {
            if (candidate.role) |actual_role| {
                if (!std.ascii.eqlIgnoreCase(actual_role, wanted_role)) continue;
            } else continue;
        }

        // Name filter: substring match, ASCII case-insensitive.
        if (filter.name) |wanted_name| {
            if (candidate.name) |actual_name| {
                if (!containsIgnoreCase(actual_name, wanted_name)) continue;
            } else continue;
        }

        try selected.append(arena, candidate);
    }
    const found = try selected.toOwnedSlice(arena);

    lp.interactive.registerNodes(found, &server.node_registry) catch |err| {
        log.err(.mcp, "node registration failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to register element nodes");
    };

    var json_out: std.Io.Writer.Allocating = .init(arena);
    try std.json.Stringify.value(found, .{}, &json_out.writer);

    const content = [_]protocol.TextContent([]const u8){.{ .text = json_out.written() }};
    return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// Reports whether `needle` occurs anywhere inside `haystack`, comparing bytes
/// without regard to ASCII case.
///
/// An empty `needle` is contained in every haystack (including an empty one);
/// a `needle` longer than `haystack` is never contained.
fn containsIgnoreCase(haystack: []const u8, needle: []const u8) bool {
    // The standard library search has the same empty-needle and
    // needle-longer-than-haystack behaviour as the manual scan it replaces.
    return std.ascii.indexOfIgnoreCase(haystack, needle) != null;
}
/// A DOM node together with the page it was resolved against.
const NodeAndPage = struct {
    node: *DOMNode,
    page: *lp.Page,
};

/// Resolves `node_id` to its DOM node and pairs it with the session's current
/// page.
///
/// On failure an error response is sent to the client first and then an error
/// is returned to the caller:
///   - no current page: `.PageNotLoaded` is sent, `error.PageNotLoaded` returned;
///   - id not in `server.node_registry`: `.InvalidParams` is sent,
///     `error.InvalidParams` returned.
/// The page check happens before the node lookup.
fn resolveNodeAndPage(server: *Server, id: std.json.Value, node_id: CDPNode.Id) !NodeAndPage {
    if (server.session.currentPage()) |page| {
        if (server.node_registry.lookup_by_id.get(node_id)) |entry| {
            return .{ .node = entry.dom, .page = page };
        }
        try server.sendError(id, .InvalidParams, "Node not found");
        return error.InvalidParams;
    }
    try server.sendError(id, .PageNotLoaded, "Page not loaded");
    return error.PageNotLoaded;
}
fn ensurePage(server: *Server, id: std.json.Value, url: ?[:0]const u8) !*lp.Page {
fn ensurePage(server: *Server, id: std.json.Value, url: ?[:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !*lp.Page {
if (url) |u| {
try performGoto(server, u, id);
try performGoto(server, u, id, timeout, waitUntil);
}
return server.session.currentPage() orelse {
try server.sendError(id, .PageNotLoaded, "Page not loaded");
@@ -912,7 +674,7 @@ fn parseArgs(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Va
};
}
fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value) !void {
fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !void {
const session = server.session;
if (session.page != null) {
session.removePage();
@@ -933,7 +695,10 @@ fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value) !void {
try server.sendError(id, .InternalError, "Failed to start page runner");
return error.NavigationFailed;
};
runner.wait(.{ .ms = 2000 }) catch {
runner.wait(.{
.ms = timeout orelse 10000,
.until = waitUntil orelse .done,
}) catch {
try server.sendError(id, .InternalError, "Timeout waiting for page load");
return error.NavigationFailed;
};
@@ -971,7 +736,7 @@ test "MCP - evaluate error reporting" {
} }, out.written());
}
test "MCP - Actions: click, fill, scroll, hover, press, selectOption, setChecked" {
test "MCP - Actions: click, fill, scroll" {
defer testing.reset();
const aa = testing.arena_allocator;
@@ -1032,67 +797,7 @@ test "MCP - Actions: click, fill, scroll, hover, press, selectOption, setChecked
out.clearRetainingCapacity();
}
{
// Test Hover
const el = page.document.getElementById("hoverTarget", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":5,\"method\":\"tools/call\",\"params\":{\"name\":\"hover\",\"arguments\":{\"backendNodeId\":", id_str, "}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Hovered element") != null);
out.clearRetainingCapacity();
}
{
// Test Press
const el = page.document.getElementById("keyTarget", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":6,\"method\":\"tools/call\",\"params\":{\"name\":\"press\",\"arguments\":{\"key\":\"Enter\",\"backendNodeId\":", id_str, "}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Pressed key") != null);
out.clearRetainingCapacity();
}
{
// Test SelectOption
const el = page.document.getElementById("sel2", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":7,\"method\":\"tools/call\",\"params\":{\"name\":\"selectOption\",\"arguments\":{\"backendNodeId\":", id_str, ",\"value\":\"b\"}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Selected option") != null);
out.clearRetainingCapacity();
}
{
// Test SetChecked (checkbox)
const el = page.document.getElementById("chk", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":8,\"method\":\"tools/call\",\"params\":{\"name\":\"setChecked\",\"arguments\":{\"backendNodeId\":", id_str, ",\"checked\":true}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "checked") != null);
out.clearRetainingCapacity();
}
{
// Test SetChecked (radio)
const el = page.document.getElementById("rad", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":9,\"method\":\"tools/call\",\"params\":{\"name\":\"setChecked\",\"arguments\":{\"backendNodeId\":", id_str, ",\"checked\":true}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "checked") != null);
out.clearRetainingCapacity();
}
// Evaluate JS assertions for all actions
// Evaluate assertions
var ls: js.Local.Scope = undefined;
page.js.localScope(&ls);
defer ls.deinit();
@@ -1104,66 +809,12 @@ test "MCP - Actions: click, fill, scroll, hover, press, selectOption, setChecked
const result = try ls.local.exec(
\\ window.clicked === true && window.inputVal === 'hello' &&
\\ window.changed === true && window.selChanged === 'opt2' &&
\\ window.scrolled === true &&
\\ window.hovered === true &&
\\ window.keyPressed === 'Enter' && window.keyReleased === 'Enter' &&
\\ window.sel2Changed === 'b' &&
\\ window.chkClicked === true && window.chkChanged === true &&
\\ window.radClicked === true && window.radChanged === true
\\ window.scrolled === true
, null);
try testing.expect(result.isTrue());
}
// Exercises the `findElement` tool against the mcp_actions.html fixture:
// lookup by role, lookup by case-insensitive name substring, a filter with no
// matches, and the error reply when neither filter is supplied.
test "MCP - findElement" {
    defer testing.reset();
    const aa = testing.arena_allocator;

    var out: std.io.Writer.Allocating = .init(aa);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &out.writer);
    defer server.deinit();

    const Case = struct {
        // JSON-RPC request sent to the router.
        request: []const u8,
        // Substring that must appear in the captured output.
        expected: []const u8,
    };

    const cases = [_]Case{
        // Find by role
        .{
            .request =
            \\{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"findElement","arguments":{"role":"button"}}}
            ,
            .expected = "Click Me",
        },
        // Find by name (case-insensitive substring)
        .{
            .request =
            \\{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"findElement","arguments":{"name":"click"}}}
            ,
            .expected = "Click Me",
        },
        // Find with no matches
        .{
            .request =
            \\{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"findElement","arguments":{"role":"slider"}}}
            ,
            .expected = "[]",
        },
        // Error: no params provided
        .{
            .request =
            \\{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"findElement","arguments":{}}}
            ,
            .expected = "error",
        },
    };

    for (cases) |case| {
        try router.handleMessage(server, aa, case.request);
        try testing.expect(std.mem.indexOf(u8, out.written(), case.expected) != null);
        // Start each case from an empty capture buffer.
        out.clearRetainingCapacity();
    }
}
test "MCP - waitForSelector: existing element" {
defer testing.reset();
var out: std.io.Writer.Allocating = .init(testing.arena_allocator);