From b10d866e4bd7a30da02ce553663ad01aa8f5d281 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?=
Date: Mon, 16 Mar 2026 13:41:19 +0900
Subject: [PATCH] Add click, fill, and scroll interaction tools

Adds click, fill, and scroll functionality to both CDP and MCP
to support programmatic browser interactions.
---
 src/cdp/domains/lp.zig | 150 +++++++++++++++++++++++++++++++
 src/mcp/tools.zig      | 194 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 344 insertions(+)

diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig
index 2026b17d..9b51139f 100644
--- a/src/cdp/domains/lp.zig
+++ b/src/cdp/domains/lp.zig
@@ -32,6 +32,9 @@ pub fn processMessage(cmd: anytype) !void {
         getSemanticTree,
         getInteractiveElements,
         getStructuredData,
+        clickNode,
+        fillNode,
+        scrollNode,
     }, cmd.input.action) orelse return error.UnknownMethod;
 
     switch (action) {
@@ -39,6 +42,9 @@ pub fn processMessage(cmd: anytype) !void {
         .getSemanticTree => return getSemanticTree(cmd),
         .getInteractiveElements => return getInteractiveElements(cmd),
         .getStructuredData => return getStructuredData(cmd),
+        .clickNode => return clickNode(cmd),
+        .fillNode => return fillNode(cmd),
+        .scrollNode => return scrollNode(cmd),
     }
 }
 
@@ -146,6 +152,150 @@ fn getStructuredData(cmd: anytype) !void {
     }, .{});
 }
 
+/// Handles the `clickNode` action: clicks the element identified by `nodeId`,
+/// falling back to `backendNodeId` when `nodeId` is not given.
+///
+/// Fails with `error.InvalidParam` when neither id is supplied or the node is
+/// not an HTML element, `error.InvalidNodeId` when the id is not in the node
+/// registry, and `error.InternalError` (after logging) when the click fails.
+fn clickNode(cmd: anytype) !void {
+    const Params = struct {
+        nodeId: ?Node.Id = null,
+        backendNodeId: ?Node.Id = null,
+    };
+    const params = (try cmd.params(Params)) orelse Params{};
+
+    const bc = cmd.browser_context orelse return error.NoBrowserContext;
+    const page = bc.session.currentPage() orelse return error.PageNotLoaded;
+
+    const input_node_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
+    const node = bc.node_registry.lookup_by_id.get(input_node_id) orelse return error.InvalidNodeId;
+
+    if (node.dom.is(DOMNode.Element)) |el| {
+        if (el.is(DOMNode.Element.Html)) |html_el| {
+            html_el.click(page) catch |err| {
+                log.err(.cdp, "click failed", .{ .err = err });
+                return error.InternalError;
+            };
+        } else {
+            return error.InvalidParam;
+        }
+    } else {
+        return error.InvalidParam;
+    }
+
+    return cmd.sendResult(.{}, .{});
+}
+
+/// Handles the `fillNode` action: sets the value of an input, textarea or
+/// select element to `text`, then dispatches trusted, bubbling `input` and
+/// `change` events on it. Any other node yields `error.InvalidParam`.
+///
+/// Event dispatch is best-effort: the value has already been written, so a
+/// dispatch failure is logged instead of failing the command.
+fn fillNode(cmd: anytype) !void {
+    const Params = struct {
+        nodeId: ?Node.Id = null,
+        backendNodeId: ?Node.Id = null,
+        text: []const u8,
+    };
+    const params = (try cmd.params(Params)) orelse return error.InvalidParam;
+
+    const bc = cmd.browser_context orelse return error.NoBrowserContext;
+    const page = bc.session.currentPage() orelse return error.PageNotLoaded;
+
+    const input_node_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
+    const node = bc.node_registry.lookup_by_id.get(input_node_id) orelse return error.InvalidNodeId;
+
+    if (node.dom.is(DOMNode.Element)) |el| {
+        if (el.is(DOMNode.Element.Html.Input)) |input| {
+            input.setValue(params.text, page) catch |err| {
+                log.err(.cdp, "fill input failed", .{ .err = err });
+                return error.InternalError;
+            };
+        } else if (el.is(DOMNode.Element.Html.TextArea)) |textarea| {
+            textarea.setValue(params.text, page) catch |err| {
+                log.err(.cdp, "fill textarea failed", .{ .err = err });
+                return error.InternalError;
+            };
+        } else if (el.is(DOMNode.Element.Html.Select)) |select| {
+            select.setValue(params.text, page) catch |err| {
+                log.err(.cdp, "fill select failed", .{ .err = err });
+                return error.InternalError;
+            };
+        } else {
+            return error.InvalidParam;
+        }
+
+        const Event = @import("../../browser/webapi/Event.zig");
+        const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
+        _ = page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| log.err(.cdp, "dispatch input failed", .{ .err = err });
+
+        const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
+        _ = page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| log.err(.cdp, "dispatch change failed", .{ .err = err });
+    } else {
+        return error.InvalidParam;
+    }
+
+    return cmd.sendResult(.{}, .{});
+}
+
+/// Handles the `scrollNode` action. With `nodeId`/`backendNodeId` it sets the
+/// element's scroll offsets (only the axes that were supplied) and dispatches a
+/// trusted, bubbling `scroll` event on it; without an id it scrolls the window.
+///
+/// NOTE(review): in the window case an omitted axis is passed as 0 rather than
+/// left untouched, unlike the element case — confirm this is intended.
+fn scrollNode(cmd: anytype) !void {
+    const Params = struct {
+        nodeId: ?Node.Id = null,
+        backendNodeId: ?Node.Id = null,
+        x: ?i32 = null,
+        y: ?i32 = null,
+    };
+    const params = (try cmd.params(Params)) orelse Params{};
+
+    const bc = cmd.browser_context orelse return error.NoBrowserContext;
+    const page = bc.session.currentPage() orelse return error.PageNotLoaded;
+
+    const x = params.x orelse 0;
+    const y = params.y orelse 0;
+
+    const input_node_id = params.nodeId orelse params.backendNodeId;
+
+    if (input_node_id) |node_id| {
+        const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId;
+
+        if (node.dom.is(DOMNode.Element)) |el| {
+            if (params.x) |left| {
+                el.setScrollLeft(left, page) catch |err| {
+                    log.err(.cdp, "set scrollLeft failed", .{ .err = err });
+                    return error.InternalError;
+                };
+            }
+            if (params.y) |top| {
+                el.setScrollTop(top, page) catch |err| {
+                    log.err(.cdp, "set scrollTop failed", .{ .err = err });
+                    return error.InternalError;
+                };
+            }
+
+            const Event = @import("../../browser/webapi/Event.zig");
+            const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
+            _ = page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| log.err(.cdp, "dispatch scroll failed", .{ .err = err });
+        } else {
+            return error.InvalidParam;
+        }
+    } else {
+        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
+            log.err(.cdp, "scroll failed", .{ .err = err });
+            return error.InternalError;
+        };
+    }
+
+    return cmd.sendResult(.{}, .{});
+}
+
 const testing = @import("../testing.zig");
 test "cdp.lp: getMarkdown" {
     var ctx = testing.context();
diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig
index f5126be0..adebc120 100644
--- a/src/mcp/tools.zig
+++ b/src/mcp/tools.zig
@@ -98,6 +98,47 @@ pub const tool_list = [_]protocol.Tool{
             \\}
         ),
     },
+    .{
+        .name = "click",
+        .description = "Click on an interactive element.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to click." }
+            \\  },
+            \\  "required": ["backendNodeId"]
+            \\}
+        ),
+    },
+    .{
+        .name = "fill",
+        .description = "Fill text into an input element.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "The backend node ID of the input element to fill." },
+            \\    "text": { "type": "string", "description": "The text to fill into the input element." }
+            \\  },
+            \\  "required": ["backendNodeId", "text"]
+            \\}
+        ),
+    },
+    .{
+        .name = "scroll",
+        .description = "Scroll the page or a specific element.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "Optional: The backend node ID of the element to scroll. If omitted, scrolls the window." },
+            \\    "x": { "type": "integer", "description": "Optional: The horizontal scroll offset." },
+            \\    "y": { "type": "integer", "description": "Optional: The vertical scroll offset." }
+            \\  }
+            \\}
+        ),
+    },
 };
 
 pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -182,6 +223,9 @@ const ToolAction = enum {
     structuredData,
     evaluate,
     semantic_tree,
+    click,
+    fill,
+    scroll,
 };
 
 const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -193,6 +237,9 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
     .{ "structuredData", .structuredData },
     .{ "evaluate", .evaluate },
     .{ "semantic_tree", .semantic_tree },
+    .{ "click", .click },
+    .{ "fill", .fill },
+    .{ "scroll", .scroll },
 });
 
 pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -221,6 +268,9 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
         .structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
         .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
         .semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
+        .click => try handleClick(server, arena, req.id.?, call_params.arguments),
+        .fill => try handleFill(server, arena, req.id.?, call_params.arguments),
+        .scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
     }
 }
 
@@ -380,6 +430,150 @@ fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
     try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
 }
 
+/// MCP `click` tool: clicks the HTML element registered under `backendNodeId`.
+/// Replies with an error result when no page is loaded, the node is unknown,
+/// the node is not an HTML element, or the click itself fails.
+fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const ClickParams = struct {
+        backendNodeId: CDPNode.Id,
+    };
+    const args = try parseArguments(ClickParams, arena, arguments, server, id, "click");
+
+    const page = server.session.currentPage() orelse {
+        return server.sendError(id, .PageNotLoaded, "Page not loaded");
+    };
+
+    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
+        return server.sendError(id, .InvalidParams, "Node not found");
+    };
+
+    if (node.dom.is(Element)) |el| {
+        if (el.is(Element.Html)) |html_el| {
+            html_el.click(page) catch |err| {
+                log.err(.mcp, "click failed", .{ .err = err });
+                return server.sendError(id, .InternalError, "Failed to click element");
+            };
+        } else {
+            return server.sendError(id, .InvalidParams, "Node is not an HTML element");
+        }
+    } else {
+        return server.sendError(id, .InvalidParams, "Node is not an element");
+    }
+
+    const content = [_]protocol.TextContent([]const u8){.{ .text = "Clicked successfully." }};
+    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
+}
+
+/// MCP `fill` tool: sets the value of an input, textarea or select element to
+/// `text`, then dispatches trusted, bubbling `input` and `change` events.
+/// A failed event dispatch is logged but does not fail the call, because the
+/// value has already been written.
+fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const FillParams = struct {
+        backendNodeId: CDPNode.Id,
+        text: []const u8,
+    };
+    const args = try parseArguments(FillParams, arena, arguments, server, id, "fill");
+
+    const page = server.session.currentPage() orelse {
+        return server.sendError(id, .PageNotLoaded, "Page not loaded");
+    };
+
+    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
+        return server.sendError(id, .InvalidParams, "Node not found");
+    };
+
+    if (node.dom.is(Element)) |el| {
+        if (el.is(Element.Html.Input)) |input| {
+            input.setValue(args.text, page) catch |err| {
+                log.err(.mcp, "fill input failed", .{ .err = err });
+                return server.sendError(id, .InternalError, "Failed to fill input");
+            };
+        } else if (el.is(Element.Html.TextArea)) |textarea| {
+            textarea.setValue(args.text, page) catch |err| {
+                log.err(.mcp, "fill textarea failed", .{ .err = err });
+                return server.sendError(id, .InternalError, "Failed to fill textarea");
+            };
+        } else if (el.is(Element.Html.Select)) |select| {
+            select.setValue(args.text, page) catch |err| {
+                log.err(.mcp, "fill select failed", .{ .err = err });
+                return server.sendError(id, .InternalError, "Failed to fill select");
+            };
+        } else {
+            return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
+        }
+
+        const Event = @import("../browser/webapi/Event.zig");
+        const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
+        _ = page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| log.err(.mcp, "dispatch input failed", .{ .err = err });
+
+        const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
+        _ = page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| log.err(.mcp, "dispatch change failed", .{ .err = err });
+    } else {
+        return server.sendError(id, .InvalidParams, "Node is not an element");
+    }
+
+    const content = [_]protocol.TextContent([]const u8){.{ .text = "Filled successfully." }};
+    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
+}
+
+/// MCP `scroll` tool: scrolls the element given by `backendNodeId` (only the
+/// axes that were supplied) and dispatches a trusted, bubbling `scroll` event,
+/// or scrolls the window when no node id is given.
+///
+/// NOTE(review): in the window case an omitted axis is passed as 0 rather than
+/// left untouched, unlike the element case — confirm this is intended.
+fn handleScroll(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const ScrollParams = struct {
+        backendNodeId: ?CDPNode.Id = null,
+        x: ?i32 = null,
+        y: ?i32 = null,
+    };
+    const args = try parseArguments(ScrollParams, arena, arguments, server, id, "scroll");
+
+    const page = server.session.currentPage() orelse {
+        return server.sendError(id, .PageNotLoaded, "Page not loaded");
+    };
+
+    const x = args.x orelse 0;
+    const y = args.y orelse 0;
+
+    if (args.backendNodeId) |node_id| {
+        const node = server.node_registry.lookup_by_id.get(node_id) orelse {
+            return server.sendError(id, .InvalidParams, "Node not found");
+        };
+
+        if (node.dom.is(Element)) |el| {
+            if (args.x) |left| {
+                el.setScrollLeft(left, page) catch |err| {
+                    log.err(.mcp, "set scrollLeft failed", .{ .err = err });
+                    return server.sendError(id, .InternalError, "Failed to scroll");
+                };
+            }
+            if (args.y) |top| {
+                el.setScrollTop(top, page) catch |err| {
+                    log.err(.mcp, "set scrollTop failed", .{ .err = err });
+                    return server.sendError(id, .InternalError, "Failed to scroll");
+                };
+            }
+
+            const Event = @import("../browser/webapi/Event.zig");
+            const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
+            _ = page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| log.err(.mcp, "dispatch scroll failed", .{ .err = err });
+        } else {
+            return server.sendError(id, .InvalidParams, "Node is not an element");
+        }
+    } else {
+        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
+            log.err(.mcp, "scroll failed", .{ .err = err });
+            return server.sendError(id, .InternalError, "Failed to scroll");
+        };
+    }
+
+    const content = [_]protocol.TextContent([]const u8){.{ .text = "Scrolled successfully." }};
+    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
+}
+
 fn parseArguments(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value, server: *Server, id: std.json.Value, tool_name: []const u8) !T {
     if (arguments == null) {
         try server.sendError(id, .InvalidParams, "Missing arguments");