Add click, fill, and scroll interaction tools

Adds click, fill, and scroll functionality to both CDP and MCP
to support programmatic browser interactions.
This commit is contained in:
Adrià Arrufat
2026-03-16 13:41:19 +09:00
parent ddd34dc57b
commit b10d866e4b
2 changed files with 301 additions and 0 deletions

View File

@@ -32,6 +32,9 @@ pub fn processMessage(cmd: anytype) !void {
getSemanticTree,
getInteractiveElements,
getStructuredData,
clickNode,
fillNode,
scrollNode,
}, cmd.input.action) orelse return error.UnknownMethod;
switch (action) {
@@ -39,6 +42,9 @@ pub fn processMessage(cmd: anytype) !void {
.getSemanticTree => return getSemanticTree(cmd),
.getInteractiveElements => return getInteractiveElements(cmd),
.getStructuredData => return getStructuredData(cmd),
.clickNode => return clickNode(cmd),
.fillNode => return fillNode(cmd),
.scrollNode => return scrollNode(cmd),
}
}
@@ -146,6 +152,126 @@ fn getStructuredData(cmd: anytype) !void {
}, .{});
}
/// CDP handler that clicks the node identified by `nodeId` (preferred) or
/// `backendNodeId`.
///
/// Errors:
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidParam` when neither id is supplied, or the node is not an
///   HTML element.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InternalError` when the click itself fails (the cause is logged).
fn clickNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    // `nodeId` wins when both identifiers are supplied.
    const target_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
    const node = bc.node_registry.lookup_by_id.get(target_id) orelse return error.InvalidNodeId;

    // Only HTML elements can be clicked; anything else is a caller error.
    const element = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;
    const html_element = element.is(DOMNode.Element.Html) orelse return error.InvalidParam;

    html_element.click(page) catch |err| {
        log.err(.cdp, "click failed", .{ .err = err });
        return error.InternalError;
    };

    return cmd.sendResult(.{}, .{});
}
/// CDP handler that sets the value of an input, textarea or select element
/// identified by `nodeId` (preferred) or `backendNodeId`, then dispatches
/// bubbling `input` and `change` events on that element.
///
/// Errors:
/// - `error.InvalidParam` when params are missing, no id is supplied, or the
///   node is not an input, textarea or select element.
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InternalError` when setting the value fails (the cause is logged).
/// - Any error from creating the events is propagated.
///
/// Event dispatch is best-effort: a dispatch failure is logged but does not
/// fail the command, since the value has already been written.
fn fillNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
        text: []const u8,
    };
    const params = (try cmd.params(Params)) orelse return error.InvalidParam;

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    // `nodeId` wins when both identifiers are supplied.
    const input_node_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
    const node = bc.node_registry.lookup_by_id.get(input_node_id) orelse return error.InvalidNodeId;

    const el = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;

    if (el.is(DOMNode.Element.Html.Input)) |input| {
        input.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill input failed", .{ .err = err });
            return error.InternalError;
        };
    } else if (el.is(DOMNode.Element.Html.TextArea)) |textarea| {
        textarea.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill textarea failed", .{ .err = err });
            return error.InternalError;
        };
    } else if (el.is(DOMNode.Element.Html.Select)) |select| {
        select.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill select failed", .{ .err = err });
            return error.InternalError;
        };
    } else {
        return error.InvalidParam;
    }

    const Event = @import("../../browser/webapi/Event.zig");

    // Notify listeners of the new value. Failures are logged rather than
    // silently discarded so a broken listener is visible in the logs.
    const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
        log.err(.cdp, "fill input event dispatch failed", .{ .err = err });
    };

    const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
        log.err(.cdp, "fill change event dispatch failed", .{ .err = err });
    };

    return cmd.sendResult(.{}, .{});
}
/// CDP handler that scrolls either a specific element or the window.
///
/// When `nodeId` (preferred) or `backendNodeId` is supplied, only the axes
/// present in the params are updated on that element and a bubbling `scroll`
/// event is dispatched on it. When no id is supplied, the window is scrolled
/// to (`x`, `y`), with a missing axis treated as 0.
///
/// Errors:
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InvalidParam` when the node is not an element.
/// - `error.InternalError` when scrolling the window fails (cause is logged).
/// - Any error from creating the `scroll` event is propagated.
///
/// Element scroll updates and event dispatch are best-effort: failures are
/// logged but do not fail the command.
fn scrollNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
        x: ?i32 = null,
        y: ?i32 = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const input_node_id = params.nodeId orelse params.backendNodeId;
    if (input_node_id) |node_id| {
        const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId;
        const el = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;

        // Only touch the axes the caller actually specified.
        if (params.x) |left| {
            el.setScrollLeft(left, page) catch |err| {
                log.err(.cdp, "set scrollLeft failed", .{ .err = err });
            };
        }
        if (params.y) |top| {
            el.setScrollTop(top, page) catch |err| {
                log.err(.cdp, "set scrollTop failed", .{ .err = err });
            };
        }

        const Event = @import("../../browser/webapi/Event.zig");
        const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
        page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| {
            log.err(.cdp, "scroll event dispatch failed", .{ .err = err });
        };
    } else {
        // No target node: scroll the window, defaulting a missing axis to 0.
        const x = params.x orelse 0;
        const y = params.y orelse 0;
        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
            log.err(.cdp, "scroll failed", .{ .err = err });
            return error.InternalError;
        };
    }

    return cmd.sendResult(.{}, .{});
}
const testing = @import("../testing.zig");
test "cdp.lp: getMarkdown" {
var ctx = testing.context();

View File

@@ -98,6 +98,47 @@ pub const tool_list = [_]protocol.Tool{
\\}
),
},
.{
.name = "click",
.description = "Click on an interactive element.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to click." }
\\ },
\\ "required": ["backendNodeId"]
\\}
),
},
.{
.name = "fill",
.description = "Fill text into an input element.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the input element to fill." },
\\ "text": { "type": "string", "description": "The text to fill into the input element." }
\\ },
\\ "required": ["backendNodeId", "text"]
\\}
),
},
.{
.name = "scroll",
.description = "Scroll the page or a specific element.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "Optional: The backend node ID of the element to scroll. If omitted, scrolls the window." },
\\ "x": { "type": "integer", "description": "Optional: The horizontal scroll offset." },
\\ "y": { "type": "integer", "description": "Optional: The vertical scroll offset." }
\\ }
\\}
),
},
};
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -182,6 +223,9 @@ const ToolAction = enum {
structuredData,
evaluate,
semantic_tree,
click,
fill,
scroll,
};
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -193,6 +237,9 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "structuredData", .structuredData },
.{ "evaluate", .evaluate },
.{ "semantic_tree", .semantic_tree },
.{ "click", .click },
.{ "fill", .fill },
.{ "scroll", .scroll },
});
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -221,6 +268,9 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
.click => try handleClick(server, arena, req.id.?, call_params.arguments),
.fill => try handleFill(server, arena, req.id.?, call_params.arguments),
.scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
}
}
@@ -380,6 +430,131 @@ fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `click` tool: clicks the HTML element identified by `backendNodeId`.
///
/// Replies with an MCP error when no page is loaded, the node id is not in
/// the registry, the node is not an HTML element, or the click fails (the
/// cause is logged). On success replies with a short confirmation text.
fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const ClickParams = struct {
        backendNodeId: CDPNode.Id,
    };
    const args = try parseArguments(ClickParams, arena, arguments, server, id, "click");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };
    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
        return server.sendError(id, .InvalidParams, "Node not found");
    };

    // Narrow the node step by step so each failure gets its own message.
    const element = node.dom.is(Element) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an element");
    };
    const html_element = element.is(Element.Html) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an HTML element");
    };

    html_element.click(page) catch |err| {
        log.err(.mcp, "click failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to click element");
    };

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Clicked successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `fill` tool: sets the value of the input, textarea or select element
/// identified by `backendNodeId` to `text`, then dispatches bubbling `input`
/// and `change` events on that element.
///
/// Replies with an MCP error when no page is loaded, the node id is not in
/// the registry, the node is not an input/textarea/select element, or
/// setting the value fails (the cause is logged). Errors from creating the
/// events are propagated. On success replies with a short confirmation text.
///
/// Event dispatch is best-effort: a dispatch failure is logged but does not
/// fail the tool call, since the value has already been written.
fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const FillParams = struct {
        backendNodeId: CDPNode.Id,
        text: []const u8,
    };
    const args = try parseArguments(FillParams, arena, arguments, server, id, "fill");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };
    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
        return server.sendError(id, .InvalidParams, "Node not found");
    };
    const el = node.dom.is(Element) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an element");
    };

    if (el.is(Element.Html.Input)) |input| {
        input.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill input failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill input");
        };
    } else if (el.is(Element.Html.TextArea)) |textarea| {
        textarea.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill textarea failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill textarea");
        };
    } else if (el.is(Element.Html.Select)) |select| {
        select.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill select failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill select");
        };
    } else {
        return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
    }

    const Event = @import("../browser/webapi/Event.zig");

    // Notify listeners of the new value. Failures are logged rather than
    // silently discarded so a broken listener is visible in the logs.
    const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
        log.err(.mcp, "fill input event dispatch failed", .{ .err = err });
    };

    const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
        log.err(.mcp, "fill change event dispatch failed", .{ .err = err });
    };

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Filled successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `scroll` tool: scrolls either a specific element or the window.
///
/// When `backendNodeId` is supplied, only the axes present in the arguments
/// are updated on that element and a bubbling `scroll` event is dispatched
/// on it. When it is omitted, the window is scrolled to (`x`, `y`), with a
/// missing axis treated as 0.
///
/// Replies with an MCP error when no page is loaded, the node id is not in
/// the registry, the node is not an element, or scrolling the window fails
/// (the cause is logged). Errors from creating the `scroll` event are
/// propagated. On success replies with a short confirmation text.
///
/// Element scroll updates and event dispatch are best-effort: failures are
/// logged but do not fail the tool call.
fn handleScroll(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const ScrollParams = struct {
        backendNodeId: ?CDPNode.Id = null,
        x: ?i32 = null,
        y: ?i32 = null,
    };
    const args = try parseArguments(ScrollParams, arena, arguments, server, id, "scroll");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    if (args.backendNodeId) |node_id| {
        const node = server.node_registry.lookup_by_id.get(node_id) orelse {
            return server.sendError(id, .InvalidParams, "Node not found");
        };
        const el = node.dom.is(Element) orelse {
            return server.sendError(id, .InvalidParams, "Node is not an element");
        };

        // Only touch the axes the caller actually specified.
        if (args.x) |left| {
            el.setScrollLeft(left, page) catch |err| {
                log.err(.mcp, "set scrollLeft failed", .{ .err = err });
            };
        }
        if (args.y) |top| {
            el.setScrollTop(top, page) catch |err| {
                log.err(.mcp, "set scrollTop failed", .{ .err = err });
            };
        }

        const Event = @import("../browser/webapi/Event.zig");
        const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
        page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| {
            log.err(.mcp, "scroll event dispatch failed", .{ .err = err });
        };
    } else {
        // No target node: scroll the window, defaulting a missing axis to 0.
        const x = args.x orelse 0;
        const y = args.y orelse 0;
        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
            log.err(.mcp, "scroll failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to scroll");
        };
    }

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Scrolled successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
fn parseArguments(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value, server: *Server, id: std.json.Value, tool_name: []const u8) !T {
if (arguments == null) {
try server.sendError(id, .InvalidParams, "Missing arguments");