mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 04:34:44 +00:00
Add click, fill, and scroll interaction tools
Adds click, fill, and scroll functionality to both CDP and MCP to support programmatic browser interactions.
This commit is contained in:
@@ -32,6 +32,9 @@ pub fn processMessage(cmd: anytype) !void {
|
||||
getSemanticTree,
|
||||
getInteractiveElements,
|
||||
getStructuredData,
|
||||
clickNode,
|
||||
fillNode,
|
||||
scrollNode,
|
||||
}, cmd.input.action) orelse return error.UnknownMethod;
|
||||
|
||||
switch (action) {
|
||||
@@ -39,6 +42,9 @@ pub fn processMessage(cmd: anytype) !void {
|
||||
.getSemanticTree => return getSemanticTree(cmd),
|
||||
.getInteractiveElements => return getInteractiveElements(cmd),
|
||||
.getStructuredData => return getStructuredData(cmd),
|
||||
.clickNode => return clickNode(cmd),
|
||||
.fillNode => return fillNode(cmd),
|
||||
.scrollNode => return scrollNode(cmd),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,6 +152,126 @@ fn getStructuredData(cmd: anytype) !void {
|
||||
}, .{});
|
||||
}
|
||||
|
||||
/// Handles the `clickNode` command: resolves the target node and clicks it.
///
/// The node is identified by `nodeId` or, when that is absent, by
/// `backendNodeId`.
///
/// Errors:
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidParam` when neither id is supplied, or the node is not an
///   HTML element.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InternalError` when the click itself fails (the cause is logged).
fn clickNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const target_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
    const node = bc.node_registry.lookup_by_id.get(target_id) orelse return error.InvalidNodeId;

    // Only HTML elements can be clicked; anything else is a caller error.
    const element = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;
    const html_element = element.is(DOMNode.Element.Html) orelse return error.InvalidParam;

    html_element.click(page) catch |err| {
        log.err(.cdp, "click failed", .{ .err = err });
        return error.InternalError;
    };

    return cmd.sendResult(.{}, .{});
}
|
||||
|
||||
/// Handles the `fillNode` command: sets the value of an input, textarea or
/// select element and then dispatches bubbling, trusted `input` and `change`
/// events on it.
///
/// The node is identified by `nodeId` or, when that is absent, by
/// `backendNodeId`. `text` is required.
///
/// Errors:
/// - `error.InvalidParam` when params are missing, no id is supplied, or the
///   node is not an input, textarea or select element.
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InternalError` when setting the value fails (the cause is logged).
///
/// Event dispatch is best-effort: a dispatch failure is logged but does not
/// fail the command, because the value has already been written.
fn fillNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
        text: []const u8,
    };
    const params = (try cmd.params(Params)) orelse return error.InvalidParam;

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const input_node_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam;
    const node = bc.node_registry.lookup_by_id.get(input_node_id) orelse return error.InvalidNodeId;

    const el = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;

    if (el.is(DOMNode.Element.Html.Input)) |input| {
        input.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill input failed", .{ .err = err });
            return error.InternalError;
        };
    } else if (el.is(DOMNode.Element.Html.TextArea)) |textarea| {
        textarea.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill textarea failed", .{ .err = err });
            return error.InternalError;
        };
    } else if (el.is(DOMNode.Element.Html.Select)) |select| {
        select.setValue(params.text, page) catch |err| {
            log.err(.cdp, "fill select failed", .{ .err = err });
            return error.InternalError;
        };
    } else {
        return error.InvalidParam;
    }

    const Event = @import("../../browser/webapi/Event.zig");

    // Notify page scripts of the change. Failures are logged rather than
    // silently dropped so a broken listener path is diagnosable.
    const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
        log.err(.cdp, "dispatch input event failed", .{ .err = err });
    };

    const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
        log.err(.cdp, "dispatch change event failed", .{ .err = err });
    };

    return cmd.sendResult(.{}, .{});
}
|
||||
|
||||
/// Handles the `scrollNode` command: scrolls a specific element, or the
/// window when no node id is supplied.
///
/// With a node id (`nodeId`, else `backendNodeId`), only the axes that were
/// actually provided (`x` and/or `y`) are updated, and a bubbling, trusted
/// `scroll` event is dispatched on the element. Without a node id, the window
/// is scrolled with missing axes defaulting to 0.
///
/// Errors:
/// - `error.NoBrowserContext` / `error.PageNotLoaded` when there is no
///   browser context or current page.
/// - `error.InvalidNodeId` when the id is not present in the node registry.
/// - `error.InvalidParam` when the node is not an element.
/// - `error.InternalError` when applying the scroll offset fails (the cause
///   is logged). This matches the window branch, so a failed scroll is never
///   reported as success.
///
/// Dispatching the `scroll` event is best-effort: a failure is logged but
/// does not fail the command, because the offsets have already been applied.
fn scrollNode(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
        backendNodeId: ?Node.Id = null,
        x: ?i32 = null,
        y: ?i32 = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const input_node_id = params.nodeId orelse params.backendNodeId;

    if (input_node_id) |node_id| {
        const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId;
        const el = node.dom.is(DOMNode.Element) orelse return error.InvalidParam;

        if (params.x) |left| {
            el.setScrollLeft(left, page) catch |err| {
                log.err(.cdp, "setScrollLeft failed", .{ .err = err });
                return error.InternalError;
            };
        }
        if (params.y) |top| {
            el.setScrollTop(top, page) catch |err| {
                log.err(.cdp, "setScrollTop failed", .{ .err = err });
                return error.InternalError;
            };
        }

        const Event = @import("../../browser/webapi/Event.zig");
        const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
        page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| {
            log.err(.cdp, "dispatch scroll event failed", .{ .err = err });
        };
    } else {
        // Window scroll: axes that were not provided default to 0.
        const x = params.x orelse 0;
        const y = params.y orelse 0;
        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
            log.err(.cdp, "scroll failed", .{ .err = err });
            return error.InternalError;
        };
    }

    return cmd.sendResult(.{}, .{});
}
|
||||
|
||||
const testing = @import("../testing.zig");
|
||||
test "cdp.lp: getMarkdown" {
|
||||
var ctx = testing.context();
|
||||
|
||||
@@ -98,6 +98,47 @@ pub const tool_list = [_]protocol.Tool{
|
||||
\\}
|
||||
),
|
||||
},
|
||||
.{
|
||||
.name = "click",
|
||||
.description = "Click on an interactive element.",
|
||||
.inputSchema = protocol.minify(
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
\\ "properties": {
|
||||
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to click." }
|
||||
\\ },
|
||||
\\ "required": ["backendNodeId"]
|
||||
\\}
|
||||
),
|
||||
},
|
||||
.{
|
||||
.name = "fill",
|
||||
.description = "Fill text into an input element.",
|
||||
.inputSchema = protocol.minify(
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
\\ "properties": {
|
||||
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the input element to fill." },
|
||||
\\ "text": { "type": "string", "description": "The text to fill into the input element." }
|
||||
\\ },
|
||||
\\ "required": ["backendNodeId", "text"]
|
||||
\\}
|
||||
),
|
||||
},
|
||||
.{
|
||||
.name = "scroll",
|
||||
.description = "Scroll the page or a specific element.",
|
||||
.inputSchema = protocol.minify(
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
\\ "properties": {
|
||||
\\ "backendNodeId": { "type": "integer", "description": "Optional: The backend node ID of the element to scroll. If omitted, scrolls the window." },
|
||||
\\ "x": { "type": "integer", "description": "Optional: The horizontal scroll offset." },
|
||||
\\ "y": { "type": "integer", "description": "Optional: The vertical scroll offset." }
|
||||
\\ }
|
||||
\\}
|
||||
),
|
||||
},
|
||||
};
|
||||
|
||||
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
|
||||
@@ -182,6 +223,9 @@ const ToolAction = enum {
|
||||
structuredData,
|
||||
evaluate,
|
||||
semantic_tree,
|
||||
click,
|
||||
fill,
|
||||
scroll,
|
||||
};
|
||||
|
||||
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
|
||||
@@ -193,6 +237,9 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
|
||||
.{ "structuredData", .structuredData },
|
||||
.{ "evaluate", .evaluate },
|
||||
.{ "semantic_tree", .semantic_tree },
|
||||
.{ "click", .click },
|
||||
.{ "fill", .fill },
|
||||
.{ "scroll", .scroll },
|
||||
});
|
||||
|
||||
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
|
||||
@@ -221,6 +268,9 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
|
||||
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
|
||||
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
|
||||
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
|
||||
.click => try handleClick(server, arena, req.id.?, call_params.arguments),
|
||||
.fill => try handleFill(server, arena, req.id.?, call_params.arguments),
|
||||
.scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,6 +430,131 @@ fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
|
||||
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
|
||||
}
|
||||
|
||||
/// MCP `click` tool: clicks the HTML element identified by `backendNodeId`.
///
/// Sends a protocol error (rather than returning a Zig error) when no page is
/// loaded, the node is unknown, the node is not an HTML element, or the click
/// fails. On success sends a text result of "Clicked successfully.".
fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const ClickParams = struct {
        backendNodeId: CDPNode.Id,
    };
    const args = try parseArguments(ClickParams, arena, arguments, server, id, "click");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
        return server.sendError(id, .InvalidParams, "Node not found");
    };

    // Narrow the node step by step; each failed narrowing is a caller error.
    const element = node.dom.is(Element) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an element");
    };
    const html_element = element.is(Element.Html) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an HTML element");
    };

    html_element.click(page) catch |err| {
        log.err(.mcp, "click failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to click element");
    };

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Clicked successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
|
||||
|
||||
/// MCP `fill` tool: writes `text` into the input, textarea or select element
/// identified by `backendNodeId`, then dispatches bubbling, trusted `input`
/// and `change` events on it.
///
/// Sends a protocol error when no page is loaded, the node is unknown, the
/// node is not a fillable element, or setting the value fails. On success
/// sends a text result of "Filled successfully.".
///
/// Event dispatch is best-effort: a dispatch failure is logged but does not
/// fail the tool call, because the value has already been written.
fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const FillParams = struct {
        backendNodeId: CDPNode.Id,
        text: []const u8,
    };
    const args = try parseArguments(FillParams, arena, arguments, server, id, "fill");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
        return server.sendError(id, .InvalidParams, "Node not found");
    };

    const el = node.dom.is(Element) orelse {
        return server.sendError(id, .InvalidParams, "Node is not an element");
    };

    if (el.is(Element.Html.Input)) |input| {
        input.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill input failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill input");
        };
    } else if (el.is(Element.Html.TextArea)) |textarea| {
        textarea.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill textarea failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill textarea");
        };
    } else if (el.is(Element.Html.Select)) |select| {
        select.setValue(args.text, page) catch |err| {
            log.err(.mcp, "fill select failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to fill select");
        };
    } else {
        return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
    }

    const Event = @import("../browser/webapi/Event.zig");

    // Notify page scripts of the change. Failures are logged rather than
    // silently dropped so a broken listener path is diagnosable.
    const input_evt = try Event.initTrusted(comptime lp.String.wrap("input"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
        log.err(.mcp, "dispatch input event failed", .{ .err = err });
    };

    const change_evt = try Event.initTrusted(comptime lp.String.wrap("change"), .{ .bubbles = true }, page);
    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
        log.err(.mcp, "dispatch change event failed", .{ .err = err });
    };

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Filled successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
|
||||
|
||||
/// MCP `scroll` tool: scrolls the element identified by `backendNodeId`, or
/// the window when no id is supplied.
///
/// For an element, only the axes that were actually provided (`x` and/or
/// `y`) are updated, and a bubbling, trusted `scroll` event is dispatched on
/// it. For the window, missing axes default to 0.
///
/// Sends a protocol error when no page is loaded, the node is unknown, the
/// node is not an element, or applying the scroll offset fails. The last case
/// matches the window branch, so a failed scroll is never reported as
/// "Scrolled successfully.".
///
/// Dispatching the `scroll` event is best-effort: a failure is logged but
/// does not fail the tool call, because the offsets have already been applied.
fn handleScroll(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const ScrollParams = struct {
        backendNodeId: ?CDPNode.Id = null,
        x: ?i32 = null,
        y: ?i32 = null,
    };
    const args = try parseArguments(ScrollParams, arena, arguments, server, id, "scroll");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    if (args.backendNodeId) |node_id| {
        const node = server.node_registry.lookup_by_id.get(node_id) orelse {
            return server.sendError(id, .InvalidParams, "Node not found");
        };
        const el = node.dom.is(Element) orelse {
            return server.sendError(id, .InvalidParams, "Node is not an element");
        };

        if (args.x) |left| {
            el.setScrollLeft(left, page) catch |err| {
                log.err(.mcp, "setScrollLeft failed", .{ .err = err });
                return server.sendError(id, .InternalError, "Failed to scroll");
            };
        }
        if (args.y) |top| {
            el.setScrollTop(top, page) catch |err| {
                log.err(.mcp, "setScrollTop failed", .{ .err = err });
                return server.sendError(id, .InternalError, "Failed to scroll");
            };
        }

        const Event = @import("../browser/webapi/Event.zig");
        const scroll_evt = try Event.initTrusted(comptime lp.String.wrap("scroll"), .{ .bubbles = true }, page);
        page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| {
            log.err(.mcp, "dispatch scroll event failed", .{ .err = err });
        };
    } else {
        // Window scroll: axes that were not provided default to 0.
        const x = args.x orelse 0;
        const y = args.y orelse 0;
        page.window.scrollTo(.{ .x = x }, y, page) catch |err| {
            log.err(.mcp, "scroll failed", .{ .err = err });
            return server.sendError(id, .InternalError, "Failed to scroll");
        };
    }

    const content = [_]protocol.TextContent([]const u8){.{ .text = "Scrolled successfully." }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
|
||||
|
||||
fn parseArguments(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value, server: *Server, id: std.json.Value, tool_name: []const u8) !T {
|
||||
if (arguments == null) {
|
||||
try server.sendError(id, .InvalidParams, "Missing arguments");
|
||||
|
||||
Reference in New Issue
Block a user