diff --git a/src/browser/Page.zig b/src/browser/Page.zig index b82ca41d..c46a99cd 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -62,6 +62,7 @@ const storage = @import("webapi/storage/storage.zig"); const PageTransitionEvent = @import("webapi/event/PageTransitionEvent.zig"); const NavigationKind = @import("webapi/navigation/root.zig").NavigationKind; const KeyboardEvent = @import("webapi/event/KeyboardEvent.zig"); +const MouseEvent = @import("webapi/event/MouseEvent.zig"); const HttpClient = @import("HttpClient.zig"); const ArenaPool = App.ArenaPool; @@ -3271,14 +3272,14 @@ pub fn triggerMouseClick(self: *Page, x: f64, y: f64) !void { .type = self._type, }); } - const event = (try @import("webapi/event/MouseEvent.zig").initTrusted(comptime .wrap("click"), .{ + const mouse_event: *MouseEvent = try .initTrusted(comptime .wrap("click"), .{ .bubbles = true, .cancelable = true, .composed = true, .clientX = x, .clientY = y, - }, self)).asEvent(); - try self._event_manager.dispatch(target.asEventTarget(), event); + }, self); + try self._event_manager.dispatch(target.asEventTarget(), mouse_event.asEvent()); } // callback when the "click" event reaches the pages. diff --git a/src/browser/actions.zig b/src/browser/actions.zig new file mode 100644 index 00000000..951f2b1e --- /dev/null +++ b/src/browser/actions.zig @@ -0,0 +1,104 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +const std = @import("std"); +const lp = @import("../lightpanda.zig"); +const DOMNode = @import("webapi/Node.zig"); +const Element = @import("webapi/Element.zig"); +const Event = @import("webapi/Event.zig"); +const MouseEvent = @import("webapi/event/MouseEvent.zig"); +const Page = @import("Page.zig"); + +pub fn click(node: *DOMNode, page: *Page) !void { + const el = node.is(Element) orelse return error.InvalidNodeType; + + const mouse_event: *MouseEvent = try .initTrusted(comptime .wrap("click"), .{ + .bubbles = true, + .cancelable = true, + .composed = true, + .clientX = 0, + .clientY = 0, + }, page); + + page._event_manager.dispatch(el.asEventTarget(), mouse_event.asEvent()) catch |err| { + lp.log.err(.app, "click failed", .{ .err = err }); + return error.ActionFailed; + }; +} + +pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void { + const el = node.is(Element) orelse return error.InvalidNodeType; + + if (el.is(Element.Html.Input)) |input| { + input.setValue(text, page) catch |err| { + lp.log.err(.app, "fill input failed", .{ .err = err }); + return error.ActionFailed; + }; + } else if (el.is(Element.Html.TextArea)) |textarea| { + textarea.setValue(text, page) catch |err| { + lp.log.err(.app, "fill textarea failed", .{ .err = err }); + return error.ActionFailed; + }; + } else if (el.is(Element.Html.Select)) |select| { + select.setValue(text, page) catch |err| { + lp.log.err(.app, "fill select failed", .{ .err = err }); + return error.ActionFailed; + }; + } else { + return error.InvalidNodeType; + } + + const input_evt: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page); + page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| { + lp.log.err(.app, "dispatch input event failed", .{ .err = err }); + }; + + const change_evt: *Event = try .initTrusted(comptime .wrap("change"), .{ 
.bubbles = true }, page); + page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| { + lp.log.err(.app, "dispatch change event failed", .{ .err = err }); + }; +} + +pub fn scroll(node: ?*DOMNode, x: ?i32, y: ?i32, page: *Page) !void { + if (node) |n| { + const el = n.is(Element) orelse return error.InvalidNodeType; + + if (x) |val| { + el.setScrollLeft(val, page) catch |err| { + lp.log.err(.app, "setScrollLeft failed", .{ .err = err }); + return error.ActionFailed; + }; + } + if (y) |val| { + el.setScrollTop(val, page) catch |err| { + lp.log.err(.app, "setScrollTop failed", .{ .err = err }); + return error.ActionFailed; + }; + } + + const scroll_evt: *Event = try .initTrusted(comptime .wrap("scroll"), .{ .bubbles = true }, page); + page._event_manager.dispatch(el.asEventTarget(), scroll_evt) catch |err| { + lp.log.err(.app, "dispatch scroll event failed", .{ .err = err }); + }; + } else { + page.window.scrollTo(.{ .x = x orelse 0 }, y, page) catch |err| { + lp.log.err(.app, "scroll failed", .{ .err = err }); + return error.ActionFailed; + }; + } +} diff --git a/src/browser/tests/mcp_actions.html b/src/browser/tests/mcp_actions.html new file mode 100644 index 00000000..88cb70b1 --- /dev/null +++ b/src/browser/tests/mcp_actions.html @@ -0,0 +1,14 @@ + + + + + + +
+
Long content
+
+ + diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index 2026b17d..19fc8cac 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -32,6 +32,9 @@ pub fn processMessage(cmd: anytype) !void { getSemanticTree, getInteractiveElements, getStructuredData, + clickNode, + fillNode, + scrollNode, }, cmd.input.action) orelse return error.UnknownMethod; switch (action) { @@ -39,6 +42,9 @@ pub fn processMessage(cmd: anytype) !void { .getSemanticTree => return getSemanticTree(cmd), .getInteractiveElements => return getInteractiveElements(cmd), .getStructuredData => return getStructuredData(cmd), + .clickNode => return clickNode(cmd), + .fillNode => return fillNode(cmd), + .scrollNode => return scrollNode(cmd), } } @@ -146,6 +152,76 @@ fn getStructuredData(cmd: anytype) !void { }, .{}); } +fn clickNode(cmd: anytype) !void { + const Params = struct { + nodeId: ?Node.Id = null, + backendNodeId: ?Node.Id = null, + }; + const params = (try cmd.params(Params)) orelse return error.InvalidParam; + + const bc = cmd.browser_context orelse return error.NoBrowserContext; + const page = bc.session.currentPage() orelse return error.PageNotLoaded; + + const node_id = params.nodeId orelse params.backendNodeId orelse return error.InvalidParam; + const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId; + + lp.actions.click(node.dom, page) catch |err| { + if (err == error.InvalidNodeType) return error.InvalidParam; + return error.InternalError; + }; + + return cmd.sendResult(.{}, .{}); +} + +fn fillNode(cmd: anytype) !void { + const Params = struct { + nodeId: ?Node.Id = null, + backendNodeId: ?Node.Id = null, + text: []const u8, + }; + const params = (try cmd.params(Params)) orelse return error.InvalidParam; + + const bc = cmd.browser_context orelse return error.NoBrowserContext; + const page = bc.session.currentPage() orelse return error.PageNotLoaded; + + const node_id = params.nodeId orelse params.backendNodeId orelse return 
error.InvalidParam; + const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId; + + lp.actions.fill(node.dom, params.text, page) catch |err| { + if (err == error.InvalidNodeType) return error.InvalidParam; + return error.InternalError; + }; + + return cmd.sendResult(.{}, .{}); +} + +fn scrollNode(cmd: anytype) !void { + const Params = struct { + nodeId: ?Node.Id = null, + backendNodeId: ?Node.Id = null, + x: ?i32 = null, + y: ?i32 = null, + }; + const params = (try cmd.params(Params)) orelse return error.InvalidParam; + + const bc = cmd.browser_context orelse return error.NoBrowserContext; + const page = bc.session.currentPage() orelse return error.PageNotLoaded; + + const maybe_node_id = params.nodeId orelse params.backendNodeId; + + var target_node: ?*DOMNode = null; + if (maybe_node_id) |node_id| { + const node = bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId; + target_node = node.dom; + } + + lp.actions.scroll(target_node, params.x, params.y, page) catch |err| { + if (err == error.InvalidNodeType) return error.InvalidParam; + return error.InternalError; + }; + + return cmd.sendResult(.{}, .{}); +} const testing = @import("../testing.zig"); test "cdp.lp: getMarkdown" { var ctx = testing.context(); @@ -195,3 +271,63 @@ test "cdp.lp: getStructuredData" { const result = ctx.client.?.sent.items[0].object.get("result").?.object; try testing.expect(result.get("structuredData") != null); } + +test "cdp.lp: action tools" { + var ctx = testing.context(); + defer ctx.deinit(); + + const bc = try ctx.loadBrowserContext(.{}); + const page = try bc.session.createPage(); + const url = "http://localhost:9582/src/browser/tests/mcp_actions.html"; + try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } }); + _ = bc.session.wait(5000); + + // Test Click + const btn = page.document.getElementById("btn", page).?.asNode(); + const btn_id = (try bc.node_registry.register(btn)).id; + try 
ctx.processMessage(.{ + .id = 1, + .method = "LP.clickNode", + .params = .{ .backendNodeId = btn_id }, + }); + + // Test Fill Input + const inp = page.document.getElementById("inp", page).?.asNode(); + const inp_id = (try bc.node_registry.register(inp)).id; + try ctx.processMessage(.{ + .id = 2, + .method = "LP.fillNode", + .params = .{ .backendNodeId = inp_id, .text = "hello" }, + }); + + // Test Fill Select + const sel = page.document.getElementById("sel", page).?.asNode(); + const sel_id = (try bc.node_registry.register(sel)).id; + try ctx.processMessage(.{ + .id = 3, + .method = "LP.fillNode", + .params = .{ .backendNodeId = sel_id, .text = "opt2" }, + }); + + // Test Scroll + const scrollbox = page.document.getElementById("scrollbox", page).?.asNode(); + const scrollbox_id = (try bc.node_registry.register(scrollbox)).id; + try ctx.processMessage(.{ + .id = 4, + .method = "LP.scrollNode", + .params = .{ .backendNodeId = scrollbox_id, .y = 50 }, + }); + + // Evaluate assertions + var ls: lp.js.Local.Scope = undefined; + page.js.localScope(&ls); + defer ls.deinit(); + + var try_catch: lp.js.TryCatch = undefined; + try_catch.init(&ls.local); + defer try_catch.deinit(); + + const result = try ls.local.compileAndRun("window.clicked === true && window.inputVal === 'hello' && window.changed === true && window.selChanged === 'opt2' && window.scrolled === true", null); + + try testing.expect(result.isTrue()); +} diff --git a/src/lightpanda.zig b/src/lightpanda.zig index 4fac3921..a9c7a1f0 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -35,6 +35,7 @@ pub const markdown = @import("browser/markdown.zig"); pub const SemanticTree = @import("SemanticTree.zig"); pub const CDPNode = @import("cdp/Node.zig"); pub const interactive = @import("browser/interactive.zig"); +pub const actions = @import("browser/actions.zig"); pub const structured_data = @import("browser/structured_data.zig"); pub const mcp = @import("mcp.zig"); pub const build_config = 
@import("build_config"); diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index f5126be0..d8fd4ead 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -5,6 +5,7 @@ const log = lp.log; const js = lp.js; const Element = @import("../browser/webapi/Element.zig"); +const DOMNode = @import("../browser/webapi/Node.zig"); const Selector = @import("../browser/webapi/selector/Selector.zig"); const protocol = @import("protocol.zig"); const Server = @import("Server.zig"); @@ -98,6 +99,47 @@ pub const tool_list = [_]protocol.Tool{ \\} ), }, + .{ + .name = "click", + .description = "Click on an interactive element.", + .inputSchema = protocol.minify( + \\{ + \\ "type": "object", + \\ "properties": { + \\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to click." } + \\ }, + \\ "required": ["backendNodeId"] + \\} + ), + }, + .{ + .name = "fill", + .description = "Fill text into an input element.", + .inputSchema = protocol.minify( + \\{ + \\ "type": "object", + \\ "properties": { + \\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the input element to fill." }, + \\ "text": { "type": "string", "description": "The text to fill into the input element." } + \\ }, + \\ "required": ["backendNodeId", "text"] + \\} + ), + }, + .{ + .name = "scroll", + .description = "Scroll the page or a specific element.", + .inputSchema = protocol.minify( + \\{ + \\ "type": "object", + \\ "properties": { + \\ "backendNodeId": { "type": "integer", "description": "Optional: The backend node ID of the element to scroll. If omitted, scrolls the window." }, + \\ "x": { "type": "integer", "description": "Optional: The horizontal scroll offset." }, + \\ "y": { "type": "integer", "description": "Optional: The vertical scroll offset." 
} + \\ } + \\} + ), + }, }; pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { @@ -182,6 +224,9 @@ const ToolAction = enum { structuredData, evaluate, semantic_tree, + click, + fill, + scroll, }; const tool_map = std.StaticStringMap(ToolAction).initComptime(.{ @@ -193,6 +238,9 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{ .{ "structuredData", .structuredData }, .{ "evaluate", .evaluate }, .{ "semantic_tree", .semantic_tree }, + .{ "click", .click }, + .{ "fill", .fill }, + .{ "scroll", .scroll }, }); pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { @@ -221,6 +269,9 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque .structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments), .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments), .semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments), + .click => try handleClick(server, arena, req.id.?, call_params.arguments), + .fill => try handleFill(server, arena, req.id.?, call_params.arguments), + .scroll => try handleScroll(server, arena, req.id.?, call_params.arguments), } } @@ -380,6 +431,87 @@ fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value, try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content }); } +fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { + const ClickParams = struct { + backendNodeId: CDPNode.Id, + }; + const args = try parseArguments(ClickParams, arena, arguments, server, id, "click"); + + const page = server.session.currentPage() orelse { + return server.sendError(id, .PageNotLoaded, "Page not loaded"); + }; + + const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse { + return server.sendError(id, .InvalidParams, "Node not found"); + }; 
+ + lp.actions.click(node.dom, page) catch |err| { + if (err == error.InvalidNodeType) { + return server.sendError(id, .InvalidParams, "Node is not an HTML element"); + } + return server.sendError(id, .InternalError, "Failed to click element"); + }; + + const content = [_]protocol.TextContent([]const u8){.{ .text = "Clicked successfully." }}; + try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content }); +} + +fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { + const FillParams = struct { + backendNodeId: CDPNode.Id, + text: []const u8, + }; + const args = try parseArguments(FillParams, arena, arguments, server, id, "fill"); + + const page = server.session.currentPage() orelse { + return server.sendError(id, .PageNotLoaded, "Page not loaded"); + }; + + const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse { + return server.sendError(id, .InvalidParams, "Node not found"); + }; + + lp.actions.fill(node.dom, args.text, page) catch |err| { + if (err == error.InvalidNodeType) { + return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select"); + } + return server.sendError(id, .InternalError, "Failed to fill element"); + }; + + const content = [_]protocol.TextContent([]const u8){.{ .text = "Filled successfully." 
}}; + try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content }); +} + +fn handleScroll(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { + const ScrollParams = struct { + backendNodeId: ?CDPNode.Id = null, + x: ?i32 = null, + y: ?i32 = null, + }; + const args = try parseArguments(ScrollParams, arena, arguments, server, id, "scroll"); + + const page = server.session.currentPage() orelse { + return server.sendError(id, .PageNotLoaded, "Page not loaded"); + }; + + var target_node: ?*DOMNode = null; + if (args.backendNodeId) |node_id| { + const node = server.node_registry.lookup_by_id.get(node_id) orelse { + return server.sendError(id, .InvalidParams, "Node not found"); + }; + target_node = node.dom; + } + + lp.actions.scroll(target_node, args.x, args.y, page) catch |err| { + if (err == error.InvalidNodeType) { + return server.sendError(id, .InvalidParams, "Node is not an element"); + } + return server.sendError(id, .InternalError, "Failed to scroll"); + }; + + const content = [_]protocol.TextContent([]const u8){.{ .text = "Scrolled successfully." 
}}; + try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content }); +} fn parseArguments(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value, server: *Server, id: std.json.Value, tool_name: []const u8) !T { if (arguments == null) { try server.sendError(id, .InvalidParams, "Missing arguments"); @@ -455,3 +587,66 @@ test "MCP - evaluate error reporting" { \\} , out_alloc.writer.buffered()); } + +test "MCP - Actions: click, fill, scroll" { + defer testing.reset(); + const allocator = testing.allocator; + const app = testing.test_app; + + var out_alloc: std.io.Writer.Allocating = .init(testing.arena_allocator); + defer out_alloc.deinit(); + + var server = try Server.init(allocator, app, &out_alloc.writer); + defer server.deinit(); + + const aa = testing.arena_allocator; + const page = try server.session.createPage(); + const url = "http://localhost:9582/src/browser/tests/mcp_actions.html"; + try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } }); + _ = server.session.wait(5000); + + // Test Click + const btn = page.document.getElementById("btn", page).?.asNode(); + const btn_id = (try server.node_registry.register(btn)).id; + var btn_id_buf: [12]u8 = undefined; + const btn_id_str = std.fmt.bufPrint(&btn_id_buf, "{d}", .{btn_id}) catch unreachable; + const click_msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":\"click\",\"arguments\":{\"backendNodeId\":", btn_id_str, "}}}" }); + try router.handleMessage(server, aa, click_msg); + + // Test Fill Input + const inp = page.document.getElementById("inp", page).?.asNode(); + const inp_id = (try server.node_registry.register(inp)).id; + var inp_id_buf: [12]u8 = undefined; + const inp_id_str = std.fmt.bufPrint(&inp_id_buf, "{d}", .{inp_id}) catch unreachable; + const fill_msg = try std.mem.concat(aa, u8, &.{ 
"{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"fill\",\"arguments\":{\"backendNodeId\":", inp_id_str, ",\"text\":\"hello\"}}}" }); + try router.handleMessage(server, aa, fill_msg); + + // Test Fill Select + const sel = page.document.getElementById("sel", page).?.asNode(); + const sel_id = (try server.node_registry.register(sel)).id; + var sel_id_buf: [12]u8 = undefined; + const sel_id_str = std.fmt.bufPrint(&sel_id_buf, "{d}", .{sel_id}) catch unreachable; + const fill_sel_msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"fill\",\"arguments\":{\"backendNodeId\":", sel_id_str, ",\"text\":\"opt2\"}}}" }); + try router.handleMessage(server, aa, fill_sel_msg); + + // Test Scroll + const scrollbox = page.document.getElementById("scrollbox", page).?.asNode(); + const scrollbox_id = (try server.node_registry.register(scrollbox)).id; + var scroll_id_buf: [12]u8 = undefined; + const scroll_id_str = std.fmt.bufPrint(&scroll_id_buf, "{d}", .{scrollbox_id}) catch unreachable; + const scroll_msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":4,\"method\":\"tools/call\",\"params\":{\"name\":\"scroll\",\"arguments\":{\"backendNodeId\":", scroll_id_str, ",\"y\":50}}}" }); + try router.handleMessage(server, aa, scroll_msg); + + // Evaluate assertions + var ls: js.Local.Scope = undefined; + page.js.localScope(&ls); + defer ls.deinit(); + + var try_catch: js.TryCatch = undefined; + try_catch.init(&ls.local); + defer try_catch.deinit(); + + const result = try ls.local.compileAndRun("window.clicked === true && window.inputVal === 'hello' && window.changed === true && window.selChanged === 'opt2' && window.scrolled === true", null); + + try testing.expect(result.isTrue()); +}