diff --git a/src/browser/actions.zig b/src/browser/actions.zig
index 37d17ccb..198e92ee 100644
--- a/src/browser/actions.zig
+++ b/src/browser/actions.zig
@@ -22,10 +22,26 @@ const DOMNode = @import("webapi/Node.zig");
 const Element = @import("webapi/Element.zig");
 const Event = @import("webapi/Event.zig");
 const MouseEvent = @import("webapi/event/MouseEvent.zig");
+const KeyboardEvent = @import("webapi/event/KeyboardEvent.zig");
 const Page = @import("Page.zig");
 const Session = @import("Session.zig");
 const Selector = @import("webapi/selector/Selector.zig");
 
+/// Dispatches an `input` event and then a `change` event on `el`, both created
+/// with `initTrusted` and `.bubbles = true`. An event-creation failure is
+/// returned to the caller; a dispatch failure is only logged.
+fn dispatchInputAndChangeEvents(el: *Element, page: *Page) !void {
+    const input_evt: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page);
+    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
+        lp.log.err(.app, "dispatch input event failed", .{ .err = err });
+    };
+
+    const change_evt: *Event = try .initTrusted(comptime .wrap("change"), .{ .bubbles = true }, page);
+    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
+        lp.log.err(.app, "dispatch change event failed", .{ .err = err });
+    };
+}
+
 pub fn click(node: *DOMNode, page: *Page) !void {
     const el = node.is(Element) orelse return error.InvalidNodeType;
 
@@ -43,9 +59,128 @@ pub fn click(node: *DOMNode, page: *Page) !void {
     };
 }
 
+/// Simulates hovering `node`: dispatches `mouseover` (bubbles, cancelable,
+/// composed) and then `mouseenter` (only `composed` set) on it.
+/// Returns `error.InvalidNodeType` when `node` is not an `Element` and
+/// `error.ActionFailed` when either dispatch fails (the cause is logged).
+pub fn hover(node: *DOMNode, page: *Page) !void {
+    const el = node.is(Element) orelse return error.InvalidNodeType;
+
+    const mouseover_event: *MouseEvent = try .initTrusted(comptime .wrap("mouseover"), .{
+        .bubbles = true,
+        .cancelable = true,
+        .composed = true,
+    }, page);
+
+    page._event_manager.dispatch(el.asEventTarget(), mouseover_event.asEvent()) catch |err| {
+        lp.log.err(.app, "hover mouseover failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+
+    const mouseenter_event: *MouseEvent = try .initTrusted(comptime .wrap("mouseenter"), .{
+        .composed = true,
+    }, page);
+
+    page._event_manager.dispatch(el.asEventTarget(), mouseenter_event.asEvent()) catch |err| {
+        lp.log.err(.app, "hover mouseenter failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+}
+
+/// Simulates a key press: dispatches `keydown` and then `keyup`, both
+/// carrying `key`, on `node`, or on the page's document when `node` is null.
+/// Returns `error.InvalidNodeType` when a non-null `node` is not an
+/// `Element` and `error.ActionFailed` when either dispatch fails (the cause
+/// is logged).
+pub fn press(node: ?*DOMNode, key: []const u8, page: *Page) !void {
+    const target = if (node) |n|
+        (n.is(Element) orelse return error.InvalidNodeType).asEventTarget()
+    else
+        page.document.asNode().asEventTarget();
+
+    const keydown_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keydown"), .{
+        .bubbles = true,
+        .cancelable = true,
+        .composed = true,
+        .key = key,
+    }, page);
+
+    page._event_manager.dispatch(target, keydown_event.asEvent()) catch |err| {
+        lp.log.err(.app, "press keydown failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+
+    const keyup_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keyup"), .{
+        .bubbles = true,
+        .cancelable = true,
+        .composed = true,
+        .key = key,
+    }, page);
+
+    page._event_manager.dispatch(target, keyup_event.asEvent()) catch |err| {
+        lp.log.err(.app, "press keyup failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+}
+
+/// Sets the value of a select element to `value`, then dispatches `input`
+/// and `change` on it.
+/// Returns `error.InvalidNodeType` when `node` is not an
+/// `Element.Html.Select` and `error.ActionFailed` when `setValue` fails.
+pub fn selectOption(node: *DOMNode, value: []const u8, page: *Page) !void {
+    const el = node.is(Element) orelse return error.InvalidNodeType;
+    const select = el.is(Element.Html.Select) orelse return error.InvalidNodeType;
+
+    select.setValue(value, page) catch |err| {
+        lp.log.err(.app, "select setValue failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+
+    try dispatchInputAndChangeEvents(el, page);
+}
+
+/// Sets the checked state of a checkbox or radio input to `checked`, then
+/// dispatches `click`, `input` and `change` on it.
+/// Returns `error.InvalidNodeType` when `node` is not an input of type
+/// checkbox or radio, and `error.ActionFailed` when `setChecked` fails. A
+/// failing `click` dispatch is only logged.
+pub fn setChecked(node: *DOMNode, checked: bool, page: *Page) !void {
+    const el = node.is(Element) orelse return error.InvalidNodeType;
+    const input = el.is(Element.Html.Input) orelse return error.InvalidNodeType;
+
+    if (input._input_type != .checkbox and input._input_type != .radio) {
+        return error.InvalidNodeType;
+    }
+
+    input.setChecked(checked, page) catch |err| {
+        lp.log.err(.app, "setChecked failed", .{ .err = err });
+        return error.ActionFailed;
+    };
+
+    // Match browser event order: click fires first, then input and change.
+    // NOTE(review): the state is already set above; confirm that dispatching
+    // `click` here does not toggle the input again — TODO verify.
+    const click_event: *MouseEvent = try .initTrusted(comptime .wrap("click"), .{
+        .bubbles = true,
+        .cancelable = true,
+        .composed = true,
+    }, page);
+
+    page._event_manager.dispatch(el.asEventTarget(), click_event.asEvent()) catch |err| {
+        lp.log.err(.app, "dispatch click event failed", .{ .err = err });
+    };
+
+    try dispatchInputAndChangeEvents(el, page);
+}
+
 pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
     const el = node.is(Element) orelse return error.InvalidNodeType;
 
+    // Focus the element first; a focus failure is logged and the fill continues.
+    el.focus(page) catch |err| {
+        lp.log.err(.app, "fill focus failed", .{ .err = err });
+    };
+
     if (el.is(Element.Html.Input)) |input| {
         input.setValue(text, page) catch |err| {
             lp.log.err(.app, "fill input failed", .{ .err = err });
@@ -65,15 +200,7 @@ pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
         return error.InvalidNodeType;
     }
 
-    const input_evt: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page);
-    page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
-        lp.log.err(.app, "dispatch input event failed", .{ .err = err });
-    };
-
-    const change_evt: *Event = try .initTrusted(comptime .wrap("change"), .{ .bubbles = true }, page);
-    page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
-        lp.log.err(.app, "dispatch change event failed", .{ .err = err });
-    };
+    try dispatchInputAndChangeEvents(el, page);
 }
 
 pub fn scroll(node: ?*DOMNode, x: ?i32, y: ?i32, page: *Page) !void {
diff --git a/src/browser/tests/mcp_actions.html b/src/browser/tests/mcp_actions.html
index 88cb70b1..f27c63ef 100644
--- a/src/browser/tests/mcp_actions.html
+++ b/src/browser/tests/mcp_actions.html
@@ -10,5 +10,20 @@
Long content
+
Hover Me
+
+
+
+
+
diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig
index 0237422d..b69df90f 100644
--- a/src/mcp/tools.zig
+++ b/src/mcp/tools.zig
@@ -175,6 +175,74 @@ pub const tool_list = [_]protocol.Tool{
             \\}
         ),
     },
+    .{
+        .name = "hover",
+        .description = "Hover over an element, triggering mouseover and mouseenter events. Useful for menus, tooltips, and hover states.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to hover over." }
+            \\  },
+            \\  "required": ["backendNodeId"]
+            \\}
+        ),
+    },
+    .{
+        .name = "press",
+        .description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "key": { "type": "string", "description": "The key to press (e.g. 'Enter', 'Tab', 'a')." },
+            \\    "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document." }
+            \\  },
+            \\  "required": ["key"]
+            \\}
+        ),
+    },
+    .{
+        .name = "selectOption",
+        .description = "Select an option in a <select> element by its value. Dispatches input and change events.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "The backend node ID of the <select> element." },
+            \\    "value": { "type": "string", "description": "The value of the option to select." }
+            \\  },
+            \\  "required": ["backendNodeId", "value"]
+            \\}
+        ),
+    },
+    .{
+        .name = "setChecked",
+        .description = "Check or uncheck a checkbox or radio button. Dispatches click, input, and change events.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "backendNodeId": { "type": "integer", "description": "The backend node ID of the checkbox or radio input element." },
+            \\    "checked": { "type": "boolean", "description": "Whether to check (true) or uncheck (false) the element." }
+            \\  },
+            \\  "required": ["backendNodeId", "checked"]
+            \\}
+        ),
+    },
+    .{
+        .name = "findElement",
+        .description = "Find interactive elements by role and/or accessible name. Returns matching elements with their backend node IDs. Useful for locating specific elements without parsing the full semantic tree.",
+        .inputSchema = protocol.minify(
+            \\{
+            \\  "type": "object",
+            \\  "properties": {
+            \\    "role": { "type": "string", "description": "Optional ARIA role to match (e.g. 'button', 'link', 'textbox', 'checkbox')." },
+            \\    "name": { "type": "string", "description": "Optional accessible name substring to match (case-insensitive)." }
+            \\  }
+            \\}
+        ),
+    },
 };
 
 pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -282,6 +350,11 @@ const ToolAction = enum {
     fill,
     scroll,
     waitForSelector,
+    hover,
+    press,
+    selectOption,
+    setChecked,
+    findElement,
 };
 
 const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -300,6 +373,11 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
     .{ "fill", .fill },
     .{ "scroll", .scroll },
     .{ "waitForSelector", .waitForSelector },
+    .{ "hover", .hover },
+    .{ "press", .press },
+    .{ "selectOption", .selectOption },
+    .{ "setChecked", .setChecked },
+    .{ "findElement", .findElement },
 });
 
 pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -334,6 +412,11 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
         .fill => try handleFill(server, arena, req.id.?, call_params.arguments),
         .scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
         .waitForSelector => try handleWaitForSelector(server, arena, req.id.?, call_params.arguments),
+        .hover => try handleHover(server, arena, req.id.?, call_params.arguments),
+        .press => try handlePress(server, arena, req.id.?, call_params.arguments),
+        .selectOption => try handleSelectOption(server, arena, req.id.?, call_params.arguments),
+        .setChecked => try handleSetChecked(server, arena, req.id.?, call_params.arguments),
+        .findElement => try handleFindElement(server, arena, req.id.?, call_params.arguments),
     }
 }
 
@@ -400,17 +483,9 @@ fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Val
         backendNodeId: CDPNode.Id,
     };
     const args = try parseArgs(Params, arena, arguments, server, id, "nodeDetails");
+    const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
 
-    _ = server.session.currentPage() orelse {
-        return server.sendError(id, .PageNotLoaded, "Page not loaded");
-    };
-
-    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
-        return server.sendError(id, .InvalidParams, "Node not found");
-    };
-
-    const page = server.session.currentPage().?;
-    const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &server.node_registry, page) catch {
+    const details = lp.SemanticTree.getNodeDetails(arena, resolved.node, &server.node_registry, resolved.page) catch {
         return server.sendError(id, .InternalError, "Failed to get node details");
     };
 
@@ -510,26 +585,19 @@ fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
         backendNodeId: CDPNode.Id,
     };
     const args = try parseArgs(ClickParams, arena, arguments, server, id, "click");
+    const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
 
-    const page = server.session.currentPage() orelse {
-        return server.sendError(id, .PageNotLoaded, "Page not loaded");
-    };
-
-    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
-        return server.sendError(id, .InvalidParams, "Node not found");
-    };
-
-    lp.actions.click(node.dom, page) catch |err| {
+    lp.actions.click(resolved.node, resolved.page) catch |err| {
         if (err == error.InvalidNodeType) {
             return server.sendError(id, .InvalidParams, "Node is not an HTML element");
         }
         return server.sendError(id, .InternalError, "Failed to click element");
     };
 
-    const page_title = page.getTitle() catch null;
+    const page_title = resolved.page.getTitle() catch null;
     const result_text = try std.fmt.allocPrint(arena, "Clicked element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
         args.backendNodeId,
-        page.url,
+        resolved.page.url,
         page_title orelse "(none)",
     });
     const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -542,27 +610,20 @@ fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arg
         text: []const u8,
     };
     const args = try parseArgs(FillParams, arena, arguments, server, id, "fill");
+    const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
 
-    const page = server.session.currentPage() orelse {
-        return server.sendError(id, .PageNotLoaded, "Page not loaded");
-    };
-
-    const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
-        return server.sendError(id, .InvalidParams, "Node not found");
-    };
-
-    lp.actions.fill(node.dom, args.text, page) catch |err| {
+    lp.actions.fill(resolved.node, args.text, resolved.page) catch |err| {
         if (err == error.InvalidNodeType) {
             return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
         }
         return server.sendError(id, .InternalError, "Failed to fill element");
     };
 
-    const page_title = page.getTitle() catch null;
+    const page_title = resolved.page.getTitle() catch null;
     const result_text = try std.fmt.allocPrint(arena, "Filled element (backendNodeId: {d}) with \"{s}\". Page url: {s}, title: {s}", .{
         args.backendNodeId,
         args.text,
-        page.url,
+        resolved.page.url,
         page_title orelse "(none)",
     });
     const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -636,6 +697,189 @@ fn handleWaitForSelector(server: *Server, arena: std.mem.Allocator, id: std.json
     return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
 }
 
+fn handleHover(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const Params = struct {
+        backendNodeId: CDPNode.Id,
+    };
+    const args = try parseArgs(Params, arena, arguments, server, id, "hover");
+    const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
+
+    lp.actions.hover(resolved.node, resolved.page) catch |err| {
+        if (err == error.InvalidNodeType) {
+            return server.sendError(id, .InvalidParams, "Node is not an HTML element");
+        }
+        return server.sendError(id, .InternalError, "Failed to hover element");
+    };
+
+    const page_title = resolved.page.getTitle() catch null;
+    const result_text = try std.fmt.allocPrint(arena, "Hovered element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
+        args.backendNodeId,
+        resolved.page.url,
+        page_title orelse "(none)",
+    });
+    const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
+    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
+}
+
+fn handlePress(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const Params = struct {
+        key: []const u8,
+        backendNodeId: ?CDPNode.Id = null,
+    };
+    const args = try parseArgs(Params, arena, arguments, server, id, "press");
+
+    const page = server.session.currentPage() orelse {
+        return server.sendError(id, .PageNotLoaded, "Page not loaded");
+    };
+
+    var target_node: ?*DOMNode = null;
+    if (args.backendNodeId) |node_id| {
+        const node = server.node_registry.lookup_by_id.get(node_id) orelse {
+            return server.sendError(id, .InvalidParams, "Node not found");
+        };
+        target_node = node.dom;
+    }
+
+    lp.actions.press(target_node, args.key, page) catch |err| {
+        if (err == error.InvalidNodeType) {
+            return server.sendError(id, .InvalidParams, "Node is not an HTML element");
+        }
+        return server.sendError(id, .InternalError, "Failed to press key");
+    };
+
+    const page_title = page.getTitle() catch null;
+    const result_text = try std.fmt.allocPrint(arena, "Pressed key '{s}'. Page url: {s}, title: {s}", .{
+        args.key,
+        page.url,
+        page_title orelse "(none)",
+    });
+    const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
+    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
+}
+
+fn handleSelectOption(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
+    const Params = struct {
+        backendNodeId: CDPNode.Id,
+        value: []const u8,
+    };
+    const args = try parseArgs(Params, arena, arguments, server, id, "selectOption");
+    const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
+
+    lp.actions.selectOption(resolved.node, args.value, resolved.page) catch |err| {
+        if (err == error.InvalidNodeType) {
+            return server.sendError(id, .InvalidParams, "Node is not a