diff --git a/src/cdp/semantic_tree.zig b/src/SemanticTree.zig similarity index 72% rename from src/cdp/semantic_tree.zig rename to src/SemanticTree.zig index d3b637f4..cf76acec 100644 --- a/src/cdp/semantic_tree.zig +++ b/src/SemanticTree.zig @@ -19,26 +19,37 @@ const std = @import("std"); const lp = @import("lightpanda"); +const log = @import("log.zig"); const Page = lp.Page; -const CData = @import("../browser/webapi/CData.zig"); -const Element = @import("../browser/webapi/Element.zig"); -const Node = @import("../browser/webapi/Node.zig"); -const AXNode = @import("AXNode.zig"); -const CDPNode = @import("Node.zig"); +const CData = @import("browser/webapi/CData.zig"); +const Element = @import("browser/webapi/Element.zig"); +const Node = @import("browser/webapi/Node.zig"); +const AXNode = @import("cdp/AXNode.zig"); +const CDPNode = @import("cdp/Node.zig"); -pub fn dump(root: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, arena: std.mem.Allocator) !void { - try dumpNode(root, registry, jw, page, "", arena); +const SemanticTree = @This(); + +dom_node: *Node, +registry: *CDPNode.Registry, +page: *Page, +arena: std.mem.Allocator, + +pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { + self.dumpNode(self.dom_node, jw, "") catch |err| { + log.err(.cdp, "semantic tree dump failed", .{ .err = err }); + return error.WriteFailed; + }; } -fn isAllWhitespace(text: []const u8) bool { +fn isAllWhitespace(_: @This(), text: []const u8) bool { for (text) |c| { if (!std.ascii.isWhitespace(c)) return false; } return true; } -fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 { +fn getXPathSegment(self: @This(), node: *Node) ![]const u8 { if (node.is(Element)) |el| { const tag = el.getTagNameLower(); var index: usize = 1; @@ -54,7 +65,7 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 { } } } - return std.fmt.allocPrint(arena, "/{s}[{d}]", .{ tag, index }); + return std.fmt.allocPrint(self.arena, "/{s}[{d}]", .{ tag, index }); } else if (node.is(CData.Text) != null) { var index: usize = 1; if (node._parent) |parent| { @@ -66,12 +77,12 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 { } } } - return std.fmt.allocPrint(arena, "/text()[{d}]", .{index}); + return std.fmt.allocPrint(self.arena, "/text()[{d}]", .{index}); } return ""; } -fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, parent_xpath: []const u8, arena: std.mem.Allocator) !void { +fn dumpNode(self: @This(), node: *Node, jw: *std.json.Stringify, parent_xpath: []const u8) !void { // 1. Skip non-content nodes if (node.is(Element)) |el| { const tag = el.getTagNameLower(); @@ -102,14 +113,14 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p } else if (node.is(CData.Text) != null) { const text_node = node.is(CData.Text).?; const text = text_node.getWholeText(); - if (isAllWhitespace(text)) { + if (self.isAllWhitespace(text)) { return; } } else if (node._type != .document and node._type != .document_fragment) { return; } - const cdp_node = try registry.register(node); + const cdp_node = try self.registry.register(node); const axn = AXNode.fromNode(node); const role = try axn.getRole(); @@ -136,23 +147,23 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p } const event_target = node.asEventTarget(); - if (page._event_manager.hasListener(event_target, "click") or - page._event_manager.hasListener(event_target, "mousedown") or - page._event_manager.hasListener(event_target, "mouseup") or - page._event_manager.hasListener(event_target, "keydown") or - page._event_manager.hasListener(event_target, "change") or - page._event_manager.hasListener(event_target, "input")) + if (self.page._event_manager.hasListener(event_target, "click") or + self.page._event_manager.hasListener(event_target, "mousedown") or + self.page._event_manager.hasListener(event_target, "mouseup") or + self.page._event_manager.hasListener(event_target, "keydown") or + self.page._event_manager.hasListener(event_target, "change") or + self.page._event_manager.hasListener(event_target, "input")) { is_interactive = true; } if (el.is(Element.Html)) |html_el| { - if (html_el.hasAttributeFunction(.onclick, page) or - html_el.hasAttributeFunction(.onmousedown, page) or - html_el.hasAttributeFunction(.onmouseup, page) or - html_el.hasAttributeFunction(.onkeydown, page) or - html_el.hasAttributeFunction(.onchange, page) or - html_el.hasAttributeFunction(.oninput, page)) + if (html_el.hasAttributeFunction(.onclick, self.page) or + html_el.hasAttributeFunction(.onmousedown, self.page) or + html_el.hasAttributeFunction(.onmouseup, self.page) or + html_el.hasAttributeFunction(.onkeydown, self.page) or + html_el.hasAttributeFunction(.onchange, self.page) or + html_el.hasAttributeFunction(.oninput, self.page)) { is_interactive = true; } @@ -161,8 +172,8 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p node_name = "root"; } - const segment = try getXPathSegment(node, arena); - const xpath = try std.mem.concat(arena, u8, &.{ parent_xpath, segment }); + const segment = try self.getXPathSegment(node); + const xpath = try std.mem.concat(self.arena, u8, &.{ parent_xpath, segment }); try jw.beginObject(); @@ -213,7 +224,7 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p try jw.beginArray(); var it = node.childrenIterator(); while (it.next()) |child| { - try dumpNode(child, registry, jw, page, xpath, arena); + try self.dumpNode(child, jw, xpath); } try jw.endArray(); diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index a5fcffe6..12dcbb7b 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -20,9 +20,9 @@ const std = @import("std"); const lp = @import("lightpanda"); const log = @import("../../log.zig"); const markdown = lp.markdown; +const SemanticTree = lp.SemanticTree; const Node = @import("../Node.zig"); const DOMNode = @import("../../browser/webapi/Node.zig"); -const SemanticTree = @import("../semantic_tree.zig"); pub fn processMessage(cmd: anytype) !void { const action = std.meta.stringToEnum(enum { @@ -36,27 +36,13 @@ pub fn processMessage(cmd: anytype) !void { } } -const SemanticTreeResult = struct { - dom_node: *DOMNode, - registry: *Node.Registry, - page: *lp.Page, - arena: std.mem.Allocator, - - pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { - SemanticTree.dump(self.dom_node, self.registry, jw, self.page, self.arena) catch |err| { - log.err(.cdp, "semantic tree dump failed", .{ .err = err }); - return error.WriteFailed; - }; - } -}; - fn getSemanticTree(cmd: anytype) !void { const bc = cmd.browser_context orelse return error.NoBrowserContext; const page = bc.session.currentPage() orelse return error.PageNotLoaded; const dom_node = page.document.asNode(); return cmd.sendResult(.{ - .semanticTree = .{ + .semanticTree = SemanticTree{ .dom_node = dom_node, .registry = &bc.node_registry, .page = page, diff --git a/src/lightpanda.zig b/src/lightpanda.zig index b1bbed59..e506e61b 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -31,6 +31,7 @@ pub const log = @import("log.zig"); pub const js = @import("browser/js/js.zig"); pub const dump = @import("browser/dump.zig"); pub const markdown = @import("browser/markdown.zig"); +pub const SemanticTree = @import("SemanticTree.zig"); pub const mcp = @import("mcp.zig"); pub const build_config = @import("build_config"); pub const crash_handler = @import("crash_handler.zig"); diff --git a/src/mcp/Server.zig b/src/mcp/Server.zig index ef51f30c..3d56a37d 100644 --- a/src/mcp/Server.zig +++ b/src/mcp/Server.zig @@ -7,6 +7,7 @@ const HttpClient = @import("../http/Client.zig"); const testing = @import("../testing.zig"); const protocol = @import("protocol.zig"); const router = @import("router.zig"); +const CDPNode = @import("../cdp/Node.zig"); const Self = @This(); @@ -18,6 +19,7 @@ notification: *lp.Notification, browser: lp.Browser, session: *lp.Session, page: *lp.Page, +node_registry: CDPNode.Registry, writer: *std.io.Writer, mutex: std.Thread.Mutex = .{}, @@ -46,6 +48,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S .notification = notification, .session = undefined, .page = undefined, + .node_registry = CDPNode.Registry.init(allocator), }; self.session = try self.browser.newSession(self.notification); @@ -55,6 +58,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S } pub fn deinit(self: *Self) void { + self.node_registry.deinit(); self.aw.deinit(); self.browser.deinit(); self.notification.deinit(); diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index 146bd7db..59ddf1b5 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -61,6 +61,18 @@ pub const tool_list = [_]protocol.Tool{ \\} ), }, + .{ + .name = "semantic_tree", + .description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.", + .inputSchema = protocol.minify( + \\{ + \\ "type": "object", + \\ "properties": { + \\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." } + \\ } + \\} + ), + }, }; pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { @@ -79,18 +91,26 @@ const EvaluateParams = struct { const ToolStreamingText = struct { server: *Server, - action: enum { markdown, links }, + action: enum { markdown, links, semantic_tree }, + arena: std.mem.Allocator, pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void { - try jw.beginWriteRaw(); - try jw.writer.writeByte('"'); - var escaped = protocol.JsonEscapingWriter.init(jw.writer); - const w = &escaped.writer; switch (self.action) { - .markdown => lp.markdown.dump(self.server.page.document.asNode(), .{}, w, self.server.page) catch |err| { - log.err(.mcp, "markdown dump failed", .{ .err = err }); + .markdown => { + try jw.beginWriteRaw(); + try jw.writer.writeByte('"'); + var escaped = protocol.JsonEscapingWriter.init(jw.writer); + lp.markdown.dump(self.server.page.document.asNode(), .{}, &escaped.writer, self.server.page) catch |err| { + log.err(.mcp, "markdown dump failed", .{ .err = err }); + }; + try jw.writer.writeByte('"'); + jw.endWriteRaw(); }, .links => { + try jw.beginWriteRaw(); + try jw.writer.writeByte('"'); + var escaped = protocol.JsonEscapingWriter.init(jw.writer); + const w = &escaped.writer; if (Selector.querySelectorAll(self.server.page.document.asNode(), "a[href]", self.server.page)) |list| { defer list.deinit(self.server.page); var first = true; @@ -111,10 +131,30 @@ const ToolStreamingText = struct { } else |err| { log.err(.mcp, "query links failed", .{ .err = err }); } + try jw.writer.writeByte('"'); + jw.endWriteRaw(); + }, + .semantic_tree => { + // MCP expects a string for "text" content, but our SemanticTree is a complex object. + // We'll serialize it as a string to fit the MCP text protocol requirements. + try jw.beginWriteRaw(); + try jw.writer.writeByte('"'); + var escaped = protocol.JsonEscapingWriter.init(jw.writer); + + const st = lp.SemanticTree{ + .dom_node = self.server.page.document.asNode(), + .registry = &self.server.node_registry, + .page = self.server.page, + .arena = self.arena, + }; + std.json.Stringify.value(st, .{ .whitespace = .minified }, &escaped.writer) catch |err| { + log.err(.mcp, "semantic tree dump failed", .{ .err = err }); + }; + + try jw.writer.writeByte('"'); + jw.endWriteRaw(); }, } - try jw.writer.writeByte('"'); - jw.endWriteRaw(); } }; @@ -124,6 +164,7 @@ const ToolAction = enum { markdown, links, evaluate, + semantic_tree, }; const tool_map = std.StaticStringMap(ToolAction).initComptime(.{ @@ -132,6 +173,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{ .{ "markdown", .markdown }, .{ "links", .links }, .{ "evaluate", .evaluate }, + .{ "semantic_tree", .semantic_tree }, }); pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { @@ -157,6 +199,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque .markdown => try handleMarkdown(server, arena, req.id.?, call_params.arguments), .links => try handleLinks(server, arena, req.id.?, call_params.arguments), .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments), + .semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments), } } @@ -181,7 +224,7 @@ fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value, } const content = [_]protocol.TextContent(ToolStreamingText){.{ - .text = .{ .server = server, .action = .markdown }, + .text = .{ .server = server, .action = .markdown, .arena = arena }, }}; try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); } @@ -199,7 +242,25 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar } const content = [_]protocol.TextContent(ToolStreamingText){.{ - .text = .{ .server = server, .action = .links }, + .text = .{ .server = server, .action = .links, .arena = arena }, + }}; + try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); +} + +fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { + const TreeParams = struct { + url: ?[:0]const u8 = null, + }; + if (arguments) |args_raw| { + if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| { + if (args.url) |u| { + try performGoto(server, u, id); + } + } else |_| {} + } + + const content = [_]protocol.TextContent(ToolStreamingText){.{ + .text = .{ .server = server, .action = .semantic_tree, .arena = arena }, }}; try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); }