From 7a7c4b9f495e4c8544fc0bd159a762ebf3e26d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 19 Mar 2026 10:18:08 +0900 Subject: [PATCH 1/5] SemanticTree): add backendNodeId and maxDepth support --- src/SemanticTree.zig | 13 +++++++++---- src/cdp/domains/lp.zig | 9 ++++++++- src/mcp/tools.zig | 26 +++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 166acf4a..45785e1b 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -38,6 +38,7 @@ page: *Page, arena: std.mem.Allocator, prune: bool = true, interactive_only: bool = false, +max_depth: ?u32 = null, pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { var visitor = JsonVisitor{ .jw = jw, .tree = self }; @@ -46,7 +47,7 @@ pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}! log.err(.app, "listener map failed", .{ .err = err }); return error.WriteFailed; }; - self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets) catch |err| { + self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets, 0) catch |err| { log.err(.app, "semantic tree json dump failed", .{ .err = err }); return error.WriteFailed; }; @@ -59,7 +60,7 @@ pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!v log.err(.app, "listener map failed", .{ .err = err }); return error.WriteFailed; }; - self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets) catch |err| { + self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets, 0) catch |err| { log.err(.app, "semantic tree text dump failed", .{ .err = err }); return error.WriteFailed; }; @@ -83,7 +84,11 @@ const NodeData = struct { node_name: []const u8, }; -fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: 
interactive.ListenerTargetMap) !void { +fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: interactive.ListenerTargetMap, current_depth: u32) !void { + if (self.max_depth) |max| { + if (current_depth > max) return; + } + // 1. Skip non-content nodes if (node.is(Element)) |el| { const tag = el.getTag(); @@ -230,7 +235,7 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam } gop.value_ptr.* += 1; - try self.walk(child, xpath_buffer, name, visitor, gop.value_ptr.*, listener_targets); + try self.walk(child, xpath_buffer, name, visitor, gop.value_ptr.*, listener_targets, current_depth + 1); } } diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index efbf9ec7..d74dd2de 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -53,12 +53,18 @@ fn getSemanticTree(cmd: anytype) !void { format: ?enum { text } = null, prune: ?bool = null, interactiveOnly: ?bool = null, + backendNodeId: ?Node.Id = null, + maxDepth: ?u32 = null, }; const params = (try cmd.params(Params)) orelse Params{}; const bc = cmd.browser_context orelse return error.NoBrowserContext; const page = bc.session.currentPage() orelse return error.PageNotLoaded; - const dom_node = page.document.asNode(); + + const dom_node = if (params.backendNodeId) |nodeId| + (bc.node_registry.lookup_by_id.get(nodeId) orelse return error.InvalidNodeId).dom + else + page.document.asNode(); var st = SemanticTree{ .dom_node = dom_node, @@ -67,6 +73,7 @@ fn getSemanticTree(cmd: anytype) !void { .arena = cmd.arena, .prune = params.prune orelse true, .interactive_only = params.interactiveOnly orelse false, + .max_depth = params.maxDepth, }; if (params.format) |format| { diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index d8fd4ead..c8c6e16a 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -70,7 +70,9 @@ pub const tool_list = [_]protocol.Tool{ \\{ \\ "type": "object", \\ 
"properties": { - \\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." } + \\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." }, + \\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID to get the tree for a specific element instead of the document root." }, + \\ "maxDepth": { "type": "integer", "description": "Optional maximum depth of the tree to return. Useful for exploring high-level structure first." } \\ } \\} ), @@ -161,6 +163,8 @@ const ToolStreamingText = struct { action: enum { markdown, links, semantic_tree }, registry: ?*CDPNode.Registry = null, arena: ?std.mem.Allocator = null, + backendNodeId: ?u32 = null, + maxDepth: ?u32 = null, pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void { try jw.beginWriteRaw(); @@ -196,12 +200,24 @@ const ToolStreamingText = struct { } }, .semantic_tree => { + var root_node = self.page.document.asNode(); + if (self.backendNodeId) |node_id| { + if (self.registry) |registry| { + if (registry.lookup_by_id.get(node_id)) |n| { + root_node = n.dom; + } else { + log.warn(.mcp, "semantic_tree id {} missing", .{node_id}); + } + } + } + const st = lp.SemanticTree{ - .dom_node = self.page.document.asNode(), + .dom_node = root_node, .registry = self.registry.?, .page = self.page, .arena = self.arena.?, .prune = true, + .max_depth = self.maxDepth, }; st.textStringify(w) catch |err| { @@ -328,9 +344,13 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { const TreeParams = struct { url: ?[:0]const u8 = null, + backendNodeId: ?u32 = null, + maxDepth: ?u32 = null, }; + var tree_args: TreeParams = .{}; if (arguments) |args_raw| { if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| 
{ + tree_args = args; if (args.url) |u| { try performGoto(server, u, id); } @@ -341,7 +361,7 @@ fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Va }; const content = [_]protocol.TextContent(ToolStreamingText){.{ - .text = .{ .page = page, .action = .semantic_tree, .registry = &server.node_registry, .arena = arena }, + .text = .{ .page = page, .action = .semantic_tree, .registry = &server.node_registry, .arena = arena, .backendNodeId = tree_args.backendNodeId, .maxDepth = tree_args.maxDepth }, }}; try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); } From e997f8317e468f5e39e9daa8a6cd23ae93d91d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 19 Mar 2026 12:25:02 +0900 Subject: [PATCH 2/5] SemanticTree: add tests for backendDOMNodeId and maxDepth --- src/SemanticTree.zig | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 45785e1b..7927a10f 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -479,3 +479,56 @@ const TextVisitor = struct { } } }; + +const testing = @import("testing.zig"); + +test "SemanticTree backendDOMNodeId" { + var registry: CDPNode.Registry = .init(testing.allocator); + defer registry.deinit(); + + var page = try testing.pageTest("cdp/registry1.html"); + defer testing.reset(); + defer page._session.removePage(); + + const st: Self = .{ + .dom_node = page.window._document.asNode(), + .registry = &registry, + .page = page, + .arena = testing.arena_allocator, + .prune = false, + .interactive_only = false, + .max_depth = null, + }; + + const json_str = try std.json.Stringify.valueAlloc(testing.allocator, st, .{}); + defer testing.allocator.free(json_str); + + try testing.expect(std.mem.indexOf(u8, json_str, "\"backendDOMNodeId\":") != null); +} + +test "SemanticTree max_depth" { + var registry: CDPNode.Registry = .init(testing.allocator); + defer
registry.deinit(); + + var page = try testing.pageTest("cdp/registry1.html"); + defer testing.reset(); + defer page._session.removePage(); + + const st: Self = .{ + .dom_node = page.window._document.asNode(), + .registry = &registry, + .page = page, + .arena = testing.arena_allocator, + .prune = false, + .interactive_only = false, + .max_depth = 1, + }; + + var aw: std.Io.Writer.Allocating = .init(testing.allocator); + defer aw.deinit(); + + try st.textStringify(&aw.writer); + const text_str = aw.written(); + + try testing.expect(std.mem.indexOf(u8, text_str, "other") == null); +} From f0cfe3ffc80bfbae74247e7d4235e4297ac99e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= <1671644+arrufat@users.noreply.github.com> Date: Thu, 19 Mar 2026 20:15:56 +0900 Subject: [PATCH 3/5] SemanticTree: use logger better Co-authored-by: Karl Seguin --- src/mcp/tools.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index c8c6e16a..cb105d25 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -206,7 +206,7 @@ const ToolStreamingText = struct { if (registry.lookup_by_id.get(node_id)) |n| { root_node = n.dom; } else { - log.warn(.mcp, "semantic_tree id {} missing", .{node_id}); + log.warn(.mcp, "semantic_tree id missing", .{.id = node_id}); } } } From 9c2393351d23d40190de419ab148bfe5bcef6d57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 19 Mar 2026 20:25:20 +0900 Subject: [PATCH 4/5] SemanticTree: simplify max_depth logic --- src/SemanticTree.zig | 8 +++----- src/cdp/domains/lp.zig | 2 +- src/mcp/tools.zig | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 7927a10f..2ac1ccba 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -38,7 +38,7 @@ page: *Page, arena: std.mem.Allocator, prune: bool = true, interactive_only: bool = false, -max_depth: ?u32 = null, +max_depth: u32 = std.math.maxInt(u32) - 1, pub
fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { var visitor = JsonVisitor{ .jw = jw, .tree = self }; @@ -85,9 +85,7 @@ const NodeData = struct { }; fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: interactive.ListenerTargetMap, current_depth: u32) !void { - if (self.max_depth) |max| { - if (current_depth > max) return; - } + if (current_depth > self.max_depth) return; // 1. Skip non-content nodes if (node.is(Element)) |el| { @@ -497,7 +495,7 @@ test "SemanticTree backendDOMNodeId" { .arena = testing.arena_allocator, .prune = false, .interactive_only = false, - .max_depth = null, + .max_depth = std.math.maxInt(u32) - 1, }; const json_str = try std.json.Stringify.valueAlloc(testing.allocator, st, .{}); diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index d74dd2de..51b3249d 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -73,7 +73,7 @@ fn getSemanticTree(cmd: anytype) !void { .arena = cmd.arena, .prune = params.prune orelse true, .interactive_only = params.interactiveOnly orelse false, - .max_depth = params.maxDepth, + .max_depth = params.maxDepth orelse std.math.maxInt(u32) - 1, }; if (params.format) |format| { diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index cb105d25..aefcca83 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -206,7 +206,7 @@ const ToolStreamingText = struct { if (registry.lookup_by_id.get(node_id)) |n| { root_node = n.dom; } else { - log.warn(.mcp, "semantic_tree id missing", .{.id = node_id}); + log.warn(.mcp, "semantic_tree id missing", .{ .id = node_id }); } } } @@ -217,7 +217,7 @@ const ToolStreamingText = struct { .page = self.page, .arena = self.arena.?, .prune = true, - .max_depth = self.maxDepth, + .max_depth = self.maxDepth orelse std.math.maxInt(u32) - 1, }; st.textStringify(w) catch |err| { From 5062273b7a274591cb53801793422dbb64e7c82d Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 19 Mar 2026 20:29:54 +0900 Subject: [PATCH 5/5] SemanticTree: use CDPNode.Id for NodeData id --- src/SemanticTree.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 2ac1ccba..af8720e9 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -73,7 +73,7 @@ const OptionData = struct { }; const NodeData = struct { - id: u32, + id: CDPNode.Id, axn: AXNode, role: []const u8, name: ?[]const u8,