SemanticTree: add backendNodeId and maxDepth support

This commit is contained in:
Adrià Arrufat
2026-03-19 10:18:08 +09:00
parent edd0c5c83f
commit 7a7c4b9f49
3 changed files with 40 additions and 8 deletions

View File

@@ -38,6 +38,7 @@ page: *Page,
arena: std.mem.Allocator, arena: std.mem.Allocator,
prune: bool = true, prune: bool = true,
interactive_only: bool = false, interactive_only: bool = false,
max_depth: ?u32 = null,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
var visitor = JsonVisitor{ .jw = jw, .tree = self }; var visitor = JsonVisitor{ .jw = jw, .tree = self };
@@ -46,7 +47,7 @@ pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!
log.err(.app, "listener map failed", .{ .err = err }); log.err(.app, "listener map failed", .{ .err = err });
return error.WriteFailed; return error.WriteFailed;
}; };
self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets) catch |err| { self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets, 0) catch |err| {
log.err(.app, "semantic tree json dump failed", .{ .err = err }); log.err(.app, "semantic tree json dump failed", .{ .err = err });
return error.WriteFailed; return error.WriteFailed;
}; };
@@ -59,7 +60,7 @@ pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!v
log.err(.app, "listener map failed", .{ .err = err }); log.err(.app, "listener map failed", .{ .err = err });
return error.WriteFailed; return error.WriteFailed;
}; };
self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets) catch |err| { self.walk(self.dom_node, &xpath_buffer, null, &visitor, 1, listener_targets, 0) catch |err| {
log.err(.app, "semantic tree text dump failed", .{ .err = err }); log.err(.app, "semantic tree text dump failed", .{ .err = err });
return error.WriteFailed; return error.WriteFailed;
}; };
@@ -83,7 +84,11 @@ const NodeData = struct {
node_name: []const u8, node_name: []const u8,
}; };
fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: interactive.ListenerTargetMap) !void { fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: interactive.ListenerTargetMap, current_depth: u32) !void {
if (self.max_depth) |max| {
if (current_depth > max) return;
}
// 1. Skip non-content nodes // 1. Skip non-content nodes
if (node.is(Element)) |el| { if (node.is(Element)) |el| {
const tag = el.getTag(); const tag = el.getTag();
@@ -230,7 +235,7 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam
} }
gop.value_ptr.* += 1; gop.value_ptr.* += 1;
try self.walk(child, xpath_buffer, name, visitor, gop.value_ptr.*, listener_targets); try self.walk(child, xpath_buffer, name, visitor, gop.value_ptr.*, listener_targets, current_depth + 1);
} }
} }

View File

@@ -53,12 +53,18 @@ fn getSemanticTree(cmd: anytype) !void {
format: ?enum { text } = null, format: ?enum { text } = null,
prune: ?bool = null, prune: ?bool = null,
interactiveOnly: ?bool = null, interactiveOnly: ?bool = null,
backendNodeId: ?Node.Id = null,
maxDepth: ?u32 = null,
}; };
const params = (try cmd.params(Params)) orelse Params{}; const params = (try cmd.params(Params)) orelse Params{};
const bc = cmd.browser_context orelse return error.NoBrowserContext; const bc = cmd.browser_context orelse return error.NoBrowserContext;
const page = bc.session.currentPage() orelse return error.PageNotLoaded; const page = bc.session.currentPage() orelse return error.PageNotLoaded;
const dom_node = page.document.asNode();
const dom_node = if (params.backendNodeId) |nodeId|
(bc.node_registry.lookup_by_id.get(nodeId) orelse return error.InvalidNodeId).dom
else
page.document.asNode();
var st = SemanticTree{ var st = SemanticTree{
.dom_node = dom_node, .dom_node = dom_node,
@@ -67,6 +73,7 @@ fn getSemanticTree(cmd: anytype) !void {
.arena = cmd.arena, .arena = cmd.arena,
.prune = params.prune orelse true, .prune = params.prune orelse true,
.interactive_only = params.interactiveOnly orelse false, .interactive_only = params.interactiveOnly orelse false,
.max_depth = params.maxDepth,
}; };
if (params.format) |format| { if (params.format) |format| {

View File

@@ -70,7 +70,9 @@ pub const tool_list = [_]protocol.Tool{
\\{ \\{
\\ "type": "object", \\ "type": "object",
\\ "properties": { \\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." } \\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID to get the tree for a specific element instead of the document root." },
\\ "maxDepth": { "type": "integer", "description": "Optional maximum depth of the tree to return. Useful for exploring high-level structure first." }
\\ } \\ }
\\} \\}
), ),
@@ -161,6 +163,8 @@ const ToolStreamingText = struct {
action: enum { markdown, links, semantic_tree }, action: enum { markdown, links, semantic_tree },
registry: ?*CDPNode.Registry = null, registry: ?*CDPNode.Registry = null,
arena: ?std.mem.Allocator = null, arena: ?std.mem.Allocator = null,
backendNodeId: ?u32 = null,
maxDepth: ?u32 = null,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void { pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void {
try jw.beginWriteRaw(); try jw.beginWriteRaw();
@@ -196,12 +200,24 @@ const ToolStreamingText = struct {
} }
}, },
.semantic_tree => { .semantic_tree => {
var root_node = self.page.document.asNode();
if (self.backendNodeId) |node_id| {
if (self.registry) |registry| {
if (registry.lookup_by_id.get(node_id)) |n| {
root_node = n.dom;
} else {
log.warn(.mcp, "semantic_tree id {} missing", .{node_id});
}
}
}
const st = lp.SemanticTree{ const st = lp.SemanticTree{
.dom_node = self.page.document.asNode(), .dom_node = root_node,
.registry = self.registry.?, .registry = self.registry.?,
.page = self.page, .page = self.page,
.arena = self.arena.?, .arena = self.arena.?,
.prune = true, .prune = true,
.max_depth = self.maxDepth,
}; };
st.textStringify(w) catch |err| { st.textStringify(w) catch |err| {
@@ -328,9 +344,13 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const TreeParams = struct { const TreeParams = struct {
url: ?[:0]const u8 = null, url: ?[:0]const u8 = null,
backendNodeId: ?u32 = null,
maxDepth: ?u32 = null,
}; };
var tree_args: TreeParams = .{};
if (arguments) |args_raw| { if (arguments) |args_raw| {
if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| { if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| {
tree_args = args;
if (args.url) |u| { if (args.url) |u| {
try performGoto(server, u, id); try performGoto(server, u, id);
} }
@@ -341,7 +361,7 @@ fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Va
}; };
const content = [_]protocol.TextContent(ToolStreamingText){.{ const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .page = page, .action = .semantic_tree, .registry = &server.node_registry, .arena = arena }, .text = .{ .page = page, .action = .semantic_tree, .registry = &server.node_registry, .arena = arena, .backendNodeId = tree_args.backendNodeId, .maxDepth = tree_args.maxDepth },
}}; }};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
} }