Refactor: move SemanticTree to core and expose via MCP tools

This commit is contained in:
Adrià Arrufat
2026-03-06 15:44:03 +09:00
parent 0f46277b1f
commit 248851701f
5 changed files with 119 additions and 56 deletions

View File

@@ -19,26 +19,37 @@
const std = @import("std"); const std = @import("std");
const lp = @import("lightpanda"); const lp = @import("lightpanda");
const log = @import("log.zig");
const Page = lp.Page; const Page = lp.Page;
const CData = @import("../browser/webapi/CData.zig"); const CData = @import("browser/webapi/CData.zig");
const Element = @import("../browser/webapi/Element.zig"); const Element = @import("browser/webapi/Element.zig");
const Node = @import("../browser/webapi/Node.zig"); const Node = @import("browser/webapi/Node.zig");
const AXNode = @import("AXNode.zig"); const AXNode = @import("cdp/AXNode.zig");
const CDPNode = @import("Node.zig"); const CDPNode = @import("cdp/Node.zig");
pub fn dump(root: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, arena: std.mem.Allocator) !void { const SemanticTree = @This();
try dumpNode(root, registry, jw, page, "", arena);
dom_node: *Node,
registry: *CDPNode.Registry,
page: *Page,
arena: std.mem.Allocator,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
self.dumpNode(self.dom_node, jw, "") catch |err| {
log.err(.cdp, "semantic tree dump failed", .{ .err = err });
return error.WriteFailed;
};
} }
fn isAllWhitespace(text: []const u8) bool { fn isAllWhitespace(_: @This(), text: []const u8) bool {
for (text) |c| { for (text) |c| {
if (!std.ascii.isWhitespace(c)) return false; if (!std.ascii.isWhitespace(c)) return false;
} }
return true; return true;
} }
fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 { fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
if (node.is(Element)) |el| { if (node.is(Element)) |el| {
const tag = el.getTagNameLower(); const tag = el.getTagNameLower();
var index: usize = 1; var index: usize = 1;
@@ -54,7 +65,7 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 {
} }
} }
} }
return std.fmt.allocPrint(arena, "/{s}[{d}]", .{ tag, index }); return std.fmt.allocPrint(self.arena, "/{s}[{d}]", .{ tag, index });
} else if (node.is(CData.Text) != null) { } else if (node.is(CData.Text) != null) {
var index: usize = 1; var index: usize = 1;
if (node._parent) |parent| { if (node._parent) |parent| {
@@ -66,12 +77,12 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 {
} }
} }
} }
return std.fmt.allocPrint(arena, "/text()[{d}]", .{index}); return std.fmt.allocPrint(self.arena, "/text()[{d}]", .{index});
} }
return ""; return "";
} }
fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, parent_xpath: []const u8, arena: std.mem.Allocator) !void { fn dumpNode(self: @This(), node: *Node, jw: *std.json.Stringify, parent_xpath: []const u8) !void {
// 1. Skip non-content nodes // 1. Skip non-content nodes
if (node.is(Element)) |el| { if (node.is(Element)) |el| {
const tag = el.getTagNameLower(); const tag = el.getTagNameLower();
@@ -102,14 +113,14 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
} else if (node.is(CData.Text) != null) { } else if (node.is(CData.Text) != null) {
const text_node = node.is(CData.Text).?; const text_node = node.is(CData.Text).?;
const text = text_node.getWholeText(); const text = text_node.getWholeText();
if (isAllWhitespace(text)) { if (self.isAllWhitespace(text)) {
return; return;
} }
} else if (node._type != .document and node._type != .document_fragment) { } else if (node._type != .document and node._type != .document_fragment) {
return; return;
} }
const cdp_node = try registry.register(node); const cdp_node = try self.registry.register(node);
const axn = AXNode.fromNode(node); const axn = AXNode.fromNode(node);
const role = try axn.getRole(); const role = try axn.getRole();
@@ -136,23 +147,23 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
} }
const event_target = node.asEventTarget(); const event_target = node.asEventTarget();
if (page._event_manager.hasListener(event_target, "click") or if (self.page._event_manager.hasListener(event_target, "click") or
page._event_manager.hasListener(event_target, "mousedown") or self.page._event_manager.hasListener(event_target, "mousedown") or
page._event_manager.hasListener(event_target, "mouseup") or self.page._event_manager.hasListener(event_target, "mouseup") or
page._event_manager.hasListener(event_target, "keydown") or self.page._event_manager.hasListener(event_target, "keydown") or
page._event_manager.hasListener(event_target, "change") or self.page._event_manager.hasListener(event_target, "change") or
page._event_manager.hasListener(event_target, "input")) self.page._event_manager.hasListener(event_target, "input"))
{ {
is_interactive = true; is_interactive = true;
} }
if (el.is(Element.Html)) |html_el| { if (el.is(Element.Html)) |html_el| {
if (html_el.hasAttributeFunction(.onclick, page) or if (html_el.hasAttributeFunction(.onclick, self.page) or
html_el.hasAttributeFunction(.onmousedown, page) or html_el.hasAttributeFunction(.onmousedown, self.page) or
html_el.hasAttributeFunction(.onmouseup, page) or html_el.hasAttributeFunction(.onmouseup, self.page) or
html_el.hasAttributeFunction(.onkeydown, page) or html_el.hasAttributeFunction(.onkeydown, self.page) or
html_el.hasAttributeFunction(.onchange, page) or html_el.hasAttributeFunction(.onchange, self.page) or
html_el.hasAttributeFunction(.oninput, page)) html_el.hasAttributeFunction(.oninput, self.page))
{ {
is_interactive = true; is_interactive = true;
} }
@@ -161,8 +172,8 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
node_name = "root"; node_name = "root";
} }
const segment = try getXPathSegment(node, arena); const segment = try self.getXPathSegment(node);
const xpath = try std.mem.concat(arena, u8, &.{ parent_xpath, segment }); const xpath = try std.mem.concat(self.arena, u8, &.{ parent_xpath, segment });
try jw.beginObject(); try jw.beginObject();
@@ -213,7 +224,7 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
try jw.beginArray(); try jw.beginArray();
var it = node.childrenIterator(); var it = node.childrenIterator();
while (it.next()) |child| { while (it.next()) |child| {
try dumpNode(child, registry, jw, page, xpath, arena); try self.dumpNode(child, jw, xpath);
} }
try jw.endArray(); try jw.endArray();

View File

@@ -20,9 +20,9 @@ const std = @import("std");
const lp = @import("lightpanda"); const lp = @import("lightpanda");
const log = @import("../../log.zig"); const log = @import("../../log.zig");
const markdown = lp.markdown; const markdown = lp.markdown;
const SemanticTree = lp.SemanticTree;
const Node = @import("../Node.zig"); const Node = @import("../Node.zig");
const DOMNode = @import("../../browser/webapi/Node.zig"); const DOMNode = @import("../../browser/webapi/Node.zig");
const SemanticTree = @import("../semantic_tree.zig");
pub fn processMessage(cmd: anytype) !void { pub fn processMessage(cmd: anytype) !void {
const action = std.meta.stringToEnum(enum { const action = std.meta.stringToEnum(enum {
@@ -36,27 +36,13 @@ pub fn processMessage(cmd: anytype) !void {
} }
} }
const SemanticTreeResult = struct {
dom_node: *DOMNode,
registry: *Node.Registry,
page: *lp.Page,
arena: std.mem.Allocator,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
SemanticTree.dump(self.dom_node, self.registry, jw, self.page, self.arena) catch |err| {
log.err(.cdp, "semantic tree dump failed", .{ .err = err });
return error.WriteFailed;
};
}
};
fn getSemanticTree(cmd: anytype) !void { fn getSemanticTree(cmd: anytype) !void {
const bc = cmd.browser_context orelse return error.NoBrowserContext; const bc = cmd.browser_context orelse return error.NoBrowserContext;
const page = bc.session.currentPage() orelse return error.PageNotLoaded; const page = bc.session.currentPage() orelse return error.PageNotLoaded;
const dom_node = page.document.asNode(); const dom_node = page.document.asNode();
return cmd.sendResult(.{ return cmd.sendResult(.{
.semanticTree = .{ .semanticTree = SemanticTree{
.dom_node = dom_node, .dom_node = dom_node,
.registry = &bc.node_registry, .registry = &bc.node_registry,
.page = page, .page = page,

View File

@@ -31,6 +31,7 @@ pub const log = @import("log.zig");
pub const js = @import("browser/js/js.zig"); pub const js = @import("browser/js/js.zig");
pub const dump = @import("browser/dump.zig"); pub const dump = @import("browser/dump.zig");
pub const markdown = @import("browser/markdown.zig"); pub const markdown = @import("browser/markdown.zig");
pub const SemanticTree = @import("SemanticTree.zig");
pub const mcp = @import("mcp.zig"); pub const mcp = @import("mcp.zig");
pub const build_config = @import("build_config"); pub const build_config = @import("build_config");
pub const crash_handler = @import("crash_handler.zig"); pub const crash_handler = @import("crash_handler.zig");

View File

@@ -7,6 +7,7 @@ const HttpClient = @import("../http/Client.zig");
const testing = @import("../testing.zig"); const testing = @import("../testing.zig");
const protocol = @import("protocol.zig"); const protocol = @import("protocol.zig");
const router = @import("router.zig"); const router = @import("router.zig");
const CDPNode = @import("../cdp/Node.zig");
const Self = @This(); const Self = @This();
@@ -18,6 +19,7 @@ notification: *lp.Notification,
browser: lp.Browser, browser: lp.Browser,
session: *lp.Session, session: *lp.Session,
page: *lp.Page, page: *lp.Page,
node_registry: CDPNode.Registry,
writer: *std.io.Writer, writer: *std.io.Writer,
mutex: std.Thread.Mutex = .{}, mutex: std.Thread.Mutex = .{},
@@ -46,6 +48,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S
.notification = notification, .notification = notification,
.session = undefined, .session = undefined,
.page = undefined, .page = undefined,
.node_registry = CDPNode.Registry.init(allocator),
}; };
self.session = try self.browser.newSession(self.notification); self.session = try self.browser.newSession(self.notification);
@@ -55,6 +58,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S
} }
pub fn deinit(self: *Self) void { pub fn deinit(self: *Self) void {
self.node_registry.deinit();
self.aw.deinit(); self.aw.deinit();
self.browser.deinit(); self.browser.deinit();
self.notification.deinit(); self.notification.deinit();

View File

@@ -61,6 +61,18 @@ pub const tool_list = [_]protocol.Tool{
\\} \\}
), ),
}, },
.{
.name = "semantic_tree",
.description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." }
\\ }
\\}
),
},
}; };
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -79,18 +91,26 @@ const EvaluateParams = struct {
const ToolStreamingText = struct { const ToolStreamingText = struct {
server: *Server, server: *Server,
action: enum { markdown, links }, action: enum { markdown, links, semantic_tree },
arena: std.mem.Allocator,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void { pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void {
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
const w = &escaped.writer;
switch (self.action) { switch (self.action) {
.markdown => lp.markdown.dump(self.server.page.document.asNode(), .{}, w, self.server.page) catch |err| { .markdown => {
log.err(.mcp, "markdown dump failed", .{ .err = err }); try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
lp.markdown.dump(self.server.page.document.asNode(), .{}, &escaped.writer, self.server.page) catch |err| {
log.err(.mcp, "markdown dump failed", .{ .err = err });
};
try jw.writer.writeByte('"');
jw.endWriteRaw();
}, },
.links => { .links => {
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
const w = &escaped.writer;
if (Selector.querySelectorAll(self.server.page.document.asNode(), "a[href]", self.server.page)) |list| { if (Selector.querySelectorAll(self.server.page.document.asNode(), "a[href]", self.server.page)) |list| {
defer list.deinit(self.server.page); defer list.deinit(self.server.page);
var first = true; var first = true;
@@ -111,10 +131,30 @@ const ToolStreamingText = struct {
} else |err| { } else |err| {
log.err(.mcp, "query links failed", .{ .err = err }); log.err(.mcp, "query links failed", .{ .err = err });
} }
try jw.writer.writeByte('"');
jw.endWriteRaw();
},
.semantic_tree => {
// MCP "text" content must be a JSON string, but SemanticTree serializes to a JSON object.
// Serialize the tree as minified JSON through the escaping writer so it is embedded as a string.
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
const st = lp.SemanticTree{
.dom_node = self.server.page.document.asNode(),
.registry = &self.server.node_registry,
.page = self.server.page,
.arena = self.arena,
};
std.json.Stringify.value(st, .{ .whitespace = .minified }, &escaped.writer) catch |err| {
log.err(.mcp, "semantic tree dump failed", .{ .err = err });
};
try jw.writer.writeByte('"');
jw.endWriteRaw();
}, },
} }
try jw.writer.writeByte('"');
jw.endWriteRaw();
} }
}; };
@@ -124,6 +164,7 @@ const ToolAction = enum {
markdown, markdown,
links, links,
evaluate, evaluate,
semantic_tree,
}; };
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -132,6 +173,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "markdown", .markdown }, .{ "markdown", .markdown },
.{ "links", .links }, .{ "links", .links },
.{ "evaluate", .evaluate }, .{ "evaluate", .evaluate },
.{ "semantic_tree", .semantic_tree },
}); });
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void { pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -157,6 +199,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.markdown => try handleMarkdown(server, arena, req.id.?, call_params.arguments), .markdown => try handleMarkdown(server, arena, req.id.?, call_params.arguments),
.links => try handleLinks(server, arena, req.id.?, call_params.arguments), .links => try handleLinks(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments), .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
} }
} }
@@ -181,7 +224,7 @@ fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
} }
const content = [_]protocol.TextContent(ToolStreamingText){.{ const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .server = server, .action = .markdown }, .text = .{ .server = server, .action = .markdown, .arena = arena },
}}; }};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
} }
@@ -199,7 +242,25 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
} }
const content = [_]protocol.TextContent(ToolStreamingText){.{ const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .server = server, .action = .links }, .text = .{ .server = server, .action = .links, .arena = arena },
}};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
}
fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const TreeParams = struct {
url: ?[:0]const u8 = null,
};
if (arguments) |args_raw| {
if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| {
if (args.url) |u| {
try performGoto(server, u, id);
}
} else |_| {}
}
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .server = server, .action = .semantic_tree, .arena = arena },
}}; }};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content }); try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
} }