Refactor: move SemanticTree to core and expose via MCP tools

This commit is contained in:
Adrià Arrufat
2026-03-06 15:44:03 +09:00
parent 0f46277b1f
commit 248851701f
5 changed files with 119 additions and 56 deletions

View File

@@ -19,26 +19,37 @@
const std = @import("std");
const lp = @import("lightpanda");
const log = @import("log.zig");
const Page = lp.Page;
const CData = @import("../browser/webapi/CData.zig");
const Element = @import("../browser/webapi/Element.zig");
const Node = @import("../browser/webapi/Node.zig");
const AXNode = @import("AXNode.zig");
const CDPNode = @import("Node.zig");
const CData = @import("browser/webapi/CData.zig");
const Element = @import("browser/webapi/Element.zig");
const Node = @import("browser/webapi/Node.zig");
const AXNode = @import("cdp/AXNode.zig");
const CDPNode = @import("cdp/Node.zig");
pub fn dump(root: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, arena: std.mem.Allocator) !void {
try dumpNode(root, registry, jw, page, "", arena);
const SemanticTree = @This();
dom_node: *Node,
registry: *CDPNode.Registry,
page: *Page,
arena: std.mem.Allocator,
/// Writes the semantic tree rooted at `self.dom_node` into `jw`.
///
/// The work is delegated to `dumpNode`, starting from an empty parent XPath.
/// Whatever error `dumpNode` reports is logged and then collapsed into
/// `error.WriteFailed`, the only error this signature permits.
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
    const root_parent_xpath: []const u8 = "";
    if (self.dumpNode(self.dom_node, jw, root_parent_xpath)) |_| {
        return;
    } else |err| {
        log.err(.cdp, "semantic tree dump failed", .{ .err = err });
        return error.WriteFailed;
    }
}
fn isAllWhitespace(text: []const u8) bool {
/// Returns true when `text` contains nothing but ASCII whitespace bytes.
/// An empty slice counts as all-whitespace. The receiver is unused; it only
/// exists so the function can be called with method syntax.
fn isAllWhitespace(_: @This(), text: []const u8) bool {
    // `std.ascii.whitespace` lists exactly the bytes `std.ascii.isWhitespace`
    // accepts, so "no byte outside that set" is the same predicate.
    const first_non_blank = std.mem.indexOfNone(u8, text, &std.ascii.whitespace);
    return first_non_blank == null;
}
fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 {
fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
if (node.is(Element)) |el| {
const tag = el.getTagNameLower();
var index: usize = 1;
@@ -54,7 +65,7 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 {
}
}
}
return std.fmt.allocPrint(arena, "/{s}[{d}]", .{ tag, index });
return std.fmt.allocPrint(self.arena, "/{s}[{d}]", .{ tag, index });
} else if (node.is(CData.Text) != null) {
var index: usize = 1;
if (node._parent) |parent| {
@@ -66,12 +77,12 @@ fn getXPathSegment(node: *Node, arena: std.mem.Allocator) ![]const u8 {
}
}
}
return std.fmt.allocPrint(arena, "/text()[{d}]", .{index});
return std.fmt.allocPrint(self.arena, "/text()[{d}]", .{index});
}
return "";
}
fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, page: *Page, parent_xpath: []const u8, arena: std.mem.Allocator) !void {
fn dumpNode(self: @This(), node: *Node, jw: *std.json.Stringify, parent_xpath: []const u8) !void {
// 1. Skip non-content nodes
if (node.is(Element)) |el| {
const tag = el.getTagNameLower();
@@ -102,14 +113,14 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
} else if (node.is(CData.Text) != null) {
const text_node = node.is(CData.Text).?;
const text = text_node.getWholeText();
if (isAllWhitespace(text)) {
if (self.isAllWhitespace(text)) {
return;
}
} else if (node._type != .document and node._type != .document_fragment) {
return;
}
const cdp_node = try registry.register(node);
const cdp_node = try self.registry.register(node);
const axn = AXNode.fromNode(node);
const role = try axn.getRole();
@@ -136,23 +147,23 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
}
const event_target = node.asEventTarget();
if (page._event_manager.hasListener(event_target, "click") or
page._event_manager.hasListener(event_target, "mousedown") or
page._event_manager.hasListener(event_target, "mouseup") or
page._event_manager.hasListener(event_target, "keydown") or
page._event_manager.hasListener(event_target, "change") or
page._event_manager.hasListener(event_target, "input"))
if (self.page._event_manager.hasListener(event_target, "click") or
self.page._event_manager.hasListener(event_target, "mousedown") or
self.page._event_manager.hasListener(event_target, "mouseup") or
self.page._event_manager.hasListener(event_target, "keydown") or
self.page._event_manager.hasListener(event_target, "change") or
self.page._event_manager.hasListener(event_target, "input"))
{
is_interactive = true;
}
if (el.is(Element.Html)) |html_el| {
if (html_el.hasAttributeFunction(.onclick, page) or
html_el.hasAttributeFunction(.onmousedown, page) or
html_el.hasAttributeFunction(.onmouseup, page) or
html_el.hasAttributeFunction(.onkeydown, page) or
html_el.hasAttributeFunction(.onchange, page) or
html_el.hasAttributeFunction(.oninput, page))
if (html_el.hasAttributeFunction(.onclick, self.page) or
html_el.hasAttributeFunction(.onmousedown, self.page) or
html_el.hasAttributeFunction(.onmouseup, self.page) or
html_el.hasAttributeFunction(.onkeydown, self.page) or
html_el.hasAttributeFunction(.onchange, self.page) or
html_el.hasAttributeFunction(.oninput, self.page))
{
is_interactive = true;
}
@@ -161,8 +172,8 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
node_name = "root";
}
const segment = try getXPathSegment(node, arena);
const xpath = try std.mem.concat(arena, u8, &.{ parent_xpath, segment });
const segment = try self.getXPathSegment(node);
const xpath = try std.mem.concat(self.arena, u8, &.{ parent_xpath, segment });
try jw.beginObject();
@@ -213,7 +224,7 @@ fn dumpNode(node: *Node, registry: *CDPNode.Registry, jw: *std.json.Stringify, p
try jw.beginArray();
var it = node.childrenIterator();
while (it.next()) |child| {
try dumpNode(child, registry, jw, page, xpath, arena);
try self.dumpNode(child, jw, xpath);
}
try jw.endArray();

View File

@@ -20,9 +20,9 @@ const std = @import("std");
const lp = @import("lightpanda");
const log = @import("../../log.zig");
const markdown = lp.markdown;
const SemanticTree = lp.SemanticTree;
const Node = @import("../Node.zig");
const DOMNode = @import("../../browser/webapi/Node.zig");
const SemanticTree = @import("../semantic_tree.zig");
pub fn processMessage(cmd: anytype) !void {
const action = std.meta.stringToEnum(enum {
@@ -36,27 +36,13 @@ pub fn processMessage(cmd: anytype) !void {
}
}
const SemanticTreeResult = struct {
dom_node: *DOMNode,
registry: *Node.Registry,
page: *lp.Page,
arena: std.mem.Allocator,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
SemanticTree.dump(self.dom_node, self.registry, jw, self.page, self.arena) catch |err| {
log.err(.cdp, "semantic tree dump failed", .{ .err = err });
return error.WriteFailed;
};
}
};
fn getSemanticTree(cmd: anytype) !void {
const bc = cmd.browser_context orelse return error.NoBrowserContext;
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
const dom_node = page.document.asNode();
return cmd.sendResult(.{
.semanticTree = .{
.semanticTree = SemanticTree{
.dom_node = dom_node,
.registry = &bc.node_registry,
.page = page,

View File

@@ -31,6 +31,7 @@ pub const log = @import("log.zig");
pub const js = @import("browser/js/js.zig");
pub const dump = @import("browser/dump.zig");
pub const markdown = @import("browser/markdown.zig");
pub const SemanticTree = @import("SemanticTree.zig");
pub const mcp = @import("mcp.zig");
pub const build_config = @import("build_config");
pub const crash_handler = @import("crash_handler.zig");

View File

@@ -7,6 +7,7 @@ const HttpClient = @import("../http/Client.zig");
const testing = @import("../testing.zig");
const protocol = @import("protocol.zig");
const router = @import("router.zig");
const CDPNode = @import("../cdp/Node.zig");
const Self = @This();
@@ -18,6 +19,7 @@ notification: *lp.Notification,
browser: lp.Browser,
session: *lp.Session,
page: *lp.Page,
node_registry: CDPNode.Registry,
writer: *std.io.Writer,
mutex: std.Thread.Mutex = .{},
@@ -46,6 +48,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S
.notification = notification,
.session = undefined,
.page = undefined,
.node_registry = CDPNode.Registry.init(allocator),
};
self.session = try self.browser.newSession(self.notification);
@@ -55,6 +58,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S
}
pub fn deinit(self: *Self) void {
self.node_registry.deinit();
self.aw.deinit();
self.browser.deinit();
self.notification.deinit();

View File

@@ -61,6 +61,18 @@ pub const tool_list = [_]protocol.Tool{
\\}
),
},
.{
.name = "semantic_tree",
.description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." }
\\ }
\\}
),
},
};
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -79,18 +91,26 @@ const EvaluateParams = struct {
const ToolStreamingText = struct {
server: *Server,
action: enum { markdown, links },
action: enum { markdown, links, semantic_tree },
arena: std.mem.Allocator,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void {
switch (self.action) {
.markdown => {
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
lp.markdown.dump(self.server.page.document.asNode(), .{}, &escaped.writer, self.server.page) catch |err| {
log.err(.mcp, "markdown dump failed", .{ .err = err });
};
try jw.writer.writeByte('"');
jw.endWriteRaw();
},
.links => {
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
const w = &escaped.writer;
switch (self.action) {
.markdown => lp.markdown.dump(self.server.page.document.asNode(), .{}, w, self.server.page) catch |err| {
log.err(.mcp, "markdown dump failed", .{ .err = err });
},
.links => {
if (Selector.querySelectorAll(self.server.page.document.asNode(), "a[href]", self.server.page)) |list| {
defer list.deinit(self.server.page);
var first = true;
@@ -111,10 +131,30 @@ const ToolStreamingText = struct {
} else |err| {
log.err(.mcp, "query links failed", .{ .err = err });
}
},
}
try jw.writer.writeByte('"');
jw.endWriteRaw();
},
.semantic_tree => {
// MCP expects a string for "text" content, but our SemanticTree is a complex object.
// We'll serialize it as a string to fit the MCP text protocol requirements.
try jw.beginWriteRaw();
try jw.writer.writeByte('"');
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
const st = lp.SemanticTree{
.dom_node = self.server.page.document.asNode(),
.registry = &self.server.node_registry,
.page = self.server.page,
.arena = self.arena,
};
std.json.Stringify.value(st, .{ .whitespace = .minified }, &escaped.writer) catch |err| {
log.err(.mcp, "semantic tree dump failed", .{ .err = err });
};
try jw.writer.writeByte('"');
jw.endWriteRaw();
},
}
}
};
@@ -124,6 +164,7 @@ const ToolAction = enum {
markdown,
links,
evaluate,
semantic_tree,
};
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -132,6 +173,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "markdown", .markdown },
.{ "links", .links },
.{ "evaluate", .evaluate },
.{ "semantic_tree", .semantic_tree },
});
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -157,6 +199,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.markdown => try handleMarkdown(server, arena, req.id.?, call_params.arguments),
.links => try handleLinks(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
}
}
@@ -181,7 +224,7 @@ fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
}
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .server = server, .action = .markdown },
.text = .{ .server = server, .action = .markdown, .arena = arena },
}};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
}
@@ -199,7 +242,25 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
}
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .server = server, .action = .links },
.text = .{ .server = server, .action = .links, .arena = arena },
}};
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
}
/// Handler for the `semantic_tree` MCP tool.
///
/// When `arguments` carries a `url`, the server navigates there first through
/// `performGoto`. The reply is a single text content item whose payload is a
/// `ToolStreamingText` with the `.semantic_tree` action, so the tree is
/// serialized while the result is being written.
///
/// Argument handling is best-effort: arguments that fail to parse are logged
/// and treated as "no url", so the tool still answers for the server's current
/// page. `error.OutOfMemory` is not recoverable that way and is propagated.
fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const TreeParams = struct {
        url: ?[:0]const u8 = null,
    };
    if (arguments) |args_raw| {
        if (std.json.parseFromValueLeaky(TreeParams, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| {
            if (args.url) |u| {
                try performGoto(server, u, id);
            }
        } else |err| switch (err) {
            // Allocation failure must not be mistaken for "no url given".
            error.OutOfMemory => return err,
            // Malformed arguments: keep going without navigation, but leave a trace.
            else => log.err(.mcp, "invalid semantic_tree arguments", .{ .err = err }),
        }
    }

    const content = [_]protocol.TextContent(ToolStreamingText){.{
        .text = .{ .server = server, .action = .semantic_tree, .arena = arena },
    }};
    try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
}