diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig
index 60df9fe7..2bd642da 100644
--- a/src/SemanticTree.zig
+++ b/src/SemanticTree.zig
@@ -43,6 +43,16 @@ pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!
     };
 }
 
+/// Writes the tree rooted at `self.dom_node` to `writer` as indented text.
+/// Any failure while dumping is logged and reported to the caller as
+/// `error.WriteFailed`.
+pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!void {
+    self.dumpText(self.dom_node, writer, 0) catch |err| {
+        log.err(.cdp, "semantic tree text dump failed", .{ .err = err });
+        return error.WriteFailed;
+    };
+}
+
 fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
     if (node.is(Element)) |el| {
         const tag = el.getTagNameLower();
@@ -222,3 +232,112 @@ fn dump(self: Self, node: *Node, jw: *std.json.Stringify, parent_xpath: []const
 
     try jw.endObject();
 }
+
+/// Recursively writes `node` and its descendants to `writer` as indented text.
+///
+/// Each emitted node becomes one `[id] role: name` entry, indented two spaces
+/// per `depth` level and followed by ` (value: ...)` when an input, textarea or
+/// select reports a non-empty value. A skipped node's subtree is skipped too.
+fn dumpText(self: Self, node: *Node, writer: *std.Io.Writer, depth: usize) !void {
+    // Skip non-content nodes: metadata and svg elements, elements hidden via
+    // inline `display: none` or `getHidden()`, whitespace-only text, and any
+    // node that is not an element, text, document or document fragment.
+    if (node.is(Element)) |el| {
+        const tag = el.getTag();
+        if (tag.isMetadata() or tag == .svg) return;
+
+        // CSS display: none visibility check (inline style only for now)
+        if (el.getAttributeSafe(comptime lp.String.wrap("style"))) |style| {
+            if (std.mem.indexOf(u8, style, "display: none") != null or
+                std.mem.indexOf(u8, style, "display:none") != null)
+            {
+                return;
+            }
+        }
+
+        if (el.is(Element.Html)) |html_el| {
+            if (html_el.getHidden()) return;
+        }
+    } else if (node.is(CData.Text)) |text_node| {
+        if (isAllWhitespace(text_node.getWholeText())) return;
+    } else if (node._type != .document and node._type != .document_fragment) {
+        return;
+    }
+
+    const cdp_node = try self.registry.register(node);
+    const axn = AXNode.fromNode(node);
+    const role = try axn.getRole();
+
+    // NOTE(review): is_interactive is computed below but never written to the
+    // output by this function; confirm whether it should be emitted.
+    var is_interactive = false;
+    var value: ?[]const u8 = null;
+
+    if (node.is(Element)) |el| {
+        const ax_role = std.meta.stringToEnum(AXNode.AXRole, role) orelse .none;
+        is_interactive = ax_role.isInteractive();
+
+        const event_target = node.asEventTarget();
+        if (self.page._event_manager.hasListener(event_target, "click") or
+            self.page._event_manager.hasListener(event_target, "mousedown") or
+            self.page._event_manager.hasListener(event_target, "mouseup") or
+            self.page._event_manager.hasListener(event_target, "keydown") or
+            self.page._event_manager.hasListener(event_target, "change") or
+            self.page._event_manager.hasListener(event_target, "input"))
+        {
+            is_interactive = true;
+        }
+
+        if (el.is(Element.Html)) |html_el| {
+            if (html_el.hasAttributeFunction(.onclick, self.page) or
+                html_el.hasAttributeFunction(.onmousedown, self.page) or
+                html_el.hasAttributeFunction(.onmouseup, self.page) or
+                html_el.hasAttributeFunction(.onkeydown, self.page) or
+                html_el.hasAttributeFunction(.onchange, self.page) or
+                html_el.hasAttributeFunction(.oninput, self.page))
+            {
+                is_interactive = true;
+            }
+        }
+
+        if (el.is(Element.Html.Input)) |input| {
+            value = input.getValue();
+        } else if (el.is(Element.Html.TextArea)) |textarea| {
+            value = textarea.getValue();
+        } else if (el.is(Element.Html.Select)) |select| {
+            value = select.getValue(self.page);
+        }
+    }
+
+    // Format: "  [12] link: Hacker News (value: ...)"
+    for (0..(depth * 2)) |_| {
+        try writer.writeByte(' ');
+    }
+    try writer.print("[{d}] {s}: ", .{ cdp_node.id, role });
+
+    // Prefer the accessible name; text nodes without one fall back to their
+    // trimmed content.
+    if (try axn.getName(self.page, self.arena)) |name| {
+        if (name.len > 0) {
+            try writer.writeAll(name);
+        }
+    } else if (node.is(CData.Text)) |text_node| {
+        const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n");
+        if (trimmed.len > 0) {
+            try writer.writeAll(trimmed);
+        }
+    }
+
+    if (value) |v| {
+        if (v.len > 0) {
+            try writer.print(" (value: {s})", .{v});
+        }
+    }
+
+    try writer.writeByte('\n');
+
+    var it = node.childrenIterator();
+    while (it.next()) |child| {
+        try self.dumpText(child, writer, depth + 1);
+    }
+}
diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig
index 12dcbb7b..2470a312 100644
--- a/src/cdp/domains/lp.zig
+++ b/src/cdp/domains/lp.zig
@@ -37,17 +37,39 @@ pub fn processMessage(cmd: anytype) !void {
 }
 
+/// Replies with the semantic tree of the current page's document. When the
+/// optional `format` parameter equals "text" the tree is sent as an indented
+/// text string; otherwise the `SemanticTree` value itself is sent.
 fn getSemanticTree(cmd: anytype) !void {
+    const Params = struct {
+        format: ?[]const u8 = null,
+    };
+    const params = (try cmd.params(Params)) orelse Params{};
+
     const bc = cmd.browser_context orelse return error.NoBrowserContext;
     const page = bc.session.currentPage() orelse return error.PageNotLoaded;
     const dom_node = page.document.asNode();
 
+    const st = SemanticTree{
+        .dom_node = dom_node,
+        .registry = &bc.node_registry,
+        .page = page,
+        .arena = cmd.arena,
+    };
+
+    if (params.format) |format| {
+        if (std.mem.eql(u8, format, "text")) {
+            var aw: std.Io.Writer.Allocating = .init(cmd.arena);
+            defer aw.deinit();
+            try st.textStringify(&aw.writer);
+
+            return cmd.sendResult(.{
+                .semanticTree = aw.written(),
+            }, .{});
+        }
+    }
+
     return cmd.sendResult(.{
-        .semanticTree = SemanticTree{
-            .dom_node = dom_node,
-            .registry = &bc.node_registry,
-            .page = page,
-            .arena = cmd.arena,
-        },
+        .semanticTree = st,
     }, .{});
 }
 
diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig
index 59ddf1b5..8ab55f48 100644
--- a/src/mcp/tools.zig
+++ b/src/mcp/tools.zig
@@ -135,8 +135,7 @@ const ToolStreamingText = struct {
                 jw.endWriteRaw();
             },
             .semantic_tree => {
-                // MCP expects a string for "text" content, but our SemanticTree is a complex object.
-                // We'll serialize it as a string to fit the MCP text protocol requirements.
+                // Return the highly compressed Stagehand-style text format for maximum token efficiency
                 try jw.beginWriteRaw();
                 try jw.writer.writeByte('"');
                 var escaped = protocol.JsonEscapingWriter.init(jw.writer);
@@ -147,7 +146,8 @@ const ToolStreamingText = struct {
                     .page = self.server.page,
                     .arena = self.arena,
                 };
-                std.json.Stringify.value(st, .{ .whitespace = .minified }, &escaped.writer) catch |err| {
+
+                st.textStringify(&escaped.writer) catch |err| {
                     log.err(.mcp, "semantic tree dump failed", .{ .err = err });
                 };
 