From 015edc38481b989da5fb1fe7acd5f6c3c44f28fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Wed, 18 Mar 2026 10:56:50 +0900 Subject: [PATCH 1/5] SemanticTree: implement interactiveOnly filter and optimize token usage --- src/SemanticTree.zig | 47 +++++++++++++++++++++++++++++-------- src/browser/interactive.zig | 18 ++++++++++++-- src/cdp/domains/lp.zig | 2 ++ 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 3433ca6b..e9661bad 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -37,6 +37,7 @@ registry: *CDPNode.Registry, page: *Page, arena: std.mem.Allocator, prune: bool = false, +interactive_only: bool = false, pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { var visitor = JsonVisitor{ .jw = jw, .tree = self }; @@ -174,7 +175,23 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam }; var should_visit = true; - if (self.prune) { + if (self.interactive_only) { + var keep = false; + if (interactive.isInteractiveRole(role)) { + keep = true; + } else if (interactive.isContentRole(role)) { + if (name != null and name.?.len > 0) { + keep = true; + } + } else if (std.mem.eql(u8, role, "RootWebArea")) { + keep = true; + } else if (is_interactive) { + keep = true; + } + if (!keep) { + should_visit = false; + } + } else if (self.prune) { if (structural and !is_interactive and !has_explicit_label) { should_visit = false; } @@ -389,36 +406,46 @@ const TextVisitor = struct { depth: usize, pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool { - // Format: " [12] link: Hacker News (value)" - for (0..(self.depth * 2)) |_| { + for (0..self.depth) |_| { try self.writer.writeByte(' '); } - try self.writer.print("[{d}] {s}: ", .{ data.id, data.role }); + var name_to_print: ?[]const u8 = null; if (data.name) |n| { if (n.len > 0) { - try self.writer.writeAll(n); + name_to_print = n; } } else if (node.is(CData.Text)) |text_node| { const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n"); if (trimmed.len > 0) { - try self.writer.writeAll(trimmed); + name_to_print = trimmed; + } + } + + const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic"); + + if (is_text_only and name_to_print != null) { + try self.writer.print("{d} '{s}'", .{ data.id, name_to_print.? }); + } else { + try self.writer.print("{d} {s}", .{ data.id, data.role }); + if (name_to_print) |n| { + try self.writer.print(" '{s}'", .{n}); } } if (data.value) |v| { if (v.len > 0) { - try self.writer.print(" (value: {s})", .{v}); + try self.writer.print(" value='{s}'", .{v}); } } if (data.options) |options| { - try self.writer.writeAll(" options: ["); + try self.writer.writeAll(" options=["); for (options, 0..) |opt, i| { - if (i > 0) try self.writer.writeAll(", "); + if (i > 0) try self.writer.writeAll(","); try self.writer.print("'{s}'", .{opt.value}); if (opt.selected) { - try self.writer.writeAll(" (selected)"); + try self.writer.writeAll("*"); } } try self.writer.writeAll("]\n"); diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index 2d03db51..04a0d492 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -253,12 +253,13 @@ pub fn classifyInteractivity( return null; } -fn isInteractiveRole(role: []const u8) bool { +pub fn isInteractiveRole(role: []const u8) bool { const interactive_roles = [_][]const u8{ "button", "link", "tab", "menuitem", "menuitemcheckbox", "menuitemradio", "switch", "checkbox", "radio", "slider", "spinbutton", "searchbox", - "combobox", "option", "treeitem", + "combobox", "option", "treeitem", "textbox", + "listbox", "iframe", }; for (interactive_roles) |r| { if (std.ascii.eqlIgnoreCase(role, r)) return true; @@ -266,6 +267,19 @@ fn isInteractiveRole(role: []const u8) bool { return false; } +pub fn isContentRole(role: []const u8) bool { + const content_roles = [_][]const u8{ + "heading", "cell", "gridcell", + "columnheader", "rowheader", "listitem", + "article", "region", "main", + "navigation", + }; + for (content_roles) |r| { + if (std.ascii.eqlIgnoreCase(role, r)) return true; + } + return false; +} + fn getRole(el: *Element) ?[]const u8 { // Explicit role attribute takes precedence if (el.getAttributeSafe(comptime .wrap("role"))) |role| return role; diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index 19fc8cac..b0d5c574 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -52,6 +52,7 @@ fn getSemanticTree(cmd: anytype) !void { const Params = struct { format: ?enum { text } = null, prune: ?bool = null, + interactiveOnly: ?bool = null, }; const params = (try cmd.params(Params)) orelse Params{}; @@ -65,6 +66,7 @@ fn getSemanticTree(cmd: anytype) !void { .page = page, .arena = cmd.arena, .prune = params.prune orelse false, + .interactive_only = params.interactiveOnly orelse false, }; if (params.format) |format| { From e1b14a68339539c65187d0e258c68293e4a90ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Wed, 18 Mar 2026 11:25:34 +0900 Subject: [PATCH 2/5] SemanticTree: enable prune by default --- src/SemanticTree.zig | 2 +- src/cdp/domains/lp.zig | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index e9661bad..6bc0b8ec 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -36,7 +36,7 @@ dom_node: *Node, registry: *CDPNode.Registry, page: *Page, arena: std.mem.Allocator, -prune: bool = false, +prune: bool = true, interactive_only: bool = false, pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index b0d5c574..efbf9ec7 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -65,13 +65,12 @@ fn getSemanticTree(cmd: anytype) !void { .registry = &bc.node_registry, .page = page, .arena = cmd.arena, - .prune = params.prune orelse false, + .prune = params.prune orelse true, .interactive_only = params.interactiveOnly orelse false, }; if (params.format) |format| { if (format == .text) { - st.prune = params.prune orelse true; var aw: std.Io.Writer.Allocating = .init(cmd.arena); defer aw.deinit(); try st.textStringify(&aw.writer); From ff288c8aa2ef2c8e12a1d3b099f5f5059562a6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Wed, 18 Mar 2026 12:04:53 +0900 Subject: [PATCH 3/5] browser.interactive: use for-else expression in role checks --- src/browser/interactive.zig | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index 04a0d492..94d6b799 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -261,10 +261,9 @@ pub fn isInteractiveRole(role: []const u8) bool { "combobox", "option", "treeitem", "textbox", "listbox", "iframe", }; - for (interactive_roles) |r| { - if (std.ascii.eqlIgnoreCase(role, r)) return true; - } - return false; + return for (interactive_roles) |r| { + if (std.ascii.eqlIgnoreCase(role, r)) break true; + } else false; } pub fn isContentRole(role: []const u8) bool { @@ -274,10 +273,9 @@ pub fn isContentRole(role: []const u8) bool { "article", "region", "main", "navigation", }; - for (content_roles) |r| { - if (std.ascii.eqlIgnoreCase(role, r)) return true; - } - return false; + return for (content_roles) |r| { + if (std.ascii.eqlIgnoreCase(role, r)) break true; + } else false; } fn getRole(el: *Element) ?[]const u8 { From cbab0b712a0b2fadd666d2a9b8521b5686f0e5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Wed, 18 Mar 2026 20:07:11 +0900 Subject: [PATCH 4/5] SemanticTree: simplify TextVisitor printing logic --- src/SemanticTree.zig | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 6bc0b8ec..166acf4a 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -424,13 +424,12 @@ const TextVisitor = struct { const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic"); - if (is_text_only and name_to_print != null) { - try self.writer.print("{d} '{s}'", .{ data.id, name_to_print.? }); - } else { - try self.writer.print("{d} {s}", .{ data.id, data.role }); - if (name_to_print) |n| { - try self.writer.print(" '{s}'", .{n}); - } + try self.writer.print("{d}", .{data.id}); + if (!is_text_only) { + try self.writer.print(" {s}", .{data.role}); + } + if (name_to_print) |n| { + try self.writer.print(" '{s}'", .{n}); } if (data.value) |v| { From 694aac5ce8ba954f817b19f5f67208f76f01749f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Wed, 18 Mar 2026 20:10:15 +0900 Subject: [PATCH 5/5] browser.interactive: optimize role checks with StaticStringMap --- src/browser/interactive.zig | 61 +++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index 94d6b799..075e8554 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -254,28 +254,51 @@ pub fn classifyInteractivity( } pub fn isInteractiveRole(role: []const u8) bool { - const interactive_roles = [_][]const u8{ - "button", "link", "tab", "menuitem", - "menuitemcheckbox", "menuitemradio", "switch", "checkbox", - "radio", "slider", "spinbutton", "searchbox", - "combobox", "option", "treeitem", "textbox", - "listbox", "iframe", - }; - return for (interactive_roles) |r| { - if (std.ascii.eqlIgnoreCase(role, r)) break true; - } else false; + const MAX_LEN = "menuitemcheckbox".len; + if (role.len > MAX_LEN) return false; + var buf: [MAX_LEN]u8 = undefined; + const lowered = std.ascii.lowerString(&buf, role); + const interactive_roles = std.StaticStringMap(void).initComptime(.{ + .{ "button", {} }, + .{ "checkbox", {} }, + .{ "combobox", {} }, + .{ "iframe", {} }, + .{ "link", {} }, + .{ "listbox", {} }, + .{ "menuitem", {} }, + .{ "menuitemcheckbox", {} }, + .{ "menuitemradio", {} }, + .{ "option", {} }, + .{ "radio", {} }, + .{ "searchbox", {} }, + .{ "slider", {} }, + .{ "spinbutton", {} }, + .{ "switch", {} }, + .{ "tab", {} }, + .{ "textbox", {} }, + .{ "treeitem", {} }, + }); + return interactive_roles.has(lowered); } pub fn isContentRole(role: []const u8) bool { - const content_roles = [_][]const u8{ - "heading", "cell", "gridcell", - "columnheader", "rowheader", "listitem", - "article", "region", "main", - "navigation", - }; - return for (content_roles) |r| { - if (std.ascii.eqlIgnoreCase(role, r)) break true; - } else false; + const MAX_LEN = "columnheader".len; + if (role.len > MAX_LEN) return false; + var buf: [MAX_LEN]u8 = undefined; + const lowered = std.ascii.lowerString(&buf, role); + const content_roles = std.StaticStringMap(void).initComptime(.{ + .{ "article", {} }, + .{ "cell", {} }, + .{ "columnheader", {} }, + .{ "gridcell", {} }, + .{ "heading", {} }, + .{ "listitem", {} }, + .{ "main", {} }, + .{ "navigation", {} }, + .{ "region", {} }, + .{ "rowheader", {} }, + }); + return content_roles.has(lowered); } fn getRole(el: *Element) ?[]const u8 {