SemanticTree: implement interactiveOnly filter and optimize token usage

This commit is contained in:
Adrià Arrufat
2026-03-18 10:56:50 +09:00
parent 8372b45cc5
commit 015edc3848
3 changed files with 55 additions and 12 deletions

View File

@@ -37,6 +37,7 @@ registry: *CDPNode.Registry,
page: *Page, page: *Page,
arena: std.mem.Allocator, arena: std.mem.Allocator,
prune: bool = false, prune: bool = false,
interactive_only: bool = false,
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void { pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
var visitor = JsonVisitor{ .jw = jw, .tree = self }; var visitor = JsonVisitor{ .jw = jw, .tree = self };
@@ -174,7 +175,23 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam
}; };
var should_visit = true; var should_visit = true;
if (self.prune) { if (self.interactive_only) {
var keep = false;
if (interactive.isInteractiveRole(role)) {
keep = true;
} else if (interactive.isContentRole(role)) {
if (name != null and name.?.len > 0) {
keep = true;
}
} else if (std.mem.eql(u8, role, "RootWebArea")) {
keep = true;
} else if (is_interactive) {
keep = true;
}
if (!keep) {
should_visit = false;
}
} else if (self.prune) {
if (structural and !is_interactive and !has_explicit_label) { if (structural and !is_interactive and !has_explicit_label) {
should_visit = false; should_visit = false;
} }
@@ -389,36 +406,46 @@ const TextVisitor = struct {
depth: usize, depth: usize,
pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool { pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool {
// Format: " [12] link: Hacker News (value)" for (0..self.depth) |_| {
for (0..(self.depth * 2)) |_| {
try self.writer.writeByte(' '); try self.writer.writeByte(' ');
} }
try self.writer.print("[{d}] {s}: ", .{ data.id, data.role });
var name_to_print: ?[]const u8 = null;
if (data.name) |n| { if (data.name) |n| {
if (n.len > 0) { if (n.len > 0) {
try self.writer.writeAll(n); name_to_print = n;
} }
} else if (node.is(CData.Text)) |text_node| { } else if (node.is(CData.Text)) |text_node| {
const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n"); const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n");
if (trimmed.len > 0) { if (trimmed.len > 0) {
try self.writer.writeAll(trimmed); name_to_print = trimmed;
}
}
const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic");
if (is_text_only and name_to_print != null) {
try self.writer.print("{d} '{s}'", .{ data.id, name_to_print.? });
} else {
try self.writer.print("{d} {s}", .{ data.id, data.role });
if (name_to_print) |n| {
try self.writer.print(" '{s}'", .{n});
} }
} }
if (data.value) |v| { if (data.value) |v| {
if (v.len > 0) { if (v.len > 0) {
try self.writer.print(" (value: {s})", .{v}); try self.writer.print(" value='{s}'", .{v});
} }
} }
if (data.options) |options| { if (data.options) |options| {
try self.writer.writeAll(" options: ["); try self.writer.writeAll(" options=[");
for (options, 0..) |opt, i| { for (options, 0..) |opt, i| {
if (i > 0) try self.writer.writeAll(","); if (i > 0) try self.writer.writeAll(",");
try self.writer.print("'{s}'", .{opt.value}); try self.writer.print("'{s}'", .{opt.value});
if (opt.selected) { if (opt.selected) {
try self.writer.writeAll(" (selected)"); try self.writer.writeAll("*");
} }
} }
try self.writer.writeAll("]\n"); try self.writer.writeAll("]\n");

View File

@@ -253,12 +253,13 @@ pub fn classifyInteractivity(
return null; return null;
} }
fn isInteractiveRole(role: []const u8) bool { pub fn isInteractiveRole(role: []const u8) bool {
const interactive_roles = [_][]const u8{ const interactive_roles = [_][]const u8{
"button", "link", "tab", "menuitem", "button", "link", "tab", "menuitem",
"menuitemcheckbox", "menuitemradio", "switch", "checkbox", "menuitemcheckbox", "menuitemradio", "switch", "checkbox",
"radio", "slider", "spinbutton", "searchbox", "radio", "slider", "spinbutton", "searchbox",
"combobox", "option", "treeitem", "combobox", "option", "treeitem", "textbox",
"listbox", "iframe",
}; };
for (interactive_roles) |r| { for (interactive_roles) |r| {
if (std.ascii.eqlIgnoreCase(role, r)) return true; if (std.ascii.eqlIgnoreCase(role, r)) return true;
@@ -266,6 +267,19 @@ fn isInteractiveRole(role: []const u8) bool {
return false; return false;
} }
/// Reports whether `role` is one of the roles this module classifies as
/// content (headings, table cells/headers, list items and landmark-style
/// containers such as `article`, `region`, `main` and `navigation`).
///
/// The match is ASCII case-insensitive, so `"Heading"` and `"heading"` are
/// treated the same. An empty or unknown role yields `false`.
pub fn isContentRole(role: []const u8) bool {
    const known_content_roles = [_][]const u8{
        "heading",
        "cell",
        "gridcell",
        "columnheader",
        "rowheader",
        "listitem",
        "article",
        "region",
        "main",
        "navigation",
    };

    // Linear scan: the table is tiny, so a simple walk is sufficient.
    var idx: usize = 0;
    while (idx < known_content_roles.len) : (idx += 1) {
        const candidate = known_content_roles[idx];
        if (std.ascii.eqlIgnoreCase(role, candidate)) {
            return true;
        }
    }
    return false;
}
fn getRole(el: *Element) ?[]const u8 { fn getRole(el: *Element) ?[]const u8 {
// Explicit role attribute takes precedence // Explicit role attribute takes precedence
if (el.getAttributeSafe(comptime .wrap("role"))) |role| return role; if (el.getAttributeSafe(comptime .wrap("role"))) |role| return role;

View File

@@ -52,6 +52,7 @@ fn getSemanticTree(cmd: anytype) !void {
const Params = struct { const Params = struct {
format: ?enum { text } = null, format: ?enum { text } = null,
prune: ?bool = null, prune: ?bool = null,
interactiveOnly: ?bool = null,
}; };
const params = (try cmd.params(Params)) orelse Params{}; const params = (try cmd.params(Params)) orelse Params{};
@@ -65,6 +66,7 @@ fn getSemanticTree(cmd: anytype) !void {
.page = page, .page = page,
.arena = cmd.arena, .arena = cmd.arena,
.prune = params.prune orelse false, .prune = params.prune orelse false,
.interactive_only = params.interactiveOnly orelse false,
}; };
if (params.format) |format| { if (params.format) |format| {