mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-21 20:24:42 +00:00
Merge pull request #1894 from lightpanda-io/semantic-tree-interactive
SemanticTree: implement interactiveOnly filter and optimize token usage
This commit is contained in:
@@ -36,7 +36,8 @@ dom_node: *Node,
|
||||
registry: *CDPNode.Registry,
|
||||
page: *Page,
|
||||
arena: std.mem.Allocator,
|
||||
prune: bool = false,
|
||||
prune: bool = true,
|
||||
interactive_only: bool = false,
|
||||
|
||||
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
|
||||
var visitor = JsonVisitor{ .jw = jw, .tree = self };
|
||||
@@ -174,7 +175,23 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam
|
||||
};
|
||||
|
||||
var should_visit = true;
|
||||
if (self.prune) {
|
||||
if (self.interactive_only) {
|
||||
var keep = false;
|
||||
if (interactive.isInteractiveRole(role)) {
|
||||
keep = true;
|
||||
} else if (interactive.isContentRole(role)) {
|
||||
if (name != null and name.?.len > 0) {
|
||||
keep = true;
|
||||
}
|
||||
} else if (std.mem.eql(u8, role, "RootWebArea")) {
|
||||
keep = true;
|
||||
} else if (is_interactive) {
|
||||
keep = true;
|
||||
}
|
||||
if (!keep) {
|
||||
should_visit = false;
|
||||
}
|
||||
} else if (self.prune) {
|
||||
if (structural and !is_interactive and !has_explicit_label) {
|
||||
should_visit = false;
|
||||
}
|
||||
@@ -389,36 +406,45 @@ const TextVisitor = struct {
|
||||
depth: usize,
|
||||
|
||||
pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool {
|
||||
// Format: " [12] link: Hacker News (value)"
|
||||
for (0..(self.depth * 2)) |_| {
|
||||
for (0..self.depth) |_| {
|
||||
try self.writer.writeByte(' ');
|
||||
}
|
||||
try self.writer.print("[{d}] {s}: ", .{ data.id, data.role });
|
||||
|
||||
var name_to_print: ?[]const u8 = null;
|
||||
if (data.name) |n| {
|
||||
if (n.len > 0) {
|
||||
try self.writer.writeAll(n);
|
||||
name_to_print = n;
|
||||
}
|
||||
} else if (node.is(CData.Text)) |text_node| {
|
||||
const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n");
|
||||
if (trimmed.len > 0) {
|
||||
try self.writer.writeAll(trimmed);
|
||||
name_to_print = trimmed;
|
||||
}
|
||||
}
|
||||
|
||||
const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic");
|
||||
|
||||
try self.writer.print("{d}", .{data.id});
|
||||
if (!is_text_only) {
|
||||
try self.writer.print(" {s}", .{data.role});
|
||||
}
|
||||
if (name_to_print) |n| {
|
||||
try self.writer.print(" '{s}'", .{n});
|
||||
}
|
||||
|
||||
if (data.value) |v| {
|
||||
if (v.len > 0) {
|
||||
try self.writer.print(" (value: {s})", .{v});
|
||||
try self.writer.print(" value='{s}'", .{v});
|
||||
}
|
||||
}
|
||||
|
||||
if (data.options) |options| {
|
||||
try self.writer.writeAll(" options: [");
|
||||
try self.writer.writeAll(" options=[");
|
||||
for (options, 0..) |opt, i| {
|
||||
if (i > 0) try self.writer.writeAll(", ");
|
||||
if (i > 0) try self.writer.writeAll(",");
|
||||
try self.writer.print("'{s}'", .{opt.value});
|
||||
if (opt.selected) {
|
||||
try self.writer.writeAll(" (selected)");
|
||||
try self.writer.writeAll("*");
|
||||
}
|
||||
}
|
||||
try self.writer.writeAll("]\n");
|
||||
|
||||
@@ -253,17 +253,52 @@ pub fn classifyInteractivity(
|
||||
return null;
|
||||
}
|
||||
|
||||
fn isInteractiveRole(role: []const u8) bool {
|
||||
const interactive_roles = [_][]const u8{
|
||||
"button", "link", "tab", "menuitem",
|
||||
"menuitemcheckbox", "menuitemradio", "switch", "checkbox",
|
||||
"radio", "slider", "spinbutton", "searchbox",
|
||||
"combobox", "option", "treeitem",
|
||||
};
|
||||
for (interactive_roles) |r| {
|
||||
if (std.ascii.eqlIgnoreCase(role, r)) return true;
|
||||
}
|
||||
return false;
|
||||
/// Reports whether `role` is one of the interactive roles listed in the
/// table below. The comparison ignores ASCII case, so "Button" and "BUTTON"
/// both match "button". Any role not in the table, including the empty
/// string, yields `false`.
pub fn isInteractiveRole(role: []const u8) bool {
    // A case-insensitive static map looks `role` up directly: there is no
    // scratch buffer to lower-case into and no length cap that must be kept
    // in sync with the longest key when the table grows.
    const RoleSet = std.StaticStringMapWithEql(void, std.static_string_map.eqlAsciiIgnoreCase);
    const interactive_roles = RoleSet.initComptime(.{
        .{ "button", {} },
        .{ "checkbox", {} },
        .{ "combobox", {} },
        .{ "iframe", {} },
        .{ "link", {} },
        .{ "listbox", {} },
        .{ "menuitem", {} },
        .{ "menuitemcheckbox", {} },
        .{ "menuitemradio", {} },
        .{ "option", {} },
        .{ "radio", {} },
        .{ "searchbox", {} },
        .{ "slider", {} },
        .{ "spinbutton", {} },
        .{ "switch", {} },
        .{ "tab", {} },
        .{ "textbox", {} },
        .{ "treeitem", {} },
    });
    return interactive_roles.has(role);
}
|
||||
|
||||
/// Reports whether `role` is one of the content roles listed in the table
/// below. The comparison ignores ASCII case, so "Heading" and "HEADING" both
/// match "heading". Any role not in the table, including the empty string,
/// yields `false`.
pub fn isContentRole(role: []const u8) bool {
    // A case-insensitive static map looks `role` up directly: there is no
    // scratch buffer to lower-case into and no length cap that must be kept
    // in sync with the longest key when the table grows.
    const RoleSet = std.StaticStringMapWithEql(void, std.static_string_map.eqlAsciiIgnoreCase);
    const content_roles = RoleSet.initComptime(.{
        .{ "article", {} },
        .{ "cell", {} },
        .{ "columnheader", {} },
        .{ "gridcell", {} },
        .{ "heading", {} },
        .{ "listitem", {} },
        .{ "main", {} },
        .{ "navigation", {} },
        .{ "region", {} },
        .{ "rowheader", {} },
    });
    return content_roles.has(role);
}
|
||||
|
||||
fn getRole(el: *Element) ?[]const u8 {
|
||||
|
||||
@@ -52,6 +52,7 @@ fn getSemanticTree(cmd: anytype) !void {
|
||||
const Params = struct {
|
||||
format: ?enum { text } = null,
|
||||
prune: ?bool = null,
|
||||
interactiveOnly: ?bool = null,
|
||||
};
|
||||
const params = (try cmd.params(Params)) orelse Params{};
|
||||
|
||||
@@ -64,12 +65,12 @@ fn getSemanticTree(cmd: anytype) !void {
|
||||
.registry = &bc.node_registry,
|
||||
.page = page,
|
||||
.arena = cmd.arena,
|
||||
.prune = params.prune orelse false,
|
||||
.prune = params.prune orelse true,
|
||||
.interactive_only = params.interactiveOnly orelse false,
|
||||
};
|
||||
|
||||
if (params.format) |format| {
|
||||
if (format == .text) {
|
||||
st.prune = params.prune orelse true;
|
||||
var aw: std.Io.Writer.Allocating = .init(cmd.arena);
|
||||
defer aw.deinit();
|
||||
try st.textStringify(&aw.writer);
|
||||
|
||||
Reference in New Issue
Block a user