mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 04:34:44 +00:00
SemanticTree: implement interactiveOnly filter and optimize token usage
This commit is contained in:
@@ -37,6 +37,7 @@ registry: *CDPNode.Registry,
|
|||||||
page: *Page,
|
page: *Page,
|
||||||
arena: std.mem.Allocator,
|
arena: std.mem.Allocator,
|
||||||
prune: bool = false,
|
prune: bool = false,
|
||||||
|
interactive_only: bool = false,
|
||||||
|
|
||||||
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
|
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
|
||||||
var visitor = JsonVisitor{ .jw = jw, .tree = self };
|
var visitor = JsonVisitor{ .jw = jw, .tree = self };
|
||||||
@@ -174,7 +175,23 @@ fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_nam
|
|||||||
};
|
};
|
||||||
|
|
||||||
var should_visit = true;
|
var should_visit = true;
|
||||||
if (self.prune) {
|
if (self.interactive_only) {
|
||||||
|
var keep = false;
|
||||||
|
if (interactive.isInteractiveRole(role)) {
|
||||||
|
keep = true;
|
||||||
|
} else if (interactive.isContentRole(role)) {
|
||||||
|
if (name != null and name.?.len > 0) {
|
||||||
|
keep = true;
|
||||||
|
}
|
||||||
|
} else if (std.mem.eql(u8, role, "RootWebArea")) {
|
||||||
|
keep = true;
|
||||||
|
} else if (is_interactive) {
|
||||||
|
keep = true;
|
||||||
|
}
|
||||||
|
if (!keep) {
|
||||||
|
should_visit = false;
|
||||||
|
}
|
||||||
|
} else if (self.prune) {
|
||||||
if (structural and !is_interactive and !has_explicit_label) {
|
if (structural and !is_interactive and !has_explicit_label) {
|
||||||
should_visit = false;
|
should_visit = false;
|
||||||
}
|
}
|
||||||
@@ -389,36 +406,46 @@ const TextVisitor = struct {
|
|||||||
depth: usize,
|
depth: usize,
|
||||||
|
|
||||||
pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool {
|
pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool {
|
||||||
// Format: " [12] link: Hacker News (value)"
|
for (0..self.depth) |_| {
|
||||||
for (0..(self.depth * 2)) |_| {
|
|
||||||
try self.writer.writeByte(' ');
|
try self.writer.writeByte(' ');
|
||||||
}
|
}
|
||||||
try self.writer.print("[{d}] {s}: ", .{ data.id, data.role });
|
|
||||||
|
|
||||||
|
var name_to_print: ?[]const u8 = null;
|
||||||
if (data.name) |n| {
|
if (data.name) |n| {
|
||||||
if (n.len > 0) {
|
if (n.len > 0) {
|
||||||
try self.writer.writeAll(n);
|
name_to_print = n;
|
||||||
}
|
}
|
||||||
} else if (node.is(CData.Text)) |text_node| {
|
} else if (node.is(CData.Text)) |text_node| {
|
||||||
const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n");
|
const trimmed = std.mem.trim(u8, text_node.getWholeText(), " \t\r\n");
|
||||||
if (trimmed.len > 0) {
|
if (trimmed.len > 0) {
|
||||||
try self.writer.writeAll(trimmed);
|
name_to_print = trimmed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic");
|
||||||
|
|
||||||
|
if (is_text_only and name_to_print != null) {
|
||||||
|
try self.writer.print("{d} '{s}'", .{ data.id, name_to_print.? });
|
||||||
|
} else {
|
||||||
|
try self.writer.print("{d} {s}", .{ data.id, data.role });
|
||||||
|
if (name_to_print) |n| {
|
||||||
|
try self.writer.print(" '{s}'", .{n});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.value) |v| {
|
if (data.value) |v| {
|
||||||
if (v.len > 0) {
|
if (v.len > 0) {
|
||||||
try self.writer.print(" (value: {s})", .{v});
|
try self.writer.print(" value='{s}'", .{v});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.options) |options| {
|
if (data.options) |options| {
|
||||||
try self.writer.writeAll(" options: [");
|
try self.writer.writeAll(" options=[");
|
||||||
for (options, 0..) |opt, i| {
|
for (options, 0..) |opt, i| {
|
||||||
if (i > 0) try self.writer.writeAll(",");
|
if (i > 0) try self.writer.writeAll(",");
|
||||||
try self.writer.print("'{s}'", .{opt.value});
|
try self.writer.print("'{s}'", .{opt.value});
|
||||||
if (opt.selected) {
|
if (opt.selected) {
|
||||||
try self.writer.writeAll(" (selected)");
|
try self.writer.writeAll("*");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try self.writer.writeAll("]\n");
|
try self.writer.writeAll("]\n");
|
||||||
|
|||||||
@@ -253,12 +253,13 @@ pub fn classifyInteractivity(
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn isInteractiveRole(role: []const u8) bool {
|
pub fn isInteractiveRole(role: []const u8) bool {
|
||||||
const interactive_roles = [_][]const u8{
|
const interactive_roles = [_][]const u8{
|
||||||
"button", "link", "tab", "menuitem",
|
"button", "link", "tab", "menuitem",
|
||||||
"menuitemcheckbox", "menuitemradio", "switch", "checkbox",
|
"menuitemcheckbox", "menuitemradio", "switch", "checkbox",
|
||||||
"radio", "slider", "spinbutton", "searchbox",
|
"radio", "slider", "spinbutton", "searchbox",
|
||||||
"combobox", "option", "treeitem",
|
"combobox", "option", "treeitem", "textbox",
|
||||||
|
"listbox", "iframe",
|
||||||
};
|
};
|
||||||
for (interactive_roles) |r| {
|
for (interactive_roles) |r| {
|
||||||
if (std.ascii.eqlIgnoreCase(role, r)) return true;
|
if (std.ascii.eqlIgnoreCase(role, r)) return true;
|
||||||
@@ -266,6 +267,19 @@ fn isInteractiveRole(role: []const u8) bool {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether `role` names one of the content roles in the table below.
/// The comparison ignores ASCII case, so "Heading" and "heading" both match.
/// Returns false for any role that is not in the table.
pub fn isContentRole(role: []const u8) bool {
|
||||||
|
const content_roles = [_][]const u8{
|
||||||
|
"heading", "cell", "gridcell",
|
||||||
|
"columnheader", "rowheader", "listitem",
|
||||||
|
"article", "region", "main",
|
||||||
|
"navigation",
|
||||||
|
};
|
||||||
|
// Linear scan over the fixed table; the first case-insensitive match wins.
for (content_roles) |r| {
|
||||||
|
if (std.ascii.eqlIgnoreCase(role, r)) return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
fn getRole(el: *Element) ?[]const u8 {
|
fn getRole(el: *Element) ?[]const u8 {
|
||||||
// Explicit role attribute takes precedence
|
// Explicit role attribute takes precedence
|
||||||
if (el.getAttributeSafe(comptime .wrap("role"))) |role| return role;
|
if (el.getAttributeSafe(comptime .wrap("role"))) |role| return role;
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ fn getSemanticTree(cmd: anytype) !void {
|
|||||||
const Params = struct {
|
const Params = struct {
|
||||||
format: ?enum { text } = null,
|
format: ?enum { text } = null,
|
||||||
prune: ?bool = null,
|
prune: ?bool = null,
|
||||||
|
interactiveOnly: ?bool = null,
|
||||||
};
|
};
|
||||||
const params = (try cmd.params(Params)) orelse Params{};
|
const params = (try cmd.params(Params)) orelse Params{};
|
||||||
|
|
||||||
@@ -65,6 +66,7 @@ fn getSemanticTree(cmd: anytype) !void {
|
|||||||
.page = page,
|
.page = page,
|
||||||
.arena = cmd.arena,
|
.arena = cmd.arena,
|
||||||
.prune = params.prune orelse false,
|
.prune = params.prune orelse false,
|
||||||
|
.interactive_only = params.interactiveOnly orelse false,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (params.format) |format| {
|
if (params.format) |format| {
|
||||||
|
|||||||
Reference in New Issue
Block a user