mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 04:34:44 +00:00
SemanticTree: optimize tree walking and xpath generation
- Use a reusable buffer for XPaths to reduce allocations. - Improve `display: none` detection with proper CSS parsing. - Pass parent name to children to avoid redundant AXNode lookups. - Use `getElementById` for faster datalist lookups.
This commit is contained in:
@@ -39,7 +39,8 @@ prune: bool = false,
|
|||||||
|
|
||||||
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
|
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
|
||||||
var visitor = JsonVisitor{ .jw = jw, .tree = self };
|
var visitor = JsonVisitor{ .jw = jw, .tree = self };
|
||||||
self.walk(self.dom_node, "", &visitor) catch |err| {
|
var xpath_buffer: std.ArrayList(u8) = .{};
|
||||||
|
self.walk(self.dom_node, &xpath_buffer, null, &visitor) catch |err| {
|
||||||
log.err(.app, "semantic tree json dump failed", .{ .err = err });
|
log.err(.app, "semantic tree json dump failed", .{ .err = err });
|
||||||
return error.WriteFailed;
|
return error.WriteFailed;
|
||||||
};
|
};
|
||||||
@@ -47,7 +48,8 @@ pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!
|
|||||||
|
|
||||||
pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!void {
|
pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!void {
|
||||||
var visitor = TextVisitor{ .writer = writer, .tree = self, .depth = 0 };
|
var visitor = TextVisitor{ .writer = writer, .tree = self, .depth = 0 };
|
||||||
self.walk(self.dom_node, "", &visitor) catch |err| {
|
var xpath_buffer: std.ArrayList(u8) = .empty;
|
||||||
|
self.walk(self.dom_node, &xpath_buffer, null, &visitor) catch |err| {
|
||||||
log.err(.app, "semantic tree text dump failed", .{ .err = err });
|
log.err(.app, "semantic tree text dump failed", .{ .err = err });
|
||||||
return error.WriteFailed;
|
return error.WriteFailed;
|
||||||
};
|
};
|
||||||
@@ -71,7 +73,26 @@ const NodeData = struct {
|
|||||||
node_name: []const u8,
|
node_name: []const u8,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn walk(self: @This(), node: *Node, parent_xpath: []const u8, visitor: anytype) !void {
|
fn isDisplayNone(style: []const u8) bool {
|
||||||
|
var it = std.mem.splitScalar(u8, style, ';');
|
||||||
|
while (it.next()) |decl| {
|
||||||
|
var decl_it = std.mem.splitScalar(u8, decl, ':');
|
||||||
|
const prop = decl_it.next() orelse continue;
|
||||||
|
const value = decl_it.next() orelse continue;
|
||||||
|
|
||||||
|
const prop_trimmed = std.mem.trim(u8, prop, &std.ascii.whitespace);
|
||||||
|
const value_trimmed = std.mem.trim(u8, value, &std.ascii.whitespace);
|
||||||
|
|
||||||
|
if (std.ascii.eqlIgnoreCase(prop_trimmed, "display") and
|
||||||
|
std.ascii.eqlIgnoreCase(value_trimmed, "none"))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype) !void {
|
||||||
// 1. Skip non-content nodes
|
// 1. Skip non-content nodes
|
||||||
if (node.is(Element)) |el| {
|
if (node.is(Element)) |el| {
|
||||||
const tag = el.getTag();
|
const tag = el.getTag();
|
||||||
@@ -82,9 +103,7 @@ fn walk(self: @This(), node: *Node, parent_xpath: []const u8, visitor: anytype)
|
|||||||
|
|
||||||
// CSS display: none visibility check (inline style only for now)
|
// CSS display: none visibility check (inline style only for now)
|
||||||
if (el.getAttributeSafe(comptime lp.String.wrap("style"))) |style| {
|
if (el.getAttributeSafe(comptime lp.String.wrap("style"))) |style| {
|
||||||
if (std.mem.indexOf(u8, style, "display: none") != null or
|
if (isDisplayNone(style)) {
|
||||||
std.mem.indexOf(u8, style, "display:none") != null)
|
|
||||||
{
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -155,8 +174,9 @@ fn walk(self: @This(), node: *Node, parent_xpath: []const u8, visitor: anytype)
|
|||||||
node_name = "root";
|
node_name = "root";
|
||||||
}
|
}
|
||||||
|
|
||||||
const segment = try self.getXPathSegment(node);
|
const initial_xpath_len = xpath_buffer.items.len;
|
||||||
const xpath = try std.mem.concat(self.arena, u8, &.{ parent_xpath, segment });
|
try appendXPathSegment(node, xpath_buffer.writer(self.arena));
|
||||||
|
const xpath = xpath_buffer.items;
|
||||||
|
|
||||||
const name = try axn.getName(self.page, self.arena);
|
const name = try axn.getName(self.page, self.arena);
|
||||||
|
|
||||||
@@ -185,8 +205,6 @@ fn walk(self: @This(), node: *Node, parent_xpath: []const u8, visitor: anytype)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (std.mem.eql(u8, role, "StaticText") and node._parent != null) {
|
if (std.mem.eql(u8, role, "StaticText") and node._parent != null) {
|
||||||
const parent_axn = AXNode.fromNode(node._parent.?);
|
|
||||||
const parent_name = try parent_axn.getName(self.page, self.arena);
|
|
||||||
if (parent_name != null and name != null and std.mem.indexOf(u8, parent_name.?, name.?) != null) {
|
if (parent_name != null and name != null and std.mem.indexOf(u8, parent_name.?, name.?) != null) {
|
||||||
should_visit = false;
|
should_visit = false;
|
||||||
}
|
}
|
||||||
@@ -208,13 +226,15 @@ fn walk(self: @This(), node: *Node, parent_xpath: []const u8, visitor: anytype)
|
|||||||
// we walk the children iterator.
|
// we walk the children iterator.
|
||||||
var it = node.childrenIterator();
|
var it = node.childrenIterator();
|
||||||
while (it.next()) |child| {
|
while (it.next()) |child| {
|
||||||
try self.walk(child, xpath, visitor);
|
try self.walk(child, xpath_buffer, name, visitor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (did_visit) {
|
if (did_visit) {
|
||||||
try visitor.leave();
|
try visitor.leave();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xpath_buffer.shrinkRetainingCapacity(initial_xpath_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extractSelectOptions(node: *Node, page: *Page, arena: std.mem.Allocator) ![]OptionData {
|
fn extractSelectOptions(node: *Node, page: *Page, arena: std.mem.Allocator) ![]OptionData {
|
||||||
@@ -246,17 +266,15 @@ fn extractSelectOptions(node: *Node, page: *Page, arena: std.mem.Allocator) ![]O
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn extractDataListOptions(list_id: []const u8, page: *Page, arena: std.mem.Allocator) !?[]OptionData {
|
fn extractDataListOptions(list_id: []const u8, page: *Page, arena: std.mem.Allocator) !?[]OptionData {
|
||||||
const doc = page.document.asNode();
|
if (page.document.getElementById(list_id, page)) |referenced_el| {
|
||||||
const datalist = @import("browser/webapi/selector/Selector.zig").querySelector(doc, try std.fmt.allocPrint(arena, "#{s}", .{list_id}), page) catch null;
|
if (referenced_el.getTag() == .datalist) {
|
||||||
if (datalist) |dl| {
|
return try extractSelectOptions(referenced_el.asNode(), page, arena);
|
||||||
if (dl.getTag() == .datalist) {
|
|
||||||
return try extractSelectOptions(dl.asNode(), page, arena);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
|
fn appendXPathSegment(node: *Node, writer: anytype) !void {
|
||||||
if (node.is(Element)) |el| {
|
if (node.is(Element)) |el| {
|
||||||
const tag = el.getTagNameLower();
|
const tag = el.getTagNameLower();
|
||||||
var index: usize = 1;
|
var index: usize = 1;
|
||||||
@@ -272,7 +290,7 @@ fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std.fmt.allocPrint(self.arena, "/{s}[{d}]", .{ tag, index });
|
try std.fmt.format(writer, "/{s}[{d}]", .{ tag, index });
|
||||||
} else if (node.is(CData.Text) != null) {
|
} else if (node.is(CData.Text) != null) {
|
||||||
var index: usize = 1;
|
var index: usize = 1;
|
||||||
if (node._parent) |parent| {
|
if (node._parent) |parent| {
|
||||||
@@ -284,9 +302,8 @@ fn getXPathSegment(self: @This(), node: *Node) ![]const u8 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std.fmt.allocPrint(self.arena, "/text()[{d}]", .{index});
|
try std.fmt.format(writer, "/text()[{d}]", .{index});
|
||||||
}
|
}
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const JsonVisitor = struct {
|
const JsonVisitor = struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user