From 85ebbe8759ab8a32c5f4d5de428e8a2969ea01b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Mon, 9 Mar 2026 21:04:47 +0900 Subject: [PATCH] SemanticTree: improve accessibility tree and name calculation - Add more structural roles (banner, navigation, main, list, etc.). - Implement fallback for accessible names (SVG titles, image alt text). - Skip children for leaf-like semantic nodes to reduce redundancy. - Disable pruning in the default semantic tree view. --- src/SemanticTree.zig | 21 ++++++++++++++++++++- src/cdp/AXNode.zig | 44 ++++++++++++++++++++++++++++++++++++++++---- src/lightpanda.zig | 4 ++-- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 6561bb21..2b6c96e3 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -385,9 +385,17 @@ const JsonVisitor = struct { }; fn isStructuralRole(role: []const u8) bool { + // zig fmt: off return std.mem.eql(u8, role, "none") or std.mem.eql(u8, role, "generic") or - std.mem.eql(u8, role, "InlineTextBox"); + std.mem.eql(u8, role, "InlineTextBox") or + std.mem.eql(u8, role, "banner") or + std.mem.eql(u8, role, "navigation") or + std.mem.eql(u8, role, "main") or + std.mem.eql(u8, role, "list") or + std.mem.eql(u8, role, "listitem") or + std.mem.eql(u8, role, "region"); + // zig fmt: on } const TextVisitor = struct { @@ -436,6 +444,17 @@ const TextVisitor = struct { try self.writer.writeByte('\n'); self.depth += 1; + + // If this is a leaf-like semantic node and we already have a name, + // skip children to avoid redundant StaticText or noise. 
+ const is_leaf_semantic = std.mem.eql(u8, data.role, "link") or + std.mem.eql(u8, data.role, "button") or + std.mem.eql(u8, data.role, "heading") or + std.mem.eql(u8, data.role, "code"); + if (is_leaf_semantic and data.name != null and data.name.?.len > 0) { + return false; + } + return true; } diff --git a/src/cdp/AXNode.zig b/src/cdp/AXNode.zig index 00149943..16da8478 100644 --- a/src/cdp/AXNode.zig +++ b/src/cdp/AXNode.zig @@ -888,10 +888,12 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource { => {}, else => { // write text content if exists. - var buf = std.Io.Writer.Allocating.init(page.call_arena); - try el.getInnerText(&buf.writer); - try writeString(buf.written(), w); - return .contents; + var buf: std.Io.Writer.Allocating = .init(page.call_arena); + try writeAccessibleNameFallback(node, &buf.writer, page); + if (buf.written().len > 0) { + try writeString(buf.written(), w); + return .contents; + } }, } @@ -915,6 +917,40 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource { }; } +fn writeAccessibleNameFallback(node: *DOMNode, writer: *std.Io.Writer, page: *Page) !void { + var it = node.childrenIterator(); + while (it.next()) |child| { + switch (child._type) { + .cdata => |cd| switch (cd._type) { + .text => |*text| try writer.writeAll(text.getWholeText()), + else => {}, + }, + .element => |el| { + if (el.getTag() == .img) { + if (el.getAttributeSafe(.wrap("alt"))) |alt| { + try writer.writeAll(alt); + try writer.writeByte(' '); + } + } else if (el.getTag() == .svg) { + // Try to find a <title> inside SVG + var sit = child.childrenIterator(); + while (sit.next()) |s_child| { + if (s_child.is(DOMNode.Element)) |s_el| { + if (std.mem.eql(u8, s_el.getTagNameLower(), "title")) { + try writeAccessibleNameFallback(s_child, writer, page); + try writer.writeByte(' '); + } + } + } + } else { + try writeAccessibleNameFallback(child, writer, page); + } + }, + else => {}, + } + } +} + fn isHidden(elt: *DOMNode.Element) bool { if 
(elt.getAttributeSafe(comptime .wrap("aria-hidden"))) |value| { if (std.mem.eql(u8, value, "true")) { diff --git a/src/lightpanda.zig b/src/lightpanda.zig index d11a9ff5..c4abae6c 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -113,12 +113,12 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { var registry = CDPNode.Registry.init(app.allocator); defer registry.deinit(); - const st = SemanticTree{ + const st: SemanticTree = .{ .dom_node = page.window._document.asNode(), .registry = &registry, .page = page, .arena = page.call_arena, - .prune = true, + .prune = false, }; if (mode == .semantic_tree) {