diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 6561bb21..2b6c96e3 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -385,9 +385,17 @@ const JsonVisitor = struct { }; fn isStructuralRole(role: []const u8) bool { + // zig fmt: off return std.mem.eql(u8, role, "none") or std.mem.eql(u8, role, "generic") or - std.mem.eql(u8, role, "InlineTextBox"); + std.mem.eql(u8, role, "InlineTextBox") or + std.mem.eql(u8, role, "banner") or + std.mem.eql(u8, role, "navigation") or + std.mem.eql(u8, role, "main") or + std.mem.eql(u8, role, "list") or + std.mem.eql(u8, role, "listitem") or + std.mem.eql(u8, role, "region"); + // zig fmt: on } const TextVisitor = struct { @@ -436,6 +444,17 @@ const TextVisitor = struct { try self.writer.writeByte('\n'); self.depth += 1; + + // If this is a leaf-like semantic node and we already have a name, + // skip children to avoid redundant StaticText or noise. + const is_leaf_semantic = std.mem.eql(u8, data.role, "link") or + std.mem.eql(u8, data.role, "button") or + std.mem.eql(u8, data.role, "heading") or + std.mem.eql(u8, data.role, "code"); + if (is_leaf_semantic and data.name != null and data.name.?.len > 0) { + return false; + } + return true; } diff --git a/src/cdp/AXNode.zig b/src/cdp/AXNode.zig index 00149943..16da8478 100644 --- a/src/cdp/AXNode.zig +++ b/src/cdp/AXNode.zig @@ -888,10 +888,12 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource { => {}, else => { // write text content if exists. - var buf = std.Io.Writer.Allocating.init(page.call_arena); - try el.getInnerText(&buf.writer); - try writeString(buf.written(), w); - return .contents; + var buf: std.Io.Writer.Allocating = .init(page.call_arena); + try writeAccessibleNameFallback(node, &buf.writer, page); + if (buf.written().len > 0) { + try writeString(buf.written(), w); + return .contents; + } }, } @@ -915,6 +917,40 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource { }; } +fn writeAccessibleNameFallback(node: *DOMNode, writer: *std.Io.Writer, page: *Page) !void { + var it = node.childrenIterator(); + while (it.next()) |child| { + switch (child._type) { + .cdata => |cd| switch (cd._type) { + .text => |*text| try writer.writeAll(text.getWholeText()), + else => {}, + }, + .element => |el| { + if (el.getTag() == .img) { + if (el.getAttributeSafe(.wrap("alt"))) |alt| { + try writer.writeAll(alt); + try writer.writeByte(' '); + } + } else if (el.getTag() == .svg) { + // Try to find a