From d6d74c5024593282350d04301578420d52f4495b Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 30 Dec 2025 12:36:44 +0100 Subject: [PATCH] first version of AXTree --- src/cdp/AXNode.zig | 804 ++++++++++++++++++++++++++++++ src/cdp/cdp.zig | 11 + src/cdp/domains/accessibility.zig | 24 + 3 files changed, 839 insertions(+) create mode 100644 src/cdp/AXNode.zig diff --git a/src/cdp/AXNode.zig b/src/cdp/AXNode.zig new file mode 100644 index 00000000..c1466e88 --- /dev/null +++ b/src/cdp/AXNode.zig @@ -0,0 +1,804 @@ +// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +const log = @import("../log.zig"); +const Page = @import("../browser/Page.zig"); +const DOMNode = @import("../browser/webapi/Node.zig"); +const URL = @import("../browser/URL.zig"); + +const AXNode = @This(); +const Node = @import("Node.zig"); + +// Need a custom writer, because we can't just serialize the node as-is. +// Sometimes we want to serialize the node without children, sometimes with just +// its direct children, and sometimes the entire tree. 
// (For now, the whole subtree under the root is always written.)

/// Serializes a registered DOM node and its descendants as a flat JSON array
/// of accessibility nodes.
///
/// The writer is meant to be handed to a JSON stringifier, which calls
/// `jsonStringify`. DOM nodes met while walking are registered in `registry`
/// so that each emitted node carries an id.
pub const Writer = struct {
    /// Registered node the serialization starts from.
    root: *const Node,
    /// Registry used to obtain ids for children and parents.
    registry: *Node.Registry,
    /// Page passed to the document URL, ignore and name computations.
    page: *Page,

    /// Value types an `AXValue` can carry; serialized by tag name.
    const AXValuesType = enum(u8) { boolean, tristate, booleanOrUndefined, idref, idrefList, integer, node, nodeList, number, string, computedString, token, tokenList, domRelation, role, internalRole, valueUndefined };

    pub const Opts = struct {};

    /// JSON stringifier hook.
    ///
    /// Any failure from the tree walk is logged and collapsed into
    /// `error.WriteFailed`, the only error this hook is allowed to return.
    pub fn jsonStringify(self: *const Writer, w: anytype) error{WriteFailed}!void {
        self.toJSON(self.root, w) catch |err| {
            // Our own errors cannot be surfaced through this signature, so
            // the real cause is only visible in the log.
            log.err(.cdp, "node toJSON stringify", .{ .err = err });
            return error.WriteFailed;
        };
    }

    /// Writes `[root, descendants...]` as one flat array. Descendants are
    /// skipped when `writeNode` reports that the root's children are ignored.
    fn toJSON(self: *const Writer, node: *const Node, w: anytype) !void {
        try w.beginArray();
        const root = try AXNode.fromNode(node.dom);
        const skip_children = try self.writeNode(node.id, root, w);
        if (!skip_children) {
            try self.writeNodeChildren(root, w);
        }
        try w.endArray();
    }

    /// Writes every element child and every text child of `parent`, then
    /// recurses into each child whose own children are not skipped.
    ///
    /// Text children are dropped when `ignoreText` says so for the parent;
    /// any other node kind is never written.
    fn writeNodeChildren(self: *const Writer, parent: AXNode, w: anytype) !void {
        var it = parent.dom.childrenIterator();
        const ignore_text = ignoreText(parent.dom);
        while (it.next()) |dom_node| {
            switch (dom_node._type) {
                .cdata => {
                    // Only text nodes are kept among character data.
                    if (dom_node.is(DOMNode.CData.Text) == null) continue;
                    if (ignore_text) continue;
                },
                .element => {},
                else => continue,
            }

            const node = try self.registry.register(dom_node);
            const axn = try AXNode.fromNode(node.dom);
            const skip_children = try self.writeNode(node.id, axn, w);
            if (skip_children) continue;
            try self.writeNodeChildren(axn, w);
        }
    }

    /// A typed value as written by `writeAXValue`.
    const AXValue = struct {
        type: AXValuesType,
        value: ?union(enum) {
            string: []const u8,
            uint: usize,
            boolean: bool,
        } = null,
        // TODO relatedNodes
        source: ?AXSource = null,
    };

    /// Writes a `"sources"` field holding a one-element array that describes
    /// where a value comes from. Must be called inside an open JSON object.
    fn writeAXSource(_: *const Writer, source: AXSource, w: anytype) !void {
        try w.objectField("sources");
        try w.beginArray();
        try w.beginObject();

        // Possible types: attribute, implicit, style, contents, placeholder,
        // relatedElement.
        const source_type = switch (source) {
            .aria_labelledby => blk: {
                // NOTE(review): the attribute is written as the enum tag name
                // (underscore form) — confirm whether clients expect the
                // hyphenated HTML attribute name instead.
                try w.objectField("attribute");
                try w.write(@tagName(source));
                break :blk "relatedElement";
            },
            .aria_label, .alt, .title, .placeholder, .value => blk: {
                // Not sure if it's correct for the .value case.
                try w.objectField("attribute");
                try w.write(@tagName(source));
                break :blk "attribute";
            },
            // Chrome sends the content AXValue *again* in the source.
            // But it seems useless to me.
            //
            // w.objectField("value");
            // self.writeAXValue(.{ .type = .computedString, .value = value.value }, w);
            .contents => "contents",
            // The name comes from another element (a label), so report a
            // protocol-valid source type rather than a placeholder string.
            // TODO: also emit the native source (label for / wrapping label).
            .label_element, .label_wrap => "relatedElement",
        };
        try w.objectField("type");
        try w.write(source_type);

        try w.endObject();
        try w.endArray();
    }

    /// Writes `value` as an object: `type`, then `value` and `sources` when
    /// they are set.
    fn writeAXValue(self: *const Writer, value: AXValue, w: anytype) !void {
        try w.beginObject();
        try w.objectField("type");
        try w.write(@tagName(value.type));

        if (value.value) |v| {
            try w.objectField("value");
            switch (v) {
                .uint => |n| try w.write(n),
                .string => |s| try w.write(s),
                .boolean => |b| try w.write(b),
            }
        }

        if (value.source) |source| {
            try self.writeAXSource(source, w);
        }
        try w.endObject();
    }

    /// A named property as written by `writeAXProperty`.
    const AXProperty = struct {
        name: enum(u8) { actions, busy, disabled, editable, focusable, focused, hidden, hiddenRoot, invalid, keyshortcuts, settable, roledescription, live, atomic, relevant, root, autocomplete, hasPopup, level, multiselectable, orientation, multiline, readonly, required, valuemin, valuemax, valuetext, checked, expanded, modal, pressed, selected, activedescendant, controls, describedby, details, errormessage, flowto, labelledby, owns, url, activeFullscreenElement, activeModalDialog, activeAriaModalDialog, ariaHiddenElement, ariaHiddenSubtree, emptyAlt, emptyText, inertElement, inertSubtree, labelContainer, labelFor, notRendered, notVisible, presentationalRole, probablyPresentational, inactiveCarouselTabContent, uninteresting },
        value: AXValue,
    };

    /// Writes the property objects of `axnode` into an already open array.
    ///
    /// - document: `url` and `focusable`.
    /// - h1..h6: `level`.
    /// - img with `src`: `url`.
    /// - anchor with `href`: `url` and `focusable`.
    /// - character data and other elements: nothing.
    ///
    /// Returns `error.InvalidTag` for any other node kind.
    fn writeAXProperties(self: *const Writer, axnode: AXNode, w: anytype) !void {
        const dom_node = axnode.dom;
        const page = self.page;
        switch (dom_node._type) {
            .document => |document| {
                try self.writeUrlProperty(document.getURL(page), w);
                try self.writeFocusableProperty(w);
            },
            .cdata => {},
            .element => |el| switch (el.getTag()) {
                .h1 => try self.writeLevelProperty(1, w),
                .h2 => try self.writeLevelProperty(2, w),
                .h3 => try self.writeLevelProperty(3, w),
                .h4 => try self.writeLevelProperty(4, w),
                .h5 => try self.writeLevelProperty(5, w),
                .h6 => try self.writeLevelProperty(6, w),
                .img => {
                    const uri = el.getAttributeSafe("src") orelse return;
                    // TODO make uri absolute
                    try self.writeUrlProperty(uri, w);
                },
                .anchor => {
                    const uri = el.getAttributeSafe("href") orelse return;
                    // TODO make uri absolute
                    try self.writeUrlProperty(uri, w);
                    try self.writeFocusableProperty(w);
                },
                else => {},
            },
            else => |tag| {
                log.debug(.cdp, "invalid tag", .{ .tag = tag });
                return error.InvalidTag;
            },
        }
    }

    /// Writes a `level` property with an integer value.
    fn writeLevelProperty(self: *const Writer, level: usize, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .level, .value = .{ .type = .integer, .value = .{ .uint = level } } }, w);
    }

    /// Writes a `url` property with a string value.
    fn writeUrlProperty(self: *const Writer, uri: []const u8, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .url, .value = .{ .type = .string, .value = .{ .string = uri } } }, w);
    }

    /// Writes a `focusable` property set to true.
    fn writeFocusableProperty(self: *const Writer, w: anytype) !void {
        try self.writeAXProperty(.{ .name = .focusable, .value = .{ .type = .booleanOrUndefined, .value = .{ .boolean = true } } }, w);
    }

    /// Writes one `{ "name": ..., "value": {...} }` property object.
    fn writeAXProperty(self: *const Writer, value: AXProperty, w: anytype) !void {
        try w.beginObject();
        try w.objectField("name");
        try w.write(@tagName(value.name));
        try w.objectField("value");
        try self.writeAXValue(value.value, w);
        try w.endObject();
    }

    /// Writes one accessibility node object.
    ///
    /// `id` is written as both `nodeId` and `backendDOMNodeId`. Ignored nodes
    /// get a single "uninteresting" ignore reason; other nodes get their
    /// computed name and properties. Returns true if the caller must not
    /// descend into the node's children.
    fn writeNode(self: *const Writer, id: u32, axn: AXNode, w: anytype) !bool {
        try w.beginObject();

        try w.objectField("nodeId");
        try w.write(id);

        try w.objectField("backendDOMNodeId");
        try w.write(id);

        try w.objectField("role");
        try self.writeAXValue(.{ .type = .role, .value = .{ .string = try axn.getRole() } }, w);

        const ignore = try axn.isIgnore(self.page);
        try w.objectField("ignored");
        try w.write(ignore);

        if (ignore) {
            // Ignore reasons
            try w.objectField("ignoredReasons");
            try w.beginArray();
            try w.beginObject();
            try w.objectField("name");
            try w.write("uninteresting");
            try w.objectField("value");
            try self.writeAXValue(.{ .type = .boolean, .value = .{ .boolean = true } }, w);
            try w.endObject();
            try w.endArray();
        } else {
            // Name: the value itself is written by the AXNode, which reports
            // back the source the name was taken from, if any.
            try w.objectField("name");
            try w.beginObject();
            try w.objectField("type");
            try w.write(@tagName(AXValuesType.computedString));
            try w.objectField("value");
            if (try axn.writeName(w, self.page)) |source| {
                try self.writeAXSource(source, w);
            }
            try w.endObject();

            // Properties
            try w.objectField("properties");
            try w.beginArray();
            try self.writeAXProperties(axn, w);
            try w.endArray();
        }

        const dom = axn.dom;

        // Parent
        if (dom._parent) |parent| {
            const parent_node = try self.registry.register(parent);
            try w.objectField("parentId");
            try w.write(parent_node.id);
        }

        // Children
        const skip_children = axn.ignoreChildren();

        try w.objectField("childIds");
        try w.beginArray();
        if (!skip_children) {
            // NOTE(review): this keeps HTML elements and text nodes, while
            // writeNodeChildren keeps every element and may drop text nodes
            // through ignoreText — confirm the two lists are meant to differ.
            var it = dom.childrenIterator();
            while (it.next()) |child| {
                const is_html = child.is(DOMNode.Element.Html) != null;
                const is_text = child.is(DOMNode.CData.Text) != null;
                if (!is_html and !is_text) continue;

                const child_node = try self.registry.register(child);
                try w.write(child_node.id);
            }
        }
        try w.endArray();

        try w.endObject();

        return skip_children;
    }
};

/// Implicit accessibility role derived from the kind of a DOM node.
pub const AXRole = enum(u8) {
    none,
    article,
    banner,
    blockquote,
    button,
    caption,
    cell,
    checkbox,
    code,
    columnheader,
    combobox,
    complementary,
    contentinfo,
    definition,
    deletion,
    dialog,
    document,
    emphasis,
    figure,
    form,
    group,
    heading,
    image,
    insertion,
    link,
    list,
    listbox,
    listitem,
    main,
    marquee,
    meter,
    navigation,
    option,
    paragraph,
    presentation,
    progressbar,
    radio,
    region,
    row,
    rowgroup,
    rowheader,
    searchbox,
    separator,
    slider,
    spinbutton,
    status,
    strong,
    subscript,
    superscript,
    table,
    term,
    textbox,
    time,
    RootWebArea,
    LineBreak,
    StaticText,

    /// Returns the implicit role of `node`.
    ///
    /// Documents map to `RootWebArea`, text nodes to `StaticText`, elements
    /// according to their tag (`none` when no mapping exists). Any other
    /// node kind, including non-text character data, yields
    /// `error.InvalidTag`.
    fn fromNode(node: *DOMNode) !AXRole {
        return switch (node._type) {
            .document => .RootWebArea, // Chrome specific.
            .cdata => |cd| {
                if (cd.is(DOMNode.CData.Text) == null) {
                    log.debug(.cdp, "invalid tag", .{ .tag = cd });
                    return error.InvalidTag;
                }

                return .StaticText;
            },
            .element => |el| switch (el.getTag()) {
                // Navigation & Structure
                .nav => .navigation,
                .main => .main,
                .aside => .complementary,
                // TODO conditions:
                // .banner Not descendant of article, aside, main, nav, section
                // (none) When descendant of article, aside, main, nav, section
                .header => .banner,
                // TODO conditions:
                // contentinfo Not descendant of article, aside, main, nav, section
                // (none) When descendant of article, aside, main, nav, section
                .footer => .contentinfo,
                // TODO conditions:
                // region Has accessible name (aria-label, aria-labelledby, or title)
                // (none) No accessible name
                .section => .region,
                .article, .hgroup => .article,
                .address => .group,

                // Headings
                .h1, .h2, .h3, .h4, .h5, .h6 => .heading,
                .ul, .ol, .menu => .list,
                .li => .listitem,
                .dt => .term,
                .dd => .definition,

                // Forms & Inputs
                // TODO conditions:
                // form Has accessible name
                // (none) No accessible name
                .form => .form,
                .input => {
                    const input = el.as(DOMNode.Element.Html.Input);
                    return switch (input._input_type) {
                        .tel, .url, .email, .text => .textbox,
                        .image, .reset, .button, .submit => .button,
                        .radio => .radio,
                        .range => .slider,
                        .number => .spinbutton,
                        .search => .searchbox,
                        .checkbox => .checkbox,
                        .password, .datetime_local, .hidden, .month, .color, .week, .time, .file, .date => .none,
                    };
                },
                .textarea => .textbox,
                .select => {
                    if (el.getAttributeSafe("multiple") != null) {
                        return .listbox;
                    }
                    if (el.getAttributeSafe("size")) |size| {
                        // A select is a listbox only when its size is greater
                        // than 1. Compare numerically so that "0", " 1" or an
                        // unparsable value keep the combobox role; a value too
                        // large for u32 still counts as greater than 1.
                        const digits = std.mem.trim(u8, size, &std.ascii.whitespace);
                        const display_size: u32 = std.fmt.parseUnsigned(u32, digits, 10) catch |err| switch (err) {
                            error.Overflow => std.math.maxInt(u32),
                            error.InvalidCharacter => 1,
                        };
                        if (display_size > 1) {
                            return .listbox;
                        }
                    }
                    return .combobox;
                },
                .option => .option,
                .optgroup, .fieldset => .group,
                .button => .button,
                .output => .status,
                .progress => .progressbar,
                .meter => .meter,
                .datalist => .listbox,

                // Interactive Elements
                .anchor, .area => {
                    // Without href there is nothing to follow: no link role.
                    if (el.getAttributeSafe("href") == null) {
                        return .none;
                    }

                    return .link;
                },
                .details => .group,
                .summary => .button,
                .dialog => .dialog,

                // Media
                .img => .image,
                .figure => .figure,

                // Tables
                .table => .table,
                .caption => .caption,
                .thead, .tbody, .tfoot => .rowgroup,
                .tr => .row,
                .th => {
                    if (el.getAttributeSafe("scope")) |scope| {
                        if (std.ascii.eqlIgnoreCase(scope, "row")) {
                            return .rowheader;
                        }
                    }
                    return .columnheader;
                },
                .td => .cell,

                // Text & Semantics
                .p => .paragraph,
                .hr => .separator,
                .blockquote => .blockquote,
                .code => .code,
                .em => .emphasis,
                .strong => .strong,
                .s, .del => .deletion,
                .ins => .insertion,
                .sub => .subscript,
                .sup => .superscript,
                .time => .time,
                .dfn => .term,

                // Document Structure
                .html => .none,
                .body => .none,

                // Deprecated/Obsolete Elements
                .marquee => .marquee,

                .br => .LineBreak,

                else => .none,
            },
            else => |tag| {
                log.debug(.cdp, "invalid tag", .{ .tag = tag });
                return error.InvalidTag;
            },
        };
    }
};

/// DOM node this accessibility node wraps.
dom: *DOMNode,
/// Value of the `role` attribute; null when the node is not an HTML element
/// or the attribute lookup returns nothing.
role_attr: ?[]const u8,

/// Wraps `dom` in an AXNode, capturing its `role` attribute when the node is
/// an HTML element.
pub fn fromNode(dom: *DOMNode) !AXNode {
    const role_attr: ?[]const u8 = if (dom.is(DOMNode.Element.Html) == null)
        null
    else
        dom.as(DOMNode.Element).getAttributeSafe("role");

    return .{
        .dom = dom,
        .role_attr = role_attr,
    };
}

const AXSource = enum(u8) {
    aria_labelledby,
    aria_label,
    label_element, //