Merge pull request #144 from lightpanda-io/html-doc

dom: DocumentHTML getters
2026-02-04 22:43:48 +00:00 · 2024-01-10 17:33:43 +01:00
parent 9df3e4182d ca6bb577c6
commit c1b4026fa3
6 changed files with 390 additions and 22 deletions
--- a/src/dom/document.zig
+++ b/src/dom/document.zig
@@ -108,12 +108,7 @@ pub const Document = struct {
        alloc: std.mem.Allocator,
        tag_name: []const u8,
    ) !collection.HTMLCollection {
-        var elt: ?*parser.Node = null;
-        if (try parser.documentGetDocumentElement(self)) |root| {
-            elt = parser.elementToNode(root);
-        }
-
-        return try collection.HTMLCollectionByTagName(alloc, elt, tag_name, true);
+        return try collection.HTMLCollectionByTagName(alloc, parser.documentToNode(self), tag_name, true);
    }

    pub fn _getElementsByClassName(
@@ -121,12 +116,7 @@ pub const Document = struct {
        alloc: std.mem.Allocator,
        classNames: []const u8,
    ) !collection.HTMLCollection {
-        var elt: ?*parser.Node = null;
-        if (try parser.documentGetDocumentElement(self)) |root| {
-            elt = parser.elementToNode(root);
-        }
-
-        return try collection.HTMLCollectionByClassName(alloc, elt, classNames, true);
+        return try collection.HTMLCollectionByClassName(alloc, parser.documentToNode(self), classNames, true);
    }

    pub fn _createDocumentFragment(self: *parser.Document) !*parser.DocumentFragment {
@@ -170,11 +160,7 @@ pub const Document = struct {
    // ParentNode
    // https://dom.spec.whatwg.org/#parentnode
    pub fn get_children(self: *parser.Document) !collection.HTMLCollection {
-        var elt: ?*parser.Node = null;
-        if (try parser.documentGetDocumentElement(self)) |root| {
-            elt = parser.elementToNode(root);
-        }
-        return try collection.HTMLCollectionChildren(elt, true);
+        return try collection.HTMLCollectionChildren(parser.documentToNode(self), false); // root is the document node; include_root is false, presumably so the document itself is not listed (TODO confirm)
    }

    pub fn get_firstElementChild(self: *parser.Document) !?ElementUnion {
@@ -219,7 +205,7 @@ pub const Document = struct {
        // catch-all, return all elements
        if (selectors[0] == '*') {
            // walk over the node tree to find the node by id.
-            const root = parser.elementToNode(try parser.documentGetDocumentElement(self) orelse return list);
+            const root = parser.documentToNode(self);
            const walker = Walker{};
            var next: ?*parser.Node = null;
            while (true) {
--- a/src/dom/html_collection.zig
+++ b/src/dom/html_collection.zig
@@ -12,23 +12,35 @@ const Element = @import("element.zig").Element;
 const Union = @import("element.zig").Union;

 const Matcher = union(enum) {
+    matchByName: MatchByName, // compares the element's name attribute with a stored name
    matchByTagName: MatchByTagName,
    matchByClassName: MatchByClassName,
+    matchByLinks: MatchByLinks, // a and area elements that have an href attribute
+    matchByAnchors: MatchByAnchors, // a elements that have a name attribute
    matchTrue: struct {},
+    matchFalse: struct {}, // rejects every node

    pub fn match(self: Matcher, node: *parser.Node) !bool {
        switch (self) {
            inline .matchTrue => return true,
+            inline .matchFalse => return false, // never matches, whatever the node
            inline .matchByTagName => |case| return case.match(node),
            inline .matchByClassName => |case| return case.match(node),
+            inline .matchByName => |case| return case.match(node), // needs the payload: it holds the name to compare
+            inline .matchByLinks => return MatchByLinks.match(node), // payload unused: match takes only the node
+            inline .matchByAnchors => return MatchByAnchors.match(node), // payload unused: match takes only the node
        }
    }

    pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
        switch (self) {
            inline .matchTrue => return,
+            inline .matchFalse => return, // empty payload, nothing to release
            inline .matchByTagName => |case| return case.deinit(alloc),
            inline .matchByClassName => |case| return case.deinit(alloc),
+            inline .matchByName => |case| return case.deinit(alloc), // frees the stored name
+            inline .matchByLinks => return, // no fields, nothing to release
+            inline .matchByAnchors => return, // no fields, nothing to release
        }
    }
 };
@@ -117,6 +129,56 @@ pub fn HTMLCollectionByClassName(
    };
 }

+// MatchByName matches the nodes whose name attribute equals a given value.
+pub const MatchByName = struct {
+    // Copy of the requested name, allocated by init and released by deinit.
+    name: []const u8,
+
+    // Duplicates name with alloc, so the matcher owns its own copy instead of
+    // borrowing the caller's slice. Returns the allocator error on failure.
+    fn init(alloc: std.mem.Allocator, name: []const u8) !MatchByName {
+        return MatchByName{
+            .name = try alloc.dupe(u8, name),
+        };
+    }
+
+    // Reports whether node has a name attribute equal to self.name.
+    // A node without a name attribute never matches.
+    pub fn match(self: MatchByName, node: *parser.Node) !bool {
+        const e = parser.nodeToElement(node);
+        const nname = try parser.elementGetAttribute(e, "name") orelse return false;
+        return std.mem.eql(u8, self.name, nname);
+    }
+
+    // Frees the copy made by init; alloc must be the allocator given to init.
+    fn deinit(self: MatchByName, alloc: std.mem.Allocator) void {
+        alloc.free(self.name);
+    }
+};
+
+// HTMLCollectionByName returns a collection over root configured with the
+// depth-first walker and a MatchByName matcher built from name.
+// The matcher is created with alloc; an error from MatchByName.init is
+// returned to the caller.
+pub fn HTMLCollectionByName(
+    alloc: std.mem.Allocator,
+    root: ?*parser.Node,
+    name: []const u8,
+    include_root: bool,
+) !HTMLCollection {
+    const by_name = try MatchByName.init(alloc, name);
+    return HTMLCollection{
+        .root = root,
+        .walker = Walker{ .walkerDepthFirst = .{} },
+        .matcher = Matcher{ .matchByName = by_name },
+        .include_root = include_root,
+    };
+}
+
+// HTMLCollectionAll returns a collection over root configured with the
+// depth-first walker and the matchTrue matcher.
+pub fn HTMLCollectionAll(
+    root: ?*parser.Node,
+    include_root: bool,
+) !HTMLCollection {
+    const accept_all = Matcher{ .matchTrue = .{} };
+    const depth_first = Walker{ .walkerDepthFirst = .{} };
+    return HTMLCollection{
+        .root = root,
+        .walker = depth_first,
+        .matcher = accept_all,
+        .include_root = include_root,
+    };
+}
+
 pub fn HTMLCollectionChildren(
    root: ?*parser.Node,
    include_root: bool,
@@ -129,9 +191,74 @@ pub fn HTMLCollectionChildren(
    };
 }

+// HTMLCollectionEmpty returns a collection with no root, configured with the
+// walkerNone walker and the matchFalse matcher.
+pub fn HTMLCollectionEmpty() !HTMLCollection {
+    const empty = HTMLCollection{
+        .include_root = false,
+        .matcher = Matcher{ .matchFalse = .{} },
+        .walker = Walker{ .walkerNone = .{} },
+        .root = null,
+    };
+    return empty;
+}
+
+// MatchByLinks selects the a and area elements of the Document that carry an
+// href attribute.
+// https://html.spec.whatwg.org/#dom-document-links
+pub const MatchByLinks = struct {
+    // Returns true when node is named a or area (ASCII case-insensitive) and
+    // has an href attribute.
+    pub fn match(node: *parser.Node) !bool {
+        const name = try parser.nodeName(node);
+        const is_link_tag = std.ascii.eqlIgnoreCase(name, "a") or std.ascii.eqlIgnoreCase(name, "area");
+        if (!is_link_tag) return false;
+
+        // The node is reinterpreted as an element to query its attributes.
+        const link: *parser.Element = @ptrCast(node);
+        return parser.elementHasAttribute(link, "href");
+    }
+};
+
+// HTMLCollectionByLinks returns a collection over root configured with the
+// depth-first walker and the MatchByLinks matcher.
+pub fn HTMLCollectionByLinks(
+    root: ?*parser.Node,
+    include_root: bool,
+) !HTMLCollection {
+    const links_matcher = Matcher{ .matchByLinks = MatchByLinks{} };
+    return HTMLCollection{
+        .root = root,
+        .walker = Walker{ .walkerDepthFirst = .{} },
+        .matcher = links_matcher,
+        .include_root = include_root,
+    };
+}
+
+// MatchByAnchors selects the a elements of the Document that carry a name
+// attribute.
+// https://html.spec.whatwg.org/#dom-document-anchors
+pub const MatchByAnchors = struct {
+    // Returns true when node is named a (ASCII case-insensitive) and has a
+    // name attribute.
+    pub fn match(node: *parser.Node) !bool {
+        const node_name = try parser.nodeName(node);
+        if (std.ascii.eqlIgnoreCase(node_name, "a")) {
+            // The node is reinterpreted as an element to query its attributes.
+            const anchor: *parser.Element = @ptrCast(node);
+            return parser.elementHasAttribute(anchor, "name");
+        }
+        return false;
+    }
+};
+
+// HTMLCollectionByAnchors returns a collection over root configured with the
+// depth-first walker and the MatchByAnchors matcher.
+pub fn HTMLCollectionByAnchors(
+    root: ?*parser.Node,
+    include_root: bool,
+) !HTMLCollection {
+    const anchors_matcher = Matcher{ .matchByAnchors = MatchByAnchors{} };
+    return HTMLCollection{
+        .root = root,
+        .walker = Walker{ .walkerDepthFirst = .{} },
+        .matcher = anchors_matcher,
+        .include_root = include_root,
+    };
+}
+
 const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,
    walkerChildren: WalkerChildren,
+    walkerNone: WalkerNone,

    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        switch (self) {
@@ -205,6 +332,12 @@ pub const WalkerChildren = struct {
    }
 };

+// WalkerNone is a walker that never yields a node.
+pub const WalkerNone = struct {
+    // Ignores all of its arguments and always returns null.
+    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
+        const no_node: ?*parser.Node = null;
+        return no_node;
+    }
+};
+
 // WEB IDL https://dom.spec.whatwg.org/#htmlcollection
 // HTMLCollection is re implemented in zig here because libdom
 // dom_html_collection expects a comparison function callback as argument.
@@ -259,7 +392,7 @@ pub const HTMLCollection = struct {
        return len;
    }

-    pub fn _item(self: *HTMLCollection, index: u32) !?Union {
+    pub fn item(self: *HTMLCollection, index: u32) !?*parser.Node {
        if (self.root == null) return null;

        var i: u32 = 0;
@@ -282,8 +415,7 @@ pub const HTMLCollection = struct {
                        self.cur_node = node;
                        self.cur_idx = i;

-                        const e = @as(*parser.Element, @ptrCast(node));
-                        return try Element.toInterface(e);
+                        return node;
                    }

                    i += 1;
@@ -296,6 +428,12 @@ pub const HTMLCollection = struct {
        return null;
    }

+    // Returns the node found by item() at index, cast to an element and
+    // converted with Element.toInterface, or null when item() returns null.
+    pub fn _item(self: *HTMLCollection, index: u32) !?Union {
+        if (try self.item(index)) |node| {
+            const elem: *parser.Element = @ptrCast(node);
+            return try Element.toInterface(elem);
+        }
+        return null;
+    }
+
    pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union {
        if (self.root == null) return null;
        if (name.len == 0) return null;
--- a/src/dom/nodelist.zig
+++ b/src/dom/nodelist.zig
@@ -14,6 +14,10 @@ const DOMException = @import("exceptions.zig").DOMException;
 // Nodelist is implemented in pure Zig b/c libdom's NodeList doesn't allow to
 // append nodes.
 // WEB IDL https://dom.spec.whatwg.org/#nodelist
+//
+// TODO: a NodeList can be either static or live, but the current
+// implementation only supports static NodeLists.
+// see https://dom.spec.whatwg.org/#old-style-collections
 pub const NodeList = struct {
    pub const mem_guarantied = true;
    pub const Exception = DOMException;