dom: refactor HTML collection to handle root or descendants only

HTMLCollection works in two different ways:
* by default, it must walk over the root's descendants only
* on a document, the given root itself must also be included in the walk.
This commit is contained in:
Pierre Tachoire
2023-12-12 15:22:33 +01:00
parent 2b298708ba
commit 6566df6338
2 changed files with 93 additions and 58 deletions

View File

@@ -110,6 +110,7 @@ pub const Document = struct {
alloc, alloc,
parser.elementToNode(root), parser.elementToNode(root),
tag_name, tag_name,
true,
); );
} }
@@ -123,6 +124,7 @@ pub const Document = struct {
alloc, alloc,
parser.elementToNode(root), parser.elementToNode(root),
classNames, classNames,
true,
); );
} }

View File

@@ -14,15 +14,18 @@ const Union = @import("element.zig").Union;
const Matcher = union(enum) { const Matcher = union(enum) {
matchByTagName: MatchByTagName, matchByTagName: MatchByTagName,
matchByClassName: MatchByClassName, matchByClassName: MatchByClassName,
matchTrue: struct {},
pub fn match(self: Matcher, node: *parser.Node) !bool { pub fn match(self: Matcher, node: *parser.Node) !bool {
switch (self) { switch (self) {
inline .matchTrue => return true,
inline else => |case| return case.match(node), inline else => |case| return case.match(node),
} }
} }
pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void { pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
switch (self) { switch (self) {
.matchTrue => return,
inline else => |case| return case.deinit(alloc), inline else => |case| return case.deinit(alloc),
} }
} }
@@ -56,12 +59,15 @@ pub fn HTMLCollectionByTagName(
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
root: *parser.Node, root: *parser.Node,
tag_name: []const u8, tag_name: []const u8,
include_root: bool,
) !HTMLCollection { ) !HTMLCollection {
return HTMLCollection{ return HTMLCollection{
.root = root, .root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{ .matcher = Matcher{
.matchByTagName = try MatchByTagName.init(alloc, tag_name), .matchByTagName = try MatchByTagName.init(alloc, tag_name),
}, },
.include_root = include_root,
}; };
} }
@@ -97,15 +103,78 @@ pub fn HTMLCollectionByClassName(
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
root: *parser.Node, root: *parser.Node,
classNames: []const u8, classNames: []const u8,
include_root: bool,
) !HTMLCollection { ) !HTMLCollection {
return HTMLCollection{ return HTMLCollection{
.root = root, .root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{ .matcher = Matcher{
.matchByClassName = try MatchByClassName.init(alloc, classNames), .matchByClassName = try MatchByClassName.init(alloc, classNames),
}, },
.include_root = include_root,
}; };
} }
// Walker selects the strategy used to traverse the DOM tree.
// Each variant wraps a concrete walker type exposing a `get_next` method
// with the same signature.
const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,

    // get_next forwards the call to the active walker variant and returns
    // whatever that walker returns: the node following `cur` in the walk
    // bounded by `root`, or null when the walk is over.
    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        return switch (self) {
            inline else => |impl| impl.get_next(root, cur),
        };
    }
};
// WalkerDepthFirst iterates over the DOM tree to return the node following
// `cur`, or null at the end of the walk.
//
// The walk is bounded by `root`: only descendants of `root` are returned.
// When `cur` is null, the walk starts from `root` and the first descendant
// (if any) is returned.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
    // get_next returns the node following `cur` in tree order inside the
    // subtree of `root`, or null when there is no such node.
    // Errors returned by the parser helpers are propagated to the caller.
    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        var n = cur orelse root;

        // Go down first: the first child, if any, is the next node.
        // TODO deinit next
        if (try parser.nodeFirstChild(n)) |next| {
            return next;
        }

        // A root without children has no descendants to visit. Stop here,
        // otherwise the sibling lookup below would return a sibling of the
        // root, which is outside of the subtree being walked.
        if (n == root) {
            return null;
        }

        // No child: continue with the next sibling.
        // TODO deinit next
        if (try parser.nodeNextSibling(n)) |next| {
            return next;
        }

        // TODO deinit parent
        // Back to the parent of cur.
        // If cur has no parent, then the iteration is over.
        var parent = try parser.nodeParentNode(n) orelse return null;

        // TODO deinit lastchild
        var lastchild = try parser.nodeLastChild(parent);
        // Climb up while the current node is the last child of its parent,
        // without ever going above the root.
        while (n != root and n == lastchild) {
            n = parent;

            // TODO deinit parent
            // Back to the prev's parent.
            // If prev has no parent, then the loop must stop.
            parent = try parser.nodeParentNode(n) orelse break;

            // TODO deinit lastchild
            lastchild = try parser.nodeLastChild(parent);
        }

        // Climbed back up to the root: every descendant has been visited.
        if (n == root) {
            return null;
        }

        return try parser.nodeNextSibling(n);
    }
};
// WEB IDL https://dom.spec.whatwg.org/#htmlcollection // WEB IDL https://dom.spec.whatwg.org/#htmlcollection
// HTMLCollection is re implemented in zig here because libdom // HTMLCollection is re implemented in zig here because libdom
// dom_html_collection expects a comparison function callback as arguement. // dom_html_collection expects a comparison function callback as arguement.
@@ -114,58 +183,26 @@ pub const HTMLCollection = struct {
pub const mem_guarantied = true; pub const mem_guarantied = true;
matcher: Matcher, matcher: Matcher,
walker: Walker,
root: *parser.Node, root: *parser.Node,
// By default the HTMLCollection walks over the root's descendants only.
// But in some cases, like for a DOM document, we want to walk over the root
// itself as well.
include_root: bool = false,
// save a state for the collection to improve the _item speed. // save a state for the collection to improve the _item speed.
cur_idx: ?u32 = undefined, cur_idx: ?u32 = undefined,
cur_node: ?*parser.Node = undefined, cur_node: ?*parser.Node = undefined,
// get_next iterates over the DOM tree to return the next following node or // start returns the first node to walk on.
// null at the end. fn start(self: HTMLCollection) !?*parser.Node {
// if (self.include_root) {
// This implementation is a zig version of Netsurf code. return self.root;
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
fn get_next(root: *parser.Node, cur: *parser.Node) !?*parser.Node {
// TODO deinit next
if (try parser.nodeFirstChild(cur)) |next| {
return next;
} }
// TODO deinit next return try self.walker.get_next(self.root, null);
if (try parser.nodeNextSibling(cur)) |next| {
return next;
}
// TODO deinit parent
// Back to the parent of cur.
// If cur has no parent, then the iteration is over.
var parent = try parser.nodeParentNode(cur) orelse return null;
// TODO deinit lastchild
var lastchild = try parser.nodeLastChild(parent);
var prev = cur;
while (prev != root and prev == lastchild) {
prev = parent;
// TODO deinit parent
// Back to the prev's parent.
// If prev has no parent, then the loop must stop.
parent = try parser.nodeParentNode(prev) orelse break;
// TODO deinit lastchild
lastchild = try parser.nodeLastChild(parent);
}
if (prev == root) {
return null;
}
return try parser.nodeNextSibling(prev);
} }
/// get_length computes the collection's length dynamically according to /// get_length computes the collection's length dynamically according to
@@ -173,18 +210,16 @@ pub const HTMLCollection = struct {
// TODO: nodes retrieved must be de-referenced. // TODO: nodes retrieved must be de-referenced.
pub fn get_length(self: *HTMLCollection) !u32 { pub fn get_length(self: *HTMLCollection) !u32 {
var len: u32 = 0; var len: u32 = 0;
var node: *parser.Node = self.root; var node = try self.start() orelse return 0;
var ntype: parser.NodeType = undefined;
while (true) { while (true) {
ntype = try parser.nodeType(node); if (try parser.nodeType(node) == .element) {
if (ntype == .element) {
if (try self.matcher.match(node)) { if (try self.matcher.match(node)) {
len += 1; len += 1;
} }
} }
node = try get_next(self.root, node) orelse break; node = try self.walker.get_next(self.root, node) orelse break;
} }
return len; return len;
@@ -192,18 +227,18 @@ pub const HTMLCollection = struct {
pub fn _item(self: *HTMLCollection, index: u32) !?Union { pub fn _item(self: *HTMLCollection, index: u32) !?Union {
var i: u32 = 0; var i: u32 = 0;
var node: *parser.Node = self.root; var node: *parser.Node = undefined;
var ntype: parser.NodeType = undefined;
// Use the current state to improve speed if possible. // Use the current state to improve speed if possible.
if (self.cur_idx != null and index >= self.cur_idx.?) { if (self.cur_idx != null and index >= self.cur_idx.?) {
i = self.cur_idx.?; i = self.cur_idx.?;
node = self.cur_node.?; node = self.cur_node.?;
} else {
node = try self.start() orelse return null;
} }
while (true) { while (true) {
ntype = try parser.nodeType(node); if (try parser.nodeType(node) == .element) {
if (ntype == .element) {
if (try self.matcher.match(node)) { if (try self.matcher.match(node)) {
// check if we found the searched element. // check if we found the searched element.
if (i == index) { if (i == index) {
@@ -219,7 +254,7 @@ pub const HTMLCollection = struct {
} }
} }
node = try get_next(self.root, node) orelse break; node = try self.walker.get_next(self.root, node) orelse break;
} }
return null; return null;
@@ -230,12 +265,10 @@ pub const HTMLCollection = struct {
return null; return null;
} }
var node: *parser.Node = self.root; var node = try self.start() orelse return null;
var ntype: parser.NodeType = undefined;
while (true) { while (true) {
ntype = try parser.nodeType(node); if (try parser.nodeType(node) == .element) {
if (ntype == .element) {
if (try self.matcher.match(node)) { if (try self.matcher.match(node)) {
const elem = @as(*parser.Element, @ptrCast(node)); const elem = @as(*parser.Element, @ptrCast(node));
@@ -253,7 +286,7 @@ pub const HTMLCollection = struct {
} }
} }
node = try get_next(self.root, node) orelse break; node = try self.walker.get_next(self.root, node) orelse break;
} }
return null; return null;