dom: extract walker from html_collection to its own file

This commit is contained in:
Pierre Tachoire
2024-01-16 15:39:35 +01:00
parent d22b7a6d29
commit a2e266514f
7 changed files with 96 additions and 88 deletions

View File

@@ -14,7 +14,7 @@ const Env = jsruntime.Env;
const apiweb = @import("../apiweb.zig");
const Window = @import("../html/window.zig").Window;
const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst;
const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
const FetchResult = std.http.Client.FetchResult;

View File

@@ -2,7 +2,7 @@ const std = @import("std");
const File = std.fs.File;
const parser = @import("../netsurf.zig");
const Walker = @import("../dom/html_collection.zig").WalkerChildren;
const Walker = @import("../dom/walker.zig").WalkerChildren;
pub fn htmlFile(doc: *parser.Document, out: File) !void {
try out.writeAll("<!DOCTYPE html>\n");

View File

@@ -10,7 +10,7 @@ const Node = @import("node.zig").Node;
const NodeList = @import("nodelist.zig").NodeList;
const NodeUnion = @import("node.zig").Union;
const Walker = @import("html_collection.zig").WalkerDepthFirst;
const Walker = @import("walker.zig").WalkerDepthFirst;
const collection = @import("html_collection.zig");
const Element = @import("element.zig").Element;

View File

@@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases;
const collection = @import("html_collection.zig");
const Node = @import("node.zig").Node;
const Walker = @import("html_collection.zig").WalkerDepthFirst;
const Walker = @import("walker.zig").WalkerDepthFirst;
const NodeList = @import("nodelist.zig").NodeList;
const HTMLElem = @import("../html/elements.zig");
pub const Union = @import("../html/elements.zig").Union;

View File

@@ -11,6 +11,11 @@ const utils = @import("utils.z");
const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union;
const Walker = @import("walker.zig").Walker;
const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst;
const WalkerChildren = @import("walker.zig").WalkerChildren;
const WalkerNone = @import("walker.zig").WalkerNone;
const Matcher = union(enum) {
matchByName: MatchByName,
matchByTagName: MatchByTagName,
@@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors(
};
}
// Walker groups the available tree walking strategies behind a single
// get_next entry point.
const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,
    walkerChildren: WalkerChildren,
    walkerNone: WalkerNone,

    // get_next forwards the call to the get_next of the active strategy and
    // returns its result unchanged.
    pub fn get_next(
        self: Walker,
        root: *parser.Node,
        cur: ?*parser.Node,
    ) !?*parser.Node {
        switch (self) {
            // Every variant exposes the same get_next signature, so a single
            // inlined prong covers all of them.
            inline else => |strategy| return strategy.get_next(root, cur),
        }
    }
};
// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
    // get_next returns the node following cur in the walk, or null once the
    // walk is over. A null cur starts the walk from root.
    // Errors returned by the parser calls are propagated to the caller.
    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        var n = cur orelse root;

        // Go down first.
        // TODO deinit next
        if (try parser.nodeFirstChild(n)) |next| {
            return next;
        }

        // A root without children has nothing left to visit. Its siblings
        // are not part of the walked subtree, so they must not be returned.
        if (n == root) return null;

        // TODO deinit next
        if (try parser.nodeNextSibling(n)) |next| {
            return next;
        }

        // TODO deinit parent
        // Back to the parent of cur.
        // If cur has no parent, then the iteration is over.
        var parent = try parser.nodeParentNode(n) orelse return null;

        // TODO deinit lastchild
        var lastchild = try parser.nodeLastChild(parent);

        // Climb while n is the last child of its parent, without going
        // above root.
        while (n != root and n == lastchild) {
            n = parent;

            // TODO deinit parent
            // Back to the prev's parent.
            // If prev has no parent, then the loop must stop.
            parent = try parser.nodeParentNode(n) orelse break;

            // TODO deinit lastchild
            lastchild = try parser.nodeLastChild(parent);
        }

        // The climb reached root: every node under it has been visited.
        if (n == root) {
            return null;
        }

        return try parser.nodeNextSibling(n);
    }
};
// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
    // get_next returns the first child of root when cur is null, null when
    // cur is root itself, and the next sibling of cur otherwise.
    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        // No current node yet: the walk starts with the first root's child.
        const current = cur orelse return try parser.nodeFirstChild(root);

        // Special case: when the root is included in the walk, we don't want
        // to go further to find children, so the walk ends here.
        if (current == root) return null;

        return try parser.nodeNextSibling(current);
    }
};
// WalkerNone is a walker which never yields any node.
pub const WalkerNone = struct {
    // get_next ignores all its arguments and always returns null.
    pub fn get_next(
        _: WalkerNone,
        _: *parser.Node,
        _: ?*parser.Node,
    ) !?*parser.Node {
        return null;
    }
};
pub const HTMLCollectionIterator = struct {
pub const mem_guarantied = true;

86
src/dom/walker.zig Normal file
View File

@@ -0,0 +1,86 @@
const std = @import("std");
const parser = @import("../netsurf.zig");
// Walker groups the available tree walking strategies behind a single
// get_next entry point.
pub const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,
    walkerChildren: WalkerChildren,
    walkerNone: WalkerNone,

    // get_next forwards the call to the get_next of the active strategy and
    // returns its result unchanged.
    pub fn get_next(
        self: Walker,
        root: *parser.Node,
        cur: ?*parser.Node,
    ) !?*parser.Node {
        switch (self) {
            // Every variant exposes the same get_next signature, so a single
            // inlined prong covers all of them.
            inline else => |strategy| return strategy.get_next(root, cur),
        }
    }
};
// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
    // get_next returns the node following cur in the walk, or null once the
    // walk is over. A null cur starts the walk from root.
    // Errors returned by the parser calls are propagated to the caller.
    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        var n = cur orelse root;

        // Go down first.
        // TODO deinit next
        if (try parser.nodeFirstChild(n)) |next| {
            return next;
        }

        // A root without children has nothing left to visit. Its siblings
        // are not part of the walked subtree, so they must not be returned.
        if (n == root) return null;

        // TODO deinit next
        if (try parser.nodeNextSibling(n)) |next| {
            return next;
        }

        // TODO deinit parent
        // Back to the parent of cur.
        // If cur has no parent, then the iteration is over.
        var parent = try parser.nodeParentNode(n) orelse return null;

        // TODO deinit lastchild
        var lastchild = try parser.nodeLastChild(parent);

        // Climb while n is the last child of its parent, without going
        // above root.
        while (n != root and n == lastchild) {
            n = parent;

            // TODO deinit parent
            // Back to the prev's parent.
            // If prev has no parent, then the loop must stop.
            parent = try parser.nodeParentNode(n) orelse break;

            // TODO deinit lastchild
            lastchild = try parser.nodeLastChild(parent);
        }

        // The climb reached root: every node under it has been visited.
        if (n == root) {
            return null;
        }

        return try parser.nodeNextSibling(n);
    }
};
// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
    // get_next returns the first child of root when cur is null, null when
    // cur is root itself, and the next sibling of cur otherwise.
    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        // No current node yet: the walk starts with the first root's child.
        const current = cur orelse return try parser.nodeFirstChild(root);

        // Special case: when the root is included in the walk, we don't want
        // to go further to find children, so the walk ends here.
        if (current == root) return null;

        return try parser.nodeNextSibling(current);
    }
};
// WalkerNone is a walker which never yields any node.
pub const WalkerNone = struct {
    // get_next ignores all its arguments and always returns null.
    pub fn get_next(
        _: WalkerNone,
        _: *parser.Node,
        _: ?*parser.Node,
    ) !?*parser.Node {
        return null;
    }
};

View File

@@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList;
const HTMLElem = @import("elements.zig");
const collection = @import("../dom/html_collection.zig");
const Walker = collection.WalkerDepthFirst;
const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
// WEB IDL https://html.spec.whatwg.org/#the-document-object
pub const HTMLDocument = struct {