From a2e266514f2d36e566f3238a96f29ec762e3fe27 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 15:39:35 +0100 Subject: [PATCH] dom: extract walker from html_collection to its own file --- src/browser/browser.zig | 2 +- src/browser/dump.zig | 2 +- src/dom/document.zig | 2 +- src/dom/element.zig | 2 +- src/dom/html_collection.zig | 88 +++---------------------------------- src/dom/walker.zig | 86 ++++++++++++++++++++++++++++++++++++ src/html/document.zig | 2 +- 7 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 src/dom/walker.zig diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 62143139..18e37cec 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -14,7 +14,7 @@ const Env = jsruntime.Env; const apiweb = @import("../apiweb.zig"); const Window = @import("../html/window.zig").Window; -const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; const FetchResult = std.http.Client.FetchResult; diff --git a/src/browser/dump.zig b/src/browser/dump.zig index 3ed3de98..a00bd3c7 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -2,7 +2,7 @@ const std = @import("std"); const File = std.fs.File; const parser = @import("../netsurf.zig"); -const Walker = @import("../dom/html_collection.zig").WalkerChildren; +const Walker = @import("../dom/walker.zig").WalkerChildren; pub fn htmlFile(doc: *parser.Document, out: File) !void { try out.writeAll("\n"); diff --git a/src/dom/document.zig b/src/dom/document.zig index ee7f9454..9c878871 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -10,7 +10,7 @@ const Node = @import("node.zig").Node; const NodeList = @import("nodelist.zig").NodeList; const NodeUnion = @import("node.zig").Union; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const collection = @import("html_collection.zig"); const Element = @import("element.zig").Element; diff --git a/src/dom/element.zig b/src/dom/element.zig index e64faf08..cd12f625 100644 --- a/src/dom/element.zig +++ b/src/dom/element.zig @@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases; const collection = @import("html_collection.zig"); const Node = @import("node.zig").Node; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const NodeList = @import("nodelist.zig").NodeList; const HTMLElem = @import("../html/elements.zig"); pub const Union = @import("../html/elements.zig").Union; diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index f5e3d6d1..5cff8f92 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -11,6 +11,11 @@ const utils = @import("utils.z"); const Element = @import("element.zig").Element; const Union = @import("element.zig").Union; +const Walker = @import("walker.zig").Walker; +const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst; +const WalkerChildren = @import("walker.zig").WalkerChildren; +const WalkerNone = @import("walker.zig").WalkerNone; + const Matcher = union(enum) { matchByName: MatchByName, matchByTagName: MatchByTagName, @@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors( }; } -const Walker = union(enum) { - walkerDepthFirst: WalkerDepthFirst, - walkerChildren: WalkerChildren, - walkerNone: WalkerNone, - - pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - switch (self) { - inline else => |case| return case.get_next(root, cur), - } - } -}; - -// WalkerDepthFirst iterates over the DOM tree to return the next following -// node or null at the end. -// -// This implementation is a zig version of Netsurf code. -// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 -// -// The iteration is a depth first as required by the specification. -// https://dom.spec.whatwg.org/#htmlcollection -// https://dom.spec.whatwg.org/#concept-tree-order -pub const WalkerDepthFirst = struct { - pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - var n = cur orelse root; - - // TODO deinit next - if (try parser.nodeFirstChild(n)) |next| { - return next; - } - - // TODO deinit next - if (try parser.nodeNextSibling(n)) |next| { - return next; - } - - // TODO deinit parent - // Back to the parent of cur. - // If cur has no parent, then the iteration is over. - var parent = try parser.nodeParentNode(n) orelse return null; - - // TODO deinit lastchild - var lastchild = try parser.nodeLastChild(parent); - while (n != root and n == lastchild) { - n = parent; - - // TODO deinit parent - // Back to the prev's parent. - // If prev has no parent, then the loop must stop. - parent = try parser.nodeParentNode(n) orelse break; - - // TODO deinit lastchild - lastchild = try parser.nodeLastChild(parent); - } - - if (n == root) { - return null; - } - - return try parser.nodeNextSibling(n); - } -}; - -// WalkerChildren iterates over the root's children only. -pub const WalkerChildren = struct { - pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - // On walk start, we return the first root's child. - if (cur == null) return try parser.nodeFirstChild(root); - - // If cur is root, then return null. - // This is a special case, if the root is included in the walk, we - // don't want to go further to find children. - if (root == cur.?) return null; - - return try parser.nodeNextSibling(cur.?); - } -}; - -pub const WalkerNone = struct { - pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { - return null; - } -}; - pub const HTMLCollectionIterator = struct { pub const mem_guarantied = true; diff --git a/src/dom/walker.zig b/src/dom/walker.zig new file mode 100644 index 00000000..205936cb --- /dev/null +++ b/src/dom/walker.zig @@ -0,0 +1,86 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +pub const Walker = union(enum) { + walkerDepthFirst: WalkerDepthFirst, + walkerChildren: WalkerChildren, + walkerNone: WalkerNone, + + pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + switch (self) { + inline else => |case| return case.get_next(root, cur), + } + } +}; + +// WalkerDepthFirst iterates over the DOM tree to return the next following +// node or null at the end. +// +// This implementation is a zig version of Netsurf code. +// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 +// +// The iteration is a depth first as required by the specification. +// https://dom.spec.whatwg.org/#htmlcollection +// https://dom.spec.whatwg.org/#concept-tree-order +pub const WalkerDepthFirst = struct { + pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + var n = cur orelse root; + + // TODO deinit next + if (try parser.nodeFirstChild(n)) |next| { + return next; + } + + // TODO deinit next + if (try parser.nodeNextSibling(n)) |next| { + return next; + } + + // TODO deinit parent + // Back to the parent of cur. + // If cur has no parent, then the iteration is over. + var parent = try parser.nodeParentNode(n) orelse return null; + + // TODO deinit lastchild + var lastchild = try parser.nodeLastChild(parent); + while (n != root and n == lastchild) { + n = parent; + + // TODO deinit parent + // Back to the prev's parent. + // If prev has no parent, then the loop must stop. + parent = try parser.nodeParentNode(n) orelse break; + + // TODO deinit lastchild + lastchild = try parser.nodeLastChild(parent); + } + + if (n == root) { + return null; + } + + return try parser.nodeNextSibling(n); + } +}; + +// WalkerChildren iterates over the root's children only. +pub const WalkerChildren = struct { + pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + // On walk start, we return the first root's child. + if (cur == null) return try parser.nodeFirstChild(root); + + // If cur is root, then return null. + // This is a special case, if the root is included in the walk, we + // don't want to go further to find children. + if (root == cur.?) return null; + + return try parser.nodeNextSibling(cur.?); + } +}; + +pub const WalkerNone = struct { + pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { + return null; + } +}; diff --git a/src/html/document.zig b/src/html/document.zig index 96dc96cd..d463ab29 100644 --- a/src/html/document.zig +++ b/src/html/document.zig @@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList; const HTMLElem = @import("elements.zig"); const collection = @import("../dom/html_collection.zig"); -const Walker = collection.WalkerDepthFirst; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; // WEB IDL https://html.spec.whatwg.org/#the-document-object pub const HTMLDocument = struct {