From a8b72c1d5fb1943fda794095c222fe6968bdae70 Mon Sep 17 00:00:00 2001 From: sjorsdonkers <72333389+sjorsdonkers@users.noreply.github.com> Date: Wed, 9 Jul 2025 11:29:05 +0200 Subject: [PATCH] Separate NodeIterator impl, fix _filter --- src/browser/dom/document.zig | 2 +- src/browser/dom/node_filter.zig | 35 ++++++++ src/browser/dom/node_iterator.zig | 135 +++++++++++++++++++++++------- src/browser/dom/tree_walker.zig | 62 ++++---------- 4 files changed, 156 insertions(+), 78 deletions(-) diff --git a/src/browser/dom/document.zig b/src/browser/dom/document.zig index ac732b63..79572418 100644 --- a/src/browser/dom/document.zig +++ b/src/browser/dom/document.zig @@ -266,7 +266,7 @@ pub const Document = struct { return try TreeWalker.init(root, what_to_show, filter); } - pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !NodeIterator { + pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator { return try NodeIterator.init(root, what_to_show, filter); } diff --git a/src/browser/dom/node_filter.zig b/src/browser/dom/node_filter.zig index 8ba80e10..54ce6b67 100644 --- a/src/browser/dom/node_filter.zig +++ b/src/browser/dom/node_filter.zig @@ -17,6 +17,8 @@ // along with this program. If not, see . const std = @import("std"); +const parser = @import("../netsurf.zig"); +const Env = @import("../env.zig").Env; pub const NodeFilter = struct { pub const _FILTER_ACCEPT: u16 = 1; @@ -38,6 +40,39 @@ pub const NodeFilter = struct { pub const _SHOW_NOTATION: u32 = 0b100000000000; }; +const VerifyResult = enum { accept, skip, reject }; + +pub fn verify(what_to_show: u32, filter: ?Env.Function, node: *parser.Node) !VerifyResult { + const node_type = try parser.nodeType(node); + + // Verify that we can show this node type. 
+ if (!switch (node_type) { + .attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0, + .cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0, + .comment => what_to_show & NodeFilter._SHOW_COMMENT != 0, + .document => what_to_show & NodeFilter._SHOW_DOCUMENT != 0, + .document_fragment => what_to_show & NodeFilter._SHOW_DOCUMENT_FRAGMENT != 0, + .document_type => what_to_show & NodeFilter._SHOW_DOCUMENT_TYPE != 0, + .element => what_to_show & NodeFilter._SHOW_ELEMENT != 0, + .entity => what_to_show & NodeFilter._SHOW_ENTITY != 0, + .entity_reference => what_to_show & NodeFilter._SHOW_ENTITY_REFERENCE != 0, + .notation => what_to_show & NodeFilter._SHOW_NOTATION != 0, + .processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0, + .text => what_to_show & NodeFilter._SHOW_TEXT != 0, + }) return .reject; + + // Verify that we aren't filtering it out. + if (filter) |f| { + const acceptance = try f.call(u16, .{node}); + return switch (acceptance) { + NodeFilter._FILTER_ACCEPT => .accept, + NodeFilter._FILTER_REJECT => .reject, + NodeFilter._FILTER_SKIP => .skip, + else => .reject, + }; + } else return .accept; +} + const testing = @import("../../testing.zig"); test "Browser.DOM.NodeFilter" { var runner = try testing.jsRunner(testing.tracking_allocator, .{}); diff --git a/src/browser/dom/node_iterator.zig b/src/browser/dom/node_iterator.zig index 336368ff..d79812e0 100644 --- a/src/browser/dom/node_iterator.zig +++ b/src/browser/dom/node_iterator.zig @@ -19,19 +19,47 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); const Env = @import("../env.zig").Env; -const TreeWalker = @import("tree_walker.zig").TreeWalker; +const NodeFilter = @import("node_filter.zig"); // https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator +// While this is similar to TreeWalker it has its own implementation as there are several subtle differences +// For example: +// - nextNode returns the reference node, whereas 
TreeWalker returns the next node +// - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different pub const NodeIterator = struct { - walker: TreeWalker, + root: *parser.Node, + reference_node: *parser.Node, + what_to_show: u32, + filter: ?NodeIteratorOpts, + filter_func: ?Env.Function, + pointer_before_current: bool = true, - pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !NodeIterator { - return .{ .walker = try TreeWalker.init(node, what_to_show, filter) }; + pub const NodeIteratorOpts = union(enum) { + function: Env.Function, + object: struct { acceptNode: Env.Function }, + }; + + pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?NodeIteratorOpts) !NodeIterator { + var filter_func: ?Env.Function = null; + if (filter) |f| { + filter_func = switch (f) { + .function => |func| func, + .object => |o| o.acceptNode, + }; + } + + return .{ + .root = node, + .reference_node = node, + .what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, + .filter = filter, + .filter_func = filter_func, + }; } - pub fn get_filter(self: *const NodeIterator) ?Env.Function { - return self.walker.filter; + pub fn get_filter(self: *const NodeIterator) ?NodeIteratorOpts { + return self.filter; } pub fn get_pointerBeforeReferenceNode(self: *const NodeIterator) bool { @@ -39,34 +67,34 @@ pub const NodeIterator = struct { } pub fn get_referenceNode(self: *const NodeIterator) *parser.Node { - return self.walker.current_node; + return self.reference_node; } pub fn get_root(self: *const NodeIterator) *parser.Node { - return self.walker.root; + return self.root; } pub fn get_whatToShow(self: *const NodeIterator) u32 { - return self.walker.what_to_show; + return self.what_to_show; } pub fn _nextNode(self: *NodeIterator) !?*parser.Node { if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node self.pointer_before_current = false; - if (.accept == try 
self.walker.verify(self.walker.current_node)) { - return self.walker.current_node; + if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { + return self.reference_node; } } - if (try self.firstChild(self.walker.current_node)) |child| { - self.walker.current_node = child; + if (try self.firstChild(self.reference_node)) |child| { + self.reference_node = child; return child; } - var current = self.walker.current_node; - while (current != self.walker.root) { - if (try self.walker.nextSibling(current)) |sibling| { - self.walker.current_node = sibling; + var current = self.reference_node; + while (current != self.root) { + if (try self.nextSibling(current)) |sibling| { + self.reference_node = sibling; return sibling; } @@ -79,41 +107,41 @@ pub const NodeIterator = struct { pub fn _previousNode(self: *NodeIterator) !?*parser.Node { if (!self.pointer_before_current) { self.pointer_before_current = true; - if (.accept == try self.walker.verify(self.walker.current_node)) { - return self.walker.current_node; // Still need to verify as last may be first as well + if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { + return self.reference_node; // Still need to verify as last may be first as well } } - if (self.walker.current_node == self.walker.root) return null; + if (self.reference_node == self.root) return null; - var current = self.walker.current_node; + var current = self.reference_node; while (try parser.nodePreviousSibling(current)) |previous| { current = previous; - switch (try self.walker.verify(current)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { .accept => { // Get last child if it has one. if (try self.lastChild(current)) |child| { - self.walker.current_node = child; + self.reference_node = child; return child; } // Otherwise, this node is our previous one. 
- self.walker.current_node = current; + self.reference_node = current; return current; }, .reject, .skip => { // Get last child if it has one. if (try self.lastChild(current)) |child| { - self.walker.current_node = child; + self.reference_node = child; return child; } }, } } - if (current != self.walker.root) { - if (try self.walker.parentNode(current)) |parent| { - self.walker.current_node = parent; + if (current != self.root) { + if (try self.parentNode(current)) |parent| { + self.reference_node = parent; return parent; } } @@ -129,7 +157,7 @@ pub const NodeIterator = struct { const index: u32 = @intCast(i); const child = (try parser.nodeListItem(children, index)) orelse return null; - switch (try self.walker.verify(child)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) { .accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker .reject, .skip => if (try self.firstChild(child)) |gchild| return gchild, } @@ -147,7 +175,7 @@ pub const NodeIterator = struct { index -= 1; const child = (try parser.nodeListItem(children, index)) orelse return null; - switch (try self.walker.verify(child)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) { .accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker .reject, .skip => if (try self.lastChild(child)) |gchild| return gchild, } @@ -155,6 +183,38 @@ pub const NodeIterator = struct { return null; } + + // This implementation is actually the same as :TreeWalker + fn parentNode(self: *const NodeIterator, node: *parser.Node) !?*parser.Node { + if (self.root == node) return null; + + var current = node; + while (true) { + if (current == self.root) return null; + current = (try parser.nodeParentNode(current)) orelse return null; + + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { + .accept => return current, + .reject, .skip 
=> continue, + } + } + } + + // This implementation is actually the same as :TreeWalker + fn nextSibling(self: *const NodeIterator, node: *parser.Node) !?*parser.Node { + var current = node; + + while (true) { + current = (try parser.nodeNextSibling(current)) orelse return null; + + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { + .accept => return current, + .skip, .reject => continue, + } + } + + return null; + } }; const testing = @import("../../testing.zig"); @@ -210,4 +270,19 @@ test "Browser.DOM.NodeFilter" { }, .{ "notationIterator.previousNode()", "null" }, }, .{}); + + try runner.testCases(&.{ + .{ "nodeIterator.filter.acceptNode(document.body)", "1" }, + .{ "notationIterator.filter", "null" }, + .{ + \\ const rejectIterator = document.createNodeIterator( + \\ document.body, + \\ NodeFilter.SHOW_ALL, + \\ (e => { return NodeFilter.FILTER_REJECT}), + \\ ); + \\ rejectIterator.filter(document.body); + , + "2", + }, + }, .{}); } diff --git a/src/browser/dom/tree_walker.zig b/src/browser/dom/tree_walker.zig index 55160c3d..22952f97 100644 --- a/src/browser/dom/tree_walker.zig +++ b/src/browser/dom/tree_walker.zig @@ -19,7 +19,7 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); -const NodeFilter = @import("node_filter.zig").NodeFilter; +const NodeFilter = @import("node_filter.zig"); const Env = @import("../env.zig").Env; const Page = @import("../page.zig").Page; @@ -28,7 +28,8 @@ pub const TreeWalker = struct { root: *parser.Node, current_node: *parser.Node, what_to_show: u32, - filter: ?Env.Function, + filter: ?TreeWalkerOpts, + filter_func: ?Env.Function, pub const TreeWalkerOpts = union(enum) { function: Env.Function, @@ -48,45 +49,12 @@ pub const TreeWalker = struct { return .{ .root = node, .current_node = node, - .what_to_show = what_to_show orelse NodeFilter._SHOW_ALL, - .filter = filter_func, + .what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, + .filter = filter, + .filter_func = 
filter_func, }; } - const VerifyResult = enum { accept, skip, reject }; - - pub fn verify(self: *const TreeWalker, node: *parser.Node) !VerifyResult { - const node_type = try parser.nodeType(node); - const what_to_show = self.what_to_show; - - // Verify that we can show this node type. - if (!switch (node_type) { - .attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0, - .cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0, - .comment => what_to_show & NodeFilter._SHOW_COMMENT != 0, - .document => what_to_show & NodeFilter._SHOW_DOCUMENT != 0, - .document_fragment => what_to_show & NodeFilter._SHOW_DOCUMENT_FRAGMENT != 0, - .document_type => what_to_show & NodeFilter._SHOW_DOCUMENT_TYPE != 0, - .element => what_to_show & NodeFilter._SHOW_ELEMENT != 0, - .entity => what_to_show & NodeFilter._SHOW_ENTITY != 0, - .entity_reference => what_to_show & NodeFilter._SHOW_ENTITY_REFERENCE != 0, - .notation => what_to_show & NodeFilter._SHOW_NOTATION != 0, - .processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0, - .text => what_to_show & NodeFilter._SHOW_TEXT != 0, - }) return .reject; - - // Verify that we aren't filtering it out. 
- if (self.filter) |f| { - const filter = try f.call(u16, .{node}); - return switch (filter) { - NodeFilter._FILTER_ACCEPT => .accept, - NodeFilter._FILTER_REJECT => .reject, - NodeFilter._FILTER_SKIP => .skip, - else => .reject, - }; - } else return .accept; - } - pub fn get_root(self: *TreeWalker) *parser.Node { return self.root; } @@ -99,7 +67,7 @@ pub const TreeWalker = struct { return self.what_to_show; } - pub fn get_filter(self: *TreeWalker) ?Env.Function { + pub fn get_filter(self: *TreeWalker) ?TreeWalkerOpts { return self.filter; } @@ -115,7 +83,7 @@ pub const TreeWalker = struct { const index: u32 = @intCast(i); const child = (try parser.nodeListItem(children, index)) orelse return null; - switch (try self.verify(child)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) { .accept => return child, .reject => continue, .skip => if (try self.firstChild(child)) |gchild| return gchild, @@ -134,7 +102,7 @@ pub const TreeWalker = struct { index -= 1; const child = (try parser.nodeListItem(children, index)) orelse return null; - switch (try self.verify(child)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) { .accept => return child, .reject => continue, .skip => if (try self.lastChild(child)) |gchild| return gchild, @@ -144,13 +112,13 @@ pub const TreeWalker = struct { return null; } - pub fn nextSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { + fn nextSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { var current = node; while (true) { current = (try parser.nodeNextSibling(current)) orelse return null; - switch (try self.verify(current)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { .accept => return current, .skip, .reject => continue, } @@ -165,7 +133,7 @@ pub const TreeWalker = struct { while (true) { current = (try parser.nodePreviousSibling(current)) orelse return null; - switch (try self.verify(current)) { + switch 
(try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { .accept => return current, .skip, .reject => continue, } @@ -174,7 +142,7 @@ pub const TreeWalker = struct { return null; } - pub fn parentNode(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { + fn parentNode(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { if (self.root == node) return null; var current = node; @@ -182,7 +150,7 @@ pub const TreeWalker = struct { if (current == self.root) return null; current = (try parser.nodeParentNode(current)) orelse return null; - switch (try self.verify(current)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { .accept => return current, .reject, .skip => continue, } @@ -251,7 +219,7 @@ pub const TreeWalker = struct { while (try parser.nodePreviousSibling(current)) |previous| { current = previous; - switch (try self.verify(current)) { + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { .accept => { // Get last child if it has one. if (try self.lastChild(current)) |child| {