From 3657a49a2caf2517fcfb64b48074b7a0520169cd Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Wed, 15 Oct 2025 09:23:54 +0800 Subject: [PATCH] Improve correctness of NodeIterator and Treewalker In their current implementation, both the NodeIterator and TreeWalker would skip over ignored nodes. However, while the node itself should have been ignored its children should still be iterated. For example, when going over: ```
``` With `SHOW_COMMENT`, the previous version would completely skip over `container` and its children. Now the code still won't emit the `container` div itself, it will still iterate through its children (and thus emit the two comments). This change relates to ongoing react compatibility. --- src/browser/dom/node_filter.zig | 5 +- src/browser/dom/node_iterator.zig | 39 +++++- src/browser/dom/tree_walker.zig | 47 ++++++- src/tests/dom/node_filter.html | 210 ++++++++++++++++++++++++++++++ vendor/netsurf/libdom | 2 +- 5 files changed, 290 insertions(+), 13 deletions(-) diff --git a/src/browser/dom/node_filter.zig b/src/browser/dom/node_filter.zig index 56cdd493..ccbc3f9e 100644 --- a/src/browser/dom/node_filter.zig +++ b/src/browser/dom/node_filter.zig @@ -47,6 +47,9 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri const node_type = parser.nodeType(node); // Verify that we can show this node type. + // Per the DOM spec, what_to_show filters which nodes to return, but should + // still traverse children. So we return .skip (not .reject) when the node + // type doesn't match. if (!switch (node_type) { .attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0, .cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0, @@ -60,7 +63,7 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri .notation => what_to_show & NodeFilter._SHOW_NOTATION != 0, .processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0, .text => what_to_show & NodeFilter._SHOW_TEXT != 0, - }) return .reject; + }) return .skip; // Verify that we aren't filtering it out. if (filter) |f| { diff --git a/src/browser/dom/node_iterator.zig b/src/browser/dom/node_iterator.zig index bf059340..8c754f16 100644 --- a/src/browser/dom/node_iterator.zig +++ b/src/browser/dom/node_iterator.zig @@ -74,10 +74,10 @@ pub const NodeIterator = struct { return .{ .root = node, - .reference_node = node, - .what_to_show = what_to_show, .filter = filter, + .reference_node = node, .filter_func = filter_func, + .what_to_show = what_to_show, }; } @@ -106,6 +106,7 @@ pub const NodeIterator = struct { defer self.callbackEnd(); if (self.pointer_before_current) { + self.pointer_before_current = false; // Unlike TreeWalker, NodeIterator starts at the first node if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { self.pointer_before_current = false; @@ -120,9 +121,21 @@ pub const NodeIterator = struct { var current = self.reference_node; while (current != self.root) { - if (try self.nextSibling(current)) |sibling| { - self.reference_node = sibling; - return try Node.toInterface(sibling); + // Try to get next sibling (including .skip/.reject nodes we need to descend into) + if (try self.nextSiblingOrSkipReject(current)) |result| { + if (result.should_descend) { + // This is a .skip/.reject node - try to find acceptable children within it + if (try self.firstChild(result.node)) |child| { + self.reference_node = child; + return try Node.toInterface(child); + } + // No acceptable children, continue looking at this node's siblings + current = result.node; + continue; + } + // This is an .accept node - return it + self.reference_node = result.node; + return try Node.toInterface(result.node); } current = (parser.nodeParentNode(current)) orelse break; @@ -254,6 +267,22 @@ pub const NodeIterator = struct { return null; } + // Get the next sibling that is either acceptable or should be descended into (skip/reject) + fn nextSiblingOrSkipReject(self: *const NodeIterator, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } { + var current = node; + + while (true) { + current = (parser.nodeNextSibling(current)) orelse return null; + + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { + .accept => return .{ .node = current, .should_descend = false }, + .skip, .reject => return .{ .node = current, .should_descend = true }, + } + } + + return null; + } + fn callbackStart(self: *NodeIterator) !void { if (self.is_in_callback) { // this is the correct DOMExeption diff --git a/src/browser/dom/tree_walker.zig b/src/browser/dom/tree_walker.zig index 7da44d54..f8143e22 100644 --- a/src/browser/dom/tree_walker.zig +++ b/src/browser/dom/tree_walker.zig @@ -144,6 +144,23 @@ pub const TreeWalker = struct { return null; } + // Get the next sibling that is either acceptable or should be descended into (skip) + fn nextSiblingOrSkip(self: *const TreeWalker, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } { + var current = node; + + while (true) { + current = (parser.nodeNextSibling(current)) orelse return null; + + switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) { + .accept => return .{ .node = current, .should_descend = false }, + .skip => return .{ .node = current, .should_descend = true }, + .reject => continue, + } + } + + return null; + } + fn previousSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { var current = node; @@ -193,19 +210,37 @@ pub const TreeWalker = struct { } pub fn _nextNode(self: *TreeWalker) !?NodeUnion { - if (try self.firstChild(self.current_node)) |child| { + var current = self.current_node; + + // First, try to go to first child of current node + if (try self.firstChild(current)) |child| { self.current_node = child; return try Node.toInterface(child); } - var current = self.current_node; + // No acceptable children, move to next node in tree while (current != self.root) { - if (try self.nextSibling(current)) |sibling| { - self.current_node = sibling; - return try Node.toInterface(sibling); + const result = try self.nextSiblingOrSkip(current) orelse { + // No next sibling, go up to parent and continue + // or, if there is no parent, we're done + current = (parser.nodeParentNode(current)) orelse break; + continue; + }; + + + if (!result.should_descend) { + // This is an .accept node - return it + self.current_node = result.node; + return try Node.toInterface(result.node); } - current = (parser.nodeParentNode(current)) orelse break; + // This is a .skip node - try to find acceptable children within it + if (try self.firstChild(result.node)) |child| { + self.current_node = child; + return try Node.toInterface(child); + } + // No acceptable children, continue looking at this node's siblings + current = result.node; } return null; diff --git a/src/tests/dom/node_filter.html b/src/tests/dom/node_filter.html index 4f862c6c..d5ac95f4 100644 --- a/src/tests/dom/node_filter.html +++ b/src/tests/dom/node_filter.html @@ -1,5 +1,21 @@ + + +
+ +
+ + + + Text content + + + +
+ +
+ + + + + + + + + + + + + + + diff --git a/vendor/netsurf/libdom b/vendor/netsurf/libdom index ef7d5d4f..cfb92353 160000 --- a/vendor/netsurf/libdom +++ b/vendor/netsurf/libdom @@ -1 +1 @@ -Subproject commit ef7d5d4faba3579d2ec759c69d58cad40ee852b4 +Subproject commit cfb92353518a3fb75e5c3c85b8dab26b21c9cee1