Improve correctness of NodeIterator and Treewalker

In their current implementation, both the NodeIterator and TreeWalker would
skip over ignored nodes. However, while the node itself should have been ignored
its children should still be iterated.

For example, when going over:

```
<div id="container">
  <!-- comment1 -->
  <span>
    <!-- comment2 -->
  </span>
</div>
```

With `SHOW_COMMENT`, the previous version would completely skip over `container`
and its children. Now the code still won't emit the `container` div itself,
it will still iterate through its children (and thus emit the two comments).

This change relates to ongoing react compatibility.
This commit is contained in:
Karl Seguin
2025-10-15 09:23:54 +08:00
parent d87d782fd5
commit 3657a49a2c
5 changed files with 290 additions and 13 deletions

View File

@@ -47,6 +47,9 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri
const node_type = parser.nodeType(node); const node_type = parser.nodeType(node);
// Verify that we can show this node type. // Verify that we can show this node type.
// Per the DOM spec, what_to_show filters which nodes to return, but should
// still traverse children. So we return .skip (not .reject) when the node
// type doesn't match.
if (!switch (node_type) { if (!switch (node_type) {
.attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0, .attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0,
.cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0, .cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0,
@@ -60,7 +63,7 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri
.notation => what_to_show & NodeFilter._SHOW_NOTATION != 0, .notation => what_to_show & NodeFilter._SHOW_NOTATION != 0,
.processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0, .processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0,
.text => what_to_show & NodeFilter._SHOW_TEXT != 0, .text => what_to_show & NodeFilter._SHOW_TEXT != 0,
}) return .reject; }) return .skip;
// Verify that we aren't filtering it out. // Verify that we aren't filtering it out.
if (filter) |f| { if (filter) |f| {

View File

@@ -74,10 +74,10 @@ pub const NodeIterator = struct {
return .{ return .{
.root = node, .root = node,
.reference_node = node,
.what_to_show = what_to_show,
.filter = filter, .filter = filter,
.reference_node = node,
.filter_func = filter_func, .filter_func = filter_func,
.what_to_show = what_to_show,
}; };
} }
@@ -106,6 +106,7 @@ pub const NodeIterator = struct {
defer self.callbackEnd(); defer self.callbackEnd();
if (self.pointer_before_current) { if (self.pointer_before_current) {
self.pointer_before_current = false;
// Unlike TreeWalker, NodeIterator starts at the first node // Unlike TreeWalker, NodeIterator starts at the first node
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
self.pointer_before_current = false; self.pointer_before_current = false;
@@ -120,9 +121,21 @@ pub const NodeIterator = struct {
var current = self.reference_node; var current = self.reference_node;
while (current != self.root) { while (current != self.root) {
if (try self.nextSibling(current)) |sibling| { // Try to get next sibling (including .skip/.reject nodes we need to descend into)
self.reference_node = sibling; if (try self.nextSiblingOrSkipReject(current)) |result| {
return try Node.toInterface(sibling); if (result.should_descend) {
// This is a .skip/.reject node - try to find acceptable children within it
if (try self.firstChild(result.node)) |child| {
self.reference_node = child;
return try Node.toInterface(child);
}
// No acceptable children, continue looking at this node's siblings
current = result.node;
continue;
}
// This is an .accept node - return it
self.reference_node = result.node;
return try Node.toInterface(result.node);
} }
current = (parser.nodeParentNode(current)) orelse break; current = (parser.nodeParentNode(current)) orelse break;
@@ -254,6 +267,22 @@ pub const NodeIterator = struct {
return null; return null;
} }
// Get the next sibling that is either acceptable or should be descended into (skip/reject)
fn nextSiblingOrSkipReject(self: *const NodeIterator, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } {
var current = node;
while (true) {
current = (parser.nodeNextSibling(current)) orelse return null;
switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => return .{ .node = current, .should_descend = false },
.skip, .reject => return .{ .node = current, .should_descend = true },
}
}
return null;
}
fn callbackStart(self: *NodeIterator) !void { fn callbackStart(self: *NodeIterator) !void {
if (self.is_in_callback) { if (self.is_in_callback) {
// this is the correct DOMExeption // this is the correct DOMExeption

View File

@@ -144,6 +144,23 @@ pub const TreeWalker = struct {
return null; return null;
} }
// Get the next sibling that is either acceptable or should be descended into (skip)
fn nextSiblingOrSkip(self: *const TreeWalker, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } {
var current = node;
while (true) {
current = (parser.nodeNextSibling(current)) orelse return null;
switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => return .{ .node = current, .should_descend = false },
.skip => return .{ .node = current, .should_descend = true },
.reject => continue,
}
}
return null;
}
fn previousSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { fn previousSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node {
var current = node; var current = node;
@@ -193,19 +210,37 @@ pub const TreeWalker = struct {
} }
pub fn _nextNode(self: *TreeWalker) !?NodeUnion { pub fn _nextNode(self: *TreeWalker) !?NodeUnion {
if (try self.firstChild(self.current_node)) |child| { var current = self.current_node;
// First, try to go to first child of current node
if (try self.firstChild(current)) |child| {
self.current_node = child; self.current_node = child;
return try Node.toInterface(child); return try Node.toInterface(child);
} }
var current = self.current_node; // No acceptable children, move to next node in tree
while (current != self.root) { while (current != self.root) {
if (try self.nextSibling(current)) |sibling| { const result = try self.nextSiblingOrSkip(current) orelse {
self.current_node = sibling; // No next sibling, go up to parent and continue
return try Node.toInterface(sibling); // or, if there is no parent, we're done
current = (parser.nodeParentNode(current)) orelse break;
continue;
};
if (!result.should_descend) {
// This is an .accept node - return it
self.current_node = result.node;
return try Node.toInterface(result.node);
} }
current = (parser.nodeParentNode(current)) orelse break; // This is a .skip node - try to find acceptable children within it
if (try self.firstChild(result.node)) |child| {
self.current_node = child;
return try Node.toInterface(child);
}
// No acceptable children, continue looking at this node's siblings
current = result.node;
} }
return null; return null;

View File

@@ -1,5 +1,21 @@
<!DOCTYPE html> <!DOCTYPE html>
<script src="../testing.js"></script> <script src="../testing.js"></script>
<!-- Test fixture -->
<div id="container">
<!-- comment1 -->
<div id="outer">
<!-- comment2 -->
<span id="inner">
<!-- comment3 -->
Text content
<!-- comment4 -->
</span>
<!-- comment5 -->
</div>
<!-- comment6 -->
</div>
<script id=nodeFilter> <script id=nodeFilter>
testing.expectEqual(1, NodeFilter.FILTER_ACCEPT); testing.expectEqual(1, NodeFilter.FILTER_ACCEPT);
testing.expectEqual(2, NodeFilter.FILTER_REJECT); testing.expectEqual(2, NodeFilter.FILTER_REJECT);
@@ -7,3 +23,197 @@
testing.expectEqual(4294967295, NodeFilter.SHOW_ALL); testing.expectEqual(4294967295, NodeFilter.SHOW_ALL);
testing.expectEqual(129, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT); testing.expectEqual(129, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT);
</script> </script>
<script id=treeWalkerComments>
{
const container = $('#container');
const walker = document.createTreeWalker(
container,
NodeFilter.SHOW_COMMENT,
null,
false
);
const comments = [];
let node;
while (node = walker.nextNode()) {
comments.push(node.data.trim());
}
// Should find all 6 comments, including those nested inside elements
testing.expectEqual(6, comments.length);
testing.expectEqual('comment1', comments[0]);
testing.expectEqual('comment2', comments[1]);
testing.expectEqual('comment3', comments[2]);
testing.expectEqual('comment4', comments[3]);
testing.expectEqual('comment5', comments[4]);
testing.expectEqual('comment6', comments[5]);
}
</script>
<script id=treeWalkerElements>
{
const container = $('#container');
const walker = document.createTreeWalker(
container,
NodeFilter.SHOW_ELEMENT,
null,
false
);
const elements = [];
let node;
while (node = walker.nextNode()) {
if (node.id) {
elements.push(node.id);
}
}
// Should find the 2 nested elements (outer and inner)
testing.expectEqual(2, elements.length);
testing.expectEqual('outer', elements[0]);
testing.expectEqual('inner', elements[1]);
}
</script>
<script id=treeWalkerAll>
{
const container = $('#container');
const walker = document.createTreeWalker(
container,
NodeFilter.SHOW_ALL,
null,
false
);
let commentCount = 0;
let elementCount = 0;
let textCount = 0;
let node;
while (node = walker.nextNode()) {
if (node.nodeType === 8) commentCount++; // Comment
else if (node.nodeType === 1) elementCount++; // Element
else if (node.nodeType === 3) textCount++; // Text
}
testing.expectEqual(6, commentCount);
testing.expectEqual(2, elementCount);
testing.expectEqual(true, textCount > 0);
}
</script>
<script id=treeWalkerCombined>
{
const container = $('#container');
const walker = document.createTreeWalker(
container,
NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT,
null,
false
);
let commentCount = 0;
let elementCount = 0;
let node;
while (node = walker.nextNode()) {
if (node.nodeType === 8) commentCount++; // Comment
else if (node.nodeType === 1) elementCount++; // Element
}
// Should find 6 comments and 2 elements, but no text nodes
testing.expectEqual(6, commentCount);
testing.expectEqual(2, elementCount);
}
</script>
<script id=treeWalkerCustomFilter>
{
const container = $('#container');
// Filter that accepts only elements with id
const walker = document.createTreeWalker(
container,
NodeFilter.SHOW_ELEMENT,
{
acceptNode: function(node) {
return node.id ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_SKIP;
}
},
false
);
const elements = [];
let node;
while (node = walker.nextNode()) {
elements.push(node.id);
}
// Should find only elements with id (outer and inner)
testing.expectEqual(2, elements.length);
testing.expectEqual('outer', elements[0]);
testing.expectEqual('inner', elements[1]);
}
</script>
<script id=nodeIteratorComments>
{
const container = $('#container');
const iterator = document.createNodeIterator(
container,
NodeFilter.SHOW_COMMENT,
null,
false
);
const comments = [];
let node;
while (node = iterator.nextNode()) {
comments.push(node.data.trim());
}
// Should find all 6 comments, including those nested inside elements
testing.expectEqual(6, comments.length);
testing.expectEqual('comment1', comments[0]);
testing.expectEqual('comment2', comments[1]);
testing.expectEqual('comment3', comments[2]);
testing.expectEqual('comment4', comments[3]);
testing.expectEqual('comment5', comments[4]);
testing.expectEqual('comment6', comments[5]);
}
</script>
<script id=reactLikeScenario>
{
// Test a React-like scenario with comment markers
const div = document.createElement('div');
div.innerHTML = `
<a href="/">
<!--$-->
<svg viewBox="0 0 10 10">
<path d="M0,0 L10,10" />
</svg>
<!--/$-->
</a>
`;
const walker = document.createTreeWalker(
div,
NodeFilter.SHOW_COMMENT,
null,
false
);
const comments = [];
let node;
while (node = walker.nextNode()) {
comments.push(node.data);
}
// Should find both React markers even though they're nested inside <a>
testing.expectEqual(2, comments.length);
testing.expectEqual('$', comments[0]);
testing.expectEqual('/$', comments[1]);
}
</script>