Merge pull request #1149 from lightpanda-io/iterators_and_walker_fix

Improve correctness of NodeIterator and TreeWalker
This commit is contained in:
Karl Seguin
2025-10-15 15:58:12 +08:00
committed by GitHub
4 changed files with 289 additions and 12 deletions

View File

@@ -47,6 +47,9 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri
const node_type = parser.nodeType(node);
// Verify that we can show this node type.
// Per the DOM spec, what_to_show filters which nodes to return, but should
// still traverse children. So we return .skip (not .reject) when the node
// type doesn't match.
if (!switch (node_type) {
.attribute => what_to_show & NodeFilter._SHOW_ATTRIBUTE != 0,
.cdata_section => what_to_show & NodeFilter._SHOW_CDATA_SECTION != 0,
@@ -60,7 +63,7 @@ pub fn verify(what_to_show: u32, filter: ?js.Function, node: *parser.Node) !Veri
.notation => what_to_show & NodeFilter._SHOW_NOTATION != 0,
.processing_instruction => what_to_show & NodeFilter._SHOW_PROCESSING_INSTRUCTION != 0,
.text => what_to_show & NodeFilter._SHOW_TEXT != 0,
}) return .reject;
}) return .skip;
// Verify that we aren't filtering it out.
if (filter) |f| {

View File

@@ -74,10 +74,10 @@ pub const NodeIterator = struct {
return .{
.root = node,
.reference_node = node,
.what_to_show = what_to_show,
.filter = filter,
.reference_node = node,
.filter_func = filter_func,
.what_to_show = what_to_show,
};
}
@@ -106,6 +106,7 @@ pub const NodeIterator = struct {
defer self.callbackEnd();
if (self.pointer_before_current) {
self.pointer_before_current = false;
// Unlike TreeWalker, NodeIterator starts at the first node
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
self.pointer_before_current = false;
@@ -120,9 +121,21 @@ pub const NodeIterator = struct {
var current = self.reference_node;
while (current != self.root) {
if (try self.nextSibling(current)) |sibling| {
self.reference_node = sibling;
return try Node.toInterface(sibling);
// Try to get next sibling (including .skip/.reject nodes we need to descend into)
if (try self.nextSiblingOrSkipReject(current)) |result| {
if (result.should_descend) {
// This is a .skip/.reject node - try to find acceptable children within it
if (try self.firstChild(result.node)) |child| {
self.reference_node = child;
return try Node.toInterface(child);
}
// No acceptable children, continue looking at this node's siblings
current = result.node;
continue;
}
// This is an .accept node - return it
self.reference_node = result.node;
return try Node.toInterface(result.node);
}
current = (parser.nodeParentNode(current)) orelse break;
@@ -254,6 +267,22 @@ pub const NodeIterator = struct {
return null;
}
// Look at the sibling immediately following `node` and classify it.
//
// Returns null when `node` has no next sibling. Otherwise returns that
// sibling together with `should_descend`:
//   - false: NodeFilter.verify returned .accept for the sibling.
//   - true:  NodeFilter.verify returned .skip or .reject. Both verdicts are
//            reported the same way here, so the caller can still look for
//            acceptable nodes inside the sibling.
//
// Only one sibling is ever inspected: every verdict produces a result, so
// there is nothing to loop over.
//
// Errors from NodeFilter.verify are propagated to the caller.
fn nextSiblingOrSkipReject(self: *const NodeIterator, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } {
    const sibling = parser.nodeNextSibling(node) orelse return null;
    const verdict = try NodeFilter.verify(self.what_to_show, self.filter_func, sibling);
    return .{ .node = sibling, .should_descend = verdict != .accept };
}
fn callbackStart(self: *NodeIterator) !void {
if (self.is_in_callback) {
// this is the correct DOMException

View File

@@ -144,6 +144,23 @@ pub const TreeWalker = struct {
return null;
}
// Scan forward through the siblings that follow `node` and return the first
// one that NodeFilter.verify does not reject.
//
// Returns null when the siblings run out. Otherwise `should_descend` is:
//   - false when the sibling was accepted,
//   - true  when the sibling was skipped.
// Rejected siblings are passed over entirely and never returned.
//
// Errors from NodeFilter.verify are propagated to the caller.
fn nextSiblingOrSkip(self: *const TreeWalker, node: *parser.Node) !?struct { node: *parser.Node, should_descend: bool } {
    var candidate = parser.nodeNextSibling(node);
    while (candidate) |sibling| {
        const verdict = try NodeFilter.verify(self.what_to_show, self.filter_func, sibling);
        if (verdict != .reject) {
            return .{ .node = sibling, .should_descend = verdict == .skip };
        }
        // Rejected: move on to the following sibling.
        candidate = parser.nodeNextSibling(sibling);
    }
    return null;
}
fn previousSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node {
var current = node;
@@ -193,19 +210,37 @@ pub const TreeWalker = struct {
}
pub fn _nextNode(self: *TreeWalker) !?NodeUnion {
if (try self.firstChild(self.current_node)) |child| {
var current = self.current_node;
// First, try to go to first child of current node
if (try self.firstChild(current)) |child| {
self.current_node = child;
return try Node.toInterface(child);
}
var current = self.current_node;
// No acceptable children, move to next node in tree
while (current != self.root) {
if (try self.nextSibling(current)) |sibling| {
self.current_node = sibling;
return try Node.toInterface(sibling);
const result = try self.nextSiblingOrSkip(current) orelse {
// No next sibling, go up to parent and continue
// or, if there is no parent, we're done
current = (parser.nodeParentNode(current)) orelse break;
continue;
};
if (!result.should_descend) {
// This is an .accept node - return it
self.current_node = result.node;
return try Node.toInterface(result.node);
}
current = (parser.nodeParentNode(current)) orelse break;
// This is a .skip node - try to find acceptable children within it
if (try self.firstChild(result.node)) |child| {
self.current_node = child;
return try Node.toInterface(child);
}
// No acceptable children, continue looking at this node's siblings
current = result.node;
}
return null;

View File

@@ -1,5 +1,21 @@
<!DOCTYPE html>
<script src="../testing.js"></script>
<!-- Test fixture -->
<div id="container">
<!-- comment1 -->
<div id="outer">
<!-- comment2 -->
<span id="inner">
<!-- comment3 -->
Text content
<!-- comment4 -->
</span>
<!-- comment5 -->
</div>
<!-- comment6 -->
</div>
<script id=nodeFilter>
testing.expectEqual(1, NodeFilter.FILTER_ACCEPT);
testing.expectEqual(2, NodeFilter.FILTER_REJECT);
@@ -7,3 +23,197 @@
testing.expectEqual(4294967295, NodeFilter.SHOW_ALL);
testing.expectEqual(129, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT);
</script>
<script id=treeWalkerComments>
{
  // Walk #container with a TreeWalker restricted to comment nodes and
  // collect the trimmed text of each comment in visiting order.
  const root = $('#container');
  const commentWalker = document.createTreeWalker(root, NodeFilter.SHOW_COMMENT, null, false);

  const seen = [];
  for (let current = commentWalker.nextNode(); current; current = commentWalker.nextNode()) {
    seen.push(current.data.trim());
  }

  // All 6 comments must be reported, including those nested inside elements.
  testing.expectEqual(6, seen.length);
  const expected = ['comment1', 'comment2', 'comment3', 'comment4', 'comment5', 'comment6'];
  for (let i = 0; i < expected.length; i++) {
    testing.expectEqual(expected[i], seen[i]);
  }
}
</script>
<script id=treeWalkerElements>
{
  // Walk #container with a TreeWalker restricted to elements and record the
  // id of every visited element that has one.
  const root = $('#container');
  const elementWalker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, null, false);

  const ids = [];
  let current = elementWalker.nextNode();
  while (current) {
    if (current.id) {
      ids.push(current.id);
    }
    current = elementWalker.nextNode();
  }

  // Expect exactly the two nested elements, outer first.
  testing.expectEqual(2, ids.length);
  testing.expectEqual('outer', ids[0]);
  testing.expectEqual('inner', ids[1]);
}
</script>
<script id=treeWalkerAll>
{
  // Walk #container with SHOW_ALL and tally the visited nodes by nodeType.
  const allWalker = document.createTreeWalker($('#container'), NodeFilter.SHOW_ALL, null, false);

  const tally = { comments: 0, elements: 0, texts: 0 };
  for (let current = allWalker.nextNode(); current; current = allWalker.nextNode()) {
    switch (current.nodeType) {
      case 8: // Comment
        tally.comments++;
        break;
      case 1: // Element
        tally.elements++;
        break;
      case 3: // Text
        tally.texts++;
        break;
    }
  }

  testing.expectEqual(6, tally.comments);
  testing.expectEqual(2, tally.elements);
  testing.expectEqual(true, tally.texts > 0);
}
</script>
<script id=treeWalkerCombined>
{
  // Combine two whatToShow flags: elements and comments are visited, text is not.
  const mask = NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT;
  const combinedWalker = document.createTreeWalker($('#container'), mask, null, false);

  const tally = { comments: 0, elements: 0 };
  let current = combinedWalker.nextNode();
  while (current) {
    const type = current.nodeType;
    if (type === 8) {
      tally.comments++; // Comment
    } else if (type === 1) {
      tally.elements++; // Element
    }
    current = combinedWalker.nextNode();
  }

  // Should find 6 comments and 2 elements, but no text nodes
  testing.expectEqual(6, tally.comments);
  testing.expectEqual(2, tally.elements);
}
</script>
<script id=treeWalkerCustomFilter>
{
  // Custom filter: accept elements that carry an id, skip the rest.
  const onlyWithId = {
    acceptNode(candidate) {
      if (candidate.id) {
        return NodeFilter.FILTER_ACCEPT;
      }
      return NodeFilter.FILTER_SKIP;
    }
  };
  const filteredWalker = document.createTreeWalker(
    $('#container'),
    NodeFilter.SHOW_ELEMENT,
    onlyWithId,
    false
  );

  const ids = [];
  for (let current = filteredWalker.nextNode(); current; current = filteredWalker.nextNode()) {
    ids.push(current.id);
  }

  // Should find only elements with id (outer and inner)
  testing.expectEqual(2, ids.length);
  testing.expectEqual('outer', ids[0]);
  testing.expectEqual('inner', ids[1]);
}
</script>
<script id=nodeIteratorComments>
{
  // Same fixture as the TreeWalker comment test, driven through a NodeIterator.
  const root = $('#container');
  const commentIterator = document.createNodeIterator(root, NodeFilter.SHOW_COMMENT, null, false);

  const seen = [];
  let current = commentIterator.nextNode();
  while (current) {
    seen.push(current.data.trim());
    current = commentIterator.nextNode();
  }

  // All 6 comments must be reported, including those nested inside elements.
  testing.expectEqual(6, seen.length);
  const expected = ['comment1', 'comment2', 'comment3', 'comment4', 'comment5', 'comment6'];
  for (let i = 0; i < expected.length; i++) {
    testing.expectEqual(expected[i], seen[i]);
  }
}
</script>
<script id=reactLikeScenario>
{
  // React-style markup: comment markers wrap content nested inside an <a>.
  const host = document.createElement('div');
  host.innerHTML = `
<a href="/">
<!--$-->
<svg viewBox="0 0 10 10">
<path d="M0,0 L10,10" />
</svg>
<!--/$-->
</a>
`;

  const markerWalker = document.createTreeWalker(host, NodeFilter.SHOW_COMMENT, null, false);

  const markers = [];
  for (let current = markerWalker.nextNode(); current; current = markerWalker.nextNode()) {
    markers.push(current.data);
  }

  // Both markers must be found even though they are nested inside <a>.
  testing.expectEqual(2, markers.length);
  testing.expectEqual('$', markers[0]);
  testing.expectEqual('/$', markers[1]);
}
</script>