Separate NodeIterator impl, fix _filter

This commit is contained in:
sjorsdonkers
2025-07-09 11:29:05 +02:00
committed by Sjors
parent 765b8dc97b
commit a8b72c1d5f
4 changed files with 156 additions and 78 deletions

View File

@@ -266,7 +266,7 @@ pub const Document = struct {
return try TreeWalker.init(root, what_to_show, filter); return try TreeWalker.init(root, what_to_show, filter);
} }
pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !NodeIterator { pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator {
return try NodeIterator.init(root, what_to_show, filter); return try NodeIterator.init(root, what_to_show, filter);
} }

View File

@@ -17,6 +17,8 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std"); const std = @import("std");
const parser = @import("../netsurf.zig");
const Env = @import("../env.zig").Env;
pub const NodeFilter = struct { pub const NodeFilter = struct {
pub const _FILTER_ACCEPT: u16 = 1; pub const _FILTER_ACCEPT: u16 = 1;
@@ -38,6 +40,39 @@ pub const NodeFilter = struct {
pub const _SHOW_NOTATION: u32 = 0b100000000000; pub const _SHOW_NOTATION: u32 = 0b100000000000;
}; };
/// Outcome of checking a node against a `what_to_show` mask and an optional filter callback.
const VerifyResult = enum { accept, skip, reject };

/// Decides whether `node` passes the `what_to_show` bitmask and the optional
/// `filter` callback.
///
/// Returns `.reject` without invoking the callback when the `_SHOW_*` bit that
/// corresponds to the node's type is not set in `what_to_show`. When no
/// callback is supplied the node is accepted. Otherwise the callback is called
/// with the node and its `u16` result is mapped from the
/// `NodeFilter._FILTER_*` constants; any other value counts as `.reject`.
///
/// Errors from `parser.nodeType` and from the callback invocation are propagated.
pub fn verify(what_to_show: u32, filter: ?Env.Function, node: *parser.Node) !VerifyResult {
    // Pick the single mask bit that represents this node's type.
    const show_bit: u32 = switch (try parser.nodeType(node)) {
        .attribute => NodeFilter._SHOW_ATTRIBUTE,
        .cdata_section => NodeFilter._SHOW_CDATA_SECTION,
        .comment => NodeFilter._SHOW_COMMENT,
        .document => NodeFilter._SHOW_DOCUMENT,
        .document_fragment => NodeFilter._SHOW_DOCUMENT_FRAGMENT,
        .document_type => NodeFilter._SHOW_DOCUMENT_TYPE,
        .element => NodeFilter._SHOW_ELEMENT,
        .entity => NodeFilter._SHOW_ENTITY,
        .entity_reference => NodeFilter._SHOW_ENTITY_REFERENCE,
        .notation => NodeFilter._SHOW_NOTATION,
        .processing_instruction => NodeFilter._SHOW_PROCESSING_INSTRUCTION,
        .text => NodeFilter._SHOW_TEXT,
    };

    // The node type is not part of the requested mask.
    if (what_to_show & show_bit == 0) return .reject;

    // No user callback: everything that passed the mask is accepted.
    const callback = filter orelse return .accept;

    return switch (try callback.call(u16, .{node})) {
        NodeFilter._FILTER_ACCEPT => .accept,
        NodeFilter._FILTER_REJECT => .reject,
        NodeFilter._FILTER_SKIP => .skip,
        else => .reject,
    };
}
const testing = @import("../../testing.zig"); const testing = @import("../../testing.zig");
test "Browser.DOM.NodeFilter" { test "Browser.DOM.NodeFilter" {
var runner = try testing.jsRunner(testing.tracking_allocator, .{}); var runner = try testing.jsRunner(testing.tracking_allocator, .{});

View File

@@ -19,19 +19,47 @@
const std = @import("std"); const std = @import("std");
const parser = @import("../netsurf.zig"); const parser = @import("../netsurf.zig");
const Env = @import("../env.zig").Env; const Env = @import("../env.zig").Env;
const TreeWalker = @import("tree_walker.zig").TreeWalker; const NodeFilter = @import("node_filter.zig");
// https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator // https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator
// While this is similar to TreeWalker, it has its own implementation as there are several subtle differences
// For example:
// - nextNode returns the reference node, whereas TreeWalker returns the next node
// - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different
pub const NodeIterator = struct { pub const NodeIterator = struct {
walker: TreeWalker, root: *parser.Node,
reference_node: *parser.Node,
what_to_show: u32,
filter: ?NodeIteratorOpts,
filter_func: ?Env.Function,
pointer_before_current: bool = true, pointer_before_current: bool = true,
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !NodeIterator { pub const NodeIteratorOpts = union(enum) {
return .{ .walker = try TreeWalker.init(node, what_to_show, filter) }; function: Env.Function,
object: struct { acceptNode: Env.Function },
};
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?NodeIteratorOpts) !NodeIterator {
var filter_func: ?Env.Function = null;
if (filter) |f| {
filter_func = switch (f) {
.function => |func| func,
.object => |o| o.acceptNode,
};
}
return .{
.root = node,
.reference_node = node,
.what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL,
.filter = filter,
.filter_func = filter_func,
};
} }
pub fn get_filter(self: *const NodeIterator) ?Env.Function { pub fn get_filter(self: *const NodeIterator) ?NodeIteratorOpts {
return self.walker.filter; return self.filter;
} }
pub fn get_pointerBeforeReferenceNode(self: *const NodeIterator) bool { pub fn get_pointerBeforeReferenceNode(self: *const NodeIterator) bool {
@@ -39,34 +67,34 @@ pub const NodeIterator = struct {
} }
pub fn get_referenceNode(self: *const NodeIterator) *parser.Node { pub fn get_referenceNode(self: *const NodeIterator) *parser.Node {
return self.walker.current_node; return self.reference_node;
} }
pub fn get_root(self: *const NodeIterator) *parser.Node { pub fn get_root(self: *const NodeIterator) *parser.Node {
return self.walker.root; return self.root;
} }
pub fn get_whatToShow(self: *const NodeIterator) u32 { pub fn get_whatToShow(self: *const NodeIterator) u32 {
return self.walker.what_to_show; return self.what_to_show;
} }
pub fn _nextNode(self: *NodeIterator) !?*parser.Node { pub fn _nextNode(self: *NodeIterator) !?*parser.Node {
if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node
self.pointer_before_current = false; self.pointer_before_current = false;
if (.accept == try self.walker.verify(self.walker.current_node)) { if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
return self.walker.current_node; return self.reference_node;
} }
} }
if (try self.firstChild(self.walker.current_node)) |child| { if (try self.firstChild(self.reference_node)) |child| {
self.walker.current_node = child; self.reference_node = child;
return child; return child;
} }
var current = self.walker.current_node; var current = self.reference_node;
while (current != self.walker.root) { while (current != self.root) {
if (try self.walker.nextSibling(current)) |sibling| { if (try self.nextSibling(current)) |sibling| {
self.walker.current_node = sibling; self.reference_node = sibling;
return sibling; return sibling;
} }
@@ -79,41 +107,41 @@ pub const NodeIterator = struct {
pub fn _previousNode(self: *NodeIterator) !?*parser.Node { pub fn _previousNode(self: *NodeIterator) !?*parser.Node {
if (!self.pointer_before_current) { if (!self.pointer_before_current) {
self.pointer_before_current = true; self.pointer_before_current = true;
if (.accept == try self.walker.verify(self.walker.current_node)) { if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
return self.walker.current_node; // Still need to verify as last may be first as well return self.reference_node; // Still need to verify as last may be first as well
} }
} }
if (self.walker.current_node == self.walker.root) return null; if (self.reference_node == self.root) return null;
var current = self.walker.current_node; var current = self.reference_node;
while (try parser.nodePreviousSibling(current)) |previous| { while (try parser.nodePreviousSibling(current)) |previous| {
current = previous; current = previous;
switch (try self.walker.verify(current)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => { .accept => {
// Get last child if it has one. // Get last child if it has one.
if (try self.lastChild(current)) |child| { if (try self.lastChild(current)) |child| {
self.walker.current_node = child; self.reference_node = child;
return child; return child;
} }
// Otherwise, this node is our previous one. // Otherwise, this node is our previous one.
self.walker.current_node = current; self.reference_node = current;
return current; return current;
}, },
.reject, .skip => { .reject, .skip => {
// Get last child if it has one. // Get last child if it has one.
if (try self.lastChild(current)) |child| { if (try self.lastChild(current)) |child| {
self.walker.current_node = child; self.reference_node = child;
return child; return child;
} }
}, },
} }
} }
if (current != self.walker.root) { if (current != self.root) {
if (try self.walker.parentNode(current)) |parent| { if (try self.parentNode(current)) |parent| {
self.walker.current_node = parent; self.reference_node = parent;
return parent; return parent;
} }
} }
@@ -129,7 +157,7 @@ pub const NodeIterator = struct {
const index: u32 = @intCast(i); const index: u32 = @intCast(i);
const child = (try parser.nodeListItem(children, index)) orelse return null; const child = (try parser.nodeListItem(children, index)) orelse return null;
switch (try self.walker.verify(child)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) {
.accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker .accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker
.reject, .skip => if (try self.firstChild(child)) |gchild| return gchild, .reject, .skip => if (try self.firstChild(child)) |gchild| return gchild,
} }
@@ -147,7 +175,7 @@ pub const NodeIterator = struct {
index -= 1; index -= 1;
const child = (try parser.nodeListItem(children, index)) orelse return null; const child = (try parser.nodeListItem(children, index)) orelse return null;
switch (try self.walker.verify(child)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) {
.accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker .accept => return child, // NOTE: Skip and reject are equivalent for NodeIterator, this is different from TreeWalker
.reject, .skip => if (try self.lastChild(child)) |gchild| return gchild, .reject, .skip => if (try self.lastChild(child)) |gchild| return gchild,
} }
@@ -155,6 +183,38 @@ pub const NodeIterator = struct {
return null; return null;
} }
/// Walks up from `node` and returns the closest ancestor that
/// `NodeFilter.verify` accepts, never climbing past `self.root`.
///
/// Returns null when `node` is the root, when an ancestor has no parent, or
/// when no ancestor up to and including the root is accepted.
/// Same logic as the TreeWalker implementation.
fn parentNode(self: *const NodeIterator, node: *parser.Node) !?*parser.Node {
    var ancestor = node;
    // Stop as soon as the root has been reached; it is the upper bound of the walk.
    while (ancestor != self.root) {
        ancestor = (try parser.nodeParentNode(ancestor)) orelse return null;

        // Skipped and rejected ancestors are treated alike: keep climbing.
        if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, ancestor)) {
            return ancestor;
        }
    }
    return null;
}
/// Returns the first following sibling of `node` that `NodeFilter.verify`
/// accepts, or null when the sibling chain ends without an accepted node.
/// Same logic as the TreeWalker implementation.
fn nextSibling(self: *const NodeIterator, node: *parser.Node) !?*parser.Node {
    var cursor = node;
    while (try parser.nodeNextSibling(cursor)) |sibling| {
        if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, sibling)) {
            return sibling;
        }
        // Skipped and rejected siblings are treated alike: move on to the next one.
        cursor = sibling;
    }
    return null;
}
}; };
const testing = @import("../../testing.zig"); const testing = @import("../../testing.zig");
@@ -210,4 +270,19 @@ test "Browser.DOM.NodeFilter" {
}, },
.{ "notationIterator.previousNode()", "null" }, .{ "notationIterator.previousNode()", "null" },
}, .{}); }, .{});
try runner.testCases(&.{
.{ "nodeIterator.filter.acceptNode(document.body)", "1" },
.{ "notationIterator.filter", "null" },
.{
\\ const rejectIterator = document.createNodeIterator(
\\ document.body,
\\ NodeFilter.SHOW_ALL,
\\ (e => { return NodeFilter.FILTER_REJECT}),
\\ );
\\ rejectIterator.filter(document.body);
,
"2",
},
}, .{});
} }

View File

@@ -19,7 +19,7 @@
const std = @import("std"); const std = @import("std");
const parser = @import("../netsurf.zig"); const parser = @import("../netsurf.zig");
const NodeFilter = @import("node_filter.zig").NodeFilter; const NodeFilter = @import("node_filter.zig");
const Env = @import("../env.zig").Env; const Env = @import("../env.zig").Env;
const Page = @import("../page.zig").Page; const Page = @import("../page.zig").Page;
@@ -28,7 +28,8 @@ pub const TreeWalker = struct {
root: *parser.Node, root: *parser.Node,
current_node: *parser.Node, current_node: *parser.Node,
what_to_show: u32, what_to_show: u32,
filter: ?Env.Function, filter: ?TreeWalkerOpts,
filter_func: ?Env.Function,
pub const TreeWalkerOpts = union(enum) { pub const TreeWalkerOpts = union(enum) {
function: Env.Function, function: Env.Function,
@@ -48,45 +49,12 @@ pub const TreeWalker = struct {
return .{ return .{
.root = node, .root = node,
.current_node = node, .current_node = node,
.what_to_show = what_to_show orelse NodeFilter._SHOW_ALL, .what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL,
.filter = filter_func, .filter = filter,
.filter_func = filter_func,
}; };
} }
/// Outcome of checking a node against the walker's mask and optional filter callback.
const VerifyResult = enum { accept, skip, reject };

/// Decides whether `node` passes this walker's `what_to_show` bitmask and its
/// optional `filter` callback.
///
/// Returns `.reject` without invoking the callback when the `_SHOW_*` bit that
/// corresponds to the node's type is not set. When the walker has no callback
/// the node is accepted. Otherwise the callback is called with the node and
/// its `u16` result is mapped from the `NodeFilter._FILTER_*` constants; any
/// other value counts as `.reject`.
///
/// Errors from `parser.nodeType` and from the callback invocation are propagated.
pub fn verify(self: *const TreeWalker, node: *parser.Node) !VerifyResult {
    // Pick the single mask bit that represents this node's type.
    const show_bit: u32 = switch (try parser.nodeType(node)) {
        .attribute => NodeFilter._SHOW_ATTRIBUTE,
        .cdata_section => NodeFilter._SHOW_CDATA_SECTION,
        .comment => NodeFilter._SHOW_COMMENT,
        .document => NodeFilter._SHOW_DOCUMENT,
        .document_fragment => NodeFilter._SHOW_DOCUMENT_FRAGMENT,
        .document_type => NodeFilter._SHOW_DOCUMENT_TYPE,
        .element => NodeFilter._SHOW_ELEMENT,
        .entity => NodeFilter._SHOW_ENTITY,
        .entity_reference => NodeFilter._SHOW_ENTITY_REFERENCE,
        .notation => NodeFilter._SHOW_NOTATION,
        .processing_instruction => NodeFilter._SHOW_PROCESSING_INSTRUCTION,
        .text => NodeFilter._SHOW_TEXT,
    };

    // The node type is not part of the requested mask.
    if (self.what_to_show & show_bit == 0) return .reject;

    // No user callback: everything that passed the mask is accepted.
    const callback = self.filter orelse return .accept;

    return switch (try callback.call(u16, .{node})) {
        NodeFilter._FILTER_ACCEPT => .accept,
        NodeFilter._FILTER_REJECT => .reject,
        NodeFilter._FILTER_SKIP => .skip,
        else => .reject,
    };
}
pub fn get_root(self: *TreeWalker) *parser.Node { pub fn get_root(self: *TreeWalker) *parser.Node {
return self.root; return self.root;
} }
@@ -99,7 +67,7 @@ pub const TreeWalker = struct {
return self.what_to_show; return self.what_to_show;
} }
pub fn get_filter(self: *TreeWalker) ?Env.Function { pub fn get_filter(self: *TreeWalker) ?TreeWalkerOpts {
return self.filter; return self.filter;
} }
@@ -115,7 +83,7 @@ pub const TreeWalker = struct {
const index: u32 = @intCast(i); const index: u32 = @intCast(i);
const child = (try parser.nodeListItem(children, index)) orelse return null; const child = (try parser.nodeListItem(children, index)) orelse return null;
switch (try self.verify(child)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) {
.accept => return child, .accept => return child,
.reject => continue, .reject => continue,
.skip => if (try self.firstChild(child)) |gchild| return gchild, .skip => if (try self.firstChild(child)) |gchild| return gchild,
@@ -134,7 +102,7 @@ pub const TreeWalker = struct {
index -= 1; index -= 1;
const child = (try parser.nodeListItem(children, index)) orelse return null; const child = (try parser.nodeListItem(children, index)) orelse return null;
switch (try self.verify(child)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, child)) {
.accept => return child, .accept => return child,
.reject => continue, .reject => continue,
.skip => if (try self.lastChild(child)) |gchild| return gchild, .skip => if (try self.lastChild(child)) |gchild| return gchild,
@@ -144,13 +112,13 @@ pub const TreeWalker = struct {
return null; return null;
} }
pub fn nextSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { fn nextSibling(self: *const TreeWalker, node: *parser.Node) !?*parser.Node {
var current = node; var current = node;
while (true) { while (true) {
current = (try parser.nodeNextSibling(current)) orelse return null; current = (try parser.nodeNextSibling(current)) orelse return null;
switch (try self.verify(current)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => return current, .accept => return current,
.skip, .reject => continue, .skip, .reject => continue,
} }
@@ -165,7 +133,7 @@ pub const TreeWalker = struct {
while (true) { while (true) {
current = (try parser.nodePreviousSibling(current)) orelse return null; current = (try parser.nodePreviousSibling(current)) orelse return null;
switch (try self.verify(current)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => return current, .accept => return current,
.skip, .reject => continue, .skip, .reject => continue,
} }
@@ -174,7 +142,7 @@ pub const TreeWalker = struct {
return null; return null;
} }
pub fn parentNode(self: *const TreeWalker, node: *parser.Node) !?*parser.Node { fn parentNode(self: *const TreeWalker, node: *parser.Node) !?*parser.Node {
if (self.root == node) return null; if (self.root == node) return null;
var current = node; var current = node;
@@ -182,7 +150,7 @@ pub const TreeWalker = struct {
if (current == self.root) return null; if (current == self.root) return null;
current = (try parser.nodeParentNode(current)) orelse return null; current = (try parser.nodeParentNode(current)) orelse return null;
switch (try self.verify(current)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => return current, .accept => return current,
.reject, .skip => continue, .reject, .skip => continue,
} }
@@ -251,7 +219,7 @@ pub const TreeWalker = struct {
while (try parser.nodePreviousSibling(current)) |previous| { while (try parser.nodePreviousSibling(current)) |previous| {
current = previous; current = previous;
switch (try self.verify(current)) { switch (try NodeFilter.verify(self.what_to_show, self.filter_func, current)) {
.accept => { .accept => {
// Get last child if it has one. // Get last child if it has one.
if (try self.lastChild(current)) |child| { if (try self.lastChild(current)) |child| {