Improves correctness of NodeIterator

Minor improvement to correctness of TreeWalker.

Fun fact, this is the first time, that I've run into, where we have to default
null and undefined to different values.

Also, tweaked the WPT test runner. WPT test results use | as a field delimiter.
But a WPT test (and, I assume a message) can contain a |. So we had at least
a few tests that were being reported as failed, only because the result line
was weird / unexpected. No great robust way to parse this, but I changed it
to look explicitly for |Pass or |Fail and use those positions as anchor points.
This commit is contained in:
Karl Seguin
2025-08-21 18:11:48 +08:00
parent 557f8444b2
commit ce14f0b380
5 changed files with 142 additions and 28 deletions

View File

@@ -249,12 +249,12 @@ pub const Document = struct {
return Node.replaceChildren(parser.documentToNode(self), nodes);
}
pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker {
return try TreeWalker.init(root, what_to_show, filter);
pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?TreeWalker.WhatToShow, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker {
return TreeWalker.init(root, what_to_show, filter);
}
pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator {
return try NodeIterator.init(root, what_to_show, filter);
pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?NodeIterator.WhatToShow, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator {
return NodeIterator.init(root, what_to_show, filter);
}
pub fn getActiveElement(self: *parser.Document, page: *Page) !?*parser.Element {

View File

@@ -17,11 +17,13 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const parser = @import("../netsurf.zig");
const Env = @import("../env.zig").Env;
const NodeFilter = @import("node_filter.zig");
const Node = @import("node.zig").Node;
const NodeUnion = @import("node.zig").Union;
const DOMException = @import("exceptions.zig").DOMException;
// https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator
// While this is similar to TreeWalker it has its own implementation as there are several subtle differences
@@ -29,20 +31,28 @@ const NodeUnion = @import("node.zig").Union;
// - nextNode returns the reference node, whereas TreeWalker returns the next node
// - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different
pub const NodeIterator = struct {
pub const Exception = DOMException;
root: *parser.Node,
reference_node: *parser.Node,
what_to_show: u32,
filter: ?NodeIteratorOpts,
filter_func: ?Env.Function,
pointer_before_current: bool = true,
// used to track / block recursive filters
is_in_callback: bool = false,
// One of the few cases where null and undefined resolve to different default.
// We need the raw JsObject so that we can probe the tri state:
// null, undefined or i32.
pub const WhatToShow = Env.JsObject;
pub const NodeIteratorOpts = union(enum) {
function: Env.Function,
object: struct { acceptNode: Env.Function },
};
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?NodeIteratorOpts) !NodeIterator {
pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?NodeIteratorOpts) !NodeIterator {
var filter_func: ?Env.Function = null;
if (filter) |f| {
filter_func = switch (f) {
@@ -51,10 +61,21 @@ pub const NodeIterator = struct {
};
}
var what_to_show: u32 = undefined;
if (what_to_show_) |wts| {
switch (try wts.triState(NodeIterator, "what_to_show", u32)) {
.null => what_to_show = 0,
.undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL,
.value => |v| what_to_show = v,
}
} else {
what_to_show = NodeFilter.NodeFilter._SHOW_ALL;
}
return .{
.root = node,
.reference_node = node,
.what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL,
.what_to_show = what_to_show,
.filter = filter,
.filter_func = filter_func,
};
@@ -81,9 +102,13 @@ pub const NodeIterator = struct {
}
pub fn _nextNode(self: *NodeIterator) !?NodeUnion {
if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node
self.pointer_before_current = false;
try self.callbackStart();
defer self.callbackEnd();
if (self.pointer_before_current) {
// Unlike TreeWalker, NodeIterator starts at the first node
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
self.pointer_before_current = false;
return try Node.toInterface(self.reference_node);
}
}
@@ -107,13 +132,19 @@ pub const NodeIterator = struct {
}
pub fn _previousNode(self: *NodeIterator) !?NodeUnion {
try self.callbackStart();
defer self.callbackEnd();
if (!self.pointer_before_current) {
self.pointer_before_current = true;
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
return try Node.toInterface(self.reference_node); // Still need to verify as last may be first as well
self.pointer_before_current = true;
// Still need to verify as last may be first as well
return try Node.toInterface(self.reference_node);
}
}
if (self.reference_node == self.root) return null;
if (self.reference_node == self.root) {
return null;
}
var current = self.reference_node;
while (try parser.nodePreviousSibling(current)) |previous| {
@@ -151,6 +182,11 @@ pub const NodeIterator = struct {
return null;
}
pub fn _detach(self: *const NodeIterator) void {
// no-op as per spec
_ = self;
}
fn firstChild(self: *const NodeIterator, node: *parser.Node) !?*parser.Node {
const children = try parser.nodeGetChildNodes(node);
const child_count = try parser.nodeListLength(children);
@@ -217,6 +253,18 @@ pub const NodeIterator = struct {
return null;
}
fn callbackStart(self: *NodeIterator) !void {
if (self.is_in_callback) {
// this is the correct DOMExeption
return error.InvalidState;
}
self.is_in_callback = true;
}
fn callbackEnd(self: *NodeIterator) void {
self.is_in_callback = false;
}
};
const testing = @import("../../testing.zig");

View File

@@ -33,12 +33,17 @@ pub const TreeWalker = struct {
filter: ?TreeWalkerOpts,
filter_func: ?Env.Function,
// One of the few cases where null and undefined resolve to different default.
// We need the raw JsObject so that we can probe the tri state:
// null, undefined or i32.
pub const WhatToShow = Env.JsObject;
pub const TreeWalkerOpts = union(enum) {
function: Env.Function,
object: struct { acceptNode: Env.Function },
};
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?TreeWalkerOpts) !TreeWalker {
pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?TreeWalkerOpts) !TreeWalker {
var filter_func: ?Env.Function = null;
if (filter) |f| {
@@ -48,10 +53,21 @@ pub const TreeWalker = struct {
};
}
var what_to_show: u32 = undefined;
if (what_to_show_) |wts| {
switch (try wts.triState(TreeWalker, "what_to_show", u32)) {
.null => what_to_show = 0,
.undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL,
.value => |v| what_to_show = v,
}
} else {
what_to_show = NodeFilter.NodeFilter._SHOW_ALL;
}
return .{
.root = node,
.current_node = node,
.what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL,
.what_to_show = what_to_show,
.filter = filter,
.filter_func = filter_func,
};

View File

@@ -263,22 +263,25 @@ const Writer = struct {
if (line.len == 0) {
break;
}
var fields = std.mem.splitScalar(u8, line, '|');
const case_name = fields.next() orelse {
std.debug.print("invalid result line: {s}\n", .{line});
return error.InvalidResult;
};
// case names can have | in them, so we can't simply split on |
var case_name = line;
var case_pass = false; // so pessimistic!
var case_message: []const u8 = "";
const text_status = fields.next() orelse {
std.debug.print("invalid result line: {s}\n", .{line});
return error.InvalidResult;
};
const case_pass = std.mem.eql(u8, text_status, "Pass");
if (case_pass) {
if (std.mem.endsWith(u8, line, "|Pass")) {
case_name = line[0 .. line.len - 5];
case_pass = true;
case_pass_count += 1;
} else {
// If 1 case fails, we treat the entire file as a fail.
// both cases names and messages can have | in them. Our only
// chance to "parse" this is to anchor off the |Fail.
const pos = std.mem.indexOf(u8, line, "|Fail") orelse {
std.debug.print("invalid result line: {s}\n", .{line});
return error.InvalidResult;
};
case_name = line[0..pos];
case_message = line[pos + 1 ..];
pass = false;
case_fail_count += 1;
}
@@ -286,7 +289,7 @@ const Writer = struct {
try cases.append(self.arena, .{
.name = case_name,
.pass = case_pass,
.message = fields.next(),
.message = case_message,
});
}

View File

@@ -941,6 +941,27 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
fn jsValueToZig(self: *JsContext, comptime named_function: NamedFunction, comptime T: type, js_value: v8.Value) !T {
switch (@typeInfo(T)) {
.optional => |o| {
if (comptime isJsObject(o.child)) {
// If type type is a ?JsObject, then we want to pass
// a JsObject, not null. Consider a function,
// _doSomething(arg: ?Env.JsObjet) void { ... }
//
// And then these three calls:
// doSomething();
// doSomething(null);
//
// In the first case, we'll pass `null`. But in the
// second, we'll pass a JsObject which represents
// null.
// If we don't have this code, both cases will
// pass in `null` and the the doSomething won't
// be able to tell if `null` was explicitly passed
// or whether no parameter was passed.
return JsObject{
.js_obj = js_value.castTo(v8.Object),
.js_context = self,
};
}
if (js_value.isNullOrUndefined()) {
return null;
}
@@ -1938,6 +1959,24 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
return try js_context.createFunction(js_value);
}
pub fn isNull(self: JsObject) bool {
return self.js_obj.toValue().isNull();
}
pub fn isUndefined(self: JsObject) bool {
return self.js_obj.toValue().isUndefined();
}
pub fn triState(self: JsObject, comptime Struct: type, comptime name: []const u8, comptime T: type) !TriState(T) {
if (self.isNull()) {
return .{ .null = {} };
}
if (self.isUndefined()) {
return .{ .undefined = {} };
}
return .{ .value = try self.toZig(Struct, name, T) };
}
pub fn isNullOrUndefined(self: JsObject) bool {
return self.js_obj.toValue().isNullOrUndefined();
}
@@ -1965,6 +2004,14 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
const named_function = comptime NamedFunction.init(Struct, name);
return self.js_context.jsValueToZig(named_function, T, self.js_obj.toValue());
}
pub fn TriState(comptime T: type) type {
return union(enum) {
null: void,
undefined: void,
value: T,
};
}
};
// This only exists so that we know whether a function wants the opaque