Improves correctness of NodeIterator

Minor improvement to correctness of TreeWalker.

Fun fact, this is the first time, that I've run into, where we have to default
null and undefined to different values.

Also, tweaked the WPT test runner. WPT test results use | as a field delimiter.
But a WPT test (and, I assume a message) can contain a |. So we had at least
a few tests that were being reported as failed, only because the result line
was weird / unexpected. No great robust way to parse this, but I changed it
to look explicitly for |Pass or |Fail and use those positions as anchor points.
This commit is contained in:
Karl Seguin
2025-08-21 18:11:48 +08:00
parent 557f8444b2
commit ce14f0b380
5 changed files with 142 additions and 28 deletions

View File

@@ -249,12 +249,12 @@ pub const Document = struct {
return Node.replaceChildren(parser.documentToNode(self), nodes); return Node.replaceChildren(parser.documentToNode(self), nodes);
} }
pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker { pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?TreeWalker.WhatToShow, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker {
return try TreeWalker.init(root, what_to_show, filter); return TreeWalker.init(root, what_to_show, filter);
} }
pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator { pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?NodeIterator.WhatToShow, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator {
return try NodeIterator.init(root, what_to_show, filter); return NodeIterator.init(root, what_to_show, filter);
} }
pub fn getActiveElement(self: *parser.Document, page: *Page) !?*parser.Element { pub fn getActiveElement(self: *parser.Document, page: *Page) !?*parser.Element {

View File

@@ -17,11 +17,13 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std"); const std = @import("std");
const parser = @import("../netsurf.zig"); const parser = @import("../netsurf.zig");
const Env = @import("../env.zig").Env; const Env = @import("../env.zig").Env;
const NodeFilter = @import("node_filter.zig"); const NodeFilter = @import("node_filter.zig");
const Node = @import("node.zig").Node; const Node = @import("node.zig").Node;
const NodeUnion = @import("node.zig").Union; const NodeUnion = @import("node.zig").Union;
const DOMException = @import("exceptions.zig").DOMException;
// https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator // https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator
// While this is similar to TreeWalker it has its own implementation as there are several subtle differences // While this is similar to TreeWalker it has its own implementation as there are several subtle differences
@@ -29,20 +31,28 @@ const NodeUnion = @import("node.zig").Union;
// - nextNode returns the reference node, whereas TreeWalker returns the next node // - nextNode returns the reference node, whereas TreeWalker returns the next node
// - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different // - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different
pub const NodeIterator = struct { pub const NodeIterator = struct {
pub const Exception = DOMException;
root: *parser.Node, root: *parser.Node,
reference_node: *parser.Node, reference_node: *parser.Node,
what_to_show: u32, what_to_show: u32,
filter: ?NodeIteratorOpts, filter: ?NodeIteratorOpts,
filter_func: ?Env.Function, filter_func: ?Env.Function,
pointer_before_current: bool = true, pointer_before_current: bool = true,
// used to track / block recursive filters
is_in_callback: bool = false,
// One of the few cases where null and undefined resolve to different default.
// We need the raw JsObject so that we can probe the tri state:
// null, undefined or i32.
pub const WhatToShow = Env.JsObject;
pub const NodeIteratorOpts = union(enum) { pub const NodeIteratorOpts = union(enum) {
function: Env.Function, function: Env.Function,
object: struct { acceptNode: Env.Function }, object: struct { acceptNode: Env.Function },
}; };
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?NodeIteratorOpts) !NodeIterator { pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?NodeIteratorOpts) !NodeIterator {
var filter_func: ?Env.Function = null; var filter_func: ?Env.Function = null;
if (filter) |f| { if (filter) |f| {
filter_func = switch (f) { filter_func = switch (f) {
@@ -51,10 +61,21 @@ pub const NodeIterator = struct {
}; };
} }
var what_to_show: u32 = undefined;
if (what_to_show_) |wts| {
switch (try wts.triState(NodeIterator, "what_to_show", u32)) {
.null => what_to_show = 0,
.undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL,
.value => |v| what_to_show = v,
}
} else {
what_to_show = NodeFilter.NodeFilter._SHOW_ALL;
}
return .{ return .{
.root = node, .root = node,
.reference_node = node, .reference_node = node,
.what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, .what_to_show = what_to_show,
.filter = filter, .filter = filter,
.filter_func = filter_func, .filter_func = filter_func,
}; };
@@ -81,9 +102,13 @@ pub const NodeIterator = struct {
} }
pub fn _nextNode(self: *NodeIterator) !?NodeUnion { pub fn _nextNode(self: *NodeIterator) !?NodeUnion {
if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node try self.callbackStart();
self.pointer_before_current = false; defer self.callbackEnd();
if (self.pointer_before_current) {
// Unlike TreeWalker, NodeIterator starts at the first node
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
self.pointer_before_current = false;
return try Node.toInterface(self.reference_node); return try Node.toInterface(self.reference_node);
} }
} }
@@ -107,13 +132,19 @@ pub const NodeIterator = struct {
} }
pub fn _previousNode(self: *NodeIterator) !?NodeUnion { pub fn _previousNode(self: *NodeIterator) !?NodeUnion {
try self.callbackStart();
defer self.callbackEnd();
if (!self.pointer_before_current) { if (!self.pointer_before_current) {
self.pointer_before_current = true;
if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) {
return try Node.toInterface(self.reference_node); // Still need to verify as last may be first as well self.pointer_before_current = true;
// Still need to verify as last may be first as well
return try Node.toInterface(self.reference_node);
} }
} }
if (self.reference_node == self.root) return null; if (self.reference_node == self.root) {
return null;
}
var current = self.reference_node; var current = self.reference_node;
while (try parser.nodePreviousSibling(current)) |previous| { while (try parser.nodePreviousSibling(current)) |previous| {
@@ -151,6 +182,11 @@ pub const NodeIterator = struct {
return null; return null;
} }
pub fn _detach(self: *const NodeIterator) void {
// no-op as per spec
_ = self;
}
fn firstChild(self: *const NodeIterator, node: *parser.Node) !?*parser.Node { fn firstChild(self: *const NodeIterator, node: *parser.Node) !?*parser.Node {
const children = try parser.nodeGetChildNodes(node); const children = try parser.nodeGetChildNodes(node);
const child_count = try parser.nodeListLength(children); const child_count = try parser.nodeListLength(children);
@@ -217,6 +253,18 @@ pub const NodeIterator = struct {
return null; return null;
} }
fn callbackStart(self: *NodeIterator) !void {
if (self.is_in_callback) {
// this is the correct DOMExeption
return error.InvalidState;
}
self.is_in_callback = true;
}
fn callbackEnd(self: *NodeIterator) void {
self.is_in_callback = false;
}
}; };
const testing = @import("../../testing.zig"); const testing = @import("../../testing.zig");

View File

@@ -33,12 +33,17 @@ pub const TreeWalker = struct {
filter: ?TreeWalkerOpts, filter: ?TreeWalkerOpts,
filter_func: ?Env.Function, filter_func: ?Env.Function,
// One of the few cases where null and undefined resolve to different default.
// We need the raw JsObject so that we can probe the tri state:
// null, undefined or i32.
pub const WhatToShow = Env.JsObject;
pub const TreeWalkerOpts = union(enum) { pub const TreeWalkerOpts = union(enum) {
function: Env.Function, function: Env.Function,
object: struct { acceptNode: Env.Function }, object: struct { acceptNode: Env.Function },
}; };
pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?TreeWalkerOpts) !TreeWalker { pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?TreeWalkerOpts) !TreeWalker {
var filter_func: ?Env.Function = null; var filter_func: ?Env.Function = null;
if (filter) |f| { if (filter) |f| {
@@ -48,10 +53,21 @@ pub const TreeWalker = struct {
}; };
} }
var what_to_show: u32 = undefined;
if (what_to_show_) |wts| {
switch (try wts.triState(TreeWalker, "what_to_show", u32)) {
.null => what_to_show = 0,
.undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL,
.value => |v| what_to_show = v,
}
} else {
what_to_show = NodeFilter.NodeFilter._SHOW_ALL;
}
return .{ return .{
.root = node, .root = node,
.current_node = node, .current_node = node,
.what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, .what_to_show = what_to_show,
.filter = filter, .filter = filter,
.filter_func = filter_func, .filter_func = filter_func,
}; };

View File

@@ -263,22 +263,25 @@ const Writer = struct {
if (line.len == 0) { if (line.len == 0) {
break; break;
} }
var fields = std.mem.splitScalar(u8, line, '|'); // case names can have | in them, so we can't simply split on |
const case_name = fields.next() orelse { var case_name = line;
std.debug.print("invalid result line: {s}\n", .{line}); var case_pass = false; // so pessimistic!
return error.InvalidResult; var case_message: []const u8 = "";
};
const text_status = fields.next() orelse { if (std.mem.endsWith(u8, line, "|Pass")) {
std.debug.print("invalid result line: {s}\n", .{line}); case_name = line[0 .. line.len - 5];
return error.InvalidResult; case_pass = true;
};
const case_pass = std.mem.eql(u8, text_status, "Pass");
if (case_pass) {
case_pass_count += 1; case_pass_count += 1;
} else { } else {
// If 1 case fails, we treat the entire file as a fail. // both cases names and messages can have | in them. Our only
// chance to "parse" this is to anchor off the |Fail.
const pos = std.mem.indexOf(u8, line, "|Fail") orelse {
std.debug.print("invalid result line: {s}\n", .{line});
return error.InvalidResult;
};
case_name = line[0..pos];
case_message = line[pos + 1 ..];
pass = false; pass = false;
case_fail_count += 1; case_fail_count += 1;
} }
@@ -286,7 +289,7 @@ const Writer = struct {
try cases.append(self.arena, .{ try cases.append(self.arena, .{
.name = case_name, .name = case_name,
.pass = case_pass, .pass = case_pass,
.message = fields.next(), .message = case_message,
}); });
} }

View File

@@ -941,6 +941,27 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
fn jsValueToZig(self: *JsContext, comptime named_function: NamedFunction, comptime T: type, js_value: v8.Value) !T { fn jsValueToZig(self: *JsContext, comptime named_function: NamedFunction, comptime T: type, js_value: v8.Value) !T {
switch (@typeInfo(T)) { switch (@typeInfo(T)) {
.optional => |o| { .optional => |o| {
if (comptime isJsObject(o.child)) {
// If type type is a ?JsObject, then we want to pass
// a JsObject, not null. Consider a function,
// _doSomething(arg: ?Env.JsObjet) void { ... }
//
// And then these three calls:
// doSomething();
// doSomething(null);
//
// In the first case, we'll pass `null`. But in the
// second, we'll pass a JsObject which represents
// null.
// If we don't have this code, both cases will
// pass in `null` and the the doSomething won't
// be able to tell if `null` was explicitly passed
// or whether no parameter was passed.
return JsObject{
.js_obj = js_value.castTo(v8.Object),
.js_context = self,
};
}
if (js_value.isNullOrUndefined()) { if (js_value.isNullOrUndefined()) {
return null; return null;
} }
@@ -1938,6 +1959,24 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
return try js_context.createFunction(js_value); return try js_context.createFunction(js_value);
} }
pub fn isNull(self: JsObject) bool {
return self.js_obj.toValue().isNull();
}
pub fn isUndefined(self: JsObject) bool {
return self.js_obj.toValue().isUndefined();
}
pub fn triState(self: JsObject, comptime Struct: type, comptime name: []const u8, comptime T: type) !TriState(T) {
if (self.isNull()) {
return .{ .null = {} };
}
if (self.isUndefined()) {
return .{ .undefined = {} };
}
return .{ .value = try self.toZig(Struct, name, T) };
}
pub fn isNullOrUndefined(self: JsObject) bool { pub fn isNullOrUndefined(self: JsObject) bool {
return self.js_obj.toValue().isNullOrUndefined(); return self.js_obj.toValue().isNullOrUndefined();
} }
@@ -1965,6 +2004,14 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
const named_function = comptime NamedFunction.init(Struct, name); const named_function = comptime NamedFunction.init(Struct, name);
return self.js_context.jsValueToZig(named_function, T, self.js_obj.toValue()); return self.js_context.jsValueToZig(named_function, T, self.js_obj.toValue());
} }
pub fn TriState(comptime T: type) type {
return union(enum) {
null: void,
undefined: void,
value: T,
};
}
}; };
// This only exists so that we know whether a function wants the opaque // This only exists so that we know whether a function wants the opaque