From ce14f0b38015a60a9abf426ecca5cf8a49c3c996 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 21 Aug 2025 18:11:48 +0800 Subject: [PATCH] Improves correctness of NodeIterator Minor improvement to correctness of TreeWalker. Fun fact, this is the first time, that I've run into, where we have to default null and undefined to different values. Also, tweaked the WPT test runner. WPT test results use | as a field delimiter. But a WPT test (and, I assume a message) can contain a |. So we had at least a few tests that were being reported as failed, only because the result line was weird / unexpected. No great robust way to parse this, but I changed it to look explicitly for |Pass or |Fail and use those positions as anchor points. --- src/browser/dom/document.zig | 8 ++-- src/browser/dom/node_iterator.zig | 64 +++++++++++++++++++++++++++---- src/browser/dom/tree_walker.zig | 20 +++++++++- src/main_wpt.zig | 31 ++++++++------- src/runtime/js.zig | 47 +++++++++++++++++++++++ 5 files changed, 142 insertions(+), 28 deletions(-) diff --git a/src/browser/dom/document.zig b/src/browser/dom/document.zig index a1819e9e..4facba99 100644 --- a/src/browser/dom/document.zig +++ b/src/browser/dom/document.zig @@ -249,12 +249,12 @@ pub const Document = struct { return Node.replaceChildren(parser.documentToNode(self), nodes); } - pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker { - return try TreeWalker.init(root, what_to_show, filter); + pub fn _createTreeWalker(_: *parser.Document, root: *parser.Node, what_to_show: ?TreeWalker.WhatToShow, filter: ?TreeWalker.TreeWalkerOpts) !TreeWalker { + return TreeWalker.init(root, what_to_show, filter); } - pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?u32, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator { - return try NodeIterator.init(root, what_to_show, filter); + pub fn _createNodeIterator(_: *parser.Document, root: *parser.Node, what_to_show: ?NodeIterator.WhatToShow, filter: ?NodeIterator.NodeIteratorOpts) !NodeIterator { + return NodeIterator.init(root, what_to_show, filter); } pub fn getActiveElement(self: *parser.Document, page: *Page) !?*parser.Element { diff --git a/src/browser/dom/node_iterator.zig b/src/browser/dom/node_iterator.zig index 039227af..0d3c53a6 100644 --- a/src/browser/dom/node_iterator.zig +++ b/src/browser/dom/node_iterator.zig @@ -17,11 +17,13 @@ // along with this program. If not, see . const std = @import("std"); + const parser = @import("../netsurf.zig"); const Env = @import("../env.zig").Env; const NodeFilter = @import("node_filter.zig"); const Node = @import("node.zig").Node; const NodeUnion = @import("node.zig").Union; +const DOMException = @import("exceptions.zig").DOMException; // https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator // While this is similar to TreeWalker it has its own implementation as there are several subtle differences @@ -29,20 +31,28 @@ const NodeUnion = @import("node.zig").Union; // - nextNode returns the reference node, whereas TreeWalker returns the next node // - Skip and reject are equivalent for NodeIterator, for TreeWalker they are different pub const NodeIterator = struct { + pub const Exception = DOMException; + root: *parser.Node, reference_node: *parser.Node, what_to_show: u32, filter: ?NodeIteratorOpts, filter_func: ?Env.Function, - pointer_before_current: bool = true, + // used to track / block recursive filters + is_in_callback: bool = false, + + // One of the few cases where null and undefined resolve to different default. + // We need the raw JsObject so that we can probe the tri state: + // null, undefined or i32. + pub const WhatToShow = Env.JsObject; pub const NodeIteratorOpts = union(enum) { function: Env.Function, object: struct { acceptNode: Env.Function }, }; - pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?NodeIteratorOpts) !NodeIterator { + pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?NodeIteratorOpts) !NodeIterator { var filter_func: ?Env.Function = null; if (filter) |f| { filter_func = switch (f) { @@ -51,10 +61,21 @@ pub const NodeIterator = struct { }; } + var what_to_show: u32 = undefined; + if (what_to_show_) |wts| { + switch (try wts.triState(NodeIterator, "what_to_show", u32)) { + .null => what_to_show = 0, + .undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL, + .value => |v| what_to_show = v, + } + } else { + what_to_show = NodeFilter.NodeFilter._SHOW_ALL; + } + return .{ .root = node, .reference_node = node, - .what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, + .what_to_show = what_to_show, .filter = filter, .filter_func = filter_func, }; @@ -81,9 +102,13 @@ pub const NodeIterator = struct { } pub fn _nextNode(self: *NodeIterator) !?NodeUnion { - if (self.pointer_before_current) { // Unlike TreeWalker, NodeIterator starts at the first node - self.pointer_before_current = false; + try self.callbackStart(); + defer self.callbackEnd(); + + if (self.pointer_before_current) { + // Unlike TreeWalker, NodeIterator starts at the first node if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { + self.pointer_before_current = false; return try Node.toInterface(self.reference_node); } } @@ -107,13 +132,19 @@ pub const NodeIterator = struct { } pub fn _previousNode(self: *NodeIterator) !?NodeUnion { + try self.callbackStart(); + defer self.callbackEnd(); + if (!self.pointer_before_current) { - self.pointer_before_current = true; if (.accept == try NodeFilter.verify(self.what_to_show, self.filter_func, self.reference_node)) { - return try Node.toInterface(self.reference_node); // Still need to verify as last may be first as well + self.pointer_before_current = true; + // Still need to verify as last may be first as well + return try Node.toInterface(self.reference_node); } } - if (self.reference_node == self.root) return null; + if (self.reference_node == self.root) { + return null; + } var current = self.reference_node; while (try parser.nodePreviousSibling(current)) |previous| { @@ -151,6 +182,11 @@ pub const NodeIterator = struct { return null; } + pub fn _detach(self: *const NodeIterator) void { + // no-op as per spec + _ = self; + } + fn firstChild(self: *const NodeIterator, node: *parser.Node) !?*parser.Node { const children = try parser.nodeGetChildNodes(node); const child_count = try parser.nodeListLength(children); @@ -217,6 +253,18 @@ pub const NodeIterator = struct { return null; } + + fn callbackStart(self: *NodeIterator) !void { + if (self.is_in_callback) { + // this is the correct DOMExeption + return error.InvalidState; + } + self.is_in_callback = true; + } + + fn callbackEnd(self: *NodeIterator) void { + self.is_in_callback = false; + } }; const testing = @import("../../testing.zig"); diff --git a/src/browser/dom/tree_walker.zig b/src/browser/dom/tree_walker.zig index 4822e615..d4061cb2 100644 --- a/src/browser/dom/tree_walker.zig +++ b/src/browser/dom/tree_walker.zig @@ -33,12 +33,17 @@ pub const TreeWalker = struct { filter: ?TreeWalkerOpts, filter_func: ?Env.Function, + // One of the few cases where null and undefined resolve to different default. + // We need the raw JsObject so that we can probe the tri state: + // null, undefined or i32. + pub const WhatToShow = Env.JsObject; + pub const TreeWalkerOpts = union(enum) { function: Env.Function, object: struct { acceptNode: Env.Function }, }; - pub fn init(node: *parser.Node, what_to_show: ?u32, filter: ?TreeWalkerOpts) !TreeWalker { + pub fn init(node: *parser.Node, what_to_show_: ?WhatToShow, filter: ?TreeWalkerOpts) !TreeWalker { var filter_func: ?Env.Function = null; if (filter) |f| { @@ -48,10 +53,21 @@ pub const TreeWalker = struct { }; } + var what_to_show: u32 = undefined; + if (what_to_show_) |wts| { + switch (try wts.triState(TreeWalker, "what_to_show", u32)) { + .null => what_to_show = 0, + .undefined => what_to_show = NodeFilter.NodeFilter._SHOW_ALL, + .value => |v| what_to_show = v, + } + } else { + what_to_show = NodeFilter.NodeFilter._SHOW_ALL; + } + return .{ .root = node, .current_node = node, - .what_to_show = what_to_show orelse NodeFilter.NodeFilter._SHOW_ALL, + .what_to_show = what_to_show, .filter = filter, .filter_func = filter_func, }; diff --git a/src/main_wpt.zig b/src/main_wpt.zig index bbe80a3d..80ff62fd 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -263,22 +263,25 @@ const Writer = struct { if (line.len == 0) { break; } - var fields = std.mem.splitScalar(u8, line, '|'); - const case_name = fields.next() orelse { - std.debug.print("invalid result line: {s}\n", .{line}); - return error.InvalidResult; - }; + // case names can have | in them, so we can't simply split on | + var case_name = line; + var case_pass = false; // so pessimistic! + var case_message: []const u8 = ""; - const text_status = fields.next() orelse { - std.debug.print("invalid result line: {s}\n", .{line}); - return error.InvalidResult; - }; - - const case_pass = std.mem.eql(u8, text_status, "Pass"); - if (case_pass) { + if (std.mem.endsWith(u8, line, "|Pass")) { + case_name = line[0 .. line.len - 5]; + case_pass = true; case_pass_count += 1; } else { - // If 1 case fails, we treat the entire file as a fail. + // both cases names and messages can have | in them. Our only + // chance to "parse" this is to anchor off the |Fail. + const pos = std.mem.indexOf(u8, line, "|Fail") orelse { + std.debug.print("invalid result line: {s}\n", .{line}); + return error.InvalidResult; + }; + + case_name = line[0..pos]; + case_message = line[pos + 1 ..]; pass = false; case_fail_count += 1; } @@ -286,7 +289,7 @@ const Writer = struct { try cases.append(self.arena, .{ .name = case_name, .pass = case_pass, - .message = fields.next(), + .message = case_message, }); } diff --git a/src/runtime/js.zig b/src/runtime/js.zig index 30d138c4..0b22cd10 100644 --- a/src/runtime/js.zig +++ b/src/runtime/js.zig @@ -941,6 +941,27 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { fn jsValueToZig(self: *JsContext, comptime named_function: NamedFunction, comptime T: type, js_value: v8.Value) !T { switch (@typeInfo(T)) { .optional => |o| { + if (comptime isJsObject(o.child)) { + // If type type is a ?JsObject, then we want to pass + // a JsObject, not null. Consider a function, + // _doSomething(arg: ?Env.JsObjet) void { ... } + // + // And then these three calls: + // doSomething(); + // doSomething(null); + // + // In the first case, we'll pass `null`. But in the + // second, we'll pass a JsObject which represents + // null. + // If we don't have this code, both cases will + // pass in `null` and the the doSomething won't + // be able to tell if `null` was explicitly passed + // or whether no parameter was passed. + return JsObject{ + .js_obj = js_value.castTo(v8.Object), + .js_context = self, + }; + } if (js_value.isNullOrUndefined()) { return null; } @@ -1938,6 +1959,24 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { return try js_context.createFunction(js_value); } + pub fn isNull(self: JsObject) bool { + return self.js_obj.toValue().isNull(); + } + + pub fn isUndefined(self: JsObject) bool { + return self.js_obj.toValue().isUndefined(); + } + + pub fn triState(self: JsObject, comptime Struct: type, comptime name: []const u8, comptime T: type) !TriState(T) { + if (self.isNull()) { + return .{ .null = {} }; + } + if (self.isUndefined()) { + return .{ .undefined = {} }; + } + return .{ .value = try self.toZig(Struct, name, T) }; + } + pub fn isNullOrUndefined(self: JsObject) bool { return self.js_obj.toValue().isNullOrUndefined(); } @@ -1965,6 +2004,14 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { const named_function = comptime NamedFunction.init(Struct, name); return self.js_context.jsValueToZig(named_function, T, self.js_obj.toValue()); } + + pub fn TriState(comptime T: type) type { + return union(enum) { + null: void, + undefined: void, + value: T, + }; + } }; // This only exists so that we know whether a function wants the opaque