Merge pull request #951 from lightpanda-io/wpt_range

Improve correctness of Node.compareDocumentPosition and Range api.
2025-12-16 00:08:59 +00:00 · 2025-08-19 17:23:39 +02:00
parent 4fbedf5b20 b46c181b07
commit 4de4e7504d
5 changed files with 296 additions and 27 deletions
--- a/src/browser/dom/exceptions.zig
+++ b/src/browser/dom/exceptions.zig
@@ -68,23 +68,24 @@ pub const DOMException = struct {
    }

    // TODO: deinit
-    pub fn init(alloc: std.mem.Allocator, err: anyerror, callerName: []const u8) !DOMException {
-        const errCast = @as(parser.DOMError, @errorCast(err));
-        const errName = DOMException.name(errCast);
-        const str = switch (errCast) {
+    pub fn init(alloc: std.mem.Allocator, err: anyerror, caller_name: []const u8) !DOMException {
+        const dom_error = @as(parser.DOMError, @errorCast(err));
+        const error_name = DOMException.name(dom_error);
+        const str = switch (dom_error) {
            error.HierarchyRequest => try allocPrint(
                alloc,
                "{s}: Failed to execute '{s}' on 'Node': The new child element contains the parent.",
-                .{ errName, callerName },
+                .{ error_name, caller_name },
            ),
-            error.NoError => unreachable,
+            // todo add more custom error messages
            else => try allocPrint(
                alloc,
-                "{s}: TODO message", // TODO: implement other messages
-                .{DOMException.name(errCast)},
+                "{s}: Failed to execute '{s}' : {s}",
+                .{ error_name, caller_name, error_name },
            ),
+            error.NoError => unreachable,
        };
-        return .{ .err = errCast, .str = str };
+        return .{ .err = dom_error, .str = str };
    }

    fn error_from_str(name_: []const u8) ?parser.DOMError {
--- a/src/browser/dom/node.zig
+++ b/src/browser/dom/node.zig
@@ -107,6 +107,13 @@ pub const Node = struct {
    pub const _ENTITY_NODE = @intFromEnum(parser.NodeType.entity);
    pub const _NOTATION_NODE = @intFromEnum(parser.NodeType.notation);

+    pub const _DOCUMENT_POSITION_DISCONNECTED = @intFromEnum(parser.DocumentPosition.disconnected);
+    pub const _DOCUMENT_POSITION_PRECEDING = @intFromEnum(parser.DocumentPosition.preceding);
+    pub const _DOCUMENT_POSITION_FOLLOWING = @intFromEnum(parser.DocumentPosition.following);
+    pub const _DOCUMENT_POSITION_CONTAINS = @intFromEnum(parser.DocumentPosition.contains);
+    pub const _DOCUMENT_POSITION_CONTAINED_BY = @intFromEnum(parser.DocumentPosition.contained_by);
+    pub const _DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC = @intFromEnum(parser.DocumentPosition.implementation_specific);
+
    // JS funcs
    // --------

@@ -260,14 +267,43 @@ pub const Node = struct {
    }

    pub fn _compareDocumentPosition(self: *parser.Node, other: *parser.Node) !u32 {
-        if (self == other) return 0;
+        if (self == other) {
+            return 0;
+        }

-        const docself = try parser.nodeOwnerDocument(self);
-        const docother = try parser.nodeOwnerDocument(other);
+        const docself = try parser.nodeOwnerDocument(self) orelse blk: {
+            if (try parser.nodeType(self) == .document) {
+                break :blk @as(*parser.Document, @ptrCast(self));
+            }
+            break :blk null;
+        };
+        const docother = try parser.nodeOwnerDocument(other) orelse blk: {
+            if (try parser.nodeType(other) == .document) {
+                break :blk @as(*parser.Document, @ptrCast(other));
+            }
+            break :blk null;
+        };

        // Both are in different document.
-        if (docself == null or docother == null or docother.? != docself.?) {
-            return @intFromEnum(parser.DocumentPosition.disconnected);
+        if (docself == null or docother == null or docself.? != docother.?) {
+            return @intFromEnum(parser.DocumentPosition.disconnected) +
+                @intFromEnum(parser.DocumentPosition.implementation_specific) +
+                @intFromEnum(parser.DocumentPosition.preceding);
+        }
+
+        if (@intFromPtr(self) == @intFromPtr(docself.?)) {
+            // if self is the document, and we already know other is in the
+            // document, then other is contained by and following self.
+            return @intFromEnum(parser.DocumentPosition.following) +
+                @intFromEnum(parser.DocumentPosition.contained_by);
+        }
+
+        const rootself = try parser.nodeGetRootNode(self);
+        const rootother = try parser.nodeGetRootNode(other);
+        if (rootself != rootother) {
+            return @intFromEnum(parser.DocumentPosition.disconnected) +
+                @intFromEnum(parser.DocumentPosition.implementation_specific) +
+                @intFromEnum(parser.DocumentPosition.preceding);
        }

        // TODO Both are in a different trees in the same document.
--- a/src/browser/dom/range.zig
+++ b/src/browser/dom/range.zig
@@ -21,8 +21,9 @@ const std = @import("std");
 const parser = @import("../netsurf.zig");
 const Page = @import("../page.zig").Page;

-const NodeUnion = @import("node.zig").Union;
 const Node = @import("node.zig").Node;
+const NodeUnion = @import("node.zig").Union;
+const DOMException = @import("exceptions.zig").DOMException;

 pub const Interfaces = .{
    AbstractRange,
@@ -32,9 +33,9 @@ pub const Interfaces = .{
 pub const AbstractRange = struct {
    collapsed: bool,
    end_container: *parser.Node,
-    end_offset: i32,
+    end_offset: u32,
    start_container: *parser.Node,
-    start_offset: i32,
+    start_offset: u32,

    pub fn updateCollapsed(self: *AbstractRange) void {
        // TODO: Eventually, compare properly.
@@ -49,7 +50,7 @@ pub const AbstractRange = struct {
        return Node.toInterface(self.end_container);
    }

-    pub fn get_endOffset(self: *const AbstractRange) i32 {
+    pub fn get_endOffset(self: *const AbstractRange) u32 {
        return self.end_offset;
    }

@@ -57,12 +58,13 @@ pub const AbstractRange = struct {
        return Node.toInterface(self.start_container);
    }

-    pub fn get_startOffset(self: *const AbstractRange) i32 {
+    pub fn get_startOffset(self: *const AbstractRange) u32 {
        return self.start_offset;
    }
 };

 pub const Range = struct {
+    pub const Exception = DOMException;
    pub const prototype = *AbstractRange;

    proto: AbstractRange,
@@ -82,18 +84,80 @@ pub const Range = struct {
        return .{ .proto = proto };
    }

-    pub fn _setStart(self: *Range, node: *parser.Node, offset: i32) void {
+    pub fn _setStart(self: *Range, node: *parser.Node, offset_: i32) !void {
+        try ensureValidOffset(node, offset_);
+        const offset: u32 = @intCast(offset_);
+        const position = compare(node, offset, self.proto.end_container, self.proto.end_offset) catch |err| switch (err) {
+            error.WrongDocument => blk: {
+                // allow a node with a different root than the current, or
+                // a disconnected one. Treat it as if it's "after", so that
+                // we also update the end_offset and end_container.
+                break :blk 1;
+            },
+            else => return err,
+        };
+
+        if (position == 1) {
+            // if the new start is after the current end, the end must be
+            // moved to the new boundary too, so that start <= end holds.
+            self.proto.end_offset = offset;
+            self.proto.end_container = node;
+        }
        self.proto.start_container = node;
        self.proto.start_offset = offset;
        self.proto.updateCollapsed();
    }

-    pub fn _setEnd(self: *Range, node: *parser.Node, offset: i32) void {
+    pub fn _setStartBefore(self: *Range, node: *parser.Node) !void {
+        const parent, const index = try getParentAndIndex(node);
+        self.proto.start_container = parent;
+        self.proto.start_offset = index;
+    }
+
+    pub fn _setStartAfter(self: *Range, node: *parser.Node) !void {
+        const parent, const index = try getParentAndIndex(node);
+        self.proto.start_container = parent;
+        self.proto.start_offset = index + 1;
+    }
+
+    pub fn _setEnd(self: *Range, node: *parser.Node, offset_: i32) !void {
+        try ensureValidOffset(node, offset_);
+        const offset: u32 = @intCast(offset_);
+
+        const position = compare(node, offset, self.proto.start_container, self.proto.start_offset) catch |err| switch (err) {
+            error.WrongDocument => blk: {
+                // allow a node with a different root than the current, or
+                // a disconnected one. Treat it as if it's "before", so that
+                // we also update the start_offset and start_container.
+                break :blk -1;
+            },
+            else => return err,
+        };
+
+        if (position == -1) {
+            // if we're setting the node before the current start, the start
+            // must be set too.
+            self.proto.start_offset = offset;
+            self.proto.start_container = node;
+        }
+
        self.proto.end_container = node;
        self.proto.end_offset = offset;
        self.proto.updateCollapsed();
    }

+    pub fn _setEndBefore(self: *Range, node: *parser.Node) !void {
+        const parent, const index = try getParentAndIndex(node);
+        self.proto.end_container = parent;
+        self.proto.end_offset = index;
+    }
+
+    pub fn _setEndAfter(self: *Range, node: *parser.Node) !void {
+        const parent, const index = try getParentAndIndex(node);
+        self.proto.end_container = parent;
+        self.proto.end_offset = index + 1;
+    }
+
    pub fn _createContextualFragment(_: *Range, fragment: []const u8, page: *Page) !*parser.DocumentFragment {
        const document_html = page.window.document;
        const document = parser.documentHTMLToDocument(document_html);
@@ -127,6 +191,79 @@ pub const Range = struct {
        self.proto.updateCollapsed();
    }

+    // creates a copy
+    pub fn _cloneRange(self: *const Range) Range {
+        return .{
+            .proto = .{
+                .collapsed = self.proto.collapsed,
+                .end_container = self.proto.end_container,
+                .end_offset = self.proto.end_offset,
+                .start_container = self.proto.start_container,
+                .start_offset = self.proto.start_offset,
+            },
+        };
+    }
+
+    pub fn _comparePoint(self: *const Range, node: *parser.Node, offset_: i32) !i32 {
+        const start = self.proto.start_container;
+        if (try parser.nodeGetRootNode(start) != try parser.nodeGetRootNode(node)) {
+            // WPT really wants this error to be first. Later, when we check
+            // if the relative position is 'disconnected', it'll also catch this
+            // case, but WPT will complain because it sometimes also sends
+            // invalid offsets, and it wants WrongDocument to be raised.
+            return error.WrongDocument;
+        }
+
+        if (try parser.nodeType(node) == .document_type) {
+            return error.InvalidNodeType;
+        }
+
+        try ensureValidOffset(node, offset_);
+
+        const offset: u32 = @intCast(offset_);
+        if (try compare(node, offset, start, self.proto.start_offset) == -1) {
+            return -1;
+        }
+
+        if (try compare(node, offset, self.proto.end_container, self.proto.end_offset) == 1) {
+            return 1;
+        }
+
+        return 0;
+    }
+
+    pub fn _isPointInRange(self: *const Range, node: *parser.Node, offset_: i32) !bool {
+        return self._comparePoint(node, offset_) catch |err| switch (err) {
+            error.WrongDocument => return false,
+            else => return err,
+        } == 0;
+    }
+
+    pub fn _intersectsNode(self: *const Range, node: *parser.Node) !bool {
+        const start_root = try parser.nodeGetRootNode(self.proto.start_container);
+        const node_root = try parser.nodeGetRootNode(node);
+        if (start_root != node_root) {
+            return false;
+        }
+
+        const parent, const index = getParentAndIndex(node) catch |err| switch (err) {
+            error.InvalidNodeType => return true, // if node has no parent, we return true.
+            else => return err,
+        };
+
+        if (try compare(parent, index + 1, self.proto.start_container, self.proto.start_offset) != 1) {
+            // node isn't after start, can't intersect
+            return false;
+        }
+
+        if (try compare(parent, index, self.proto.end_container, self.proto.end_offset) != -1) {
+            // node isn't before end, can't intersect
+            return false;
+        }
+
+        return true;
+    }
+
    // The Range.detach() method does nothing. It used to disable the Range
    // object and enable the browser to release associated resources. The
    // method has been kept for compatibility.
@@ -134,6 +271,104 @@ pub const Range = struct {
    pub fn _detach(_: *Range) void {}
 };

+fn ensureValidOffset(node: *parser.Node, offset: i32) !void {
+    if (offset < 0) {
+        return error.IndexSize;
+    }
+
+    // not >= because 0 seems to represent the node itself.
+    if (offset > try nodeLength(node)) {
+        return error.IndexSize;
+    }
+}
+
+fn nodeLength(node: *parser.Node) !usize {
+    switch (try isTextual(node)) {
+        true => return ((try parser.nodeTextContent(node)) orelse "").len,
+        false => {
+            const children = try parser.nodeGetChildNodes(node);
+            return @intCast(try parser.nodeListLength(children));
+        },
+    }
+}
+
+fn isTextual(node: *parser.Node) !bool {
+    return switch (try parser.nodeType(node)) {
+        .text, .comment, .cdata_section => true,
+        else => false,
+    };
+}
+
+fn getParentAndIndex(child: *parser.Node) !struct { *parser.Node, u32 } {
+    const parent = (try parser.nodeParentNode(child)) orelse return error.InvalidNodeType;
+    const children = try parser.nodeGetChildNodes(parent);
+    const ln = try parser.nodeListLength(children);
+    var i: u32 = 0;
+    while (i < ln) {
+        defer i += 1;
+        const c = try parser.nodeListItem(children, i) orelse continue;
+        if (c == child) {
+            return .{ parent, i };
+        }
+    }
+
+    // should not be possible to reach this point
+    return error.InvalidNodeType;
+}
+
+// Returns -1/0/1 when (node_a, offset_a) is before/equal to/after (node_b,
+// offset_b); follows the WPT getPosition helper in dom/common.js.
+fn compare(node_a: *parser.Node, offset_a: u32, node_b: *parser.Node, offset_b: u32) !i32 {
+    if (node_a == node_b) {
+        // This is a simple and common case, where the two nodes are the same
+        // We just need to compare their offsets
+        if (offset_a == offset_b) {
+            return 0;
+        }
+        return if (offset_a < offset_b) -1 else 1;
+    }
+
+    // The two nodes differ, so their relative position in tree order decides
+    // the result. Note the argument order: this asks for the position of
+    // node_a relative to node_b, i.e. `following` means node_a comes after
+    // node_b.
+    const position = try Node._compareDocumentPosition(node_b, node_a);
+    if (position & @intFromEnum(parser.DocumentPosition.disconnected) == @intFromEnum(parser.DocumentPosition.disconnected)) {
+        return error.WrongDocument;
+    }
+
+    if (position & @intFromEnum(parser.DocumentPosition.following) == @intFromEnum(parser.DocumentPosition.following)) {
+        return switch (try compare(node_b, offset_b, node_a, offset_a)) {
+            -1 => 1,
+            1 => -1,
+            else => unreachable,
+        };
+    }
+
+    if (position & @intFromEnum(parser.DocumentPosition.contains) == @intFromEnum(parser.DocumentPosition.contains)) {
+        // node_a contains node_b: find node_a's child that holds node_b
+        var child = node_b;
+        while (try parser.nodeParentNode(child)) |parent| {
+            if (parent == node_a) {
+                // child.parentNode == node_a
+                break;
+            }
+            child = parent;
+        } else {
+            // this should not happen, because Node._compareDocumentPosition
+            // has told us that node_a contains node_b, so one of node_b's
+            // ancestors MUST be node_a. But somehow we do end up here sometimes.
+            return -1;
+        }
+
+        const child_parent, const child_index = try getParentAndIndex(child);
+        std.debug.assert(node_a == child_parent);
+        return if (child_index < offset_a) 1 else -1; // child before offset_a => a is after b
+    }
+
+    return -1;
+}
+
 const testing = @import("../../testing.zig");
 test "Browser.Range" {
    var runner = try testing.jsRunner(testing.tracking_allocator, .{});
--- a/src/main_wpt.zig
+++ b/src/main_wpt.zig
@@ -18,6 +18,7 @@

 const std = @import("std");

+const log = @import("log.zig");
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;

@@ -29,11 +30,6 @@ const polyfill = @import("browser/polyfill/polyfill.zig");

 const WPT_DIR = "tests/wpt";

-pub const std_options = std.Options{
-    // Set the log level to info
-    .log_level = .info,
-};
-
 // TODO For now the WPT tests run is specific to WPT.
 // It manually load js framwork libs, and run the first script w/ js content in
 // the HTML page.
@@ -43,6 +39,7 @@ pub fn main() !void {
    var gpa: std.heap.DebugAllocator(.{}) = .init;
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();
+    log.opts.level = .warn;

    // An arena for the runner itself, lives for the duration of the the process
    var ra = ArenaAllocator.init(allocator);
--- a/src/runtime/js.zig
+++ b/src/runtime/js.zig
@@ -3866,7 +3866,7 @@ const NamedFunction = struct {
 // this can add as much as 10 seconds of compilation time.
 fn logFunctionCallError(arena: Allocator, isolate: v8.Isolate, context: v8.Context, err: anyerror, function_name: []const u8, info: v8.FunctionCallbackInfo) void {
    const args_dump = serializeFunctionArgs(arena, isolate, context, info) catch "failed to serialize args";
-    log.warn(.js, "function call error", .{
+    log.info(.js, "function call error", .{
        .name = function_name,
        .err = err,
        .args = args_dump,