From fcb3f08bcbd45abfb284380546d1e177550308d3 Mon Sep 17 00:00:00 2001
From: Karl Seguin <k@openmymind.io>
Date: Wed, 25 Feb 2026 08:17:05 +0800
Subject: [PATCH] Add url encoding option to URL.resolve

Given:

a.href = "over 9000!"

Then:

a.href === BASE_URL + '/over%209000!';

This commit adds an encode: bool option to URL.resolve which will
percent-encode the path, query and fragment when true.

Also changes the Anchor and Link getHref, the Image, IFrame, Media and Script
getSrc, and the Video getPoster to escape. Escaping is also used when
navigating a frame.
---
 src/browser/Page.zig                          |  16 +-
 src/browser/URL.zig                           | 308 +++++++++++++++++-
 src/browser/tests/element/html/anchor.html    |   8 +
 src/browser/tests/element/html/image.html     |   9 +
 src/browser/tests/frames/frames.html          |   3 +-
 .../frames/support/{sub1.html => sub 1.html}  |   0
 src/browser/webapi/element/html/Anchor.zig    |   2 +-
 src/browser/webapi/element/html/IFrame.zig    |   2 +-
 src/browser/webapi/element/html/Image.zig     |   2 +-
 src/browser/webapi/element/html/Link.zig      |   2 +-
 src/browser/webapi/element/html/Media.zig     |   2 +-
 src/browser/webapi/element/html/Script.zig    |   2 +-
 src/browser/webapi/element/html/Video.zig     |   2 +-
 13 files changed, 338 insertions(+), 20 deletions(-)
 rename src/browser/tests/frames/support/{sub1.html => sub 1.html} (100%)

diff --git a/src/browser/Page.zig b/src/browser/Page.zig
index 11ff9ebc..66a3e083 100644
--- a/src/browser/Page.zig
+++ b/src/browser/Page.zig
@@ -566,7 +566,7 @@ fn scheduleNavigationWithArena(self: *Page, arena: Allocator, request_url: []con
         arena,
         self.base(),
         request_url,
-        .{ .always_dupe = true },
+        .{ .always_dupe = true, .encode = true },
     );
 
     const session = self._session;
@@ -1203,7 +1203,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
         return;
     }
 
-    const src = try iframe.getSrc(self);
+    const src = iframe.asElement().getAttributeSafe(comptime .wrap("src")) orelse return;
     if (src.len == 0) {
         return;
     }
@@ -1225,8 +1225,16 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
         .timestamp = timestamp(.monotonic),
     });
 
-    page_frame.navigate(src, .{ .reason = .initialFrameNavigation }) catch |err| {
-        log.warn(.page, "iframe navigate failure", .{ .url = src, .err = err });
+    // navigate will dupe the url
+    const url = try URL.resolve(
+        self.call_arena,
+        self.base(),
+        src,
+        .{ .encode = true },
+    );
+
+    page_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| {
+        log.warn(.page, "iframe navigate failure", .{ .url = url, .err = err });
         self._pending_loads -= 1;
         iframe._content_window = null;
         page_frame.deinit();
diff --git a/src/browser/URL.zig b/src/browser/URL.zig
index 716480b1..3a2a0514 100644
--- a/src/browser/URL.zig
+++ b/src/browser/URL.zig
@@ -20,44 +20,61 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 
 const ResolveOpts = struct {
+    encode: bool = false, // percent-encode the path, query and fragment of the result
     always_dupe: bool = false,
 };
+
 // path is anytype, so that it can be used with both []const u8 and [:0]const u8
 pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
     const PT = @TypeOf(path);
     if (base.len == 0 or isCompleteHTTPUrl(path)) {
         if (comptime opts.always_dupe or !isNullTerminated(PT)) {
-            return allocator.dupeZ(u8, path);
+            const duped = try allocator.dupeZ(u8, path);
+            return encodeURL(allocator, duped, opts);
+        }
+        if (comptime opts.encode) {
+            return encodeURL(allocator, path, opts);
         }
         return path;
     }
 
     if (path.len == 0) {
         if (comptime opts.always_dupe) {
-            return allocator.dupeZ(u8, base);
+            const duped = try allocator.dupeZ(u8, base);
+            return encodeURL(allocator, duped, opts);
+        }
+        if (comptime opts.encode) {
+            return encodeURL(allocator, base, opts);
         }
         return base;
     }
 
     if (path[0] == '?') {
         const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len;
-        return std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
+        const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
+        return encodeURL(allocator, result, opts);
     }
     if (path[0] == '#') {
         const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len;
-        return std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
+        const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
+        return encodeURL(allocator, result, opts);
     }
 
     if (std.mem.startsWith(u8, path, "//")) {
         // network-path reference
         const index = std.mem.indexOfScalar(u8, base, ':') orelse {
             if (comptime isNullTerminated(PT)) {
+                if (comptime opts.encode) {
+                    return encodeURL(allocator, path, opts);
+                }
                 return path;
             }
-            return allocator.dupeZ(u8, path);
+            const duped = try allocator.dupeZ(u8, path);
+            return encodeURL(allocator, duped, opts);
         };
         const protocol = base[0 .. index + 1];
-        return std.mem.joinZ(allocator, "", &.{ protocol, path });
+        const result = try std.mem.joinZ(allocator, "", &.{ protocol, path });
+        return encodeURL(allocator, result, opts);
     }
 
     const scheme_end = std.mem.indexOf(u8, base, "://");
@@ -65,7 +82,8 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
     const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;
 
     if (path[0] == '/') {
-        return std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
+        const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
+        return encodeURL(allocator, result, opts);
     }
 
     var normalized_base: []const u8 = base[0..path_start];
@@ -127,7 +145,115 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
 
     // we always have an extra space
     out[out_i] = 0;
-    return out[0..out_i :0];
+    return encodeURL(allocator, out[0..out_i :0], opts);
+}
+
+/// Percent-encodes the path, query and fragment of `url` when `opts.encode` is
+/// set, leaving everything before the path untouched. Returns `url` itself if
+/// encoding is off, if no '/' follows the authority, or if nothing needed
+/// encoding; otherwise the result is allocated from `allocator`.
+fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
+    if (!comptime opts.encode) {
+        return url;
+    }
+
+    const scheme_end = std.mem.indexOf(u8, url, "://");
+    const authority_start = if (scheme_end) |end| end + 3 else 0;
+    const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url;
+
+    // The fragment begins at the first '#'. Only a '?' that comes before it
+    // starts a query; a '?' inside the fragment is fragment data.
+    const fragment_start = std.mem.indexOfScalarPos(u8, url, path_start, '#');
+    const query_end = fragment_start orelse url.len;
+    const query_start = std.mem.indexOfScalarPos(u8, url[0..query_end], path_start, '?');
+    const path_end = query_start orelse query_end;
+
+    const path = url[path_start..path_end];
+    const query: ?[]const u8 = if (query_start) |qs| url[qs + 1 .. query_end] else null;
+    const fragment: ?[]const u8 = if (fragment_start) |fs| url[fs + 1 ..] else null;
+
+    const encoded_path = try percentEncodeSegment(allocator, path, true);
+    const encoded_query = if (query) |q| try percentEncodeSegment(allocator, q, false) else null;
+    const encoded_fragment = if (fragment) |f| try percentEncodeSegment(allocator, f, false) else null;
+
+    // percentEncodeSegment hands back its input slice when nothing had to be
+    // encoded, so pointer equality tells us whether a component changed.
+    const unchanged = encoded_path.ptr == path.ptr and
+        (encoded_query == null or encoded_query.?.ptr == query.?.ptr) and
+        (encoded_fragment == null or encoded_fragment.?.ptr == fragment.?.ptr);
+    if (unchanged) {
+        return url;
+    }
+
+    var buf = try std.ArrayList(u8).initCapacity(allocator, url.len + 20);
+    try buf.appendSlice(allocator, url[0..path_start]);
+    try buf.appendSlice(allocator, encoded_path);
+    if (encoded_query) |eq| {
+        try buf.append(allocator, '?');
+        try buf.appendSlice(allocator, eq);
+    }
+    if (encoded_fragment) |ef| {
+        try buf.append(allocator, '#');
+        try buf.appendSlice(allocator, ef);
+    }
+    try buf.append(allocator, 0);
+    return buf.items[0 .. buf.items.len - 1 :0];
+}
+
+/// Percent-encodes the bytes of `segment` that `shouldPercentEncode` rejects.
+/// Existing `%XX` escapes (two hex digits) are copied verbatim, never re-encoded.
+/// Returns `segment` itself when nothing has to change, so callers can compare
+/// pointers; otherwise the result is allocated from `allocator`.
+fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime is_path: bool) ![]const u8 {
+    // Find the first byte that has to be rewritten. Valid escapes are skipped,
+    // so an already-encoded segment is returned as-is without allocating.
+    var first: usize = 0;
+    while (first < segment.len) : (first += 1) {
+        if (isPercentEscapeAt(segment, first)) {
+            first += 2;
+        } else if (shouldPercentEncode(segment[first], is_path)) {
+            break;
+        }
+    }
+    if (first >= segment.len) return segment;
+
+    var buf = try std.ArrayList(u8).initCapacity(allocator, segment.len + 10);
+    try buf.appendSlice(allocator, segment[0..first]);
+
+    var i = first;
+    while (i < segment.len) : (i += 1) {
+        const c = segment[i];
+        if (isPercentEscapeAt(segment, i)) {
+            try buf.appendSlice(allocator, segment[i .. i + 3]);
+            i += 2;
+        } else if (shouldPercentEncode(c, is_path)) {
+            try buf.writer(allocator).print("%{X:0>2}", .{c});
+        } else {
+            try buf.append(allocator, c);
+        }
+    }
+    return buf.items;
+}
+
+/// True when `segment[i..]` starts with '%' followed by two hex digits.
+fn isPercentEscapeAt(segment: []const u8, i: usize) bool {
+    return segment[i] == '%' and i + 2 < segment.len and
+        std.ascii.isHex(segment[i + 1]) and std.ascii.isHex(segment[i + 2]);
+}
+
+/// Reports whether `c` must be percent-encoded; in a path '?' is encoded too.
+fn shouldPercentEncode(c: u8, comptime is_path: bool) bool {
+    if (std.ascii.isAlphanumeric(c)) return false;
+    return switch (c) {
+        // Remaining unreserved characters (RFC 3986)
+        '-', '.', '_', '~' => false,
+        // sub-delims and separators are kept in path and query alike
+        '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/', ':', '@' => false,
+        // '?' may appear unescaped in a query but not in a path
+        '?' => is_path,
+        // Everything else (space included) is encoded
+        else => true,
+    };
 }
 
 fn isNullTerminated(comptime value: type) bool {
@@ -691,6 +817,172 @@ test "URL: resolve" {
     }
 }
 
+test "URL: resolve with encoding" {
+    defer testing.reset();
+
+    const Case = struct {
+        base: [:0]const u8,
+        path: [:0]const u8,
+        expected: [:0]const u8,
+    };
+
+    const cases = [_]Case{
+        // Spaces should be encoded as %20, but ! is allowed
+        .{
+            .base = "https://example.com/dir/",
+            .path = "over 9000!",
+            .expected = "https://example.com/dir/over%209000!",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "hello world.html",
+            .expected = "https://example.com/hello%20world.html",
+        },
+        // Multiple spaces
+        .{
+            .base = "https://example.com/",
+            .path = "path with  multiple   spaces",
+            .expected = "https://example.com/path%20with%20%20multiple%20%20%20spaces",
+        },
+        // Special characters that need encoding
+        .{
+            .base = "https://example.com/",
+            .path = "file[1].html",
+            .expected = "https://example.com/file%5B1%5D.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file{name}.html",
+            .expected = "https://example.com/file%7Bname%7D.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file<test>.html",
+            .expected = "https://example.com/file%3Ctest%3E.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file\"quote\".html",
+            .expected = "https://example.com/file%22quote%22.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file|pipe.html",
+            .expected = "https://example.com/file%7Cpipe.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file\\backslash.html",
+            .expected = "https://example.com/file%5Cbackslash.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file^caret.html",
+            .expected = "https://example.com/file%5Ecaret.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file`backtick`.html",
+            .expected = "https://example.com/file%60backtick%60.html",
+        },
+        // Characters that should NOT be encoded
+        .{
+            .base = "https://example.com/",
+            .path = "path-with_under~tilde.html",
+            .expected = "https://example.com/path-with_under~tilde.html",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "path/with/slashes",
+            .expected = "https://example.com/path/with/slashes",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "sub-delims!$&'()*+,;=.html",
+            .expected = "https://example.com/sub-delims!$&'()*+,;=.html",
+        },
+        // Already encoded characters should not be double-encoded
+        .{
+            .base = "https://example.com/",
+            .path = "already%20encoded",
+            .expected = "https://example.com/already%20encoded",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file%5B1%5D.html",
+            .expected = "https://example.com/file%5B1%5D.html",
+        },
+        // Mix of encoded and unencoded
+        .{
+            .base = "https://example.com/",
+            .path = "part%20encoded and not",
+            .expected = "https://example.com/part%20encoded%20and%20not",
+        },
+        // Query strings and fragments ARE encoded
+        .{
+            .base = "https://example.com/",
+            .path = "file name.html?query=value with spaces",
+            .expected = "https://example.com/file%20name.html?query=value%20with%20spaces",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file name.html#anchor with spaces",
+            .expected = "https://example.com/file%20name.html#anchor%20with%20spaces",
+        },
+        .{
+            .base = "https://example.com/",
+            .path = "file.html?hello=world !",
+            .expected = "https://example.com/file.html?hello=world%20!",
+        },
+        // Query structural characters should NOT be encoded
+        .{
+            .base = "https://example.com/",
+            .path = "file.html?a=1&b=2",
+            .expected = "https://example.com/file.html?a=1&b=2",
+        },
+        // Relative paths with encoding
+        .{
+            .base = "https://example.com/dir/page.html",
+            .path = "../other dir/file.html",
+            .expected = "https://example.com/other%20dir/file.html",
+        },
+        .{
+            .base = "https://example.com/dir/",
+            .path = "./sub dir/file.html",
+            .expected = "https://example.com/dir/sub%20dir/file.html",
+        },
+        // Absolute paths with encoding
+        .{
+            .base = "https://example.com/some/path",
+            .path = "/absolute path/file.html",
+            .expected = "https://example.com/absolute%20path/file.html",
+        },
+        // Unicode/high bytes (though ideally these should be UTF-8 encoded first)
+        .{
+            .base = "https://example.com/",
+            .path = "café",
+            .expected = "https://example.com/caf%C3%A9",
+        },
+        // Empty path
+        .{
+            .base = "https://example.com/",
+            .path = "",
+            .expected = "https://example.com/",
+        },
+        // Complete URL as path: the base is ignored, but the URL is still encoded
+        .{
+            .base = "https://example.com/",
+            .path = "https://other.com/path with spaces",
+            .expected = "https://other.com/path%20with%20spaces",
+        },
+    };
+
+    for (cases) |case| {
+        const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true });
+        try testing.expectString(case.expected, result);
+    }
+}
+
 test "URL: eqlDocument" {
     defer testing.reset();
     {
diff --git a/src/browser/tests/element/html/anchor.html b/src/browser/tests/element/html/anchor.html
index 3c248a7b..0522163f 100644
--- a/src/browser/tests/element/html/anchor.html
+++ b/src/browser/tests/element/html/anchor.html
@@ -245,3 +245,11 @@
   testing.expectEqual('', b.toString());
 }
 </script>
+
+<script id=url_encode>
+  {
+    let a = document.createElement('a');
+    a.href = 'over 9000!';
+    testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!', a.href);
+  }
+</script>
diff --git a/src/browser/tests/element/html/image.html b/src/browser/tests/element/html/image.html
index 1fda424a..e7868229 100644
--- a/src/browser/tests/element/html/image.html
+++ b/src/browser/tests/element/html/image.html
@@ -172,3 +172,12 @@
   });
 }
 </script>
+
+<script id=url_encode>
+  {
+    let img = document.createElement('img');
+    img.src = 'over 9000!?hello=world !';
+    testing.expectEqual('over 9000!?hello=world !', img.getAttribute('src'));
+    testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!?hello=world%20!', img.src);
+  }
+</script>
diff --git a/src/browser/tests/frames/frames.html b/src/browser/tests/frames/frames.html
index 00403f3a..1aa81b21 100644
--- a/src/browser/tests/frames/frames.html
+++ b/src/browser/tests/frames/frames.html
@@ -7,7 +7,7 @@
   }
 </script>
 
-<iframe id=f1 onload="frame1Onload" src="support/sub1.html"></iframe>
+<iframe id=f1 onload="frame1Onload" src="support/sub 1.html"></iframe>
 <iframe id=f2 src="support/sub2.html"></iframe>
 
 <script id="basic">
@@ -25,6 +25,7 @@
 
     testing.expectEqual(0, $('#f1').childNodes.length);
 
+    testing.expectEqual(testing.BASE_URL + 'frames/support/sub%201.html', $('#f1').src);
     testing.expectEqual(window[0], $('#f1').contentWindow);
     testing.expectEqual(window[1], $('#f2').contentWindow);
 
diff --git a/src/browser/tests/frames/support/sub1.html b/src/browser/tests/frames/support/sub 1.html
similarity index 100%
rename from src/browser/tests/frames/support/sub1.html
rename to src/browser/tests/frames/support/sub 1.html
diff --git a/src/browser/webapi/element/html/Anchor.zig b/src/browser/webapi/element/html/Anchor.zig
index 3adb3741..badee7bc 100644
--- a/src/browser/webapi/element/html/Anchor.zig
+++ b/src/browser/webapi/element/html/Anchor.zig
@@ -44,7 +44,7 @@ pub fn getHref(self: *Anchor, page: *Page) ![]const u8 {
     if (href.len == 0) {
         return "";
     }
-    return URL.resolve(page.call_arena, page.base(), href, .{});
+    return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
 }
 
 pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/IFrame.zig b/src/browser/webapi/element/html/IFrame.zig
index 7d4d183f..1a5498a3 100644
--- a/src/browser/webapi/element/html/IFrame.zig
+++ b/src/browser/webapi/element/html/IFrame.zig
@@ -50,7 +50,7 @@ pub fn getContentDocument(self: *const IFrame) ?*Document {
 
 pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 {
     if (self._src.len == 0) return "";
-    return try URL.resolve(page.call_arena, page.base(), self._src, .{});
+    return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
 }
 
 pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/Image.zig b/src/browser/webapi/element/html/Image.zig
index 76a56ac8..1e33b3c0 100644
--- a/src/browser/webapi/element/html/Image.zig
+++ b/src/browser/webapi/element/html/Image.zig
@@ -46,7 +46,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 {
     }
 
     // Always resolve the src against the page URL
-    return URL.resolve(page.call_arena, page.base(), src, .{});
+    return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
 }
 
 pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig
index 0a0f8961..7f6b48fc 100644
--- a/src/browser/webapi/element/html/Link.zig
+++ b/src/browser/webapi/element/html/Link.zig
@@ -46,7 +46,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 {
     }
 
     // Always resolve the href against the page URL
-    return URL.resolve(page.call_arena, page.base(), href, .{});
+    return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
 }
 
 pub fn setHref(self: *Link, value: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/Media.zig b/src/browser/webapi/element/html/Media.zig
index f258ee74..310e942b 100644
--- a/src/browser/webapi/element/html/Media.zig
+++ b/src/browser/webapi/element/html/Media.zig
@@ -236,7 +236,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 {
         return "";
     }
     const URL = @import("../../URL.zig");
-    return URL.resolve(page.call_arena, page.base(), src, .{});
+    return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
 }
 
 pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/Script.zig b/src/browser/webapi/element/html/Script.zig
index 41899bb4..ad3d9ef7 100644
--- a/src/browser/webapi/element/html/Script.zig
+++ b/src/browser/webapi/element/html/Script.zig
@@ -46,7 +46,7 @@ pub fn asNode(self: *Script) *Node {
 
 pub fn getSrc(self: *const Script, page: *Page) ![]const u8 {
     if (self._src.len == 0) return "";
-    return try URL.resolve(page.call_arena, page.base(), self._src, .{});
+    return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
 }
 
 pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void {
diff --git a/src/browser/webapi/element/html/Video.zig b/src/browser/webapi/element/html/Video.zig
index 57942e03..63ccda4a 100644
--- a/src/browser/webapi/element/html/Video.zig
+++ b/src/browser/webapi/element/html/Video.zig
@@ -59,7 +59,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 {
     }
 
     const URL = @import("../../URL.zig");
-    return URL.resolve(page.call_arena, page.base(), poster, .{});
+    return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true });
 }
 
 pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void {