Add handling for resolving special URLs

Takes inspiration from https://github.com/lightpanda-io/browser/pull/2030 and fixes https://github.com/lightpanda-io/browser/issues/1994 A url like http:/test gets special treatment. If the scheme, `http:` matches the base scheme, then it's treated as relative to the base. If it doesn't match the base scheme, then it's normalized to http://test, e.g. the path becomes the host.
2026-03-30 17:18:57 +00:00 · 2026-03-30 17:07:10 +08:00
parent 49cef740b2
commit ca76575c2a
1 changed files with 152 additions and 191 deletions
--- a/src/browser/URL.zig
+++ b/src/browser/URL.zig
@@ -24,125 +24,101 @@ const ResolveOpts = struct {
    always_dupe: bool = false,
 };

-const scheme_full_separator = "://";
-const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };
-
-fn isSpecialScheme(scheme: []const u8) bool {
-    if (scheme.len == 0 or scheme.len > 5) {
-        return false;
-    }
-
-    inline for (special_schemes) |special_scheme| {
-        if (std.ascii.eqlIgnoreCase(scheme, special_scheme)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 // path is anytype, so that it can be used with both []const u8 and [:0]const u8
-pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
-    const PT = @TypeOf(source_path);
+pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
+    const PT = @TypeOf(path);
+    if (base.len == 0 or isCompleteHTTPUrl(path)) {
+        if (comptime opts.always_dupe or !isNullTerminated(PT)) {
+            const duped = try allocator.dupeZ(u8, path);
+            return processResolved(allocator, duped, opts);
+        }
+        if (comptime opts.encode) {
+            return processResolved(allocator, path, opts);
+        }
+        return path;
+    }

-    if (source_path.len == 0) {
+    if (path.len == 0) {
+        if (comptime opts.always_dupe) {
+            const duped = try allocator.dupeZ(u8, base);
+            return processResolved(allocator, duped, opts);
+        }
+        if (comptime opts.encode) {
            return processResolved(allocator, base, opts);
        }
-    const path_needs_duping = comptime isNullTerminated(PT) or !opts.always_dupe;
-    var path: [:0]const u8 = if (path_needs_duping) try allocator.dupeZ(u8, source_path) else source_path;
-    errdefer if (path_needs_duping) allocator.free(path);
-
-    if (base.len == 0) {
-        return processResolved(allocator, path, opts);
-    }
-
-    // Minimum is "x://" and skip relative path
-    if (path.len > 3 and path[0] != '/') {
-        if (std.mem.startsWith(u8, path, "blob:") or std.mem.startsWith(u8, path, "data:")) {
-            return processResolved(allocator, path, opts);
-        }
-
-        var scheme_path: []const u8 = "";
-        var scheme_path_end: usize = 0;
-
-        if (std.mem.indexOf(u8, path, ":")) |scheme_end| {
-            scheme_path = path[0..scheme_end];
-            scheme_path_end = scheme_end;
-        }
-
-        if (isSpecialScheme(scheme_path)) {
-            var scheme_base: []const u8 = "";
-
-            if (std.mem.indexOf(u8, base, scheme_full_separator)) |scheme_end| {
-                scheme_base = base[0..scheme_end];
-            }
-
-            const has_double_sleshes: bool = path[scheme_path_end + 1] == '/' and path[scheme_path_end + 2] == '/';
-
-            if (std.mem.eql(u8, scheme_base, scheme_path) and !has_double_sleshes) {
-                //Skip ":" and set relative state
-                path = path[scheme_path_end + 1 ..];
-            } else {
-                //Skip ":"
-                var path_start: usize = scheme_path_end + 1;
-                var host_file_separator: []const u8 = "";
-
-                //file scheme allow empty host
-                if (std.mem.eql(u8, scheme_path, "file") and !has_double_sleshes) {
-                    host_file_separator = "/";
-                }
-
-                //Skip any sleshes after "scheme:"
-                for (path[path_start..]) |char| {
-                    if (char == '/' or char == '\\') {
-                        path_start += 1;
-                    } else {
-                        break;
-                    }
-                }
-                path = try std.mem.joinZ(allocator, "", &.{ scheme_path, scheme_full_separator, host_file_separator, path[path_start..] });
-                errdefer allocator.free(path);
-
-                return try processResolved(allocator, path, opts);
-            }
-        }
+        return base;
    }

    if (path[0] == '?') {
        const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len;
        const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
-        errdefer allocator.free(result);
-
-        return try processResolved(allocator, result, opts);
+        return processResolved(allocator, result, opts);
    }
    if (path[0] == '#') {
        const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len;
        const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
-        errdefer allocator.free(result);
-        
-        return try processResolved(allocator, result, opts);
+        return processResolved(allocator, result, opts);
    }

    if (std.mem.startsWith(u8, path, "//")) {
        // network-path reference
        const index = std.mem.indexOfScalar(u8, base, ':') orelse {
+            if (comptime isNullTerminated(PT)) {
+                if (comptime opts.encode) {
                    return processResolved(allocator, path, opts);
+                }
+                return path;
+            }
+            const duped = try allocator.dupeZ(u8, path);
+            return processResolved(allocator, duped, opts);
        };
        const protocol = base[0 .. index + 1];
        const result = try std.mem.joinZ(allocator, "", &.{ protocol, path });
-        errdefer allocator.free(result);
-
-        return try processResolved(allocator, result, opts);
+        return processResolved(allocator, result, opts);
    }

-    const scheme_end = std.mem.indexOf(u8, base, scheme_full_separator);
+    if (path.len >= 4) { // Minimum: "ws:x"
+        if (std.mem.indexOfScalar(u8, path[0..@min(path.len, 6)], ':')) |pos| {
+            // we know this isn't a complete URL, else the very first check in
+            // this function would have handled it.
+            const possible_special_protocol = path[0..pos];
+            const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };
+            for (special_schemes) |special_scheme| {
+                if (std.ascii.eqlIgnoreCase(possible_special_protocol, special_scheme)) {
+                    const rest = path[pos + 1 ..];
+
+                    // Check if base has the same scheme
+                    const base_scheme_end = std.mem.indexOf(u8, base, "://") orelse 0;
+                    if (base_scheme_end > 0 and std.ascii.eqlIgnoreCase(base[0..base_scheme_end], special_scheme)) {
+                        // Same scheme - strip it and resolve rest as relative
+                        return resolve(allocator, base, rest, opts);
+                    }
+
+                    // Different scheme - construct absolute URL
+                    // Skip any leading slashes in rest
+                    var rest_start: usize = 0;
+                    while (rest_start < rest.len and (rest[rest_start] == '/' or rest[rest_start] == '\\')) {
+                        rest_start += 1;
+                    }
+                    const rest_trimmed = rest[rest_start..];
+
+                    // file: scheme needs empty host (triple slash)
+                    const separator = if (std.mem.eql(u8, special_scheme, "file")) ":///" else "://";
+                    const normalized = try std.mem.joinZ(allocator, "", &.{ special_scheme, separator, rest_trimmed });
+                    return resolve(allocator, "", normalized, opts);
+                }
+            }
+            // Don't know what this is, just try to resolve it through our normal logic
+        }
+    }
+
+    const scheme_end = std.mem.indexOf(u8, base, "://");
    const authority_start = if (scheme_end) |end| end + 3 else 0;
    const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;

    if (path[0] == '/') {
        const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
-        errdefer allocator.free(result);
-
-        return try processResolved(allocator, result, opts);
+        return processResolved(allocator, result, opts);
    }

    var normalized_base: []const u8 = base[0..path_start];
@@ -155,8 +131,6 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c
    // trailing space so that we always have space to append the null terminator
    // and so that we can compare the next two characters without needing to length check
    var out = try std.mem.join(allocator, "", &.{ normalized_base, "/", path, "  " });
-    errdefer allocator.free(out);
-    
    const end = out.len - 2;

    const path_marker = path_start + 1;
@@ -206,7 +180,7 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c

    // we always have an extra space
    out[out_i] = 0;
-    return try processResolved(allocator, out[0..out_i :0], opts);
+    return processResolved(allocator, out[0..out_i :0], opts);
 }

 fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
@@ -217,7 +191,7 @@ fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: Resol
 }

 pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
-    const scheme_end = std.mem.indexOf(u8, url, scheme_full_separator);
+    const scheme_end = std.mem.indexOf(u8, url, "://");
    const authority_start = if (scheme_end) |end| end + 3 else 0;
    const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url;

@@ -386,7 +360,7 @@ pub fn getPassword(raw: [:0]const u8) []const u8 {
 }

 pub fn getPathname(raw: [:0]const u8) []const u8 {
-    const protocol_end = std.mem.indexOf(u8, raw, scheme_full_separator);
+    const protocol_end = std.mem.indexOf(u8, raw, "://");

    // Handle scheme:path URLs like about:blank (no "://")
    if (protocol_end == null) {
@@ -469,7 +443,7 @@ pub fn getHash(raw: [:0]const u8) []const u8 {
 }

 pub fn getOrigin(allocator: Allocator, raw: [:0]const u8) !?[]const u8 {
-    const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null;
+    const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;

    // Only HTTP and HTTPS schemes have origins
    const protocol = raw[0 .. scheme_end + 1];
@@ -527,7 +501,7 @@ fn getUserInfo(raw: [:0]const u8) ?[]const u8 {
    if (!auth.has_user_info) return null;

    // User info is from authority_start to host_start - 1 (excluding the @)
-    const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator).?;
+    const scheme_end = std.mem.indexOf(u8, raw, "://").?;
    const authority_start = scheme_end + 3;
    return raw[authority_start .. auth.host_start - 1];
 }
@@ -828,7 +802,7 @@ const AuthorityInfo = struct {
 // SECURITY: Only looks for @ within the authority portion (before /?#)
 // to prevent path-based @ injection attacks.
 fn parseAuthority(raw: []const u8) ?AuthorityInfo {
-    const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null;
+    const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;
    const authority_start = scheme_end + 3;

    // Find end of authority FIRST (start of path/query/fragment or end of string)
@@ -1033,100 +1007,6 @@ test "URL: resolve" {
    }
 }

-test "URL: resolve path scheme" {
-    const Case = struct {
-        base: [:0]const u8,
-        path: [:0]const u8,
-        expected: [:0]const u8,
-    };
-
-    const cases = [_]Case{
-        //same schemes and path as relative path (one slash)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "https:/about",
-            .expected = "https://www.example.com/about",
-        },
-        //same schemes and path as relative path (without slash)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "https:about",
-            .expected = "https://www.example.com/about",
-        },
-        //same schemes and path as absolute path (two slashes)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "https://about",
-            .expected = "https://about",
-        },
-        //different schemes and path as absolute (without slash)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "http:about",
-            .expected = "http://about",
-        },
-        //different schemes and path as absolute (with one slash)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "http:/about",
-            .expected = "http://about",
-        },
-        //different schemes and path as absolute (with two slashes)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "http://about",
-            .expected = "http://about",
-        },
-        //same schemes and path as absolute (with more slashes)
-        .{
-            .base = "https://site/",
-            .path = "https://path",
-            .expected = "https://path",
-        },
-        //path scheme is not special and path as absolute (without additional slashes)
-        .{
-            .base = "http://localhost/",
-            .path = "data:test",
-            .expected = "data:test",
-        },
-        //different schemes and path as absolute (pathscheme=ws)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "ws://about",
-            .expected = "ws://about",
-        },
-        //different schemes and path as absolute (path scheme=wss)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "wss://about",
-            .expected = "wss://about",
-        },
-        //different schemes and path as absolute (path scheme=ftp)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "ftp://about",
-            .expected = "ftp://about",
-        },
-        //different schemes and path as absolute (path scheme=file)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "file://path/to/file",
-            .expected = "file://path/to/file",
-        },
-        //different schemes and path as absolute (path scheme=file, host is empty)
-        .{
-            .base = "https://www.example.com/example",
-            .path = "file:/path/to/file",
-            .expected = "file:///path/to/file",
-        },
-    };
-
-    for (cases) |case| {
-        const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
-        try testing.expectString(case.expected, result);
-    }
-}
-
 test "URL: ensureEncoded" {
    defer testing.reset();

@@ -1725,3 +1605,84 @@ test "URL: getOrigin" {
        }
    }
 }
+
+test "URL: resolve path scheme" {
+    const Case = struct {
+        base: [:0]const u8,
+        path: [:0]const u8,
+        expected: [:0]const u8,
+    };
+
+    const cases = [_]Case{
+        .{
+            .base = "https://www.example.com/example",
+            .path = "https:/about",
+            .expected = "https://www.example.com/about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "https:about",
+            .expected = "https://www.example.com/about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "https://about",
+            .expected = "https://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "http:about",
+            .expected = "http://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "http:/about",
+            .expected = "http://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "http://about",
+            .expected = "http://about",
+        },
+        .{
+            .base = "https://site/",
+            .path = "https://path",
+            .expected = "https://path",
+        },
+        .{
+            .base = "http://localhost/",
+            .path = "data:test",
+            .expected = "data:test",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "ws://about",
+            .expected = "ws://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "wss://about",
+            .expected = "wss://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "ftp://about",
+            .expected = "ftp://about",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "file://path/to/file",
+            .expected = "file://path/to/file",
+        },
+        .{
+            .base = "https://www.example.com/example",
+            .path = "file:/path/to/file",
+            .expected = "file:///path/to/file",
+        },
+    };
+
+    for (cases) |case| {
+        const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
+        try testing.expectString(case.expected, result);
+    }
+}