From ca76575c2a196a4451e9f133b05dc6be39d403e6 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 30 Mar 2026 17:07:10 +0800 Subject: [PATCH] Add handling for resolving special URLs Takes inspiration from https://github.com/lightpanda-io/browser/pull/2030 and fixes https://github.com/lightpanda-io/browser/issues/1994 A URL like http:/test gets special treatment. If the scheme, `http:`, matches the base scheme, then it's treated as relative to the base. If it doesn't match the base scheme, then it's normalized to http://test, i.e. the path becomes the host. --- src/browser/URL.zig | 343 ++++++++++++++++++++------------------------ 1 file changed, 152 insertions(+), 191 deletions(-) diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 7c483a20..2cc3c3e8 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -24,125 +24,101 @@ const ResolveOpts = struct { always_dupe: bool = false, }; -const scheme_full_separator = "://"; -const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" }; - -fn isSpecialScheme(scheme: []const u8) bool { - if (scheme.len == 0 or scheme.len > 5) { - return false; - } - - inline for (special_schemes) |special_scheme| { - if (std.ascii.eqlIgnoreCase(scheme, special_scheme)) { - return true; - } - } - return false; -} - // path is anytype, so that it can be used with both []const u8 and [:0]const u8 -pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 { - const PT = @TypeOf(source_path); - - if (source_path.len == 0) { - return processResolved(allocator, base, opts); - } - const path_needs_duping = comptime isNullTerminated(PT) or !opts.always_dupe; - var path: [:0]const u8 = if (path_needs_duping) try allocator.dupeZ(u8, source_path) else source_path; - errdefer if (path_needs_duping) allocator.free(path); - - if (base.len == 0) { - return processResolved(allocator, path, opts); - } - - // Minimum is "x://" and skip relative path - if
(path.len > 3 and path[0] != '/') { - if (std.mem.startsWith(u8, path, "blob:") or std.mem.startsWith(u8, path, "data:")) { +pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 { + const PT = @TypeOf(path); + if (base.len == 0 or isCompleteHTTPUrl(path)) { + if (comptime opts.always_dupe or !isNullTerminated(PT)) { + const duped = try allocator.dupeZ(u8, path); + return processResolved(allocator, duped, opts); + } + if (comptime opts.encode) { return processResolved(allocator, path, opts); } + return path; + } - var scheme_path: []const u8 = ""; - var scheme_path_end: usize = 0; - - if (std.mem.indexOf(u8, path, ":")) |scheme_end| { - scheme_path = path[0..scheme_end]; - scheme_path_end = scheme_end; + if (path.len == 0) { + if (comptime opts.always_dupe) { + const duped = try allocator.dupeZ(u8, base); + return processResolved(allocator, duped, opts); } - - if (isSpecialScheme(scheme_path)) { - var scheme_base: []const u8 = ""; - - if (std.mem.indexOf(u8, base, scheme_full_separator)) |scheme_end| { - scheme_base = base[0..scheme_end]; - } - - const has_double_sleshes: bool = path[scheme_path_end + 1] == '/' and path[scheme_path_end + 2] == '/'; - - if (std.mem.eql(u8, scheme_base, scheme_path) and !has_double_sleshes) { - //Skip ":" and set relative state - path = path[scheme_path_end + 1 ..]; - } else { - //Skip ":" - var path_start: usize = scheme_path_end + 1; - var host_file_separator: []const u8 = ""; - - //file scheme allow empty host - if (std.mem.eql(u8, scheme_path, "file") and !has_double_sleshes) { - host_file_separator = "/"; - } - - //Skip any sleshes after "scheme:" - for (path[path_start..]) |char| { - if (char == '/' or char == '\\') { - path_start += 1; - } else { - break; - } - } - path = try std.mem.joinZ(allocator, "", &.{ scheme_path, scheme_full_separator, host_file_separator, path[path_start..] 
}); - errdefer allocator.free(path); - - return try processResolved(allocator, path, opts); - } + if (comptime opts.encode) { + return processResolved(allocator, base, opts); } + return base; } if (path[0] == '?') { const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path }); - errdefer allocator.free(result); - - return try processResolved(allocator, result, opts); + return processResolved(allocator, result, opts); } if (path[0] == '#') { const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path }); - errdefer allocator.free(result); - - return try processResolved(allocator, result, opts); + return processResolved(allocator, result, opts); } if (std.mem.startsWith(u8, path, "//")) { // network-path reference const index = std.mem.indexOfScalar(u8, base, ':') orelse { - return processResolved(allocator, path, opts); + if (comptime isNullTerminated(PT)) { + if (comptime opts.encode) { + return processResolved(allocator, path, opts); + } + return path; + } + const duped = try allocator.dupeZ(u8, path); + return processResolved(allocator, duped, opts); }; const protocol = base[0 .. index + 1]; const result = try std.mem.joinZ(allocator, "", &.{ protocol, path }); - errdefer allocator.free(result); - - return try processResolved(allocator, result, opts); + return processResolved(allocator, result, opts); } - const scheme_end = std.mem.indexOf(u8, base, scheme_full_separator); + if (path.len >= 4) { // Minimum: "ws:x" + if (std.mem.indexOfScalar(u8, path[0..@min(path.len, 6)], ':')) |pos| { + // we know this isn't a complete URL, else the very first check in + // this function would have handled it. 
+ const possible_special_protocol = path[0..pos]; + const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" }; + for (special_schemes) |special_scheme| { + if (std.ascii.eqlIgnoreCase(possible_special_protocol, special_scheme)) { + const rest = path[pos + 1 ..]; + + // Check if base has the same scheme + const base_scheme_end = std.mem.indexOf(u8, base, "://") orelse 0; + if (base_scheme_end > 0 and std.ascii.eqlIgnoreCase(base[0..base_scheme_end], special_scheme)) { + // Same scheme - strip it and resolve rest as relative + return resolve(allocator, base, rest, opts); + } + + // Different scheme - construct absolute URL + // Skip any leading slashes in rest + var rest_start: usize = 0; + while (rest_start < rest.len and (rest[rest_start] == '/' or rest[rest_start] == '\\')) { + rest_start += 1; + } + const rest_trimmed = rest[rest_start..]; + + // file: scheme needs empty host (triple slash) + const separator = if (std.mem.eql(u8, special_scheme, "file")) ":///" else "://"; + const normalized = try std.mem.joinZ(allocator, "", &.{ special_scheme, separator, rest_trimmed }); + return resolve(allocator, "", normalized, opts); + } + } + // Don't know what this is, just try to resolve it through our normal logic + } + } + + const scheme_end = std.mem.indexOf(u8, base, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len; if (path[0] == '/') { const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path }); - errdefer allocator.free(result); - - return try processResolved(allocator, result, opts); + return processResolved(allocator, result, opts); } var normalized_base: []const u8 = base[0..path_start]; @@ -155,8 +131,6 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c // trailing space so that we always have space to append the null terminator // and so that we can compare the next 
two characters without needing to length check var out = try std.mem.join(allocator, "", &.{ normalized_base, "/", path, " " }); - errdefer allocator.free(out); - const end = out.len - 2; const path_marker = path_start + 1; @@ -206,7 +180,7 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c // we always have an extra space out[out_i] = 0; - return try processResolved(allocator, out[0..out_i :0], opts); + return processResolved(allocator, out[0..out_i :0], opts); } fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { @@ -217,7 +191,7 @@ fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: Resol } pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { - const scheme_end = std.mem.indexOf(u8, url, scheme_full_separator); + const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; @@ -386,7 +360,7 @@ pub fn getPassword(raw: [:0]const u8) []const u8 { } pub fn getPathname(raw: [:0]const u8) []const u8 { - const protocol_end = std.mem.indexOf(u8, raw, scheme_full_separator); + const protocol_end = std.mem.indexOf(u8, raw, "://"); // Handle scheme:path URLs like about:blank (no "://") if (protocol_end == null) { @@ -469,7 +443,7 @@ pub fn getHash(raw: [:0]const u8) []const u8 { } pub fn getOrigin(allocator: Allocator, raw: [:0]const u8) !?[]const u8 { - const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null; + const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null; // Only HTTP and HTTPS schemes have origins const protocol = raw[0 .. 
scheme_end + 1]; @@ -527,7 +501,7 @@ fn getUserInfo(raw: [:0]const u8) ?[]const u8 { if (!auth.has_user_info) return null; // User info is from authority_start to host_start - 1 (excluding the @) - const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator).?; + const scheme_end = std.mem.indexOf(u8, raw, "://").?; const authority_start = scheme_end + 3; return raw[authority_start .. auth.host_start - 1]; } @@ -828,7 +802,7 @@ const AuthorityInfo = struct { // SECURITY: Only looks for @ within the authority portion (before /?#) // to prevent path-based @ injection attacks. fn parseAuthority(raw: []const u8) ?AuthorityInfo { - const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null; + const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null; const authority_start = scheme_end + 3; // Find end of authority FIRST (start of path/query/fragment or end of string) @@ -1033,100 +1007,6 @@ test "URL: resolve" { } } -test "URL: resolve path scheme" { - const Case = struct { - base: [:0]const u8, - path: [:0]const u8, - expected: [:0]const u8, - }; - - const cases = [_]Case{ - //same schemes and path as relative path (one slash) - .{ - .base = "https://www.example.com/example", - .path = "https:/about", - .expected = "https://www.example.com/about", - }, - //same schemes and path as relative path (without slash) - .{ - .base = "https://www.example.com/example", - .path = "https:about", - .expected = "https://www.example.com/about", - }, - //same schemes and path as absolute path (two slashes) - .{ - .base = "https://www.example.com/example", - .path = "https://about", - .expected = "https://about", - }, - //different schemes and path as absolute (without slash) - .{ - .base = "https://www.example.com/example", - .path = "http:about", - .expected = "http://about", - }, - //different schemes and path as absolute (with one slash) - .{ - .base = "https://www.example.com/example", - .path = "http:/about", - .expected = 
"http://about", - }, - //different schemes and path as absolute (with two slashes) - .{ - .base = "https://www.example.com/example", - .path = "http://about", - .expected = "http://about", - }, - //same schemes and path as absolute (with more slashes) - .{ - .base = "https://site/", - .path = "https://path", - .expected = "https://path", - }, - //path scheme is not special and path as absolute (without additional slashes) - .{ - .base = "http://localhost/", - .path = "data:test", - .expected = "data:test", - }, - //different schemes and path as absolute (pathscheme=ws) - .{ - .base = "https://www.example.com/example", - .path = "ws://about", - .expected = "ws://about", - }, - //different schemes and path as absolute (path scheme=wss) - .{ - .base = "https://www.example.com/example", - .path = "wss://about", - .expected = "wss://about", - }, - //different schemes and path as absolute (path scheme=ftp) - .{ - .base = "https://www.example.com/example", - .path = "ftp://about", - .expected = "ftp://about", - }, - //different schemes and path as absolute (path scheme=file) - .{ - .base = "https://www.example.com/example", - .path = "file://path/to/file", - .expected = "file://path/to/file", - }, - //different schemes and path as absolute (path scheme=file, host is empty) - .{ - .base = "https://www.example.com/example", - .path = "file:/path/to/file", - .expected = "file:///path/to/file", - }, - }; - - for (cases) |case| { - const result = try resolve(testing.arena_allocator, case.base, case.path, .{}); - try testing.expectString(case.expected, result); - } -} - test "URL: ensureEncoded" { defer testing.reset(); @@ -1725,3 +1605,84 @@ test "URL: getOrigin" { } } } + +test "URL: resolve path scheme" { + const Case = struct { + base: [:0]const u8, + path: [:0]const u8, + expected: [:0]const u8, + }; + + const cases = [_]Case{ + .{ + .base = "https://www.example.com/example", + .path = "https:/about", + .expected = "https://www.example.com/about", + }, + .{ + .base = 
"https://www.example.com/example", + .path = "https:about", + .expected = "https://www.example.com/about", + }, + .{ + .base = "https://www.example.com/example", + .path = "https://about", + .expected = "https://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "http:about", + .expected = "http://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "http:/about", + .expected = "http://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "http://about", + .expected = "http://about", + }, + .{ + .base = "https://site/", + .path = "https://path", + .expected = "https://path", + }, + .{ + .base = "http://localhost/", + .path = "data:test", + .expected = "data:test", + }, + .{ + .base = "https://www.example.com/example", + .path = "ws://about", + .expected = "ws://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "wss://about", + .expected = "wss://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "ftp://about", + .expected = "ftp://about", + }, + .{ + .base = "https://www.example.com/example", + .path = "file://path/to/file", + .expected = "file://path/to/file", + }, + .{ + .base = "https://www.example.com/example", + .path = "file:/path/to/file", + .expected = "file:///path/to/file", + }, + }; + + for (cases) |case| { + const result = try resolve(testing.arena_allocator, case.base, case.path, .{}); + try testing.expectString(case.expected, result); + } +}