mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-30 17:18:57 +00:00
Add handling for resolving special URLs
Takes inspiration from https://github.com/lightpanda-io/browser/pull/2030 and fixes https://github.com/lightpanda-io/browser/issues/1994 . A URL like http:/test gets special treatment. If the scheme, `http:`, matches the base scheme, then it's treated as relative to the base. If it doesn't match the base scheme, then it's normalized to http://test, i.e. the path becomes the host.
This commit is contained in:
@@ -24,125 +24,101 @@ const ResolveOpts = struct {
|
||||
always_dupe: bool = false,
|
||||
};
|
||||
|
||||
const scheme_full_separator = "://";
|
||||
const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };
|
||||
|
||||
fn isSpecialScheme(scheme: []const u8) bool {
|
||||
if (scheme.len == 0 or scheme.len > 5) {
|
||||
return false;
|
||||
}
|
||||
|
||||
inline for (special_schemes) |special_scheme| {
|
||||
if (std.ascii.eqlIgnoreCase(scheme, special_scheme)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// path is anytype, so that it can be used with both []const u8 and [:0]const u8
|
||||
pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
|
||||
const PT = @TypeOf(source_path);
|
||||
pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
|
||||
const PT = @TypeOf(path);
|
||||
if (base.len == 0 or isCompleteHTTPUrl(path)) {
|
||||
if (comptime opts.always_dupe or !isNullTerminated(PT)) {
|
||||
const duped = try allocator.dupeZ(u8, path);
|
||||
return processResolved(allocator, duped, opts);
|
||||
}
|
||||
if (comptime opts.encode) {
|
||||
return processResolved(allocator, path, opts);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
if (source_path.len == 0) {
|
||||
if (path.len == 0) {
|
||||
if (comptime opts.always_dupe) {
|
||||
const duped = try allocator.dupeZ(u8, base);
|
||||
return processResolved(allocator, duped, opts);
|
||||
}
|
||||
if (comptime opts.encode) {
|
||||
return processResolved(allocator, base, opts);
|
||||
}
|
||||
const path_needs_duping = comptime isNullTerminated(PT) or !opts.always_dupe;
|
||||
var path: [:0]const u8 = if (path_needs_duping) try allocator.dupeZ(u8, source_path) else source_path;
|
||||
errdefer if (path_needs_duping) allocator.free(path);
|
||||
|
||||
if (base.len == 0) {
|
||||
return processResolved(allocator, path, opts);
|
||||
}
|
||||
|
||||
// Minimum is "x://" and skip relative path
|
||||
if (path.len > 3 and path[0] != '/') {
|
||||
if (std.mem.startsWith(u8, path, "blob:") or std.mem.startsWith(u8, path, "data:")) {
|
||||
return processResolved(allocator, path, opts);
|
||||
}
|
||||
|
||||
var scheme_path: []const u8 = "";
|
||||
var scheme_path_end: usize = 0;
|
||||
|
||||
if (std.mem.indexOf(u8, path, ":")) |scheme_end| {
|
||||
scheme_path = path[0..scheme_end];
|
||||
scheme_path_end = scheme_end;
|
||||
}
|
||||
|
||||
if (isSpecialScheme(scheme_path)) {
|
||||
var scheme_base: []const u8 = "";
|
||||
|
||||
if (std.mem.indexOf(u8, base, scheme_full_separator)) |scheme_end| {
|
||||
scheme_base = base[0..scheme_end];
|
||||
}
|
||||
|
||||
const has_double_sleshes: bool = path[scheme_path_end + 1] == '/' and path[scheme_path_end + 2] == '/';
|
||||
|
||||
if (std.mem.eql(u8, scheme_base, scheme_path) and !has_double_sleshes) {
|
||||
//Skip ":" and set relative state
|
||||
path = path[scheme_path_end + 1 ..];
|
||||
} else {
|
||||
//Skip ":"
|
||||
var path_start: usize = scheme_path_end + 1;
|
||||
var host_file_separator: []const u8 = "";
|
||||
|
||||
//file scheme allow empty host
|
||||
if (std.mem.eql(u8, scheme_path, "file") and !has_double_sleshes) {
|
||||
host_file_separator = "/";
|
||||
}
|
||||
|
||||
//Skip any sleshes after "scheme:"
|
||||
for (path[path_start..]) |char| {
|
||||
if (char == '/' or char == '\\') {
|
||||
path_start += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
path = try std.mem.joinZ(allocator, "", &.{ scheme_path, scheme_full_separator, host_file_separator, path[path_start..] });
|
||||
errdefer allocator.free(path);
|
||||
|
||||
return try processResolved(allocator, path, opts);
|
||||
}
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
if (path[0] == '?') {
|
||||
const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len;
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
|
||||
errdefer allocator.free(result);
|
||||
|
||||
return try processResolved(allocator, result, opts);
|
||||
return processResolved(allocator, result, opts);
|
||||
}
|
||||
if (path[0] == '#') {
|
||||
const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len;
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
|
||||
errdefer allocator.free(result);
|
||||
|
||||
return try processResolved(allocator, result, opts);
|
||||
return processResolved(allocator, result, opts);
|
||||
}
|
||||
|
||||
if (std.mem.startsWith(u8, path, "//")) {
|
||||
// network-path reference
|
||||
const index = std.mem.indexOfScalar(u8, base, ':') orelse {
|
||||
if (comptime isNullTerminated(PT)) {
|
||||
if (comptime opts.encode) {
|
||||
return processResolved(allocator, path, opts);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
const duped = try allocator.dupeZ(u8, path);
|
||||
return processResolved(allocator, duped, opts);
|
||||
};
|
||||
const protocol = base[0 .. index + 1];
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ protocol, path });
|
||||
errdefer allocator.free(result);
|
||||
|
||||
return try processResolved(allocator, result, opts);
|
||||
return processResolved(allocator, result, opts);
|
||||
}
|
||||
|
||||
const scheme_end = std.mem.indexOf(u8, base, scheme_full_separator);
|
||||
if (path.len >= 4) { // Minimum: "ws:x"
|
||||
if (std.mem.indexOfScalar(u8, path[0..@min(path.len, 6)], ':')) |pos| {
|
||||
// we know this isn't a complete URL, else the very first check in
|
||||
// this function would have handled it.
|
||||
const possible_special_protocol = path[0..pos];
|
||||
const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };
|
||||
for (special_schemes) |special_scheme| {
|
||||
if (std.ascii.eqlIgnoreCase(possible_special_protocol, special_scheme)) {
|
||||
const rest = path[pos + 1 ..];
|
||||
|
||||
// Check if base has the same scheme
|
||||
const base_scheme_end = std.mem.indexOf(u8, base, "://") orelse 0;
|
||||
if (base_scheme_end > 0 and std.ascii.eqlIgnoreCase(base[0..base_scheme_end], special_scheme)) {
|
||||
// Same scheme - strip it and resolve rest as relative
|
||||
return resolve(allocator, base, rest, opts);
|
||||
}
|
||||
|
||||
// Different scheme - construct absolute URL
|
||||
// Skip any leading slashes in rest
|
||||
var rest_start: usize = 0;
|
||||
while (rest_start < rest.len and (rest[rest_start] == '/' or rest[rest_start] == '\\')) {
|
||||
rest_start += 1;
|
||||
}
|
||||
const rest_trimmed = rest[rest_start..];
|
||||
|
||||
// file: scheme needs empty host (triple slash)
|
||||
const separator = if (std.mem.eql(u8, special_scheme, "file")) ":///" else "://";
|
||||
const normalized = try std.mem.joinZ(allocator, "", &.{ special_scheme, separator, rest_trimmed });
|
||||
return resolve(allocator, "", normalized, opts);
|
||||
}
|
||||
}
|
||||
// Don't know what this is, just try to resolve it through our normal logic
|
||||
}
|
||||
}
|
||||
|
||||
const scheme_end = std.mem.indexOf(u8, base, "://");
|
||||
const authority_start = if (scheme_end) |end| end + 3 else 0;
|
||||
const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;
|
||||
|
||||
if (path[0] == '/') {
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
|
||||
errdefer allocator.free(result);
|
||||
|
||||
return try processResolved(allocator, result, opts);
|
||||
return processResolved(allocator, result, opts);
|
||||
}
|
||||
|
||||
var normalized_base: []const u8 = base[0..path_start];
|
||||
@@ -155,8 +131,6 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c
|
||||
// trailing space so that we always have space to append the null terminator
|
||||
// and so that we can compare the next two characters without needing to length check
|
||||
var out = try std.mem.join(allocator, "", &.{ normalized_base, "/", path, " " });
|
||||
errdefer allocator.free(out);
|
||||
|
||||
const end = out.len - 2;
|
||||
|
||||
const path_marker = path_start + 1;
|
||||
@@ -206,7 +180,7 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c
|
||||
|
||||
// we always have an extra space
|
||||
out[out_i] = 0;
|
||||
return try processResolved(allocator, out[0..out_i :0], opts);
|
||||
return processResolved(allocator, out[0..out_i :0], opts);
|
||||
}
|
||||
|
||||
fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
|
||||
@@ -217,7 +191,7 @@ fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: Resol
|
||||
}
|
||||
|
||||
pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
|
||||
const scheme_end = std.mem.indexOf(u8, url, scheme_full_separator);
|
||||
const scheme_end = std.mem.indexOf(u8, url, "://");
|
||||
const authority_start = if (scheme_end) |end| end + 3 else 0;
|
||||
const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url;
|
||||
|
||||
@@ -386,7 +360,7 @@ pub fn getPassword(raw: [:0]const u8) []const u8 {
|
||||
}
|
||||
|
||||
pub fn getPathname(raw: [:0]const u8) []const u8 {
|
||||
const protocol_end = std.mem.indexOf(u8, raw, scheme_full_separator);
|
||||
const protocol_end = std.mem.indexOf(u8, raw, "://");
|
||||
|
||||
// Handle scheme:path URLs like about:blank (no "://")
|
||||
if (protocol_end == null) {
|
||||
@@ -469,7 +443,7 @@ pub fn getHash(raw: [:0]const u8) []const u8 {
|
||||
}
|
||||
|
||||
pub fn getOrigin(allocator: Allocator, raw: [:0]const u8) !?[]const u8 {
|
||||
const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null;
|
||||
const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;
|
||||
|
||||
// Only HTTP and HTTPS schemes have origins
|
||||
const protocol = raw[0 .. scheme_end + 1];
|
||||
@@ -527,7 +501,7 @@ fn getUserInfo(raw: [:0]const u8) ?[]const u8 {
|
||||
if (!auth.has_user_info) return null;
|
||||
|
||||
// User info is from authority_start to host_start - 1 (excluding the @)
|
||||
const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator).?;
|
||||
const scheme_end = std.mem.indexOf(u8, raw, "://").?;
|
||||
const authority_start = scheme_end + 3;
|
||||
return raw[authority_start .. auth.host_start - 1];
|
||||
}
|
||||
@@ -828,7 +802,7 @@ const AuthorityInfo = struct {
|
||||
// SECURITY: Only looks for @ within the authority portion (before /?#)
|
||||
// to prevent path-based @ injection attacks.
|
||||
fn parseAuthority(raw: []const u8) ?AuthorityInfo {
|
||||
const scheme_end = std.mem.indexOf(u8, raw, scheme_full_separator) orelse return null;
|
||||
const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;
|
||||
const authority_start = scheme_end + 3;
|
||||
|
||||
// Find end of authority FIRST (start of path/query/fragment or end of string)
|
||||
@@ -1033,100 +1007,6 @@ test "URL: resolve" {
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: resolve path scheme" {
|
||||
const Case = struct {
|
||||
base: [:0]const u8,
|
||||
path: [:0]const u8,
|
||||
expected: [:0]const u8,
|
||||
};
|
||||
|
||||
const cases = [_]Case{
|
||||
//same schemes and path as relative path (one slash)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https:/about",
|
||||
.expected = "https://www.example.com/about",
|
||||
},
|
||||
//same schemes and path as relative path (without slash)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https:about",
|
||||
.expected = "https://www.example.com/about",
|
||||
},
|
||||
//same schemes and path as absolute path (two slashes)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https://about",
|
||||
.expected = "https://about",
|
||||
},
|
||||
//different schemes and path as absolute (without slash)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http:about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
//different schemes and path as absolute (with one slash)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http:/about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
//different schemes and path as absolute (with two slashes)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http://about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
//same schemes and path as absolute (with more slashes)
|
||||
.{
|
||||
.base = "https://site/",
|
||||
.path = "https://path",
|
||||
.expected = "https://path",
|
||||
},
|
||||
//path scheme is not special and path as absolute (without additional slashes)
|
||||
.{
|
||||
.base = "http://localhost/",
|
||||
.path = "data:test",
|
||||
.expected = "data:test",
|
||||
},
|
||||
//different schemes and path as absolute (pathscheme=ws)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "ws://about",
|
||||
.expected = "ws://about",
|
||||
},
|
||||
//different schemes and path as absolute (path scheme=wss)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "wss://about",
|
||||
.expected = "wss://about",
|
||||
},
|
||||
//different schemes and path as absolute (path scheme=ftp)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "ftp://about",
|
||||
.expected = "ftp://about",
|
||||
},
|
||||
//different schemes and path as absolute (path scheme=file)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "file://path/to/file",
|
||||
.expected = "file://path/to/file",
|
||||
},
|
||||
//different schemes and path as absolute (path scheme=file, host is empty)
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "file:/path/to/file",
|
||||
.expected = "file:///path/to/file",
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |case| {
|
||||
const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
|
||||
try testing.expectString(case.expected, result);
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: ensureEncoded" {
|
||||
defer testing.reset();
|
||||
|
||||
@@ -1725,3 +1605,84 @@ test "URL: getOrigin" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: resolve path scheme" {
|
||||
const Case = struct {
|
||||
base: [:0]const u8,
|
||||
path: [:0]const u8,
|
||||
expected: [:0]const u8,
|
||||
};
|
||||
|
||||
const cases = [_]Case{
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https:/about",
|
||||
.expected = "https://www.example.com/about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https:about",
|
||||
.expected = "https://www.example.com/about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "https://about",
|
||||
.expected = "https://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http:about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http:/about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "http://about",
|
||||
.expected = "http://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://site/",
|
||||
.path = "https://path",
|
||||
.expected = "https://path",
|
||||
},
|
||||
.{
|
||||
.base = "http://localhost/",
|
||||
.path = "data:test",
|
||||
.expected = "data:test",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "ws://about",
|
||||
.expected = "ws://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "wss://about",
|
||||
.expected = "wss://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "ftp://about",
|
||||
.expected = "ftp://about",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "file://path/to/file",
|
||||
.expected = "file://path/to/file",
|
||||
},
|
||||
.{
|
||||
.base = "https://www.example.com/example",
|
||||
.path = "file:/path/to/file",
|
||||
.expected = "file:///path/to/file",
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |case| {
|
||||
const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
|
||||
try testing.expectString(case.expected, result);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user