Improve authority parsing

Only look for @ within the first part of the URL (up to the first possible separator, i.e. /, # or ?). This fixes potentially incorrect (and insecure) results from getOrigin and getHost, both of which use the new helper. Also make port parsing IPv6-aware.
2026-03-28 15:40:04 +00:00 · 2026-03-26 13:22:56 +08:00
parent 2812b8f07c
commit 0588cc374d
1 changed files with 165 additions and 44 deletions
--- a/src/browser/URL.zig
+++ b/src/browser/URL.zig
@@ -357,25 +357,38 @@ pub fn isHTTPS(raw: [:0]const u8) bool {
 pub fn getHostname(raw: [:0]const u8) []const u8 {
    const host = getHost(raw);
-    const pos = std.mem.lastIndexOfScalar(u8, host, ':') orelse return host;
+    const port_sep = findPortSeparator(host) orelse return host;
-    return host[0..pos];
+    return host[0..port_sep];
 }
 pub fn getPort(raw: [:0]const u8) []const u8 {
    const host = getHost(raw);
-    const pos = std.mem.lastIndexOfScalar(u8, host, ':') orelse return "";
+    const port_sep = findPortSeparator(host) orelse return "";
    return host[port_sep + 1 ..];
 }
-    if (pos + 1 >= host.len) {
+// Finds the colon separating host from port, handling IPv6 bracket notation.
-        return "";
+// For IPv6 like "[::1]:8080", returns position of ":" after "]".
 // For IPv6 like "[::1]" (no port), returns null.
 // For regular hosts, returns position of last ":" if followed by digits.
 fn findPortSeparator(host: []const u8) ?usize {
    if (host.len > 0 and host[0] == '[') {
        // IPv6: find closing bracket, port separator must be after it
        const bracket_end = std.mem.indexOfScalar(u8, host, ']') orelse return null;
        if (bracket_end + 1 < host.len and host[bracket_end + 1] == ':') {
            return bracket_end + 1;
        }
        return null;
    }
    // Regular host: find last colon and verify it's followed by digits
    const pos = std.mem.lastIndexOfScalar(u8, host, ':') orelse return null;
    if (pos + 1 >= host.len) return null;
    for (host[pos + 1 ..]) |c| {
-        if (c < '0' or c > '9') {
+        if (c < '0' or c > '9') return null;
            return "";
    }
-    }
+    return pos;
    return host[pos + 1 ..];
 }
 pub fn getSearch(raw: [:0]const u8) []const u8 {
@@ -403,21 +416,12 @@ pub fn getOrigin(allocator: Allocator, raw: [:0]const u8) !?[]const u8 {
        return null;
    }
-    var authority_start = scheme_end + 3;
+    const auth = parseAuthority(raw) orelse return null;
-    const has_user_info = if (std.mem.indexOf(u8, raw[authority_start..], "@")) |pos| blk: {
+    const has_user_info = auth.has_user_info;
-        authority_start += pos + 1;
+    const authority_end = auth.host_end;
        break :blk true;
    } else false;
    // Find end of authority (start of path/query/fragment or end of string)
    const authority_end_relative = std.mem.indexOfAny(u8, raw[authority_start..], "/?#");
    const authority_end = if (authority_end_relative) |end|
        authority_start + end
    else
        raw.len;
    // Check for port in the host:port section
-    const host_part = raw[authority_start..authority_end];
+    const host_part = auth.getHost(raw);
    if (std.mem.lastIndexOfScalar(u8, host_part, ':')) |colon_pos_in_host| {
        const port = host_part[colon_pos_in_host + 1 ..];
@@ -458,31 +462,18 @@ pub fn getOrigin(allocator: Allocator, raw: [:0]const u8) !?[]const u8 {
 }
 fn getUserInfo(raw: [:0]const u8) ?[]const u8 {
-    const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;
+    const auth = parseAuthority(raw) orelse return null;
    if (!auth.has_user_info) return null;
    // User info is from authority_start to host_start - 1 (excluding the @)
    const scheme_end = std.mem.indexOf(u8, raw, "://").?;
    const authority_start = scheme_end + 3;
-
+    return raw[authority_start .. auth.host_start - 1];
    const pos = std.mem.indexOfScalar(u8, raw[authority_start..], '@') orelse return null;
    const path_start = std.mem.indexOfScalarPos(u8, raw, authority_start, '/') orelse raw.len;
    const full_pos = authority_start + pos;
    if (full_pos < path_start) {
        return raw[authority_start..full_pos];
    }
    return null;
 }
 pub fn getHost(raw: [:0]const u8) []const u8 {
-    const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return "";
+    const auth = parseAuthority(raw) orelse return "";
-
+    return auth.getHost(raw);
    var authority_start = scheme_end + 3;
    if (std.mem.indexOf(u8, raw[authority_start..], "@")) |pos| {
        authority_start += pos + 1;
    }
    const authority = raw[authority_start..];
    const path_start = std.mem.indexOfAny(u8, authority, "/?#") orelse return authority;
    return authority[0..path_start];
 }
 // Returns true if these two URLs point to the same document.
@@ -761,6 +752,47 @@ pub fn unescape(arena: Allocator, input: []const u8) ![]const u8 {
    return result.items;
 }
 // Byte offsets describing where the host[:port] section sits inside a raw URL.
 const AuthorityInfo = struct {
    // Index of the first host byte (just past the userinfo '@' when present).
    host_start: usize,
    // Index one past the last byte of the host[:port] section.
    host_end: usize,
    // True when an '@' was found inside the authority.
    has_user_info: bool,

    // Returns the host[:port] slice of `raw`. `raw` must be the same URL that
    // produced this AuthorityInfo, otherwise the offsets are meaningless.
    fn getHost(self: AuthorityInfo, raw: []const u8) []const u8 {
        return raw[self.host_start..self.host_end];
    }
 };

 // Parses the authority component of a URL, correctly handling userinfo.
 // Returns null if the URL doesn't have a valid scheme (no "://").
 //
 // SECURITY: Only looks for @ within the authority portion (before /?#)
 // to prevent path-based @ injection attacks. Within the authority, the
 // LAST @ terminates the userinfo: for "http://a@evil.com@victim.com/" the
 // host that is actually contacted is "victim.com", so reporting anything
 // containing "evil.com" as the host would yield a wrong origin.
 fn parseAuthority(raw: []const u8) ?AuthorityInfo {
    const scheme_end = std.mem.indexOf(u8, raw, "://") orelse return null;
    const authority_start = scheme_end + 3;

    // Find end of authority FIRST (start of path/query/fragment or end of string)
    const authority_end = if (std.mem.indexOfAny(u8, raw[authority_start..], "/?#")) |end|
        authority_start + end
    else
        raw.len;

    // Only look for @ within the authority portion, not in path/query/fragment,
    // and take the last one so that an extra @ cannot smuggle in a fake host.
    const authority_portion = raw[authority_start..authority_end];
    const last_at = std.mem.lastIndexOfScalar(u8, authority_portion, '@');

    return .{
        .host_start = if (last_at) |pos| authority_start + pos + 1 else authority_start,
        .host_end = authority_end,
        .has_user_info = last_at != null,
    };
 }
 const testing = @import("../testing.zig");
 test "URL: isCompleteHTTPUrl" {
    try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about"));
@@ -1429,6 +1461,42 @@ test "URL: getHost" {
    try testing.expectEqualSlices(u8, "example.com", getHost("https://user:pass@example.com/page"));
    try testing.expectEqualSlices(u8, "example.com:8080", getHost("https://user:pass@example.com:8080/page"));
    try testing.expectEqualSlices(u8, "", getHost("not-a-url"));
    // SECURITY: @ in path must NOT be treated as userinfo separator
    try testing.expectEqualSlices(u8, "evil.example.com", getHost("http://evil.example.com/@victim.example.com/"));
    try testing.expectEqualSlices(u8, "evil.example.com", getHost("https://evil.example.com/path/@victim.example.com"));
    // IPv6 addresses
    try testing.expectEqualSlices(u8, "[::1]:8080", getHost("http://[::1]:8080/path"));
    try testing.expectEqualSlices(u8, "[::1]", getHost("http://[::1]/path"));
    try testing.expectEqualSlices(u8, "[2001:db8::1]", getHost("https://[2001:db8::1]/"));
 }
 test "URL: getHostname" {
    // Each row pairs an input URL with the hostname (port removed) that
    // getHostname is expected to return for it.
    const Row = struct {
        input: [:0]const u8,
        want: []const u8,
    };

    const rows = [_]Row{
        // Regular hosts
        .{ .input = "https://example.com:8080/path", .want = "example.com" },
        .{ .input = "https://example.com/path", .want = "example.com" },
        // IPv6 with port
        .{ .input = "http://[::1]:8080/path", .want = "[::1]" },
        // IPv6 without port - must return full bracket notation
        .{ .input = "http://[::1]/path", .want = "[::1]" },
        .{ .input = "https://[2001:db8::1]/", .want = "[2001:db8::1]" },
    };

    for (rows) |row| {
        try testing.expectEqualSlices(u8, row.want, getHostname(row.input));
    }
 }
 test "URL: getPort" {
    // Each row pairs an input URL with the port string getPort is expected
    // to return; an empty string means "no port present".
    const Row = struct {
        input: [:0]const u8,
        want: []const u8,
    };

    const rows = [_]Row{
        // Regular hosts
        .{ .input = "https://example.com:8080/path", .want = "8080" },
        .{ .input = "https://example.com/path", .want = "" },
        // IPv6 with port
        .{ .input = "http://[::1]:8080/path", .want = "8080" },
        .{ .input = "http://[2001:db8::1]:3000/", .want = "3000" },
        // IPv6 without port - colons inside brackets must not be treated as port separator
        .{ .input = "http://[::1]/path", .want = "" },
        .{ .input = "https://[2001:db8::1]/", .want = "" },
    };

    for (rows) |row| {
        try testing.expectEqualSlices(u8, row.want, getPort(row.input));
    }
 }
 test "URL: setPathname percent-encodes" {
@@ -1449,3 +1517,56 @@ test "URL: setPathname percent-encodes" {
    const result3 = try setPathname("https://example.com/path?a=b#hash", "/new path", allocator);
    try testing.expectEqualSlices(u8, "https://example.com/new%20path?a=b#hash", result3);
 }
 test "URL: getOrigin" {
    defer testing.reset();

    // One table row: the URL handed to getOrigin and the origin string it
    // should produce (null when no origin is expected).
    const Row = struct {
        url: [:0]const u8,
        expected: ?[]const u8,
    };

    const table = [_]Row{
        // Basic HTTP/HTTPS origins
        .{ .url = "http://example.com/path", .expected = "http://example.com" },
        .{ .url = "https://example.com/path", .expected = "https://example.com" },
        .{ .url = "https://example.com:8080/path", .expected = "https://example.com:8080" },

        // Default ports should be stripped
        .{ .url = "http://example.com:80/path", .expected = "http://example.com" },
        .{ .url = "https://example.com:443/path", .expected = "https://example.com" },

        // User info should be stripped from origin
        .{ .url = "http://user:pass@example.com/path", .expected = "http://example.com" },
        .{ .url = "https://user@example.com:8080/path", .expected = "https://example.com:8080" },

        // Non-HTTP schemes return null
        .{ .url = "ftp://example.com/path", .expected = null },
        .{ .url = "file:///path/to/file", .expected = null },
        .{ .url = "about:blank", .expected = null },

        // Query and fragment should not affect origin
        .{ .url = "https://example.com?query=1", .expected = "https://example.com" },
        .{ .url = "https://example.com#fragment", .expected = "https://example.com" },
        .{ .url = "https://example.com/path?q=1#frag", .expected = "https://example.com" },

        // SECURITY: @ in path must NOT be treated as userinfo separator
        // This would be a Same-Origin Policy bypass if mishandled
        .{ .url = "http://evil.example.com/@victim.example.com/", .expected = "http://evil.example.com" },
        .{ .url = "https://evil.example.com/path/@victim.example.com/steal", .expected = "https://evil.example.com" },
        .{ .url = "http://evil.example.com/@victim.example.com:443/", .expected = "http://evil.example.com" },

        // @ in query/fragment must also not affect origin
        .{ .url = "https://example.com/path?user=foo@bar.com", .expected = "https://example.com" },
        .{ .url = "https://example.com/path#user@host", .expected = "https://example.com" },
    };

    for (table) |row| {
        const origin = try getOrigin(testing.arena_allocator, row.url);

        // Rows without an expected origin must yield null and nothing else.
        const want = row.expected orelse {
            try testing.expectEqual(null, origin);
            continue;
        };
        try testing.expectString(want, origin.?);
    }
 }