Merge pull request #1975 from lightpanda-io/percent-encode-path

fix: percent-encode pathname in URL.setPathname per URL spec
This commit is contained in:
Karl Seguin
2026-03-24 07:41:27 +08:00
committed by GitHub
2 changed files with 95 additions and 11 deletions

View File

@@ -204,7 +204,7 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
return buf.items[0 .. buf.items.len - 1 :0]; return buf.items[0 .. buf.items.len - 1 :0];
} }
const EncodeSet = enum { path, query, userinfo }; const EncodeSet = enum { path, query, userinfo, fragment };
fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 { fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 {
// Check if encoding is needed // Check if encoding is needed
@@ -256,8 +256,10 @@ fn shouldPercentEncode(c: u8, comptime encode_set: EncodeSet) bool {
';', '=' => encode_set == .userinfo, ';', '=' => encode_set == .userinfo,
// Separators: userinfo must encode these // Separators: userinfo must encode these
'/', ':', '@' => encode_set == .userinfo, '/', ':', '@' => encode_set == .userinfo,
// '?' is allowed in queries but not in paths or userinfo // '?' is allowed in queries only
'?' => encode_set != .query, '?' => encode_set != .query,
// '#' is allowed in fragments only
'#' => encode_set != .fragment,
// Everything else needs encoding (including space) // Everything else needs encoding (including space)
else => true, else => true,
}; };
@@ -595,11 +597,13 @@ pub fn setPathname(current: [:0]const u8, value: []const u8, allocator: Allocato
const search = getSearch(current); const search = getSearch(current);
const hash = getHash(current); const hash = getHash(current);
const encoded = try percentEncodeSegment(allocator, value, .path);
// Add / prefix if not present and value is not empty // Add / prefix if not present and value is not empty
const pathname = if (value.len > 0 and value[0] != '/') const pathname = if (encoded.len > 0 and encoded[0] != '/')
try std.fmt.allocPrint(allocator, "/{s}", .{value}) try std.fmt.allocPrint(allocator, "/{s}", .{encoded})
else else
value; encoded;
return buildUrl(allocator, protocol, host, pathname, search, hash); return buildUrl(allocator, protocol, host, pathname, search, hash);
} }
@@ -610,11 +614,13 @@ pub fn setSearch(current: [:0]const u8, value: []const u8, allocator: Allocator)
const pathname = getPathname(current); const pathname = getPathname(current);
const hash = getHash(current); const hash = getHash(current);
const encoded = try percentEncodeSegment(allocator, value, .query);
// Add ? prefix if not present and value is not empty // Add ? prefix if not present and value is not empty
const search = if (value.len > 0 and value[0] != '?') const search = if (encoded.len > 0 and value[0] != '?')
try std.fmt.allocPrint(allocator, "?{s}", .{value}) try std.fmt.allocPrint(allocator, "?{s}", .{encoded})
else else
value; encoded;
return buildUrl(allocator, protocol, host, pathname, search, hash); return buildUrl(allocator, protocol, host, pathname, search, hash);
} }
@@ -625,11 +631,13 @@ pub fn setHash(current: [:0]const u8, value: []const u8, allocator: Allocator) !
const pathname = getPathname(current); const pathname = getPathname(current);
const search = getSearch(current); const search = getSearch(current);
const encoded = try percentEncodeSegment(allocator, value, .fragment);
// Add # prefix if not present and value is not empty // Add # prefix if not present and value is not empty
const hash = if (value.len > 0 and value[0] != '#') const hash = if (encoded.len > 0 and encoded[0] != '#')
try std.fmt.allocPrint(allocator, "#{s}", .{value}) try std.fmt.allocPrint(allocator, "#{s}", .{encoded})
else else
value; encoded;
return buildUrl(allocator, protocol, host, pathname, search, hash); return buildUrl(allocator, protocol, host, pathname, search, hash);
} }
@@ -1422,3 +1430,22 @@ test "URL: getHost" {
try testing.expectEqualSlices(u8, "example.com:8080", getHost("https://user:pass@example.com:8080/page")); try testing.expectEqualSlices(u8, "example.com:8080", getHost("https://user:pass@example.com:8080/page"));
try testing.expectEqualSlices(u8, "", getHost("not-a-url")); try testing.expectEqualSlices(u8, "", getHost("not-a-url"));
} }
test "URL: setPathname percent-encodes" {
    // setPathname performs intermediate allocations, so back the test with an
    // arena (as production callers do) and release everything in one go.
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const Case = struct {
        current: [:0]const u8,
        value: []const u8,
        expected: []const u8,
    };

    const cases = [_]Case{
        // A space in the new pathname is emitted as %20 (and a leading '/' is added).
        .{
            .current = "http://a/",
            .value = "c d",
            .expected = "http://a/c%20d",
        },
        // An existing %XX escape is passed through rather than encoded a second time.
        .{
            .current = "https://example.com/path",
            .value = "/already%20encoded",
            .expected = "https://example.com/already%20encoded",
        },
        // The query string and fragment of the current URL survive the pathname change.
        .{
            .current = "https://example.com/path?a=b#hash",
            .value = "/new path",
            .expected = "https://example.com/new%20path?a=b#hash",
        },
    };

    for (cases) |case| {
        const actual = try setPathname(case.current, case.value, arena);
        try testing.expectEqualSlices(u8, case.expected, actual);
    }
}

View File

@@ -591,6 +591,35 @@
testing.expectEqual('/new/path', url.pathname); testing.expectEqual('/new/path', url.pathname);
} }
// Assigning to `pathname` must percent-encode spaces, both in `href` and in `pathname`.
{
    const target = new URL('http://a/');
    target.pathname = 'c d';

    testing.expectEqual('http://a/c%20d', target.href);
}

{
    const target = new URL('https://example.com/path');
    target.pathname = '/path with spaces/file name';

    testing.expectEqual('https://example.com/path%20with%20spaces/file%20name', target.href);
    testing.expectEqual('/path%20with%20spaces/file%20name', target.pathname);
}

// A value that already contains a %XX escape must come out unchanged (no double encoding).
{
    const target = new URL('https://example.com/path');
    target.pathname = '/already%20encoded';

    testing.expectEqual('https://example.com/already%20encoded', target.href);
}

// Mirrors the exact probe the URL polyfill runs to decide whether the native URL is good enough:
// resolve a relative URL, set a pathname containing a space, then check href and searchParams.
{
    const target = new URL('b', 'http://a');
    target.pathname = 'c d';

    testing.expectEqual('http://a/c%20d', target.href);
    testing.expectEqual(true, !!target.searchParams);
}
{ {
const url = new URL('https://example.com/path'); const url = new URL('https://example.com/path');
url.search = '?a=b'; url.search = '?a=b';
@@ -656,6 +685,20 @@
testing.expectEqual('', url.hash); testing.expectEqual('', url.hash);
} }
// Hash assigned with a leading '#': the space is percent-encoded and '#' is kept as the prefix.
{
    const target = new URL('https://example.com/path');
    target.hash = '#a b';

    testing.expectEqual('https://example.com/path#a%20b', target.href);
    testing.expectEqual('#a%20b', target.hash);
}

// Hash assigned without a leading '#': same result as the prefixed form.
{
    const target = new URL('https://example.com/path');
    target.hash = 'a b';

    testing.expectEqual('https://example.com/path#a%20b', target.href);
    testing.expectEqual('#a%20b', target.hash);
}
{ {
const url = new URL('https://example.com/path?a=b'); const url = new URL('https://example.com/path?a=b');
url.search = ''; url.search = '';
@@ -673,6 +716,20 @@
testing.expectEqual(null, url.searchParams.get('a')); testing.expectEqual(null, url.searchParams.get('a'));
} }
// Reassigning `search` with values (and keys) containing spaces: searchParams must expose
// the decoded key/value and drop the entries from the previous query string.
{
    const target = new URL('https://example.com/path?a=b');

    // Grab searchParams before `search` is reassigned, then read the initial value through it.
    const initialParams = target.searchParams;
    testing.expectEqual('b', initialParams.get('a'));

    // Space inside a value.
    target.search = 'c=d b';
    testing.expectEqual('d b', target.searchParams.get('c'));
    testing.expectEqual(null, target.searchParams.get('a'));

    // Space inside both the key and the value.
    target.search = 'c d=d b';
    testing.expectEqual('d b', target.searchParams.get('c d'));
    testing.expectEqual(null, target.searchParams.get('c'));
}
{ {
const url = new URL('https://example.com/path?a=b'); const url = new URL('https://example.com/path?a=b');
const sp = url.searchParams; const sp = url.searchParams;