Merge pull request #1646 from lightpanda-io/url_encoding

Add url encoding option to URL.resolve
This commit is contained in:
Karl Seguin
2026-02-26 07:02:49 +08:00
committed by GitHub
13 changed files with 338 additions and 20 deletions

View File

@@ -569,7 +569,7 @@ fn scheduleNavigationWithArena(self: *Page, arena: Allocator, request_url: []con
arena,
self.base(),
request_url,
.{ .always_dupe = true },
.{ .always_dupe = true, .encode = true },
);
const session = self._session;
@@ -1206,7 +1206,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
return;
}
const src = try iframe.getSrc(self);
const src = iframe.asElement().getAttributeSafe(comptime .wrap("src")) orelse return;
if (src.len == 0) {
return;
}
@@ -1228,8 +1228,16 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
.timestamp = timestamp(.monotonic),
});
page_frame.navigate(src, .{ .reason = .initialFrameNavigation }) catch |err| {
log.warn(.page, "iframe navigate failure", .{ .url = src, .err = err });
// navigate will dupe the url
const url = try URL.resolve(
self.call_arena,
self.base(),
src,
.{ .encode = true },
);
page_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| {
log.warn(.page, "iframe navigate failure", .{ .url = url, .err = err });
self._pending_loads -= 1;
iframe._content_window = null;
page_frame.deinit();

View File

@@ -20,44 +20,61 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
/// Options accepted by `resolve`. They are passed at comptime, so each
/// combination is specialized at compile time.
const ResolveOpts = struct {
/// When true, the resolved URL is run through `encodeURL`, which
/// percent-encodes its path, query and fragment. When false the resolved
/// URL is returned without any encoding.
encode: bool = false,
/// When true, `resolve` copies the input into memory obtained from the
/// given allocator even in the branches where it could otherwise return
/// `path` or `base` directly.
always_dupe: bool = false,
};
// path is anytype, so that it can be used with both []const u8 and [:0]const u8
pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
const PT = @TypeOf(path);
if (base.len == 0 or isCompleteHTTPUrl(path)) {
if (comptime opts.always_dupe or !isNullTerminated(PT)) {
return allocator.dupeZ(u8, path);
const duped = try allocator.dupeZ(u8, path);
return encodeURL(allocator, duped, opts);
}
if (comptime opts.encode) {
return encodeURL(allocator, path, opts);
}
return path;
}
if (path.len == 0) {
if (comptime opts.always_dupe) {
return allocator.dupeZ(u8, base);
const duped = try allocator.dupeZ(u8, base);
return encodeURL(allocator, duped, opts);
}
if (comptime opts.encode) {
return encodeURL(allocator, base, opts);
}
return base;
}
if (path[0] == '?') {
const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len;
return std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
return encodeURL(allocator, result, opts);
}
if (path[0] == '#') {
const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len;
return std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
return encodeURL(allocator, result, opts);
}
if (std.mem.startsWith(u8, path, "//")) {
// network-path reference
const index = std.mem.indexOfScalar(u8, base, ':') orelse {
if (comptime isNullTerminated(PT)) {
if (comptime opts.encode) {
return encodeURL(allocator, path, opts);
}
return path;
}
return allocator.dupeZ(u8, path);
const duped = try allocator.dupeZ(u8, path);
return encodeURL(allocator, duped, opts);
};
const protocol = base[0 .. index + 1];
return std.mem.joinZ(allocator, "", &.{ protocol, path });
const result = try std.mem.joinZ(allocator, "", &.{ protocol, path });
return encodeURL(allocator, result, opts);
}
const scheme_end = std.mem.indexOf(u8, base, "://");
@@ -65,7 +82,8 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;
if (path[0] == '/') {
return std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
return encodeURL(allocator, result, opts);
}
var normalized_base: []const u8 = base[0..path_start];
@@ -127,7 +145,115 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
// we always have an extra space
out[out_i] = 0;
return out[0..out_i :0];
return encodeURL(allocator, out[0..out_i :0], opts);
}
/// Percent-encodes the path, query and fragment of `url`.
///
/// When `opts.encode` is false this is a no-op and `url` is returned as-is.
/// Otherwise the scheme and authority (everything before the first `/`, `?`
/// or `#` that follows an optional `scheme://`) are copied verbatim and the
/// remaining components are encoded individually with `percentEncodeSegment`.
///
/// Component boundaries:
///   - the fragment starts at the first `#`;
///   - the query starts at the first `?` that appears *before* the fragment
///     (a `?` inside the fragment is fragment data, not a query delimiter);
///   - the path is whatever sits between the authority and the query/fragment
///     and may be empty (e.g. `https://host?q=a b`).
///
/// Returns `url` itself when nothing needed encoding, otherwise a newly
/// allocated, null-terminated string.
///
/// NOTE(review): intermediate encoded segments are never freed, and the
/// returned slice is a view into a list with spare capacity. The visible
/// callers pass arena allocators; confirm before using a non-arena allocator.
fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
    if (!comptime opts.encode) {
        return url;
    }

    const scheme_end = std.mem.indexOf(u8, url, "://");
    const authority_start = if (scheme_end) |end| end + 3 else 0;

    // First delimiter after the authority. If there is none, the URL has no
    // path, query or fragment and there is nothing to encode.
    const tail_start = std.mem.indexOfAnyPos(u8, url, authority_start, "/?#") orelse return url;

    // Locate the fragment first so that a '?' inside it is not mistaken for
    // the start of a query.
    const fragment_start = std.mem.indexOfScalarPos(u8, url, tail_start, '#');
    const before_fragment = url[0 .. fragment_start orelse url.len];
    const query_start = std.mem.indexOfScalarPos(u8, before_fragment, tail_start, '?');

    const path_end = query_start orelse fragment_start orelse url.len;
    const path_to_encode: []const u8 = url[tail_start..path_end];
    const query_to_encode: ?[]const u8 = if (query_start) |qs| before_fragment[qs + 1 ..] else null;
    const fragment_to_encode: ?[]const u8 = if (fragment_start) |fs| url[fs + 1 ..] else null;

    const encoded_path = try percentEncodeSegment(allocator, path_to_encode, true);
    const encoded_query: ?[]const u8 = if (query_to_encode) |q|
        try percentEncodeSegment(allocator, q, false)
    else
        null;
    const encoded_fragment: ?[]const u8 = if (fragment_to_encode) |f|
        try percentEncodeSegment(allocator, f, false)
    else
        null;

    // percentEncodeSegment hands back its input slice untouched when no byte
    // needed escaping, so pointer equality tells us whether anything changed.
    const path_unchanged = encoded_path.ptr == path_to_encode.ptr;
    const query_unchanged = if (encoded_query) |eq| eq.ptr == query_to_encode.?.ptr else true;
    const fragment_unchanged = if (encoded_fragment) |ef| ef.ptr == fragment_to_encode.?.ptr else true;
    if (path_unchanged and query_unchanged and fragment_unchanged) {
        return url;
    }

    var buf = try std.ArrayList(u8).initCapacity(allocator, url.len + 20);
    errdefer buf.deinit(allocator);

    try buf.appendSlice(allocator, url[0..tail_start]);
    try buf.appendSlice(allocator, encoded_path);
    if (encoded_query) |eq| {
        try buf.append(allocator, '?');
        try buf.appendSlice(allocator, eq);
    }
    if (encoded_fragment) |ef| {
        try buf.append(allocator, '#');
        try buf.appendSlice(allocator, ef);
    }

    // Append the sentinel, then return a view that excludes it from the length.
    try buf.append(allocator, 0);
    return buf.items[0 .. buf.items.len - 1 :0];
}
/// Percent-encodes every byte of `segment` for which `shouldPercentEncode`
/// returns true, emitting `%XX` with upper-case hex digits.
///
/// Existing `%XX` escapes (a '%' followed by two hex digits) are copied
/// through unchanged so they are not double-encoded; any other '%' is
/// escaped like every other disallowed byte.
///
/// Returns `segment` itself (same pointer) when no byte needs escaping,
/// otherwise a newly allocated buffer.
fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime is_path: bool) ![]const u8 {
    // Fast path: find the first byte that is not allowed as-is. If there is
    // none, hand the input back without allocating.
    const first_unsafe = for (segment, 0..) |byte, idx| {
        if (shouldPercentEncode(byte, is_path)) break idx;
    } else return segment;

    const upper_hex = "0123456789ABCDEF";

    var out = try std.ArrayList(u8).initCapacity(allocator, segment.len + 10);

    // Everything before the first unsafe byte is known to be clean (and free
    // of '%', which itself counts as unsafe), so it can be copied verbatim.
    out.appendSliceAssumeCapacity(segment[0..first_unsafe]);

    var pos: usize = first_unsafe;
    while (pos < segment.len) {
        const byte = segment[pos];

        // An already-encoded sequence needs two more bytes after the '%'.
        const is_existing_escape = byte == '%' and
            pos + 2 < segment.len and
            std.ascii.isHex(segment[pos + 1]) and
            std.ascii.isHex(segment[pos + 2]);

        if (is_existing_escape) {
            try out.appendSlice(allocator, segment[pos .. pos + 3]);
            pos += 3;
            continue;
        }

        if (shouldPercentEncode(byte, is_path)) {
            const escaped = [3]u8{ '%', upper_hex[byte >> 4], upper_hex[byte & 0x0F] };
            try out.appendSlice(allocator, &escaped);
        } else {
            try out.append(allocator, byte);
        }
        pos += 1;
    }

    return out.items;
}
/// Reports whether byte `c` must be written as a `%XX` escape.
///
/// Allowed as-is: ASCII letters and digits, the RFC 3986 unreserved marks
/// `-._~`, the sub-delims `!$&'()*+,;=`, and the separators `/`, `:` and `@`.
/// `?` is allowed only when `is_path` is false. Every other byte, including
/// space, '%' and all bytes >= 0x80, must be encoded.
fn shouldPercentEncode(c: u8, comptime is_path: bool) bool {
    if (std.ascii.isAlphanumeric(c)) return false;

    // unreserved marks ++ sub-delims ++ separators
    const always_allowed = "-._~" ++ "!$&'()*+,;=" ++ "/:@";
    if (std.mem.indexOfScalar(u8, always_allowed, c) != null) return false;

    // '?' may appear literally in a query or fragment, but not in a path.
    if (c == '?') return is_path;

    return true;
}
fn isNullTerminated(comptime value: type) bool {
@@ -691,6 +817,172 @@ test "URL: resolve" {
}
}
// Table-driven check of `resolve` with `.encode = true`: each case resolves
// `path` against `base` and compares the percent-encoded result to `expected`.
test "URL: resolve with encoding" {
    defer testing.reset();

    const Case = struct {
        base: [:0]const u8,
        path: [:0]const u8,
        expected: [:0]const u8,
    };

    const cases = [_]Case{
        // Spaces should be encoded as %20, but ! is allowed
        .{
            .base = "https://example.com/dir/",
            .path = "over 9000!",
            .expected = "https://example.com/dir/over%209000!",
        },
        .{
            .base = "https://example.com/",
            .path = "hello world.html",
            .expected = "https://example.com/hello%20world.html",
        },
        // Multiple consecutive spaces: each one is encoded individually
        .{
            .base = "https://example.com/",
            .path = "path with  multiple   spaces",
            .expected = "https://example.com/path%20with%20%20multiple%20%20%20spaces",
        },
        // Special characters that need encoding
        .{
            .base = "https://example.com/",
            .path = "file[1].html",
            .expected = "https://example.com/file%5B1%5D.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file{name}.html",
            .expected = "https://example.com/file%7Bname%7D.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file<test>.html",
            .expected = "https://example.com/file%3Ctest%3E.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file\"quote\".html",
            .expected = "https://example.com/file%22quote%22.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file|pipe.html",
            .expected = "https://example.com/file%7Cpipe.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file\\backslash.html",
            .expected = "https://example.com/file%5Cbackslash.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file^caret.html",
            .expected = "https://example.com/file%5Ecaret.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file`backtick`.html",
            .expected = "https://example.com/file%60backtick%60.html",
        },
        // Characters that should NOT be encoded
        .{
            .base = "https://example.com/",
            .path = "path-with_under~tilde.html",
            .expected = "https://example.com/path-with_under~tilde.html",
        },
        .{
            .base = "https://example.com/",
            .path = "path/with/slashes",
            .expected = "https://example.com/path/with/slashes",
        },
        .{
            .base = "https://example.com/",
            .path = "sub-delims!$&'()*+,;=.html",
            .expected = "https://example.com/sub-delims!$&'()*+,;=.html",
        },
        // Already encoded characters should not be double-encoded
        .{
            .base = "https://example.com/",
            .path = "already%20encoded",
            .expected = "https://example.com/already%20encoded",
        },
        .{
            .base = "https://example.com/",
            .path = "file%5B1%5D.html",
            .expected = "https://example.com/file%5B1%5D.html",
        },
        // Mix of encoded and unencoded
        .{
            .base = "https://example.com/",
            .path = "part%20encoded and not",
            .expected = "https://example.com/part%20encoded%20and%20not",
        },
        // Query strings and fragments ARE encoded
        .{
            .base = "https://example.com/",
            .path = "file name.html?query=value with spaces",
            .expected = "https://example.com/file%20name.html?query=value%20with%20spaces",
        },
        .{
            .base = "https://example.com/",
            .path = "file name.html#anchor with spaces",
            .expected = "https://example.com/file%20name.html#anchor%20with%20spaces",
        },
        .{
            .base = "https://example.com/",
            .path = "file.html?hello=world !",
            .expected = "https://example.com/file.html?hello=world%20!",
        },
        // Query structural characters should NOT be encoded
        .{
            .base = "https://example.com/",
            .path = "file.html?a=1&b=2",
            .expected = "https://example.com/file.html?a=1&b=2",
        },
        // Relative paths with encoding
        .{
            .base = "https://example.com/dir/page.html",
            .path = "../other dir/file.html",
            .expected = "https://example.com/other%20dir/file.html",
        },
        .{
            .base = "https://example.com/dir/",
            .path = "./sub dir/file.html",
            .expected = "https://example.com/dir/sub%20dir/file.html",
        },
        // Absolute paths with encoding
        .{
            .base = "https://example.com/some/path",
            .path = "/absolute path/file.html",
            .expected = "https://example.com/absolute%20path/file.html",
        },
        // Non-ASCII input: each UTF-8 byte is percent-encoded
        .{
            .base = "https://example.com/",
            .path = "café",
            .expected = "https://example.com/caf%C3%A9",
        },
        // Empty path
        .{
            .base = "https://example.com/",
            .path = "",
            .expected = "https://example.com/",
        },
        // Complete URL as path: the base is ignored, but the URL is still encoded
        .{
            .base = "https://example.com/",
            .path = "https://other.com/path with spaces",
            .expected = "https://other.com/path%20with%20spaces",
        },
    };

    for (cases) |case| {
        const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true });
        try testing.expectString(case.expected, result);
    }
}
test "URL: eqlDocument" {
defer testing.reset();
{

View File

@@ -245,3 +245,11 @@
testing.expectEqual('', b.toString());
}
</script>
<script id=url_encode>
{
let a = document.createElement('a');
a.href = 'over 9000!';
testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!', a.href);
}
</script>

View File

@@ -172,3 +172,12 @@
});
}
</script>
<script id=url_encode>
{
let img = document.createElement('img');
img.src = 'over 9000!?hello=world !';
testing.expectEqual('over 9000!?hello=world !', img.getAttribute('src'));
testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!?hello=world%20!', img.src);
}
</script>

View File

@@ -7,7 +7,7 @@
}
</script>
<iframe id=f1 onload="frame1Onload" src="support/sub1.html"></iframe>
<iframe id=f1 onload="frame1Onload" src="support/sub 1.html"></iframe>
<iframe id=f2 src="support/sub2.html"></iframe>
<script id="basic">
@@ -25,6 +25,7 @@
testing.expectEqual(0, $('#f1').childNodes.length);
testing.expectEqual(testing.BASE_URL + 'frames/support/sub%201.html', $('#f1').src);
testing.expectEqual(window[0], $('#f1').contentWindow);
testing.expectEqual(window[1], $('#f2').contentWindow);

View File

@@ -44,7 +44,7 @@ pub fn getHref(self: *Anchor, page: *Page) ![]const u8 {
if (href.len == 0) {
return "";
}
return URL.resolve(page.call_arena, page.base(), href, .{});
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
}
pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void {

View File

@@ -50,7 +50,7 @@ pub fn getContentDocument(self: *const IFrame) ?*Document {
pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 {
if (self._src.len == 0) return "";
return try URL.resolve(page.call_arena, page.base(), self._src, .{});
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
}
pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void {

View File

@@ -46,7 +46,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 {
}
// Always resolve the src against the page URL
return URL.resolve(page.call_arena, page.base(), src, .{});
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
}
pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void {

View File

@@ -46,7 +46,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 {
}
// Always resolve the href against the page URL
return URL.resolve(page.call_arena, page.base(), href, .{});
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
}
pub fn setHref(self: *Link, value: []const u8, page: *Page) !void {

View File

@@ -236,7 +236,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 {
return "";
}
const URL = @import("../../URL.zig");
return URL.resolve(page.call_arena, page.base(), src, .{});
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
}
pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void {

View File

@@ -46,7 +46,7 @@ pub fn asNode(self: *Script) *Node {
pub fn getSrc(self: *const Script, page: *Page) ![]const u8 {
if (self._src.len == 0) return "";
return try URL.resolve(page.call_arena, page.base(), self._src, .{});
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
}
pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void {

View File

@@ -59,7 +59,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 {
}
const URL = @import("../../URL.zig");
return URL.resolve(page.call_arena, page.base(), poster, .{});
return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true });
}
pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void {