mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 04:34:44 +00:00
Merge pull request #1646 from lightpanda-io/url_encoding
Add url encoding option to URL.resolve
This commit is contained in:
@@ -569,7 +569,7 @@ fn scheduleNavigationWithArena(self: *Page, arena: Allocator, request_url: []con
|
||||
arena,
|
||||
self.base(),
|
||||
request_url,
|
||||
.{ .always_dupe = true },
|
||||
.{ .always_dupe = true, .encode = true },
|
||||
);
|
||||
|
||||
const session = self._session;
|
||||
@@ -1206,7 +1206,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
|
||||
return;
|
||||
}
|
||||
|
||||
const src = try iframe.getSrc(self);
|
||||
const src = iframe.asElement().getAttributeSafe(comptime .wrap("src")) orelse return;
|
||||
if (src.len == 0) {
|
||||
return;
|
||||
}
|
||||
@@ -1228,8 +1228,16 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void {
|
||||
.timestamp = timestamp(.monotonic),
|
||||
});
|
||||
|
||||
page_frame.navigate(src, .{ .reason = .initialFrameNavigation }) catch |err| {
|
||||
log.warn(.page, "iframe navigate failure", .{ .url = src, .err = err });
|
||||
// navigate will dupe the url
|
||||
const url = try URL.resolve(
|
||||
self.call_arena,
|
||||
self.base(),
|
||||
src,
|
||||
.{ .encode = true },
|
||||
);
|
||||
|
||||
page_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| {
|
||||
log.warn(.page, "iframe navigate failure", .{ .url = url, .err = err });
|
||||
self._pending_loads -= 1;
|
||||
iframe._content_window = null;
|
||||
page_frame.deinit();
|
||||
|
||||
@@ -20,44 +20,61 @@ const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// Comptime options accepted by `resolve`.
const ResolveOpts = struct {
    /// When true, the resolved URL is passed through percent-encoding of its
    /// path, query and fragment before being returned.
    encode: bool = false,

    /// When true, `resolve` returns a freshly duplicated string even in the
    /// cases where it could hand back `base` or `path` unchanged.
    always_dupe: bool = false,
};
|
||||
|
||||
// path is anytype, so that it can be used with both []const u8 and [:0]const u8
|
||||
pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
|
||||
const PT = @TypeOf(path);
|
||||
if (base.len == 0 or isCompleteHTTPUrl(path)) {
|
||||
if (comptime opts.always_dupe or !isNullTerminated(PT)) {
|
||||
return allocator.dupeZ(u8, path);
|
||||
const duped = try allocator.dupeZ(u8, path);
|
||||
return encodeURL(allocator, duped, opts);
|
||||
}
|
||||
if (comptime opts.encode) {
|
||||
return encodeURL(allocator, path, opts);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
if (path.len == 0) {
|
||||
if (comptime opts.always_dupe) {
|
||||
return allocator.dupeZ(u8, base);
|
||||
const duped = try allocator.dupeZ(u8, base);
|
||||
return encodeURL(allocator, duped, opts);
|
||||
}
|
||||
if (comptime opts.encode) {
|
||||
return encodeURL(allocator, base, opts);
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
if (path[0] == '?') {
|
||||
const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len;
|
||||
return std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path });
|
||||
return encodeURL(allocator, result, opts);
|
||||
}
|
||||
if (path[0] == '#') {
|
||||
const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len;
|
||||
return std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path });
|
||||
return encodeURL(allocator, result, opts);
|
||||
}
|
||||
|
||||
if (std.mem.startsWith(u8, path, "//")) {
|
||||
// network-path reference
|
||||
const index = std.mem.indexOfScalar(u8, base, ':') orelse {
|
||||
if (comptime isNullTerminated(PT)) {
|
||||
if (comptime opts.encode) {
|
||||
return encodeURL(allocator, path, opts);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
return allocator.dupeZ(u8, path);
|
||||
const duped = try allocator.dupeZ(u8, path);
|
||||
return encodeURL(allocator, duped, opts);
|
||||
};
|
||||
const protocol = base[0 .. index + 1];
|
||||
return std.mem.joinZ(allocator, "", &.{ protocol, path });
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ protocol, path });
|
||||
return encodeURL(allocator, result, opts);
|
||||
}
|
||||
|
||||
const scheme_end = std.mem.indexOf(u8, base, "://");
|
||||
@@ -65,7 +82,8 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
|
||||
const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;
|
||||
|
||||
if (path[0] == '/') {
|
||||
return std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
|
||||
const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
|
||||
return encodeURL(allocator, result, opts);
|
||||
}
|
||||
|
||||
var normalized_base: []const u8 = base[0..path_start];
|
||||
@@ -127,7 +145,115 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
|
||||
|
||||
// we always have an extra space
|
||||
out[out_i] = 0;
|
||||
return out[0..out_i :0];
|
||||
return encodeURL(allocator, out[0..out_i :0], opts);
|
||||
}
|
||||
|
||||
/// Percent-encodes the path, query and fragment of `url` when `opts.encode`
/// is set; with `opts.encode` false this is a no-op that returns `url`.
///
/// Everything up to the first '/' found after the "scheme://" prefix (or from
/// the start of the string when there is no such prefix) is copied verbatim.
/// A URL that has no such '/' is returned unchanged.
///
/// The fragment begins at the first '#'. A '?' only starts a query when it
/// appears before that '#'; a '?' inside the fragment is fragment data.
///
/// Returns `url` itself when no component needed encoding. Otherwise the
/// result is a new null-terminated string allocated from `allocator`.
/// Intermediate buffers are not freed individually, so `allocator` should be
/// an arena (every visible caller passes one).
fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
    if (!comptime opts.encode) {
        return url;
    }

    const scheme_end = std.mem.indexOf(u8, url, "://");
    const authority_start = if (scheme_end) |end| end + 3 else 0;
    const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url;

    // Locate the fragment first, then look for the query only in the part of
    // the URL that precedes it. Searching for '?' across the whole URL would
    // mistake "/a#b?c" for a path of "/a#b" and encode the '#'.
    const fragment_start = std.mem.indexOfScalarPos(u8, url, path_start, '#');
    const query_limit = fragment_start orelse url.len;
    const query_start = std.mem.indexOfScalarPos(u8, url[0..query_limit], path_start, '?');

    const path = url[path_start .. query_start orelse query_limit];
    const query: ?[]const u8 = if (query_start) |qs| url[qs + 1 .. query_limit] else null;
    const fragment: ?[]const u8 = if (fragment_start) |fs| url[fs + 1 ..] else null;

    const encoded_path = try percentEncodeSegment(allocator, path, true);
    const encoded_query: ?[]const u8 = if (query) |q|
        try percentEncodeSegment(allocator, q, false)
    else
        null;
    const encoded_fragment: ?[]const u8 = if (fragment) |f|
        try percentEncodeSegment(allocator, f, false)
    else
        null;

    // percentEncodeSegment hands back its input slice when it had nothing to
    // do, so pointer identity tells us whether any component was rewritten.
    const unchanged = encoded_path.ptr == path.ptr and
        (encoded_query == null or encoded_query.?.ptr == query.?.ptr) and
        (encoded_fragment == null or encoded_fragment.?.ptr == fragment.?.ptr);
    if (unchanged) {
        return url;
    }

    var buf = try std.ArrayList(u8).initCapacity(allocator, url.len + 20);
    try buf.appendSlice(allocator, url[0..path_start]);
    try buf.appendSlice(allocator, encoded_path);
    if (encoded_query) |eq| {
        try buf.append(allocator, '?');
        try buf.appendSlice(allocator, eq);
    }
    if (encoded_fragment) |ef| {
        try buf.append(allocator, '#');
        try buf.appendSlice(allocator, ef);
    }

    // Append the sentinel, then return a slice that excludes it.
    try buf.append(allocator, 0);
    return buf.items[0 .. buf.items.len - 1 :0];
}
|
||||
|
||||
/// Returns true when `segment[index]` is a '%' followed by two hex digits,
/// i.e. the start of an existing percent-escape.
fn isPercentEscapeAt(segment: []const u8, index: usize) bool {
    if (segment[index] != '%' or index + 2 >= segment.len) {
        return false;
    }
    return std.ascii.isHex(segment[index + 1]) and std.ascii.isHex(segment[index + 2]);
}

/// Percent-encodes one URL component.
///
/// Every byte for which `shouldPercentEncode(byte, is_path)` is true is
/// replaced by an uppercase "%XX" escape. A '%' that already starts a valid
/// "%XX" escape is copied through as-is, so encoded input is not
/// double-encoded; any other '%' becomes "%25".
///
/// When no byte has to change, `segment` itself is returned (same pointer, no
/// allocation). Otherwise the result is allocated from `allocator`.
fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime is_path: bool) ![]const u8 {
    // Find the first byte that really needs rewriting. Valid escapes are
    // skipped as a unit so that an already-encoded segment is recognised as
    // clean instead of being copied for nothing.
    var first_dirty: ?usize = null;
    var scan: usize = 0;
    while (scan < segment.len) : (scan += 1) {
        if (isPercentEscapeAt(segment, scan)) {
            scan += 2;
            continue;
        }
        if (shouldPercentEncode(segment[scan], is_path)) {
            first_dirty = scan;
            break;
        }
    }
    const start = first_dirty orelse return segment;

    var buf = try std.ArrayList(u8).initCapacity(allocator, segment.len + 10);

    // Everything before the first dirty byte is known to be clean.
    try buf.appendSlice(allocator, segment[0..start]);

    const hex_digits = "0123456789ABCDEF";
    var i: usize = start;
    while (i < segment.len) : (i += 1) {
        if (isPercentEscapeAt(segment, i)) {
            try buf.appendSlice(allocator, segment[i .. i + 3]);
            i += 2;
            continue;
        }

        const c = segment[i];
        if (shouldPercentEncode(c, is_path)) {
            try buf.appendSlice(allocator, &[_]u8{ '%', hex_digits[c >> 4], hex_digits[c & 0x0F] });
        } else {
            try buf.append(allocator, c);
        }
    }

    return buf.items;
}
|
||||
|
||||
/// Reports whether byte `c` must be written as a "%XX" escape.
///
/// ASCII letters and digits, the other unreserved characters, the sub-delims
/// and the separators '/', ':' and '@' are always left alone. '?' is left
/// alone only when `is_path` is false. Every other byte, including space, '%',
/// '#' and all bytes >= 0x80, is encoded.
fn shouldPercentEncode(c: u8, comptime is_path: bool) bool {
    if (std.ascii.isAlphanumeric(c)) {
        return false;
    }
    if (c == '?') {
        // Legal inside a query or fragment, but it would end a path.
        return is_path;
    }

    // unreserved punctuation ++ sub-delims ++ separators
    const always_literal = "-._~" ++ "!$&'()*+,;=" ++ "/:@";
    return std.mem.indexOfScalar(u8, always_literal, c) == null;
}
|
||||
|
||||
fn isNullTerminated(comptime value: type) bool {
|
||||
@@ -691,6 +817,172 @@ test "URL: resolve" {
|
||||
}
|
||||
}
|
||||
|
||||
// Table-driven check of `resolve` with `.encode = true`: each case resolves
// `path` against `base` and compares the result with `expected`.
test "URL: resolve with encoding" {
    defer testing.reset();

    const Case = struct {
        base: [:0]const u8,
        path: [:0]const u8,
        expected: [:0]const u8,
    };

    const cases = [_]Case{
        // Spaces should be encoded as %20, but ! is allowed
        .{
            .base = "https://example.com/dir/",
            .path = "over 9000!",
            .expected = "https://example.com/dir/over%209000!",
        },
        .{
            .base = "https://example.com/",
            .path = "hello world.html",
            .expected = "https://example.com/hello%20world.html",
        },
        // Multiple spaces: runs of 1, 2 and 3 spaces are each encoded
        .{
            .base = "https://example.com/",
            .path = "path with  multiple   spaces",
            .expected = "https://example.com/path%20with%20%20multiple%20%20%20spaces",
        },
        // Special characters that need encoding
        .{
            .base = "https://example.com/",
            .path = "file[1].html",
            .expected = "https://example.com/file%5B1%5D.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file{name}.html",
            .expected = "https://example.com/file%7Bname%7D.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file<test>.html",
            .expected = "https://example.com/file%3Ctest%3E.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file\"quote\".html",
            .expected = "https://example.com/file%22quote%22.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file|pipe.html",
            .expected = "https://example.com/file%7Cpipe.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file\\backslash.html",
            .expected = "https://example.com/file%5Cbackslash.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file^caret.html",
            .expected = "https://example.com/file%5Ecaret.html",
        },
        .{
            .base = "https://example.com/",
            .path = "file`backtick`.html",
            .expected = "https://example.com/file%60backtick%60.html",
        },
        // Characters that should NOT be encoded
        .{
            .base = "https://example.com/",
            .path = "path-with_under~tilde.html",
            .expected = "https://example.com/path-with_under~tilde.html",
        },
        .{
            .base = "https://example.com/",
            .path = "path/with/slashes",
            .expected = "https://example.com/path/with/slashes",
        },
        .{
            .base = "https://example.com/",
            .path = "sub-delims!$&'()*+,;=.html",
            .expected = "https://example.com/sub-delims!$&'()*+,;=.html",
        },
        // Already encoded characters should not be double-encoded
        .{
            .base = "https://example.com/",
            .path = "already%20encoded",
            .expected = "https://example.com/already%20encoded",
        },
        .{
            .base = "https://example.com/",
            .path = "file%5B1%5D.html",
            .expected = "https://example.com/file%5B1%5D.html",
        },
        // Mix of encoded and unencoded
        .{
            .base = "https://example.com/",
            .path = "part%20encoded and not",
            .expected = "https://example.com/part%20encoded%20and%20not",
        },
        // Query strings and fragments ARE encoded
        .{
            .base = "https://example.com/",
            .path = "file name.html?query=value with spaces",
            .expected = "https://example.com/file%20name.html?query=value%20with%20spaces",
        },
        .{
            .base = "https://example.com/",
            .path = "file name.html#anchor with spaces",
            .expected = "https://example.com/file%20name.html#anchor%20with%20spaces",
        },
        .{
            .base = "https://example.com/",
            .path = "file.html?hello=world !",
            .expected = "https://example.com/file.html?hello=world%20!",
        },
        // Query structural characters should NOT be encoded
        .{
            .base = "https://example.com/",
            .path = "file.html?a=1&b=2",
            .expected = "https://example.com/file.html?a=1&b=2",
        },
        // Relative paths with encoding
        .{
            .base = "https://example.com/dir/page.html",
            .path = "../other dir/file.html",
            .expected = "https://example.com/other%20dir/file.html",
        },
        .{
            .base = "https://example.com/dir/",
            .path = "./sub dir/file.html",
            .expected = "https://example.com/dir/sub%20dir/file.html",
        },
        // Absolute paths with encoding
        .{
            .base = "https://example.com/some/path",
            .path = "/absolute path/file.html",
            .expected = "https://example.com/absolute%20path/file.html",
        },
        // Non-ASCII input: each byte of the UTF-8 sequence is encoded
        .{
            .base = "https://example.com/",
            .path = "café",
            .expected = "https://example.com/caf%C3%A9",
        },
        // Empty path resolves to the base
        .{
            .base = "https://example.com/",
            .path = "",
            .expected = "https://example.com/",
        },
        // Complete URL as path: the base is ignored, but the URL is still encoded
        .{
            .base = "https://example.com/",
            .path = "https://other.com/path with spaces",
            .expected = "https://other.com/path%20with%20spaces",
        },
    };

    for (cases) |case| {
        const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true });
        try testing.expectString(case.expected, result);
    }
}
|
||||
|
||||
test "URL: eqlDocument" {
|
||||
defer testing.reset();
|
||||
{
|
||||
|
||||
@@ -245,3 +245,11 @@
|
||||
testing.expectEqual('', b.toString());
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=url_encode>
  {
    // Reading href back yields the resolved URL with the space percent-encoded.
    const anchor = document.createElement('a');
    anchor.href = 'over 9000!';
    testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!', anchor.href);
  }
</script>
|
||||
|
||||
@@ -172,3 +172,12 @@
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=url_encode>
  {
    const image = document.createElement('img');
    image.src = 'over 9000!?hello=world !';

    // The attribute keeps the raw value that was assigned...
    testing.expectEqual('over 9000!?hello=world !', image.getAttribute('src'));
    // ...while the src property is resolved and percent-encoded.
    testing.expectEqual(testing.BASE_URL + 'element/html/over%209000!?hello=world%20!', image.src);
  }
</script>
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<iframe id=f1 onload="frame1Onload" src="support/sub1.html"></iframe>
|
||||
<iframe id=f1 onload="frame1Onload" src="support/sub 1.html"></iframe>
|
||||
<iframe id=f2 src="support/sub2.html"></iframe>
|
||||
|
||||
<script id="basic">
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
testing.expectEqual(0, $('#f1').childNodes.length);
|
||||
|
||||
testing.expectEqual(testing.BASE_URL + 'frames/support/sub%201.html', $('#f1').src);
|
||||
testing.expectEqual(window[0], $('#f1').contentWindow);
|
||||
testing.expectEqual(window[1], $('#f2').contentWindow);
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ pub fn getHref(self: *Anchor, page: *Page) ![]const u8 {
|
||||
if (href.len == 0) {
|
||||
return "";
|
||||
}
|
||||
return URL.resolve(page.call_arena, page.base(), href, .{});
|
||||
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -50,7 +50,7 @@ pub fn getContentDocument(self: *const IFrame) ?*Document {
|
||||
|
||||
pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 {
|
||||
if (self._src.len == 0) return "";
|
||||
return try URL.resolve(page.call_arena, page.base(), self._src, .{});
|
||||
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 {
|
||||
}
|
||||
|
||||
// Always resolve the src against the page URL
|
||||
return URL.resolve(page.call_arena, page.base(), src, .{});
|
||||
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 {
|
||||
}
|
||||
|
||||
// Always resolve the href against the page URL
|
||||
return URL.resolve(page.call_arena, page.base(), href, .{});
|
||||
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setHref(self: *Link, value: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -236,7 +236,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 {
|
||||
return "";
|
||||
}
|
||||
const URL = @import("../../URL.zig");
|
||||
return URL.resolve(page.call_arena, page.base(), src, .{});
|
||||
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn asNode(self: *Script) *Node {
|
||||
|
||||
pub fn getSrc(self: *const Script, page: *Page) ![]const u8 {
|
||||
if (self._src.len == 0) return "";
|
||||
return try URL.resolve(page.call_arena, page.base(), self._src, .{});
|
||||
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void {
|
||||
|
||||
@@ -59,7 +59,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 {
|
||||
}
|
||||
|
||||
const URL = @import("../../URL.zig");
|
||||
return URL.resolve(page.call_arena, page.base(), poster, .{});
|
||||
return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true });
|
||||
}
|
||||
|
||||
pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void {
|
||||
|
||||
Reference in New Issue
Block a user