From 21be3db51fa18d7f16919ebaf0d27895f0fa0cd7 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 26 Feb 2026 12:22:06 +0800 Subject: [PATCH] Callers to page.navigate ensure URL is properly encoded. Follow up to https://github.com/lightpanda-io/browser/pull/1646 The encodeURL (renamed to ensureEncoded and exposed in this commit) already handled already-encoded URLs, so this was largely a matter of exposing the functionality. The reason this isn't baked directly into Page.navigate is that, in some places e.g. internal navigation, the URL is already know to be encoded. So it's up to every caller to make sure they are passing a valid URL to navigate. --- src/browser/URL.zig | 151 +++++++++++++++++++++++++++++++++---- src/cdp/domains/page.zig | 4 +- src/cdp/domains/target.zig | 4 +- src/lightpanda.zig | 4 +- 4 files changed, 147 insertions(+), 16 deletions(-) diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 3a2a0514..6616d636 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -30,10 +30,10 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (base.len == 0 or isCompleteHTTPUrl(path)) { if (comptime opts.always_dupe or !isNullTerminated(PT)) { const duped = try allocator.dupeZ(u8, path); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); } if (comptime opts.encode) { - return encodeURL(allocator, path, opts); + return processResolved(allocator, path, opts); } return path; } @@ -41,10 +41,10 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path.len == 0) { if (comptime opts.always_dupe) { const duped = try allocator.dupeZ(u8, base); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); } if (comptime opts.encode) { - return encodeURL(allocator, base, opts); + return processResolved(allocator, base, opts); } return base; } @@ -52,12 +52,12 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path[0] == '?') { const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } if (path[0] == '#') { const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } if (std.mem.startsWith(u8, path, "//")) { @@ -65,16 +65,16 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime const index = std.mem.indexOfScalar(u8, base, ':') orelse { if (comptime isNullTerminated(PT)) { if (comptime opts.encode) { - return encodeURL(allocator, path, opts); + return processResolved(allocator, path, opts); } return path; } const duped = try allocator.dupeZ(u8, path); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); }; const protocol = base[0 .. index + 1]; const result = try std.mem.joinZ(allocator, "", &.{ protocol, path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } const scheme_end = std.mem.indexOf(u8, base, "://"); @@ -83,7 +83,7 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path[0] == '/') { const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } var normalized_base: []const u8 = base[0..path_start]; @@ -145,14 +145,17 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime // we always have an extra space out[out_i] = 0; - return encodeURL(allocator, out[0..out_i :0], opts); + return processResolved(allocator, out[0..out_i :0], opts); } -fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { +fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { if (!comptime opts.encode) { return url; } + return ensureEncoded(allocator, url); +} +pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; @@ -180,7 +183,8 @@ fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts if (encoded_path.ptr == path_to_encode.ptr and (encoded_query == null or encoded_query.?.ptr == url[query_start.? + 1 .. query_end].ptr) and - (encoded_fragment == null or encoded_fragment.?.ptr == url[fragment_start.? + 1 ..].ptr)) { + (encoded_fragment == null or encoded_fragment.?.ptr == url[fragment_start.? + 1 ..].ptr)) + { // nothing has changed return url; } @@ -817,6 +821,127 @@ test "URL: resolve" { } } +test "URL: ensureEncoded" { + defer testing.reset(); + + const Case = struct { + url: [:0]const u8, + expected: [:0]const u8, + }; + + const cases = [_]Case{ + .{ + .url = "https://example.com/over 9000!", + .expected = "https://example.com/over%209000!", + }, + .{ + .url = "http://example.com/hello world.html", + .expected = "http://example.com/hello%20world.html", + }, + .{ + .url = "https://example.com/file[1].html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/file{name}.html", + .expected = "https://example.com/file%7Bname%7D.html", + }, + .{ + .url = "https://example.com/page?query=hello world", + .expected = "https://example.com/page?query=hello%20world", + }, + .{ + .url = "https://example.com/page?a=1&b=value with spaces", + .expected = "https://example.com/page?a=1&b=value%20with%20spaces", + }, + .{ + .url = "https://example.com/page#section one", + .expected = "https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/my path?query=my value#my anchor", + .expected = "https://example.com/my%20path?query=my%20value#my%20anchor", + }, + .{ + .url = "https://example.com/already%20encoded", + .expected = "https://example.com/already%20encoded", + }, + .{ + .url = "https://example.com/file%5B1%5D.html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/caf%C3%A9", + .expected = "https://example.com/caf%C3%A9", + }, + .{ + .url = "https://example.com/page?query=already%20encoded", + .expected = "https://example.com/page?query=already%20encoded", + }, + .{ + .url = "https://example.com/page?a=1&b=value%20here", + .expected = "https://example.com/page?a=1&b=value%20here", + }, + .{ + .url = "https://example.com/page#section%20one", + .expected = "https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/part%20encoded and not", + .expected = "https://example.com/part%20encoded%20and%20not", + }, + .{ + .url = "https://example.com/page?a=encoded%20value&b=not encoded", + .expected = "https://example.com/page?a=encoded%20value&b=not%20encoded", + }, + .{ + .url = "https://example.com/my%20path?query=not encoded#encoded%20anchor", + .expected = "https://example.com/my%20path?query=not%20encoded#encoded%20anchor", + }, + .{ + .url = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + .expected = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + }, + .{ + .url = "https://example.com/path-with_under~tilde", + .expected = "https://example.com/path-with_under~tilde", + }, + .{ + .url = "https://example.com/sub-delims!$&'()*+,;=", + .expected = "https://example.com/sub-delims!$&'()*+,;=", + }, + .{ + .url = "https://example.com", + .expected = "https://example.com", + }, + .{ + .url = "https://example.com?query=value", + .expected = "https://example.com?query=value", + }, + .{ + .url = "https://example.com/clean/path", + .expected = "https://example.com/clean/path", + }, + .{ + .url = "https://example.com/path?clean=query#clean-fragment", + .expected = "https://example.com/path?clean=query#clean-fragment", + }, + .{ + .url = "https://example.com/100% complete", + .expected = "https://example.com/100%25%20complete", + }, + .{ + .url = "https://example.com/path?value=100% done", + .expected = "https://example.com/path?value=100%25%20done", + }, + }; + + for (cases) |case| { + const result = try ensureEncoded(testing.arena_allocator, case.url); + try testing.expectString(case.expected, result); + } +} + test "URL: resolve with encoding" { defer testing.reset(); diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 5639cfde..3db5d67c 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -22,6 +22,7 @@ const lp = @import("lightpanda"); const id = @import("../id.zig"); const log = @import("../../log.zig"); const js = @import("../../browser/js/js.zig"); +const URL = @import("../../browser/URL.zig"); const Page = @import("../../browser/Page.zig"); const timestampF = @import("../../datetime.zig").timestamp; const Notification = @import("../../Notification.zig"); @@ -224,7 +225,8 @@ fn navigate(cmd: anytype) !void { page = try session.replacePage(); } - try page.navigate(params.url, .{ + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + try page.navigate(encoded_url, .{ .reason = .address_bar, .cdp_id = cmd.input.id, .kind = .{ .push = null }, diff --git a/src/cdp/domains/target.zig b/src/cdp/domains/target.zig index 7dfe59ef..8a818a27 100644 --- a/src/cdp/domains/target.zig +++ b/src/cdp/domains/target.zig @@ -21,6 +21,7 @@ const lp = @import("lightpanda"); const id = @import("../id.zig"); const log = @import("../../log.zig"); +const URL = @import("../../browser/URL.zig"); const js = @import("../../browser/js/js.zig"); // TODO: hard coded IDs @@ -218,8 +219,9 @@ fn createTarget(cmd: anytype) !void { } if (!std.mem.eql(u8, "about:blank", params.url)) { + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); try page.navigate( - params.url, + encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null } }, ); } diff --git a/src/lightpanda.zig b/src/lightpanda.zig index cdf1c6be..ca105ce5 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -20,6 +20,7 @@ const std = @import("std"); pub const App = @import("App.zig"); pub const Server = @import("Server.zig"); pub const Config = @import("Config.zig"); +pub const URL = @import("browser/URL.zig"); pub const Page = @import("browser/Page.zig"); pub const Browser = @import("browser/Browser.zig"); pub const Session = @import("browser/Session.zig"); @@ -92,7 +93,8 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { // } // } - _ = try page.navigate(url, .{ + const encoded_url = try URL.ensureEncoded(page.call_arena, url); + _ = try page.navigate(encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null }, });