diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 3a2a0514..6616d636 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -30,10 +30,10 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (base.len == 0 or isCompleteHTTPUrl(path)) { if (comptime opts.always_dupe or !isNullTerminated(PT)) { const duped = try allocator.dupeZ(u8, path); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); } if (comptime opts.encode) { - return encodeURL(allocator, path, opts); + return processResolved(allocator, path, opts); } return path; } @@ -41,10 +41,10 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path.len == 0) { if (comptime opts.always_dupe) { const duped = try allocator.dupeZ(u8, base); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); } if (comptime opts.encode) { - return encodeURL(allocator, base, opts); + return processResolved(allocator, base, opts); } return base; } @@ -52,12 +52,12 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path[0] == '?') { const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } if (path[0] == '#') { const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len; const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } if (std.mem.startsWith(u8, path, "//")) { @@ -65,16 +65,16 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime const index = std.mem.indexOfScalar(u8, base, ':') orelse { if (comptime isNullTerminated(PT)) { if (comptime opts.encode) { - return encodeURL(allocator, path, opts); + return processResolved(allocator, path, opts); } return path; } const duped = try allocator.dupeZ(u8, path); - return encodeURL(allocator, duped, opts); + return processResolved(allocator, duped, opts); }; const protocol = base[0 .. index + 1]; const result = try std.mem.joinZ(allocator, "", &.{ protocol, path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } const scheme_end = std.mem.indexOf(u8, base, "://"); @@ -83,7 +83,7 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime if (path[0] == '/') { const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path }); - return encodeURL(allocator, result, opts); + return processResolved(allocator, result, opts); } var normalized_base: []const u8 = base[0..path_start]; @@ -145,14 +145,17 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime // we always have an extra space out[out_i] = 0; - return encodeURL(allocator, out[0..out_i :0], opts); + return processResolved(allocator, out[0..out_i :0], opts); } -fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { +fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { if (!comptime opts.encode) { return url; } + return ensureEncoded(allocator, url); +} +pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; @@ -180,7 +183,8 @@ fn encodeURL(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts if (encoded_path.ptr == path_to_encode.ptr and (encoded_query == null or encoded_query.?.ptr == url[query_start.? + 1 .. query_end].ptr) and - (encoded_fragment == null or encoded_fragment.?.ptr == url[fragment_start.? + 1 ..].ptr)) { + (encoded_fragment == null or encoded_fragment.?.ptr == url[fragment_start.? + 1 ..].ptr)) + { // nothing has changed return url; } @@ -817,6 +821,127 @@ test "URL: resolve" { } } +test "URL: ensureEncoded" { + defer testing.reset(); + + const Case = struct { + url: [:0]const u8, + expected: [:0]const u8, + }; + + const cases = [_]Case{ + .{ + .url = "https://example.com/over 9000!", + .expected = "https://example.com/over%209000!", + }, + .{ + .url = "http://example.com/hello world.html", + .expected = "http://example.com/hello%20world.html", + }, + .{ + .url = "https://example.com/file[1].html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/file{name}.html", + .expected = "https://example.com/file%7Bname%7D.html", + }, + .{ + .url = "https://example.com/page?query=hello world", + .expected = "https://example.com/page?query=hello%20world", + }, + .{ + .url = "https://example.com/page?a=1&b=value with spaces", + .expected = "https://example.com/page?a=1&b=value%20with%20spaces", + }, + .{ + .url = "https://example.com/page#section one", + .expected = "https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/my path?query=my value#my anchor", + .expected = "https://example.com/my%20path?query=my%20value#my%20anchor", + }, + .{ + .url = "https://example.com/already%20encoded", + .expected = "https://example.com/already%20encoded", + }, + .{ + .url = "https://example.com/file%5B1%5D.html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/caf%C3%A9", + .expected = "https://example.com/caf%C3%A9", + }, + .{ + .url = "https://example.com/page?query=already%20encoded", + .expected = "https://example.com/page?query=already%20encoded", + }, + .{ + .url = "https://example.com/page?a=1&b=value%20here", + .expected = "https://example.com/page?a=1&b=value%20here", + }, + .{ + .url = "https://example.com/page#section%20one", + .expected = "https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/part%20encoded and not", + .expected = "https://example.com/part%20encoded%20and%20not", + }, + .{ + .url = "https://example.com/page?a=encoded%20value&b=not encoded", + .expected = "https://example.com/page?a=encoded%20value&b=not%20encoded", + }, + .{ + .url = "https://example.com/my%20path?query=not encoded#encoded%20anchor", + .expected = "https://example.com/my%20path?query=not%20encoded#encoded%20anchor", + }, + .{ + .url = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + .expected = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + }, + .{ + .url = "https://example.com/path-with_under~tilde", + .expected = "https://example.com/path-with_under~tilde", + }, + .{ + .url = "https://example.com/sub-delims!$&'()*+,;=", + .expected = "https://example.com/sub-delims!$&'()*+,;=", + }, + .{ + .url = "https://example.com", + .expected = "https://example.com", + }, + .{ + .url = "https://example.com?query=value", + .expected = "https://example.com?query=value", + }, + .{ + .url = "https://example.com/clean/path", + .expected = "https://example.com/clean/path", + }, + .{ + .url = "https://example.com/path?clean=query#clean-fragment", + .expected = "https://example.com/path?clean=query#clean-fragment", + }, + .{ + .url = "https://example.com/100% complete", + .expected = "https://example.com/100%25%20complete", + }, + .{ + .url = "https://example.com/path?value=100% done", + .expected = "https://example.com/path?value=100%25%20done", + }, + }; + + for (cases) |case| { + const result = try ensureEncoded(testing.arena_allocator, case.url); + try testing.expectString(case.expected, result); + } +} + test "URL: resolve with encoding" { defer testing.reset(); diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 5639cfde..3db5d67c 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -22,6 +22,7 @@ const lp = @import("lightpanda"); const id = @import("../id.zig"); const log = @import("../../log.zig"); const js = @import("../../browser/js/js.zig"); +const URL = @import("../../browser/URL.zig"); const Page = @import("../../browser/Page.zig"); const timestampF = @import("../../datetime.zig").timestamp; const Notification = @import("../../Notification.zig"); @@ -224,7 +225,8 @@ fn navigate(cmd: anytype) !void { page = try session.replacePage(); } - try page.navigate(params.url, .{ + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + try page.navigate(encoded_url, .{ .reason = .address_bar, .cdp_id = cmd.input.id, .kind = .{ .push = null }, diff --git a/src/cdp/domains/target.zig b/src/cdp/domains/target.zig index 7dfe59ef..8a818a27 100644 --- a/src/cdp/domains/target.zig +++ b/src/cdp/domains/target.zig @@ -21,6 +21,7 @@ const lp = @import("lightpanda"); const id = @import("../id.zig"); const log = @import("../../log.zig"); +const URL = @import("../../browser/URL.zig"); const js = @import("../../browser/js/js.zig"); // TODO: hard coded IDs @@ -218,8 +219,9 @@ fn createTarget(cmd: anytype) !void { } if (!std.mem.eql(u8, "about:blank", params.url)) { + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); try page.navigate( - params.url, + encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null } }, ); } diff --git a/src/lightpanda.zig b/src/lightpanda.zig index cdf1c6be..ca105ce5 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -20,6 +20,7 @@ const std = @import("std"); pub const App = @import("App.zig"); pub const Server = @import("Server.zig"); pub const Config = @import("Config.zig"); +pub const URL = @import("browser/URL.zig"); pub const Page = @import("browser/Page.zig"); pub const Browser = @import("browser/Browser.zig"); pub const Session = @import("browser/Session.zig"); @@ -92,7 +93,8 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { // } // } - _ = try page.navigate(url, .{ + const encoded_url = try URL.ensureEncoded(page.call_arena, url); + _ = try page.navigate(encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null }, });