diff --git a/src/browser/html/document.zig b/src/browser/html/document.zig index 0516e76d..30a66359 100644 --- a/src/browser/html/document.zig +++ b/src/browser/html/document.zig @@ -85,7 +85,7 @@ pub const HTMLDocument = struct { pub fn get_cookie(_: *parser.DocumentHTML, page: *Page) ![]const u8 { var buf: std.ArrayListUnmanaged(u8) = .{}; - try page.cookie_jar.forRequest(&page.url.uri, buf.writer(page.arena), .{ + try page.cookie_jar.forRequest(page.url, buf.writer(page.arena), .{ .is_http = false, .is_navigation = true, }); @@ -95,7 +95,7 @@ pub const HTMLDocument = struct { pub fn set_cookie(_: *parser.DocumentHTML, cookie_str: []const u8, page: *Page) ![]const u8 { // we use the cookie jar's allocator to parse the cookie because it // outlives the page's arena. - const c = try Cookie.parse(page.cookie_jar.allocator, &page.url.uri, cookie_str); + const c = try Cookie.parse(page.cookie_jar.allocator, page.url, cookie_str); errdefer c.deinit(); if (c.http_only) { c.deinit(); diff --git a/src/browser/page.zig b/src/browser/page.zig index d668b67c..4355724d 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -150,6 +150,7 @@ pub const Page = struct { try self.registerBackgroundTasks(); } + // FIXME: Deinit self.url. pub fn deinit(self: *Page) void { self.script_manager.shutdown = true; @@ -239,7 +240,7 @@ pub const Page = struct { const doc = parser.documentHTMLToDocument(self.window.document); - // if the base si requested, add the base's node in the document's headers. + // if the base is requested, add the base's node in the document's headers. 
if (opts.with_base) { try self.addDOMTreeBase(); } @@ -525,10 +526,11 @@ pub const Page = struct { is_http: bool = true, is_navigation: bool = false, }; + pub fn requestCookie(self: *const Page, opts: RequestCookieOpts) Http.Client.RequestCookie { return .{ - .jar = self.cookie_jar, - .origin = &self.url.uri, + .cookie_jar = self.cookie_jar, + .origin_url = self.url, .is_http = opts.is_http, .is_navigation = opts.is_navigation, }; @@ -859,7 +861,7 @@ pub const Page = struct { self.window.setStorageShelf( try self.session.storage_shed.getOrPut(try self.origin(self.arena)), ); - try self.window.replaceLocation(.{ .url = try self.url.toWebApi(self.arena) }); + //try self.window.replaceLocation(.{ .url = try self.url.toWebApi(self.arena) }); } pub const MouseEvent = struct { diff --git a/src/browser/storage/cookie.zig b/src/browser/storage/cookie.zig index f4242ea7..bf6b41ef 100644 --- a/src/browser/storage/cookie.zig +++ b/src/browser/storage/cookie.zig @@ -6,14 +6,7 @@ const ArenaAllocator = std.heap.ArenaAllocator; const log = @import("../../log.zig"); const DateTime = @import("../../datetime.zig").DateTime; const public_suffix_list = @import("../../data/public_suffix_list.zig").lookup; - -pub const LookupOpts = struct { - request_time: ?i64 = null, - origin_uri: ?*const Uri = null, - is_http: bool, - is_navigation: bool = true, - prefix: ?[]const u8 = null, -}; +const URL = @import("../../url.zig").URL; pub const Jar = struct { allocator: Allocator, @@ -80,13 +73,21 @@ pub const Jar = struct { } } - pub fn forRequest(self: *Jar, target_uri: *const Uri, writer: anytype, opts: LookupOpts) !void { + pub const LookupOpts = struct { + request_time: ?i64 = null, + origin_url: ?URL = null, + is_http: bool, + is_navigation: bool = true, + prefix: ?[]const u8 = null, + }; + + pub fn forRequest(self: *Jar, target_url: URL, writer: anytype, opts: LookupOpts) !void { const target = PreparedUri{ - .host = (target_uri.host orelse return error.InvalidURI).percent_encoded, - .path 
= target_uri.path.percent_encoded, - .secure = std.mem.eql(u8, target_uri.scheme, "https"), + .host = target_url.host(), + .path = target_url.getPath(), + .secure = target_url.isSecure(), }; - const same_site = try areSameSite(opts.origin_uri, target.host); + const same_site = try areSameSite(opts.origin_url, target.host); removeExpired(self, opts.request_time); @@ -109,8 +110,8 @@ pub const Jar = struct { } } - pub fn populateFromResponse(self: *Jar, uri: *const Uri, set_cookie: []const u8) !void { - const c = Cookie.parse(self.allocator, uri, set_cookie) catch |err| { + pub fn populateFromResponse(self: *Jar, url: URL, set_cookie: []const u8) !void { + const c = Cookie.parse(self.allocator, url, set_cookie) catch |err| { log.warn(.web_api, "cookie parse failed", .{ .raw = set_cookie, .err = err }); return; }; @@ -148,9 +149,9 @@ fn areCookiesEqual(a: *const Cookie, b: *const Cookie) bool { return true; } -fn areSameSite(origin_uri_: ?*const std.Uri, target_host: []const u8) !bool { - const origin_uri = origin_uri_ orelse return true; - const origin_host = (origin_uri.host orelse return error.InvalidURI).percent_encoded; +fn areSameSite(maybe_origin_url: ?URL, target_host: []const u8) !bool { + const origin_url = maybe_origin_url orelse return true; + const origin_host = origin_url.host(); // common case if (std.mem.eql(u8, target_host, origin_host)) { @@ -161,6 +162,7 @@ fn areSameSite(origin_uri_: ?*const std.Uri, target_host: []const u8) !bool { } fn findSecondLevelDomain(host: []const u8) []const u8 { + // TODO: maybe reverseIterator? 
var i = std.mem.lastIndexOfScalar(u8, host, '.') orelse return host; while (true) { i = std.mem.lastIndexOfScalar(u8, host[0..i], '.') orelse return host; @@ -269,8 +271,8 @@ pub const Cookie = struct { const aa = arena.allocator(); const owned_name = try aa.dupe(u8, cookie_name); const owned_value = try aa.dupe(u8, cookie_value); - const owned_path = try parsePath(aa, uri, path); - const owned_domain = try parseDomain(aa, uri, domain); + const owned_path = try parsePath(aa, url, path); + const owned_domain = try parseDomain(aa, url, domain); var normalized_expires: ?f64 = null; if (max_age) |ma| { @@ -362,37 +364,35 @@ pub const Cookie = struct { } } - pub fn parsePath(arena: Allocator, uri: ?*const std.Uri, explicit_path: ?[]const u8) ![]const u8 { + pub fn parsePath(arena: Allocator, maybe_url: ?URL, maybe_explicit_path: ?[]const u8) ![]const u8 { // path attribute value either begins with a '/' or we // ignore it and use the "default-path" algorithm - if (explicit_path) |path| { + if (maybe_explicit_path) |path| { if (path.len > 0 and path[0] == '/') { - return try arena.dupe(u8, path); + return arena.dupe(u8, path); } } - // default-path - const url_path = (uri orelse return "/").path; + const url_path = blk: { + if (maybe_url) |url| { + break :blk url.getPath(); + } - const either = url_path.percent_encoded; - if (either.len == 0 or (either.len == 1 and either[0] == '/')) { + return "/"; + }; + + if (url_path.len == 0 or (url_path.len == 1 and url_path[0] == '/')) { return "/"; } - var owned_path: []const u8 = try percentEncode(arena, url_path, isPathChar); - const last = std.mem.lastIndexOfScalar(u8, owned_path[1..], '/') orelse { - return "/"; - }; - return try arena.dupe(u8, owned_path[0 .. 
last + 1]); + return arena.dupe(u8, url_path); } - pub fn parseDomain(arena: Allocator, uri: ?*const std.Uri, explicit_domain: ?[]const u8) ![]const u8 { + pub fn parseDomain(arena: Allocator, maybe_url: ?URL, explicit_domain: ?[]const u8) ![]const u8 { var encoded_host: ?[]const u8 = null; - if (uri) |uri_| { - const uri_host = uri_.host orelse return error.InvalidURI; - const host = try percentEncode(arena, uri_host, isHostChar); - _ = toLower(host); - encoded_host = host; + if (maybe_url) |url| { + const url_host = url.hostname(); + encoded_host = url_host; } if (explicit_domain) |domain| { @@ -421,19 +421,6 @@ pub const Cookie = struct { return encoded_host orelse return error.InvalidDomain; // default-domain } - pub fn percentEncode(arena: Allocator, component: std.Uri.Component, comptime isValidChar: fn (u8) bool) ![]u8 { - switch (component) { - .raw => |str| { - var aw = try std.Io.Writer.Allocating.initCapacity(arena, str.len); - try std.Uri.Component.percentEncode(&aw.writer, str, isValidChar); - return aw.written(); // @memory retains memory used before growing - }, - .percent_encoded => |str| { - return try arena.dupe(u8, str); - }, - } - } - pub fn isHostChar(c: u8) bool { return switch (c) { 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => true, @@ -601,37 +588,40 @@ test "Jar: add" { defer jar.deinit(); try expectCookies(&.{}, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=9000;Max-Age=0"), now); + const test_url = try URL.parse("http://lightpanda.io/", null); + defer test_url.deinit(); + + try jar.add(try Cookie.parse(testing.allocator, test_url, "over=9000;Max-Age=0"), now); try expectCookies(&.{}, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=9000"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "over=9000"), now); try expectCookies(&.{.{ "over", "9000" }}, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=9000!!"), now); + try jar.add(try 
Cookie.parse(testing.allocator, test_url, "over=9000!!"), now); try expectCookies(&.{.{ "over", "9000!!" }}, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "spice=flow"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "spice=flow"), now); try expectCookies(&.{ .{ "over", "9000!!" }, .{ "spice", "flow" } }, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "spice=flows;Path=/"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "spice=flows;Path=/"), now); try expectCookies(&.{ .{ "over", "9000!!" }, .{ "spice", "flows" } }, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=9001;Path=/other"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "over=9001;Path=/other"), now); try expectCookies(&.{ .{ "over", "9000!!" }, .{ "spice", "flows" }, .{ "over", "9001" } }, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=9002;Path=/;Domain=lightpanda.io"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "over=9002;Path=/;Domain=lightpanda.io"), now); try expectCookies(&.{ .{ "over", "9000!!" }, .{ "spice", "flows" }, .{ "over", "9001" }, .{ "over", "9002" } }, jar); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "over=x;Path=/other;Max-Age=-200"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "over=x;Path=/other;Max-Age=-200"), now); try expectCookies(&.{ .{ "over", "9000!!" 
}, .{ "spice", "flows" }, .{ "over", "9002" } }, jar); } test "Jar: forRequest" { const expectCookies = struct { - fn expect(expected: []const u8, jar: *Jar, target_uri: Uri, opts: LookupOpts) !void { + fn expect(expected: []const u8, jar: *Jar, target_url: URL, opts: Jar.LookupOpts) !void { var arr: std.ArrayListUnmanaged(u8) = .empty; defer arr.deinit(testing.allocator); - try jar.forRequest(&target_uri, arr.writer(testing.allocator), opts); + try jar.forRequest(target_url, arr.writer(testing.allocator), opts); try testing.expectEqual(expected, arr.items); } }.expect; @@ -641,131 +631,142 @@ test "Jar: forRequest" { var jar = Jar.init(testing.allocator); defer jar.deinit(); - const test_uri_2 = Uri.parse("http://test.lightpanda.io/") catch unreachable; + const test_url = try URL.parse("http://lightpanda.io/", null); + defer test_url.deinit(); + + const test_url_2 = try URL.parse("http://test.lightpanda.io/", null); + defer test_url_2.deinit(); { // test with no cookies - try expectCookies("", &jar, test_uri, .{ .is_http = true }); + try expectCookies("", &jar, test_url, .{ .is_http = true }); } - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "global1=1"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "global2=2;Max-Age=30;domain=lightpanda.io"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "path1=3;Path=/about"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "path2=4;Path=/docs/"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "secure=5;Secure"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "sitenone=6;SameSite=None;Path=/x/;Secure"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "sitelax=7;SameSite=Lax;Path=/x/"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri, "sitestrict=8;SameSite=Strict;Path=/x/"), now); - try jar.add(try Cookie.parse(testing.allocator, &test_uri_2, 
"domain1=9;domain=test.lightpanda.io"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "global1=1"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "global2=2;Max-Age=30;domain=lightpanda.io"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "path1=3;Path=/about"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "path2=4;Path=/docs/"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "secure=5;Secure"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "sitenone=6;SameSite=None;Path=/x/;Secure"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "sitelax=7;SameSite=Lax;Path=/x/"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url, "sitestrict=8;SameSite=Strict;Path=/x/"), now); + try jar.add(try Cookie.parse(testing.allocator, test_url_2, "domain1=9;domain=test.lightpanda.io"), now); // nothing fancy here - try expectCookies("global1=1; global2=2", &jar, test_uri, .{ .is_http = true }); - try expectCookies("global1=1; global2=2", &jar, test_uri, .{ .origin_uri = &test_uri, .is_navigation = false, .is_http = true }); + try expectCookies("global1=1; global2=2", &jar, test_url, .{ .is_http = true }); + try expectCookies("global1=1; global2=2", &jar, test_url, .{ .origin_url = test_url, .is_navigation = false, .is_http = true }); + + // We reuse this URL to reparse. 
+ const reuse_url = try URL.parse("http://anothersitelightpanda.io/", null); + defer reuse_url.deinit(); // We have a cookie where Domain=lightpanda.io // This should _not_ match xyxlightpanda.io - try expectCookies("", &jar, try std.Uri.parse("http://anothersitelightpanda.io/"), .{ - .origin_uri = &test_uri, + try expectCookies("", &jar, reuse_url, .{ + .origin_url = test_url, .is_http = true, }); // matching path without trailing / - try expectCookies("global1=1; global2=2; path1=3", &jar, try std.Uri.parse("http://lightpanda.io/about"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2; path1=3", &jar, try reuse_url.reparse("http://lightpanda.io/about"), .{ + .origin_url = test_url, .is_http = true, }); // incomplete prefix path - try expectCookies("global1=1; global2=2", &jar, try std.Uri.parse("http://lightpanda.io/abou"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2", &jar, try reuse_url.reparse("http://lightpanda.io/abou"), .{ + .origin_url = test_url, .is_http = true, }); // path doesn't match - try expectCookies("global1=1; global2=2", &jar, try std.Uri.parse("http://lightpanda.io/aboutus"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2", &jar, try reuse_url.reparse("http://lightpanda.io/aboutus"), .{ + .origin_url = test_url, .is_http = true, }); // path doesn't match cookie directory - try expectCookies("global1=1; global2=2", &jar, try std.Uri.parse("http://lightpanda.io/docs"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2", &jar, try reuse_url.reparse("http://lightpanda.io/docs"), .{ + .origin_url = test_url, .is_http = true, }); // exact directory match - try expectCookies("global1=1; global2=2; path2=4", &jar, try std.Uri.parse("http://lightpanda.io/docs/"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2; path2=4", &jar, try reuse_url.reparse("http://lightpanda.io/docs/"), .{ + .origin_url = test_url, .is_http = true, }); // 
sub directory match - try expectCookies("global1=1; global2=2; path2=4", &jar, try std.Uri.parse("http://lightpanda.io/docs/more"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2; path2=4", &jar, try reuse_url.reparse("http://lightpanda.io/docs/more"), .{ + .origin_url = test_url, .is_http = true, }); // secure - try expectCookies("global1=1; global2=2; secure=5", &jar, try std.Uri.parse("https://lightpanda.io/"), .{ - .origin_uri = &test_uri, + try expectCookies("global1=1; global2=2; secure=5", &jar, try reuse_url.reparse("https://lightpanda.io/"), .{ + .origin_url = test_url, .is_http = true, }); // navigational cross domain, secure - try expectCookies("global1=1; global2=2; secure=5; sitenone=6; sitelax=7", &jar, try std.Uri.parse("https://lightpanda.io/x/"), .{ - .origin_uri = &(try std.Uri.parse("https://example.com/")), + const example_com_url = try URL.parse("https://example.com/", null); + defer example_com_url.deinit(); + + try expectCookies("global1=1; global2=2; secure=5; sitenone=6; sitelax=7", &jar, try reuse_url.reparse("https://lightpanda.io/x/"), .{ + .origin_url = example_com_url, .is_http = true, }); // navigational cross domain, insecure - try expectCookies("global1=1; global2=2; sitelax=7", &jar, try std.Uri.parse("http://lightpanda.io/x/"), .{ - .origin_uri = &(try std.Uri.parse("https://example.com/")), + try expectCookies("global1=1; global2=2; sitelax=7", &jar, try reuse_url.reparse("http://lightpanda.io/x/"), .{ + .origin_url = example_com_url, .is_http = true, }); // non-navigational cross domain, insecure - try expectCookies("", &jar, try std.Uri.parse("http://lightpanda.io/x/"), .{ - .origin_uri = &(try std.Uri.parse("https://example.com/")), + try expectCookies("", &jar, try reuse_url.reparse("http://lightpanda.io/x/"), .{ + .origin_url = example_com_url, .is_http = true, .is_navigation = false, }); // non-navigational cross domain, secure - try expectCookies("sitenone=6", &jar, try 
std.Uri.parse("https://lightpanda.io/x/"), .{ - .origin_uri = &(try std.Uri.parse("https://example.com/")), + try expectCookies("sitenone=6", &jar, try reuse_url.reparse("https://lightpanda.io/x/"), .{ + .origin_url = example_com_url, .is_http = true, .is_navigation = false, }); // non-navigational same origin - try expectCookies("global1=1; global2=2; sitelax=7; sitestrict=8", &jar, try std.Uri.parse("http://lightpanda.io/x/"), .{ - .origin_uri = &(try std.Uri.parse("https://lightpanda.io/")), + try expectCookies("global1=1; global2=2; sitelax=7; sitestrict=8", &jar, try reuse_url.reparse("http://lightpanda.io/x/"), .{ + .origin_url = test_url, .is_http = true, .is_navigation = false, }); // exact domain match + suffix - try expectCookies("global2=2; domain1=9", &jar, try std.Uri.parse("http://test.lightpanda.io/"), .{ - .origin_uri = &test_uri, + try expectCookies("global2=2; domain1=9", &jar, try reuse_url.reparse("http://test.lightpanda.io/"), .{ + .origin_url = test_url, .is_http = true, }); // domain suffix match + suffix - try expectCookies("global2=2; domain1=9", &jar, try std.Uri.parse("http://1.test.lightpanda.io/"), .{ - .origin_uri = &test_uri, + try expectCookies("global2=2; domain1=9", &jar, try reuse_url.reparse("http://1.test.lightpanda.io/"), .{ + .origin_url = test_url, .is_http = true, }); // non-matching domain - try expectCookies("global2=2", &jar, try std.Uri.parse("http://other.lightpanda.io/"), .{ - .origin_uri = &test_uri, + try expectCookies("global2=2", &jar, try reuse_url.reparse("http://other.lightpanda.io/"), .{ + .origin_url = test_url, .is_http = true, }); const l = jar.cookies.items.len; - try expectCookies("global1=1", &jar, test_uri, .{ + try expectCookies("global1=1", &jar, test_url, .{ .request_time = now + 100, - .origin_uri = &test_uri, + .origin_url = test_url, .is_http = true, }); try testing.expectEqual(l - 1, jar.cookies.items.len); @@ -961,9 +962,11 @@ const ExpectedCookie = struct { same_site: Cookie.SameSite = .lax, }; 
-fn expectCookie(expected: ExpectedCookie, url: []const u8, set_cookie: []const u8) !void { - const uri = try Uri.parse(url); - var cookie = try Cookie.parse(testing.allocator, &uri, set_cookie); +fn expectCookie(expected: ExpectedCookie, url_str: []const u8, set_cookie: []const u8) !void { + const url = try URL.parse(url_str, null); + defer url.deinit(); + + var cookie = try Cookie.parse(testing.allocator, url, set_cookie); defer cookie.deinit(); try testing.expectEqual(expected.name, cookie.name); @@ -977,9 +980,11 @@ fn expectCookie(expected: ExpectedCookie, url: []const u8, set_cookie: []const u try testing.expectDelta(expected.expires, cookie.expires, 2.0); } -fn expectAttribute(expected: anytype, url: ?[]const u8, set_cookie: []const u8) !void { - const uri = if (url) |u| try Uri.parse(u) else test_uri; - var cookie = try Cookie.parse(testing.allocator, &uri, set_cookie); +fn expectAttribute(expected: anytype, maybe_url_str: ?[]const u8, set_cookie: []const u8) !void { + const url = try URL.parse(if (maybe_url_str) |url_str| url_str else "https://lightpanda.io/", null); + defer url.deinit(); + + var cookie = try Cookie.parse(testing.allocator, url, set_cookie); defer cookie.deinit(); inline for (@typeInfo(@TypeOf(expected)).@"struct".fields) |f| { @@ -994,9 +999,7 @@ fn expectAttribute(expected: anytype, url: ?[]const u8, set_cookie: []const u8) } } -fn expectError(expected: anyerror, url: ?[]const u8, set_cookie: []const u8) !void { - const uri = if (url) |u| try Uri.parse(u) else test_uri; - try testing.expectError(expected, Cookie.parse(testing.allocator, &uri, set_cookie)); +fn expectError(expected: anyerror, maybe_url_str: ?[]const u8, set_cookie: []const u8) !void { + const url = try URL.parse(if (maybe_url_str) |url_str| url_str else "https://lightpanda.io/", null); + try testing.expectError(expected, Cookie.parse(testing.allocator, url, set_cookie)); } - -const test_uri = Uri.parse("http://lightpanda.io/") catch unreachable; diff --git 
a/src/browser/url/url.zig b/src/browser/url/url.zig index 039bfc9e..837a1560 100644 --- a/src/browser/url/url.zig +++ b/src/browser/url/url.zig @@ -84,20 +84,10 @@ pub const URL = struct { break :blk ada.parse(url_str); }; - // Prepare search_params. - const params: URLSearchParams = blk: { - const search = ada.getSearch(internal); - if (search.data == null) { - break :blk .{}; - } - - break :blk try .initFromString(page.arena, search.data[0..search.length]); + return .{ + .internal = internal, + .search_params = try prepareSearchParams(page.arena, internal), }; - - // We're doing this since we track search params separately. - ada.clearSearch(internal); - - return .{ .internal = internal, .search_params = params }; } pub fn destructor(self: *const URL) void { @@ -105,8 +95,37 @@ pub const URL = struct { return ada.free(self.internal); } - pub fn initWithoutSearchParams(uri: std.Uri) URL { - return .{ .uri = uri, .search_params = .{} }; + /// Initializes a `URL` from given `internal`. + /// Note that this copies the given `internal`; meaning 2 instances + /// of it has to be tracked separately. + pub fn constructFromInternal(arena: Allocator, internal: ada.URL) !URL { + const copy = ada.copy(internal); + + return .{ + .internal = copy, + .search_params = try prepareSearchParams(arena, copy), + }; + } + + /// Prepares a `URLSearchParams` from given `internal`. + /// Resets `search` of `internal`. + fn prepareSearchParams(arena: Allocator, internal: ada.URL) !URLSearchParams { + const search = ada.getSearch(internal); + // Empty. + if (search.data == null) return .{}; + + const slice = search.data[0..search.length]; + const search_params = URLSearchParams.initFromString(arena, slice); + // After a call to this function, search params are tracked by + // `search_params`. So we reset the internal's search. + ada.clearSearch(internal); + + return search_params; + } + + // Alias to get_href. 
+ pub fn _toString(self: *const URL, page: *Page) ![]const u8 { + return self.get_href(page); } pub fn _toString(self: *const URL) []const u8 { return ada.getHref(self.internal); @@ -178,7 +197,13 @@ pub const URL = struct { } pub fn get_pathname(self: *const URL) []const u8 { - return ada.getPathname(self.internal); + const path = ada.getPathnameNullable(self.internal); + // Return a slash if path is null. + if (path.data == null) { + return "/"; + } + + return path.data[0..path.length]; } // get_search depends on the current state of `search_params`. diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index f6fb302b..5697605a 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -208,7 +208,7 @@ pub fn requestIntercept(arena: Allocator, bc: anytype, intercept: *const Notific log.debug(.cdp, "request intercept", .{ .state = "paused", .id = transfer.id, - .url = transfer.uri, + .url = transfer.url, }); // Await either continueRequest, failRequest or fulfillRequest @@ -237,7 +237,7 @@ fn continueRequest(cmd: anytype) !void { log.debug(.cdp, "request intercept", .{ .state = "continue", .id = transfer.id, - .url = transfer.uri, + .url = transfer.url, .new_url = params.url, }); @@ -342,7 +342,7 @@ fn fulfillRequest(cmd: anytype) !void { log.debug(.cdp, "request intercept", .{ .state = "fulfilled", .id = transfer.id, - .url = transfer.uri, + .url = transfer.url, .status = params.responseCode, .body = params.body != null, }); @@ -376,7 +376,7 @@ fn failRequest(cmd: anytype) !void { log.info(.cdp, "request intercept", .{ .state = "fail", .id = request_id, - .url = transfer.uri, + .url = transfer.url, .reason = params.errorReason, }); return cmd.sendResult(null, .{}); @@ -420,7 +420,7 @@ pub fn requestAuthRequired(arena: Allocator, bc: anytype, intercept: *const Noti log.debug(.cdp, "request auth required", .{ .state = "paused", .id = transfer.id, - .url = transfer.uri, + .url = transfer.url, }); // Await continueWithAuth diff --git 
a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 0d7014d0..1c2c84c8 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -22,6 +22,7 @@ const Allocator = std.mem.Allocator; const CdpStorage = @import("storage.zig"); const Transfer = @import("../../http/Client.zig").Transfer; const Notification = @import("../../notification.zig").Notification; +const URL = @import("../../url.zig").URL; pub fn processMessage(cmd: anytype) !void { const action = std.meta.stringToEnum(enum { @@ -117,15 +118,20 @@ fn deleteCookies(cmd: anytype) !void { const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; const cookies = &bc.session.cookie_jar.cookies; - const uri = if (params.url) |url| std.Uri.parse(url) catch return error.InvalidParams else null; - const uri_ptr = if (uri) |u| &u else null; + const maybe_url: ?URL = blk: { + if (params.url) |url| { + break :blk URL.parse(url, null) catch return error.InvalidParams; + } + + break :blk null; + }; var index = cookies.items.len; while (index > 0) { index -= 1; const cookie = &cookies.items[index]; - const domain = try Cookie.parseDomain(cmd.arena, uri_ptr, params.domain); - const path = try Cookie.parsePath(cmd.arena, uri_ptr, params.path); + const domain = try Cookie.parseDomain(cmd.arena, maybe_url, params.domain); + const path = try Cookie.parsePath(cmd.arena, maybe_url, params.path); // We do not want to use Cookie.appliesTo here. As a Cookie with a shorter path would match. // Similar to deduplicating with areCookiesEqual, except domain and path are optional. @@ -133,6 +139,12 @@ fn deleteCookies(cmd: anytype) !void { cookies.swapRemove(index).deinit(); } } + + // Deinit URL if we had. 
+ if (maybe_url) |url| { + url.deinit(); + } + return cmd.sendResult(null, .{}); } @@ -177,13 +189,14 @@ fn getCookies(cmd: anytype) !void { const param_urls = params.urls orelse &[_][]const u8{page_url orelse return error.InvalidParams}; var urls = try std.ArrayListUnmanaged(CdpStorage.PreparedUri).initCapacity(cmd.arena, param_urls.len); - for (param_urls) |url| { - const uri = std.Uri.parse(url) catch return error.InvalidParams; + for (param_urls) |url_str| { + const url = URL.parse(url_str, null) catch return error.InvalidParams; + defer url.deinit(); urls.appendAssumeCapacity(.{ - .host = try Cookie.parseDomain(cmd.arena, &uri, null), - .path = try Cookie.parsePath(cmd.arena, &uri, null), - .secure = std.mem.eql(u8, uri.scheme, "https"), + .host = try Cookie.parseDomain(cmd.arena, url, null), + .path = try Cookie.parsePath(cmd.arena, url, null), + .secure = url.isSecure(), }); } @@ -247,7 +260,7 @@ pub fn httpRequestStart(arena: Allocator, bc: anytype, msg: *const Notification. .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{transfer.id}), .frameId = target_id, .loaderId = bc.loader_id, - .documentUrl = DocumentUrlWriter.init(&page.url.uri), + .documentUrl = DocumentUrlWriter.init(page.url), .request = TransferAsRequestWriter.init(transfer), .initiator = .{ .type = "other" }, }, .{ .session_id = session_id }); @@ -300,23 +313,17 @@ pub const TransferAsRequestWriter = struct { try jws.objectField("url"); try jws.beginWriteRaw(); try writer.writeByte('\"'); - try transfer.uri.writeToStream(writer, .{ - .scheme = true, - .authentication = true, - .authority = true, - .path = true, - .query = true, - }); + try transfer.url.writeToStream(writer); try writer.writeByte('\"'); jws.endWriteRaw(); } { - if (transfer.uri.fragment) |frag| { + if (transfer.url.getFragment()) |frag| { try jws.objectField("urlFragment"); try jws.beginWriteRaw(); try writer.writeAll("\"#"); - try writer.writeAll(frag.percent_encoded); + try writer.writeAll(frag); try 
writer.writeByte('\"'); jws.endWriteRaw(); } @@ -370,13 +377,7 @@ const TransferAsResponseWriter = struct { try jws.objectField("url"); try jws.beginWriteRaw(); try writer.writeByte('\"'); - try transfer.uri.writeToStream(writer, .{ - .scheme = true, - .authentication = true, - .authority = true, - .path = true, - .query = true, - }); + try transfer.url.writeToStream(writer); try writer.writeByte('\"'); jws.endWriteRaw(); } @@ -417,29 +418,22 @@ const TransferAsResponseWriter = struct { }; const DocumentUrlWriter = struct { - uri: *std.Uri, + url: URL, - fn init(uri: *std.Uri) DocumentUrlWriter { - return .{ - .uri = uri, - }; + fn init(url: URL) DocumentUrlWriter { + return .{ .url = url }; } pub fn jsonStringify(self: *const DocumentUrlWriter, jws: anytype) !void { self._jsonStringify(jws) catch return error.WriteFailed; } + fn _jsonStringify(self: *const DocumentUrlWriter, jws: anytype) !void { const writer = jws.writer; try jws.beginWriteRaw(); try writer.writeByte('\"'); - try self.uri.writeToStream(writer, .{ - .scheme = true, - .authentication = true, - .authority = true, - .path = true, - .query = true, - }); + try self.url.writeToStream(writer); try writer.writeByte('\"'); jws.endWriteRaw(); } diff --git a/src/cdp/domains/storage.zig b/src/cdp/domains/storage.zig index 662d079f..54ab5e35 100644 --- a/src/cdp/domains/storage.zig +++ b/src/cdp/domains/storage.zig @@ -21,6 +21,7 @@ const std = @import("std"); const log = @import("../../log.zig"); const Cookie = @import("../../browser/storage/storage.zig").Cookie; const CookieJar = @import("../../browser/storage/storage.zig").CookieJar; +const URL = @import("../../url.zig").URL; pub const PreparedUri = @import("../../browser/storage/cookie.zig").PreparedUri; pub fn processMessage(cmd: anytype) !void { @@ -136,12 +137,25 @@ pub fn setCdpCookie(cookie_jar: *CookieJar, param: CdpCookie) !void { const a = arena.allocator(); // NOTE: The param.url can affect the default domain, (NOT path), secure, source port, and 
source scheme. - const uri = if (param.url) |url| std.Uri.parse(url) catch return error.InvalidParams else null; - const uri_ptr = if (uri) |*u| u else null; - const domain = try Cookie.parseDomain(a, uri_ptr, param.domain); + const maybe_url: ?URL = blk: { + if (param.url) |url| { + break :blk URL.parse(url, null) catch return error.InvalidParams; + } + + break :blk null; + }; + + const domain = try Cookie.parseDomain(a, maybe_url, param.domain); const path = if (param.path == null) "/" else try Cookie.parsePath(a, null, param.path); - const secure = if (param.secure) |s| s else if (uri) |uri_| std.mem.eql(u8, uri_.scheme, "https") else false; + const secure: bool = blk: { + // Check if params indicate security. + if (param.secure) |s| break :blk s; + // Check if protocol is secure. + if (maybe_url) |url| break :blk url.isSecure(); + // If all fails, insecure. + break :blk false; + }; const cookie = Cookie{ .arena = arena, @@ -158,6 +172,12 @@ pub fn setCdpCookie(cookie_jar: *CookieJar, param: CdpCookie) !void { .None => .none, }, }; + + // Free if we had. 
+ if (maybe_url) |url| { + url.deinit(); + } + try cookie_jar.add(cookie, std.time.timestamp()); } diff --git a/src/http/Client.zig b/src/http/Client.zig index 7eb84b24..8fee4f1c 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -22,9 +22,9 @@ const builtin = @import("builtin"); const Http = @import("Http.zig"); const Notification = @import("../notification.zig").Notification; -const CookieJar = @import("../browser/storage/storage.zig").CookieJar; - -const urlStitch = @import("../url.zig").stitch; +const CookieJar = @import("../browser/storage/cookie.zig").Jar; +const URL = @import("../url.zig").URL; +const urlStitch = URL.stitch; const c = Http.c; const posix = std.posix; @@ -259,7 +259,7 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { errdefer req.headers.deinit(); // we need this for cookies - const uri = std.Uri.parse(req.url) catch |err| { + const url = URL.parse(req.url, null) catch |err| { log.warn(.http, "invalid url", .{ .err = err, .url = req.url }); return err; }; @@ -272,7 +272,7 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { transfer.* = .{ .arena = ArenaAllocator.init(self.allocator), .id = id, - .uri = uri, + .url = url, .req = req, .ctx = req.ctx, .client = self, @@ -595,20 +595,20 @@ pub const Handle = struct { pub const RequestCookie = struct { is_http: bool, is_navigation: bool, - origin: *const std.Uri, - jar: *@import("../browser/storage/cookie.zig").Jar, + origin_url: URL, + cookie_jar: *CookieJar, - pub fn headersForRequest(self: *const RequestCookie, temp: Allocator, url: [:0]const u8, headers: *Http.Headers) !void { - const uri = std.Uri.parse(url) catch |err| { - log.warn(.http, "invalid url", .{ .err = err, .url = url }); + pub fn headersForRequest(self: *const RequestCookie, temp: Allocator, url_str: [:0]const u8, headers: *Http.Headers) !void { + const url = URL.parse(url_str, null) catch |err| { + log.warn(.http, "invalid url", .{ .err = err, .url = url_str }); return error.InvalidUrl; }; var arr: 
std.ArrayListUnmanaged(u8) = .{}; - try self.jar.forRequest(&uri, arr.writer(temp), .{ + try self.cookie_jar.forRequest(url, arr.writer(temp), .{ .is_http = self.is_http, .is_navigation = self.is_navigation, - .origin_uri = self.origin, + .origin_url = self.origin_url, }); if (arr.items.len > 0) { @@ -688,7 +688,7 @@ pub const Transfer = struct { arena: ArenaAllocator, id: usize = 0, req: Request, - uri: std.Uri, // used for setting/getting the cookie + url: URL, // used for setting/getting the cookie ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers client: *Client, // total bytes received in the response, including the response status line, @@ -774,7 +774,7 @@ pub const Transfer = struct { pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { // for cookies - self.uri = try std.Uri.parse(url); + self.url = try self.url.reparse(url); // for the request itself self.req.url = url; @@ -833,7 +833,7 @@ pub const Transfer = struct { while (true) { const ct = getResponseHeader(easy, "set-cookie", i); if (ct == null) break; - try req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value); + try req.cookie_jar.populateFromResponse(transfer.url, ct.?.value); i += 1; if (i >= ct.?.amount) break; } @@ -847,14 +847,16 @@ pub const Transfer = struct { var baseurl: [*c]u8 = undefined; try errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &baseurl)); - const url = try urlStitch(arena, hlocation.?.value, std.mem.span(baseurl), .{}); - const uri = try std.Uri.parse(url); - transfer.uri = uri; + const stitched = try urlStitch(arena, hlocation.?.value, std.mem.span(baseurl), .{}); + // Since we're being redirected, we know url is valid. + // An assertion won't hurt, though. 
+ std.debug.assert(transfer.url.isValid()); + _ = try transfer.url.reparse(stitched); var cookies: std.ArrayListUnmanaged(u8) = .{}; - try req.cookie_jar.forRequest(&uri, cookies.writer(arena), .{ + try req.cookie_jar.forRequest(transfer.url, cookies.writer(arena), .{ .is_http = true, - .origin_uri = &transfer.uri, + .origin_url = transfer.url, // used to enforce samesite cookie rules .is_navigation = req.resource_type == .document, }); @@ -883,7 +885,7 @@ pub const Transfer = struct { while (true) { const ct = getResponseHeader(easy, "set-cookie", i); if (ct == null) break; - transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { + transfer.req.cookie_jar.populateFromResponse(transfer.url, ct.?.value) catch |err| { log.err(.http, "set cookie", .{ .err = err, .req = transfer }); return err; }; diff --git a/src/url.zig b/src/url.zig index acfac256..f70c4422 100644 --- a/src/url.zig +++ b/src/url.zig @@ -1,82 +1,135 @@ const std = @import("std"); -const Uri = std.Uri; const Allocator = std.mem.Allocator; const WebApiURL = @import("browser/url/url.zig").URL; +const ada = @import("ada"); + pub const stitch = URL.stitch; pub const URL = struct { - uri: Uri, + internal: ada.URL, + /// This must outlive the URL structure. raw: []const u8, - pub const empty = URL{ .uri = .{ .scheme = "" }, .raw = "" }; - pub const about_blank = URL{ .uri = .{ .scheme = "" }, .raw = "about:blank" }; + pub const empty = URL{ .internal = null, .raw = "" }; + pub const invalid = URL{ .internal = null, .raw = "" }; + pub const blank = parse("about:blank", null) catch unreachable; - // We assume str will last as long as the URL - // In some cases, this is safe to do, because we know the URL is short lived. 
- // In most cases though, we assume the caller will just dupe the string URL - // into an arena - pub fn parse(str: []const u8, default_scheme: ?[]const u8) !URL { - var uri = Uri.parse(str) catch try Uri.parseAfterScheme(default_scheme orelse "https", str); + pub const ParseError = ada.ParseError; - // special case, url scheme is about, like about:blank. - // Use an empty string as host. - if (std.mem.eql(u8, uri.scheme, "about")) { - uri.host = .{ .percent_encoded = "" }; - } - - if (uri.host == null) { - return error.MissingHost; - } - - std.debug.assert(uri.host.? == .percent_encoded); - - return .{ - .uri = uri, - .raw = str, + /// We assume str will last as long as the URL + /// In some cases, this is safe to do, because we know the URL is short lived. + /// In most cases though, we assume the caller will just dupe the string URL + /// into an arena. + /// If `str` does not contain a scheme, `fallback_scheme` is used instead. + /// `fallback_scheme` is `https` if not provided. + pub fn parse(str: []const u8, fallback_scheme: ?[]const u8) ParseError!URL { + // Try parsing directly; if it fails, we might have to provide a base. + const internal = ada.parse(str) catch blk: { + break :blk try ada.parseWithBase(fallback_scheme orelse "https", str); }; + + return .{ .internal = internal, .raw = str }; } - pub fn fromURI(arena: Allocator, uri: *const Uri) !URL { - // This is embarrassing. - var buf: std.ArrayListUnmanaged(u8) = .{}; - try uri.writeToStream(.{ - .scheme = true, - .authentication = true, - .authority = true, - .path = true, - .query = true, - .fragment = true, - }, buf.writer(arena)); + /// Uses the same URL to parse in-place. + /// Assumes `internal` is valid. 
+ pub fn reparse(self: URL, str: []const u8) ParseError!URL { + std.debug.assert(self.internal != null); - return parse(buf.items, null); + _ = ada.setHref(self.internal, str); + if (!ada.isValid(self.internal)) { + return error.Invalid; + } + //self.raw = str; + + return self; } - // Above, in `parse`, we error if a host doesn't exist - // In other words, we can't have a URL with a null host. - pub fn host(self: *const URL) []const u8 { - return self.uri.host.?.percent_encoded; + /// Deinitializes internal url. + pub fn deinit(self: URL) void { + std.debug.assert(self.internal != null); + ada.free(self.internal); } - pub fn port(self: *const URL) ?u16 { - return self.uri.port; + /// Returns true if `internal` is initialized. + pub fn isValid(self: URL) bool { + return ada.isValid(self.internal); } - pub fn scheme(self: *const URL) []const u8 { - return self.uri.scheme; + /// Above, in `parse`, we error if a host doesn't exist + /// In other words, we can't have a URL with a null host. + pub fn host(self: URL) []const u8 { + const str = ada.getHostNullable(self.internal); + return str.data[0..str.length]; } - pub fn origin(self: *const URL, writer: *std.Io.Writer) !void { - return self.uri.writeToStream(writer, .{ .scheme = true, .authority = true }); + pub fn href(self: URL) []const u8 { + return ada.getHref(self.internal); } - pub fn format(self: *const URL, writer: *std.Io.Writer) !void { + pub fn hostname(self: URL) []const u8 { + return ada.getHostname(self.internal); + } + + pub fn getFragment(self: URL) ?[]const u8 { + // Ada calls it "hash" instead of "fragment". + const hash = ada.getHashNullable(self.internal); + if (hash.data == null) return null; + + return hash.data[0..hash.length]; + } + + pub fn getProtocol(self: URL) []const u8 { + return ada.getProtocol(self.internal); + } + + pub fn getScheme(self: URL) []const u8 { + const proto = self.getProtocol(); + std.debug.assert(proto[proto.len - 1] == ':'); + + return proto.ptr[0 .. 
proto.len - 1]; + } + + /// Returns the path. + pub fn getPath(self: URL) []const u8 { + const pathname = ada.getPathnameNullable(self.internal); + // Return a slash if path is null. + if (pathname.data == null) { + return "/"; + } + + return pathname.data[0..pathname.length]; + } + + /// Returns true if the URL's protocol is secure. + pub fn isSecure(self: URL) bool { + const scheme = ada.getSchemeType(self.internal); + return scheme == ada.Scheme.https or scheme == ada.Scheme.wss; + } + + pub fn writeToStream(self: URL, writer: anytype) !void { + return writer.writeAll(self.href()); + } + + // TODO: Skip unnecessary allocation by writing url parts directly to stream. + pub fn origin(self: URL, writer: *std.Io.Writer) !void { + // Ada manages its own memory for origin. + // Here we write it to stream and free it afterwards. + const proto = ada.getOrigin(self.internal); + defer ada.freeOwnedString(.{ .data = proto.ptr, .length = proto.len }); + + return writer.writeAll(proto); + } + + pub fn format(self: URL, writer: *std.Io.Writer) !void { return writer.writeAll(self.raw); } - pub fn toWebApi(self: *const URL, allocator: Allocator) !WebApiURL { - return WebApiURL.init(allocator, self.uri); + /// Converts `URL` to `WebApiURL`. + pub fn toWebApi(self: URL, allocator: Allocator) !WebApiURL { + return WebApiURL.constructFromInternal(allocator, self.internal); } /// Properly stitches two URL fragments together.