From eab328e2b5f8ce513853f934566111eba42ef731 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 12 Dec 2025 21:50:13 +0800 Subject: [PATCH] Tweak URL, refactor Anchor and URL to share more common code --- src/browser/URL.zig | 164 +++++++++++- src/browser/tests/legacy/url/url.html | 10 +- src/browser/tests/url.html | 294 +++++++++++++++++++++ src/browser/webapi/Location.zig | 4 +- src/browser/webapi/URL.zig | 110 +++++++- src/browser/webapi/element/html/Anchor.zig | 121 +-------- src/browser/webapi/net/URLSearchParams.zig | 6 +- 7 files changed, 575 insertions(+), 134 deletions(-) diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 4a399a3b..7a3547c2 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -128,7 +128,7 @@ fn isNullTerminated(comptime value: type) bool { } pub fn isCompleteHTTPUrl(url: []const u8) bool { - if (url.len < 6) { + if (url.len < 3) { // Minimum is "x://" return false; } @@ -137,9 +137,32 @@ pub fn isCompleteHTTPUrl(url: []const u8) bool { return false; } - return std.ascii.startsWithIgnoreCase(url, "https://") or - std.ascii.startsWithIgnoreCase(url, "http://") or - std.ascii.startsWithIgnoreCase(url, "ftp://"); + // Check if there's a scheme (protocol) ending with :// + const colon_pos = std.mem.indexOfScalar(u8, url, ':') orelse return false; + + // Check if it's followed by // + if (colon_pos + 2 >= url.len or url[colon_pos + 1] != '/' or url[colon_pos + 2] != '/') { + return false; + } + + // Validate that everything before the colon is a valid scheme + // A scheme must start with a letter and contain only letters, digits, +, -, . + if (colon_pos == 0) { + return false; + } + + const scheme = url[0..colon_pos]; + if (!std.ascii.isAlphabetic(scheme[0])) { + return false; + } + + for (scheme[1..]) |c| { + if (!std.ascii.isAlphanumeric(c) and c != '+' and c != '-' and c != '.') { + return false; + } + } + + return true; } pub fn getUsername(raw: [:0]const u8) []const u8 { @@ -278,6 +301,139 @@ pub fn eqlDocument(first: [:0]const u8, second: [:0]const u8) bool { return std.mem.eql(u8, first[0..first_hash_index], second[0..second_hash_index]); } +// Helper function to build a URL from components +pub fn buildUrl( + allocator: Allocator, + protocol: []const u8, + host: []const u8, + pathname: []const u8, + search: []const u8, + hash: []const u8, +) ![:0]const u8 { + return std.fmt.allocPrintSentinel(allocator, "{s}//{s}{s}{s}{s}", .{ + protocol, + host, + pathname, + search, + hash, + }, 0); +} + +pub fn setProtocol(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const host = getHost(current); + const pathname = getPathname(current); + const search = getSearch(current); + const hash = getHash(current); + + // Add : suffix if not present + const protocol = if (value.len > 0 and value[value.len - 1] != ':') + try std.fmt.allocPrint(allocator, "{s}:", .{value}) + else + value; + + return buildUrl(allocator, protocol, host, pathname, search, hash); +} + +pub fn setHost(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const protocol = getProtocol(current); + const pathname = getPathname(current); + const search = getSearch(current); + const hash = getHash(current); + + // Check if the host includes a port + const colon_pos = std.mem.lastIndexOfScalar(u8, value, ':'); + const clean_host = if (colon_pos) |pos| blk: { + const port_str = value[pos + 1 ..]; + // Remove default ports + if (std.mem.eql(u8, protocol, "https:") and std.mem.eql(u8, port_str, "443")) { + break :blk value[0..pos]; + } + if (std.mem.eql(u8, protocol, "http:") and std.mem.eql(u8, port_str, "80")) { + break :blk value[0..pos]; + } + break :blk value; + } else value; + + return buildUrl(allocator, protocol, clean_host, pathname, search, hash); +} + +pub fn setHostname(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const current_port = getPort(current); + const new_host = if (current_port.len > 0) + try std.fmt.allocPrint(allocator, "{s}:{s}", .{ value, current_port }) + else + value; + + return setHost(current, new_host, allocator); +} + +pub fn setPort(current: [:0]const u8, value: ?[]const u8, allocator: Allocator) ![:0]const u8 { + const hostname = getHostname(current); + const protocol = getProtocol(current); + + // Handle null or default ports + const new_host = if (value) |port_str| blk: { + if (port_str.len == 0) { + break :blk hostname; + } + // Check if this is a default port for the protocol + if (std.mem.eql(u8, protocol, "https:") and std.mem.eql(u8, port_str, "443")) { + break :blk hostname; + } + if (std.mem.eql(u8, protocol, "http:") and std.mem.eql(u8, port_str, "80")) { + break :blk hostname; + } + break :blk try std.fmt.allocPrint(allocator, "{s}:{s}", .{ hostname, port_str }); + } else hostname; + + return setHost(current, new_host, allocator); +} + +pub fn setPathname(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const protocol = getProtocol(current); + const host = getHost(current); + const search = getSearch(current); + const hash = getHash(current); + + // Add / prefix if not present and value is not empty + const pathname = if (value.len > 0 and value[0] != '/') + try std.fmt.allocPrint(allocator, "/{s}", .{value}) + else + value; + + return buildUrl(allocator, protocol, host, pathname, search, hash); +} + +pub fn setSearch(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const protocol = getProtocol(current); + const host = getHost(current); + const pathname = getPathname(current); + const hash = getHash(current); + + // Add ? prefix if not present and value is not empty + const search = if (value.len > 0 and value[0] != '?') + try std.fmt.allocPrint(allocator, "?{s}", .{value}) + else + value; + + return buildUrl(allocator, protocol, host, pathname, search, hash); +} + +pub fn setHash(current: [:0]const u8, value: []const u8, allocator: Allocator) ![:0]const u8 { + const protocol = getProtocol(current); + const host = getHost(current); + const pathname = getPathname(current); + const search = getSearch(current); + + // Add # prefix if not present and value is not empty + const hash = if (value.len > 0 and value[0] != '#') + try std.fmt.allocPrint(allocator, "#{s}", .{value}) + else + value; + + return buildUrl(allocator, protocol, host, pathname, search, hash); +} + const KnownProtocol = enum { @"http:", @"https:", diff --git a/src/browser/tests/legacy/url/url.html b/src/browser/tests/legacy/url/url.html index ef770e46..72ca45f0 100644 --- a/src/browser/tests/legacy/url/url.html +++ b/src/browser/tests/legacy/url/url.html @@ -31,14 +31,14 @@ diff --git a/src/browser/tests/url.html b/src/browser/tests/url.html index 7faefc32..80b70823 100644 --- a/src/browser/tests/url.html +++ b/src/browser/tests/url.html @@ -313,6 +313,23 @@ url.searchParams.delete('b'); testing.expectEqual('https://example.com/path', url.href); } + + { + let url = new URL("https://foo.bar"); + const searchParams = url.searchParams; + + // SearchParams should be empty. + testing.expectEqual(0, searchParams.size); + + url.href = "https://lightpanda.io?over=9000&light=panda"; + // It won't hurt to check href and host too. + testing.expectEqual("https://lightpanda.io/?over=9000&light=panda", url.href); + testing.expectEqual("lightpanda.io", url.host); + // SearchParams should be updated too when URL is set. + testing.expectEqual(2, searchParams.size); + testing.expectEqual("9000", searchParams.get("over")); + testing.expectEqual("panda", searchParams.get("light")); + } + + + + + + diff --git a/src/browser/webapi/Location.zig b/src/browser/webapi/Location.zig index c2c2b1a8..87d0c282 100644 --- a/src/browser/webapi/Location.zig +++ b/src/browser/webapi/Location.zig @@ -57,8 +57,8 @@ pub fn getOrigin(self: *const Location, page: *const Page) ![]const u8 { return self._url.getOrigin(page); } -pub fn getSearch(self: *const Location) []const u8 { - return self._url.getSearch(); +pub fn getSearch(self: *const Location, page: *const Page) ![]const u8 { + return self._url.getSearch(page); } pub fn getHash(self: *const Location) []const u8 { diff --git a/src/browser/webapi/URL.zig b/src/browser/webapi/URL.zig index 49c03b1d..766bd3c2 100644 --- a/src/browser/webapi/URL.zig +++ b/src/browser/webapi/URL.zig @@ -35,6 +35,23 @@ _search_params: ?*URLSearchParams = null, pub const resolve = @import("../URL.zig").resolve; pub const eqlDocument = @import("../URL.zig").eqlDocument; +pub fn canParse(url: []const u8, base_: ?[]const u8, page: *Page) bool { + _ = page; + const url_is_absolute = U.isCompleteHTTPUrl(url); + + if (base_) |b| { + // Base must be valid even if URL is absolute + if (!U.isCompleteHTTPUrl(b)) { + return false; + } + return true; + } else if (!url_is_absolute) { + return false; + } else { + return true; + } +} + pub fn init(url: [:0]const u8, base_: ?[:0]const u8, page: *Page) !*URL { const url_is_absolute = @import("../URL.zig").isCompleteHTTPUrl(url); @@ -96,7 +113,17 @@ pub fn getOrigin(self: *const URL, page: *const Page) ![]const u8 { }; } -pub fn getSearch(self: *const URL) []const u8 { +pub fn getSearch(self: *const URL, page: *const Page) ![]const u8 { + // If searchParams has been accessed, generate search from it + if (self._search_params) |sp| { + if (sp.getSize() == 0) { + return ""; + } + var buf = std.Io.Writer.Allocating.init(page.call_arena); + try buf.writer.writeByte('?'); + try sp.toString(&buf.writer); + return buf.written(); + } return U.getSearch(self._raw); } @@ -110,7 +137,7 @@ pub fn getSearchParams(self: *URL, page: *Page) !*URLSearchParams { } // Get current search string (without the '?') - const search = self.getSearch(); + const search = try self.getSearch(page); const search_value = if (search.len > 0) search[1..] else ""; const params = try URLSearchParams.init(.{ .query_string = search_value }, page); @@ -118,6 +145,61 @@ pub fn getSearchParams(self: *URL, page: *Page) !*URLSearchParams { return params; } +pub fn setHref(self: *URL, value: []const u8, page: *Page) !void { + const base = if (U.isCompleteHTTPUrl(value)) page.url else self._raw; + const raw = try U.resolve(self._arena orelse page.arena, base, value, .{ .always_dupe = true }); + self._raw = raw; + + // Update existing searchParams if it exists + if (self._search_params) |sp| { + const search = U.getSearch(raw); + const search_value = if (search.len > 0) search[1..] else ""; + try sp.updateFromString(search_value, page); + } +} + +pub fn setProtocol(self: *URL, value: []const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setProtocol(self._raw, value, allocator); +} + +pub fn setHost(self: *URL, value: []const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setHost(self._raw, value, allocator); +} + +pub fn setHostname(self: *URL, value: []const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setHostname(self._raw, value, allocator); +} + +pub fn setPort(self: *URL, value: ?[]const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setPort(self._raw, value, allocator); +} + +pub fn setPathname(self: *URL, value: []const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setPathname(self._raw, value, allocator); +} + +pub fn setSearch(self: *URL, value: []const u8, page: *Page) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setSearch(self._raw, value, allocator); + + // Update existing searchParams if it exists + if (self._search_params) |sp| { + const search = U.getSearch(self._raw); + const search_value = if (search.len > 0) search[1..] else ""; + try sp.updateFromString(search_value, page); + } +} + +pub fn setHash(self: *URL, value: []const u8) !void { + const allocator = self._arena orelse return error.NoAllocator; + self._raw = try U.setHash(self._raw, value, allocator); +} + pub fn toString(self: *const URL, page: *const Page) ![:0]const u8 { const sp = self._search_params orelse { return self._raw; @@ -137,6 +219,13 @@ pub fn toString(self: *const URL, page: *const Page) ![:0]const u8 { var buf = std.Io.Writer.Allocating.init(page.call_arena); try buf.writer.writeAll(base); + // Add / if missing (e.g., "https://example.com" -> "https://example.com/") + // Only add if pathname is just "/" and not already in the base + const pathname = U.getPathname(raw); + if (std.mem.eql(u8, pathname, "/") and !std.mem.endsWith(u8, base, "/")) { + try buf.writer.writeByte('/'); + } + // Only add ? if there are params if (sp.getSize() > 0) { try buf.writer.writeByte('?'); @@ -159,19 +248,20 @@ pub const JsApi = struct { }; pub const constructor = bridge.constructor(URL.init, .{}); + pub const canParse = bridge.function(URL.canParse, .{ .static = true }); pub const toString = bridge.function(URL.toString, .{}); pub const toJSON = bridge.function(URL.toString, .{}); - pub const href = bridge.accessor(URL.toString, null, .{}); - pub const search = bridge.accessor(URL.getSearch, null, .{}); - pub const hash = bridge.accessor(URL.getHash, null, .{}); - pub const pathname = bridge.accessor(URL.getPathname, null, .{}); + pub const href = bridge.accessor(URL.toString, URL.setHref, .{}); + pub const search = bridge.accessor(URL.getSearch, URL.setSearch, .{}); + pub const hash = bridge.accessor(URL.getHash, URL.setHash, .{}); + pub const pathname = bridge.accessor(URL.getPathname, URL.setPathname, .{}); pub const username = bridge.accessor(URL.getUsername, null, .{}); pub const password = bridge.accessor(URL.getPassword, null, .{}); - pub const hostname = bridge.accessor(URL.getHostname, null, .{}); - pub const host = bridge.accessor(URL.getHost, null, .{}); - pub const port = bridge.accessor(URL.getPort, null, .{}); + pub const hostname = bridge.accessor(URL.getHostname, URL.setHostname, .{}); + pub const host = bridge.accessor(URL.getHost, URL.setHost, .{}); + pub const port = bridge.accessor(URL.getPort, URL.setPort, .{}); pub const origin = bridge.accessor(URL.getOrigin, null, .{}); - pub const protocol = bridge.accessor(URL.getProtocol, null, .{}); + pub const protocol = bridge.accessor(URL.getProtocol, URL.setProtocol, .{}); pub const searchParams = bridge.accessor(URL.getSearchParams, null, .{}); }; diff --git a/src/browser/webapi/element/html/Anchor.zig b/src/browser/webapi/element/html/Anchor.zig index 006843db..75e61c20 100644 --- a/src/browser/webapi/element/html/Anchor.zig +++ b/src/browser/webapi/element/html/Anchor.zig @@ -84,26 +84,7 @@ pub fn getHost(self: *Anchor, page: *Page) ![]const u8 { pub fn setHost(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const protocol = URL.getProtocol(href); - const pathname = URL.getPathname(href); - const search = URL.getSearch(href); - const hash = URL.getHash(href); - - // Check if the host includes a port - const colon_pos = std.mem.lastIndexOfScalar(u8, value, ':'); - const clean_host = if (colon_pos) |pos| blk: { - const port_str = value[pos + 1 ..]; - // Remove default ports - if (std.mem.eql(u8, protocol, "https:") and std.mem.eql(u8, port_str, "443")) { - break :blk value[0..pos]; - } - if (std.mem.eql(u8, protocol, "http:") and std.mem.eql(u8, port_str, "80")) { - break :blk value[0..pos]; - } - break :blk value; - } else value; - - const new_href = try buildUrl(page.call_arena, protocol, clean_host, pathname, search, hash); + const new_href = try URL.setHost(href, value, page.call_arena); try setHref(self, new_href, page); } @@ -114,13 +95,8 @@ pub fn getHostname(self: *Anchor, page: *Page) ![]const u8 { pub fn setHostname(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const current_port = URL.getPort(href); - const new_host = if (current_port.len > 0) - try std.fmt.allocPrint(page.call_arena, "{s}:{s}", .{ value, current_port }) - else - value; - - try setHost(self, new_host, page); + const new_href = try URL.setHostname(href, value, page.call_arena); + try setHref(self, new_href, page); } pub fn getPort(self: *Anchor, page: *Page) ![]const u8 { @@ -142,25 +118,8 @@ pub fn getPort(self: *Anchor, page: *Page) ![]const u8 { pub fn setPort(self: *Anchor, value: ?[]const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const hostname = URL.getHostname(href); - const protocol = URL.getProtocol(href); - - // Handle null or default ports - const new_host = if (value) |port_str| blk: { - if (port_str.len == 0) { - break :blk hostname; - } - // Check if this is a default port for the protocol - if (std.mem.eql(u8, protocol, "https:") and std.mem.eql(u8, port_str, "443")) { - break :blk hostname; - } - if (std.mem.eql(u8, protocol, "http:") and std.mem.eql(u8, port_str, "80")) { - break :blk hostname; - } - break :blk try std.fmt.allocPrint(page.call_arena, "{s}:{s}", .{ hostname, port_str }); - } else hostname; - - try setHost(self, new_host, page); + const new_href = try URL.setPort(href, value, page.call_arena); + try setHref(self, new_href, page); } pub fn getSearch(self: *Anchor, page: *Page) ![]const u8 { @@ -170,18 +129,7 @@ pub fn getSearch(self: *Anchor, page: *Page) ![]const u8 { pub fn setSearch(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const protocol = URL.getProtocol(href); - const host = URL.getHost(href); - const pathname = URL.getPathname(href); - const hash = URL.getHash(href); - - // Add ? prefix if not present and value is not empty - const search = if (value.len > 0 and value[0] != '?') - try std.fmt.allocPrint(page.call_arena, "?{s}", .{value}) - else - value; - - const new_href = try buildUrl(page.call_arena, protocol, host, pathname, search, hash); + const new_href = try URL.setSearch(href, value, page.call_arena); try setHref(self, new_href, page); } @@ -192,18 +140,7 @@ pub fn getHash(self: *Anchor, page: *Page) ![]const u8 { pub fn setHash(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const protocol = URL.getProtocol(href); - const host = URL.getHost(href); - const pathname = URL.getPathname(href); - const search = URL.getSearch(href); - - // Add # prefix if not present and value is not empty - const hash = if (value.len > 0 and value[0] != '#') - try std.fmt.allocPrint(page.call_arena, "#{s}", .{value}) - else - value; - - const new_href = try buildUrl(page.call_arena, protocol, host, pathname, search, hash); + const new_href = try URL.setHash(href, value, page.call_arena); try setHref(self, new_href, page); } @@ -214,18 +151,7 @@ pub fn getPathname(self: *Anchor, page: *Page) ![]const u8 { pub fn setPathname(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const protocol = URL.getProtocol(href); - const host = URL.getHost(href); - const search = URL.getSearch(href); - const hash = URL.getHash(href); - - // Add / prefix if not present and value is not empty - const pathname = if (value.len > 0 and value[0] != '/') - try std.fmt.allocPrint(page.call_arena, "/{s}", .{value}) - else - value; - - const new_href = try buildUrl(page.call_arena, protocol, host, pathname, search, hash); + const new_href = try URL.setPathname(href, value, page.call_arena); try setHref(self, new_href, page); } @@ -236,18 +162,7 @@ pub fn getProtocol(self: *Anchor, page: *Page) ![]const u8 { pub fn setProtocol(self: *Anchor, value: []const u8, page: *Page) !void { const href = try getResolvedHref(self, page) orelse return; - const host = URL.getHost(href); - const pathname = URL.getPathname(href); - const search = URL.getSearch(href); - const hash = URL.getHash(href); - - // Add : suffix if not present - const protocol = if (value.len > 0 and value[value.len - 1] != ':') - try std.fmt.allocPrint(page.call_arena, "{s}:", .{value}) - else - value; - - const new_href = try buildUrl(page.call_arena, protocol, host, pathname, search, hash); + const new_href = try URL.setProtocol(href, value, page.call_arena); try setHref(self, new_href, page); } @@ -283,24 +198,6 @@ fn getResolvedHref(self: *Anchor, page: *Page) !?[:0]const u8 { return try URL.resolve(page.call_arena, page.url, href, .{}); } -// Helper function to build a new URL from components -fn buildUrl( - allocator: std.mem.Allocator, - protocol: []const u8, - host: []const u8, - pathname: []const u8, - search: []const u8, - hash: []const u8, -) ![:0]const u8 { - return std.fmt.allocPrintSentinel(allocator, "{s}//{s}{s}{s}{s}", .{ - protocol, - host, - pathname, - search, - hash, - }, 0); -} - pub const JsApi = struct { pub const bridge = js.Bridge(Anchor); diff --git a/src/browser/webapi/net/URLSearchParams.zig b/src/browser/webapi/net/URLSearchParams.zig index f3069531..2cfe3b2c 100644 --- a/src/browser/webapi/net/URLSearchParams.zig +++ b/src/browser/webapi/net/URLSearchParams.zig @@ -61,6 +61,10 @@ pub fn init(opts_: ?InitOpts, page: *Page) !*URLSearchParams { }); } +pub fn updateFromString(self: *URLSearchParams, query_string: []const u8, page: *Page) !void { + self._params = try paramsFromString(self._arena, query_string, &page.buf); +} + pub fn getSize(self: *const URLSearchParams) usize { return self._params.len(); } @@ -277,7 +281,7 @@ fn escape(input: []const u8, writer: *std.Io.Writer) !void { fn isUnreserved(c: u8) bool { return switch (c) { - 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => true, + 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_' => true, else => false, }; }