basic url parsing working

* also reintroduces old `URLSearchParams` implementation since ada prefers its own iterator where we'd like to use our own.
This commit is contained in:
Halil Durak
2025-10-07 17:18:08 +03:00
parent 1c7971b096
commit c1160543ad
4 changed files with 265 additions and 97 deletions

View File

@@ -42,12 +42,12 @@ pub const HTMLDocument = struct {
// JS funcs // JS funcs
// -------- // --------
pub fn get_domain(self: *parser.DocumentHTML, page: *Page) ![]const u8 { pub fn get_domain(self: *parser.DocumentHTML) ![]const u8 {
// libdom's document_html get_domain always returns null, this is // libdom's document_html get_domain always returns null, this is
// the way MDN recommends getting the domain anyways, since document.domain // the way MDN recommends getting the domain anyways, since document.domain
// is deprecated. // is deprecated.
const location = try parser.documentHTMLGetLocation(Location, self) orelse return ""; const location = try parser.documentHTMLGetLocation(Location, self) orelse return "";
return location.get_host(page); return location.get_host();
} }
pub fn set_domain(_: *parser.DocumentHTML, _: []const u8) ![]const u8 { pub fn set_domain(_: *parser.DocumentHTML, _: []const u8) ![]const u8 {

View File

@@ -275,7 +275,7 @@ pub const HTMLAnchorElement = struct {
// TODO return a disposable string // TODO return a disposable string
pub fn get_origin(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_origin(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try u.get_origin(page); return u.get_origin(page);
} }
// TODO return a disposable string // TODO return a disposable string
@@ -286,43 +286,42 @@ pub const HTMLAnchorElement = struct {
pub fn set_protocol(self: *parser.Anchor, v: []const u8, page: *Page) !void { pub fn set_protocol(self: *parser.Anchor, v: []const u8, page: *Page) !void {
const arena = page.arena; const arena = page.arena;
_ = arena;
var u = try url(self, page); var u = try url(self, page);
u.uri.scheme = v; u.set_protocol(v);
const href = try u.toString(arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
// TODO return a disposable string
pub fn get_host(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_host(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try u.get_host(page); return u.get_host();
} }
pub fn set_host(self: *parser.Anchor, v: []const u8, page: *Page) !void { pub fn set_host(self: *parser.Anchor, v: []const u8, page: *Page) !void {
// search : separator // search : separator
var p: ?u16 = null; var p: ?[]const u8 = null;
var h: []const u8 = undefined; var h: []const u8 = undefined;
for (v, 0..) |c, i| { for (v, 0..) |c, i| {
if (c == ':') { if (c == ':') {
h = v[0..i]; h = v[0..i];
p = try std.fmt.parseInt(u16, v[i + 1 ..], 10); //p = try std.fmt.parseInt(u16, v[i + 1 ..], 10);
p = v[i + 1 ..];
break; break;
} }
} }
const arena = page.arena;
var u = try url(self, page); var u = try url(self, page);
if (p) |pp| { if (p) |port| {
u.uri.host = .{ .raw = h }; u.set_host(h);
u.uri.port = pp; u.set_port(port);
} else { } else {
u.uri.host = .{ .raw = v }; u.set_host(v);
u.uri.port = null;
} }
const href = try u.toString(arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
@@ -332,30 +331,26 @@ pub const HTMLAnchorElement = struct {
} }
pub fn set_hostname(self: *parser.Anchor, v: []const u8, page: *Page) !void { pub fn set_hostname(self: *parser.Anchor, v: []const u8, page: *Page) !void {
const arena = page.arena;
var u = try url(self, page); var u = try url(self, page);
u.uri.host = .{ .raw = v }; u.set_host(v);
const href = try u.toString(arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
// TODO return a disposable string // TODO return a disposable string
pub fn get_port(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_port(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try u.get_port(page); return u.get_port();
} }
pub fn set_port(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void { pub fn set_port(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void {
const arena = page.arena;
var u = try url(self, page); var u = try url(self, page);
if (v != null and v.?.len > 0) { if (v != null and v.?.len > 0) {
u.uri.port = try std.fmt.parseInt(u16, v.?, 10); u.set_host(v.?);
} else {
u.uri.port = null;
} }
const href = try u.toString(arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
@@ -366,38 +361,29 @@ pub const HTMLAnchorElement = struct {
} }
pub fn set_username(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void { pub fn set_username(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void {
const arena = page.arena; if (v) |username| {
var u = try url(self, page); var u = try url(self, page);
u.set_username(username);
if (v) |vv| { const href = try u.get_href(page);
u.uri.user = .{ .raw = vv };
} else {
u.uri.user = null;
}
const href = try u.toString(arena);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
}
// TODO return a disposable string
pub fn get_password(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_password(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try page.arena.dupe(u8, u.get_password()); return u.get_password();
} }
pub fn set_password(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void { pub fn set_password(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void {
const arena = page.arena; if (v) |password| {
var u = try url(self, page); var u = try url(self, page);
u.set_password(password);
if (v) |vv| { const href = try u.get_href(page);
u.uri.password = .{ .raw = vv };
} else {
u.uri.password = null;
}
const href = try u.toString(arena);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
}
// TODO return a disposable string // TODO return a disposable string
pub fn get_pathname(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_pathname(self: *parser.Anchor, page: *Page) ![]const u8 {
@@ -406,44 +392,35 @@ pub const HTMLAnchorElement = struct {
} }
pub fn set_pathname(self: *parser.Anchor, v: []const u8, page: *Page) !void { pub fn set_pathname(self: *parser.Anchor, v: []const u8, page: *Page) !void {
const arena = page.arena;
var u = try url(self, page); var u = try url(self, page);
u.uri.path = .{ .raw = v }; u.set_pathname(v);
const href = try u.toString(arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
pub fn get_search(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_search(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try u.get_search(page); return u.get_search(page);
} }
pub fn set_search(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void { pub fn set_search(self: *parser.Anchor, v: []const u8, page: *Page) !void {
var u = try url(self, page); var u = try url(self, page);
try u.set_search(v, page); try u.set_search(v, page);
const href = try u.toString(page.call_arena); const href = try u.get_href(page);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
// TODO return a disposable string // TODO return a disposable string
pub fn get_hash(self: *parser.Anchor, page: *Page) ![]const u8 { pub fn get_hash(self: *parser.Anchor, page: *Page) ![]const u8 {
var u = try url(self, page); var u = try url(self, page);
return try u.get_hash(page); return u.get_hash();
} }
pub fn set_hash(self: *parser.Anchor, v: ?[]const u8, page: *Page) !void { pub fn set_hash(self: *parser.Anchor, v: []const u8, page: *Page) !void {
const arena = page.arena;
var u = try url(self, page); var u = try url(self, page);
u.set_hash(v);
if (v) |vv| { const href = try u.get_href(page);
u.uri.fragment = .{ .raw = vv };
} else {
u.uri.fragment = null;
}
const href = try u.toString(arena);
try parser.anchorSetHref(self, href); try parser.anchorSetHref(self, href);
} }
}; };

View File

@@ -86,7 +86,7 @@ pub const Location = struct {
} }
pub fn _toString(self: *Location, page: *Page) ![]const u8 { pub fn _toString(self: *Location, page: *Page) ![]const u8 {
return try self.get_href(page); return self.get_href(page);
} }
}; };

View File

@@ -18,6 +18,7 @@
const std = @import("std"); const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const Writer = std.Io.Writer;
const ada = @import("ada"); const ada = @import("ada");
const js = @import("../js/js.zig"); const js = @import("../js/js.zig");
@@ -39,38 +40,69 @@ pub const Interfaces = .{
/// https://developer.mozilla.org/en-US/docs/Web/API/URL/URL /// https://developer.mozilla.org/en-US/docs/Web/API/URL/URL
pub const URL = struct { pub const URL = struct {
internal: ada.URL, internal: ada.URL,
/// We prefer in-house search params solution here;
/// ada's search params impl use more memory.
/// It also offers it's own iterator implementation
/// where we'd like to use ours.
search_params: URLSearchParams,
pub const empty = URL{
.internal = null,
.search_params = .{},
};
// You can use an existing URL object for either argument, and it will be // You can use an existing URL object for either argument, and it will be
// stringified from the object's href property. // stringified from the object's href property.
pub const ConstructorArg = union(enum) { const ConstructorArg = union(enum) {
url: *URL,
element: *parser.Element,
string: []const u8, string: []const u8,
url: *const URL,
element: *parser.Element,
fn toString(self: *const ConstructorArg) error{Invalid}![]const u8 { fn toString(self: ConstructorArg, page: *Page) ![]const u8 {
return switch (self) { return switch (self) {
.string => |s| s, .string => |s| s,
.url => |url| url._toString(), .url => |url| url._toString(page),
.element => |e| parser.elementGetAttribute(@ptrCast(e), "href") orelse error.Invalid, .element => |e| {
const attrib = try parser.elementGetAttribute(@ptrCast(e), "href") orelse {
return error.InvalidArgument;
};
return attrib;
},
}; };
} }
}; };
pub fn constructor(url: ConstructorArg, maybe_base: ?ConstructorArg, _: *Page) !URL { pub fn constructor(url: ConstructorArg, maybe_base: ?ConstructorArg, page: *Page) !URL {
const u = blk: { const url_str = try url.toString(page);
const url_str = try url.toString();
const internal = try blk: {
if (maybe_base) |base| { if (maybe_base) |base| {
break :blk ada.parseWithBase(url_str, try base.toString()); break :blk ada.parseWithBase(url_str, try base.toString(page));
} }
break :blk ada.parse(url_str); break :blk ada.parse(url_str);
}; };
return .{ .url = u }; // Prepare search_params.
const params: URLSearchParams = blk: {
const search = ada.getSearch(internal);
if (search.data == null) {
break :blk .{};
}
break :blk try .initFromString(page.arena, search.data[0..search.length]);
};
// We're doing this since we track search params separately.
ada.clearSearch(internal);
return .{ .internal = internal, .search_params = params };
} }
pub fn destructor(self: *const URL) void { pub fn destructor(self: *const URL) void {
ada.free(self.internal); // Not tracked by arena.
return ada.free(self.internal);
} }
pub fn initWithoutSearchParams(uri: std.Uri) URL { pub fn initWithoutSearchParams(uri: std.Uri) URL {
@@ -82,6 +114,10 @@ pub const URL = struct {
// Getters. // Getters.
pub fn get_searchParams(self: *URL) *URLSearchParams {
return &self.search_params;
}
pub fn get_origin(self: *const URL, page: *Page) ![]const u8 { pub fn get_origin(self: *const URL, page: *Page) ![]const u8 {
const arena = page.arena; const arena = page.arena;
// `ada.getOrigin` allocates memory in order to find the `origin`. // `ada.getOrigin` allocates memory in order to find the `origin`.
@@ -94,8 +130,27 @@ pub const URL = struct {
return arena.dupe(u8, origin); return arena.dupe(u8, origin);
} }
pub fn get_href(self: *const URL) []const u8 { pub fn get_href(self: *const URL, page: *Page) ![]const u8 {
return ada.getHref(self.internal); var w: Writer.Allocating = .init(page.arena);
const href = ada.getHref(self.internal);
const comps = ada.getComponents(self.internal);
const has_hash = comps.hash_start != ada.URLOmitted;
const href_part = if (has_hash) href[0..comps.hash_start] else href;
try w.writer.writeAll(href_part);
// Write search params if provided.
if (self.search_params.get_size() > 0) {
try w.writer.writeByte('?');
try self.search_params.write(&w.writer);
}
// Write hash if provided before.
const hash = self.get_hash();
try w.writer.writeAll(hash);
return w.written();
} }
pub fn get_username(self: *const URL) []const u8 { pub fn get_username(self: *const URL) []const u8 {
@@ -126,39 +181,175 @@ pub const URL = struct {
return ada.getPathname(self.internal); return ada.getPathname(self.internal);
} }
pub fn get_search(self: *const URL) []const u8 { // get_search depends on the current state of `search_params`.
return ada.getSearch(self.internal); pub fn get_search(self: *const URL, page: *Page) ![]const u8 {
const arena = page.arena;
if (self.search_params.get_size() == 0) {
return "";
}
var buf: std.ArrayListUnmanaged(u8) = .{};
try buf.append(arena, '?');
try self.search_params.encode(buf.writer(arena));
return buf.items;
} }
pub fn get_protocol(self: *const URL) []const u8 { pub fn get_protocol(self: *const URL) []const u8 {
return ada.getProtocol(self.internal); return ada.getProtocol(self.internal);
} }
// Setters.
// FIXME: reinit search_params?
pub fn set_href(self: *const URL, input: []const u8) void {
_ = ada.setHref(self.internal, input);
}
pub fn set_host(self: *const URL, input: []const u8) void {
_ = ada.setHost(self.internal, input);
}
pub fn set_hostname(self: *const URL, input: []const u8) void {
_ = ada.setHostname(self.internal, input);
}
pub fn set_protocol(self: *const URL, input: []const u8) void {
_ = ada.setProtocol(self.internal, input);
}
pub fn set_username(self: *const URL, input: []const u8) void {
_ = ada.setUsername(self.internal, input);
}
pub fn set_password(self: *const URL, input: []const u8) void {
_ = ada.setPassword(self.internal, input);
}
pub fn set_port(self: *const URL, input: []const u8) void {
_ = ada.setPort(self.internal, input);
}
pub fn set_pathname(self: *const URL, input: []const u8) void {
_ = ada.setPathname(self.internal, input);
}
pub fn set_search(self: *URL, maybe_input: ?[]const u8, page: *Page) !void {
self.search_params = .{};
if (maybe_input) |input| {
self.search_params = try .initFromString(page.arena, input);
}
}
pub fn set_hash(self: *const URL, input: []const u8) void {
_ = ada.setHash(self.internal, input);
}
}; };
pub const URLSearchParams = struct { pub const URLSearchParams = struct {
internal: ada.URLSearchParams, entries: kv.List = .{},
pub const ConstructorOptions = union(enum) { pub const ConstructorOptions = union(enum) {
string: []const u8, query_string: []const u8,
form_data: *const FormData, form_data: *const FormData,
object: js.JsObject, object: js.JsObject,
}; };
};
// uriComponentNullStr converts an optional std.Uri.Component to string value. pub fn constructor(maybe_options: ?ConstructorOptions, page: *Page) !URLSearchParams {
// The string value can be undecoded. const options = maybe_options orelse return .{};
fn uriComponentNullStr(c: ?std.Uri.Component) []const u8 {
if (c == null) return "";
return uriComponentStr(c.?); const arena = page.arena;
} return switch (options) {
.query_string => |string| .{ .entries = try parseQuery(arena, string) },
.form_data => |form_data| .{ .entries = try form_data.entries.clone(arena) },
.object => |object| {
var it = object.nameIterator();
fn uriComponentStr(c: std.Uri.Component) []const u8 { var entries = kv.List{};
return switch (c) { try entries.ensureTotalCapacity(arena, it.count);
.raw => |v| v,
.percent_encoded => |v| v, while (try it.next()) |js_name| {
const name = try js_name.toString(arena);
const js_value = try object.get(name);
const value = try js_value.toString(arena);
entries.appendOwnedAssumeCapacity(name, value);
}
return .{ .entries = entries };
},
}; };
} }
/// Initializes URLSearchParams from a query string.
pub fn initFromString(arena: Allocator, query_string: []const u8) !URLSearchParams {
return .{ .entries = try parseQuery(arena, query_string) };
}
pub fn get_size(self: *const URLSearchParams) u32 {
return @intCast(self.entries.count());
}
pub fn _append(self: *URLSearchParams, name: []const u8, value: []const u8, page: *Page) !void {
return self.entries.append(page.arena, name, value);
}
pub fn _set(self: *URLSearchParams, name: []const u8, value: []const u8, page: *Page) !void {
return self.entries.set(page.arena, name, value);
}
pub fn _delete(self: *URLSearchParams, name: []const u8, value_: ?[]const u8) void {
if (value_) |value| {
return self.entries.deleteKeyValue(name, value);
}
return self.entries.delete(name);
}
pub fn _get(self: *const URLSearchParams, name: []const u8) ?[]const u8 {
return self.entries.get(name);
}
pub fn _getAll(self: *const URLSearchParams, name: []const u8, page: *Page) ![]const []const u8 {
return self.entries.getAll(page.call_arena, name);
}
pub fn _has(self: *const URLSearchParams, name: []const u8) bool {
return self.entries.has(name);
}
pub fn _keys(self: *const URLSearchParams) KeyIterable {
return .{ .inner = self.entries.keyIterator() };
}
pub fn _values(self: *const URLSearchParams) ValueIterable {
return .{ .inner = self.entries.valueIterator() };
}
pub fn _entries(self: *const URLSearchParams) EntryIterable {
return .{ .inner = self.entries.entryIterator() };
}
pub fn _symbol_iterator(self: *const URLSearchParams) EntryIterable {
return self._entries();
}
pub fn _toString(self: *const URLSearchParams, page: *Page) ![]const u8 {
var arr: std.ArrayListUnmanaged(u8) = .empty;
try self.write(arr.writer(page.call_arena));
return arr.items;
}
fn write(self: *const URLSearchParams, writer: anytype) !void {
return kv.urlEncode(self.entries, .query, writer);
}
// TODO
pub fn _sort(_: *URLSearchParams) void {}
fn encode(self: *const URLSearchParams, writer: anytype) !void {
return kv.urlEncode(self.entries, .query, writer);
}
};
// Parse the given query. // Parse the given query.
fn parseQuery(arena: Allocator, s: []const u8) !kv.List { fn parseQuery(arena: Allocator, s: []const u8) !kv.List {