initial URL refactor

This commit is contained in:
nikneym
2025-10-05 11:02:53 +03:00
committed by Halil Durak
parent 8f99e36cde
commit 45cd494298

View File

@@ -18,6 +18,7 @@
const std = @import("std"); const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const ada = @import("ada");
const js = @import("../js/js.zig"); const js = @import("../js/js.zig");
const parser = @import("../netsurf.zig"); const parser = @import("../netsurf.zig");
@@ -35,215 +36,113 @@ pub const Interfaces = .{
EntryIterable, EntryIterable,
}; };
// https://url.spec.whatwg.org/#url /// https://developer.mozilla.org/en-US/docs/Web/API/URL/URL
//
// TODO we could avoid many of these getter string allocatoration in two differents
// way:
//
// 1. We can eventually get the slice of scheme *with* the following char in
// the underlying string. But I don't know if it's possible and how to do that.
// I mean, if the rawuri contains `https://foo.bar`, uri.scheme is a slice
// containing only `https`. I want `https:` so, in theory, I don't need to
// allocatorate data, I should be able to retrieve the scheme + the following `:`
// from rawuri.
//
// 2. The other way would be to copy the `std.Uri` code to have a dedicated
// parser including the characters we want for the web API.
pub const URL = struct { pub const URL = struct {
uri: std.Uri, internal: ada.URL,
search_params: URLSearchParams,
pub const empty = URL{ // You can use an existing URL object for either argument, and it will be
.uri = .{ .scheme = "" }, // stringified from the object's href property.
.search_params = .{}, pub const ConstructorArg = union(enum) {
};
const URLArg = union(enum) {
url: *URL, url: *URL,
element: *parser.ElementHTML, element: *parser.Element,
string: []const u8, string: []const u8,
fn toString(self: URLArg, arena: Allocator) !?[]const u8 { fn toString(self: *const ConstructorArg) error{Invalid}![]const u8 {
switch (self) { return switch (self) {
.string => |s| return s, .string => |s| s,
.url => |url| return try url.toString(arena), .url => |url| url._toString(),
.element => |e| return try parser.elementGetAttribute(@ptrCast(e), "href"), .element => |e| parser.elementGetAttribute(@ptrCast(e), "href") orelse error.Invalid,
} };
} }
}; };
pub fn constructor(url: URLArg, base: ?URLArg, page: *Page) !URL { pub fn constructor(url: ConstructorArg, maybe_base: ?ConstructorArg, _: *Page) !URL {
const arena = page.arena; const u = blk: {
const url_str = try url.toString(arena) orelse return error.InvalidArgument; const url_str = try url.toString();
if (maybe_base) |base| {
var raw: ?[]const u8 = null; break :blk ada.parseWithBase(url_str, try base.toString());
if (base) |b| {
if (try b.toString(arena)) |bb| {
raw = try @import("../../url.zig").URL.stitch(arena, url_str, bb, .{});
} }
}
if (raw == null) { break :blk ada.parse(url_str);
// if it was a URL, then it's already be owned by the arena
raw = if (url == .url) url_str else try arena.dupe(u8, url_str);
}
const uri = std.Uri.parse(raw.?) catch blk: {
if (!std.mem.endsWith(u8, raw.?, "://")) {
return error.TypeError;
}
// schema only is valid!
break :blk std.Uri{
.scheme = raw.?[0 .. raw.?.len - 3],
.host = .{ .percent_encoded = "" },
};
}; };
return init(arena, uri); return .{ .url = u };
} }
pub fn init(arena: Allocator, uri: std.Uri) !URL { pub fn destructor(self: *const URL) void {
return .{ ada.free(self.internal);
.uri = uri,
.search_params = try URLSearchParams.init(
arena,
uriComponentNullStr(uri.query),
),
};
} }
pub fn initWithoutSearchParams(uri: std.Uri) URL { pub fn initWithoutSearchParams(uri: std.Uri) URL {
return .{ .uri = uri, .search_params = .{} }; return .{ .uri = uri, .search_params = .{} };
} }
pub fn _toString(self: *const URL) []const u8 {
pub fn get_origin(self: *URL, page: *Page) ![]const u8 { return ada.getHref(self.internal);
var aw = std.Io.Writer.Allocating.init(page.arena);
try self.uri.writeToStream(&aw.writer, .{
.scheme = true,
.authentication = false,
.authority = true,
.path = false,
.query = false,
.fragment = false,
});
return aw.written();
} }
// get_href returns the URL by writing all its components. // Getters.
pub fn get_href(self: *URL, page: *Page) ![]const u8 {
return self.toString(page.arena); pub fn get_origin(self: *const URL, page: *Page) ![]const u8 {
const arena = page.arena;
// `ada.getOrigin` allocates memory in order to find the `origin`.
// We'd like to use our arena allocator for such case;
// so here we allocate the `origin` in page arena and free the original.
const origin = ada.getOrigin(self.internal);
// `OwnedString` itself is not heap allocated so this is safe.
defer ada.freeOwnedString(.{ .data = origin.ptr, .length = origin.len });
return arena.dupe(u8, origin);
} }
pub fn _toString(self: *URL, page: *Page) ![]const u8 { pub fn get_href(self: *const URL) []const u8 {
return self.toString(page.arena); return ada.getHref(self.internal);
} }
// format the url with all its components. pub fn get_username(self: *const URL) []const u8 {
pub fn toString(self: *const URL, arena: Allocator) ![]const u8 { return ada.getUsername(self.internal);
var aw = std.Io.Writer.Allocating.init(arena); }
try self.uri.writeToStream(&aw.writer, .{
.scheme = true,
.authentication = true,
.authority = true,
.path = uriComponentNullStr(self.uri.path).len > 0,
});
if (self.search_params.get_size() > 0) { pub fn get_password(self: *const URL) []const u8 {
try aw.writer.writeByte('?'); return ada.getPassword(self.internal);
try self.search_params.write(&aw.writer); }
}
{ pub fn get_port(self: *const URL) []const u8 {
const fragment = uriComponentNullStr(self.uri.fragment); return ada.getPort(self.internal);
if (fragment.len > 0) { }
try aw.writer.writeByte('#');
try aw.writer.writeAll(fragment);
}
}
return aw.written(); pub fn get_hash(self: *const URL) []const u8 {
return ada.getHash(self.internal);
}
pub fn get_host(self: *const URL) []const u8 {
return ada.getHost(self.internal);
}
pub fn get_hostname(self: *const URL) []const u8 {
return ada.getHostname(self.internal);
}
pub fn get_pathname(self: *const URL) []const u8 {
return ada.getPathname(self.internal);
}
pub fn get_search(self: *const URL) []const u8 {
return ada.getSearch(self.internal);
} }
pub fn get_protocol(self: *const URL) []const u8 { pub fn get_protocol(self: *const URL) []const u8 {
// std.Uri keeps a pointer to "https", "http" (scheme part) so we know return ada.getProtocol(self.internal);
// its followed by ':'.
const scheme = self.uri.scheme;
return scheme.ptr[0 .. scheme.len + 1];
} }
};
pub fn get_username(self: *URL) []const u8 { pub const URLSearchParams = struct {
return uriComponentNullStr(self.uri.user); internal: ada.URLSearchParams,
}
pub fn get_password(self: *URL) []const u8 { pub const ConstructorOptions = union(enum) {
return uriComponentNullStr(self.uri.password); string: []const u8,
} form_data: *const FormData,
object: js.JsObject,
pub fn get_host(self: *URL, page: *Page) ![]const u8 { };
var aw = std.Io.Writer.Allocating.init(page.arena);
try self.uri.writeToStream(&aw.writer, .{
.scheme = false,
.authentication = false,
.authority = true,
.path = false,
.query = false,
.fragment = false,
});
return aw.written();
}
pub fn get_hostname(self: *URL) []const u8 {
return uriComponentNullStr(self.uri.host);
}
pub fn get_port(self: *URL, page: *Page) ![]const u8 {
const arena = page.arena;
if (self.uri.port == null) return try arena.dupe(u8, "");
var aw = std.Io.Writer.Allocating.init(arena);
try aw.writer.printInt(self.uri.port.?, 10, .lower, .{});
return aw.written();
}
pub fn get_pathname(self: *URL) []const u8 {
if (uriComponentStr(self.uri.path).len == 0) return "/";
return uriComponentStr(self.uri.path);
}
pub fn get_search(self: *URL, page: *Page) ![]const u8 {
const arena = page.arena;
if (self.search_params.get_size() == 0) {
return "";
}
var buf: std.ArrayListUnmanaged(u8) = .{};
try buf.append(arena, '?');
try self.search_params.encode(buf.writer(arena));
return buf.items;
}
pub fn set_search(self: *URL, qs_: ?[]const u8, page: *Page) !void {
self.search_params = .{};
if (qs_) |qs| {
self.search_params = try URLSearchParams.init(page.arena, qs);
}
}
pub fn get_hash(self: *URL, page: *Page) ![]const u8 {
const arena = page.arena;
if (self.uri.fragment == null) return try arena.dupe(u8, "");
return try std.mem.concat(arena, u8, &[_][]const u8{ "#", uriComponentNullStr(self.uri.fragment) });
}
pub fn get_searchParams(self: *URL) *URLSearchParams {
return &self.search_params;
}
pub fn _toJSON(self: *URL, page: *Page) ![]const u8 {
return self.get_href(page);
}
}; };
// uriComponentNullStr converts an optional std.Uri.Component to string value. // uriComponentNullStr converts an optional std.Uri.Component to string value.
@@ -261,112 +160,6 @@ fn uriComponentStr(c: std.Uri.Component) []const u8 {
}; };
} }
// https://url.spec.whatwg.org/#interface-urlsearchparams
pub const URLSearchParams = struct {
entries: kv.List = .{},
const URLSearchParamsOpts = union(enum) {
qs: []const u8,
form_data: *const FormData,
js_obj: js.Object,
};
pub fn constructor(opts_: ?URLSearchParamsOpts, page: *Page) !URLSearchParams {
const opts = opts_ orelse return .{ .entries = .{} };
return switch (opts) {
.qs => |qs| init(page.arena, qs),
.form_data => |fd| .{ .entries = try fd.entries.clone(page.arena) },
.js_obj => |js_obj| {
const arena = page.arena;
var it = js_obj.nameIterator();
var entries: kv.List = .{};
try entries.ensureTotalCapacity(arena, it.count);
while (try it.next()) |js_name| {
const name = try js_name.toString(arena);
const js_val = try js_obj.get(name);
entries.appendOwnedAssumeCapacity(
name,
try js_val.toString(arena),
);
}
return .{ .entries = entries };
},
};
}
pub fn init(arena: Allocator, qs_: ?[]const u8) !URLSearchParams {
return .{
.entries = if (qs_) |qs| try parseQuery(arena, qs) else .{},
};
}
pub fn get_size(self: *const URLSearchParams) u32 {
return @intCast(self.entries.count());
}
pub fn _append(self: *URLSearchParams, name: []const u8, value: []const u8, page: *Page) !void {
return self.entries.append(page.arena, name, value);
}
pub fn _set(self: *URLSearchParams, name: []const u8, value: []const u8, page: *Page) !void {
return self.entries.set(page.arena, name, value);
}
pub fn _delete(self: *URLSearchParams, name: []const u8, value_: ?[]const u8) void {
if (value_) |value| {
return self.entries.deleteKeyValue(name, value);
}
return self.entries.delete(name);
}
pub fn _get(self: *const URLSearchParams, name: []const u8) ?[]const u8 {
return self.entries.get(name);
}
pub fn _getAll(self: *const URLSearchParams, name: []const u8, page: *Page) ![]const []const u8 {
return self.entries.getAll(page.call_arena, name);
}
pub fn _has(self: *const URLSearchParams, name: []const u8) bool {
return self.entries.has(name);
}
pub fn _keys(self: *const URLSearchParams) KeyIterable {
return .{ .inner = self.entries.keyIterator() };
}
pub fn _values(self: *const URLSearchParams) ValueIterable {
return .{ .inner = self.entries.valueIterator() };
}
pub fn _entries(self: *const URLSearchParams) EntryIterable {
return .{ .inner = self.entries.entryIterator() };
}
pub fn _symbol_iterator(self: *const URLSearchParams) EntryIterable {
return self._entries();
}
pub fn _toString(self: *const URLSearchParams, page: *Page) ![]const u8 {
var arr: std.ArrayListUnmanaged(u8) = .empty;
try self.write(arr.writer(page.call_arena));
return arr.items;
}
fn write(self: *const URLSearchParams, writer: anytype) !void {
return kv.urlEncode(self.entries, .query, writer);
}
// TODO
pub fn _sort(_: *URLSearchParams) void {}
fn encode(self: *const URLSearchParams, writer: anytype) !void {
return kv.urlEncode(self.entries, .query, writer);
}
};
// Parse the given query. // Parse the given query.
fn parseQuery(arena: Allocator, s: []const u8) !kv.List { fn parseQuery(arena: Allocator, s: []const u8) !kv.List {
var list = kv.List{}; var list = kv.List{};