Merge pull request #230 from lightpanda-io/dom-url

URL api
This commit is contained in:
Pierre Tachoire
2024-05-13 11:26:55 +02:00
committed by GitHub
6 changed files with 612 additions and 87 deletions

View File

@@ -7,6 +7,7 @@ const HTML = @import("html/html.zig");
const Events = @import("events/event.zig");
const XHR = @import("xhr/xhr.zig");
const Storage = @import("storage/storage.zig");
const URL = @import("url/url.zig");
pub const HTMLDocument = @import("html/document.zig").HTMLDocument;
@@ -18,4 +19,5 @@ pub const Interfaces = generate.Tuple(.{
HTML.Interfaces,
XHR.Interfaces,
Storage.Interfaces,
URL.Interfaces,
});

View File

@@ -1,6 +1,10 @@
const std = @import("std");
const testing = std.testing;
const strparser = @import("../str/parser.zig");
const Reader = strparser.Reader;
const trim = strparser.trim;
const Self = @This();
const MimeError = error{
@@ -21,91 +25,6 @@ pub const Empty = Self{ .mtype = "", .msubtype = "" };
pub const HTML = Self{ .mtype = "text", .msubtype = "html" };
pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" };
// reader is a small forward-only cursor over a byte string.
const reader = struct {
    // bytes being scanned.
    s: []const u8,
    // offset of the next unread byte in s.
    i: usize = 0,

    // until advances the cursor up to the next occurrence of c, or to the end
    // of the input when c is absent, and returns the bytes walked over.
    // c itself is left unread.
    fn until(self: *reader, c: u8) []const u8 {
        const begin = self.i;
        while (self.i < self.s.len and self.s[self.i] != c) : (self.i += 1) {}
        return self.s[begin..self.i];
    }

    // tail returns every unread byte and moves the cursor to the end of the
    // input.
    fn tail(self: *reader) []const u8 {
        if (self.i > self.s.len) return "";
        const rest = self.s[self.i..];
        self.i = self.s.len;
        return rest;
    }

    // skip consumes one byte. It returns false, without moving, when the
    // input is exhausted.
    fn skip(self: *reader) bool {
        const has_more = self.i < self.s.len;
        if (has_more) self.i += 1;
        return has_more;
    }
};
test "reader.skip" {
    var r = reader{ .s = "foo" };
    // one successful skip per byte of input.
    for (0..3) |_| try testing.expect(r.skip());
    // once exhausted, skip keeps reporting false.
    for (0..2) |_| try testing.expect(!r.skip());
}

test "reader.tail" {
    var r = reader{ .s = "foo" };
    // the first call drains the input, the second one has nothing left.
    const expected = [_][]const u8{ "foo", "" };
    for (expected) |want| try testing.expectEqualStrings(want, r.tail());
}

test "reader.until" {
    // walk the dot separated segments one by one.
    var r = reader{ .s = "foo.bar.baz" };
    const segments = [_][]const u8{ "foo", "bar", "baz" };
    for (segments) |want| {
        try testing.expectEqualStrings(want, r.until('.'));
        _ = r.skip();
    }

    // the delimiter is absent: the whole input is returned.
    r = reader{ .s = "foo" };
    try testing.expectEqualStrings("foo", r.until('.'));

    // empty input.
    r = reader{ .s = "" };
    try testing.expectEqualStrings("", r.until('.'));
}
// trim returns s without its leading and trailing ASCII whitespace.
// The result is a sub-slice of s, nothing is allocated.
//
// An input made only of whitespace yields an empty slice. The previous
// hand-rolled loops ended with start == s.len and end == 0 in that case, so
// the final slice expression was out of bounds.
fn trim(s: []const u8) []const u8 {
    // std.ascii.whitespace is the character set std.ascii.isWhitespace tests.
    return std.mem.trim(u8, s, &std.ascii.whitespace);
}
test "trim" {
    // each case is { input, expected }.
    const cases = [_][2][]const u8{
        .{ "", "" },
        .{ "foo", "foo" },
        .{ " \n\tfoo", "foo" },
        .{ "foo \n\t", "foo" },
    };
    for (cases) |case| try testing.expectEqualStrings(case[1], trim(case[0]));
}
// https://mimesniff.spec.whatwg.org/#http-token-code-point
fn isHTTPCodePoint(c: u8) bool {
return switch (c) {
@@ -133,7 +52,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self {
if (ln > 255) return MimeError.TooBig;
var res = Self{ .mtype = "", .msubtype = "" };
var r = reader{ .s = s };
var r = Reader{ .s = s };
res.mtype = trim(r.until('/'));
if (res.mtype.len == 0) return MimeError.Invalid;
@@ -150,7 +69,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self {
// parse well known parameters.
// don't check invalid parameter format.
var rp = reader{ .s = res.params };
var rp = Reader{ .s = res.params };
while (true) {
const name = trim(rp.until('='));
if (!rp.skip()) return res;

View File

@@ -10,6 +10,8 @@ const apiweb = @import("apiweb.zig");
const Window = @import("html/window.zig").Window;
const xhr = @import("xhr/xhr.zig");
const storage = @import("storage/storage.zig");
const url = @import("url/url.zig");
const urlquery = @import("url/query.zig");
const documentTestExecFn = @import("dom/document.zig").testExecFn;
const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn;
@@ -30,6 +32,7 @@ const EventTestExecFn = @import("events/event.zig").testExecFn;
const XHRTestExecFn = xhr.testExecFn;
const ProgressEventTestExecFn = @import("xhr/progress_event.zig").testExecFn;
const StorageTestExecFn = storage.testExecFn;
const URLTestExecFn = url.testExecFn;
pub const Types = jsruntime.reflect(apiweb.Interfaces);
@@ -95,6 +98,7 @@ fn testsAllExecFn(
ProgressEventTestExecFn,
ProcessingInstructionTestExecFn,
StorageTestExecFn,
URLTestExecFn,
};
inline for (testFns) |testFn| {
@@ -261,6 +265,9 @@ test {
const dumpTest = @import("browser/dump.zig");
std.testing.refAllDecls(dumpTest);
const mimeTest = @import("browser/mime.zig");
std.testing.refAllDecls(mimeTest);
const cssTest = @import("css/css.zig");
std.testing.refAllDecls(cssTest);
@@ -272,6 +279,9 @@ test {
const cssLibdomTest = @import("css/libdom_test.zig");
std.testing.refAllDecls(cssLibdomTest);
const queryTest = @import("url/query.zig");
std.testing.refAllDecls(queryTest);
}
fn testJSRuntime(alloc: std.mem.Allocator) !void {

88
src/str/parser.zig Normal file
View File

@@ -0,0 +1,88 @@
// some utils to parse strings.
const std = @import("std");
const testing = std.testing;
// Reader is a small forward-only cursor over a byte string.
pub const Reader = struct {
    // bytes being scanned.
    s: []const u8,
    // offset of the next unread byte in s.
    i: usize = 0,

    // until advances the cursor up to the next occurrence of c, or to the end
    // of the input when c is absent, and returns the bytes walked over.
    // c itself is left unread.
    pub fn until(self: *Reader, c: u8) []const u8 {
        const begin = self.i;
        while (self.i < self.s.len and self.s[self.i] != c) : (self.i += 1) {}
        return self.s[begin..self.i];
    }

    // tail returns every unread byte and moves the cursor to the end of the
    // input.
    pub fn tail(self: *Reader) []const u8 {
        if (self.i > self.s.len) return "";
        const rest = self.s[self.i..];
        self.i = self.s.len;
        return rest;
    }

    // skip consumes one byte. It returns false, without moving, when the
    // input is exhausted.
    pub fn skip(self: *Reader) bool {
        const has_more = self.i < self.s.len;
        if (has_more) self.i += 1;
        return has_more;
    }
};
test "Reader.skip" {
    var r = Reader{ .s = "foo" };
    // one successful skip per byte of input.
    for (0..3) |_| try testing.expect(r.skip());
    // once exhausted, skip keeps reporting false.
    for (0..2) |_| try testing.expect(!r.skip());
}

test "Reader.tail" {
    var r = Reader{ .s = "foo" };
    // the first call drains the input, the second one has nothing left.
    const expected = [_][]const u8{ "foo", "" };
    for (expected) |want| try testing.expectEqualStrings(want, r.tail());
}

test "Reader.until" {
    // walk the dot separated segments one by one.
    var r = Reader{ .s = "foo.bar.baz" };
    const segments = [_][]const u8{ "foo", "bar", "baz" };
    for (segments) |want| {
        try testing.expectEqualStrings(want, r.until('.'));
        _ = r.skip();
    }

    // the delimiter is absent: the whole input is returned.
    r = Reader{ .s = "foo" };
    try testing.expectEqualStrings("foo", r.until('.'));

    // empty input.
    r = Reader{ .s = "" };
    try testing.expectEqualStrings("", r.until('.'));
}
// trim returns s without its leading and trailing ASCII whitespace.
// The result is a sub-slice of s, nothing is allocated.
//
// An input made only of whitespace yields an empty slice. The previous
// hand-rolled loops ended with start == s.len and end == 0 in that case, so
// the final slice expression was out of bounds.
pub fn trim(s: []const u8) []const u8 {
    // std.ascii.whitespace is the character set std.ascii.isWhitespace tests.
    return std.mem.trim(u8, s, &std.ascii.whitespace);
}
test "trim" {
    // each case is { input, expected }.
    const cases = [_][2][]const u8{
        .{ "", "" },
        .{ "foo", "foo" },
        .{ " \n\tfoo", "foo" },
        .{ "foo \n\t", "foo" },
    };
    for (cases) |case| try testing.expectEqualStrings(case[1], trim(case[0]));
}

265
src/url/query.zig Normal file
View File

@@ -0,0 +1,265 @@
const std = @import("std");
const Reader = @import("../str/parser.zig").Reader;
// Values maps a string key to the list of string values recorded for it.
//
// Every key and every value stored in the map is owned by Values: they are
// allocated with, and released through, the allocator given to init.
pub const Values = struct {
    alloc: std.mem.Allocator,
    map: std.StringArrayHashMapUnmanaged(List),

    const List = std.ArrayListUnmanaged([]const u8);

    // init returns an empty set of values bound to alloc.
    pub fn init(alloc: std.mem.Allocator) Values {
        return .{
            .alloc = alloc,
            .map = .{},
        };
    }

    // deinit releases every key, every value and the map itself.
    pub fn deinit(self: *Values) void {
        var it = self.map.iterator();
        while (it.next()) |entry| {
            for (entry.value_ptr.items) |v| self.alloc.free(v);
            entry.value_ptr.deinit(self.alloc);
            self.alloc.free(entry.key_ptr.*);
        }
        self.map.deinit(self.alloc);
    }

    // append adds the key value couple to the values.
    // The key and the value are duplicated, the caller keeps its own copies.
    // Nothing stays allocated when an error is returned.
    pub fn append(self: *Values, k: []const u8, v: []const u8) !void {
        const vv = try self.alloc.dupe(u8, v);
        errdefer self.alloc.free(vv);

        if (self.map.getPtr(k)) |list| {
            try list.append(self.alloc, vv);
            return;
        }

        const kk = try self.alloc.dupe(u8, k);
        errdefer self.alloc.free(kk);

        var list = List{};
        errdefer list.deinit(self.alloc);
        try list.append(self.alloc, vv);
        try self.map.put(self.alloc, kk, list);
    }

    // appendOwned adds the key value couple by taking the ownership of both
    // slices, which must have been allocated with self.alloc.
    // On error the ownership stays with the caller.
    fn appendOwned(self: *Values, k: []const u8, v: []const u8) !void {
        if (self.map.getPtr(k)) |list| {
            try list.append(self.alloc, v);
            // the map already holds an equal key, so the one handed over is
            // not stored anywhere: release it instead of leaking it.
            self.alloc.free(k);
            return;
        }

        var list = List{};
        errdefer list.deinit(self.alloc);
        try list.append(self.alloc, v);
        try self.map.put(self.alloc, k, list);
    }

    // get returns all the values recorded for k, or an empty slice when k is
    // unknown. The returned slices are still owned by Values.
    pub fn get(self: *Values, k: []const u8) [][]const u8 {
        if (self.map.get(k)) |list| {
            return list.items;
        }

        return &[_][]const u8{};
    }

    // first returns the first value recorded for k, or "" when k is unknown
    // or has no value.
    pub fn first(self: *Values, k: []const u8) []const u8 {
        if (self.map.getPtr(k)) |list| {
            if (list.items.len == 0) return "";
            return list.items[0];
        }

        return "";
    }

    // delete removes k with all its values and releases their memory.
    pub fn delete(self: *Values, k: []const u8) void {
        const removed = self.map.fetchSwapRemove(k) orelse return;

        var list = removed.value;
        for (list.items) |v| self.alloc.free(v);
        list.deinit(self.alloc);
        self.alloc.free(removed.key);
    }

    // deleteValue removes the first value of k equal to v and releases it.
    // When k has no value left afterwards, k is removed too, so it is no
    // longer counted nor encoded.
    pub fn deleteValue(self: *Values, k: []const u8, v: []const u8) void {
        const list = self.map.getPtr(k) orelse return;

        const idx = for (list.items, 0..) |vv, i| {
            if (std.mem.eql(u8, v, vv)) break i;
        } else return;

        self.alloc.free(list.swapRemove(idx));
        if (list.items.len > 0) return;

        // that was the last value of the key: drop the entry.
        const removed = self.map.fetchSwapRemove(k) orelse return;
        var empty = removed.value;
        empty.deinit(self.alloc);
        self.alloc.free(removed.key);
    }

    // count returns the number of distinct keys.
    pub fn count(self: *Values) usize {
        return self.map.count();
    }

    // encode writes the values to writer as `k=v` couples joined with '&'.
    // Keys and values are percent-encoded with escape.
    // The '=' is omitted for an empty value, a key without any value is
    // written alone.
    pub fn encode(self: *Values, writer: anytype) !void {
        // true once something has been written, i.e. when the next couple
        // needs a leading '&'.
        var need_sep = false;

        var it = self.map.iterator();
        while (it.next()) |entry| {
            const key = entry.key_ptr.*;
            const vals = entry.value_ptr.items;

            if (vals.len == 0) {
                if (need_sep) try writer.writeByte('&');
                try escape(writer, key);
                need_sep = true;
                continue;
            }

            for (vals) |v| {
                if (need_sep) try writer.writeByte('&');
                try escape(writer, key);
                if (v.len > 0) try writer.writeByte('=');
                try escape(writer, v);
                need_sep = true;
            }
        }
    }
};
// unhex returns the numeric value of the hexadecimal digit c.
// Any other character gives 0.
fn unhex(c: u8) u8 {
    return switch (c) {
        '0'...'9' => c - '0',
        'a'...'f' => c - 'a' + 10,
        'A'...'F' => c - 'A' + 10,
        else => 0,
    };
}
// unescape decodes a percent encoded string: `%XX` becomes the byte with
// hexadecimal value XX and '+' becomes a space.
// It returns error.EscapeError when a '%' is not followed by two hexadecimal
// digits, including when the input ends before them.
// The caller owns the returned string.
pub fn unescape(alloc: std.mem.Allocator, s: []const u8) ![]const u8 {
    var buf: std.ArrayListUnmanaged(u8) = .{};
    defer buf.deinit(alloc);

    var i: usize = 0;
    while (i < s.len) : (i += 1) {
        switch (s[i]) {
            '%' => {
                // s[i + 1] and s[i + 2] are read below: both must exist.
                if (s.len - i < 3) return error.EscapeError;
                const hi = std.fmt.charToDigit(s[i + 1], 16) catch return error.EscapeError;
                const lo = std.fmt.charToDigit(s[i + 2], 16) catch return error.EscapeError;
                try buf.append(alloc, (hi << 4) | lo);
                // jump over the two digits, the loop steps over the '%'.
                i += 2;
            },
            '+' => try buf.append(alloc, ' '), // TODO should we decode or keep as it?
            else => |c| try buf.append(alloc, c),
        }
    }

    return try buf.toOwnedSlice(alloc);
}
test "unescape" {
    const alloc = std.testing.allocator;

    // %7E is the percent encoded form of '~'.
    const decoded = try unescape(alloc, "%7E");
    defer alloc.free(decoded);
    try std.testing.expectEqualStrings("~", decoded);
}
// escape writes raw to writer with every byte that is not an ASCII letter or
// digit replaced by `%XX`, XX being the byte value as two uppercase
// hexadecimal digits.
pub fn escape(writer: anytype, raw: []const u8) !void {
    // start of the run of plain bytes not written yet.
    var pending: usize = 0;

    for (raw, 0..) |char, index| {
        if (std.ascii.isAlphanumeric(char)) continue;

        // flush the plain run in one go, then the encoded byte.
        try writer.writeAll(raw[pending..index]);
        try writer.print("%{X:0>2}", .{char});
        pending = index + 1;
    }

    try writer.writeAll(raw[pending..]);
}
// parseQuery parses the query string s, a list of `key=value` parameters
// separated with '&', into Values.
// Keys and values are percent-decoded with unescape. A parameter without '='
// gets an empty value. Empty parameters and parameters with an empty key are
// ignored, the parsing goes on with the following ones.
// The caller owns the returned Values and must call deinit on it.
pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values {
    var values = Values.init(alloc);
    errdefer values.deinit();

    if (s.len == 0) return values;

    var r = Reader{ .s = s };
    while (true) {
        const param = r.until('&');
        if (param.len > 0) {
            var rr = Reader{ .s = param };
            const k = rr.until('=');
            if (k.len > 0) {
                // step over the '=' if any, the rest is the value.
                _ = rr.skip();
                const v = rr.tail();

                // decode k and v. They stay ours until appendOwned succeeds,
                // so they are released when a later step fails.
                const kk = try unescape(alloc, k);
                errdefer alloc.free(kk);
                const vv = try unescape(alloc, v);
                errdefer alloc.free(vv);

                try values.appendOwned(kk, vv);
            }
        }

        // consume the '&'. No byte left means the end of the query.
        if (!r.skip()) break;
    }

    return values;
}
test "parse empty query" {
    var values = try parseQuery(std.testing.allocator, "");
    defer values.deinit();

    try std.testing.expectEqual(@as(usize, 0), values.count());
}

test "parse empty query &" {
    // a lone separator carries no parameter.
    var values = try parseQuery(std.testing.allocator, "&");
    defer values.deinit();

    try std.testing.expectEqual(@as(usize, 0), values.count());
}

test "parse query" {
    var values = try parseQuery(std.testing.allocator, "a=b&b=c");
    defer values.deinit();

    try std.testing.expectEqual(@as(usize, 2), values.count());

    const a = values.get("a");
    try std.testing.expectEqual(@as(usize, 1), a.len);
    try std.testing.expectEqualStrings("b", a[0]);
    try std.testing.expectEqualStrings("b", values.first("a"));

    const b = values.get("b");
    try std.testing.expectEqual(@as(usize, 1), b.len);
    try std.testing.expectEqualStrings("c", b[0]);
    try std.testing.expectEqualStrings("c", values.first("b"));
}

test "parse query no value" {
    // a key without '=' is kept with an empty value.
    var values = try parseQuery(std.testing.allocator, "a");
    defer values.deinit();

    try std.testing.expectEqual(@as(usize, 1), values.count());
    try std.testing.expectEqualStrings("", values.first("a"));
}

test "parse query dup" {
    // a repeated key is counted once and accumulates its values.
    var values = try parseQuery(std.testing.allocator, "a=b&a=c");
    defer values.deinit();

    try std.testing.expectEqual(@as(usize, 1), values.count());
    try std.testing.expectEqualStrings("b", values.first("a"));
    try std.testing.expectEqual(@as(usize, 2), values.get("a").len);
}

test "encode query" {
    const alloc = std.testing.allocator;

    var values = try parseQuery(alloc, "a=b&b=c");
    defer values.deinit();

    try values.append("a", "~");

    var out: std.ArrayListUnmanaged(u8) = .{};
    defer out.deinit(alloc);

    try values.encode(out.writer(alloc));

    // the appended value is grouped with its key and percent-encoded.
    try std.testing.expectEqualStrings("a=b&a=%7E&b=c", out.items);
}

241
src/url/url.zig Normal file
View File

@@ -0,0 +1,241 @@
const std = @import("std");
const jsruntime = @import("jsruntime");
const Case = jsruntime.test_utils.Case;
const checkCases = jsruntime.test_utils.checkCases;
const generate = @import("../generate.zig");
const query = @import("query.zig");
pub const Interfaces = generate.Tuple(.{
URL,
URLSearchParams,
});
// https://url.spec.whatwg.org/#url
//
// URL wraps a std.Uri parsed from an owned copy of the input string, plus the
// URLSearchParams built from its query.
//
// TODO we could avoid many of these getter string allocations in two
// different ways:
//
// 1. We can eventually get the slice of scheme *with* the following char in
// the underlying string. But I don't know if it's possible and how to do that.
// I mean, if the rawuri contains `https://foo.bar`, uri.scheme is a slice
// containing only `https`. I want `https:` so, in theory, I don't need to
// allocate data, I should be able to retrieve the scheme + the following `:`
// from rawuri.
//
// 2. The other way would be to copy the `std.Uri` code to have a dedicated
// parser including the characters we want for the web API.
pub const URL = struct {
    // owned copy of the parsed string, the slices of uri point into it.
    rawuri: []const u8,
    uri: std.Uri,
    search_params: URLSearchParams,

    pub const mem_guarantied = true;

    // constructor parses the concatenation of url and base.
    // It returns error.TypeError when the result is not a valid URI.
    // NOTE(review): url is placed *before* base and no relative resolution is
    // done, which looks unlike the WHATWG `new URL(url, base)` semantics.
    // TODO confirm.
    pub fn constructor(alloc: std.mem.Allocator, url: []const u8, base: ?[]const u8) !URL {
        const raw = try std.mem.concat(alloc, u8, &[_][]const u8{ url, base orelse "" });
        errdefer alloc.free(raw);

        const uri = std.Uri.parse(raw) catch {
            return error.TypeError;
        };

        return .{
            .rawuri = raw,
            .uri = uri,
            .search_params = try URLSearchParams.constructor(alloc, uri.query),
        };
    }

    // deinit releases the search params and the raw string.
    pub fn deinit(self: *URL, alloc: std.mem.Allocator) void {
        // URLSearchParams.deinit takes an allocator argument too.
        self.search_params.deinit(alloc);
        alloc.free(self.rawuri);
    }

    // get_href serializes the whole URL, the query part being generated from
    // the current search params.
    //
    // the caller must free the returned string.
    // TODO return a disposable string
    // https://github.com/lightpanda-io/jsruntime-lib/issues/195
    pub fn get_href(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        // retrieve the query search from search_params.
        var q = std.ArrayList(u8).init(alloc);
        defer q.deinit();
        try self.search_params.values.encode(q.writer());

        // the uri query is swapped only for the time of the serialization,
        // the original one is restored before q is released.
        const cur = self.uri.query;
        defer self.uri.query = cur;
        // no param means no query at all: an empty but non-null query would
        // leave a dangling `?` in the output.
        self.uri.query = if (q.items.len > 0) q.items else null;

        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();

        try self.uri.writeToStream(.{
            .scheme = true,
            .authentication = true,
            .authority = true,
            .path = true,
            .query = true,
            .fragment = true,
        }, buf.writer());
        return try buf.toOwnedSlice();
    }

    // get_protocol returns the scheme followed by `:`.
    //
    // the caller must free the returned string.
    // TODO return a disposable string
    // https://github.com/lightpanda-io/jsruntime-lib/issues/195
    pub fn get_protocol(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        return try std.mem.concat(alloc, u8, &[_][]const u8{ self.uri.scheme, ":" });
    }

    // get_username returns the user, or "" when the URL has none.
    pub fn get_username(self: *URL) []const u8 {
        return self.uri.user orelse "";
    }

    // get_password returns the password, or "" when the URL has none.
    pub fn get_password(self: *URL) []const u8 {
        return self.uri.password orelse "";
    }

    // get_host returns the host, or "" when the URL has none.
    // NOTE(review): the port is not included, so this is the same value as
    // get_hostname; the WHATWG `host` getter appends `:port` when a port is
    // set. TODO confirm.
    pub fn get_host(self: *URL) []const u8 {
        return self.uri.host orelse "";
    }

    // get_hostname returns the host, or "" when the URL has none.
    pub fn get_hostname(self: *URL) []const u8 {
        return self.uri.host orelse "";
    }

    // get_port returns the port in decimal, or "" when the URL has none.
    //
    // the caller must free the returned string.
    // TODO return a disposable string
    // https://github.com/lightpanda-io/jsruntime-lib/issues/195
    pub fn get_port(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        const port = self.uri.port orelse return try alloc.dupe(u8, "");
        return try std.fmt.allocPrint(alloc, "{d}", .{port});
    }

    // get_pathname returns the path, an empty one being reported as "/".
    pub fn get_pathname(self: *URL) []const u8 {
        if (self.uri.path.len == 0) return "/";
        return self.uri.path;
    }

    // get_search returns `?` followed by the encoded search params, or ""
    // when there is no param.
    //
    // the caller must free the returned string.
    // TODO return a disposable string
    // https://github.com/lightpanda-io/jsruntime-lib/issues/195
    pub fn get_search(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        if (self.search_params.get_size() == 0) return try alloc.dupe(u8, "");

        var buf: std.ArrayListUnmanaged(u8) = .{};
        defer buf.deinit(alloc);

        try buf.append(alloc, '?');
        try self.search_params.values.encode(buf.writer(alloc));
        return buf.toOwnedSlice(alloc);
    }

    // get_hash returns `#` followed by the fragment, or "" when the URL has
    // none.
    //
    // the caller must free the returned string.
    // TODO return a disposable string
    // https://github.com/lightpanda-io/jsruntime-lib/issues/195
    pub fn get_hash(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        const fragment = self.uri.fragment orelse return try alloc.dupe(u8, "");
        return try std.mem.concat(alloc, u8, &[_][]const u8{ "#", fragment });
    }

    // get_searchParams returns the live search params: changing them changes
    // what get_search and get_href return.
    pub fn get_searchParams(self: *URL) *URLSearchParams {
        return &self.search_params;
    }

    // _toJSON returns the same string as get_href.
    // the caller must free the returned string.
    pub fn _toJSON(self: *URL, alloc: std.mem.Allocator) ![]const u8 {
        return try self.get_href(alloc);
    }
};
// https://url.spec.whatwg.org/#interface-urlsearchparams
//
// URLSearchParams exposes a query.Values through the web API method names.
// TODO array like
pub const URLSearchParams = struct {
    values: query.Values,

    pub const mem_guarantied = true;

    // constructor parses init as a query string, a null init being handled
    // as an empty one.
    pub fn constructor(alloc: std.mem.Allocator, init: ?[]const u8) !URLSearchParams {
        const raw = init orelse "";
        const parsed = try query.parseQuery(alloc, raw);
        return .{ .values = parsed };
    }

    // deinit releases the values. The allocator argument is not used.
    pub fn deinit(self: *URLSearchParams, _: std.mem.Allocator) void {
        self.values.deinit();
    }

    // get_size returns the count reported by the underlying values.
    pub fn get_size(self: *URLSearchParams) u32 {
        const n = self.values.count();
        return @intCast(n);
    }

    // _append records value for name.
    pub fn _append(self: *URLSearchParams, name: []const u8, value: []const u8) !void {
        try self.values.append(name, value);
    }

    // _delete removes name entirely, or only the given value of name when
    // value is set.
    pub fn _delete(self: *URLSearchParams, name: []const u8, value: ?[]const u8) !void {
        if (value) |v| {
            self.values.deleteValue(name, v);
        } else {
            self.values.delete(name);
        }
    }

    // _get returns what values.first returns for name.
    pub fn _get(self: *URLSearchParams, name: []const u8) ?[]const u8 {
        const found = self.values.first(name);
        return found;
    }

    // TODO return generates an error: caught unexpected error 'TypeLookup'
    // pub fn _getAll(self: *URLSearchParams, name: []const u8) [][]const u8 {
    //     try self.values.get(name);
    // }

    // TODO
    pub fn _sort(_: *URLSearchParams) void {}
};
// Tests
// -----

// testExecFn runs the URL and URLSearchParams JS cases against js_env.
// Each case evaluates `src` and expects the result to print as `ex`.
pub fn testExecFn(
    _: std.mem.Allocator,
    js_env: *jsruntime.Env,
) anyerror!void {
    // every getter of a freshly parsed URL.
    var getter_cases = [_]Case{
        .{ .src = "var url = new URL('https://foo.bar/path?query#fragment')", .ex = "undefined" },
        .{ .src = "url.href", .ex = "https://foo.bar/path?query#fragment" },
        .{ .src = "url.protocol", .ex = "https:" },
        .{ .src = "url.username", .ex = "" },
        .{ .src = "url.password", .ex = "" },
        .{ .src = "url.host", .ex = "foo.bar" },
        .{ .src = "url.hostname", .ex = "foo.bar" },
        .{ .src = "url.port", .ex = "" },
        .{ .src = "url.pathname", .ex = "/path" },
        .{ .src = "url.search", .ex = "?query" },
        .{ .src = "url.hash", .ex = "#fragment" },
        .{ .src = "url.searchParams.get('query')", .ex = "" },
    };
    try checkCases(js_env, &getter_cases);

    // search params reads and mutations, and their effect on search / href.
    var search_params_cases = [_]Case{
        .{ .src = "var url = new URL('https://foo.bar/path?a=~&b=%7E#fragment')", .ex = "undefined" },
        .{ .src = "url.searchParams.get('a')", .ex = "~" },
        .{ .src = "url.searchParams.get('b')", .ex = "~" },
        .{ .src = "url.searchParams.append('c', 'foo')", .ex = "undefined" },
        .{ .src = "url.searchParams.get('c')", .ex = "foo" },
        .{ .src = "url.searchParams.size", .ex = "3" },

        // search is dynamic
        .{ .src = "url.search", .ex = "?a=%7E&b=%7E&c=foo" },
        // href is dynamic
        .{ .src = "url.href", .ex = "https://foo.bar/path?a=%7E&b=%7E&c=foo#fragment" },

        .{ .src = "url.searchParams.delete('c', 'foo')", .ex = "undefined" },
        .{ .src = "url.searchParams.get('c')", .ex = "" },
        .{ .src = "url.searchParams.delete('a')", .ex = "undefined" },
        .{ .src = "url.searchParams.get('a')", .ex = "" },
    };
    try checkCases(js_env, &search_params_cases);
}