From 28a87c2a47e0927004119f83c2b3794ab3b049e5 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 3 May 2024 16:18:11 +0200 Subject: [PATCH 1/5] url: first draft --- src/apiweb.zig | 2 + src/run_tests.zig | 3 + src/url/url.zig | 159 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 src/url/url.zig diff --git a/src/apiweb.zig b/src/apiweb.zig index 87197b51..9eb7f936 100644 --- a/src/apiweb.zig +++ b/src/apiweb.zig @@ -7,6 +7,7 @@ const HTML = @import("html/html.zig"); const Events = @import("events/event.zig"); const XHR = @import("xhr/xhr.zig"); const Storage = @import("storage/storage.zig"); +const URL = @import("url/url.zig"); pub const HTMLDocument = @import("html/document.zig").HTMLDocument; @@ -18,4 +19,5 @@ pub const Interfaces = generate.Tuple(.{ HTML.Interfaces, XHR.Interfaces, Storage.Interfaces, + URL.Interfaces, }); diff --git a/src/run_tests.zig b/src/run_tests.zig index 3261a9eb..275b48ad 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -10,6 +10,7 @@ const apiweb = @import("apiweb.zig"); const Window = @import("html/window.zig").Window; const xhr = @import("xhr/xhr.zig"); const storage = @import("storage/storage.zig"); +const url = @import("url/url.zig"); const documentTestExecFn = @import("dom/document.zig").testExecFn; const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn; @@ -30,6 +31,7 @@ const EventTestExecFn = @import("events/event.zig").testExecFn; const XHRTestExecFn = xhr.testExecFn; const ProgressEventTestExecFn = @import("xhr/progress_event.zig").testExecFn; const StorageTestExecFn = storage.testExecFn; +const URLTestExecFn = url.testExecFn; pub const Types = jsruntime.reflect(apiweb.Interfaces); @@ -95,6 +97,7 @@ fn testsAllExecFn( ProgressEventTestExecFn, ProcessingInstructionTestExecFn, StorageTestExecFn, + URLTestExecFn, }; inline for (testFns) |testFn| { diff --git a/src/url/url.zig b/src/url/url.zig new file mode 100644 index 00000000..7a627196 --- 
/dev/null +++ b/src/url/url.zig @@ -0,0 +1,159 @@ +const std = @import("std"); + +const jsruntime = @import("jsruntime"); +const Case = jsruntime.test_utils.Case; +const checkCases = jsruntime.test_utils.checkCases; +const generate = @import("../generate.zig"); + +pub const Interfaces = generate.Tuple(.{ + URL, + URLSearchParams, +}); + +// https://url.spec.whatwg.org/#url +// +// TODO we could avoid many of these getter string allocations in two different +// ways: +// +// 1. We could possibly get the slice of scheme *with* the following char in +// the underlying string. But I don't know if it's possible and how to do that. +// I mean, if the rawuri contains `https://foo.bar`, uri.scheme is a slice +// containing only `https`. I want `https:` so, in theory, I don't need to +// allocate data, I should be able to retrieve the scheme + the following `:` +// from rawuri. +// +// 2. The other way would be to copy the `std.Uri` code to have a dedicated +// parser including the characters we want for the web API. +pub const URL = struct { + rawuri: []const u8, + uri: std.Uri, + + pub const mem_guarantied = true; + + pub fn constructor(alloc: std.mem.Allocator, url: []const u8, base: ?[]const u8) !URL { + const raw = try std.mem.concat(alloc, u8, &[_][]const u8{ url, base orelse "" }); + errdefer alloc.free(raw); + + const uri = std.Uri.parse(raw) catch { + return error.TypeError; + }; + + return .{ + .rawuri = raw, + .uri = uri, + }; + } + + pub fn deinit(self: *URL, alloc: std.mem.Allocator) void { + alloc.free(self.rawuri); + } + + // the caller must free the returned string. 
+ // TODO return a disposable string + // https://github.com/lightpanda-io/jsruntime-lib/issues/195 + pub fn get_href(self: URL, alloc: std.mem.Allocator) ![]const u8 { + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try self.uri.writeToStream(.{ + .scheme = true, + .authentication = true, + .authority = true, + .path = true, + .query = true, + .fragment = true, + }, buf.writer()); + return try buf.toOwnedSlice(); + } + + // the caller must free the returned string. + // TODO return a disposable string + // https://github.com/lightpanda-io/jsruntime-lib/issues/195 + pub fn get_protocol(self: *URL, alloc: std.mem.Allocator) ![]const u8 { + return try std.mem.concat(alloc, u8, &[_][]const u8{ self.uri.scheme, ":" }); + } + + pub fn get_username(self: *URL) []const u8 { + return self.uri.user orelse ""; + } + + pub fn get_password(self: *URL) []const u8 { + return self.uri.password orelse ""; + } + + pub fn get_host(self: *URL) []const u8 { + return self.uri.host orelse ""; + } + + pub fn get_hostname(self: *URL) []const u8 { + return self.uri.host orelse ""; + } + + // the caller must free the returned string. + // TODO return a disposable string + // https://github.com/lightpanda-io/jsruntime-lib/issues/195 + pub fn get_port(self: *URL, alloc: std.mem.Allocator) ![]const u8 { + if (self.uri.port == null) return try alloc.dupe(u8, ""); + + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try std.fmt.formatInt(self.uri.port.?, 10, .lower, .{}, buf.writer()); + return try buf.toOwnedSlice(); + } + + pub fn get_pathname(self: *URL) []const u8 { + if (self.uri.path.len == 0) return "/"; + return self.uri.path; + } + + // the caller must free the returned string. 
+ // TODO return a disposable string + // https://github.com/lightpanda-io/jsruntime-lib/issues/195 + pub fn get_search(self: *URL, alloc: std.mem.Allocator) ![]const u8 { + if (self.uri.query == null) return try alloc.dupe(u8, ""); + + return try std.mem.concat(alloc, u8, &[_][]const u8{ "?", self.uri.query.? }); + } + + // the caller must free the returned string. + // TODO return a disposable string + // https://github.com/lightpanda-io/jsruntime-lib/issues/195 + pub fn get_hash(self: *URL, alloc: std.mem.Allocator) ![]const u8 { + if (self.uri.fragment == null) return try alloc.dupe(u8, ""); + + return try std.mem.concat(alloc, u8, &[_][]const u8{ "#", self.uri.fragment.? }); + } + + pub fn _toJSON(self: *URL, alloc: std.mem.Allocator) ![]const u8 { + return try self.get_href(alloc); + } +}; + +// https://url.spec.whatwg.org/#interface-urlsearchparams +pub const URLSearchParams = struct { + pub const mem_guarantied = true; +}; + +// Tests +// ----- + +pub fn testExecFn( + _: std.mem.Allocator, + js_env: *jsruntime.Env, +) anyerror!void { + var url = [_]Case{ + .{ .src = "var url = new URL('https://foo.bar/path?query#fragment')", .ex = "undefined" }, + .{ .src = "url.href", .ex = "https://foo.bar/path?query#fragment" }, + .{ .src = "url.protocol", .ex = "https:" }, + .{ .src = "url.username", .ex = "" }, + .{ .src = "url.password", .ex = "" }, + .{ .src = "url.host", .ex = "foo.bar" }, + .{ .src = "url.hostname", .ex = "foo.bar" }, + .{ .src = "url.port", .ex = "" }, + .{ .src = "url.pathname", .ex = "/path" }, + .{ .src = "url.search", .ex = "?query" }, + .{ .src = "url.hash", .ex = "#fragment" }, + }; + try checkCases(js_env, &url); +} From e42b03acd81622909e2f9b191887fbd98b0755c9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 6 May 2024 12:44:45 +0200 Subject: [PATCH 2/5] mime: extract string parser --- src/browser/mime.zig | 93 +++----------------------------------------- src/run_tests.zig | 3 ++ src/str/parser.zig | 88 
+++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 87 deletions(-) create mode 100644 src/str/parser.zig diff --git a/src/browser/mime.zig b/src/browser/mime.zig index f7ec4763..73cc8dc9 100644 --- a/src/browser/mime.zig +++ b/src/browser/mime.zig @@ -1,6 +1,10 @@ const std = @import("std"); const testing = std.testing; +const strparser = @import("../str/parser.zig"); +const Reader = strparser.Reader; +const trim = strparser.trim; + const Self = @This(); const MimeError = error{ @@ -21,91 +25,6 @@ pub const Empty = Self{ .mtype = "", .msubtype = "" }; pub const HTML = Self{ .mtype = "text", .msubtype = "html" }; pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" }; -const reader = struct { - s: []const u8, - i: usize = 0, - - fn until(self: *reader, c: u8) []const u8 { - const ln = self.s.len; - const start = self.i; - while (self.i < ln) { - if (c == self.s[self.i]) return self.s[start..self.i]; - self.i += 1; - } - - return self.s[start..self.i]; - } - - fn tail(self: *reader) []const u8 { - if (self.i > self.s.len) return ""; - defer self.i = self.s.len; - return self.s[self.i..]; - } - - fn skip(self: *reader) bool { - if (self.i >= self.s.len) return false; - self.i += 1; - return true; - } -}; - -test "reader.skip" { - var r = reader{ .s = "foo" }; - try testing.expect(r.skip()); - try testing.expect(r.skip()); - try testing.expect(r.skip()); - try testing.expect(!r.skip()); - try testing.expect(!r.skip()); -} - -test "reader.tail" { - var r = reader{ .s = "foo" }; - try testing.expectEqualStrings("foo", r.tail()); - try testing.expectEqualStrings("", r.tail()); -} - -test "reader.until" { - var r = reader{ .s = "foo.bar.baz" }; - try testing.expectEqualStrings("foo", r.until('.')); - _ = r.skip(); - try testing.expectEqualStrings("bar", r.until('.')); - _ = r.skip(); - try testing.expectEqualStrings("baz", r.until('.')); - - r = reader{ .s = "foo" }; - try testing.expectEqualStrings("foo", r.until('.')); - 
- r = reader{ .s = "" }; - try testing.expectEqualStrings("", r.until('.')); -} - -fn trim(s: []const u8) []const u8 { - const ln = s.len; - if (ln == 0) { - return ""; - } - var start: usize = 0; - while (start < ln) { - if (!std.ascii.isWhitespace(s[start])) break; - start += 1; - } - - var end: usize = ln; - while (end > 0) { - if (!std.ascii.isWhitespace(s[end - 1])) break; - end -= 1; - } - - return s[start..end]; -} - -test "trim" { - try testing.expectEqualStrings("", trim("")); - try testing.expectEqualStrings("foo", trim("foo")); - try testing.expectEqualStrings("foo", trim(" \n\tfoo")); - try testing.expectEqualStrings("foo", trim("foo \n\t")); -} - // https://mimesniff.spec.whatwg.org/#http-token-code-point fn isHTTPCodePoint(c: u8) bool { return switch (c) { @@ -133,7 +52,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self { if (ln > 255) return MimeError.TooBig; var res = Self{ .mtype = "", .msubtype = "" }; - var r = reader{ .s = s }; + var r = Reader{ .s = s }; res.mtype = trim(r.until('/')); if (res.mtype.len == 0) return MimeError.Invalid; @@ -150,7 +69,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self { // parse well known parameters. // don't check invalid parameter format. - var rp = reader{ .s = res.params }; + var rp = Reader{ .s = res.params }; while (true) { const name = trim(rp.until('=')); if (!rp.skip()) return res; diff --git a/src/run_tests.zig b/src/run_tests.zig index 275b48ad..dd424238 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -264,6 +264,9 @@ test { const dumpTest = @import("browser/dump.zig"); std.testing.refAllDecls(dumpTest); + const mimeTest = @import("browser/mime.zig"); + std.testing.refAllDecls(mimeTest); + const cssTest = @import("css/css.zig"); std.testing.refAllDecls(cssTest); diff --git a/src/str/parser.zig b/src/str/parser.zig new file mode 100644 index 00000000..32c1741a --- /dev/null +++ b/src/str/parser.zig @@ -0,0 +1,88 @@ +// some utils to parser strings. 
+const std = @import("std"); +const testing = std.testing; + +pub const Reader = struct { + s: []const u8, + i: usize = 0, + + pub fn until(self: *Reader, c: u8) []const u8 { + const ln = self.s.len; + const start = self.i; + while (self.i < ln) { + if (c == self.s[self.i]) return self.s[start..self.i]; + self.i += 1; + } + + return self.s[start..self.i]; + } + + pub fn tail(self: *Reader) []const u8 { + if (self.i > self.s.len) return ""; + defer self.i = self.s.len; + return self.s[self.i..]; + } + + pub fn skip(self: *Reader) bool { + if (self.i >= self.s.len) return false; + self.i += 1; + return true; + } +}; + +test "Reader.skip" { + var r = Reader{ .s = "foo" }; + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(!r.skip()); + try testing.expect(!r.skip()); +} + +test "Reader.tail" { + var r = Reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.tail()); + try testing.expectEqualStrings("", r.tail()); +} + +test "Reader.until" { + var r = Reader{ .s = "foo.bar.baz" }; + try testing.expectEqualStrings("foo", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("bar", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("baz", r.until('.')); + + r = Reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.until('.')); + + r = Reader{ .s = "" }; + try testing.expectEqualStrings("", r.until('.')); +} + +pub fn trim(s: []const u8) []const u8 { + const ln = s.len; + if (ln == 0) { + return ""; + } + var start: usize = 0; + while (start < ln) { + if (!std.ascii.isWhitespace(s[start])) break; + start += 1; + } + + var end: usize = ln; + while (end > 0) { + if (!std.ascii.isWhitespace(s[end - 1])) break; + end -= 1; + } + + return s[start..end]; +} + +test "trim" { + try testing.expectEqualStrings("", trim("")); + try testing.expectEqualStrings("foo", trim("foo")); + try testing.expectEqualStrings("foo", trim(" \n\tfoo")); + try testing.expectEqualStrings("foo", 
trim("foo \n\t")); +} From f7040153cdcee4271f4258562b761f8a90229e17 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 6 May 2024 12:45:14 +0200 Subject: [PATCH 3/5] url: implement query parsing --- src/run_tests.zig | 4 ++ src/url/query.zig | 159 ++++++++++++++++++++++++++++++++++++++++++++++ src/url/url.zig | 63 ++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 src/url/query.zig diff --git a/src/run_tests.zig b/src/run_tests.zig index dd424238..fb554cd1 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -11,6 +11,7 @@ const Window = @import("html/window.zig").Window; const xhr = @import("xhr/xhr.zig"); const storage = @import("storage/storage.zig"); const url = @import("url/url.zig"); +const urlquery = @import("url/query.zig"); const documentTestExecFn = @import("dom/document.zig").testExecFn; const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn; @@ -278,6 +279,9 @@ test { const cssLibdomTest = @import("css/libdom_test.zig"); std.testing.refAllDecls(cssLibdomTest); + + const queryTest = @import("url/query.zig"); + std.testing.refAllDecls(queryTest); } fn testJSRuntime(alloc: std.mem.Allocator) !void { diff --git a/src/url/query.zig b/src/url/query.zig new file mode 100644 index 00000000..30d71b51 --- /dev/null +++ b/src/url/query.zig @@ -0,0 +1,159 @@ +const std = @import("std"); + +const Reader = @import("../str/parser.zig").Reader; + +// Values is a map with string key of string values. 
pub const Values = struct {
    // Allocator used for every key, value and container owned by this map.
    alloc: std.mem.Allocator,
    // Insertion-ordered map: key -> list of values added under that key.
    // Every key and every value stored here is owned by `alloc`.
    map: std.StringArrayHashMapUnmanaged(List),

    const List = std.ArrayListUnmanaged([]const u8);

    // Create an empty set of values. Nothing is allocated until `append`.
    pub fn init(alloc: std.mem.Allocator) Values {
        return .{
            .alloc = alloc,
            .map = .{},
        };
    }

    // Release every stored key, every stored value, the per-key lists and
    // the map itself.
    pub fn deinit(self: *Values) void {
        var it = self.map.iterator();
        while (it.next()) |entry| {
            for (entry.value_ptr.items) |v| self.alloc.free(v);
            entry.value_ptr.deinit(self.alloc);
            self.alloc.free(entry.key_ptr.*);
        }
        self.map.deinit(self.alloc);
    }

    // add the key value couple to the values.
    // the key and the value are duplicated, the caller keeps ownership of
    // `k` and `v`.
    // On allocation failure nothing is leaked and the map is left unchanged.
    pub fn append(self: *Values, k: []const u8, v: []const u8) !void {
        const vv = try self.alloc.dupe(u8, v);
        errdefer self.alloc.free(vv);

        // Known key: only the value has to be stored.
        if (self.map.getPtr(k)) |list| {
            return try list.append(self.alloc, vv);
        }

        // New key: the map needs its own copy of the key.
        const kk = try self.alloc.dupe(u8, k);
        errdefer self.alloc.free(kk);

        var list = List{};
        errdefer list.deinit(self.alloc);

        try list.append(self.alloc, vv);
        try self.map.put(self.alloc, kk, list);
    }

    // Return all the values stored under `k`, in insertion order.
    // The returned slice is owned by the map and is empty for an unknown key.
    pub fn get(self: *Values, k: []const u8) [][]const u8 {
        if (self.map.get(k)) |list| {
            return list.items;
        }

        return &[_][]const u8{};
    }

    // Return the first value stored under `k`, or "" when the key is unknown
    // or has no value left.
    pub fn first(self: *Values, k: []const u8) []const u8 {
        if (self.map.getPtr(k)) |list| {
            if (list.items.len == 0) return "";
            return list.items[0];
        }

        return "";
    }

    // Remove `k` and all of its values. No-op for an unknown key.
    // The stored key and every stored value are freed, and the relative
    // order of the remaining keys is preserved.
    pub fn delete(self: *Values, k: []const u8) void {
        const kv = self.map.fetchOrderedRemove(k) orelse return;

        var list = kv.value;
        for (list.items) |v| self.alloc.free(v);
        list.deinit(self.alloc);
        self.alloc.free(kv.key);
    }

    // Remove the first value equal to `v` stored under `k`. No-op when the
    // key or the value is not found.
    // The removed value is freed and the order of the remaining values is
    // preserved. The key stays registered even if its list becomes empty.
    pub fn deleteValue(self: *Values, k: []const u8, v: []const u8) void {
        const list = self.map.getPtr(k) orelse return;

        for (list.items, 0..) |vv, i| {
            if (std.mem.eql(u8, v, vv)) {
                self.alloc.free(list.orderedRemove(i));
                return;
            }
        }
    }

    // Number of distinct keys (not the number of key/value pairs).
    pub fn count(self: *Values) usize {
        return self.map.count();
    }
};

// Parse the given query.
+pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values { + var values = Values.init(alloc); + errdefer values.deinit(); + + const ln = s.len; + if (ln == 0) return values; + + var r = Reader{ .s = s }; + while (true) { + const param = r.until('&'); + if (param.len == 0) break; + + var rr = Reader{ .s = param }; + const k = rr.until('='); + if (k.len == 0) continue; + + _ = rr.skip(); + const v = rr.tail(); + + // TODO decode k and v + + try values.append(k, v); + + if (!r.skip()) break; + } + + return values; +} + +test "parse empty query" { + var values = try parseQuery(std.testing.allocator, ""); + defer values.deinit(); + + try std.testing.expect(values.count() == 0); +} + +test "parse empty query &" { + var values = try parseQuery(std.testing.allocator, "&"); + defer values.deinit(); + + try std.testing.expect(values.count() == 0); +} + +test "parse query" { + var values = try parseQuery(std.testing.allocator, "a=b&b=c"); + defer values.deinit(); + + try std.testing.expect(values.count() == 2); + try std.testing.expect(values.get("a").len == 1); + try std.testing.expect(std.mem.eql(u8, values.get("a")[0], "b")); + try std.testing.expect(std.mem.eql(u8, values.first("a"), "b")); + + try std.testing.expect(values.get("b").len == 1); + try std.testing.expect(std.mem.eql(u8, values.get("b")[0], "c")); + try std.testing.expect(std.mem.eql(u8, values.first("b"), "c")); +} + +test "parse query no value" { + var values = try parseQuery(std.testing.allocator, "a"); + defer values.deinit(); + + try std.testing.expect(values.count() == 1); + try std.testing.expect(std.mem.eql(u8, values.first("a"), "")); +} + +test "parse query dup" { + var values = try parseQuery(std.testing.allocator, "a=b&a=c"); + defer values.deinit(); + + try std.testing.expect(values.count() == 1); + try std.testing.expect(std.mem.eql(u8, values.first("a"), "b")); + try std.testing.expect(values.get("a").len == 2); +} diff --git a/src/url/url.zig b/src/url/url.zig index 
7a627196..f268493d 100644 --- a/src/url/url.zig +++ b/src/url/url.zig @@ -5,6 +5,8 @@ const Case = jsruntime.test_utils.Case; const checkCases = jsruntime.test_utils.checkCases; const generate = @import("../generate.zig"); +const query = @import("query.zig"); + pub const Interfaces = generate.Tuple(.{ URL, URLSearchParams, @@ -27,6 +29,7 @@ pub const Interfaces = generate.Tuple(.{ pub const URL = struct { rawuri: []const u8, uri: std.Uri, + search_params: URLSearchParams, pub const mem_guarantied = true; @@ -41,10 +44,12 @@ pub const URL = struct { return .{ .rawuri = raw, .uri = uri, + .search_params = try URLSearchParams.constructor(alloc, uri.query), }; } pub fn deinit(self: *URL, alloc: std.mem.Allocator) void { + self.search_params.deinit(); alloc.free(self.rawuri); } @@ -125,14 +130,57 @@ pub const URL = struct { return try std.mem.concat(alloc, u8, &[_][]const u8{ "#", self.uri.fragment.? }); } + pub fn get_searchParams(self: *URL) *URLSearchParams { + return &self.search_params; + } + pub fn _toJSON(self: *URL, alloc: std.mem.Allocator) ![]const u8 { return try self.get_href(alloc); } }; // https://url.spec.whatwg.org/#interface-urlsearchparams +// TODO array like pub const URLSearchParams = struct { + values: query.Values, + pub const mem_guarantied = true; + + pub fn constructor(alloc: std.mem.Allocator, init: ?[]const u8) !URLSearchParams { + return .{ + .values = try query.parseQuery(alloc, init orelse ""), + }; + } + + pub fn deinit(self: *URLSearchParams, _: std.mem.Allocator) void { + self.values.deinit(); + } + + pub fn get_size(self: *URLSearchParams) u32 { + return @intCast(self.values.count()); + } + + pub fn _append(self: *URLSearchParams, name: []const u8, value: []const u8) !void { + try self.values.append(name, value); + } + + pub fn _delete(self: *URLSearchParams, name: []const u8, value: ?[]const u8) !void { + if (value) |v| return self.values.deleteValue(name, v); + + self.values.delete(name); + } + + pub fn _get(self: *URLSearchParams, 
name: []const u8) ?[]const u8 { + return self.values.first(name); + } + + // TODO return generates an error: caught unexpected error 'TypeLookup' + // pub fn _getAll(self: *URLSearchParams, name: []const u8) [][]const u8 { + // try self.values.get(name); + // } + + // TODO + pub fn _sort(_: *URLSearchParams) void {} }; // Tests @@ -154,6 +202,21 @@ pub fn testExecFn( .{ .src = "url.pathname", .ex = "/path" }, .{ .src = "url.search", .ex = "?query" }, .{ .src = "url.hash", .ex = "#fragment" }, + .{ .src = "url.searchParams.get('query')", .ex = "" }, }; try checkCases(js_env, &url); + + var qs = [_]Case{ + .{ .src = "var url = new URL('https://foo.bar/path?a=~&b=%7E')", .ex = "undefined" }, + .{ .src = "url.searchParams.get('a')", .ex = "~" }, + .{ .src = "url.searchParams.get('b')", .ex = "~" }, + .{ .src = "url.searchParams.append('c', 'foo')", .ex = "undefined" }, + .{ .src = "url.searchParams.get('c')", .ex = "foo" }, + .{ .src = "url.searchParams.size", .ex = "3" }, + .{ .src = "url.searchParams.delete('c', 'foo')", .ex = "undefined" }, + .{ .src = "url.searchParams.get('c')", .ex = "" }, + .{ .src = "url.searchParams.delete('a')", .ex = "undefined" }, + .{ .src = "url.searchParams.get('a')", .ex = "" }, + }; + try checkCases(js_env, &qs); } From a9842fd7900e19d6d7ec2e4b40d914a31b30c998 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 6 May 2024 15:06:03 +0200 Subject: [PATCH 4/5] url: decode query --- src/url/query.zig | 60 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/src/url/query.zig b/src/url/query.zig index 30d71b51..5a6253e9 100644 --- a/src/url/query.zig +++ b/src/url/query.zig @@ -41,6 +41,17 @@ pub const Values = struct { try self.map.put(self.alloc, kk, list); } + // append by taking the ownership of the key and the value + fn appendOwned(self: *Values, k: []const u8, v: []const u8) !void { + if (self.map.getPtr(k)) |list| { + return try list.append(self.alloc, v); + } + + var 
list = List{}; + try list.append(self.alloc, v); + try self.map.put(self.alloc, k, list); + } + pub fn get(self: *Values, k: []const u8) [][]const u8 { if (self.map.get(k)) |list| { return list.items; @@ -83,6 +94,49 @@ pub const Values = struct { } }; +fn unhex(c: u8) u8 { + if ('0' <= c and c <= '9') return c - '0'; + if ('a' <= c and c <= 'f') return c - 'a' + 10; + if ('A' <= c and c <= 'F') return c - 'A' + 10; + return 0; +} + +// unescape decodes a percent encoded string. +// The caller owned the returned string. +pub fn unescape(alloc: std.mem.Allocator, s: []const u8) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .{}; + defer buf.deinit(alloc); + + var i: usize = 0; + while (i < s.len) { + defer i += 1; + + switch (s[i]) { + '%' => { + if (i + 2 > s.len) return error.EscapeError; + if (!std.ascii.isHex(s[i + 1])) return error.EscapeError; + if (!std.ascii.isHex(s[i + 2])) return error.EscapeError; + + try buf.append(alloc, unhex(s[i + 1]) << 4 | unhex(s[i + 2])); + i += 2; + }, + '+' => try buf.append(alloc, ' '), // TODO should we decode or keep as it? + else => try buf.append(alloc, s[i]), + } + } + + return try buf.toOwnedSlice(alloc); +} + +test "unescape" { + var v: []const u8 = undefined; + const alloc = std.testing.allocator; + + v = try unescape(alloc, "%7E"); + try std.testing.expect(std.mem.eql(u8, "~", v)); + alloc.free(v); +} + // Parse the given query. 
pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values { var values = Values.init(alloc); @@ -103,9 +157,11 @@ pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values { _ = rr.skip(); const v = rr.tail(); - // TODO decode k and v + // decode k and v + const kk = try unescape(alloc, k); + const vv = try unescape(alloc, v); - try values.append(k, v); + try values.appendOwned(kk, vv); if (!r.skip()) break; } From d0c741f3bb33c84debefbc0b27167b8d2a135944 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 6 May 2024 16:32:20 +0200 Subject: [PATCH 5/5] url: search query dynamic and encoded --- src/url/query.zig | 58 +++++++++++++++++++++++++++++++++++++++++++---- src/url/url.zig | 27 ++++++++++++++++++---- 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/src/url/query.zig b/src/url/query.zig index 5a6253e9..899e8ea1 100644 --- a/src/url/query.zig +++ b/src/url/query.zig @@ -79,19 +79,42 @@ pub const Values = struct { pub fn deleteValue(self: *Values, k: []const u8, v: []const u8) void { const list = self.map.getPtr(k) orelse return; - var i: usize = 0; - while (i < list.items.len) { - if (std.mem.eql(u8, v, list.items[i])) { + for (list.items, 0..) |vv, i| { + if (std.mem.eql(u8, v, vv)) { _ = list.swapRemove(i); return; } - i += 1; } } pub fn count(self: *Values) usize { return self.map.count(); } + + // the caller owned the returned string. 
+ pub fn encode(self: *Values, writer: anytype) !void { + var i: usize = 0; + var it = self.map.iterator(); + while (it.next()) |entry| { + defer i += 1; + if (i > 0) try writer.writeByte('&'); + + if (entry.value_ptr.items.len == 0) { + try escape(writer, entry.key_ptr.*); + continue; + } + + const start = i; + for (entry.value_ptr.items) |v| { + defer i += 1; + if (start < i) try writer.writeByte('&'); + + try escape(writer, entry.key_ptr.*); + if (v.len > 0) try writer.writeByte('='); + try escape(writer, v); + } + } + } }; fn unhex(c: u8) u8 { @@ -137,6 +160,19 @@ test "unescape" { alloc.free(v); } +pub fn escape(writer: anytype, raw: []const u8) !void { + var start: usize = 0; + for (raw, 0..) |char, index| { + if ('a' <= char and char <= 'z' or 'A' <= char and char <= 'Z' or '0' <= char and char <= '9') { + continue; + } + + try writer.print("{s}%{X:0>2}", .{ raw[start..index], char }); + start = index + 1; + } + try writer.writeAll(raw[start..]); +} + // Parse the given query. pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values { var values = Values.init(alloc); @@ -213,3 +249,17 @@ test "parse query dup" { try std.testing.expect(std.mem.eql(u8, values.first("a"), "b")); try std.testing.expect(values.get("a").len == 2); } + +test "encode query" { + var values = try parseQuery(std.testing.allocator, "a=b&b=c"); + defer values.deinit(); + + try values.append("a", "~"); + + var buf: std.ArrayListUnmanaged(u8) = .{}; + defer buf.deinit(std.testing.allocator); + + try values.encode(buf.writer(std.testing.allocator)); + + try std.testing.expect(std.mem.eql(u8, buf.items, "a=b&a=%7E&b=c")); +} diff --git a/src/url/url.zig b/src/url/url.zig index f268493d..9c99587f 100644 --- a/src/url/url.zig +++ b/src/url/url.zig @@ -56,10 +56,18 @@ pub const URL = struct { // the caller must free the returned string. 
// TODO return a disposable string // https://github.com/lightpanda-io/jsruntime-lib/issues/195 - pub fn get_href(self: URL, alloc: std.mem.Allocator) ![]const u8 { + pub fn get_href(self: *URL, alloc: std.mem.Allocator) ![]const u8 { var buf = std.ArrayList(u8).init(alloc); defer buf.deinit(); + // retrieve the query search from search_params. + const cur = self.uri.query; + defer self.uri.query = cur; + var q = std.ArrayList(u8).init(alloc); + defer q.deinit(); + try self.search_params.values.encode(q.writer()); + self.uri.query = q.items; + try self.uri.writeToStream(.{ .scheme = true, .authentication = true, @@ -116,9 +124,14 @@ pub const URL = struct { // TODO return a disposable string // https://github.com/lightpanda-io/jsruntime-lib/issues/195 pub fn get_search(self: *URL, alloc: std.mem.Allocator) ![]const u8 { - if (self.uri.query == null) return try alloc.dupe(u8, ""); + if (self.search_params.get_size() == 0) return try alloc.dupe(u8, ""); - return try std.mem.concat(alloc, u8, &[_][]const u8{ "?", self.uri.query.? }); + var buf: std.ArrayListUnmanaged(u8) = .{}; + defer buf.deinit(alloc); + + try buf.append(alloc, '?'); + try self.search_params.values.encode(buf.writer(alloc)); + return buf.toOwnedSlice(alloc); } // the caller must free the returned string. 
@@ -207,12 +220,18 @@ pub fn testExecFn( try checkCases(js_env, &url); var qs = [_]Case{ - .{ .src = "var url = new URL('https://foo.bar/path?a=~&b=%7E')", .ex = "undefined" }, + .{ .src = "var url = new URL('https://foo.bar/path?a=~&b=%7E#fragment')", .ex = "undefined" }, .{ .src = "url.searchParams.get('a')", .ex = "~" }, .{ .src = "url.searchParams.get('b')", .ex = "~" }, .{ .src = "url.searchParams.append('c', 'foo')", .ex = "undefined" }, .{ .src = "url.searchParams.get('c')", .ex = "foo" }, .{ .src = "url.searchParams.size", .ex = "3" }, + + // search is dynamic + .{ .src = "url.search", .ex = "?a=%7E&b=%7E&c=foo" }, + // href is dynamic + .{ .src = "url.href", .ex = "https://foo.bar/path?a=%7E&b=%7E&c=foo#fragment" }, + .{ .src = "url.searchParams.delete('c', 'foo')", .ex = "undefined" }, .{ .src = "url.searchParams.get('c')", .ex = "" }, .{ .src = "url.searchParams.delete('a')", .ex = "undefined" },