Merge pull request #414 from karlseguin/url_query
Some checks failed
wpt / web platform tests (push) Has been cancelled
zig-test / zig build dev (push) Has been cancelled
zig-test / zig build release (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
wpt / perf-fmt (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
zig-test / demo-puppeteer (push) Has been cancelled

Improve memory and performance of url.Query
This commit is contained in:
Pierre Tachoire
2025-02-10 09:30:37 +01:00
committed by GitHub
2 changed files with 316 additions and 129 deletions

View File

@@ -52,6 +52,27 @@ pub const Reader = struct {
}
};
// converts a comptime-known string (i.e. null terminated) to an uint
pub fn asUint(comptime string: anytype) AsUintReturn(string) {
const byteLength = @bitSizeOf(@TypeOf(string.*)) / 8 - 1;
const expectedType = *const [byteLength:0]u8;
if (@TypeOf(string) != expectedType) {
@compileError("expected : " ++ @typeName(expectedType) ++
", got: " ++ @typeName(@TypeOf(string)));
}
return @bitCast(@as(*const [byteLength]u8, string).*);
}
fn AsUintReturn(comptime string: anytype) type {
return @Type(.{
.Int = .{
.bits = @bitSizeOf(@TypeOf(string.*)) - 8, // (- 8) to exclude sentinel 0
.signedness = .unsigned,
},
});
}
const testing = std.testing;
test "parser.Reader: skip" {
var r = Reader{ .data = "foo" };
@@ -85,3 +106,20 @@ test "parser.Reader: until" {
try testing.expectEqualStrings("", r.until('.'));
try testing.expectEqualStrings("", r.tail());
}
test "parser: asUint" {
const ASCII_x = @as(u8, @bitCast([1]u8{'x'}));
const ASCII_ab = @as(u16, @bitCast([2]u8{ 'a', 'b' }));
const ASCII_xyz = @as(u24, @bitCast([3]u8{ 'x', 'y', 'z' }));
const ASCII_abcd = @as(u32, @bitCast([4]u8{ 'a', 'b', 'c', 'd' }));
try testing.expectEqual(ASCII_x, asUint("x"));
try testing.expectEqual(ASCII_ab, asUint("ab"));
try testing.expectEqual(ASCII_xyz, asUint("xyz"));
try testing.expectEqual(ASCII_abcd, asUint("abcd"));
try testing.expectEqual(u8, @TypeOf(asUint("x")));
try testing.expectEqual(u16, @TypeOf(asUint("ab")));
try testing.expectEqual(u24, @TypeOf(asUint("xyz")));
try testing.expectEqual(u32, @TypeOf(asUint("abcd")));
}

View File

@@ -19,59 +19,58 @@
const std = @import("std");
const Reader = @import("../str/parser.zig").Reader;
const asUint = @import("../str/parser.zig").asUint;
// Values is a map with string key of string values.
pub const Values = struct {
alloc: std.mem.Allocator,
arena: std.heap.ArenaAllocator,
map: std.StringArrayHashMapUnmanaged(List),
const List = std.ArrayListUnmanaged([]const u8);
pub fn init(alloc: std.mem.Allocator) Values {
pub fn init(allocator: std.mem.Allocator) Values {
return .{
.alloc = alloc,
.map = .{},
.arena = std.heap.ArenaAllocator.init(allocator),
};
}
pub fn deinit(self: *Values) void {
var it = self.map.iterator();
while (it.next()) |entry| {
for (entry.value_ptr.items) |v| self.alloc.free(v);
entry.value_ptr.deinit(self.alloc);
self.alloc.free(entry.key_ptr.*);
}
self.map.deinit(self.alloc);
self.arena.deinit();
}
// add the key value couple to the values.
// the key and the value are duplicated.
pub fn append(self: *Values, k: []const u8, v: []const u8) !void {
const vv = try self.alloc.dupe(u8, v);
const allocator = self.arena.allocator();
const owned_value = try allocator.dupe(u8, v);
if (self.map.getPtr(k)) |list| {
return try list.append(self.alloc, vv);
var gop = try self.map.getOrPut(allocator, k);
if (gop.found_existing) {
return gop.value_ptr.append(allocator, owned_value);
}
const kk = try self.alloc.dupe(u8, k);
gop.key_ptr.* = try allocator.dupe(u8, k);
var list = List{};
try list.append(self.alloc, vv);
try self.map.put(self.alloc, kk, list);
try list.append(allocator, owned_value);
gop.value_ptr.* = list;
}
// append by taking the ownership of the key and the value
fn appendOwned(self: *Values, k: []const u8, v: []const u8) !void {
if (self.map.getPtr(k)) |list| {
self.alloc.free(k);
return try list.append(self.alloc, v);
const allocator = self.arena.allocator();
var gop = try self.map.getOrPut(allocator, k);
if (gop.found_existing) {
return gop.value_ptr.append(allocator, v);
}
var list = List{};
try list.append(self.alloc, v);
try self.map.put(self.alloc, k, list);
try list.append(allocator, v);
gop.value_ptr.* = list;
}
pub fn get(self: *Values, k: []const u8) [][]const u8 {
pub fn get(self: *const Values, k: []const u8) []const []const u8 {
if (self.map.get(k)) |list| {
return list.items;
}
@@ -79,7 +78,7 @@ pub const Values = struct {
return &[_][]const u8{};
}
pub fn first(self: *Values, k: []const u8) []const u8 {
pub fn first(self: *const Values, k: []const u8) []const u8 {
if (self.map.getPtr(k)) |list| {
if (list.items.len == 0) return "";
return list.items[0];
@@ -89,10 +88,7 @@ pub const Values = struct {
}
pub fn delete(self: *Values, k: []const u8) void {
if (self.map.getPtr(k)) |list| {
list.deinit(self.alloc);
_ = self.map.fetchSwapRemove(k);
}
_ = self.map.fetchSwapRemove(k);
}
pub fn deleteValue(self: *Values, k: []const u8, v: []const u8) void {
@@ -106,80 +102,48 @@ pub const Values = struct {
}
}
pub fn count(self: *Values) usize {
pub fn count(self: *const Values) usize {
return self.map.count();
}
// the caller owned the returned string.
pub fn encode(self: *Values, writer: anytype) !void {
var i: usize = 0;
pub fn encode(self: *const Values, writer: anytype) !void {
var it = self.map.iterator();
const first_entry = it.next() orelse return;
try encodeKeyValues(first_entry, writer);
while (it.next()) |entry| {
defer i += 1;
if (i > 0) try writer.writeByte('&');
if (entry.value_ptr.items.len == 0) {
try escape(writer, entry.key_ptr.*);
continue;
}
const start = i;
for (entry.value_ptr.items) |v| {
defer i += 1;
if (start < i) try writer.writeByte('&');
try escape(writer, entry.key_ptr.*);
if (v.len > 0) try writer.writeByte('=');
try escape(writer, v);
}
try writer.writeByte('&');
try encodeKeyValues(entry, writer);
}
}
};
fn unhex(c: u8) u8 {
if ('0' <= c and c <= '9') return c - '0';
if ('a' <= c and c <= 'f') return c - 'a' + 10;
if ('A' <= c and c <= 'F') return c - 'A' + 10;
return 0;
}
fn encodeKeyValues(entry: anytype, writer: anytype) !void {
const key = entry.key_ptr.*;
// unescape decodes a percent encoded string.
// The caller owned the returned string.
pub fn unescape(alloc: std.mem.Allocator, s: []const u8) ![]const u8 {
var buf: std.ArrayListUnmanaged(u8) = .{};
defer buf.deinit(alloc);
var i: usize = 0;
while (i < s.len) {
defer i += 1;
switch (s[i]) {
'%' => {
if (i + 2 > s.len) return error.EscapeError;
if (!std.ascii.isHex(s[i + 1])) return error.EscapeError;
if (!std.ascii.isHex(s[i + 2])) return error.EscapeError;
try buf.append(alloc, unhex(s[i + 1]) << 4 | unhex(s[i + 2]));
i += 2;
},
'+' => try buf.append(alloc, ' '), // TODO should we decode or keep as it?
else => try buf.append(alloc, s[i]),
}
try escape(key, writer);
const values = entry.value_ptr.items;
if (values.len == 0) {
return;
}
return try buf.toOwnedSlice(alloc);
if (values[0].len > 0) {
try writer.writeByte('=');
try escape(values[0], writer);
}
for (values[1..]) |value| {
try writer.writeByte('&');
try escape(key, writer);
if (value.len > 0) {
try writer.writeByte('=');
try escape(value, writer);
}
}
}
test "unescape" {
var v: []const u8 = undefined;
const alloc = std.testing.allocator;
v = try unescape(alloc, "%7E");
try std.testing.expect(std.mem.eql(u8, "~", v));
alloc.free(v);
}
pub fn escape(writer: anytype, raw: []const u8) !void {
fn escape(raw: []const u8, writer: anytype) !void {
var start: usize = 0;
for (raw, 0..) |char, index| {
if ('a' <= char and char <= 'z' or 'A' <= char and char <= 'Z' or '0' <= char and char <= '9') {
@@ -197,6 +161,8 @@ pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values {
var values = Values.init(alloc);
errdefer values.deinit();
const arena = values.arena.allocator();
const ln = s.len;
if (ln == 0) return values;
@@ -213,8 +179,8 @@ pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values {
const v = rr.tail();
// decode k and v
const kk = try unescape(alloc, k);
const vv = try unescape(alloc, v);
const kk = try unescape(arena, k);
const vv = try unescape(arena, v);
try values.appendOwned(kk, vv);
@@ -224,61 +190,244 @@ pub fn parseQuery(alloc: std.mem.Allocator, s: []const u8) !Values {
return values;
}
test "parse empty query" {
var values = try parseQuery(std.testing.allocator, "");
defer values.deinit();
// The return'd string may or may not be allocated. Callers should use arenas
fn unescape(allocator: std.mem.Allocator, input: []const u8) ![]const u8 {
const HEX_CHAR = comptime blk: {
var all = std.mem.zeroes([256]bool);
for ('a'..('f' + 1)) |b| all[b] = true;
for ('A'..('F' + 1)) |b| all[b] = true;
for ('0'..('9' + 1)) |b| all[b] = true;
break :blk all;
};
try std.testing.expect(values.count() == 0);
const HEX_DECODE = comptime blk: {
var all = std.mem.zeroes([256]u8);
for ('a'..('z' + 1)) |b| all[b] = b - 'a' + 10;
for ('A'..('Z' + 1)) |b| all[b] = b - 'A' + 10;
for ('0'..('9' + 1)) |b| all[b] = b - '0';
break :blk all;
};
var has_plus = false;
var unescaped_len = input.len;
{
// Figure out if we have any spaces and what the final unescaped length
// will be (which will let us know if we have anything to unescape in
// the first place)
var i: usize = 0;
while (i < input.len) {
const c = input[i];
if (c == '%') {
if (i + 2 >= input.len or !HEX_CHAR[input[i + 1]] or !HEX_CHAR[input[i + 2]]) {
return error.EscapeError;
}
i += 3;
unescaped_len -= 2;
} else if (c == '+') {
has_plus = true;
i += 1;
} else {
i += 1;
}
}
}
// no encoding, and no plus. nothing to unescape
if (unescaped_len == input.len and has_plus == false) {
return input;
}
var unescaped = try allocator.alloc(u8, unescaped_len);
errdefer allocator.free(unescaped);
var input_pos: usize = 0;
for (0..unescaped_len) |unescaped_pos| {
switch (input[input_pos]) {
'+' => {
unescaped[unescaped_pos] = ' ';
input_pos += 1;
},
'%' => {
const encoded = input[input_pos + 1 .. input_pos + 3];
const encoded_as_uint = @as(u16, @bitCast(encoded[0..2].*));
unescaped[unescaped_pos] = switch (encoded_as_uint) {
asUint("20") => ' ',
asUint("21") => '!',
asUint("22") => '"',
asUint("23") => '#',
asUint("24") => '$',
asUint("25") => '%',
asUint("26") => '&',
asUint("27") => '\'',
asUint("28") => '(',
asUint("29") => ')',
asUint("2A") => '*',
asUint("2B") => '+',
asUint("2C") => ',',
asUint("2F") => '/',
asUint("3A") => ':',
asUint("3B") => ';',
asUint("3D") => '=',
asUint("3F") => '?',
asUint("40") => '@',
asUint("5B") => '[',
asUint("5D") => ']',
else => HEX_DECODE[encoded[0]] << 4 | HEX_DECODE[encoded[1]],
};
input_pos += 3;
},
else => |c| {
unescaped[unescaped_pos] = c;
input_pos += 1;
},
}
}
return unescaped;
}
test "parse empty query &" {
var values = try parseQuery(std.testing.allocator, "&");
defer values.deinit();
const testing = std.testing;
test "url.Query: unescape" {
const allocator = testing.allocator;
const cases = [_]struct { expected: []const u8, input: []const u8, free: bool }{
.{ .expected = "", .input = "", .free = false },
.{ .expected = "over", .input = "over", .free = false },
.{ .expected = "Hello World", .input = "Hello World", .free = false },
.{ .expected = "~", .input = "%7E", .free = true },
.{ .expected = "~", .input = "%7e", .free = true },
.{ .expected = "Hello~World", .input = "Hello%7eWorld", .free = true },
.{ .expected = "Hello World", .input = "Hello++World", .free = true },
};
try std.testing.expect(values.count() == 0);
for (cases) |case| {
const value = try unescape(allocator, case.input);
defer if (case.free) {
allocator.free(value);
};
try testing.expectEqualStrings(case.expected, value);
}
try testing.expectError(error.EscapeError, unescape(undefined, "%"));
try testing.expectError(error.EscapeError, unescape(undefined, "%a"));
try testing.expectError(error.EscapeError, unescape(undefined, "%1"));
try testing.expectError(error.EscapeError, unescape(undefined, "123%45%6"));
try testing.expectError(error.EscapeError, unescape(undefined, "%zzzzz"));
try testing.expectError(error.EscapeError, unescape(undefined, "%0\xff"));
}
test "parse query" {
var values = try parseQuery(std.testing.allocator, "a=b&b=c");
defer values.deinit();
test "url.Query: parseQuery" {
try testParseQuery(.{}, "");
try std.testing.expect(values.count() == 2);
try std.testing.expect(values.get("a").len == 1);
try std.testing.expect(std.mem.eql(u8, values.get("a")[0], "b"));
try std.testing.expect(std.mem.eql(u8, values.first("a"), "b"));
try testParseQuery(.{}, "&");
try std.testing.expect(values.get("b").len == 1);
try std.testing.expect(std.mem.eql(u8, values.get("b")[0], "c"));
try std.testing.expect(std.mem.eql(u8, values.first("b"), "c"));
try testParseQuery(.{ .a = [_][]const u8{"b"} }, "a=b");
try testParseQuery(.{ .hello = [_][]const u8{"world"} }, "hello=world");
try testParseQuery(.{ .hello = [_][]const u8{ "world", "all" } }, "hello=world&hello=all");
try testParseQuery(.{
.a = [_][]const u8{"b"},
.b = [_][]const u8{"c"},
}, "a=b&b=c");
try testParseQuery(.{ .a = [_][]const u8{""} }, "a");
try testParseQuery(.{ .a = [_][]const u8{ "", "", "" } }, "a&a&a");
try testParseQuery(.{ .abc = [_][]const u8{""} }, "abc");
try testParseQuery(.{
.abc = [_][]const u8{""},
.dde = [_][]const u8{ "", "" },
}, "abc&dde&dde");
try testParseQuery(.{
.@"power is >" = [_][]const u8{"9,000?"},
}, "power%20is%20%3E=9%2C000%3F");
}
test "parse query no value" {
var values = try parseQuery(std.testing.allocator, "a");
test "url.Query.Values: get/first/count" {
var values = Values.init(testing.allocator);
defer values.deinit();
try std.testing.expect(values.count() == 1);
try std.testing.expect(std.mem.eql(u8, values.first("a"), ""));
{
// empty
try testing.expectEqual(0, values.count());
try testing.expectEqual(0, values.get("").len);
try testing.expectEqualStrings("", values.first(""));
try testing.expectEqual(0, values.get("key").len);
try testing.expectEqualStrings("", values.first("key"));
}
{
// add 1 value => key
try values.appendOwned("key", "value");
try testing.expectEqual(1, values.count());
try testing.expectEqual(1, values.get("key").len);
try testing.expectEqualSlices(
[]const u8,
&.{"value"},
values.get("key"),
);
try testing.expectEqualStrings("value", values.first("key"));
}
{
// add another value for the same key
try values.appendOwned("key", "another");
try testing.expectEqual(1, values.count());
try testing.expectEqual(2, values.get("key").len);
try testing.expectEqualSlices(
[]const u8,
&.{ "value", "another" },
values.get("key"),
);
try testing.expectEqualStrings("value", values.first("key"));
}
{
// add a new key (and value)
try values.appendOwned("over", "9000!");
try testing.expectEqual(2, values.count());
try testing.expectEqual(2, values.get("key").len);
try testing.expectEqual(1, values.get("over").len);
try testing.expectEqualSlices(
[]const u8,
&.{"9000!"},
values.get("over"),
);
try testing.expectEqualStrings("9000!", values.first("over"));
}
}
test "parse query dup" {
var values = try parseQuery(std.testing.allocator, "a=b&a=c");
test "url.Query.Values: encode" {
var values = try parseQuery(
testing.allocator,
"hello=world&i%20will%20not%20fear=%3E%3E&a=b&a=c",
);
defer values.deinit();
try std.testing.expect(values.count() == 1);
try std.testing.expect(std.mem.eql(u8, values.first("a"), "b"));
try std.testing.expect(values.get("a").len == 2);
}
test "encode query" {
var values = try parseQuery(std.testing.allocator, "a=b&b=c");
defer values.deinit();
try values.append("a", "~");
var buf: std.ArrayListUnmanaged(u8) = .{};
defer buf.deinit(std.testing.allocator);
try values.encode(buf.writer(std.testing.allocator));
try std.testing.expect(std.mem.eql(u8, buf.items, "a=b&a=%7E&b=c"));
defer buf.deinit(testing.allocator);
try values.encode(buf.writer(testing.allocator));
try testing.expectEqualStrings(
"hello=world&i%20will%20not%20fear=%3E%3E&a=b&a=c",
buf.items,
);
}
fn testParseQuery(expected: anytype, query: []const u8) !void {
var values = try parseQuery(testing.allocator, query);
defer values.deinit();
var count: usize = 0;
inline for (@typeInfo(@TypeOf(expected)).Struct.fields) |f| {
const actual = values.get(f.name);
const expect = @field(expected, f.name);
try testing.expectEqual(expect.len, actual.len);
for (expect, actual) |e, a| {
try testing.expectEqualStrings(e, a);
}
count += 1;
}
try testing.expectEqual(count, values.count());
}