Merge pull request #1679 from lightpanda-io/escape_data_uri
Some checks failed
e2e-test / zig build release (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
nightly build / build-linux-x86_64 (push) Has been cancelled
nightly build / build-linux-aarch64 (push) Has been cancelled
nightly build / build-macos-aarch64 (push) Has been cancelled
nightly build / build-macos-x86_64 (push) Has been cancelled
wpt / zig build release (push) Has been cancelled
wpt / build wpt runner (push) Has been cancelled
wpt / web platform tests json output (push) Has been cancelled
wpt / perf-fmt (push) Has been cancelled
e2e-integration-test / zig build release (push) Has been cancelled
e2e-integration-test / demo-integration-scripts (push) Has been cancelled

Escape DataURIs
This commit is contained in:
Karl Seguin
2026-02-28 14:45:12 +08:00
committed by GitHub
3 changed files with 126 additions and 12 deletions

View File

@@ -1051,23 +1051,35 @@ fn parseDataURI(allocator: Allocator, src: []const u8) !?[]const u8 {
const uri = src[5..]; const uri = src[5..];
const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null; const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null;
const data = uri[data_starts + 1 ..];
var data = uri[data_starts + 1 ..]; const unescaped = try URL.unescape(allocator, data);
// Extract the encoding.
const metadata = uri[0..data_starts]; const metadata = uri[0..data_starts];
if (std.mem.endsWith(u8, metadata, ";base64")) { if (std.mem.endsWith(u8, metadata, ";base64") == false) {
const decoder = std.base64.standard.Decoder; return unescaped;
const decoded_size = try decoder.calcSizeForSlice(data);
const buffer = try allocator.alloc(u8, decoded_size);
errdefer allocator.free(buffer);
try decoder.decode(buffer, data);
data = buffer;
} }
return data; // Forgiving base64 decode per WHATWG spec:
// https://infra.spec.whatwg.org/#forgiving-base64-decode
// Step 1: Remove all ASCII whitespace
var stripped = try std.ArrayList(u8).initCapacity(allocator, unescaped.len);
for (unescaped) |c| {
if (!std.ascii.isWhitespace(c)) {
stripped.appendAssumeCapacity(c);
}
}
const trimmed = std.mem.trimRight(u8, stripped.items, "=");
// Length % 4 == 1 is invalid
if (trimmed.len % 4 == 1) {
return error.InvalidCharacterError;
}
const decoded_size = std.base64.standard_no_pad.Decoder.calcSizeForSlice(trimmed) catch return error.InvalidCharacterError;
const buffer = try allocator.alloc(u8, decoded_size);
std.base64.standard_no_pad.Decoder.decode(buffer, trimmed) catch return error.InvalidCharacterError;
return buffer;
} }
const testing = @import("../testing.zig"); const testing = @import("../testing.zig");

View File

@@ -642,6 +642,33 @@ pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 {
); );
} }
/// Percent-decodes `input`, turning every `%XX` sequence (where both `X` are
/// ASCII hex digits, either case) into the byte it encodes.
///
/// A `%` that is not followed by exactly two hex digits is copied through
/// unchanged, as are all other bytes.
///
/// When `input` contains no `%` at all, `input` itself is returned and nothing
/// is allocated. Otherwise the result is backed by a buffer allocated from
/// `arena` with capacity `input.len`; the returned slice may be shorter than
/// that buffer.
///
/// Returns an error only if the allocation fails.
pub fn unescape(arena: Allocator, input: []const u8) ![]const u8 {
    if (std.mem.indexOfScalar(u8, input, '%') == null) {
        return input;
    }

    // Decoding never grows the data, so input.len is always enough capacity
    // and the appendAssumeCapacity calls below cannot overflow it.
    var result = try std.ArrayList(u8).initCapacity(arena, input.len);

    var i: usize = 0;
    while (i < input.len) {
        // Validate both characters explicitly: std.fmt.parseInt accepts a
        // leading '+' or '-', which would otherwise let "%+5" or "%-0"
        // decode to a byte even though they are not valid escapes.
        const is_escape = input[i] == '%' and
            i + 2 < input.len and
            std.ascii.isHex(input[i + 1]) and
            std.ascii.isHex(input[i + 2]);

        if (is_escape) {
            // Both characters are known hex digits and 0xFF fits in a u8,
            // so parsing cannot fail here.
            const byte = std.fmt.parseInt(u8, input[i + 1 .. i + 3], 16) catch unreachable;
            result.appendAssumeCapacity(byte);
            i += 3;
        } else {
            result.appendAssumeCapacity(input[i]);
            i += 1;
        }
    }
    return result.items;
}
const testing = @import("../testing.zig"); const testing = @import("../testing.zig");
test "URL: isCompleteHTTPUrl" { test "URL: isCompleteHTTPUrl" {
try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about")); try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about"));
@@ -1233,3 +1260,68 @@ test "URL: getRobotsUrl" {
try testing.expectString("https://example.com/robots.txt", url); try testing.expectString("https://example.com/robots.txt", url);
} }
} }
test "URL: unescape" {
    defer testing.reset();
    const arena = testing.arena_allocator;

    // Each entry is `.{ input, expected }`. The table is a comptime tuple so
    // the expected values stay string literals when handed to expectEqual.
    const cases = .{
        // No percent sign.
        .{ "hello world", "hello world" },

        // Well-formed escapes, lower-case and upper-case hex digits.
        .{ "hello%20world", "hello world" },
        .{ "%48%65%6c%6c%6f", "Hello" },
        .{ "%48%65%6C%6C%6F", "Hello" },
        .{ "a%3Db", "a=b" },
        .{ "a%3DB", "a=B" },

        // Base64 payloads whose padding (or entire content) is escaped.
        .{ "ZDIgPSAndHdvJzs%3D", "ZDIgPSAndHdvJzs=" },
        .{ "%5a%44%4d%67%50%53%41%6e%64%47%68%79%5a%57%55%6e%4f%77%3D%3D", "ZDMgPSAndGhyZWUnOw==" },

        // Malformed or truncated sequences are expected back unchanged.
        .{ "hello%2world", "hello%2world" },
        .{ "hello%ZZworld", "hello%ZZworld" },
        .{ "hello%", "hello%" },
        .{ "hello%2", "hello%2" },
    };

    inline for (cases) |case| {
        const result = try unescape(arena, case[0]);
        try testing.expectEqual(case[1], result);
    }
}

View File

@@ -19,3 +19,13 @@
</script> </script>
<script id=datauri src="data:text/plain;charset=utf-8;base64,dGVzdGluZy5leHBlY3RFcXVhbCh0cnVlLCB0cnVlKTs="></script> <script id=datauri src="data:text/plain;charset=utf-8;base64,dGVzdGluZy5leHBlY3RFcXVhbCh0cnVlLCB0cnVlKTs="></script>
<script id=datauri_url_encoded_text src="data:text/javascript,testing.expectEqual(3, 3);"></script>
<script id=datauri_encoded_padding src="data:text/javascript;base64,dGVzdGluZy5leHBlY3RFcXVhbCgxLCAxKTs%3D"></script>
<script id=datauri_fully_encoded src="data:text/javascript;base64,%64%47%56%7a%64%47%6c%75%5a%79%35%6c%65%48%42%6c%59%33%52%46%63%58%56%68%62%43%67%79%4c%43%41%79%4b%54%73%3d"></script>
<script id=datauri_with_whitespace src="data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207%20"></script>
<script id=datauri_url_encoded_unicode src="data:text/javascript,testing.expectEqual(4%2C%204)%3B"></script>