From 3d51667fc89876c901e97d6c705d59a9c3e35e1c Mon Sep 17 00:00:00 2001
From: Karl Seguin
Date: Sat, 28 Feb 2026 12:24:26 +0800
Subject: [PATCH] Escape DataURIs

Support forgiving base64 decoder
Support non-encoded DataURIs
---
 src/browser/ScriptManager.zig                 |  36 +++++---
 src/browser/URL.zig                           | 107 ++++++++++++++++++++++
 .../tests/legacy/html/script/script.html      |  10 ++
 3 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig
index 8334a1a7..403a1199 100644
--- a/src/browser/ScriptManager.zig
+++ b/src/browser/ScriptManager.zig
@@ -1051,23 +1051,35 @@ fn parseDataURI(allocator: Allocator, src: []const u8) !?[]const u8 {
 
     const uri = src[5..];
     const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null;
+    const data = uri[data_starts + 1 ..];
 
-    var data = uri[data_starts + 1 ..];
+    const unescaped = try URL.unescape(allocator, data);
 
-    // Extract the encoding.
     const metadata = uri[0..data_starts];
-    if (std.mem.endsWith(u8, metadata, ";base64")) {
-        const decoder = std.base64.standard.Decoder;
-        const decoded_size = try decoder.calcSizeForSlice(data);
-
-        const buffer = try allocator.alloc(u8, decoded_size);
-        errdefer allocator.free(buffer);
-
-        try decoder.decode(buffer, data);
-        data = buffer;
+    if (std.mem.endsWith(u8, metadata, ";base64") == false) {
+        return unescaped;
     }
 
-    return data;
+    // Forgiving base64 decode per WHATWG spec:
+    // https://infra.spec.whatwg.org/#forgiving-base64-decode
+    // Step 1: Remove all ASCII whitespace
+    var stripped = try std.ArrayList(u8).initCapacity(allocator, unescaped.len);
+    for (unescaped) |c| {
+        if (!std.ascii.isWhitespace(c)) {
+            stripped.appendAssumeCapacity(c);
+        }
+    }
+    const trimmed = std.mem.trimRight(u8, stripped.items, "=");
+
+    // Length % 4 == 1 is invalid
+    if (trimmed.len % 4 == 1) {
+        return error.InvalidCharacterError;
+    }
+
+    const decoded_size = std.base64.standard_no_pad.Decoder.calcSizeForSlice(trimmed) catch return error.InvalidCharacterError;
+    const buffer = try allocator.alloc(u8, decoded_size);
+    std.base64.standard_no_pad.Decoder.decode(buffer, trimmed) catch return error.InvalidCharacterError;
+    return buffer;
 }
 
 const testing = @import("../testing.zig");
diff --git a/src/browser/URL.zig b/src/browser/URL.zig
index 6616d636..b8d8d563 100644
--- a/src/browser/URL.zig
+++ b/src/browser/URL.zig
@@ -642,6 +642,42 @@ pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 {
     );
 }
 
+/// Percent-decodes `input`, turning every `%XX` (two ASCII hex digits) into the
+/// byte it encodes. A `%` that is not followed by two hex digits is copied
+/// through unchanged.
+///
+/// Returns `input` itself when it contains no `%`; otherwise the result is
+/// allocated from `arena` and is never freed here.
+pub fn unescape(arena: Allocator, input: []const u8) ![]const u8 {
+    if (std.mem.indexOfScalar(u8, input, '%') == null) {
+        return input;
+    }
+
+    // Decoding never grows the data, so input.len bounds the output and the
+    // appendAssumeCapacity calls below cannot overflow.
+    var result = try std.ArrayList(u8).initCapacity(arena, input.len);
+
+    var i: usize = 0;
+    while (i < input.len) {
+        // Check the digits explicitly: std.fmt.parseInt also accepts a leading
+        // sign, so "%+5" or "%-0" would otherwise be decoded as an escape.
+        const is_escape = input[i] == '%' and i + 2 < input.len and
+            std.ascii.isHex(input[i + 1]) and std.ascii.isHex(input[i + 2]);
+        if (is_escape) {
+            const hex = input[i + 1 .. i + 3];
+            // Two validated hex digits always fit in a u8.
+            const byte = std.fmt.parseInt(u8, hex, 16) catch unreachable;
+            result.appendAssumeCapacity(byte);
+            i += 3;
+        } else {
+            result.appendAssumeCapacity(input[i]);
+            i += 1;
+        }
+    }
+
+    return result.items;
+}
+
 const testing = @import("../testing.zig");
 test "URL: isCompleteHTTPUrl" {
     try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about"));
@@ -1233,3 +1269,74 @@ test "URL: getRobotsUrl" {
         try testing.expectString("https://example.com/robots.txt", url);
     }
 }
+
+test "URL: unescape" {
+    defer testing.reset();
+    const arena = testing.arena_allocator;
+
+    {
+        const result = try unescape(arena, "hello world");
+        try testing.expectEqual("hello world", result);
+    }
+
+    {
+        const result = try unescape(arena, "hello%20world");
+        try testing.expectEqual("hello world", result);
+    }
+
+    {
+        const result = try unescape(arena, "%48%65%6c%6c%6f");
+        try testing.expectEqual("Hello", result);
+    }
+
+    {
+        const result = try unescape(arena, "%48%65%6C%6C%6F");
+        try testing.expectEqual("Hello", result);
+    }
+
+    {
+        const result = try unescape(arena, "a%3Db");
+        try testing.expectEqual("a=b", result);
+    }
+
+    {
+        const result = try unescape(arena, "a%3DB");
+        try testing.expectEqual("a=B", result);
+    }
+
+    {
+        const result = try unescape(arena, "ZDIgPSAndHdvJzs%3D");
+        try testing.expectEqual("ZDIgPSAndHdvJzs=", result);
+    }
+
+    {
+        const result = try unescape(arena, "%5a%44%4d%67%50%53%41%6e%64%47%68%79%5a%57%55%6e%4f%77%3D%3D");
+        try testing.expectEqual("ZDMgPSAndGhyZWUnOw==", result);
+    }
+
+    {
+        const result = try unescape(arena, "hello%2world");
+        try testing.expectEqual("hello%2world", result);
+    }
+
+    {
+        const result = try unescape(arena, "hello%ZZworld");
+        try testing.expectEqual("hello%ZZworld", result);
+    }
+
+    {
+        const result = try unescape(arena, "hello%");
+        try testing.expectEqual("hello%", result);
+    }
+
+    {
+        const result = try unescape(arena, "hello%2");
+        try testing.expectEqual("hello%2", result);
+    }
+
+    {
+        // A sign character is not a hex digit, so this is not an escape.
+        const result = try unescape(arena, "a%+5b");
+        try testing.expectEqual("a%+5b", result);
+    }
+}
diff --git a/src/browser/tests/legacy/html/script/script.html b/src/browser/tests/legacy/html/script/script.html
index 5049e4bb..d5910a62 100644
--- a/src/browser/tests/legacy/html/script/script.html
+++ b/src/browser/tests/legacy/html/script/script.html
@@ -19,3 +19,13 @@
+
+
+
+
+
+
+
+
+
+