From b32839292cf5bc03e5a2f6edb9a8575aacfe8f96 Mon Sep 17 00:00:00 2001 From: Kilari Teja Date: Mon, 5 May 2025 12:18:21 +0530 Subject: [PATCH] Support Data URI in scripts tags (#596) * Support text/javascript mime type * Support base64 encoded scripts Related to https://github.com/lightpanda-io/browser/issues/412 --- src/browser/browser.zig | 7 ++++ src/browser/datauri.zig | 79 +++++++++++++++++++++++++++++++++++++++++ src/browser/mime.zig | 28 ++++++++++----- 3 files changed, 106 insertions(+), 8 deletions(-) create mode 100644 src/browser/datauri.zig diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 3d35b603..7cb5c043 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -24,6 +24,7 @@ const ArenaAllocator = std.heap.ArenaAllocator; const Dump = @import("dump.zig"); const Mime = @import("mime.zig").Mime; +const DataURI = @import("datauri.zig").DataURI; const parser = @import("netsurf.zig"); const Window = @import("html/window.zig").Window; @@ -585,6 +586,12 @@ pub const Page = struct { log.debug("starting fetch {s}", .{src}); const arena = self.arena; + + // Handle data URIs. + if (try DataURI.parse(arena, src)) |data_uri| { + return data_uri.data; + } + var res_src = src; // if a base path is given, we resolve src using base. diff --git a/src/browser/datauri.zig b/src/browser/datauri.zig new file mode 100644 index 00000000..d600a255 --- /dev/null +++ b/src/browser/datauri.zig @@ -0,0 +1,79 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +// Represents https://developer.mozilla.org/en-US/docs/Web/URI/Reference/Schemes/data +pub const DataURI = struct { + was_base64_encoded: bool, + // The contents in the uri. It will be base64 decoded but not prepared in + // any way for mime.charset. 
+ data: []const u8, + + // Parses data:[mediatype][;base64],payload (both bracketed parts are optional). Returns null when src lacks the "data:" prefix or contains no ','. + pub fn parse(allocator: Allocator, src: []const u8) !?DataURI { + if (!std.mem.startsWith(u8, src, "data:")) { + return null; + } + + const uri = src[5..]; + const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null; + + // Extract the encoding: a trailing ";base64" (7 bytes) in the metadata marks a base64 payload. + var metadata = uri[0..data_starts]; + var base64_encoded = false; + if (std.mem.endsWith(u8, metadata, ";base64")) { + base64_encoded = true; + metadata = metadata[0 .. metadata.len - 7]; + } + + // TODO: Extract mime type. This is not trivial because Mime.parse requires + // a []u8 and might mutate the src. And the DataURI.parse call sites currently + // do not have deinit calls. + + // Prepare the data: base64 payloads are decoded into a buffer from allocator; otherwise data is a slice of src, returned as-is (no percent-decoding). + var data = uri[data_starts + 1 ..]; + if (base64_encoded) { + const decoder = std.base64.standard.Decoder; + const decoded_size = try decoder.calcSizeForSlice(data); + + const buffer = try allocator.alloc(u8, decoded_size); + errdefer allocator.free(buffer); + + try decoder.decode(buffer, data); + data = buffer; + } + + return .{ + .was_base64_encoded = base64_encoded, + .data = data, + }; + } + + pub fn deinit(self: *const DataURI, allocator: Allocator) void { + if (self.was_base64_encoded) { + allocator.free(self.data); + } + } +}; + +const testing = std.testing; +test "DataURI: parse valid" { + try test_valid("data:text/javascript; charset=utf-8;base64,Zm9v", "foo"); + try test_valid("data:text/javascript; charset=utf-8;,foo", "foo"); + try test_valid("data:,foo", "foo"); +} + +test "DataURI: parse invalid" { + try test_cannot_parse("atad:,foo"); + try test_cannot_parse("data:foo"); + try test_cannot_parse("data:"); +} + +fn test_valid(uri: []const u8, expected: []const u8) !void { + const data_uri = try DataURI.parse(std.testing.allocator, uri) orelse return error.TestFailed; + defer data_uri.deinit(testing.allocator); + try testing.expectEqualStrings(expected, data_uri.data); +} + +fn test_cannot_parse(uri: []const u8) !void { + try 
testing.expectEqual(null, DataURI.parse(std.testing.allocator, uri)); +} diff --git a/src/browser/mime.zig b/src/browser/mime.zig index 21e4cb8c..480d0e75 100644 --- a/src/browser/mime.zig +++ b/src/browser/mime.zig @@ -33,6 +33,7 @@ pub const Mime = struct { pub const ContentTypeEnum = enum { text_xml, text_html, + text_javascript, text_plain, unknown, other, @@ -41,6 +42,7 @@ pub const Mime = struct { pub const ContentType = union(ContentTypeEnum) { text_xml: void, text_html: void, + text_javascript: void, text_plain: void, unknown: void, other: struct { type: []const u8, sub_type: []const u8 }, @@ -172,11 +174,17 @@ pub const Mime = struct { if (std.meta.stringToEnum(enum { @"text/xml", @"text/html", + + @"text/javascript", + @"application/javascript", + @"application/x-javascript", + @"text/plain", }, type_name)) |known_type| { const ct: ContentType = switch (known_type) { .@"text/xml" => .{ .text_xml = {} }, .@"text/html" => .{ .text_html = {} }, + .@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} }, .@"text/plain" => .{ .text_plain = {} }, }; return .{ ct, attribute_start }; @@ -337,22 +345,26 @@ test "Mime: parse common" { try expect(.{ .content_type = .{ .text_xml = {} } }, " TeXT/xml"); try expect(.{ .content_type = .{ .text_html = {} } }, "teXt/HtML ;"); try expect(.{ .content_type = .{ .text_plain = {} } }, "tExT/PlAiN;"); + + try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript"); + try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript"); + try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript"); } test "Mime: parse uncommon" { defer testing.reset(); - const text_javascript = Expectation{ - .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } }, + const text_csv = Expectation{ + .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } }, }; - try expect(text_javascript, 
"text/javascript"); - try expect(text_javascript, "text/javascript;"); - try expect(text_javascript, " text/javascript\t "); - try expect(text_javascript, " text/javascript\t ;"); + try expect(text_csv, "text/csv"); + try expect(text_csv, "text/csv;"); + try expect(text_csv, " text/csv\t "); + try expect(text_csv, " text/csv\t ;"); try expect( - .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } }, - "Text/Javascript", + .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } } }, + "Text/CSV", ); }