Support data URIs in script tags (#596)

* Support text/javascript mime type
* Support base64 encoded scripts

Related to https://github.com/lightpanda-io/browser/issues/412
2025-10-29 15:13:28 +00:00 · 2025-05-05 12:18:21 +05:30
parent d9f21e0475
commit b32839292c
3 changed files with 106 additions and 8 deletions
--- a/src/browser/browser.zig
+++ b/src/browser/browser.zig
@@ -24,6 +24,7 @@ const ArenaAllocator = std.heap.ArenaAllocator;

 const Dump = @import("dump.zig");
 const Mime = @import("mime.zig").Mime;
+const DataURI = @import("datauri.zig").DataURI;
 const parser = @import("netsurf.zig");

 const Window = @import("html/window.zig").Window;
@@ -585,6 +586,12 @@ pub const Page = struct {
        log.debug("starting fetch {s}", .{src});

        const arena = self.arena;
+
+        // Handle data URIs.
+        if (try DataURI.parse(arena, src)) |data_uri| {
+            return data_uri.data;
+        }
+
        var res_src = src;

        // if a base path is given, we resolve src using base.
--- a/src/browser/datauri.zig
+++ b/src/browser/datauri.zig
@@ -0,0 +1,79 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+/// A parsed `data:` URI.
+///
+/// Represents https://developer.mozilla.org/en-US/docs/Web/URI/Reference/Schemes/data
+pub const DataURI = struct {
+    /// True when the URI carried the `;base64` marker. In that case `data`
+    /// points at a buffer allocated by `parse` and released by `deinit`;
+    /// otherwise `data` is a slice of the `src` passed to `parse`.
+    was_base64_encoded: bool,
+    // The contents in the uri. It will be base64 decoded but not prepared in
+    // any way for mime.charset. Non-base64 contents are returned verbatim
+    // (no percent-decoding is applied).
+    data: []const u8,
+
+    const scheme = "data:";
+    const base64_marker = ";base64";
+
+    /// Parses data:[<media-type>][;base64],<data>
+    ///
+    /// Returns null when `src` does not start with the `data:` scheme or has
+    /// no `,` separating the metadata from the data. Returns an error when the
+    /// base64 payload is malformed or the decode buffer cannot be allocated.
+    /// On success the caller should pair the result with `deinit`, passing
+    /// the same allocator.
+    pub fn parse(allocator: Allocator, src: []const u8) !?DataURI {
+        // URI schemes are case-insensitive, so `DATA:` must be accepted too.
+        if (!std.ascii.startsWithIgnoreCase(src, scheme)) {
+            return null;
+        }
+
+        const uri = src[scheme.len..];
+        const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null;
+
+        // Extract the encoding. The marker is matched without regard to ASCII
+        // case so that `;BASE64` is decoded like `;base64`.
+        const metadata = uri[0..data_starts];
+        const base64_encoded = std.ascii.endsWithIgnoreCase(metadata, base64_marker);
+
+        // TODO: Extract mime type. This is not trivial because Mime.parse
+        // requires a []u8 and might mutate the src. And, the DataURI.parse
+        // references atm do not have deinit calls.
+
+        // Prepare the data.
+        const raw = uri[data_starts + 1 ..];
+        if (!base64_encoded) {
+            // Plain payload: borrow directly from `src`, nothing to free.
+            return .{
+                .was_base64_encoded = false,
+                .data = raw,
+            };
+        }
+
+        const decoder = std.base64.standard.Decoder;
+        const decoded_size = try decoder.calcSizeForSlice(raw);
+
+        const buffer = try allocator.alloc(u8, decoded_size);
+        // Do not leak the buffer if the payload turns out to be invalid.
+        errdefer allocator.free(buffer);
+
+        try decoder.decode(buffer, raw);
+
+        return .{
+            .was_base64_encoded = true,
+            .data = buffer,
+        };
+    }
+
+    /// Releases the decoded buffer, if `parse` allocated one. `allocator`
+    /// must be the allocator that was given to `parse`.
+    pub fn deinit(self: *const DataURI, allocator: Allocator) void {
+        if (self.was_base64_encoded) {
+            allocator.free(self.data);
+        }
+    }
+};
+
+const testing = std.testing;
+test "DataURI: parse valid" {
+    // Each case pairs a data URI with the payload expected after parsing.
+    const cases = [_]struct { uri: []const u8, expected: []const u8 }{
+        .{ .uri = "data:text/javascript; charset=utf-8;base64,Zm9v", .expected = "foo" },
+        .{ .uri = "data:text/javascript; charset=utf-8;,foo", .expected = "foo" },
+        .{ .uri = "data:,foo", .expected = "foo" },
+    };
+    for (cases) |case| {
+        try test_valid(case.uri, case.expected);
+    }
+}
+
+test "DataURI: parse invalid" {
+    // Inputs that are not data URIs: wrong scheme, or no ',' separator.
+    const rejected = [_][]const u8{
+        "atad:,foo",
+        "data:foo",
+        "data:",
+    };
+    for (rejected) |uri| {
+        try test_cannot_parse(uri);
+    }
+}
+
+// Parses `uri` and checks that the resulting payload equals `expected`.
+// Fails with error.TestFailed when the input is not recognised as a data URI.
+fn test_valid(uri: []const u8, expected: []const u8) !void {
+    const maybe_parsed = try DataURI.parse(testing.allocator, uri);
+    const parsed = maybe_parsed orelse return error.TestFailed;
+    defer parsed.deinit(testing.allocator);
+
+    try testing.expectEqualStrings(expected, parsed.data);
+}
+
+// Checks that `uri` is rejected: DataURI.parse must yield null for it.
+fn test_cannot_parse(uri: []const u8) !void {
+    const outcome = DataURI.parse(testing.allocator, uri);
+    try testing.expectEqual(null, outcome);
+}
--- a/src/browser/mime.zig
+++ b/src/browser/mime.zig
@@ -33,6 +33,7 @@ pub const Mime = struct {
    pub const ContentTypeEnum = enum {
        text_xml,
        text_html,
+        text_javascript,
        text_plain,
        unknown,
        other,
@@ -41,6 +42,7 @@ pub const Mime = struct {
    pub const ContentType = union(ContentTypeEnum) {
        text_xml: void,
        text_html: void,
+        text_javascript: void,
        text_plain: void,
        unknown: void,
        other: struct { type: []const u8, sub_type: []const u8 },
@@ -172,11 +174,17 @@ pub const Mime = struct {
        if (std.meta.stringToEnum(enum {
            @"text/xml",
            @"text/html",
+
+            @"text/javascript",
+            @"application/javascript",
+            @"application/x-javascript",
+
            @"text/plain",
        }, type_name)) |known_type| {
            const ct: ContentType = switch (known_type) {
                .@"text/xml" => .{ .text_xml = {} },
                .@"text/html" => .{ .text_html = {} },
+                .@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
                .@"text/plain" => .{ .text_plain = {} },
            };
            return .{ ct, attribute_start };
@@ -337,22 +345,26 @@ test "Mime: parse common" {
    try expect(.{ .content_type = .{ .text_xml = {} } }, " TeXT/xml");
    try expect(.{ .content_type = .{ .text_html = {} } }, "teXt/HtML  ;");
    try expect(.{ .content_type = .{ .text_plain = {} } }, "tExT/PlAiN;");
+
+    try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript");
+    try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript");
+    try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript");
 }

 test "Mime: parse uncommon" {
    defer testing.reset();

-    const text_javascript = Expectation{
-        .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
+    const text_csv = Expectation{
+        .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } },
    };
-    try expect(text_javascript, "text/javascript");
-    try expect(text_javascript, "text/javascript;");
-    try expect(text_javascript, "  text/javascript\t  ");
-    try expect(text_javascript, "  text/javascript\t  ;");
+    try expect(text_csv, "text/csv");
+    try expect(text_csv, "text/csv;");
+    try expect(text_csv, "  text/csv\t  ");
+    try expect(text_csv, "  text/csv\t  ;");

    try expect(
-        .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
-        "Text/Javascript",
+        .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } } },
+        "Text/CSV",
    );
 }