Support data URIs in script tags (#596)

* Support text/javascript mime type

* Support base64 encoded scripts

Related to https://github.com/lightpanda-io/browser/issues/412
This commit is contained in:
Kilari Teja
2025-05-05 12:18:21 +05:30
committed by GitHub
parent d9f21e0475
commit b32839292c
3 changed files with 106 additions and 8 deletions

View File

@@ -24,6 +24,7 @@ const ArenaAllocator = std.heap.ArenaAllocator;
const Dump = @import("dump.zig");
const Mime = @import("mime.zig").Mime;
const DataURI = @import("datauri.zig").DataURI;
const parser = @import("netsurf.zig");
const Window = @import("html/window.zig").Window;
@@ -585,6 +586,12 @@ pub const Page = struct {
log.debug("starting fetch {s}", .{src});
const arena = self.arena;
// Handle data URIs.
if (try DataURI.parse(arena, src)) |data_uri| {
return data_uri.data;
}
var res_src = src;
// if a base path is given, we resolve src using base.

79
src/browser/datauri.zig Normal file
View File

@@ -0,0 +1,79 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
// Represents https://developer.mozilla.org/en-US/docs/Web/URI/Reference/Schemes/data
pub const DataURI = struct {
    // True when the URI carried the ";base64" marker. In that case `data` was
    // allocated by `parse` and is released by `deinit`.
    was_base64_encoded: bool,

    // The contents in the uri. It will be base64 decoded but not prepared in
    // any way for mime.charset; percent-escapes are left untouched.
    // When the URI was not base64 encoded this is a sub-slice of the `src`
    // passed to `parse`, so it is only valid for as long as `src` is.
    data: []const u8,

    // Parses data:[<media-type>][;base64],<data>
    //
    // Returns null when `src` does not start with the "data:" scheme or has
    // no ',' separating the metadata from the payload. Both the scheme and
    // the ";base64" marker are matched ASCII case-insensitively.
    //
    // Errors: allocation failure, or a base64 decoding error when the payload
    // is marked as base64 but is malformed.
    pub fn parse(allocator: Allocator, src: []const u8) !?DataURI {
        const scheme = "data:";
        if (!std.ascii.startsWithIgnoreCase(src, scheme)) {
            return null;
        }

        const uri = src[scheme.len..];
        const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null;

        // Everything before the first ',' is metadata; only the trailing
        // encoding marker is inspected for now.
        const metadata = uri[0..data_starts];
        const base64_encoded = std.ascii.endsWithIgnoreCase(metadata, ";base64");

        // TODO: Extract mime type. This is not trivial because Mime.parse
        // requires a []u8 and might mutate the src. And, the DataURI.parse
        // references atm do not have deinit calls.

        const payload = uri[data_starts + 1 ..];
        if (!base64_encoded) {
            // No allocation: the payload is handed back as a view into `src`.
            return .{
                .was_base64_encoded = false,
                .data = payload,
            };
        }

        const decoder = std.base64.standard.Decoder;
        const decoded_size = try decoder.calcSizeForSlice(payload);
        const buffer = try allocator.alloc(u8, decoded_size);
        // Do not leak the buffer if the payload turns out to be invalid.
        errdefer allocator.free(buffer);
        try decoder.decode(buffer, payload);

        return .{
            .was_base64_encoded = true,
            .data = buffer,
        };
    }

    // Releases the decoded buffer, if `parse` allocated one. `allocator` must
    // be the allocator that was given to `parse`.
    pub fn deinit(self: *const DataURI, allocator: Allocator) void {
        if (self.was_base64_encoded) {
            allocator.free(self.data);
        }
    }
};
const testing = std.testing;
test "DataURI: parse valid" {
    // Each case pairs a data URI with the payload expected after parsing.
    const Case = struct { uri: []const u8, expected: []const u8 };
    const cases = [_]Case{
        .{ .uri = "data:text/javascript; charset=utf-8;base64,Zm9v", .expected = "foo" },
        .{ .uri = "data:text/javascript; charset=utf-8;,foo", .expected = "foo" },
        .{ .uri = "data:,foo", .expected = "foo" },
    };

    for (cases) |case| {
        try test_valid(case.uri, case.expected);
    }
}
test "DataURI: parse invalid" {
    // Inputs that must not be recognised as data URIs: a wrong scheme, a
    // missing ',' separator, and a bare scheme.
    const rejected = [_][]const u8{
        "atad:,foo",
        "data:foo",
        "data:",
    };

    for (rejected) |uri| {
        try test_cannot_parse(uri);
    }
}
// Parses `uri` and checks that its payload equals `expected`.
// Fails with error.TestFailed when `uri` is not recognised as a data URI.
fn test_valid(uri: []const u8, expected: []const u8) !void {
    const allocator = testing.allocator;

    const maybe_parsed = try DataURI.parse(allocator, uri);
    const parsed = maybe_parsed orelse return error.TestFailed;
    defer parsed.deinit(allocator);

    try testing.expectEqualStrings(expected, parsed.data);
}
// Checks that `uri` is rejected, i.e. that DataURI.parse yields null for it.
fn test_cannot_parse(uri: []const u8) !void {
    const outcome = DataURI.parse(testing.allocator, uri);
    try testing.expectEqual(null, outcome);
}

View File

@@ -33,6 +33,7 @@ pub const Mime = struct {
pub const ContentTypeEnum = enum {
text_xml,
text_html,
text_javascript,
text_plain,
unknown,
other,
@@ -41,6 +42,7 @@ pub const Mime = struct {
pub const ContentType = union(ContentTypeEnum) {
text_xml: void,
text_html: void,
text_javascript: void,
text_plain: void,
unknown: void,
other: struct { type: []const u8, sub_type: []const u8 },
@@ -172,11 +174,17 @@ pub const Mime = struct {
if (std.meta.stringToEnum(enum {
@"text/xml",
@"text/html",
@"text/javascript",
@"application/javascript",
@"application/x-javascript",
@"text/plain",
}, type_name)) |known_type| {
const ct: ContentType = switch (known_type) {
.@"text/xml" => .{ .text_xml = {} },
.@"text/html" => .{ .text_html = {} },
.@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
.@"text/plain" => .{ .text_plain = {} },
};
return .{ ct, attribute_start };
@@ -337,22 +345,26 @@ test "Mime: parse common" {
try expect(.{ .content_type = .{ .text_xml = {} } }, " TeXT/xml");
try expect(.{ .content_type = .{ .text_html = {} } }, "teXt/HtML ;");
try expect(.{ .content_type = .{ .text_plain = {} } }, "tExT/PlAiN;");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript");
}
test "Mime: parse uncommon" {
defer testing.reset();
const text_javascript = Expectation{
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
const text_csv = Expectation{
.content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } },
};
try expect(text_javascript, "text/javascript");
try expect(text_javascript, "text/javascript;");
try expect(text_javascript, " text/javascript\t ");
try expect(text_javascript, " text/javascript\t ;");
try expect(text_csv, "text/csv");
try expect(text_csv, "text/csv;");
try expect(text_csv, " text/csv\t ");
try expect(text_csv, " text/csv\t ;");
try expect(
.{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
"Text/Javascript",
.{ .content_type = .{ .other = .{ .type = "text", .sub_type = "csv" } } },
"Text/CSV",
);
}