store charset value directly in Mime

2026-02-04 14:33:47 +00:00 · 2025-09-15 15:15:08 +03:00
parent 27ffea9052
commit 974f350f27
3 changed files with 21 additions and 35 deletions
--- a/src/browser/mime.zig
+++ b/src/browser/mime.zig
@@ -22,13 +22,15 @@ const Allocator = std.mem.Allocator;
 pub const Mime = struct {
    content_type: ContentType,
    params: []const u8 = "",
-    charset: ?[:0]const u8 = null,
+    // IANA defines the maximum charset value length as 40.
    // We keep 41 bytes for null-termination, since the HTML parser expects it in this format.
    charset: [41]u8 = default_charset,
-    pub const unknown = Mime{
+    /// The string "UTF-8" followed by null characters (zero-padded to 41 bytes).
-        .params = "",
+    pub const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
-        .charset = null,
+
-        .content_type = .{ .unknown = {} },
+    /// Mime with unknown Content-Type, empty params and the default charset (UTF-8).
-    };
+    pub const unknown = Mime{ .content_type = .{ .unknown = {} } };
    pub const ContentTypeEnum = enum {
        text_xml,
@@ -52,6 +54,11 @@ pub const Mime = struct {
        other: struct { type: []const u8, sub_type: []const u8 },
    };
    /// Returns the charset value as a null-terminated string.
    ///
    /// The result is a view of the `charset` buffer stored in `mime`; no copy
    /// or allocation is made.
    pub inline fn charsetString(mime: *const Mime) [:0]const u8 {
        // Reinterprets the fixed-size charset buffer as a sentinel-terminated slice.
        // NOTE(review): confirm the length of the resulting slice and that the
        // buffer always contains a 0 byte — a value filling the whole buffer
        // would leave no terminator.
        return @ptrCast(&mime.charset);
    }
    /// Removes quotes of value if quotes are given.
    ///
    /// Currently we don't validate the charset.
@@ -158,7 +165,7 @@ pub const Mime = struct {
        const params = trimLeft(normalized[type_len..]);
-        var charset: ?[:0]const u8 = null;
+        var charset: [41]u8 = undefined;
        var it = std.mem.splitScalar(u8, params, ';');
        while (it.next()) |attr| {
@@ -176,35 +183,14 @@ pub const Mime = struct {
            switch (attribute_name) {
                .charset => {
                    // We used to have a proper value parser, but we currently
                    // only care about the charset attribute, plus only about
                    // the UTF-8 value. It's a lot easier to do it this way,
                    // and it doesn't require an allocation to (a) unescape the
                    // value or (b) ensure the correct lifetime.
                    if (value.len == 0) {
                        break;
                    }
                    var attribute_value = value;
                    if (value[0] == '"') {
                        if (value.len < 3 or value[value.len - 1] != '"') {
                            return error.Invalid;
                        }
                        attribute_value = value[1 .. value.len - 1];
                    }
-                    if (std.ascii.eqlIgnoreCase(attribute_value, "utf-8")) {
+                    const attribute_value = try parseCharset(value);
-                        charset = "UTF-8";
+                    @memcpy(charset[0..attribute_value.len], attribute_value);
-                    } else if (std.ascii.eqlIgnoreCase(attribute_value, "iso-8859-1")) {
+                    // Fill the rest with zeroes.
-                        charset = "ISO-8859-1";
+                    @memset(charset[attribute_value.len..], 0);
                    } else {
                        // we only care about null (which we default to UTF-8)
                        // or UTF-8. If this is actually set (i.e. not null)
                        // and isn't UTF-8, we'll just put a dummy value. If
                        // we want to capture the actual value, we'll need to
                        // dupe/allocate it. Since, for now, we don't need that
                        // we can avoid the allocation.
                        charset = "lightpanda:UNSUPPORTED";
                    }
                },
            }
        }
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -672,14 +672,14 @@ pub const Page = struct {
            log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
            self.mode = switch (mime.content_type) {
-                .text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
+                .text_html => .{ .html = try parser.Parser.init(mime.charsetString()) },
                .application_json,
                .text_javascript,
                .text_css,
                .text_plain,
                => blk: {
-                    var p = try parser.Parser.init(mime.charset orelse "UTF-8");
+                    var p = try parser.Parser.init(mime.charsetString());
                    try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>");
                    break :blk .{ .text = p };
                },
--- a/src/browser/xhr/xhr.zig
+++ b/src/browser/xhr/xhr.zig
@@ -679,7 +679,7 @@ pub const XMLHttpRequest = struct {
        }
        var fbs = std.io.fixedBufferStream(self.response_bytes.items);
-        const doc = parser.documentHTMLParse(fbs.reader(), mime.charset orelse "UTF-8") catch {
+        const doc = parser.documentHTMLParse(fbs.reader(), mime.charsetString()) catch {
            self.response_obj = .{ .Failure = {} };
            return;
        };