store charset value directly in Mime

This commit is contained in:
nikneym
2025-09-15 15:15:08 +03:00
parent 27ffea9052
commit 974f350f27
3 changed files with 21 additions and 35 deletions

View File

@@ -22,13 +22,15 @@ const Allocator = std.mem.Allocator;
pub const Mime = struct { pub const Mime = struct {
content_type: ContentType, content_type: ContentType,
params: []const u8 = "", params: []const u8 = "",
charset: ?[:0]const u8 = null, // IANA defines max. charset value length as 40.
// We keep 41 bytes to leave room for null-termination, since the HTML parser expects this format.
charset: [41]u8 = default_charset,
pub const unknown = Mime{ /// String "UTF-8" continued by null characters.
.params = "", pub const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
.charset = null,
.content_type = .{ .unknown = {} }, /// Mime with unknown Content-Type, empty params and the default ("UTF-8") charset.
}; pub const unknown = Mime{ .content_type = .{ .unknown = {} } };
pub const ContentTypeEnum = enum { pub const ContentTypeEnum = enum {
text_xml, text_xml,
@@ -52,6 +54,11 @@ pub const Mime = struct {
other: struct { type: []const u8, sub_type: []const u8 }, other: struct { type: []const u8, sub_type: []const u8 },
}; };
/// Returns the charset as a null-terminated string slice.
///
/// The slice length is the number of bytes before the first 0 byte in
/// `mime.charset`, so callers reading `.len` see only the charset name and
/// not the zero padding of the fixed-size buffer. The returned slice points
/// into `mime` and is only valid for as long as `mime` is.
///
/// Assumes `mime.charset` contains at least one 0 byte.
pub inline fn charsetString(mime: *const Mime) [:0]const u8 {
    // View the buffer as a C string, then measure up to the terminator
    // instead of exposing the whole buffer as the slice.
    const terminated: [*:0]const u8 = @ptrCast(&mime.charset);
    return std.mem.span(terminated);
}
/// Removes quotes of value if quotes are given. /// Removes quotes of value if quotes are given.
/// ///
/// Currently we don't validate the charset. /// Currently we don't validate the charset.
@@ -158,7 +165,7 @@ pub const Mime = struct {
const params = trimLeft(normalized[type_len..]); const params = trimLeft(normalized[type_len..]);
var charset: ?[:0]const u8 = null; var charset: [41]u8 = undefined;
var it = std.mem.splitScalar(u8, params, ';'); var it = std.mem.splitScalar(u8, params, ';');
while (it.next()) |attr| { while (it.next()) |attr| {
@@ -176,35 +183,14 @@ pub const Mime = struct {
switch (attribute_name) { switch (attribute_name) {
.charset => { .charset => {
// We used to have a proper value parser, but we currently
// only care about the charset attribute, plus only about
// the UTF-8 value. It's a lot easier to do it this way,
// and it doesn't require an allocation to (a) unescape the
// value or (b) ensure the correct lifetime.
if (value.len == 0) { if (value.len == 0) {
break; break;
} }
var attribute_value = value;
if (value[0] == '"') {
if (value.len < 3 or value[value.len - 1] != '"') {
return error.Invalid;
}
attribute_value = value[1 .. value.len - 1];
}
if (std.ascii.eqlIgnoreCase(attribute_value, "utf-8")) { const attribute_value = try parseCharset(value);
charset = "UTF-8"; @memcpy(charset[0..attribute_value.len], attribute_value);
} else if (std.ascii.eqlIgnoreCase(attribute_value, "iso-8859-1")) { // Fill the rest with zeroes.
charset = "ISO-8859-1"; @memset(charset[attribute_value.len..], 0);
} else {
// we only care about null (which we default to UTF-8)
// or UTF-8. If this is actually set (i.e. not null)
// and isn't UTF-8, we'll just put a dummy value. If
// we want to capture the actual value, we'll need to
// dupe/allocate it. Since, for now, we don't need that
// we can avoid the allocation.
charset = "lightpanda:UNSUPPORTED";
}
}, },
} }
} }

View File

@@ -672,14 +672,14 @@ pub const Page = struct {
log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len }); log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
self.mode = switch (mime.content_type) { self.mode = switch (mime.content_type) {
.text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") }, .text_html => .{ .html = try parser.Parser.init(mime.charsetString()) },
.application_json, .application_json,
.text_javascript, .text_javascript,
.text_css, .text_css,
.text_plain, .text_plain,
=> blk: { => blk: {
var p = try parser.Parser.init(mime.charset orelse "UTF-8"); var p = try parser.Parser.init(mime.charsetString());
try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>"); try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>");
break :blk .{ .text = p }; break :blk .{ .text = p };
}, },

View File

@@ -679,7 +679,7 @@ pub const XMLHttpRequest = struct {
} }
var fbs = std.io.fixedBufferStream(self.response_bytes.items); var fbs = std.io.fixedBufferStream(self.response_bytes.items);
const doc = parser.documentHTMLParse(fbs.reader(), mime.charset orelse "UTF-8") catch { const doc = parser.documentHTMLParse(fbs.reader(), mime.charsetString()) catch {
self.response_obj = .{ .Failure = {} }; self.response_obj = .{ .Failure = {} };
return; return;
}; };