Merge pull request #1868 from lightpanda-io/bom_charset
Some checks failed
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
nightly build / build-linux-x86_64 (push) Has been cancelled
nightly build / build-linux-aarch64 (push) Has been cancelled
nightly build / build-macos-aarch64 (push) Has been cancelled
nightly build / build-macos-x86_64 (push) Has been cancelled

Set charset based on BOM
This commit is contained in:
Karl Seguin
2026-03-16 23:36:44 +08:00
committed by GitHub

View File

@@ -309,15 +309,30 @@ pub fn sniff(body: []const u8) ?Mime {
if (content[0] != '<') { if (content[0] != '<') {
if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) { if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
// UTF-8 BOM // UTF-8 BOM
return .{ .content_type = .{ .text_plain = {} } }; return .{
.content_type = .{ .text_plain = {} },
.charset = default_charset,
.charset_len = default_charset_len,
.is_default_charset = false,
};
} }
if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) { if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
// UTF-16 big-endian BOM // UTF-16 big-endian BOM
return .{ .content_type = .{ .text_plain = {} } }; return .{
.content_type = .{ .text_plain = {} },
.charset = .{ 'U', 'T', 'F', '-', '1', '6', 'B', 'E' } ++ .{0} ** 33,
.charset_len = 8,
.is_default_charset = false,
};
} }
if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) { if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
// UTF-16 little-endian BOM // UTF-16 little-endian BOM
return .{ .content_type = .{ .text_plain = {} } }; return .{
.content_type = .{ .text_plain = {} },
.charset = .{ 'U', 'T', 'F', '-', '1', '6', 'L', 'E' } ++ .{0} ** 33,
.charset_len = 8,
.is_default_charset = false,
};
} }
return null; return null;
} }
@@ -671,6 +686,24 @@ test "Mime: sniff" {
try expectHTML("<!-->"); try expectHTML("<!-->");
try expectHTML(" \n\t <!-->"); try expectHTML(" \n\t <!-->");
{
const mime = Mime.sniff(&.{ 0xEF, 0xBB, 0xBF }).?;
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
try testing.expectEqual("UTF-8", mime.charsetString());
}
{
const mime = Mime.sniff(&.{ 0xFE, 0xFF }).?;
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
try testing.expectEqual("UTF-16BE", mime.charsetString());
}
{
const mime = Mime.sniff(&.{ 0xFF, 0xFE }).?;
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
try testing.expectEqual("UTF-16LE", mime.charsetString());
}
} }
const Expectation = struct { const Expectation = struct {