mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 04:34:44 +00:00
Merge pull request #1868 from lightpanda-io/bom_charset
Some checks failed
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
nightly build / build-linux-x86_64 (push) Has been cancelled
nightly build / build-linux-aarch64 (push) Has been cancelled
nightly build / build-macos-aarch64 (push) Has been cancelled
nightly build / build-macos-x86_64 (push) Has been cancelled
Some checks failed
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
nightly build / build-linux-x86_64 (push) Has been cancelled
nightly build / build-linux-aarch64 (push) Has been cancelled
nightly build / build-macos-aarch64 (push) Has been cancelled
nightly build / build-macos-x86_64 (push) Has been cancelled
Set charset based on BOM
This commit is contained in:
@@ -309,15 +309,30 @@ pub fn sniff(body: []const u8) ?Mime {
|
|||||||
if (content[0] != '<') {
|
if (content[0] != '<') {
|
||||||
if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
|
if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
|
||||||
// UTF-8 BOM
|
// UTF-8 BOM
|
||||||
return .{ .content_type = .{ .text_plain = {} } };
|
return .{
|
||||||
|
.content_type = .{ .text_plain = {} },
|
||||||
|
.charset = default_charset,
|
||||||
|
.charset_len = default_charset_len,
|
||||||
|
.is_default_charset = false,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
|
if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
|
||||||
// UTF-16 big-endian BOM
|
// UTF-16 big-endian BOM
|
||||||
return .{ .content_type = .{ .text_plain = {} } };
|
return .{
|
||||||
|
.content_type = .{ .text_plain = {} },
|
||||||
|
.charset = .{ 'U', 'T', 'F', '-', '1', '6', 'B', 'E' } ++ .{0} ** 33,
|
||||||
|
.charset_len = 8,
|
||||||
|
.is_default_charset = false,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
|
if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
|
||||||
// UTF-16 little-endian BOM
|
// UTF-16 little-endian BOM
|
||||||
return .{ .content_type = .{ .text_plain = {} } };
|
return .{
|
||||||
|
.content_type = .{ .text_plain = {} },
|
||||||
|
.charset = .{ 'U', 'T', 'F', '-', '1', '6', 'L', 'E' } ++ .{0} ** 33,
|
||||||
|
.charset_len = 8,
|
||||||
|
.is_default_charset = false,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -671,6 +686,24 @@ test "Mime: sniff" {
|
|||||||
|
|
||||||
try expectHTML("<!-->");
|
try expectHTML("<!-->");
|
||||||
try expectHTML(" \n\t <!-->");
|
try expectHTML(" \n\t <!-->");
|
||||||
|
|
||||||
|
{
|
||||||
|
const mime = Mime.sniff(&.{ 0xEF, 0xBB, 0xBF }).?;
|
||||||
|
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
|
||||||
|
try testing.expectEqual("UTF-8", mime.charsetString());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const mime = Mime.sniff(&.{ 0xFE, 0xFF }).?;
|
||||||
|
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
|
||||||
|
try testing.expectEqual("UTF-16BE", mime.charsetString());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const mime = Mime.sniff(&.{ 0xFF, 0xFE }).?;
|
||||||
|
try testing.expectEqual(.text_plain, std.meta.activeTag(mime.content_type));
|
||||||
|
try testing.expectEqual("UTF-16LE", mime.charsetString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Expectation = struct {
|
const Expectation = struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user