Improve TextDecoder.decode

1 - Optional input (why? I don't know, but it's part of the spec and happens)
2 - Optional stream parameter
3 - More test cases
This commit is contained in:
Karl Seguin
2025-09-12 12:31:28 +08:00
parent 6d3065c4c6
commit 0913abe806
2 changed files with 56 additions and 7 deletions

View File

@@ -20,6 +20,7 @@ const std = @import("std");
const log = @import("../../log.zig");
const Env = @import("../env.zig").Env;
const Page = @import("../page.zig").Page;
// https://encoding.spec.whatwg.org/#interface-textdecoder
const TextDecoder = @This();
@@ -37,6 +38,7 @@ const Options = struct {
fatal: bool,
ignore_bom: bool,
stream: std.ArrayList(u8),
pub fn constructor(label_: ?[]const u8, opts_: ?Options) !TextDecoder {
if (label_) |l| {
@@ -47,6 +49,7 @@ pub fn constructor(label_: ?[]const u8, opts_: ?Options) !TextDecoder {
}
const opts = opts_ orelse Options{};
return .{
.stream = .empty,
.fatal = opts.fatal,
.ignore_bom = opts.ignoreBOM,
};
@@ -64,18 +67,34 @@ pub fn get_fatal(self: *const TextDecoder) bool {
return self.fatal;
}
// TODO: Should accept an ArrayBuffer, TypedArray or DataView
// js.zig will currently only map a TypedArray to our []const u8.
/// Options accepted as the second argument of `decode`.
const DecodeOptions = struct {
    /// When true, a multi-byte sequence that is cut off by the end of the
    /// input is held back and completed by the next call.
    stream: bool = false,
};

/// Decodes `input_` as UTF-8.
///
/// - A missing input is treated as an empty chunk, so bytes held back by an
///   earlier `stream: true` call are still flushed.
/// - With `opts_.stream` set, a trailing multi-byte sequence that is merely
///   incomplete is kept in `self.stream` and prepended to the next call.
///   Everything before it is returned immediately.
/// - With `self.fatal` set, `error.InvalidUtf8` is returned when the bytes
///   being emitted are not valid UTF-8. Nothing stays buffered in that case,
///   so the decoder can be reused afterwards.
/// - Unless `self.ignore_bom` is set, a leading UTF-8 byte order mark is
///   removed from the returned bytes. This is checked on every call.
///
/// The returned slice points either into `input_` or into memory allocated
/// from `page.arena`; it never aliases `self.stream`.
pub fn _decode(self: *TextDecoder, input_: ?[]const u8, opts_: ?DecodeOptions, page: *Page) ![]const u8 {
    const opts: DecodeOptions = opts_ orelse .{};
    const input: []const u8 = input_ orelse "";

    // Prepend whatever a previous streaming call held back. The pending
    // bytes are copied out so that `self.stream` can be reused below without
    // invalidating the slice we are about to return.
    const bytes: []const u8 = if (self.stream.items.len == 0) input else blk: {
        const joined = try std.mem.concat(page.arena, u8, &.{ self.stream.items, input });
        self.stream.clearRetainingCapacity();
        break :blk joined;
    };

    // Only a streaming call may hold bytes back; a final call must decode
    // (or reject) everything it has.
    const held_len = if (opts.stream) incompleteTailLen(bytes) else 0;
    const ready = bytes[0 .. bytes.len - held_len];

    // `self.stream` is empty at this point, so failing here leaves no stale
    // state behind.
    if (self.fatal and !std.unicode.utf8ValidateSlice(ready)) {
        return error.InvalidUtf8;
    }

    if (held_len > 0) {
        try self.stream.appendSlice(page.arena, bytes[ready.len..]);
    }

    if (!self.ignore_bom and std.mem.startsWith(u8, ready, &.{ 0xEF, 0xBB, 0xBF })) {
        return ready[3..];
    }
    return ready;
}

/// Returns the number of bytes at the end of `bytes` that start a multi-byte
/// UTF-8 sequence which is cut off by the end of the slice, or 0 when the
/// slice does not end in the middle of a sequence.
fn incompleteTailLen(bytes: []const u8) usize {
    // A sequence is at most 4 bytes long, so an incomplete one has its lead
    // byte within the last 3 bytes.
    const max_lookback = @min(bytes.len, 3);
    var back: usize = 1;
    while (back <= max_lookback) : (back += 1) {
        const byte = bytes[bytes.len - back];
        // Continuation byte (0b10xxxxxx): keep looking for its lead byte.
        if ((byte & 0xC0) == 0x80) continue;
        // Not a valid lead byte: nothing to wait for, let validation decide.
        const needed = std.unicode.utf8ByteSequenceLength(byte) catch return 0;
        return if (needed > back) back else 0;
    }
    return 0;
}
const testing = @import("../../testing.zig");

View File

@@ -1,3 +1,6 @@
<!DOCTYPE html>
<meta charset="UTF-8">
<script src="../testing.js"></script>
<script id=decoder>
let d1 = new TextDecoder();
@@ -5,13 +8,40 @@
testing.expectEqual(false, d1.fatal);
testing.expectEqual(false, d1.ignoreBOM);
testing.expectEqual('', d1.decode());
testing.expectEqual('𠮷', d1.decode(new Uint8Array([240, 160, 174, 183])));
testing.expectEqual('𠮷', d1.decode(new Uint8Array([0xEF, 0xBB, 0xBF, 240, 160, 174, 183])));
testing.expectEqual('\uFFFD2', d1.decode(new Uint8Array([249, 50])));
{
const buffer = new ArrayBuffer(4);
const ints = new Uint8Array(buffer)
ints[0] = 240;
ints[1] = 160;
ints[2] = 174;
ints[3] = 183;
testing.expectEqual('𠮷', d1.decode(buffer));
}
{
const buffer = new ArrayBuffer(4);
const dv = new DataView(buffer);
dv.setUint8(0, 240);
dv.setUint8(1, 160);
dv.setUint8(2, 174);
dv.setUint8(3, 183);
testing.expectEqual('𠮷', d1.decode(dv));
}
let d2 = new TextDecoder('utf8', {fatal: true})
testing.expectError('Error: InvalidUtf8', () => {
let data = new Uint8Array([240, 240, 160, 174, 183]);
d2.decode(data);
});
</script>
<script id=stream>
  // Use a dedicated decoder so this script does not depend on `d2`, which is
  // declared in the `decoder` script. It is created with `fatal: true`, the
  // same option `d2` uses, so the assertions exercise the same decoder
  // configuration as before.
  let d3 = new TextDecoder('utf8', {fatal: true});

  // First two bytes of the 3-byte sequence for U+2665: nothing is emitted yet.
  testing.expectEqual('', d3.decode(new Uint8Array([226, 153]), { stream: true }));
  // The final byte completes the sequence.
  testing.expectEqual('♥', d3.decode(new Uint8Array([165]), { stream: true }));
</script>