Add TextDecoderStream to decode UTF-8 byte streams into strings

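Mirrors TextEncoderStream: wraps a TransformStream with a Zig-level
transform that converts Uint8Array chunks to strings. Accepts the
same constructor arguments as TextDecoder (label, fatal, ignoreBOM);
only UTF-8 labels are accepted, and fatal/ignoreBOM are exposed as
properties but are not yet consulted by the transform itself.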
This commit is contained in:
Pierre Tachoire
2026-03-02 11:48:32 +01:00
parent 5d3b965d28
commit 23d322452a
3 changed files with 169 additions and 0 deletions

View File

@@ -827,6 +827,7 @@ pub const JsApis = flattenTypes(&.{
@import("../webapi/encoding/TextDecoder.zig"),
@import("../webapi/encoding/TextEncoder.zig"),
@import("../webapi/encoding/TextEncoderStream.zig"),
@import("../webapi/encoding/TextDecoderStream.zig"),
@import("../webapi/Event.zig"),
@import("../webapi/event/CompositionEvent.zig"),
@import("../webapi/event/CustomEvent.zig"),

View File

@@ -0,0 +1,61 @@
<!DOCTYPE html>
<script src="../testing.js"></script>
<script id=text_decoder_stream_encoding>
{
  // A stream built with no arguments reports UTF-8, exposes both stream
  // sides as objects, and has both decoder flags switched off.
  const stream = new TextDecoderStream();
  const { encoding, readable, writable, fatal, ignoreBOM } = stream;

  testing.expectEqual('utf-8', encoding);
  testing.expectEqual('object', typeof readable);
  testing.expectEqual('object', typeof writable);
  testing.expectEqual(false, fatal);
  testing.expectEqual(false, ignoreBOM);
}
</script>
<script id=text_decoder_stream_with_label>
{
  // An explicit 'utf-8' label is accepted and echoed back by `encoding`.
  const { encoding } = new TextDecoderStream('utf-8');
  testing.expectEqual('utf-8', encoding);
}
</script>
<script id=text_decoder_stream_with_opts>
{
  // Both boolean options passed to the constructor are readable afterwards.
  const options = { fatal: true, ignoreBOM: true };
  const stream = new TextDecoderStream('utf-8', options);

  testing.expectEqual(true, stream.fatal);
  testing.expectEqual(true, stream.ignoreBOM);
}
</script>
<script id=text_decoder_stream_invalid_label>
{
  // Constructing with a non-UTF-8 label must throw.
  const constructorThrows = (() => {
    try {
      new TextDecoderStream('windows-1252');
    } catch (_err) {
      return true;
    }
    return false;
  })();

  testing.expectEqual(true, constructorThrows);
}
</script>
<script id=text_decoder_stream_decode>
(async () => {
  // Round trip: write one UTF-8 chunk, close the writable side, then expect
  // one decoded string followed by end-of-stream on the readable side.
  //
  // NOTE(review): the write and close are awaited before the first read.
  // Under the Streams spec's default TransformStream strategies (readable
  // highWaterMark of 0) that ordering can stall on backpressure; this test
  // presumably relies on the implementation not applying it - confirm.
  const { writable, readable } = new TextDecoderStream();
  const writer = writable.getWriter();
  const reader = readable.getReader();

  // 'hello' in UTF-8 bytes
  const helloBytes = new Uint8Array([104, 101, 108, 108, 111]);
  await writer.write(helloBytes);
  await writer.close();

  const first = await reader.read();
  testing.expectEqual(false, first.done);
  testing.expectEqual('hello', first.value);

  const second = await reader.read();
  testing.expectEqual(true, second.done);
})();
</script>

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const js = @import("../../js/js.zig");
const Page = @import("../../Page.zig");
const ReadableStream = @import("../streams/ReadableStream.zig");
const WritableStream = @import("../streams/WritableStream.zig");
const TransformStream = @import("../streams/TransformStream.zig");
const TextDecoderStream = @This();
/// Underlying transform stream; supplies the `readable` and `writable` sides.
_transform: *TransformStream,
/// Value of the `fatal` constructor option, reported by `getFatal`.
_fatal: bool,
/// Value of the `ignoreBOM` constructor option, reported by `getIgnoreBOM`.
_ignore_bom: bool,
/// Labels that select UTF-8, the only encoding this stream accepts.
///
/// The set follows the UTF-8 row of the WHATWG Encoding Standard's label
/// table. Each field name is matched against the constructor's `label`
/// argument with `std.meta.stringToEnum`, so the spelling of a field is the
/// spelling of the accepted label.
const Label = enum {
    utf8,
    @"utf-8",
    @"unicode-1-1-utf-8",
    unicode11utf8,
    unicode20utf8,
    @"x-unicode20utf8",
};
/// Optional second constructor argument. The field names are the JavaScript
/// option names (`fatal`, `ignoreBOM`) as passed by callers, which is
/// presumably why `ignoreBOM` is not snake_case.
const InitOpts = struct {
    /// Copied into `_fatal` by `init`; defaults to false.
    fatal: bool = false,
    /// Copied into `_ignore_bom` by `init`; defaults to false.
    ignoreBOM: bool = false,
};
/// Creates a TextDecoderStream backed by a TransformStream whose transform
/// step is `decodeTransform`.
///
/// `label_`, when given, must name UTF-8. Following the WHATWG Encoding
/// Standard's "get an encoding" steps, leading and trailing ASCII whitespace
/// is stripped and the comparison is ASCII case-insensitive, so `"UTF-8"` and
/// `" utf8 "` are accepted. Any other label fails with `error.RangeError`.
///
/// `opts_` supplies `fatal` and `ignoreBOM`; both default to false and are
/// stored on the returned value for the corresponding accessors.
///
/// Errors from `TransformStream.initWithZigTransform` are propagated.
pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !TextDecoderStream {
    if (label_) |label| {
        // ASCII whitespace per the Infra Standard: TAB, LF, FF, CR, SPACE.
        const trimmed = std.mem.trim(u8, label, "\t\n\x0c\r ");

        // Every supported label is far shorter than this buffer, so a longer
        // input cannot match and is rejected before it is lowercased.
        var lower_buf: [32]u8 = undefined;
        if (trimmed.len > lower_buf.len) return error.RangeError;
        const normalized = std.ascii.lowerString(&lower_buf, trimmed);

        _ = std.meta.stringToEnum(Label, normalized) orelse return error.RangeError;
    }

    const opts = opts_ orelse InitOpts{};
    const transform = try TransformStream.initWithZigTransform(&decodeTransform, page);
    return .{
        ._transform = transform,
        ._fatal = opts.fatal,
        ._ignore_bom = opts.ignoreBOM,
    };
}
/// Transform step installed by `init`: reads `chunk` as a `Uint8Array`, drops
/// a leading UTF-8 byte order mark (EF BB BF) when the chunk starts with one,
/// and enqueues the remaining bytes on `controller` as a string.
///
/// NOTE(review): this function receives only the controller and the chunk, so
/// the BOM check runs on every chunk and does not consult the stream's
/// `fatal` / `ignoreBOM` settings, and the bytes are not validated as UTF-8
/// here.
fn decodeTransform(controller: *TransformStream.DefaultController, chunk: js.Value) !void {
    const utf8_bom = [_]u8{ 0xEF, 0xBB, 0xBF };

    const bytes = (try chunk.toZig(js.TypedArray(u8))).values;
    const text = if (std.mem.startsWith(u8, bytes, &utf8_bom))
        bytes[utf8_bom.len..]
    else
        bytes;

    try controller.enqueue(.{ .string = text });
}
/// Returns the readable side of the underlying transform stream.
pub fn getReadable(self: *const TextDecoderStream) *ReadableStream {
    const transform = self._transform;
    return transform.getReadable();
}
/// Returns the writable side of the underlying transform stream.
pub fn getWritable(self: *const TextDecoderStream) *WritableStream {
    const transform = self._transform;
    return transform.getWritable();
}
/// Returns the `fatal` flag recorded when the stream was constructed.
pub fn getFatal(self: *const TextDecoderStream) bool {
    const is_fatal = self._fatal;
    return is_fatal;
}
/// Returns the `ignoreBOM` flag recorded when the stream was constructed.
pub fn getIgnoreBOM(self: *const TextDecoderStream) bool {
    const ignores_bom = self._ignore_bom;
    return ignores_bom;
}
/// JavaScript binding description for TextDecoderStream, assembled from
/// `js.Bridge` helpers.
pub const JsApi = struct {
    pub const bridge = js.Bridge(TextDecoderStream);

    /// Class metadata: the exposed class name, plus the prototype chain and
    /// class id types obtained from the bridge.
    pub const Meta = struct {
        pub const name = "TextDecoderStream";
        pub const prototype_chain = bridge.prototypeChain();
        // Left undefined here; presumably assigned when the class is
        // registered with the JS engine - TODO confirm.
        pub var class_id: bridge.ClassId = undefined;
    };

    // Construction is routed to `TextDecoderStream.init`.
    pub const constructor = bridge.constructor(TextDecoderStream.init, .{});
    // `encoding` is a fixed string rather than per-instance state: only UTF-8
    // labels pass `init`, so the value is always "utf-8".
    pub const encoding = bridge.property("utf-8", .{ .template = false });
    // The remaining members are backed by the getters on TextDecoderStream.
    // The second argument is null in every case - presumably "no setter",
    // i.e. read-only from JavaScript; confirm against js.Bridge.
    pub const readable = bridge.accessor(TextDecoderStream.getReadable, null, .{});
    pub const writable = bridge.accessor(TextDecoderStream.getWritable, null, .{});
    pub const fatal = bridge.accessor(TextDecoderStream.getFatal, null, .{});
    pub const ignoreBOM = bridge.accessor(TextDecoderStream.getIgnoreBOM, null, .{});
};
const testing = @import("../../../testing.zig");
// Runs the HTML fixture containing the JavaScript assertions for
// TextDecoderStream through the project's HTML test runner.
test "WebApi: TextDecoderStream" {
    const fixture = "streams/text_decoder_stream.html";
    try testing.htmlRunner(fixture, .{});
}