Merge pull request #906 from lightpanda-io/text_decoder

Add TextDecoder (utf8 support only)
This commit is contained in:
Karl Seguin
2025-07-22 07:13:21 +08:00
committed by GitHub
4 changed files with 152 additions and 26 deletions

View File

@@ -0,0 +1,106 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const log = @import("../../log.zig");
const Env = @import("../env.zig").Env;
// https://encoding.spec.whatwg.org/#interface-textdecoder
const TextDecoder = @This();
const SupportedLabels = enum {
utf8,
@"utf-8",
@"unicode-1-1-utf-8",
};
const Options = struct {
fatal: bool = false,
ignoreBOM: bool = false,
};
fatal: bool,
ignore_bom: bool,
pub fn constructor(label_: ?[]const u8, opts_: ?Options) !TextDecoder {
if (label_) |l| {
_ = std.meta.stringToEnum(SupportedLabels, l) orelse {
log.warn(.web_api, "not implemented", .{ .feature = "TextDecoder label", .label = l });
return error.NotImplemented;
};
}
const opts = opts_ orelse Options{};
return .{
.fatal = opts.fatal,
.ignore_bom = opts.ignoreBOM,
};
}
pub fn get_encoding(_: *const TextDecoder) []const u8 {
return "utf-8";
}
pub fn get_ignoreBOM(self: *const TextDecoder) bool {
return self.ignore_bom;
}
pub fn get_fatal(self: *const TextDecoder) bool {
return self.fatal;
}
// TODO: Should accept an ArrayBuffer, TypedArray or DataView
// js.zig will currently only map a TypedArray to our []const u8.
pub fn _decode(self: *const TextDecoder, v: []const u8) ![]const u8 {
if (self.fatal and !std.unicode.utf8ValidateSlice(v)) {
return error.InvalidUtf8;
}
if (self.ignore_bom == false and std.mem.startsWith(u8, v, &.{ 0xEF, 0xBB, 0xBF })) {
return v[3..];
}
return v;
}
const testing = @import("../../testing.zig");
test "Browser.Encoding.TextDecoder" {
var runner = try testing.jsRunner(testing.tracking_allocator, .{
.html = "",
});
defer runner.deinit();
try runner.testCases(&.{
.{ "let d1 = new TextDecoder();", null },
.{ "d1.encoding;", "utf-8" },
.{ "d1.fatal", "false" },
.{ "d1.ignoreBOM", "false" },
.{ "d1.decode(new Uint8Array([240, 160, 174, 183]))", "𠮷" },
.{ "d1.decode(new Uint8Array([0xEF, 0xBB, 0xBF, 240, 160, 174, 183]))", "𠮷" },
.{ "let d2 = new TextDecoder('utf8', {fatal: true})", null },
.{
\\ try {
\\ let data = new Uint8Array([240, 240, 160, 174, 183]);
\\ d2.decode(data);
\\ } catch (e) {e}
,
"Error: InvalidUtf8",
},
}, .{});
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
@@ -20,21 +20,18 @@ const std = @import("std");
const Env = @import("../env.zig").Env;
pub const Interfaces = .{
TextEncoder,
};
// https://encoding.spec.whatwg.org/#interface-textencoder
pub const TextEncoder = struct {
pub fn constructor() !TextEncoder {
const TextEncoder = @This();
pub fn constructor() !TextEncoder {
return .{};
}
}
pub fn get_encoding(_: *const TextEncoder) []const u8 {
pub fn get_encoding(_: *const TextEncoder) []const u8 {
return "utf-8";
}
}
pub fn _encode(_: *const TextEncoder, v: []const u8) !Env.TypedArray(u8) {
pub fn _encode(_: *const TextEncoder, v: []const u8) !Env.TypedArray(u8) {
// Ensure the input is a valid utf-8
// It seems chrome accepts invalid utf-8 sequence.
//
@@ -43,16 +40,17 @@ pub const TextEncoder = struct {
}
return .{ .values = v };
}
};
}
const testing = @import("../../testing.zig");
test "Browser.Encoding.TextEncoder" {
var runner = try testing.jsRunner(testing.tracking_allocator, .{});
var runner = try testing.jsRunner(testing.tracking_allocator, .{
.html = "",
});
defer runner.deinit();
try runner.testCases(&.{
.{ "var encoder = new TextEncoder();", "undefined" },
.{ "var encoder = new TextEncoder();", null },
.{ "encoder.encoding;", "utf-8" },
.{ "encoder.encode('€');", "226,130,172" },

View File

@@ -0,0 +1,22 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
pub const Interfaces = .{
@import("TextDecoder.zig"),
@import("TextEncoder.zig"),
};

View File

@@ -26,7 +26,7 @@ const WebApis = struct {
@import("cssom/cssom.zig").Interfaces,
@import("dom/dom.zig").Interfaces,
@import("dom/shadow_root.zig").ShadowRoot,
@import("encoding/text_encoder.zig").Interfaces,
@import("encoding/encoding.zig").Interfaces,
@import("events/event.zig").Interfaces,
@import("html/html.zig").Interfaces,
@import("iterator/iterator.zig").Interfaces,