From 77b6377473d56d4492988cd9cb5137c78fe9ccbf Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 21 Jul 2025 16:28:43 +0800 Subject: [PATCH] Add TextDecoder (utf8 support only) --- src/browser/encoding/TextDecoder.zig | 106 ++++++++++++++++++ .../{text_encoder.zig => TextEncoder.zig} | 48 ++++---- src/browser/encoding/encoding.zig | 22 ++++ src/browser/env.zig | 2 +- 4 files changed, 152 insertions(+), 26 deletions(-) create mode 100644 src/browser/encoding/TextDecoder.zig rename src/browser/encoding/{text_encoder.zig => TextEncoder.zig} (68%) create mode 100644 src/browser/encoding/encoding.zig diff --git a/src/browser/encoding/TextDecoder.zig b/src/browser/encoding/TextDecoder.zig new file mode 100644 index 00000000..bc5f38d2 --- /dev/null +++ b/src/browser/encoding/TextDecoder.zig @@ -0,0 +1,106 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); + +const Env = @import("../env.zig").Env; + +// https://encoding.spec.whatwg.org/#interface-textdecoder +const TextDecoder = @This(); + +const SupportedLabels = enum { + utf8, + @"utf-8", + @"unicode-1-1-utf-8", +}; + +const Options = struct { + fatal: bool = false, + ignoreBOM: bool = false, +}; + +fatal: bool, +ignore_bom: bool, + +pub fn constructor(label_: ?[]const u8, opts_: ?Options) !TextDecoder { + if (label_) |l| { + _ = std.meta.stringToEnum(SupportedLabels, l) orelse { + log.warn(.web_api, "not implemented", .{ .feature = "TextDecoder label", .label = l }); + return error.NotImplemented; + }; + } + const opts = opts_ orelse Options{}; + return .{ + .fatal = opts.fatal, + .ignore_bom = opts.ignoreBOM, + }; +} + +pub fn get_encoding(_: *const TextDecoder) []const u8 { + return "utf-8"; +} + +pub fn get_ignoreBOM(self: *const TextDecoder) bool { + return self.ignore_bom; +} + +pub fn get_fatal(self: *const TextDecoder) bool { + return self.fatal; +} + +// TODO: Should accept an ArrayBuffer, TypedArray or DataView +// js.zig will currently only map a TypedArray to our []const u8. +pub fn _decode(self: *const TextDecoder, v: []const u8) ![]const u8 { + if (self.fatal and !std.unicode.utf8ValidateSlice(v)) { + return error.InvalidUtf8; + } + + if (self.ignore_bom == false and std.mem.startsWith(u8, v, &.{ 0xEF, 0xBB, 0xBF })) { + return v[3..]; + } + + return v; +} + +const testing = @import("../../testing.zig"); +test "Browser.Encoding.TextDecoder" { + var runner = try testing.jsRunner(testing.tracking_allocator, .{ + .html = "", + }); + defer runner.deinit(); + + try runner.testCases(&.{ + .{ "let d1 = new TextDecoder();", null }, + .{ "d1.encoding;", "utf-8" }, + .{ "d1.fatal", "false" }, + .{ "d1.ignoreBOM", "false" }, + .{ "d1.decode(new Uint8Array([240, 160, 174, 183]))", "𠮷" }, + .{ "d1.decode(new Uint8Array([0xEF, 0xBB, 0xBF, 240, 160, 174, 183]))", "𠮷" }, + + .{ "let d2 = new TextDecoder('utf8', {fatal: true})", null }, + .{ + \\ try { + \\ let data = new Uint8Array([240, 240, 160, 174, 183]); + \\ d2.decode(data); + \\ } catch (e) {e} + , + "Error: InvalidUtf8", + }, + }, .{}); +} diff --git a/src/browser/encoding/text_encoder.zig b/src/browser/encoding/TextEncoder.zig similarity index 68% rename from src/browser/encoding/text_encoder.zig rename to src/browser/encoding/TextEncoder.zig index 5bf9b7d7..600d5b82 100644 --- a/src/browser/encoding/text_encoder.zig +++ b/src/browser/encoding/TextEncoder.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -20,39 +20,37 @@ const std = @import("std"); const Env = @import("../env.zig").Env; -pub const Interfaces = .{ - TextEncoder, -}; - // https://encoding.spec.whatwg.org/#interface-textencoder -pub const TextEncoder = struct { - pub fn constructor() !TextEncoder { - return .{}; +const TextEncoder = @This(); + +pub fn constructor() !TextEncoder { + return .{}; +} + +pub fn get_encoding(_: *const TextEncoder) []const u8 { + return "utf-8"; +} + +pub fn _encode(_: *const TextEncoder, v: []const u8) !Env.TypedArray(u8) { + // Ensure the input is a valid utf-8 + // It seems chrome accepts invalid utf-8 sequence. + // + if (!std.unicode.utf8ValidateSlice(v)) { + return error.InvalidUtf8; } - pub fn get_encoding(_: *const TextEncoder) []const u8 { - return "utf-8"; - } - - pub fn _encode(_: *const TextEncoder, v: []const u8) !Env.TypedArray(u8) { - // Ensure the input is a valid utf-8 - // It seems chrome accepts invalid utf-8 sequence. - // - if (!std.unicode.utf8ValidateSlice(v)) { - return error.InvalidUtf8; - } - - return .{ .values = v }; - } -}; + return .{ .values = v }; +} const testing = @import("../../testing.zig"); test "Browser.Encoding.TextEncoder" { - var runner = try testing.jsRunner(testing.tracking_allocator, .{}); + var runner = try testing.jsRunner(testing.tracking_allocator, .{ + .html = "", + }); defer runner.deinit(); try runner.testCases(&.{ - .{ "var encoder = new TextEncoder();", "undefined" }, + .{ "var encoder = new TextEncoder();", null }, .{ "encoder.encoding;", "utf-8" }, .{ "encoder.encode('€');", "226,130,172" }, diff --git a/src/browser/encoding/encoding.zig b/src/browser/encoding/encoding.zig new file mode 100644 index 00000000..97a16f71 --- /dev/null +++ b/src/browser/encoding/encoding.zig @@ -0,0 +1,22 @@ +// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +pub const Interfaces = .{ + @import("TextDecoder.zig"), + @import("TextEncoder.zig"), +}; diff --git a/src/browser/env.zig b/src/browser/env.zig index 344e687e..024c5d5d 100644 --- a/src/browser/env.zig +++ b/src/browser/env.zig @@ -26,7 +26,7 @@ const WebApis = struct { @import("cssom/cssom.zig").Interfaces, @import("dom/dom.zig").Interfaces, @import("dom/shadow_root.zig").ShadowRoot, - @import("encoding/text_encoder.zig").Interfaces, + @import("encoding/encoding.zig").Interfaces, @import("events/event.zig").Interfaces, @import("html/html.zig").Interfaces, @import("iterator/iterator.zig").Interfaces,