Files
browser/src/browser/webapi/Blob.zig
Karl Seguin 01ecb296e5 Rework finalizers
This commit involves a number of changes to finalizers, all aimed towards
better consistency and reliability.

A big part of this has to do with v8::Inspector's ability to move objects
across IsolatedWorlds. There has been a few previous efforts on this, the most
significant being https://github.com/lightpanda-io/browser/pull/1901. To recap,
a Zig instance can map to 0-N v8::Objects. Where N is the total number of
IsolatedWorlds. Generally, IsolatedWorlds between origins are...isolated...but
the v8::Inspector isn't bound by this. So a Zig instance cannot be tied to a
Context/Identity/IsolatedWorld...it has to live until all references, possibly
from different IsolatedWorlds, are released (or the page is reset).

Finalizers could previously be managed via reference counting or explicitly
toggling the instance as weak/strong. Now, only reference counting is supported.
weak/strong can essentially be seen as an acquireRef (rc += 1) and
releaseRef (rc -= 1). Explicit setting did make some things easier, like not
having to worry so much about double-releasing (e.g. XHR abort being called
multiple times), but it was only used in a few places AND it simply doesn't work
with objects shared between IsolatedWorlds. It is never a boolean now, as 3
different IsolatedWorlds can each hold a reference.

Temps and Globals are tracked on the Session. Previously, they were tracked on
the Identity, but that makes no sense. If a Zig instance can outlive an Identity,
then any of its Temp references can too. This hasn't been a problem because we've
only seen MutationObserver and IntersectionObserver be used cross-origin,
but the right CDP script can make this crash with a use-after-free (e.g.
`MessageEvent.data` is released when the Identity is done, but `MessageEvent` is
still referenced by a different IsolateWorld).

Rather than deinit with a `comptime shutdown: bool`, there is now an explicit
`releaseRef` and `deinit`.

Bridge registration has been streamlined. Previously, types had to register
their finalizer AND acquireRef/releaseRef/deinit had to be declared on the entire
prototype chain, even if these methods just delegated to their proto. Finalizers
are now automatically enabled if a type has a `acquireRef` function. If a type
has an `acquireRef`, then it must have a `releaseRef` and a `deinit`. So if
there's custom cleanup to do in `deinit`, then you also have to define
`acquireRef` and `releaseRef` which will just delegate to the _proto.

Furthermore these finalizer methods can be defined anywhere on the chain.

Previously:

```zig
const KeywboardEvent = struct {
  _proto: *Event,
  ...

  pub fn deinit(self: *KeyboardEvent, session: *Session) void {
    self._proto.deinit(session);
  }

  pub fn releaseRef(self: *KeyboardEvent, session: *Session) void {
    self._proto.releaseRef(session);
  }
}
```

```zig
const KeyboardEvent = struct {
  _proto: *Event,
  ...
  // no deinit, releaseRef, acquireref
}
```

Since the `KeyboardEvent` doesn't participate in finalization directly, it
doesn't have to define anything. The bridge will detect the most specific place
they are defined and call them there.
2026-03-28 21:11:23 +08:00

354 lines
11 KiB
Zig

// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const lp = @import("lightpanda");
const js = @import("../js/js.zig");
const Page = @import("../Page.zig");
const Session = @import("../Session.zig");
const Mime = @import("../Mime.zig");
const Writer = std.Io.Writer;
const Allocator = std.mem.Allocator;
/// https://w3c.github.io/FileAPI/#blob-section
/// https://developer.mozilla.org/en-US/docs/Web/API/Blob
const Blob = @This();
pub const _prototype_root = true;
_type: Type,
_rc: lp.RC(u32),
_arena: Allocator,
/// Immutable slice of blob.
/// Note that another blob may hold a pointer/slice to this,
/// so its better to leave the deallocation of it to arena allocator.
_slice: []const u8,
/// MIME attached to blob. Can be an empty string.
_mime: []const u8,
pub const Type = union(enum) {
generic,
file: *@import("File.zig"),
};
const InitOptions = struct {
/// MIME type.
type: []const u8 = "",
/// How to handle line endings (CR and LF).
/// `transparent` means do nothing, `native` expects CRLF (\r\n) on Windows.
endings: []const u8 = "transparent",
};
/// Creates a new Blob (JS constructor).
pub fn init(
maybe_blob_parts: ?[]const []const u8,
maybe_options: ?InitOptions,
page: *Page,
) !*Blob {
return initWithMimeValidation(maybe_blob_parts, maybe_options, false, page);
}
/// Creates a new Blob with optional MIME validation.
/// When validate_mime is true, uses full MIME parsing (for Response/Request).
/// When false, uses simple ASCII validation per FileAPI spec (for Blob constructor).
pub fn initWithMimeValidation(
maybe_blob_parts: ?[]const []const u8,
maybe_options: ?InitOptions,
validate_mime: bool,
page: *Page,
) !*Blob {
const arena = try page.getArena(.{ .debug = "Blob" });
errdefer page.releaseArena(arena);
const options: InitOptions = maybe_options orelse .{};
const mime: []const u8 = blk: {
const t = options.type;
if (t.len == 0) {
break :blk "";
}
const buf = try arena.dupe(u8, t);
if (validate_mime) {
// Full MIME parsing per MIME sniff spec (for Content-Type headers)
_ = Mime.parse(buf) catch break :blk "";
} else {
// Simple validation per FileAPI spec (for Blob constructor):
// - If any char is outside U+0020-U+007E, return empty string
// - Otherwise lowercase
for (t) |c| {
if (c < 0x20 or c > 0x7E) {
break :blk "";
}
}
_ = std.ascii.lowerString(buf, buf);
}
break :blk buf;
};
const data = blk: {
if (maybe_blob_parts) |blob_parts| {
var w: Writer.Allocating = .init(arena);
const use_native_endings = std.mem.eql(u8, options.endings, "native");
try writeBlobParts(&w.writer, blob_parts, use_native_endings);
break :blk w.written();
}
break :blk "";
};
const self = try arena.create(Blob);
self.* = .{
._rc = .{},
._arena = arena,
._type = .generic,
._slice = data,
._mime = mime,
};
return self;
}
pub fn deinit(self: *Blob, session: *Session) void {
session.releaseArena(self._arena);
}
pub fn releaseRef(self: *Blob, session: *Session) void {
self._rc.release(self, session);
}
pub fn acquireRef(self: *Blob) void {
self._rc.acquire();
}
const largest_vector = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
/// Array of possible vector sizes for the current arch in decrementing order.
/// We may move this to some file for SIMD helpers in the future.
const vector_sizes = blk: {
// Required for length calculation.
var n: usize = largest_vector;
var total: usize = 0;
while (n != 2) : (n /= 2) total += 1;
// Populate an array with vector sizes.
n = largest_vector;
var i: usize = 0;
var items: [total]usize = undefined;
while (n != 2) : (n /= 2) {
defer i += 1;
items[i] = n;
}
break :blk items;
};
/// Writes blob parts to given `Writer` with desired endings.
fn writeBlobParts(
writer: *Writer,
blob_parts: []const []const u8,
use_native_endings: bool,
) !void {
// Transparent.
if (!use_native_endings) {
for (blob_parts) |part| {
try writer.writeAll(part);
}
return;
}
// TODO: Windows support.
// Linux & Unix.
// Both Firefox and Chrome implement it as such:
// CRLF => LF
// CR => LF
// So even though CR is not followed by LF, it gets replaced.
//
// I believe this is because such scenario is possible:
// ```
// let parts = [ "the quick\r", "\nbrown fox" ];
// ```
// In the example, one should have to check the part before in order to
// understand that CRLF is being presented in the final buffer.
// So they took a simpler approach, here's what given blob parts produce:
// ```
// "the quick\n\nbrown fox"
// ```
scan_parts: for (blob_parts) |part| {
var end: usize = 0;
inline for (vector_sizes) |vector_len| {
const Vec = @Vector(vector_len, u8);
while (end + vector_len <= part.len) : (end += vector_len) {
const cr: Vec = @splat('\r');
// Load chunk as vectors.
const data = part[end..][0..vector_len];
const chunk: Vec = data.*;
// Look for CR.
const match = chunk == cr;
// Create a bitset out of match vector.
const bitset = std.bit_set.IntegerBitSet(vector_len){
.mask = @bitCast(@intFromBool(match)),
};
var iter = bitset.iterator(.{});
var relative_start: usize = 0;
while (iter.next()) |index| {
_ = try writer.writeVec(&.{ data[relative_start..index], "\n" });
if (index + 1 != data.len and data[index + 1] == '\n') {
relative_start = index + 2;
} else {
relative_start = index + 1;
}
}
_ = try writer.writeVec(&.{data[relative_start..]});
}
}
// Scalar scan fallback.
var relative_start: usize = end;
while (end < part.len) {
if (part[end] == '\r') {
_ = try writer.writeVec(&.{ part[relative_start..end], "\n" });
// Part ends with CR. We can continue to next part.
if (end + 1 == part.len) {
continue :scan_parts;
}
// If next char is LF, skip it too.
if (part[end + 1] == '\n') {
relative_start = end + 2;
} else {
relative_start = end + 1;
}
}
end += 1;
}
// Write the remaining. We get this in such situations:
// `the quick brown\rfox`
// `the quick brown\r\nfox`
try writer.writeAll(part[relative_start..end]);
}
}
/// Returns a Promise that resolves with the contents of the blob
/// as binary data contained in an ArrayBuffer.
pub fn arrayBuffer(self: *const Blob, page: *Page) !js.Promise {
return page.js.local.?.resolvePromise(js.ArrayBuffer{ .values = self._slice });
}
const ReadableStream = @import("streams/ReadableStream.zig");
/// Returns a ReadableStream which upon reading returns the data
/// contained within the Blob.
pub fn stream(self: *const Blob, page: *Page) !*ReadableStream {
return ReadableStream.initWithData(self._slice, page);
}
/// Returns a Promise that resolves with a string containing
/// the contents of the blob, interpreted as UTF-8.
pub fn text(self: *const Blob, page: *Page) !js.Promise {
return page.js.local.?.resolvePromise(self._slice);
}
/// Extension to Blob; works on Firefox and Safari.
/// https://developer.mozilla.org/en-US/docs/Web/API/Blob/bytes
/// Returns a Promise that resolves with a Uint8Array containing
/// the contents of the blob as an array of bytes.
pub fn bytes(self: *const Blob, page: *Page) !js.Promise {
return page.js.local.?.resolvePromise(js.TypedArray(u8){ .values = self._slice });
}
/// Returns a new Blob object which contains data
/// from a subset of the blob on which it's called.
pub fn slice(
self: *const Blob,
start_: ?i32,
end_: ?i32,
content_type_: ?[]const u8,
page: *Page,
) !*Blob {
const data = self._slice;
const start = blk: {
const requested_start = start_ orelse break :blk 0;
if (requested_start < 0) {
break :blk data.len -| @abs(requested_start);
}
break :blk @min(data.len, @as(u31, @intCast(requested_start)));
};
const end: usize = blk: {
const requested_end = end_ orelse break :blk data.len;
if (requested_end < 0) {
break :blk @max(start, data.len -| @abs(requested_end));
}
break :blk @min(data.len, @max(start, @as(u31, @intCast(requested_end))));
};
return Blob.init(&.{data[start..end]}, .{ .type = content_type_ orelse "" }, page);
}
/// Returns the size of the Blob in bytes.
pub fn getSize(self: *const Blob) usize {
return self._slice.len;
}
/// Returns the type of Blob; likely a MIME type, yet anything can be given.
pub fn getType(self: *const Blob) []const u8 {
return self._mime;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(Blob);
pub const Meta = struct {
pub const name = "Blob";
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
pub const constructor = bridge.constructor(Blob.init, .{});
pub const text = bridge.function(Blob.text, .{});
pub const bytes = bridge.function(Blob.bytes, .{});
pub const slice = bridge.function(Blob.slice, .{});
pub const size = bridge.accessor(Blob.getSize, null, .{});
pub const @"type" = bridge.accessor(Blob.getType, null, .{});
pub const stream = bridge.function(Blob.stream, .{});
pub const arrayBuffer = bridge.function(Blob.arrayBuffer, .{});
};
const testing = @import("../../testing.zig");
test "WebApi: Blob" {
try testing.htmlRunner("blob.html", .{});
}