Merge pull request #1091 from lightpanda-io/concurrent_blocking_imports
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
zig-test / zig build dev (push) Has been cancelled
zig-test / browser fetch (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled

Concurrent blocking imports
This commit is contained in:
Karl Seguin
2025-09-30 12:30:42 +08:00
committed by GitHub
6 changed files with 159 additions and 134 deletions

View File

@@ -67,8 +67,18 @@ client: *Http.Client,
allocator: Allocator,
buffer_pool: BufferPool,
script_pool: std.heap.MemoryPool(PendingScript),
sync_module_pool: std.heap.MemoryPool(SyncModule),
async_module_pool: std.heap.MemoryPool(AsyncModule),
// We can download multiple sync modules in parallel, but we want to process
// then in order. We can't use an OrderList, like the other script types,
// because the order we load them might not be the order we want to process
// them in (I'm not sure this is true, but as far as I can tell, v8 doesn't
// make any guarantees about the list of sub-module dependencies it gives us
// So this is more like a cache. When a SyncModule is complete, it's put here
// and can be requested as needed.
sync_modules: std.StringHashMapUnmanaged(*SyncModule),
const OrderList = std.DoublyLinkedList;
pub fn init(browser: *Browser, page: *Page) ScriptManager {
@@ -80,24 +90,42 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
.scripts = .{},
.deferreds = .{},
.asyncs_ready = .{},
.sync_modules = .empty,
.is_evaluating = false,
.allocator = allocator,
.client = browser.http_client,
.static_scripts_done = false,
.buffer_pool = BufferPool.init(allocator, 5),
.script_pool = std.heap.MemoryPool(PendingScript).init(allocator),
.sync_module_pool = std.heap.MemoryPool(SyncModule).init(allocator),
.async_module_pool = std.heap.MemoryPool(AsyncModule).init(allocator),
};
}
pub fn deinit(self: *ScriptManager) void {
self.reset();
var it = self.sync_modules.valueIterator();
while (it.next()) |value_ptr| {
value_ptr.*.buffer.deinit(self.allocator);
self.sync_module_pool.destroy(value_ptr.*);
}
self.buffer_pool.deinit();
self.script_pool.deinit();
self.sync_module_pool.deinit();
self.async_module_pool.deinit();
self.sync_modules.deinit(self.allocator);
}
pub fn reset(self: *ScriptManager) void {
var it = self.sync_modules.valueIterator();
while (it.next()) |value_ptr| {
value_ptr.*.buffer.deinit(self.allocator);
self.sync_module_pool.destroy(value_ptr.*);
}
self.sync_modules.clearRetainingCapacity();
self.clearList(&self.asyncs);
self.clearList(&self.scripts);
self.clearList(&self.deferreds);
@@ -260,49 +288,70 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
// Unlike external modules which can only ever be executed after releasing an
// http handle, these are executed without there necessarily being a free handle.
// Thus, Http/Client.zig maintains a dedicated handle for these calls.
pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !GetResult {
std.debug.assert(self.is_blocking == false);
self.is_blocking = true;
defer {
self.is_blocking = false;
// we blocked evaluation while loading this script, there could be
// scripts ready to process.
self.evaluate();
pub fn getModule(self: *ScriptManager, url: [:0]const u8) !void {
const gop = try self.sync_modules.getOrPut(self.allocator, url);
if (gop.found_existing) {
// already requested
return;
}
errdefer _ = self.sync_modules.remove(url);
var blocking = Blocking{
.allocator = self.allocator,
.buffer_pool = &self.buffer_pool,
};
const sync = try self.sync_module_pool.create();
errdefer self.sync_module_pool.destroy(sync);
sync.* = .{ .manager = self };
gop.value_ptr.* = sync;
var headers = try self.client.newHeaders();
try self.page.requestCookie(.{}).headersForRequest(self.page.arena, url, &headers);
var client = self.client;
try client.blockingRequest(.{
try self.client.request(.{
.url = url,
.ctx = sync,
.method = .GET,
.headers = headers,
.cookie_jar = self.page.cookie_jar,
.ctx = &blocking,
.resource_type = .script,
.start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null,
.header_callback = Blocking.headerCallback,
.data_callback = Blocking.dataCallback,
.done_callback = Blocking.doneCallback,
.error_callback = Blocking.errorCallback,
.start_callback = if (log.enabled(.http, .debug)) SyncModule.startCallback else null,
.header_callback = SyncModule.headerCallback,
.data_callback = SyncModule.dataCallback,
.done_callback = SyncModule.doneCallback,
.error_callback = SyncModule.errorCallback,
});
}
// rely on http's timeout settings to avoid an endless/long loop.
pub fn waitForModule(self: *ScriptManager, url: [:0]const u8) !GetResult {
std.debug.assert(self.is_blocking == false);
self.is_blocking = true;
defer self.is_blocking = false;
// Normally it's dangerous to hold on to map pointers. But here, the map
// can't change. It's possible that by calling `tick`, other entries within
// the map will have their value change, but the map itself is immutable
// during this tick.
const entry = self.sync_modules.getEntry(url) orelse {
return error.UnknownModule;
};
const sync = entry.value_ptr.*;
var client = self.client;
while (true) {
_ = try client.tick(200);
switch (blocking.state) {
.running => {},
.done => |result| return result,
switch (sync.state) {
.loading => {},
.done => {
// Our caller has its own higher level cache (caching the
// actual compiled module). There's no reason for us to keep this
defer self.sync_module_pool.destroy(sync);
defer self.sync_modules.removeByPtr(entry.key_ptr);
return .{
.buffer = sync.buffer,
.buffer_pool = &self.buffer_pool,
};
},
.err => |err| return err,
}
// rely on http's timeout settings to avoid an endless/long loop.
_ = try client.tick(200);
}
}
@@ -333,7 +382,6 @@ pub fn getAsyncModule(self: *ScriptManager, url: [:0]const u8, cb: AsyncModule.C
.error_callback = AsyncModule.errorCallback,
});
}
pub fn staticScriptsDone(self: *ScriptManager) void {
std.debug.assert(self.static_scripts_done == false);
self.static_scripts_done = true;
@@ -783,16 +831,15 @@ const BufferPool = struct {
}
};
const Blocking = struct {
allocator: Allocator,
buffer_pool: *BufferPool,
state: State = .{ .running = {} },
const SyncModule = struct {
manager: *ScriptManager,
buffer: std.ArrayListUnmanaged(u8) = .{},
state: State = .loading,
const State = union(enum) {
running: void,
done,
loading,
err: anyerror,
done: GetResult,
};
fn startCallback(transfer: *Http.Transfer) !void {
@@ -808,12 +855,13 @@ const Blocking = struct {
.content_type = header.contentType(),
});
var self: *SyncModule = @ptrCast(@alignCast(transfer.ctx));
if (header.status != 200) {
self.finished(.{ .err = error.InvalidStatusCode });
return error.InvalidStatusCode;
}
var self: *Blocking = @ptrCast(@alignCast(transfer.ctx));
self.buffer = self.buffer_pool.get();
self.buffer = self.manager.buffer_pool.get();
}
fn dataCallback(transfer: *Http.Transfer, data: []const u8) !void {
@@ -823,8 +871,8 @@ const Blocking = struct {
// .blocking = true,
// });
var self: *Blocking = @ptrCast(@alignCast(transfer.ctx));
self.buffer.appendSlice(self.allocator, data) catch |err| {
var self: *SyncModule = @ptrCast(@alignCast(transfer.ctx));
self.buffer.appendSlice(self.manager.allocator, data) catch |err| {
log.err(.http, "SM.dataCallback", .{
.err = err,
.len = data.len,
@@ -836,19 +884,17 @@ const Blocking = struct {
}
fn doneCallback(ctx: *anyopaque) !void {
var self: *Blocking = @ptrCast(@alignCast(ctx));
self.state = .{ .done = .{
.buffer = self.buffer,
.buffer_pool = self.buffer_pool,
} };
var self: *SyncModule = @ptrCast(@alignCast(ctx));
self.finished(.done);
}
fn errorCallback(ctx: *anyopaque, err: anyerror) void {
var self: *Blocking = @ptrCast(@alignCast(ctx));
self.state = .{ .err = err };
if (self.buffer.items.len > 0) {
self.buffer_pool.release(self.buffer);
}
var self: *SyncModule = @ptrCast(@alignCast(ctx));
self.finished(.{ .err = err });
}
fn finished(self: *SyncModule, state: State) void {
self.state = state;
}
};

View File

@@ -143,7 +143,7 @@ pub const Page = struct {
.main_context = undefined,
};
self.main_context = try session.executor.createJsContext(&self.window, self, self, true, Env.GlobalMissingCallback.init(&self.polyfill_loader));
self.main_context = try session.executor.createJsContext(&self.window, self, &self.script_manager, true, Env.GlobalMissingCallback.init(&self.polyfill_loader));
try polyfill.preload(self.arena, self.main_context);
try self.scheduler.add(self, runMicrotasks, 5, .{ .name = "page.microtasks" });
@@ -255,16 +255,6 @@ pub const Page = struct {
try Node.prepend(head, &[_]Node.NodeOrText{.{ .node = parser.elementToNode(base) }});
}
pub fn fetchModuleSource(ctx: *anyopaque, url: [:0]const u8) !ScriptManager.GetResult {
const self: *Page = @ptrCast(@alignCast(ctx));
return self.script_manager.blockingGet(url);
}
pub fn fetchAsyncModuleSource(ctx: *anyopaque, url: [:0]const u8, cb: ScriptManager.AsyncModule.Callback, cb_data: *anyopaque) !void {
const self: *Page = @ptrCast(@alignCast(ctx));
return self.script_manager.getAsyncModule(url, cb, cb_data);
}
pub fn wait(self: *Page, wait_ms: i32) Session.WaitResult {
return self._wait(wait_ms) catch |err| {
switch (err) {

View File

@@ -693,7 +693,7 @@ const IsolatedWorld = struct {
_ = try self.executor.createJsContext(
&page.window,
page,
{},
null,
false,
Env.GlobalMissingCallback.init(&self.polyfill_loader),
);

View File

@@ -86,9 +86,6 @@ allocator: Allocator,
// request. These wil come and go with each request.
transfer_pool: std.heap.MemoryPool(Transfer),
// see ScriptManager.blockingGet
blocking: Handle,
// To notify registered subscribers of events, the browser sets/nulls this for us.
notification: ?*Notification = null,
@@ -121,16 +118,12 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie
var handles = try Handles.init(allocator, client, ca_blob, &opts);
errdefer handles.deinit(allocator);
var blocking = try Handle.init(client, ca_blob, &opts);
errdefer blocking.deinit();
client.* = .{
.queue = .{},
.active = 0,
.intercepted = 0,
.multi = multi,
.handles = handles,
.blocking = blocking,
.allocator = allocator,
.http_proxy = opts.http_proxy,
.user_agent = opts.user_agent,
@@ -142,7 +135,6 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie
pub fn deinit(self: *Client) void {
self.abort();
self.blocking.deinit();
self.handles.deinit(self.allocator);
_ = c.curl_multi_cleanup(self.multi);
@@ -263,12 +255,6 @@ pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers:
return transfer.fulfill(status, headers, body);
}
// See ScriptManager.blockingGet
pub fn blockingRequest(self: *Client, req: Request) !void {
const transfer = try self.makeTransfer(req);
return self.makeRequest(&self.blocking, transfer);
}
fn makeTransfer(self: *Client, req: Request) !*Transfer {
errdefer req.headers.deinit();
@@ -329,7 +315,6 @@ pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void {
for (self.handles.handles) |*h| {
try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy.ptr));
}
try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy.ptr));
}
// Same restriction as changeProxy. Should be ok since this is only called on
@@ -341,7 +326,6 @@ pub fn restoreOriginalProxy(self: *Client) !void {
for (self.handles.handles) |*h| {
try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy));
}
try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy));
}
fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void {
@@ -504,7 +488,7 @@ fn endTransfer(self: *Client, transfer: *Transfer) void {
log.fatal(.http, "Failed to remove handle", .{ .err = err });
};
self.handles.release(self, handle);
self.handles.release(handle);
transfer._handle = null;
self.active -= 1;
}
@@ -563,13 +547,7 @@ const Handles = struct {
return null;
}
fn release(self: *Handles, client: *Client, handle: *Handle) void {
if (handle == &client.blocking) {
// the handle we've reserved for blocking request doesn't participate
// int he in_use/available pools
return;
}
fn release(self: *Handles, handle: *Handle) void {
var node = &handle.node;
self.in_use.remove(node);
node.prev = null;
@@ -747,7 +725,7 @@ pub const Transfer = struct {
fn deinit(self: *Transfer) void {
self.req.headers.deinit();
if (self._handle) |handle| {
self.client.handles.release(self.client, handle);
self.client.handles.release(handle);
}
self.arena.deinit();
self.client.transfer_pool.destroy(self);

View File

@@ -410,19 +410,9 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// when the handle_scope is freed.
// We also maintain our own "context_arena" which allows us to have
// all page related memory easily managed.
pub fn createJsContext(self: *ExecutionWorld, global: anytype, state: State, module_loader: anytype, enter: bool, global_callback: ?GlobalMissingCallback) !*JsContext {
pub fn createJsContext(self: *ExecutionWorld, global: anytype, state: State, script_manager: ?*ScriptManager, enter: bool, global_callback: ?GlobalMissingCallback) !*JsContext {
std.debug.assert(self.js_context == null);
const ModuleLoader = switch (@typeInfo(@TypeOf(module_loader))) {
.@"struct" => @TypeOf(module_loader),
.pointer => |ptr| ptr.child,
.void => ErrorModuleLoader,
else => @compileError("invalid module_loader"),
};
// If necessary, turn a void context into something we can safely ptrCast
const safe_module_loader: *anyopaque = if (ModuleLoader == ErrorModuleLoader) @ptrCast(@constCast(&{})) else module_loader;
const env = self.env;
const isolate = env.isolate;
const Global = @TypeOf(global.*);
@@ -542,13 +532,9 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
.templates = &env.templates,
.meta_lookup = &env.meta_lookup,
.handle_scope = handle_scope,
.script_manager = script_manager,
.call_arena = self.call_arena.allocator(),
.context_arena = self.context_arena.allocator(),
.module_loader = .{
.ptr = safe_module_loader,
.func = ModuleLoader.fetchModuleSource,
.async = ModuleLoader.fetchAsyncModuleSource,
},
.global_callback = global_callback,
};
@@ -692,12 +678,6 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// the function that resolves/rejects them.
persisted_promise_resolvers: std.ArrayListUnmanaged(v8.Persistent(v8.PromiseResolver)) = .empty,
// When we need to load a resource (i.e. an external script), we call
// this function to get the source. This is always a reference to the
// Page's fetchModuleSource, but we use a function pointer
// since this js module is decoupled from the browser implementation.
module_loader: ModuleLoader,
// Some Zig types have code to execute to cleanup
destructor_callbacks: std.ArrayListUnmanaged(DestructorCallback) = .empty,
@@ -711,15 +691,12 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// necessary to lookup/store the dependent module in the module_cache.
module_identifier: std.AutoHashMapUnmanaged(u32, []const u8) = .empty,
// the page's script manager
script_manager: ?*ScriptManager,
// Global callback is called on missing property.
global_callback: ?GlobalMissingCallback = null,
const ModuleLoader = struct {
ptr: *anyopaque,
func: *const fn (ptr: *anyopaque, url: [:0]const u8) anyerror!ScriptManager.GetResult,
async: *const fn (ptr: *anyopaque, url: [:0]const u8, cb: ScriptManager.AsyncModule.Callback, cb_state: *anyopaque) anyerror!void,
};
const ModuleEntry = struct {
// Can be null if we're asynchrously loading the module, in
// which case resolver_promise cannot be null.
@@ -861,8 +838,33 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
try self.module_identifier.putNoClobber(arena, m.getIdentityHash(), owned_url);
errdefer _ = self.module_identifier.remove(m.getIdentityHash());
// resolveModuleCallback loads module's dependencies.
const v8_context = self.v8_context;
{
// Non-async modules are blocking. We can download them in
// parallel, but they need to be processed serially. So we
// want to get the list of dependent modules this module has
// and start downloading them asap.
const requests = m.getModuleRequests();
const isolate = self.isolate;
for (0..requests.length()) |i| {
const req = requests.get(v8_context, @intCast(i)).castTo(v8.ModuleRequest);
const specifier = try jsStringToZig(self.call_arena, req.getSpecifier(), isolate);
const normalized_specifier = try @import("../url.zig").stitch(
self.call_arena,
specifier,
owned_url,
.{ .alloc = .if_needed, .null_terminated = true },
);
const gop = try self.module_cache.getOrPut(self.context_arena, normalized_specifier);
if (!gop.found_existing) {
const owned_specifier = try self.context_arena.dupeZ(u8, normalized_specifier);
gop.key_ptr.* = owned_specifier;
gop.value_ptr.* = .{};
try self.script_manager.?.getModule(owned_specifier);
}
}
}
if (try m.instantiate(v8_context, resolveModuleCallback) == false) {
return error.ModuleInstantiationError;
}
@@ -891,14 +893,13 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
var gop = try self.module_cache.getOrPut(arena, owned_url);
if (gop.found_existing) {
// only way for us to have found an existing entry, is if
// we're asynchronously loading this module
// If we're here, it's because we had a cache entry, but no
// module. This happens because both our synch and async
// module loaders create the entry to prevent concurrent
// loads of the same resource (like Go's Singleflight).
std.debug.assert(gop.value_ptr.module == null);
std.debug.assert(gop.value_ptr.module_promise == null);
std.debug.assert(gop.value_ptr.resolver_promise != null);
// keep the resolver promise, it's doing the heavy lifting
// and any other async loads will be chained to it.
gop.value_ptr.module = persisted_module;
gop.value_ptr.module_promise = persisted_promise;
} else {
@@ -1639,8 +1640,29 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
.{ .alloc = .if_needed, .null_terminated = true },
);
const module_loader = self.module_loader;
var fetch_result = try module_loader.func(module_loader.ptr, normalized_specifier);
const gop = try self.module_cache.getOrPut(self.context_arena, normalized_specifier);
if (gop.found_existing) {
if (gop.value_ptr.module) |m| {
return m.handle;
}
// We don't have a module, but we do have a cache entry for it
// That means we're already trying to load it. We just have
// to wait for it to be done.
} else {
// I don't think it's possible for us to be here. This is
// only ever called by v8 when we evaluate a module. But
// before evaluating, we should have already started
// downloading all of the module's nested modules. So it
// should be impossible that this is the first time we've
// heard about this module.
// But, I'm not confident enough in that, and ther's little
// harm in handling this case.
@branchHint(.unlikely);
gop.value_ptr.* = .{};
try self.script_manager.?.getModule(normalized_specifier);
}
var fetch_result = try self.script_manager.?.waitForModule(normalized_specifier);
defer fetch_result.deinit();
var try_catch: TryCatch = undefined;
@@ -1756,8 +1778,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
};
// Next, we need to actually load it.
const module_loader = self.module_loader;
module_loader.async(module_loader.ptr, specifier, dynamicModuleSourceCallback, state) catch |err| {
self.script_manager.?.getAsyncModule(specifier, dynamicModuleSourceCallback, state) catch |err| {
const error_msg = v8.String.initUtf8(isolate, @errorName(err));
_ = resolver.reject(self.v8_context, error_msg.toValue());
};
@@ -4185,16 +4206,6 @@ const NoopInspector = struct {
pub fn onInspectorEvent(_: *anyopaque, _: []const u8) void {}
};
const ErrorModuleLoader = struct {
pub fn fetchModuleSource(_: *anyopaque, _: [:0]const u8) !ScriptManager.GetResult {
return error.NoModuleLoadConfigured;
}
pub fn fetchAsyncModuleSource(_: *anyopaque, _: [:0]const u8, _: ScriptManager.AsyncModule.Callback, _: *anyopaque) !void {
return error.NoModuleLoadConfigured;
}
};
// If we have a struct:
// const Cat = struct {
// pub fn meow(self: *Cat) void { ... }

View File

@@ -52,7 +52,7 @@ pub fn Runner(comptime State: type, comptime Global: type, comptime types: anyty
self.js_context = try self.executor.createJsContext(
if (Global == void) &default_global else global,
state,
{},
null,
true,
null,
);