diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 12f21ca0..93ffc17f 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -67,8 +67,18 @@ client: *Http.Client, allocator: Allocator, buffer_pool: BufferPool, script_pool: std.heap.MemoryPool(PendingScript), +sync_module_pool: std.heap.MemoryPool(SyncModule), async_module_pool: std.heap.MemoryPool(AsyncModule), +// We can download multiple sync modules in parallel, but we want to process +// then in order. We can't use an OrderList, like the other script types, +// because the order we load them might not be the order we want to process +// them in (I'm not sure this is true, but as far as I can tell, v8 doesn't +// make any guarantees about the list of sub-module dependencies it gives us +// So this is more like a cache. When a SyncModule is complete, it's put here +// and can be requested as needed. +sync_modules: std.StringHashMapUnmanaged(*SyncModule), + const OrderList = std.DoublyLinkedList; pub fn init(browser: *Browser, page: *Page) ScriptManager { @@ -80,24 +90,42 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager { .scripts = .{}, .deferreds = .{}, .asyncs_ready = .{}, + .sync_modules = .empty, .is_evaluating = false, .allocator = allocator, .client = browser.http_client, .static_scripts_done = false, .buffer_pool = BufferPool.init(allocator, 5), .script_pool = std.heap.MemoryPool(PendingScript).init(allocator), + .sync_module_pool = std.heap.MemoryPool(SyncModule).init(allocator), .async_module_pool = std.heap.MemoryPool(AsyncModule).init(allocator), }; } pub fn deinit(self: *ScriptManager) void { self.reset(); + var it = self.sync_modules.valueIterator(); + while (it.next()) |value_ptr| { + value_ptr.*.buffer.deinit(self.allocator); + self.sync_module_pool.destroy(value_ptr.*); + } + self.buffer_pool.deinit(); self.script_pool.deinit(); + self.sync_module_pool.deinit(); self.async_module_pool.deinit(); + + self.sync_modules.deinit(self.allocator); } pub fn reset(self: *ScriptManager) void { + var it = self.sync_modules.valueIterator(); + while (it.next()) |value_ptr| { + value_ptr.*.buffer.deinit(self.allocator); + self.sync_module_pool.destroy(value_ptr.*); + } + self.sync_modules.clearRetainingCapacity(); + self.clearList(&self.asyncs); self.clearList(&self.scripts); self.clearList(&self.deferreds); @@ -259,49 +287,70 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { // Unlike external modules which can only ever be executed after releasing an // http handle, these are executed without there necessarily being a free handle. // Thus, Http/Client.zig maintains a dedicated handle for these calls. -pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !GetResult { - std.debug.assert(self.is_blocking == false); - - self.is_blocking = true; - defer { - self.is_blocking = false; - - // we blocked evaluation while loading this script, there could be - // scripts ready to process. - self.evaluate(); +pub fn getModule(self: *ScriptManager, url: [:0]const u8) !void { + const gop = try self.sync_modules.getOrPut(self.allocator, url); + if (gop.found_existing) { + // already requested + return; } + errdefer _ = self.sync_modules.remove(url); - var blocking = Blocking{ - .allocator = self.allocator, - .buffer_pool = &self.buffer_pool, - }; + const sync = try self.sync_module_pool.create(); + errdefer self.sync_module_pool.destroy(sync); + + sync.* = .{ .manager = self }; + gop.value_ptr.* = sync; var headers = try self.client.newHeaders(); try self.page.requestCookie(.{}).headersForRequest(self.page.arena, url, &headers); - var client = self.client; - try client.blockingRequest(.{ + try self.client.request(.{ .url = url, + .ctx = sync, .method = .GET, .headers = headers, .cookie_jar = self.page.cookie_jar, - .ctx = &blocking, .resource_type = .script, - .start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null, - .header_callback = Blocking.headerCallback, - .data_callback = Blocking.dataCallback, - .done_callback = Blocking.doneCallback, - .error_callback = Blocking.errorCallback, + .start_callback = if (log.enabled(.http, .debug)) SyncModule.startCallback else null, + .header_callback = SyncModule.headerCallback, + .data_callback = SyncModule.dataCallback, + .done_callback = SyncModule.doneCallback, + .error_callback = SyncModule.errorCallback, }); +} - // rely on http's timeout settings to avoid an endless/long loop. +pub fn waitForModule(self: *ScriptManager, url: [:0]const u8) !GetResult { + std.debug.assert(self.is_blocking == false); + self.is_blocking = true; + defer self.is_blocking = false; + + // Normally it's dangerous to hold on to map pointers. But here, the map + // can't change. It's possible that by calling `tick`, other entries within + // the map will have their value change, but the map itself is immutable + // during this tick. + const entry = self.sync_modules.getEntry(url) orelse { + return error.UnknownModule; + }; + const sync = entry.value_ptr.*; + + var client = self.client; while (true) { - _ = try client.tick(200); - switch (blocking.state) { - .running => {}, - .done => |result| return result, + switch (sync.state) { + .loading => {}, + .done => { + // Our caller has its own higher level cache (caching the + // actual compiled module). There's no reason for us to keep this + defer self.sync_module_pool.destroy(sync); + defer self.sync_modules.removeByPtr(entry.key_ptr); + return .{ + .buffer = sync.buffer, + .buffer_pool = &self.buffer_pool, + }; + }, .err => |err| return err, } + // rely on http's timeout settings to avoid an endless/long loop. + _ = try client.tick(200); } } @@ -332,7 +381,6 @@ pub fn getAsyncModule(self: *ScriptManager, url: [:0]const u8, cb: AsyncModule.C .error_callback = AsyncModule.errorCallback, }); } - pub fn staticScriptsDone(self: *ScriptManager) void { std.debug.assert(self.static_scripts_done == false); self.static_scripts_done = true; @@ -782,16 +830,15 @@ const BufferPool = struct { } }; -const Blocking = struct { - allocator: Allocator, - buffer_pool: *BufferPool, - state: State = .{ .running = {} }, +const SyncModule = struct { + manager: *ScriptManager, buffer: std.ArrayListUnmanaged(u8) = .{}, + state: State = .loading, const State = union(enum) { - running: void, + done, + loading, err: anyerror, - done: GetResult, }; fn startCallback(transfer: *Http.Transfer) !void { @@ -807,12 +854,13 @@ const Blocking = struct { .content_type = header.contentType(), }); + var self: *SyncModule = @ptrCast(@alignCast(transfer.ctx)); if (header.status != 200) { + self.finished(.{ .err = error.InvalidStatusCode }); return error.InvalidStatusCode; } - var self: *Blocking = @ptrCast(@alignCast(transfer.ctx)); - self.buffer = self.buffer_pool.get(); + self.buffer = self.manager.buffer_pool.get(); } fn dataCallback(transfer: *Http.Transfer, data: []const u8) !void { @@ -822,8 +870,8 @@ const Blocking = struct { // .blocking = true, // }); - var self: *Blocking = @ptrCast(@alignCast(transfer.ctx)); - self.buffer.appendSlice(self.allocator, data) catch |err| { + var self: *SyncModule = @ptrCast(@alignCast(transfer.ctx)); + self.buffer.appendSlice(self.manager.allocator, data) catch |err| { log.err(.http, "SM.dataCallback", .{ .err = err, .len = data.len, @@ -835,19 +883,17 @@ const Blocking = struct { } fn doneCallback(ctx: *anyopaque) !void { - var self: *Blocking = @ptrCast(@alignCast(ctx)); - self.state = .{ .done = .{ - .buffer = self.buffer, - .buffer_pool = self.buffer_pool, - } }; + var self: *SyncModule = @ptrCast(@alignCast(ctx)); + self.finished(.done); } fn errorCallback(ctx: *anyopaque, err: anyerror) void { - var self: *Blocking = @ptrCast(@alignCast(ctx)); - self.state = .{ .err = err }; - if (self.buffer.items.len > 0) { - self.buffer_pool.release(self.buffer); - } + var self: *SyncModule = @ptrCast(@alignCast(ctx)); + self.finished(.{ .err = err }); + } + + fn finished(self: *SyncModule, state: State) void { + self.state = state; } }; diff --git a/src/browser/page.zig b/src/browser/page.zig index 99a402d3..451adc7c 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -143,7 +143,7 @@ pub const Page = struct { .main_context = undefined, }; - self.main_context = try session.executor.createJsContext(&self.window, self, self, true, Env.GlobalMissingCallback.init(&self.polyfill_loader)); + self.main_context = try session.executor.createJsContext(&self.window, self, &self.script_manager, true, Env.GlobalMissingCallback.init(&self.polyfill_loader)); try polyfill.preload(self.arena, self.main_context); try self.scheduler.add(self, runMicrotasks, 5, .{ .name = "page.microtasks" }); @@ -255,16 +255,6 @@ pub const Page = struct { try Node.prepend(head, &[_]Node.NodeOrText{.{ .node = parser.elementToNode(base) }}); } - pub fn fetchModuleSource(ctx: *anyopaque, url: [:0]const u8) !ScriptManager.GetResult { - const self: *Page = @ptrCast(@alignCast(ctx)); - return self.script_manager.blockingGet(url); - } - - pub fn fetchAsyncModuleSource(ctx: *anyopaque, url: [:0]const u8, cb: ScriptManager.AsyncModule.Callback, cb_data: *anyopaque) !void { - const self: *Page = @ptrCast(@alignCast(ctx)); - return self.script_manager.getAsyncModule(url, cb, cb_data); - } - pub fn wait(self: *Page, wait_ms: i32) Session.WaitResult { return self._wait(wait_ms) catch |err| { switch (err) { diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index f0a9cbfa..dcc5a7e1 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -693,7 +693,7 @@ const IsolatedWorld = struct { _ = try self.executor.createJsContext( &page.window, page, - {}, + null, false, Env.GlobalMissingCallback.init(&self.polyfill_loader), ); diff --git a/src/http/Client.zig b/src/http/Client.zig index c9464047..83046606 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -86,9 +86,6 @@ allocator: Allocator, // request. These wil come and go with each request. transfer_pool: std.heap.MemoryPool(Transfer), -// see ScriptManager.blockingGet -blocking: Handle, - // To notify registered subscribers of events, the browser sets/nulls this for us. notification: ?*Notification = null, @@ -121,16 +118,12 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie var handles = try Handles.init(allocator, client, ca_blob, &opts); errdefer handles.deinit(allocator); - var blocking = try Handle.init(client, ca_blob, &opts); - errdefer blocking.deinit(); - client.* = .{ .queue = .{}, .active = 0, .intercepted = 0, .multi = multi, .handles = handles, - .blocking = blocking, .allocator = allocator, .http_proxy = opts.http_proxy, .user_agent = opts.user_agent, @@ -142,7 +135,6 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie pub fn deinit(self: *Client) void { self.abort(); - self.blocking.deinit(); self.handles.deinit(self.allocator); _ = c.curl_multi_cleanup(self.multi); @@ -263,12 +255,6 @@ pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: return transfer.fulfill(status, headers, body); } -// See ScriptManager.blockingGet -pub fn blockingRequest(self: *Client, req: Request) !void { - const transfer = try self.makeTransfer(req); - return self.makeRequest(&self.blocking, transfer); -} - fn makeTransfer(self: *Client, req: Request) !*Transfer { errdefer req.headers.deinit(); @@ -329,7 +315,6 @@ pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { for (self.handles.handles) |*h| { try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } - try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } // Same restriction as changeProxy. Should be ok since this is only called on @@ -341,7 +326,6 @@ pub fn restoreOriginalProxy(self: *Client) !void { for (self.handles.handles) |*h| { try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy)); } - try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy)); } fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { @@ -504,7 +488,7 @@ fn endTransfer(self: *Client, transfer: *Transfer) void { log.fatal(.http, "Failed to remove handle", .{ .err = err }); }; - self.handles.release(self, handle); + self.handles.release(handle); transfer._handle = null; self.active -= 1; } @@ -563,13 +547,7 @@ const Handles = struct { return null; } - fn release(self: *Handles, client: *Client, handle: *Handle) void { - if (handle == &client.blocking) { - // the handle we've reserved for blocking request doesn't participate - // int he in_use/available pools - return; - } - + fn release(self: *Handles, handle: *Handle) void { var node = &handle.node; self.in_use.remove(node); node.prev = null; @@ -747,7 +725,7 @@ pub const Transfer = struct { fn deinit(self: *Transfer) void { self.req.headers.deinit(); if (self._handle) |handle| { - self.client.handles.release(self.client, handle); + self.client.handles.release(handle); } self.arena.deinit(); self.client.transfer_pool.destroy(self); diff --git a/src/runtime/js.zig b/src/runtime/js.zig index e09e89e5..d6f861ba 100644 --- a/src/runtime/js.zig +++ b/src/runtime/js.zig @@ -410,19 +410,9 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // when the handle_scope is freed. // We also maintain our own "context_arena" which allows us to have // all page related memory easily managed. - pub fn createJsContext(self: *ExecutionWorld, global: anytype, state: State, module_loader: anytype, enter: bool, global_callback: ?GlobalMissingCallback) !*JsContext { + pub fn createJsContext(self: *ExecutionWorld, global: anytype, state: State, script_manager: ?*ScriptManager, enter: bool, global_callback: ?GlobalMissingCallback) !*JsContext { std.debug.assert(self.js_context == null); - const ModuleLoader = switch (@typeInfo(@TypeOf(module_loader))) { - .@"struct" => @TypeOf(module_loader), - .pointer => |ptr| ptr.child, - .void => ErrorModuleLoader, - else => @compileError("invalid module_loader"), - }; - - // If necessary, turn a void context into something we can safely ptrCast - const safe_module_loader: *anyopaque = if (ModuleLoader == ErrorModuleLoader) @ptrCast(@constCast(&{})) else module_loader; - const env = self.env; const isolate = env.isolate; const Global = @TypeOf(global.*); @@ -542,13 +532,9 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { .templates = &env.templates, .meta_lookup = &env.meta_lookup, .handle_scope = handle_scope, + .script_manager = script_manager, .call_arena = self.call_arena.allocator(), .context_arena = self.context_arena.allocator(), - .module_loader = .{ - .ptr = safe_module_loader, - .func = ModuleLoader.fetchModuleSource, - .async = ModuleLoader.fetchAsyncModuleSource, - }, .global_callback = global_callback, }; @@ -692,12 +678,6 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // the function that resolves/rejects them. persisted_promise_resolvers: std.ArrayListUnmanaged(v8.Persistent(v8.PromiseResolver)) = .empty, - // When we need to load a resource (i.e. an external script), we call - // this function to get the source. This is always a reference to the - // Page's fetchModuleSource, but we use a function pointer - // since this js module is decoupled from the browser implementation. - module_loader: ModuleLoader, - // Some Zig types have code to execute to cleanup destructor_callbacks: std.ArrayListUnmanaged(DestructorCallback) = .empty, @@ -711,15 +691,12 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // necessary to lookup/store the dependent module in the module_cache. module_identifier: std.AutoHashMapUnmanaged(u32, []const u8) = .empty, + // the page's script manager + script_manager: ?*ScriptManager, + // Global callback is called on missing property. global_callback: ?GlobalMissingCallback = null, - const ModuleLoader = struct { - ptr: *anyopaque, - func: *const fn (ptr: *anyopaque, url: [:0]const u8) anyerror!ScriptManager.GetResult, - async: *const fn (ptr: *anyopaque, url: [:0]const u8, cb: ScriptManager.AsyncModule.Callback, cb_state: *anyopaque) anyerror!void, - }; - const ModuleEntry = struct { // Can be null if we're asynchrously loading the module, in // which case resolver_promise cannot be null. @@ -861,8 +838,33 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { try self.module_identifier.putNoClobber(arena, m.getIdentityHash(), owned_url); errdefer _ = self.module_identifier.remove(m.getIdentityHash()); - // resolveModuleCallback loads module's dependencies. const v8_context = self.v8_context; + { + // Non-async modules are blocking. We can download them in + // parallel, but they need to be processed serially. So we + // want to get the list of dependent modules this module has + // and start downloading them asap. + const requests = m.getModuleRequests(); + const isolate = self.isolate; + for (0..requests.length()) |i| { + const req = requests.get(v8_context, @intCast(i)).castTo(v8.ModuleRequest); + const specifier = try jsStringToZig(self.call_arena, req.getSpecifier(), isolate); + const normalized_specifier = try @import("../url.zig").stitch( + self.call_arena, + specifier, + owned_url, + .{ .alloc = .if_needed, .null_terminated = true }, + ); + const gop = try self.module_cache.getOrPut(self.context_arena, normalized_specifier); + if (!gop.found_existing) { + const owned_specifier = try self.context_arena.dupeZ(u8, normalized_specifier); + gop.key_ptr.* = owned_specifier; + gop.value_ptr.* = .{}; + try self.script_manager.?.getModule(owned_specifier); + } + } + } + if (try m.instantiate(v8_context, resolveModuleCallback) == false) { return error.ModuleInstantiationError; } @@ -891,14 +893,13 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { var gop = try self.module_cache.getOrPut(arena, owned_url); if (gop.found_existing) { - // only way for us to have found an existing entry, is if - // we're asynchronously loading this module + // If we're here, it's because we had a cache entry, but no + // module. This happens because both our synch and async + // module loaders create the entry to prevent concurrent + // loads of the same resource (like Go's Singleflight). std.debug.assert(gop.value_ptr.module == null); std.debug.assert(gop.value_ptr.module_promise == null); - std.debug.assert(gop.value_ptr.resolver_promise != null); - // keep the resolver promise, it's doing the heavy lifting - // and any other async loads will be chained to it. gop.value_ptr.module = persisted_module; gop.value_ptr.module_promise = persisted_promise; } else { @@ -1639,8 +1640,29 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { .{ .alloc = .if_needed, .null_terminated = true }, ); - const module_loader = self.module_loader; - var fetch_result = try module_loader.func(module_loader.ptr, normalized_specifier); + const gop = try self.module_cache.getOrPut(self.context_arena, normalized_specifier); + if (gop.found_existing) { + if (gop.value_ptr.module) |m| { + return m.handle; + } + // We don't have a module, but we do have a cache entry for it + // That means we're already trying to load it. We just have + // to wait for it to be done. + } else { + // I don't think it's possible for us to be here. This is + // only ever called by v8 when we evaluate a module. But + // before evaluating, we should have already started + // downloading all of the module's nested modules. So it + // should be impossible that this is the first time we've + // heard about this module. + // But, I'm not confident enough in that, and ther's little + // harm in handling this case. + @branchHint(.unlikely); + gop.value_ptr.* = .{}; + try self.script_manager.?.getModule(normalized_specifier); + } + + var fetch_result = try self.script_manager.?.waitForModule(normalized_specifier); defer fetch_result.deinit(); var try_catch: TryCatch = undefined; @@ -1756,8 +1778,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { }; // Next, we need to actually load it. - const module_loader = self.module_loader; - module_loader.async(module_loader.ptr, specifier, dynamicModuleSourceCallback, state) catch |err| { + self.script_manager.?.getAsyncModule(specifier, dynamicModuleSourceCallback, state) catch |err| { const error_msg = v8.String.initUtf8(isolate, @errorName(err)); _ = resolver.reject(self.v8_context, error_msg.toValue()); }; @@ -4169,16 +4190,6 @@ const NoopInspector = struct { pub fn onInspectorEvent(_: *anyopaque, _: []const u8) void {} }; -const ErrorModuleLoader = struct { - pub fn fetchModuleSource(_: *anyopaque, _: [:0]const u8) !ScriptManager.GetResult { - return error.NoModuleLoadConfigured; - } - - pub fn fetchAsyncModuleSource(_: *anyopaque, _: [:0]const u8, _: ScriptManager.AsyncModule.Callback, _: *anyopaque) !void { - return error.NoModuleLoadConfigured; - } -}; - // If we have a struct: // const Cat = struct { // pub fn meow(self: *Cat) void { ... } diff --git a/src/runtime/testing.zig b/src/runtime/testing.zig index 82327919..c27e1af2 100644 --- a/src/runtime/testing.zig +++ b/src/runtime/testing.zig @@ -52,7 +52,7 @@ pub fn Runner(comptime State: type, comptime Global: type, comptime types: anyty self.js_context = try self.executor.createJsContext( if (Global == void) &default_global else global, state, - {}, + null, true, null, );