Fix module loading

When V8 calls the ResolveModuleCallback that we give it, it passes the specifier
which is essentially the string given to `from`:

```
import {x} from './blah.js';
```

We were taking that specifier and giving it to the page. The page knew the
currently executing script, and thus could resolve the full URL. Given the full
URL, it could either return the JS content from its module cache or fetch
the source.

At best though, this isn't efficient. If two files import the same module, yes
we cache the src, but we still ask v8 to re-compile it. At worst, it crashes
due to resource exhaustion in the case of cyclical dependencies.

ResolveModuleCallback should instead detect that it has already loaded the
module and return the previously loaded module. Essentially, we shouldn't be
caching the JavaScript source, we should be caching the v8 module.

However, in order to do this, we need more than the specifier, which might only
be a relative path (and thus isn't unique). So, in addition to a module cache,
we now also maintain a module identifier lookup. Given a module, we can get
its full path. Thankfully ResolveModuleCallback gives us the referring module,
so we can look up that module's URL, stitch it to the specifier, and get the
full url (the unique identifier) within the JS runtime.

Need more real world testing, and a fully working example before I celebrate,
but for sites with many imports, this appears to improve performance by many
orders of magnitude.
This commit is contained in:
Karl Seguin
2025-06-20 19:17:55 +08:00
parent 5dcc3db36b
commit d8ec50345a
2 changed files with 115 additions and 114 deletions

View File

@@ -87,13 +87,6 @@ pub const Page = struct {
// execute any JavaScript // execute any JavaScript
main_context: *Env.JsContext, main_context: *Env.JsContext,
// List of modules currently fetched/loaded.
module_map: std.StringHashMapUnmanaged([]const u8),
// current_script is the script currently evaluated by the page.
// current_script could by fetch module to resolve module's url to fetch.
current_script: ?*const Script = null,
// indicates intention to navigate to another page on the next loop execution. // indicates intention to navigate to another page on the next loop execution.
delayed_navigation: bool = false, delayed_navigation: bool = false,
@@ -119,7 +112,6 @@ pub const Page = struct {
.notification = browser.notification, .notification = browser.notification,
}), }),
.main_context = undefined, .main_context = undefined,
.module_map = .empty,
}; };
self.main_context = try session.executor.createJsContext(&self.window, self, self, true); self.main_context = try session.executor.createJsContext(&self.window, self, self, true);
@@ -147,34 +139,9 @@ pub const Page = struct {
try Dump.writeHTML(doc, out); try Dump.writeHTML(doc, out);
} }
pub fn fetchModuleSource(ctx: *anyopaque, specifier: []const u8) !?[]const u8 { pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 {
const self: *Page = @ptrCast(@alignCast(ctx)); const self: *Page = @ptrCast(@alignCast(ctx));
const base = if (self.current_script) |s| s.src else null; return self.fetchData("module", src);
const src = blk: {
if (base) |_base| {
break :blk try URL.stitch(self.arena, specifier, _base, .{});
} else break :blk specifier;
};
if (self.module_map.get(src)) |module| {
log.debug(.http, "fetching module", .{
.src = src,
.cached = true,
});
return module;
}
log.debug(.http, "fetching module", .{
.src = src,
.base = base,
.cached = false,
.specifier = specifier,
});
const module = try self.fetchData(specifier, base);
if (module) |_module| try self.module_map.putNoClobber(self.arena, src, _module);
return module;
} }
pub fn wait(self: *Page) !void { pub fn wait(self: *Page) !void {
@@ -473,26 +440,20 @@ pub const Page = struct {
log.err(.browser, "clear document script", .{ .err = err }); log.err(.browser, "clear document script", .{ .err = err });
}; };
var script_source: ?[]const u8 = null; const src = script.src orelse {
defer self.current_script = null;
if (script.src) |src| {
self.current_script = script;
// https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script
script_source = (try self.fetchData(src, null)) orelse {
// TODO If el's result is null, then fire an event named error at
// el, and return
return;
};
} else {
// source is inline // source is inline
// TODO handle charset attribute // TODO handle charset attribute
script_source = try parser.nodeTextContent(parser.elementToNode(script.element)); const script_source = try parser.nodeTextContent(parser.elementToNode(script.element)) orelse return;
} return script.eval(self, script_source);
};
if (script_source) |ss| { // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script
try script.eval(self, ss); const script_source = (try self.fetchData("script", src)) orelse {
} // TODO If el's result is null, then fire an event named error at
// el, and return
return;
};
return script.eval(self, script_source);
// TODO If el's from an external file is true, then fire an event // TODO If el's from an external file is true, then fire an event
// named load at el. // named load at el.
@@ -502,7 +463,11 @@ pub const Page = struct {
// It resolves src using the page's uri. // It resolves src using the page's uri.
// If a base path is given, src is resolved according to the base first. // If a base path is given, src is resolved according to the base first.
// the caller owns the returned string // the caller owns the returned string
fn fetchData(self: *const Page, src: []const u8, base: ?[]const u8) !?[]const u8 { fn fetchData(
self: *const Page,
comptime reason: []const u8,
src: []const u8,
) !?[]const u8 {
const arena = self.arena; const arena = self.arena;
// Handle data URIs. // Handle data URIs.
@@ -510,26 +475,20 @@ pub const Page = struct {
return data_uri.data; return data_uri.data;
} }
var res_src = src;
// if a base path is given, we resolve src using base.
if (base) |_base| {
res_src = try URL.stitch(arena, src, _base, .{ .alloc = .if_needed });
}
var origin_url = &self.url; var origin_url = &self.url;
const url = try origin_url.resolve(arena, res_src); const url = try origin_url.resolve(arena, src);
var status_code: u16 = 0; var status_code: u16 = 0;
log.debug(.http, "fetching script", .{ log.debug(.http, "fetching script", .{
.url = url, .url = url,
.src = src, .src = src,
.base = base, .reason = reason,
}); });
errdefer |err| log.err(.http, "fetch error", .{ errdefer |err| log.err(.http, "fetch error", .{
.err = err, .err = err,
.url = url, .url = url,
.reason = reason,
.status = status_code, .status = status_code,
}); });
@@ -563,6 +522,7 @@ pub const Page = struct {
log.info(.http, "fetch complete", .{ log.info(.http, "fetch complete", .{
.url = url, .url = url,
.reason = reason,
.status = status_code, .status = status_code,
.content_length = arr.items.len, .content_length = arr.items.len,
}); });
@@ -1025,25 +985,16 @@ const Script = struct {
try_catch.init(page.main_context); try_catch.init(page.main_context);
defer try_catch.deinit(); defer try_catch.deinit();
const src = self.src orelse "inline"; const src = self.src orelse page.url.raw;
log.debug(.browser, "executing script", .{ .src = src, .kind = self.kind }); log.debug(.browser, "executing script", .{ .src = src, .kind = self.kind });
_ = switch (self.kind) { const result = switch (self.kind) {
.javascript => page.main_context.exec(body, src), .javascript => page.main_context.eval(body, src),
.module => blk: { .module => page.main_context.module(body, src),
switch (try page.main_context.module(body, src)) { };
.value => |v| break :blk v,
.exception => |e| { result catch {
log.warn(.user_script, "eval module", .{
.src = src,
.err = try e.exception(page.arena),
});
return error.JsErr;
},
}
},
} catch {
if (page.delayed_navigation) { if (page.delayed_navigation) {
return error.Terminated; return error.Terminated;
} }

View File

@@ -515,6 +515,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
}; };
const PersistentObject = v8.Persistent(v8.Object); const PersistentObject = v8.Persistent(v8.Object);
const PersistentModule = v8.Persistent(v8.Module);
const PersistentFunction = v8.Persistent(v8.Function); const PersistentFunction = v8.Persistent(v8.Function);
// Loosely maps to a Browser Page. // Loosely maps to a Browser Page.
@@ -572,6 +573,16 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// Some Zig types have code to execute when the call scope ends // Some Zig types have code to execute when the call scope ends
call_scope_end_callbacks: std.ArrayListUnmanaged(CallScopeEndCallback) = .empty, call_scope_end_callbacks: std.ArrayListUnmanaged(CallScopeEndCallback) = .empty,
// Our module cache: normalized module specifier => module.
module_cache: std.StringHashMapUnmanaged(PersistentModule) = .empty,
// Module => Path. The key is the module hashcode (module.getIdentityHash)
// and the value is the full path to the module. We need to capture this
// so that when we're asked to resolve a dependent module, and all we're
// given is the specifier, we can form the full path. The full path is
// necessary to lookup/store the dependent module in the module_cache.
module_identifier: std.AutoHashMapUnmanaged(u32, []const u8) = .empty,
const ModuleLoader = struct { const ModuleLoader = struct {
ptr: *anyopaque, ptr: *anyopaque,
func: *const fn (ptr: *anyopaque, specifier: []const u8) anyerror!?[]const u8, func: *const fn (ptr: *anyopaque, specifier: []const u8) anyerror!?[]const u8,
@@ -605,6 +616,13 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
} }
} }
{
var it = self.module_cache.valueIterator();
while (it.next()) |p| {
p.deinit();
}
}
for (self.callbacks.items) |*cb| { for (self.callbacks.items) |*cb| {
cb.deinit(); cb.deinit();
} }
@@ -646,6 +664,10 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
} }
// Executes the src // Executes the src
pub fn eval(self: *JsContext, src: []const u8, name: ?[]const u8) !void {
_ = try self.exec(src, name);
}
pub fn exec(self: *JsContext, src: []const u8, name: ?[]const u8) !Value { pub fn exec(self: *JsContext, src: []const u8, name: ?[]const u8) !Value {
const isolate = self.isolate; const isolate = self.isolate;
const v8_context = self.v8_context; const v8_context = self.v8_context;
@@ -669,25 +691,31 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// compile and eval a JS module // compile and eval a JS module
// It doesn't wait for callbacks execution // It doesn't wait for callbacks execution
pub fn module(self: *JsContext, src: []const u8, name: []const u8) !union(enum) { value: Value, exception: Exception } { pub fn module(self: *JsContext, src: []const u8, url: []const u8) !void {
const v8_context = self.v8_context; const arena = self.context_arena;
const m = try compileModule(self.isolate, src, name);
const gop = try self.module_cache.getOrPut(arena, url);
if (gop.found_existing) {
return;
}
errdefer _ = self.module_cache.remove(url);
const m = try compileModule(self.isolate, src, url);
const owned_url = try arena.dupe(u8, url);
try self.module_identifier.putNoClobber(arena, m.getIdentityHash(), owned_url);
errdefer _ = self.module_identifier.remove(m.getIdentityHash());
gop.key_ptr.* = owned_url;
gop.value_ptr.* = PersistentModule.init(self.isolate, m);
// instantiate
// resolveModuleCallback loads module's dependencies. // resolveModuleCallback loads module's dependencies.
const ok = m.instantiate(v8_context, resolveModuleCallback) catch { const v8_context = self.v8_context;
return error.ExecutionError; if (try m.instantiate(v8_context, resolveModuleCallback) == false) {
};
if (!ok) {
return error.ModuleInstantiationError; return error.ModuleInstantiationError;
} }
// evaluate _ = try m.evaluate(v8_context);
const value = m.evaluate(v8_context) catch {
return .{ .exception = self.createException(m.getException()) };
};
return .{ .value = self.createValue(value) };
} }
// Wrap a v8.Exception // Wrap a v8.Exception
@@ -1234,52 +1262,74 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
c_context: ?*const v8.C_Context, c_context: ?*const v8.C_Context,
c_specifier: ?*const v8.C_String, c_specifier: ?*const v8.C_String,
import_attributes: ?*const v8.C_FixedArray, import_attributes: ?*const v8.C_FixedArray,
referrer: ?*const v8.C_Module, c_referrer: ?*const v8.C_Module,
) callconv(.C) ?*const v8.C_Module { ) callconv(.C) ?*const v8.C_Module {
_ = import_attributes; _ = import_attributes;
_ = referrer;
std.debug.assert(c_context != null);
const v8_context = v8.Context{ .handle = c_context.? }; const v8_context = v8.Context{ .handle = c_context.? };
const self: *JsContext = @ptrFromInt(v8_context.getEmbedderData(1).castTo(v8.BigInt).getUint64()); const self: *JsContext = @ptrFromInt(v8_context.getEmbedderData(1).castTo(v8.BigInt).getUint64());
// build the specifier value. const specifier = jsStringToZig(self.call_arena, .{ .handle = c_specifier.? }, self.isolate) catch |err| {
const specifier = valueToString( log.err(.js, "resolve module", .{ .err = err });
self.call_arena,
.{ .handle = c_specifier.? },
self.isolate,
v8_context,
) catch |e| {
log.err(.js, "resolve module specifier", .{ .err = e });
return null; return null;
}; };
const referrer = v8.Module{ .handle = c_referrer.? };
// not currently needed return self._resolveModuleCallback(referrer, specifier) catch |err| {
// const referrer_module = if (referrer) |ref| v8.Module{ .handle = ref } else null; log.err(.js, "resolve module", .{
const module_loader = self.module_loader;
const source = module_loader.func(module_loader.ptr, specifier) catch |err| {
log.err(.js, "resolve module fetch", .{
.err = err, .err = err,
.specifier = specifier, .specifier = specifier,
}); });
return null; return null;
} orelse return null; };
}
fn _resolveModuleCallback(
self: *JsContext,
referrer: v8.Module,
specifier: []const u8,
) !?*const v8.C_Module {
const referrer_path = self.module_identifier.get(referrer.getIdentityHash()) orelse {
// Shouldn't be possible.
return error.UnknownModuleReferrer;
};
const normalized_specifier = try @import("../url.zig").stitch(
self.call_arena,
specifier,
referrer_path,
.{ .alloc = .if_needed },
);
if (self.module_cache.get(normalized_specifier)) |pm| {
return pm.handle;
}
const module_loader = self.module_loader;
const source = try module_loader.func(module_loader.ptr, normalized_specifier) orelse return null;
var try_catch: TryCatch = undefined; var try_catch: TryCatch = undefined;
try_catch.init(self); try_catch.init(self);
defer try_catch.deinit(); defer try_catch.deinit();
const m = compileModule(self.isolate, source, specifier) catch |err| { const m = compileModule(self.isolate, source, specifier) catch |err| {
log.err(.js, "resolve module compile", .{ log.warn(.js, "compile resolved module", .{
.specifier = specifier, .specifier = specifier,
.stack = try_catch.stack(self.context_arena) catch null, .stack = try_catch.stack(self.call_arena) catch null,
.src = try_catch.sourceLine(self.context_arena) catch "err", .src = try_catch.sourceLine(self.call_arena) catch "err",
.line = try_catch.sourceLineNumber() orelse 0, .line = try_catch.sourceLineNumber() orelse 0,
.exception = (try_catch.exception(self.context_arena) catch @errorName(err)) orelse @errorName(err), .exception = (try_catch.exception(self.call_arena) catch @errorName(err)) orelse @errorName(err),
}); });
return null; return null;
}; };
// We were hoping to find the module in our cache, and thus used
// the short-lived call_arena to create the normalized_specifier.
// But now this'll live for the lifetime of the context.
const arena = self.context_arena;
const owned_specifier = try arena.dupe(u8, normalized_specifier);
try self.module_cache.put(arena, owned_specifier, PersistentModule.init(self.isolate, m));
try self.module_identifier.putNoClobber(arena, m.getIdentityHash(), owned_specifier);
return m.handle; return m.handle;
} }
}; };