Merge pull request #1840 from lightpanda-io/script_manager_arena_pool

Move ScriptManager to ArenaPool.
This commit is contained in:
Karl Seguin
2026-03-17 06:42:48 +08:00
committed by GitHub
3 changed files with 108 additions and 172 deletions

View File

@@ -110,6 +110,8 @@ use_proxy: bool,
// Current TLS verification state, applied per-connection in makeRequest. // Current TLS verification state, applied per-connection in makeRequest.
tls_verify: bool = true, tls_verify: bool = true,
obey_robots: bool,
cdp_client: ?CDPClient = null, cdp_client: ?CDPClient = null,
// libcurl can monitor arbitrary sockets, this lets us use libcurl to poll // libcurl can monitor arbitrary sockets, this lets us use libcurl to poll
@@ -154,6 +156,7 @@ pub fn init(allocator: Allocator, network: *Network) !*Client {
.http_proxy = http_proxy, .http_proxy = http_proxy,
.use_proxy = http_proxy != null, .use_proxy = http_proxy != null,
.tls_verify = network.config.tlsVerifyHost(), .tls_verify = network.config.tlsVerifyHost(),
.obey_robots = network.config.obeyRobots(),
.transfer_pool = transfer_pool, .transfer_pool = transfer_pool,
}; };
@@ -257,7 +260,10 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus {
} }
pub fn request(self: *Client, req: Request) !void { pub fn request(self: *Client, req: Request) !void {
if (self.network.config.obeyRobots()) { if (self.obey_robots == false) {
return self.processRequest(req);
}
const robots_url = try URL.getRobotsUrl(self.allocator, req.url); const robots_url = try URL.getRobotsUrl(self.allocator, req.url);
errdefer self.allocator.free(robots_url); errdefer self.allocator.free(robots_url);
@@ -280,11 +286,7 @@ pub fn request(self: *Client, req: Request) !void {
return self.processRequest(req); return self.processRequest(req);
} }
return self.fetchRobotsThenProcessRequest(robots_url, req); return self.fetchRobotsThenProcessRequest(robots_url, req);
}
return self.processRequest(req);
} }
fn processRequest(self: *Client, req: Request) !void { fn processRequest(self: *Client, req: Request) !void {

View File

@@ -63,9 +63,6 @@ shutdown: bool = false,
client: *HttpClient, client: *HttpClient,
allocator: Allocator, allocator: Allocator,
buffer_pool: BufferPool,
script_pool: std.heap.MemoryPool(Script),
// We can download multiple sync modules in parallel, but we want to process // We can download multiple sync modules in parallel, but we want to process
// them in order. We can't use an std.DoublyLinkedList, like the other script types, // them in order. We can't use an std.DoublyLinkedList, like the other script types,
@@ -101,18 +98,14 @@ pub fn init(allocator: Allocator, http_client: *HttpClient, page: *Page) ScriptM
.imported_modules = .empty, .imported_modules = .empty,
.client = http_client, .client = http_client,
.static_scripts_done = false, .static_scripts_done = false,
.buffer_pool = BufferPool.init(allocator, 5),
.page_notified_of_completion = false, .page_notified_of_completion = false,
.script_pool = std.heap.MemoryPool(Script).init(allocator),
}; };
} }
pub fn deinit(self: *ScriptManager) void { pub fn deinit(self: *ScriptManager) void {
// necessary to free any buffers scripts may be referencing // necessary to free any arenas scripts may be referencing
self.reset(); self.reset();
self.buffer_pool.deinit();
self.script_pool.deinit();
self.imported_modules.deinit(self.allocator); self.imported_modules.deinit(self.allocator);
// we don't deinit self.importmap b/c we use the page's arena for its // we don't deinit self.importmap b/c we use the page's arena for its
// allocations. // allocations.
@@ -121,7 +114,10 @@ pub fn deinit(self: *ScriptManager) void {
pub fn reset(self: *ScriptManager) void { pub fn reset(self: *ScriptManager) void {
var it = self.imported_modules.valueIterator(); var it = self.imported_modules.valueIterator();
while (it.next()) |value_ptr| { while (it.next()) |value_ptr| {
self.buffer_pool.release(value_ptr.buffer); switch (value_ptr.state) {
.done => |script| script.deinit(),
else => {},
}
} }
self.imported_modules.clearRetainingCapacity(); self.imported_modules.clearRetainingCapacity();
@@ -138,13 +134,13 @@ pub fn reset(self: *ScriptManager) void {
fn clearList(list: *std.DoublyLinkedList) void { fn clearList(list: *std.DoublyLinkedList) void {
while (list.popFirst()) |n| { while (list.popFirst()) |n| {
const script: *Script = @fieldParentPtr("node", n); const script: *Script = @fieldParentPtr("node", n);
script.deinit(true); script.deinit();
} }
} }
pub fn getHeaders(self: *ScriptManager, url: [:0]const u8) !net_http.Headers { fn getHeaders(self: *ScriptManager, arena: Allocator, url: [:0]const u8) !net_http.Headers {
var headers = try self.client.newHeaders(); var headers = try self.client.newHeaders();
try self.page.headersForRequest(self.page.arena, url, &headers); try self.page.headersForRequest(arena, url, &headers);
return headers; return headers;
} }
@@ -191,19 +187,26 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
return; return;
}; };
var handover = false;
const page = self.page; const page = self.page;
const arena = try page.getArena(.{ .debug = "addFromElement" });
errdefer if (!handover) {
page.releaseArena(arena);
};
var source: Script.Source = undefined; var source: Script.Source = undefined;
var remote_url: ?[:0]const u8 = null; var remote_url: ?[:0]const u8 = null;
const base_url = page.base(); const base_url = page.base();
if (element.getAttributeSafe(comptime .wrap("src"))) |src| { if (element.getAttributeSafe(comptime .wrap("src"))) |src| {
if (try parseDataURI(page.arena, src)) |data_uri| { if (try parseDataURI(arena, src)) |data_uri| {
source = .{ .@"inline" = data_uri }; source = .{ .@"inline" = data_uri };
} else { } else {
remote_url = try URL.resolve(page.arena, base_url, src, .{}); remote_url = try URL.resolve(arena, base_url, src, .{});
source = .{ .remote = .{} }; source = .{ .remote = .{} };
} }
} else { } else {
var buf = std.Io.Writer.Allocating.init(page.arena); var buf = std.Io.Writer.Allocating.init(arena);
try element.asNode().getChildTextContent(&buf.writer); try element.asNode().getChildTextContent(&buf.writer);
try buf.writer.writeByte(0); try buf.writer.writeByte(0);
const data = buf.written(); const data = buf.written();
@@ -218,15 +221,13 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
// Only set _executed (already-started) when we actually have content to execute // Only set _executed (already-started) when we actually have content to execute
script_element._executed = true; script_element._executed = true;
const script = try self.script_pool.create();
errdefer self.script_pool.destroy(script);
const is_inline = source == .@"inline"; const is_inline = source == .@"inline";
const script = try arena.create(Script);
script.* = .{ script.* = .{
.kind = kind, .kind = kind,
.node = .{}, .node = .{},
.arena = arena,
.manager = self, .manager = self,
.source = source, .source = source,
.script_element = script_element, .script_element = script_element,
@@ -270,7 +271,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
if (is_blocking == false) { if (is_blocking == false) {
self.scriptList(script).remove(&script.node); self.scriptList(script).remove(&script.node);
} }
script.deinit(true); script.deinit();
} }
try self.client.request(.{ try self.client.request(.{
@@ -278,7 +279,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
.ctx = script, .ctx = script,
.method = .GET, .method = .GET,
.frame_id = page._frame_id, .frame_id = page._frame_id,
.headers = try self.getHeaders(url), .headers = try self.getHeaders(arena, url),
.blocking = is_blocking, .blocking = is_blocking,
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
.resource_type = .script, .resource_type = .script,
@@ -289,6 +290,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
.done_callback = Script.doneCallback, .done_callback = Script.doneCallback,
.error_callback = Script.errorCallback, .error_callback = Script.errorCallback,
}); });
handover = true;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
var ls: js.Local.Scope = undefined; var ls: js.Local.Scope = undefined;
@@ -318,7 +320,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
} }
if (script.status == 0) { if (script.status == 0) {
// an error (that we already logged) // an error (that we already logged)
script.deinit(true); script.deinit();
return; return;
} }
@@ -327,7 +329,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
self.is_evaluating = true; self.is_evaluating = true;
defer { defer {
self.is_evaluating = was_evaluating; self.is_evaluating = was_evaluating;
script.deinit(true); script.deinit();
} }
return script.eval(page); return script.eval(page);
} }
@@ -359,11 +361,14 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
} }
errdefer _ = self.imported_modules.remove(url); errdefer _ = self.imported_modules.remove(url);
const script = try self.script_pool.create(); const page = self.page;
errdefer self.script_pool.destroy(script); const arena = try page.getArena(.{ .debug = "preloadImport" });
errdefer page.releaseArena(arena);
const script = try arena.create(Script);
script.* = .{ script.* = .{
.kind = .module, .kind = .module,
.arena = arena,
.url = url, .url = url,
.node = .{}, .node = .{},
.manager = self, .manager = self,
@@ -373,11 +378,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
.mode = .import, .mode = .import,
}; };
gop.value_ptr.* = ImportedModule{ gop.value_ptr.* = ImportedModule{};
.manager = self,
};
const page = self.page;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
var ls: js.Local.Scope = undefined; var ls: js.Local.Scope = undefined;
@@ -392,12 +393,18 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
}); });
} }
try self.client.request(.{ // This seems wrong since we're not dealing with an async import (unlike
// getAsyncModule below), but all we're trying to do here is pre-load the
// script for execution at some point in the future (when waitForImport is
// called).
self.async_scripts.append(&script.node);
self.client.request(.{
.url = url, .url = url,
.ctx = script, .ctx = script,
.method = .GET, .method = .GET,
.frame_id = page._frame_id, .frame_id = page._frame_id,
.headers = try self.getHeaders(url), .headers = try self.getHeaders(arena, url),
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
.resource_type = .script, .resource_type = .script,
.notification = page._session.notification, .notification = page._session.notification,
@@ -406,13 +413,10 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
.data_callback = Script.dataCallback, .data_callback = Script.dataCallback,
.done_callback = Script.doneCallback, .done_callback = Script.doneCallback,
.error_callback = Script.errorCallback, .error_callback = Script.errorCallback,
}); }) catch |err| {
self.async_scripts.remove(&script.node);
// This seems wrong since we're not dealing with an async import (unlike return err;
// getAsyncModule below), but all we're trying to do here is pre-load the };
// script for execution at some point in the future (when waitForImport is
// called).
self.async_scripts.append(&script.node);
} }
pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource { pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource {
@@ -433,12 +437,12 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource {
_ = try client.tick(200); _ = try client.tick(200);
continue; continue;
}, },
.done => { .done => |script| {
var shared = false; var shared = false;
const buffer = entry.value_ptr.buffer; const buffer = entry.value_ptr.buffer;
const waiters = entry.value_ptr.waiters; const waiters = entry.value_ptr.waiters;
if (waiters == 0) { if (waiters == 1) {
self.imported_modules.removeByPtr(entry.key_ptr); self.imported_modules.removeByPtr(entry.key_ptr);
} else { } else {
shared = true; shared = true;
@@ -447,7 +451,7 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource {
return .{ return .{
.buffer = buffer, .buffer = buffer,
.shared = shared, .shared = shared,
.buffer_pool = &self.buffer_pool, .script = script,
}; };
}, },
.err => return error.Failed, .err => return error.Failed,
@@ -456,11 +460,14 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource {
} }
pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void {
const script = try self.script_pool.create(); const page = self.page;
errdefer self.script_pool.destroy(script); const arena = try page.getArena(.{ .debug = "getAsyncImport" });
errdefer page.releaseArena(arena);
const script = try arena.create(Script);
script.* = .{ script.* = .{
.kind = .module, .kind = .module,
.arena = arena,
.url = url, .url = url,
.node = .{}, .node = .{},
.manager = self, .manager = self,
@@ -473,7 +480,6 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
} }, } },
}; };
const page = self.page;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
var ls: js.Local.Scope = undefined; var ls: js.Local.Scope = undefined;
page.js.localScope(&ls); page.js.localScope(&ls);
@@ -496,11 +502,12 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
self.is_evaluating = true; self.is_evaluating = true;
defer self.is_evaluating = was_evaluating; defer self.is_evaluating = was_evaluating;
try self.client.request(.{ self.async_scripts.append(&script.node);
self.client.request(.{
.url = url, .url = url,
.method = .GET, .method = .GET,
.frame_id = page._frame_id, .frame_id = page._frame_id,
.headers = try self.getHeaders(url), .headers = try self.getHeaders(arena, url),
.ctx = script, .ctx = script,
.resource_type = .script, .resource_type = .script,
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
@@ -510,9 +517,10 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
.data_callback = Script.dataCallback, .data_callback = Script.dataCallback,
.done_callback = Script.doneCallback, .done_callback = Script.doneCallback,
.error_callback = Script.errorCallback, .error_callback = Script.errorCallback,
}); }) catch |err| {
self.async_scripts.remove(&script.node);
self.async_scripts.append(&script.node); return err;
};
} }
// Called from the Page to let us know it's done parsing the HTML. Necessary that // Called from the Page to let us know it's done parsing the HTML. Necessary that
@@ -537,18 +545,18 @@ fn evaluate(self: *ScriptManager) void {
var script: *Script = @fieldParentPtr("node", n); var script: *Script = @fieldParentPtr("node", n);
switch (script.mode) { switch (script.mode) {
.async => { .async => {
defer script.deinit(true); defer script.deinit();
script.eval(page); script.eval(page);
}, },
.import_async => |ia| { .import_async => |ia| {
defer script.deinit(false);
if (script.status < 200 or script.status > 299) { if (script.status < 200 or script.status > 299) {
script.deinit();
ia.callback(ia.data, error.FailedToLoad); ia.callback(ia.data, error.FailedToLoad);
} else { } else {
ia.callback(ia.data, .{ ia.callback(ia.data, .{
.shared = false, .shared = false,
.script = script,
.buffer = script.source.remote, .buffer = script.source.remote,
.buffer_pool = &self.buffer_pool,
}); });
} }
}, },
@@ -574,7 +582,7 @@ fn evaluate(self: *ScriptManager) void {
} }
defer { defer {
_ = self.defer_scripts.popFirst(); _ = self.defer_scripts.popFirst();
script.deinit(true); script.deinit();
} }
script.eval(page); script.eval(page);
} }
@@ -625,11 +633,12 @@ fn parseImportmap(self: *ScriptManager, script: *const Script) !void {
} }
pub const Script = struct { pub const Script = struct {
complete: bool,
kind: Kind, kind: Kind,
complete: bool,
status: u16 = 0, status: u16 = 0,
source: Source, source: Source,
url: []const u8, url: []const u8,
arena: Allocator,
mode: ExecutionMode, mode: ExecutionMode,
node: std.DoublyLinkedList.Node, node: std.DoublyLinkedList.Node,
script_element: ?*Element.Html.Script, script_element: ?*Element.Html.Script,
@@ -680,11 +689,8 @@ pub const Script = struct {
import_async: ImportAsync, import_async: ImportAsync,
}; };
fn deinit(self: *Script, comptime release_buffer: bool) void { fn deinit(self: *Script) void {
if ((comptime release_buffer) and self.source == .remote) { self.manager.page.releaseArena(self.arena);
self.manager.buffer_pool.release(self.source.remote);
}
self.manager.script_pool.destroy(self);
} }
fn startCallback(transfer: *HttpClient.Transfer) !void { fn startCallback(transfer: *HttpClient.Transfer) !void {
@@ -750,9 +756,9 @@ pub const Script = struct {
} }
lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity });
var buffer = self.manager.buffer_pool.get(); var buffer: std.ArrayList(u8) = .empty;
if (transfer.getContentLength()) |cl| { if (transfer.getContentLength()) |cl| {
try buffer.ensureTotalCapacity(self.manager.allocator, cl); try buffer.ensureTotalCapacity(self.arena, cl);
} }
self.source = .{ .remote = buffer }; self.source = .{ .remote = buffer };
return true; return true;
@@ -766,7 +772,7 @@ pub const Script = struct {
}; };
} }
fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void { fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void {
try self.source.remote.appendSlice(self.manager.allocator, data); try self.source.remote.appendSlice(self.arena, data);
} }
fn doneCallback(ctx: *anyopaque) !void { fn doneCallback(ctx: *anyopaque) !void {
@@ -783,9 +789,8 @@ pub const Script = struct {
} else if (self.mode == .import) { } else if (self.mode == .import) {
manager.async_scripts.remove(&self.node); manager.async_scripts.remove(&self.node);
const entry = manager.imported_modules.getPtr(self.url).?; const entry = manager.imported_modules.getPtr(self.url).?;
entry.state = .done; entry.state = .{ .done = self };
entry.buffer = self.source.remote; entry.buffer = self.source.remote;
self.deinit(false);
} }
manager.evaluate(); manager.evaluate();
} }
@@ -811,7 +816,7 @@ pub const Script = struct {
const manager = self.manager; const manager = self.manager;
manager.scriptList(self).remove(&self.node); manager.scriptList(self).remove(&self.node);
if (manager.shutdown) { if (manager.shutdown) {
self.deinit(true); self.deinit();
return; return;
} }
@@ -823,7 +828,7 @@ pub const Script = struct {
}, },
else => {}, else => {},
} }
self.deinit(true); self.deinit();
manager.evaluate(); manager.evaluate();
} }
@@ -951,76 +956,6 @@ pub const Script = struct {
} }
}; };
const BufferPool = struct {
count: usize,
available: List = .{},
allocator: Allocator,
max_concurrent_transfers: u8,
mem_pool: std.heap.MemoryPool(Container),
const List = std.SinglyLinkedList;
const Container = struct {
node: List.Node,
buf: std.ArrayList(u8),
};
fn init(allocator: Allocator, max_concurrent_transfers: u8) BufferPool {
return .{
.available = .{},
.count = 0,
.allocator = allocator,
.max_concurrent_transfers = max_concurrent_transfers,
.mem_pool = std.heap.MemoryPool(Container).init(allocator),
};
}
fn deinit(self: *BufferPool) void {
const allocator = self.allocator;
var node = self.available.first;
while (node) |n| {
const container: *Container = @fieldParentPtr("node", n);
container.buf.deinit(allocator);
node = n.next;
}
self.mem_pool.deinit();
}
fn get(self: *BufferPool) std.ArrayList(u8) {
const node = self.available.popFirst() orelse {
// return a new buffer
return .{};
};
self.count -= 1;
const container: *Container = @fieldParentPtr("node", node);
defer self.mem_pool.destroy(container);
return container.buf;
}
fn release(self: *BufferPool, buffer: ArrayList(u8)) void {
// create mutable copy
var b = buffer;
if (self.count == self.max_concurrent_transfers) {
b.deinit(self.allocator);
return;
}
const container = self.mem_pool.create() catch |err| {
b.deinit(self.allocator);
log.err(.http, "SM BufferPool release", .{ .err = err });
return;
};
b.clearRetainingCapacity();
container.* = .{ .buf = b, .node = .{} };
self.count += 1;
self.available.prepend(&container.node);
}
};
const ImportAsync = struct { const ImportAsync = struct {
data: *anyopaque, data: *anyopaque,
callback: ImportAsync.Callback, callback: ImportAsync.Callback,
@@ -1030,12 +965,12 @@ const ImportAsync = struct {
pub const ModuleSource = struct { pub const ModuleSource = struct {
shared: bool, shared: bool,
buffer_pool: *BufferPool, script: *Script,
buffer: std.ArrayList(u8), buffer: std.ArrayList(u8),
pub fn deinit(self: *ModuleSource) void { pub fn deinit(self: *ModuleSource) void {
if (self.shared == false) { if (self.shared == false) {
self.buffer_pool.release(self.buffer); self.script.deinit();
} }
} }
@@ -1045,15 +980,14 @@ pub const ModuleSource = struct {
}; };
const ImportedModule = struct { const ImportedModule = struct {
manager: *ScriptManager, waiters: u16 = 1,
state: State = .loading, state: State = .loading,
buffer: std.ArrayList(u8) = .{}, buffer: std.ArrayList(u8) = .{},
waiters: u16 = 1,
const State = enum { const State = union(enum) {
err, err,
done,
loading, loading,
done: *Script,
}; };
}; };

View File

@@ -310,15 +310,15 @@ pub fn module(self: *Context, comptime want_result: bool, local: *const js.Local
} }
const owned_url = try arena.dupeZ(u8, url); const owned_url = try arena.dupeZ(u8, url);
if (cacheable and !gop.found_existing) {
gop.key_ptr.* = owned_url;
}
const m = try compileModule(local, src, owned_url); const m = try compileModule(local, src, owned_url);
if (cacheable) { if (cacheable) {
// compileModule is synchronous - nothing can modify the cache during compilation // compileModule is synchronous - nothing can modify the cache during compilation
lp.assert(gop.value_ptr.module == null, "Context.module has module", .{}); lp.assert(gop.value_ptr.module == null, "Context.module has module", .{});
gop.value_ptr.module = try m.persist(); gop.value_ptr.module = try m.persist();
if (!gop.found_existing) {
gop.key_ptr.* = owned_url;
}
} }
break :blk .{ m, owned_url }; break :blk .{ m, owned_url };