From f45726d61f711ede3278c7699c777c37519c4c16 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Sun, 3 Aug 2025 20:27:23 +0800 Subject: [PATCH] ScriptManager & HttpClient support for JS modules Improve cleanup/shutdown (*cough* memory leaks *cough*) --- src/browser/ScriptManager.zig | 134 +++++++++++++++++++++++++++++++--- src/browser/page.zig | 17 ++--- src/http/Client.zig | 132 +++++++++++++++++++-------------- src/runtime/js.zig | 103 ++++++++++++++------------ 4 files changed, 262 insertions(+), 124 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 20b5906b..579f9560 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -77,7 +77,6 @@ pub fn deinit(self: *ScriptManager) void { } pub fn reset(self: *ScriptManager) void { - self.client.abort(); self.clearList(&self.scripts); self.clearList(&self.deferred); self.static_scripts_done = false; @@ -178,6 +177,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { if (source == .@"inline" and self.scripts.first == null) { // inline script with no pending scripts, execute it immediately. + // (if there is a pending script, then we cannot execute this immediately + // as it needs to be executed in order) return script.eval(page); } @@ -187,7 +188,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { .script = script, .complete = false, .manager = self, - .node = undefined, + .node = .{ .data = pending_script }, }; if (source == .@"inline") { @@ -211,6 +212,34 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { }); } +pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { + var blocking = Blocking{ + .allocator = self.allocator, + .buffer_pool = &self.buffer_pool, + }; + + var client = self.client; + try client.request(.{ + .url = url, + .method = .GET, + .ctx = &blocking, + .header_done_callback = Blocking.headerCallback, + .data_callback = Blocking.dataCallback, + .done_callback = Blocking.doneCallback, + .error_callback = Blocking.errorCallback, + }); + + // rely on http's timeout settings to avoid an endless/long loop. + while (true) { + try client.tick(1000); + switch (blocking.state) { + .running => {}, + .done => |result| return result, + .err => |err| return err, + } + } +} + pub fn staticScriptsDone(self: *ScriptManager) void { std.debug.assert(self.static_scripts_done == false); self.static_scripts_done = true; @@ -350,6 +379,11 @@ const PendingScript = struct { if (self.manager.getList(&self.script)) |list| { self.node.data = self; list.append(&self.node); + } else { + // async scripts don't get added to a list, because we can execute + // them in any order + std.debug.assert(self.script.is_async); + self.manager.async_count += 1; } // if the script is async, it isn't tracked in a list, because we can @@ -530,8 +564,8 @@ const Script = struct { }; const BufferPool = struct { - free: List = .{}, - available: usize, + count: usize, + available: List = .{}, allocator: Allocator, max_concurrent_transfers: u8, node_pool: std.heap.MemoryPool(List.Node), @@ -540,8 +574,8 @@ const BufferPool = struct { fn init(allocator: Allocator, max_concurrent_transfers: u8) BufferPool { return .{ - .free = .{}, - .available = 0, + .available = .{}, + .count = 0, .allocator = allocator, .max_concurrent_transfers = max_concurrent_transfers, .node_pool = std.heap.MemoryPool(List.Node).init(allocator), @@ -551,20 +585,21 @@ const BufferPool = struct { fn deinit(self: *BufferPool) void { const allocator = self.allocator; - var node = self.free.first; + var node = self.available.first; while (node) |n| { - node = n.next; n.data.deinit(allocator); + node = n.next; } self.node_pool.deinit(); } fn get(self: *BufferPool) ArrayListUnmanaged(u8) { - const node = self.free.popFirst() orelse { + const node = self.available.popFirst() orelse { // return a new buffer return .{}; }; + self.count -= 1; defer self.node_pool.destroy(node); return node.data; } @@ -575,8 +610,9 @@ const BufferPool = struct { // create mutable copy var b = buffer; - if (self.available == self.max_concurrent_transfers) { + if (self.count == self.max_concurrent_transfers) { b.deinit(self.allocator); + return; } const node = self.node_pool.create() catch |err| { @@ -587,7 +623,81 @@ const BufferPool = struct { b.clearRetainingCapacity(); node.data = b; - self.available += 1; - self.free.append(node); + self.count += 1; + self.available.append(node); + } +}; + +const Blocking = struct { + allocator: Allocator, + buffer_pool: *BufferPool, + state: State = .{ .running = {} }, + buffer: std.ArrayListUnmanaged(u8) = .{}, + + const State = union(enum) { + running: void, + err: anyerror, + done: BlockingResult, + }; + + fn startCallback(transfer: *HttpClient.Transfer) !void { + log.debug(.http, "script fetch start", .{ .req = transfer, .blocking = true }); + } + + fn headerCallback(transfer: *HttpClient.Transfer) !void { + const header = &transfer.response_header.?; + log.debug(.http, "script header", .{ + .req = transfer, + .blocking = true, + .status = header.status, + .content_type = header.contentType(), + }); + + if (header.status != 200) { + return error.InvalidStatusCode; + } + + var self: *Blocking = @alignCast(@ptrCast(transfer.ctx)); + self.buffer = self.buffer_pool.get(); + } + + fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { + var self: *Blocking = @alignCast(@ptrCast(transfer.ctx)); + self.buffer.appendSlice(self.allocator, data) catch |err| { + log.err(.http, "SM.dataCallback", .{ + .err = err, + .len = data.len, + .blocking = true, + .transfer = transfer, + }); + return err; + }; + } + + fn doneCallback(ctx: *anyopaque) !void { + var self: *Blocking = @alignCast(@ptrCast(ctx)); + self.state = .{ .done = .{ + .buffer = self.buffer, + .buffer_pool = self.buffer_pool, + } }; + } + + fn errorCallback(ctx: *anyopaque, err: anyerror) void { + var self: *Blocking = @alignCast(@ptrCast(ctx)); + self.state = .{ .err = err }; + self.buffer_pool.release(self.buffer); + } +}; + +pub const BlockingResult = struct { + buffer: std.ArrayListUnmanaged(u8), + buffer_pool: *BufferPool, + + pub fn deinit(self: *BlockingResult) void { + self.buffer_pool.release(self.buffer); + } + + pub fn src(self: *const BlockingResult) []const u8 { + return self.buffer.items; } }; diff --git a/src/browser/page.zig b/src/browser/page.zig index 7743da94..a556d4e1 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -150,12 +150,13 @@ pub const Page = struct { } fn reset(self: *Page) void { - _ = self.session.browser.page_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 }); - // this will reset the http_client - self.script_manager.reset(); self.scheduler.reset(); + self.http_client.abort(); + self.script_manager.reset(); + self.document_state = .parsing; self.mode = .{ .pre = {} }; + _ = self.session.browser.page_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 }); } fn runMicrotasks(ctx: *anyopaque) ?u32 { @@ -229,13 +230,9 @@ pub const Page = struct { try Node.prepend(head, &[_]Node.NodeOrText{.{ .node = parser.elementToNode(base) }}); } - pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 { - _ = ctx; - _ = src; - // @newhttp - return error.NewHTTP; - // const self: *Page = @ptrCast(@alignCast(ctx)); - // return self.fetchData("module", src); + pub fn fetchModuleSource(ctx: *anyopaque, src: [:0]const u8) !ScriptManager.BlockingResult { + const self: *Page = @ptrCast(@alignCast(ctx)); + return self.script_manager.blockingGet(src); } pub fn wait(self: *Page, wait_sec: usize) void { diff --git a/src/http/Client.zig b/src/http/Client.zig index 98353020..c6d4e935 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -52,6 +52,8 @@ transfer_pool: std.heap.MemoryPool(Transfer), queue_node_pool: std.heap.MemoryPool(RequestQueue.Node), //@newhttp http_proxy: ?std.Uri = null, +blocking: Handle, +blocking_active: if (builtin.mode == .Debug) bool else void = if (builtin.mode == .Debug) false else {}, const RequestQueue = std.DoublyLinkedList(Request); @@ -69,13 +71,17 @@ pub fn init(allocator: Allocator, ca_blob: c.curl_blob, opts: Http.Opts) !*Clien errdefer _ = c.curl_multi_cleanup(multi); var handles = try Handles.init(allocator, client, ca_blob, opts); - errdefer handles.deinit(allocator, multi); + errdefer handles.deinit(allocator); + + var blocking = try Handle.init(client, ca_blob, opts); + errdefer blocking.deinit(); client.* = .{ .queue = .{}, .active = 0, .multi = multi, .handles = handles, + .blocking = blocking, .allocator = allocator, .transfer_pool = transfer_pool, .queue_node_pool = queue_node_pool, @@ -85,7 +91,10 @@ pub fn init(allocator: Allocator, ca_blob: c.curl_blob, opts: Http.Opts) !*Clien } pub fn deinit(self: *Client) void { - self.handles.deinit(self.allocator, self.multi); + self.abort(); + self.blocking.deinit(); + self.handles.deinit(self.allocator); + _ = c.curl_multi_cleanup(self.multi); self.transfer_pool.deinit(); @@ -94,7 +103,15 @@ pub fn deinit(self: *Client) void { } pub fn abort(self: *Client) void { - self.handles.abort(self.multi); + while (self.handles.in_use.first) |node| { + var transfer = Transfer.fromEasy(node.data.easy) catch |err| { + log.err(.http, "get private info", .{ .err = err, .source = "abort" }); + continue; + }; + transfer.req.error_callback(transfer.ctx, error.Abort); + self.endTransfer(transfer); + } + std.debug.assert(self.active == 0); var n = self.queue.first; while (n) |node| { @@ -102,7 +119,6 @@ pub fn abort(self: *Client) void { self.queue_node_pool.destroy(node); } self.queue = .{}; - self.active = 0; // Maybe a bit of overkill // We can remove some (all?) of these once we're confident its right. @@ -116,17 +132,18 @@ pub fn abort(self: *Client) void { } pub fn tick(self: *Client, timeout_ms: usize) !void { - var handles = &self.handles.available; + var handles = &self.handles; while (true) { - if (handles.first == null) { + if (handles.isEmpty()) { break; } const queue_node = self.queue.popFirst() orelse break; + const req = queue_node.data; + self.queue_node_pool.destroy(queue_node); - defer self.queue_node_pool.destroy(queue_node); - - const handle = handles.popFirst().?.data; - try self.makeRequest(handle, queue_node.data); + // we know this exists, because we checked isEmpty() above + const handle = handles.getFreeHandle().?; + try self.makeRequest(handle, req); } try self.perform(@intCast(timeout_ms)); @@ -142,6 +159,19 @@ pub fn request(self: *Client, req: Request) !void { self.queue.append(node); } +// See ScriptManager.blockingGet +pub fn blockingRequest(self: *Client, req: Request) !void { + if (comptime builtin.mode == .Debug) { + std.debug.assert(self.blocking_active == false); + self.blocking_active = true; + } + defer if (comptime builtin.mode == .Debug) { + self.blocking_active = false; + }; + + return self.makeRequest(&self.blocking, req); +} + fn makeRequest(self: *Client, handle: *Handle, req: Request) !void { const easy = handle.easy; @@ -215,8 +245,9 @@ fn perform(self: *Client, timeout_ms: c_int) !void { const ctx = transfer.ctx; const done_callback = transfer.req.done_callback; const error_callback = transfer.req.error_callback; - // release it ASAP so that it's avaiable (since some done_callbacks - // will load more resources). + + // release it ASAP so that it's available; some done_callbacks + // will load more resources. self.endTransfer(transfer); if (errorCheck(msg.data.result)) { @@ -266,18 +297,9 @@ const Handles = struct { var available: HandleList = .{}; for (0..count) |i| { - const easy = c.curl_easy_init() orelse return error.FailedToInitializeEasy; - errdefer _ = c.curl_easy_cleanup(easy); - - handles[i] = .{ - .easy = easy, - .client = client, - .node = undefined, - }; - try handles[i].configure(ca_blob, opts); - - handles[i].node.data = &handles[i]; - available.append(&handles[i].node); + handles[i] = try Handle.init(client, ca_blob, opts); + handles[i].node = .{ .data = &handles[i] }; + available.append(&handles[i].node.?); } return .{ @@ -287,22 +309,15 @@ const Handles = struct { }; } - fn deinit(self: *Handles, allocator: Allocator, multi: *c.CURLM) void { - self.abort(multi); + fn deinit(self: *Handles, allocator: Allocator) void { for (self.handles) |*h| { - _ = c.curl_easy_cleanup(h.easy); + h.deinit(); } allocator.free(self.handles); } - fn abort(self: *Handles, multi: *c.CURLM) void { - while (self.in_use.first) |node| { - const handle = node.data; - errorMCheck(c.curl_multi_remove_handle(multi, handle.easy)) catch |err| { - log.err(.http, "remove handle", .{ .err = err }); - }; - self.release(handle); - } + fn isEmpty(self: *const Handles) bool { + return self.available.first == null; } fn getFreeHandle(self: *Handles) ?*Handle { @@ -316,7 +331,10 @@ const Handles = struct { } fn release(self: *Handles, handle: *Handle) void { - const node = &handle.node; + // client.blocking is a handle without a node, it doesn't exist in the + // eitehr the in_use or available lists. + const node = &(handle.node orelse return); + self.in_use.remove(node); node.prev = null; node.next = null; @@ -324,19 +342,15 @@ const Handles = struct { } }; -// wraps a c.CURL (an easy handle), mostly to make it easier to keep a -// handle_pool and error_buffer associated with each easy handle +// wraps a c.CURL (an easy handle) const Handle = struct { easy: *c.CURL, client: *Client, - node: Handles.HandleList.Node, - error_buffer: [c.CURL_ERROR_SIZE:0]u8 = undefined, + node: ?Handles.HandleList.Node, - // Is called by Handles when already partially initialized. Done like this - // so that we have a stable pointer to error_buffer. - fn configure(self: *Handle, ca_blob: c.curl_blob, opts: Http.Opts) !void { - const easy = self.easy; - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_ERRORBUFFER, &self.error_buffer)); + fn init(client: *Client, ca_blob: c.curl_blob, opts: Http.Opts) !Handle { + const easy = c.curl_easy_init() orelse return error.FailedToInitializeEasy; + errdefer _ = c.curl_easy_cleanup(easy); // timeouts try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_TIMEOUT_MS, @as(c_long, @intCast(opts.timeout_ms)))); @@ -348,9 +362,9 @@ const Handle = struct { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_REDIR_PROTOCOLS_STR, "HTTP,HTTPS")); // remove FTP and FTPS from the default // callbacks - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HEADERDATA, self)); + try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HEADERDATA, easy)); try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HEADERFUNCTION, Transfer.headerCallback)); - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEDATA, self)); + try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEDATA, easy)); try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEFUNCTION, Transfer.bodyCallback)); // tls @@ -367,6 +381,16 @@ const Handle = struct { if (comptime Http.ENABLE_DEBUG) { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_VERBOSE, @as(c_long, 1))); } + + return .{ + .easy = easy, + .node = null, + .client = client, + }; + } + + fn deinit(self: *const Handle) void { + _ = c.curl_easy_cleanup(self.easy); } }; @@ -430,9 +454,9 @@ pub const Transfer = struct { // libcurl should only ever emit 1 header at a time std.debug.assert(header_count == 1); - const handle: *Handle = @alignCast(@ptrCast(data)); - var transfer = fromEasy(handle.easy) catch |err| { - log.err(.http, "get private info", .{ .err = err }); + const easy: *c.CURL = @alignCast(@ptrCast(data)); + var transfer = fromEasy(easy) catch |err| { + log.err(.http, "get private info", .{ .err = err, .source = "header callback" }); return 0; }; @@ -467,7 +491,7 @@ pub const Transfer = struct { transfer._redirecting = false; var url: [*c]u8 = undefined; - errorCheck(c.curl_easy_getinfo(handle.easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { + errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { log.err(.http, "failed to get URL", .{ .err = err }); return 0; }; @@ -514,9 +538,9 @@ pub const Transfer = struct { // libcurl should only ever emit 1 chunk at a time std.debug.assert(chunk_count == 1); - const handle: *Handle = @alignCast(@ptrCast(data)); - var transfer = fromEasy(handle.easy) catch |err| { - log.err(.http, "get private info", .{ .err = err }); + const easy: *c.CURL = @alignCast(@ptrCast(data)); + var transfer = fromEasy(easy) catch |err| { + log.err(.http, "get private info", .{ .err = err, .source = "body callback" }); return c.CURL_WRITEFUNC_ERROR; }; diff --git a/src/runtime/js.zig b/src/runtime/js.zig index 8b724a25..00345918 100644 --- a/src/runtime/js.zig +++ b/src/runtime/js.zig @@ -668,7 +668,11 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { const ModuleLoader = struct { ptr: *anyopaque, - func: *const fn (ptr: *anyopaque, specifier: []const u8) anyerror!?[]const u8, + func: *const fn (ptr: *anyopaque, url: [:0]const u8) anyerror!BlockingResult, + + // Don't like having to reach into ../browser/ here. But can't think + // of a good way to fix this. + const BlockingResult = @import("../browser/ScriptManager.zig").BlockingResult; }; // no init, started with executor.createJsContext() @@ -1416,11 +1420,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { }; } - fn _resolveModuleCallback( - self: *JsContext, - referrer: v8.Module, - specifier: []const u8, - ) !?*const v8.C_Module { + fn _resolveModuleCallback(self: *JsContext, referrer: v8.Module, specifier: []const u8) !?*const v8.C_Module { const referrer_path = self.module_identifier.get(referrer.getIdentityHash()) orelse { // Shouldn't be possible. return error.UnknownModuleReferrer; @@ -1430,29 +1430,32 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { self.call_arena, specifier, referrer_path, - .{ .alloc = .if_needed }, + .{ .alloc = .if_needed, .null_terminated = true }, ); if (self.module_cache.get(normalized_specifier)) |pm| { return pm.handle; } - const module_loader = self.module_loader; - const source = try module_loader.func(module_loader.ptr, normalized_specifier) orelse return null; + const m: v8.Module = blk: { + const module_loader = self.module_loader; + var fetch_result = try module_loader.func(module_loader.ptr, normalized_specifier); + defer fetch_result.deinit(); - var try_catch: TryCatch = undefined; - try_catch.init(self); - defer try_catch.deinit(); + var try_catch: TryCatch = undefined; + try_catch.init(self); + defer try_catch.deinit(); - const m = compileModule(self.isolate, source, normalized_specifier) catch |err| { - log.warn(.js, "compile resolved module", .{ - .specifier = specifier, - .stack = try_catch.stack(self.call_arena) catch null, - .src = try_catch.sourceLine(self.call_arena) catch "err", - .line = try_catch.sourceLineNumber() orelse 0, - .exception = (try_catch.exception(self.call_arena) catch @errorName(err)) orelse @errorName(err), - }); - return null; + break :blk compileModule(self.isolate, fetch_result.src(), normalized_specifier) catch |err| { + log.warn(.js, "compile resolved module", .{ + .specifier = specifier, + .stack = try_catch.stack(self.call_arena) catch null, + .src = try_catch.sourceLine(self.call_arena) catch "err", + .line = try_catch.sourceLineNumber() orelse 0, + .exception = (try_catch.exception(self.call_arena) catch @errorName(err)) orelse @errorName(err), + }); + return null; + }; }; // We were hoping to find the module in our cache, and thus used @@ -1568,7 +1571,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { context.context_arena, specifier_str, resource_str, - .{ .alloc = .if_needed }, + .{ .alloc = .if_needed, .null_terminated = true }, ) catch unreachable; log.debug(.js, "dynamic import", .{ @@ -1590,41 +1593,41 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { fn _dynamicModuleCallback( self: *JsContext, - specifier: []const u8, + specifier: [:0]const u8, resolver: *const v8.PromiseResolver, ) !void { const iso = self.isolate; const ctx = self.v8_context; - const module_loader = self.module_loader; - const source = module_loader.func(module_loader.ptr, specifier) catch { - const error_msg = v8.String.initUtf8(iso, "Failed to load module"); - _ = resolver.reject(ctx, error_msg.toValue()); - return; - } orelse { - const error_msg = v8.String.initUtf8(iso, "Module source not available"); - _ = resolver.reject(ctx, error_msg.toValue()); - return; - }; - var try_catch: TryCatch = undefined; try_catch.init(self); defer try_catch.deinit(); - const maybe_promise = self.module(source, specifier, true) catch { - log.err(.js, "module compilation failed", .{ - .specifier = specifier, - .exception = try_catch.exception(self.call_arena) catch "unknown error", - .stack = try_catch.stack(self.call_arena) catch null, - .line = try_catch.sourceLineNumber() orelse 0, - }); - const error_msg = if (try_catch.hasCaught()) blk: { - const exception_str = try_catch.exception(self.call_arena) catch "Evaluation error"; - break :blk v8.String.initUtf8(iso, exception_str orelse "Evaluation error"); - } else v8.String.initUtf8(iso, "Module evaluation failed"); - _ = resolver.reject(ctx, error_msg.toValue()); - return; + const maybe_promise: ?v8.Promise = blk: { + const module_loader = self.module_loader; + var fetch_result = module_loader.func(module_loader.ptr, specifier) catch { + const error_msg = v8.String.initUtf8(iso, "Failed to load module"); + _ = resolver.reject(ctx, error_msg.toValue()); + return; + }; + defer fetch_result.deinit(); + + break :blk self.module(fetch_result.src(), specifier, true) catch { + log.err(.js, "module compilation failed", .{ + .specifier = specifier, + .exception = try_catch.exception(self.call_arena) catch "unknown error", + .stack = try_catch.stack(self.call_arena) catch null, + .line = try_catch.sourceLineNumber() orelse 0, + }); + const error_msg = if (try_catch.hasCaught()) eblk: { + const exception_str = try_catch.exception(self.call_arena) catch "Evaluation error"; + break :eblk v8.String.initUtf8(iso, exception_str orelse "Evaluation error"); + } else v8.String.initUtf8(iso, "Module evaluation failed"); + _ = resolver.reject(ctx, error_msg.toValue()); + return; + }; }; + const new_module = self.module_cache.get(specifier).?.castToModule(); if (maybe_promise) |promise| { @@ -3815,7 +3818,11 @@ const NoopInspector = struct { }; const ErrorModuleLoader = struct { - pub fn fetchModuleSource(_: *anyopaque, _: []const u8) !?[]const u8 { + // Don't like having to reach into ../browser/ here. But can't think + // of a good way to fix this. + const BlockingResult = @import("../browser/ScriptManager.zig").BlockingResult; + + pub fn fetchModuleSource(_: *anyopaque, _: [:0]const u8) !BlockingResult { return error.NoModuleLoadConfigured; } };