diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig
index 2807a40d..3b061956 100644
--- a/src/browser/ScriptManager.zig
+++ b/src/browser/ScriptManager.zig
@@ -34,6 +34,9 @@ const ScriptManager = @This();
 
 page: *Page,
 
+// used to prevent recursive evaluation
+is_evaluating: bool,
+
 // Only once this is true can deferred scripts be run
 static_scripts_done: bool,
 
@@ -48,6 +51,8 @@ scripts: OrderList,
 // dom_loaded == true,
 deferreds: OrderList,
 
+shutdown: bool = false,
+
 client: *HttpClient,
 allocator: Allocator,
 buffer_pool: BufferPool,
@@ -63,6 +68,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
         .asyncs = .{},
         .scripts = .{},
         .deferreds = .{},
+        .is_evaluating = false,
         .allocator = allocator,
         .client = browser.http_client,
         .static_scripts_done = false,
@@ -72,6 +78,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
 }
 
 pub fn deinit(self: *ScriptManager) void {
+    self.reset();
     self.buffer_pool.deinit();
     self.script_pool.deinit();
 }
@@ -193,7 +200,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
     };
 
     if (source == .@"inline") {
-        // if we're here, it means that we have pending scripts (i.e. self.ordered
+        // if we're here, it means that we have pending scripts (i.e. self.scripts
         // is not empty). Because the script is inline, it's complete/ready, but
         // we need to process them in order
         pending_script.complete = true;
@@ -201,9 +208,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
         return;
     }
 
-    const list = self.getList(&pending_script.script);
     pending_script.node = .{ .data = pending_script };
-    list.append(&pending_script.node);
+    self.getList(&pending_script.script).append(&pending_script.node);
 
     errdefer pending_script.deinit();
 
@@ -255,7 +261,17 @@ pub fn staticScriptsDone(self: *ScriptManager) void {
 // try to evaluate completed scripts (in order). This is called whenever a script
 // is completed.
 fn evaluate(self: *ScriptManager) void {
+    if (self.is_evaluating) {
+        // It's possible for a script.eval to cause evaluate to be called again.
+        // This is particularly true with blockingGet, but even without this,
+        // it's theoretically possible (but unlikely). We could make this work,
+        // but there's little reason to take on the complexity.
+        return;
+    }
+
     const page = self.page;
+    self.is_evaluating = true;
+    defer self.is_evaluating = false;
 
     while (self.scripts.first) |n| {
         var pending_script = n.data;
@@ -269,8 +285,8 @@ fn evaluate(self: *ScriptManager) void {
     if (self.static_scripts_done == false) {
         // We can only execute deferred scripts if
         // 1 - all the normal scripts are done
-        // 2 - and we've loaded all the normal scripts
-        // The last one isn't obvious, but it's possible for self.scripts to/
+        // 2 - we've finished parsing the HTML and at least queued all the scripts
+        // The last one isn't obvious, but it's possible for self.scripts to
         // be empty not because we're done executing all the normal scripts
         // but because we're done executing some (or maybe none), but we're still
         // parsing the HTML.
@@ -315,14 +331,17 @@ fn asyncDone(self: *ScriptManager) void {
 }
 
 fn getList(self: *ScriptManager, script: *const Script) *OrderList {
-    if (script.is_defer) {
-        return &self.deferreds;
-    }
-
+    // When a script has both the async and defer flags set, it should be
+    // treated as async. Async is newer, so some websites set both so that
+    // browsers that don't support async fall back to defer.
    if (script.is_async) {
        return &self.asyncs;
    }
 
+    if (script.is_defer) {
+        return &self.deferreds;
+    }
+
     return &self.scripts;
 }
 
@@ -375,16 +394,22 @@ const PendingScript = struct {
     manager: *ScriptManager,
 
     fn deinit(self: *PendingScript) void {
-        var manager = self.manager;
         const script = &self.script;
+        const manager = self.manager;
         if (script.source == .remote) {
             manager.buffer_pool.release(script.source.remote);
         }
-        manager.getList(script).remove(&self.node);
     }
 
+    fn remove(self: *PendingScript) void {
+        if (self.node) |*node| {
+            self.manager.getList(&self.script).remove(node);
+            self.node = null;
+        }
+    }
+
     fn startCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
         _ = self;
         log.debug(.http, "script fetch start", .{ .req = transfer });
     }
 
     fn headerCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
         const header = &transfer.response_header.?;
-        if (header.status != 200) {
-            return error.InvalidStatusCode;
-        }
-
-        // @newhttp TODO: pre size based on content-length
-        // @newhttp TODO: max-length enfocement
-        self.script.source = .{ .remote = self.manager.buffer_pool.get() };
-
         log.debug(.http, "script header", .{
             .req = transfer,
             .status = header.status,
             .content_type = header.contentType(),
         });
+
+        if (header.status != 200) {
+            return error.InvalidStatusCode;
+        }
+
+        // If this isn't true, we'll likely leak memory. If you CONNECT through
+        // a proxy without setting `CURLOPT_SUPPRESS_CONNECT_HEADERS`, this will
+        // fail. This assertion exists to catch incorrect assumptions about how
+        // libcurl works, or about how we've configured it.
+        std.debug.assert(self.script.source.remote.capacity == 0);
+
+        // @newhttp TODO: pre size based on content-length
+        // @newhttp TODO: max-length enforcement
+        self.script.source = .{ .remote = self.manager.buffer_pool.get() };
     }
 
     fn dataCallback(self: *PendingScript, transfer: *HttpClient.Transfer, data: []const u8) !void {
@@ -436,9 +467,15 @@ const PendingScript = struct {
 
     fn errorCallback(self: *PendingScript, err: anyerror) void {
         log.warn(.http, "script fetch error", .{ .req = self.script.url, .err = err });
+        const manager = self.manager;
+        self.deinit();
+        if (manager.shutdown) {
+            return;
+        }
+        manager.evaluate();
     }
 };
diff --git a/src/browser/page.zig b/src/browser/page.zig
index 127747b9..51129e88 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -153,6 +153,8 @@ pub const Page = struct {
     }
 
     pub fn deinit(self: *Page) void {
+        self.script_manager.shutdown = true;
+        self.http_client.abort();
         self.script_manager.deinit();
     }
 
@@ -268,6 +270,9 @@ pub const Page = struct {
         var scheduler = &self.scheduler;
         var http_client = self.http_client;
 
+        // for debugging
+        // defer self.printWaitAnalysis();
+
         while (true) {
             SW: switch (self.mode) {
                 .pre, .raw => {
@@ -346,6 +351,56 @@ pub const Page = struct {
         }
     }
 
+    fn printWaitAnalysis(self: *Page) void {
+        std.debug.print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))});
+        std.debug.print("load: {s}\n", .{@tagName(self.load_state)});
+        std.debug.print("active requests: {d}\n", .{self.http_client.active});
+
+        {
+            std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len});
+            var n_ = self.script_manager.scripts.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        {
+            std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len});
+            var n_ = self.script_manager.deferreds.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        const now = std.time.milliTimestamp();
+        {
+            std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len});
+            var n_ = self.script_manager.asyncs.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        {
+            std.debug.print("\nprimary schedule: {d}\n", .{self.scheduler.primary.count()});
+            var it = self.scheduler.primary.iterator();
+            while (it.next()) |task| {
+                std.debug.print(" - {s} due in: {d}ms\n", .{ task.name, task.ms - now });
+            }
+        }
+
+        {
+            std.debug.print("\nsecondary schedule: {d}\n", .{self.scheduler.secondary.count()});
+            var it = self.scheduler.secondary.iterator();
+            while (it.next()) |task| {
+                std.debug.print(" - {s} due in: {d}ms\n", .{ task.name, task.ms - now });
+            }
+        }
+    }
+
     pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
         var arr: std.ArrayListUnmanaged(u8) = .{};
         try self.url.origin(arr.writer(arena));
diff --git a/src/browser/session.zig b/src/browser/session.zig
index f33a99e2..a6f83e9c 100644
--- a/src/browser/session.zig
+++ b/src/browser/session.zig
@@ -118,18 +118,13 @@ pub const Session = struct {
 
         std.debug.assert(self.page != null);
 
-        // Cleanup is a bit sensitive. We could still have inflight I/O. For
-        // example, we could have an XHR request which is still in the connect
-        // phase. It's important that we clean these up, as they're holding onto
-        // limited resources (like our fixed-sized http state pool).
-        //
+        self.page.?.deinit();
+        self.page = null;
+
         // RemoveJsContext() will execute the destructor of any type that
         // registered a destructor (e.g. XMLHttpRequest).
         self.executor.removeJsContext();
 
-        self.page.?.deinit();
-        self.page = null;
-
         // clear netsurf memory arena.
         parser.deinit();
 
diff --git a/src/http/Http.zig b/src/http/Http.zig
index cb4ae709..c30de564 100644
--- a/src/http/Http.zig
+++ b/src/http/Http.zig
@@ -141,6 +141,11 @@ pub const Connection = struct {
             }
         }
 
+        // Compression. Don't remove this: CloudFront sends gzip content even
+        // if we don't advertise support for it, and then it won't be decompressed.
+        // An empty string means: accept whatever encodings libcurl supports.
+        try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_ACCEPT_ENCODING, ""));
+
         // debug
         if (comptime Http.ENABLE_DEBUG) {
             try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_VERBOSE, @as(c_long, 1)));
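Not part of the patch, for review context only: a minimal, self-contained Zig sketch of the re-entrancy guard that the new `is_evaluating` flag provides in `evaluate()`. The `Evaluator` type, its fields, and the test below are illustrative assumptions, not code from this repository; the real method walks `self.scripts` and runs completed scripts instead of recursing.

// Hypothetical sketch of the guard pattern used by ScriptManager.evaluate():
// set the flag for the duration of the call, reset it with `defer`, and have
// any nested call return immediately instead of re-walking the pending list.
const std = @import("std");

const Evaluator = struct {
    is_evaluating: bool = false,
    calls_run: usize = 0,

    fn evaluate(self: *Evaluator) void {
        if (self.is_evaluating) {
            // a nested call (e.g. triggered from inside a running script) bails out
            return;
        }
        self.is_evaluating = true;
        defer self.is_evaluating = false;

        self.calls_run += 1;
        // simulate a script whose execution re-enters evaluate()
        self.evaluate();
    }
};

test "nested evaluate calls are ignored" {
    var e = Evaluator{};
    e.evaluate();
    try std.testing.expectEqual(@as(usize, 1), e.calls_run);
}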