From 3555680335afce51eaeb11fb2fafd1eb8148a542 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Sat, 2 Aug 2025 12:29:22 +0800 Subject: [PATCH] Working navigation events (clicks, form submission) --- src/browser/ScriptManager.zig | 16 ++++++++ src/browser/page.zig | 76 +++++++++-------------------------- src/browser/session.zig | 43 ++++++++++++++++++++ src/cdp/cdp.zig | 4 +- src/main.zig | 2 +- src/testing.zig | 2 +- 6 files changed, 82 insertions(+), 61 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 5cfc7414..20b5906b 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -76,6 +76,22 @@ pub fn deinit(self: *ScriptManager) void { self.script_pool.deinit(); } +pub fn reset(self: *ScriptManager) void { + self.client.abort(); + self.clearList(&self.scripts); + self.clearList(&self.deferred); + self.static_scripts_done = false; +} + +fn clearList(_: *const ScriptManager, list: *OrderList) void { + while (list.first) |node| { + const pending_script = node.data; + // this removes it from the list + pending_script.deinit(); + } + std.debug.assert(list.first == null); +} + pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { if (try parser.elementGetAttribute(element, "nomodule") != null) { // these scripts should only be loaded if we don't support modules diff --git a/src/browser/page.zig b/src/browser/page.zig index 7decc483..c413a524 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -48,6 +48,7 @@ const polyfill = @import("polyfill/polyfill.zig"); // end() to stop the previous navigation before starting a new one. // The page handle all its memory in an arena allocator. The arena is reseted // when end() is called. + pub const Page = struct { cookie_jar: *storage.CookieJar, @@ -150,7 +151,8 @@ pub const Page = struct { fn reset(self: *Page) void { _ = self.session.browser.page_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 }); - self.http_client.abort(); + // this will reset the http_client + self.script_manager.reset(); self.scheduler.reset(); self.document_state = .parsing; self.mode = .{ .pre = {} }; @@ -729,26 +731,36 @@ pub const Page = struct { // The page.arena is safe to use here, but the transfer_arena exists // specifically for this type of lifetime. pub fn navigateFromWebAPI(self: *Page, url: []const u8, opts: NavigateOpts) !void { + const session = self.session; + if (session.queued_navigation != null) { + // It might seem like this should never happen. And it might not, + // BUT..consider the case where we have script like: + // top.location = X; + // top.location = Y; + // Will the 2nd top.location execute? You'd think not, since, + // when we're in this function for the 1st, we'll call: + // session.executor.terminateExecution(); + // But, this doesn't seem guaranteed to stop on the current line. + // My best guess is that v8 groups executes in chunks (how they are + // chunked, I can't guess) and always executes them together. + return; + } + log.debug(.browser, "delayed navigation", .{ .url = url, .reason = opts.reason, }); self.delayed_navigation = true; - const session = self.session; - const arena = session.transfer_arena; - const navi = try arena.create(DelayedNavigation); - navi.* = .{ + session.queued_navigation = .{ .opts = opts, - .session = session, - .url = try URL.stitch(arena, url, self.url.raw, .{ .alloc = .always }), + .url = try URL.stitch(session.transfer_arena, url, self.url.raw, .{ .alloc = .always }), }; self.http_client.abort(); // In v8, this throws an exception which JS code cannot catch. session.executor.terminateExecution(); - _ = try self.scheduler.add(navi, DelayedNavigation.run, 0, .{ .name = "delayed navigation" }); } pub fn getOrCreateNodeState(self: *Page, node: *parser.Node) !*State { @@ -833,54 +845,6 @@ pub const Page = struct { } }; -const DelayedNavigation = struct { - url: []const u8, - session: *Session, - opts: NavigateOpts, - - // Navigation is blocking, which is problem because it can seize up - // the loop and deadlock. We can only safely try to navigate to a - // new page when we're sure there's at least 1 free slot in the - // http client. We handle this in two phases: - // - // In the first phase, when self.initial == true, we'll shutdown the page - // and create a new one. The shutdown is important, because it resets the - // loop ctx_id and removes the JsContext. Removing the context calls our XHR - // destructors which aborts requests. This is necessary to make sure our - // [blocking] navigate won't block. - // - // In the 2nd phase, we wait until there's a free http slot so that our - // navigate definetly won't block (which could deadlock the system if there - // are still pending async requests, which we've seen happen, even after - // an abort). - fn run(ctx: *anyopaque) ?u32 { - const self: *DelayedNavigation = @alignCast(@ptrCast(ctx)); - const session = self.session; - - // abort any pending requests or active tranfers; - session.browser.http_client.abort(); - - // Prior to schedule this task, we terminated excution to stop - // the running script. If we don't resume it before doing a shutdown - // we'll get an error. - session.executor.resumeExecution(); - session.removePage(); - const page = session.createPage() catch |err| { - log.err(.browser, "delayed navigation page error", .{ - .err = err, - .url = self.url, - }); - return null; - }; - - page.navigate(self.url, self.opts) catch |err| { - log.err(.browser, "delayed navigation error", .{ .err = err, .url = self.url }); - }; - - return null; - } -}; - pub const NavigateReason = enum { anchor, address_bar, diff --git a/src/browser/session.zig b/src/browser/session.zig index f481a567..f33a99e2 100644 --- a/src/browser/session.zig +++ b/src/browser/session.zig @@ -56,6 +56,12 @@ pub const Session = struct { page: ?Page = null, + // If the current page want to navigate to a new page + // (form submit, link click, top.location = xxx) + // the details are stored here so that, on the next call to session.wait + // we can destroy the current page and start a new one. + queued_navigation: ?QueuedNavigation, + pub fn init(self: *Session, browser: *Browser) !void { var executor = try browser.env.newExecutionWorld(); errdefer executor.deinit(); @@ -64,6 +70,7 @@ pub const Session = struct { self.* = .{ .browser = browser, .executor = executor, + .queued_navigation = null, .arena = browser.session_arena.allocator(), .storage_shed = storage.Shed.init(allocator), .cookie_jar = storage.CookieJar.init(allocator), @@ -132,4 +139,40 @@ pub const Session = struct { pub fn currentPage(self: *Session) ?*Page { return &(self.page orelse return null); } + + pub fn wait(self: *Session, wait_sec: usize) void { + if (self.queued_navigation) |qn| { + // This was already aborted on the page, but it would be pretty + // bad if old requests went to the new page, so let's make double sure + self.browser.http_client.abort(); + + // Page.navigateFromWebAPI terminatedExecution. If we don't resume + // it before doing a shutdown we'll get an error. + self.executor.resumeExecution(); + self.removePage(); + self.queued_navigation = null; + + const page = self.createPage() catch |err| { + log.err(.browser, "queued navigation page error", .{ + .err = err, + .url = qn.url, + }); + return; + }; + + page.navigate(qn.url, qn.opts) catch |err| { + log.err(.browser, "queued navigation error", .{ .err = err, .url = qn.url }); + return; + }; + } + + if (self.page) |*page| { + page.wait(wait_sec); + } + } +}; + +const QueuedNavigation = struct { + url: []const u8, + opts: NavigateOpts, }; diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 1feafebe..052b8af9 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -123,11 +123,9 @@ pub fn CDPT(comptime TypeProvider: type) type { // This is hopefully temporary. pub fn pageWait(self: *Self) void { const session = &(self.browser.session orelse return); - var page = session.currentPage() orelse return; - // exits early if there's nothing to do, so a large value like // 5 seconds should be ok - page.wait(5); + session.wait(5); } // Called from above, in processMessage which handles client messages diff --git a/src/main.zig b/src/main.zig index 3373249b..c3df56e4 100644 --- a/src/main.zig +++ b/src/main.zig @@ -130,7 +130,7 @@ fn run(alloc: Allocator) !void { }, }; - page.wait(5); // 5 seconds + session.wait(5); // 5 seconds // dump if (opts.dump) { diff --git a/src/testing.zig b/src/testing.zig index 9d50c800..80bdf0d9 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -441,7 +441,7 @@ pub const JsRunner = struct { } return err; }; - self.page.wait(1); + self.page.session.wait(1); @import("root").js_runner_duration += std.time.Instant.since(try std.time.Instant.now(), start); if (case.@"1") |expected| {