diff --git a/src/browser/Page.zig b/src/browser/Page.zig index a709f3fe..ebf5fed3 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -946,237 +946,6 @@ fn pageErrorCallback(ctx: *anyopaque, err: anyerror) void { }; } -pub fn wait(self: *Page, wait_ms: u32) Session.WaitResult { - return self._wait(wait_ms) catch |err| { - switch (err) { - error.JsError => {}, // already logged (with hopefully more context) - else => { - // There may be errors from the http/client or ScriptManager - // that we should not treat as an error like this. Will need - // to run this through more real-world sites and see if we need - // to expand the switch (err) to have more customized logs for - // specific messages. - log.err(.browser, "page wait", .{ .err = err, .type = self._type, .url = self.url }); - }, - } - return .done; - }; -} - -fn _wait(self: *Page, wait_ms: u32) !Session.WaitResult { - if (comptime IS_DEBUG) { - std.debug.assert(self._type == .root); - } - - var timer = try std.time.Timer.start(); - var ms_remaining = wait_ms; - - const browser = self._session.browser; - var http_client = browser.http_client; - - // I'd like the page to know NOTHING about cdp_socket / CDP, but the - // fact is that the behavior of wait changes depending on whether or - // not we're using CDP. - // If we aren't using CDP, as soon as we think there's nothing left - // to do, we can exit - we'de done. - // But if we are using CDP, we should wait for the whole `wait_ms` - // because the http_click.tick() also monitors the CDP socket. And while - // we could let CDP poll http (like it does for HTTP requests), the fact - // is that we know more about the timing of stuff (e.g. how long to - // poll/sleep) in the page. - const exit_when_done = http_client.cdp_client == null; - - // for debugging - // defer self.printWaitAnalysis(); - - while (true) { - switch (self._parse_state) { - .pre, .raw, .text, .image => { - // The main page hasn't started/finished navigating. - // There's no JS to run, and no reason to run the scheduler. - if (http_client.active == 0 and exit_when_done) { - // haven't started navigating, I guess. - return .done; - } - // Either we have active http connections, or we're in CDP - // mode with an extra socket. Either way, we're waiting - // for http traffic - if (try http_client.tick(@intCast(ms_remaining)) == .cdp_socket) { - // exit_when_done is explicitly set when there isn't - // an extra socket, so it should not be possibl to - // get an cdp_socket message when exit_when_done - // is true. - if (IS_DEBUG) { - std.debug.assert(exit_when_done == false); - } - - // data on a socket we aren't handling, return to caller - return .cdp_socket; - } - }, - .html, .complete => { - if (self._queued_navigation != null) { - return .done; - } - - // The HTML page was parsed. We now either have JS scripts to - // download, or scheduled tasks to execute, or both. - - // scheduler.run could trigger new http transfers, so do not - // store http_client.active BEFORE this call and then use - // it AFTER. - const ms_to_next_task = try browser.runMacrotasks(); - - const http_active = http_client.active; - const total_network_activity = http_active + http_client.intercepted; - if (self._notified_network_almost_idle.check(total_network_activity <= 2)) { - self.notifyNetworkAlmostIdle(); - } - if (self._notified_network_idle.check(total_network_activity == 0)) { - self.notifyNetworkIdle(); - } - - if (http_active == 0 and exit_when_done) { - // we don't need to consider http_client.intercepted here - // because exit_when_done is true, and that can only be - // the case when interception isn't possible. - if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); - } - - const ms = ms_to_next_task orelse blk: { - if (wait_ms - ms_remaining < 100) { - if (comptime builtin.is_test) { - return .done; - } - // Look, we want to exit ASAP, but we don't want - // to exit so fast that we've run none of the - // background jobs. - break :blk 50; - } - // No http transfers, no cdp extra socket, no - // scheduled tasks, we're done. - return .done; - }; - - if (ms > ms_remaining) { - // Same as above, except we have a scheduled task, - // it just happens to be too far into the future - // compared to how long we were told to wait. - return .done; - } - - // We have a task to run in the not-so-distant future. - // You might think we can just sleep until that task is - // ready, but we should continue to run lowPriority tasks - // in the meantime, and that could unblock things. So - // we'll just sleep for a bit, and then restart our wait - // loop to see if anything new can be processed. - std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20)))); - } else { - // We're here because we either have active HTTP - // connections, or exit_when_done == false (aka, there's - // an cdp_socket registered with the http client). - // We should continue to run lowPriority tasks, so we - // minimize how long we'll poll for network I/O. - const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200)); - if (try http_client.tick(ms_to_wait) == .cdp_socket) { - // data on a socket we aren't handling, return to caller - return .cdp_socket; - } - } - }, - .err => |err| { - self._parse_state = .{ .raw_done = @errorName(err) }; - return err; - }, - .raw_done => { - if (exit_when_done) { - return .done; - } - // we _could_ http_client.tick(ms_to_wait), but this has - // the same result, and I feel is more correct. - return .no_page; - }, - } - - const ms_elapsed = timer.lap() / 1_000_000; - if (ms_elapsed >= ms_remaining) { - return .done; - } - ms_remaining -= @intCast(ms_elapsed); - } -} - -fn printWaitAnalysis(self: *Page) void { - std.debug.print("load_state: {s}\n", .{@tagName(self._load_state)}); - std.debug.print("parse_state: {s}\n", .{@tagName(std.meta.activeTag(self._parse_state))}); - { - std.debug.print("\nactive requests: {d}\n", .{self._session.browser.http_client.active}); - var n_ = self._session.browser.http_client.handles.in_use.first; - while (n_) |n| { - const conn: *Net.Connection = @fieldParentPtr("node", n); - const transfer = Http.Transfer.fromConnection(conn) catch |err| { - std.debug.print(" - failed to load transfer: {any}\n", .{err}); - break; - }; - std.debug.print(" - {f}\n", .{transfer}); - n_ = n.next; - } - } - - { - std.debug.print("\nqueued requests: {d}\n", .{self._session.browser.http_client.queue.len()}); - var n_ = self._session.browser.http_client.queue.first; - while (n_) |n| { - const transfer: *Http.Transfer = @fieldParentPtr("_node", n); - std.debug.print(" - {f}\n", .{transfer}); - n_ = n.next; - } - } - - { - std.debug.print("\ndeferreds: {d}\n", .{self._script_manager.defer_scripts.len()}); - var n_ = self._script_manager.defer_scripts.first; - while (n_) |n| { - const script: *ScriptManager.Script = @fieldParentPtr("node", n); - std.debug.print(" - {s} complete: {any}\n", .{ script.url, script.complete }); - n_ = n.next; - } - } - - { - std.debug.print("\nasyncs: {d}\n", .{self._script_manager.async_scripts.len()}); - } - - { - std.debug.print("\nasyncs ready: {d}\n", .{self._script_manager.ready_scripts.len()}); - var n_ = self._script_manager.ready_scripts.first; - while (n_) |n| { - const script: *ScriptManager.Script = @fieldParentPtr("node", n); - std.debug.print(" - {s} complete: {any}\n", .{ script.url, script.complete }); - n_ = n.next; - } - } - - const now = milliTimestamp(.monotonic); - { - std.debug.print("\nhigh_priority schedule: {d}\n", .{self.js.scheduler.high_priority.count()}); - var it = self.js.scheduler.high_priority.iterator(); - while (it.next()) |task| { - std.debug.print(" - {s} schedule: {d}ms\n", .{ task.name, task.run_at - now }); - } - } - - { - std.debug.print("\nlow_priority schedule: {d}\n", .{self.js.scheduler.low_priority.count()}); - var it = self.js.scheduler.low_priority.iterator(); - while (it.next()) |task| { - std.debug.print(" - {s} schedule: {d}ms\n", .{ task.name, task.run_at - now }); - } - } -} - pub fn isGoingAway(self: *const Page) bool { return self._queued_navigation != null; } diff --git a/src/browser/Session.zig b/src/browser/Session.zig index c9b4db48..bd7bdff8 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -166,6 +166,7 @@ pub fn wait(self: *Session, wait_ms: u32) WaitResult { error.JsError => {}, // already logged (with hopefully more context) else => log.err(.browser, "session wait", .{ .err = err, + .url = page.url, }), } return .done;