From 5dda86bf4aee99b7fd57be7df40db9808b25fb98 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 4 Sep 2025 16:28:12 +0800 Subject: [PATCH 1/2] Emit networkIdle and networkAlmostIdle Page.lifecycleEvent Most CDP drivers have a mechanism to wait for idle network, or an almost idle network (sometimes called networkIdle2). These are events the browser must emit. The page will now emit `networkIdle` when we are reasonably sure there's no more network activity (this requires some slight changes to request interception, since, I believe, intercepted requests should be considered). `networkAlmostIdle` is currently _always_ emitted prior to emitting `networkIdle`. We should tweak this but I can't, at a glance, think of a great heuristic for when this should be emitted. --- src/browser/page.zig | 57 +++++++++++++++++++++++++++++++++------ src/cdp/cdp.zig | 22 +++++++++++++++ src/cdp/domains/fetch.zig | 8 +++--- src/cdp/domains/page.zig | 33 ++++++++++++++++++++--- src/http/Client.zig | 48 ++++++++++++++++++++++++++++++--- src/notification.zig | 16 +++++++++-- 6 files changed, 163 insertions(+), 21 deletions(-) diff --git a/src/browser/page.zig b/src/browser/page.zig index c9a9df0a..c65011d9 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -90,6 +90,11 @@ pub const Page = struct { load_state: LoadState = .parsing, + // We should only emit these events once per page. To make sure of that, we + // track whether or not we've already emitted the notifications. + notified_network_idle: bool = false, + notified_network_almost_idle: bool = false, + const Mode = union(enum) { pre: void, err: anyerror, @@ -329,7 +334,13 @@ pub const Page = struct { return error.JsError; } - if (http_client.active == 0 and exit_when_done) { + const http_active = http_client.active; + if (http_active == 0 and exit_when_done) { + // we don't need to concider http_client.intercepted here + // because exit_when_done is true, and that can only be + // the case when interception isn't possible. + std.debug.assert(http_client.intercepted == 0); + const ms = ms_to_next_task orelse blk: { // TODO: when jsRunner is fully replaced with the // htmlRunner, we can remove the first part of this @@ -341,29 +352,34 @@ pub const Page = struct { // background jobs. break :blk 50; } - // no http transfers, no cdp extra socket, no + // No http transfers, no cdp extra socket, no // scheduled tasks, we're done. return .done; }; if (ms > ms_remaining) { - // same as above, except we have a scheduled task, - // it just happens to be too far into the future.s + // Same as above, except we have a scheduled task, + // it just happens to be too far into the future. return .done; } - // we have a task to run in the not-so-distant future. + // We have a task to run in the not-so-distant future. // You might think we can just sleep until that task is // ready, but we should continue to run lowPriority tasks // in the meantime, and that could unblock things. So // we'll just sleep for a bit, and then restart our wait - // loop to see what's changed + // loop to see if anything new can be processed. std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20)))); } else { + if (self.notified_network_idle == false and http_active == 0 and http_client.intercepted == 0) { + self.notifyNetworkIdle(); + } // We're here because we either have active HTTP - // connections, of exit_when_done == false (aka, there's + // connections, or exit_when_done == false (aka, there's // an extra_socket registered with the http client). - const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100); + // We should continue to run lowPriority tasks, so we + // minimize how long we'll poll for network I/O. + const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200)); if (try http_client.tick(ms_to_wait) == .extra_socket) { // data on a socket we aren't handling, return to caller return .extra_socket; @@ -467,6 +483,31 @@ pub const Page = struct { } } + fn notifyNetworkIdle(self: *Page) void { + // caller should always check that we haven't sent this already; + std.debug.assert(self.notified_network_idle == false); + + // if we're going to send networkIdle, we should first send networkAlmostIdle + // if it hasn't already been sent. + if (self.notified_network_almost_idle == false) { + self.notifyNetworkAlmostIdle(); + } + + self.notified_network_idle = true; + self.session.browser.notification.dispatch(.page_network_idle, &.{ + .timestamp = timestamp(), + }); + } + + fn notifyNetworkAlmostIdle(self: *Page) void { + // caller should always check that we haven't sent this already; + std.debug.assert(self.notified_network_almost_idle == false); + self.notified_network_almost_idle = true; + self.session.browser.notification.dispatch(.page_network_almost_idle, &.{ + .timestamp = timestamp(), + }); + } + pub fn origin(self: *const Page, arena: Allocator) ![]const u8 { var aw = std.Io.Writer.Allocating.init(arena); try self.url.origin(&aw.writer); diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 55c489d3..e7ce2086 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -487,6 +487,18 @@ pub fn BrowserContext(comptime CDP_T: type) type { self.cdp.browser.notification.unregister(.http_request_auth_required, self); } + pub fn lifecycleEventsEnable(self: *Self) !void { + self.page_life_cycle_events = true; + try self.cdp.browser.notification.register(.page_network_idle, self, onPageNetworkIdle); + try self.cdp.browser.notification.register(.page_network_almost_idle, self, onPageNetworkAlmostIdle); + } + + pub fn lifecycleEventsDisable(self: *Self) void { + self.page_life_cycle_events = false; + self.cdp.browser.notification.unregister(.page_network_idle, self); + self.cdp.browser.notification.unregister(.page_network_almost_idle, self); + } + pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void { const self: *Self = @ptrCast(@alignCast(ctx)); try @import("domains/page.zig").pageRemove(self); @@ -508,6 +520,16 @@ pub fn BrowserContext(comptime CDP_T: type) type { return @import("domains/page.zig").pageNavigated(self, msg); } + pub fn onPageNetworkIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkIdle) !void { + const self: *Self = @ptrCast(@alignCast(ctx)); + return @import("domains/page.zig").pageNetworkIdle(self, msg); + } + + pub fn onPageNetworkAlmostIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkAlmostIdle) !void { + const self: *Self = @ptrCast(@alignCast(ctx)); + return @import("domains/page.zig").pageNetworkAlmostIdle(self, msg); + } + pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void { const self: *Self = @ptrCast(@alignCast(ctx)); defer self.resetNotificationArena(); diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index c299e05c..da2a7ddd 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -261,7 +261,7 @@ fn continueRequest(cmd: anytype) !void { transfer.req.body = body; } - try bc.cdp.browser.http_client.process(transfer); + try bc.cdp.browser.http_client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -311,7 +311,7 @@ fn continueWithAuth(cmd: anytype) !void { ); transfer.reset(); - try bc.cdp.browser.http_client.process(transfer); + try bc.cdp.browser.http_client.continueTransfer(transfer); return cmd.sendResult(null, .{}); } @@ -352,7 +352,7 @@ fn fulfillRequest(cmd: anytype) !void { body = buf; } - try transfer.fulfill(params.responseCode, params.responseHeaders orelse &.{}, body); + try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body); return cmd.sendResult(null, .{}); } @@ -368,7 +368,7 @@ fn failRequest(cmd: anytype) !void { const request_id = try idFromRequestId(params.requestId); const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; - defer transfer.abort(); + defer bc.cdp.browser.http_client.abortTransfer(transfer); log.info(.cdp, "request intercept", .{ .state = "fail", diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 1f7e4494..1394953a 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -78,12 +78,16 @@ fn getFrameTree(cmd: anytype) !void { } fn setLifecycleEventsEnabled(cmd: anytype) !void { - // const params = (try cmd.params(struct { - // enabled: bool, - // })) orelse return error.InvalidParams; + const params = (try cmd.params(struct { + enabled: bool, + })) orelse return error.InvalidParams; const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; - bc.page_life_cycle_events = true; + if (params.enabled) { + try bc.lifecycleEventsEnable(); + } else { + bc.lifecycleEventsDisable(); + } return cmd.sendResult(null, .{}); } @@ -357,6 +361,27 @@ pub fn pageNavigated(bc: anytype, event: *const Notification.PageNavigated) !voi }, .{ .session_id = session_id }); } +pub fn pageNetworkIdle(bc: anytype, event: *const Notification.PageNetworkIdle) !void { + return sendPageLifecycle(bc, "networkIdle", event.timestamp); +} + +pub fn pageNetworkAlmostIdle(bc: anytype, event: *const Notification.PageNetworkAlmostIdle) !void { + return sendPageLifecycle(bc, "networkAlmostIdle", event.timestamp); +} + +fn sendPageLifecycle(bc: anytype, name: []const u8, timestamp: u32) !void { + const loader_id = bc.loader_id; + const target_id = bc.target_id orelse unreachable; + const session_id = bc.session_id orelse unreachable; + + return bc.cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{ + .name = name, + .frameId = target_id, + .loaderId = loader_id, + .timestamp = timestamp, + }, .{ .session_id = session_id }); +} + const LifecycleEvent = struct { frameId: []const u8, loaderId: ?[]const u8, diff --git a/src/http/Client.zig b/src/http/Client.zig index 5ab99c30..15b519d3 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -50,9 +50,18 @@ const Method = Http.Method; // those other http requests. pub const Client = @This(); -// count of active requests +// Count of active requests active: usize, +// Count of intercepted requests. This is to help deal with intercepted requests. +// The client doesn't track intercepted transfers. If a request is intercepted, +// the client forgets about it and requires the interceptor to continue or abort +// it. That works well, except if we only rely on active, we might think there's +// no more network activity when, with interecepted requests, there might be more +// in the future. (We really only need this to properly emit a 'networkIdle' and +// 'networkAlmostIdle' Page.lifecycleEvent in CDP). +intercepted: usize, + // curl has 2 APIs: easy and multi. Multi is like a combination of some I/O block // (e.g. epoll) and a bunch of pools. You add/remove easys to the multiple and // then poll the multi. @@ -115,6 +124,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie client.* = .{ .queue = .{}, .active = 0, + .intercepted = 0, .multi = multi, .handles = handles, .blocking = blocking, @@ -191,6 +201,10 @@ pub fn request(self: *Client, req: Request) !void { var wait_for_interception = false; notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); if (wait_for_interception) { + self.intercepted += 1; + if (builtin.mode == .Debug) { + transfer._intercepted = true; + } // The user is send an invitation to intercept this request. return; } @@ -200,9 +214,9 @@ pub fn request(self: *Client, req: Request) !void { } // Above, request will not process if there's an interception request. In such -// cases, the interecptor is expected to call process to continue the transfer +// cases, the interecptor is expected to call resume to continue the transfer // or transfer.abort() to abort it. -pub fn process(self: *Client, transfer: *Transfer) !void { +fn process(self: *Client, transfer: *Transfer) !void { if (self.handles.getFreeHandle()) |handle| { return self.makeRequest(handle, transfer); } @@ -210,6 +224,33 @@ pub fn process(self: *Client, transfer: *Transfer) !void { self.queue.append(&transfer._node); } +// For an intercepted request +pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { + if (builtin.mode == .Debug) { + std.debug.assert(transfer._intercepted); + } + self.intercepted -= 1; + return self.process(transfer); +} + +// For an intercepted request +pub fn abortTransfer(self: *Client, transfer: *Transfer) void { + if (builtin.mode == .Debug) { + std.debug.assert(transfer._intercepted); + } + self.intercepted -= 1; + transfer.abort(); +} + +// For an intercepted request +pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const Http.Header, body: ?[]const u8) !void { + if (builtin.mode == .Debug) { + std.debug.assert(transfer._intercepted); + } + self.intercepted -= 1; + return transfer.fulfill(status, headers, body); +} + // See ScriptManager.blockingGet pub fn blockingRequest(self: *Client, req: Request) !void { const transfer = try self.makeTransfer(req); @@ -674,6 +715,7 @@ pub const Transfer = struct { // for when a Transfer is queued in the client.queue _node: std.DoublyLinkedList.Node = .{}, + _intercepted: if (builtin.mode == .Debug) bool else void = if (builtin.mode == .Debug) false else {}, pub fn reset(self: *Transfer) void { self._redirecting = false; diff --git a/src/notification.zig b/src/notification.zig index 76c5d970..17614a32 100644 --- a/src/notification.zig +++ b/src/notification.zig @@ -32,9 +32,9 @@ const List = std.DoublyLinkedList; // "scope". This would have a run-time cost and still require some coordination // between components to share a common scope. // -// Instead, the approach that we take is to have a notification per +// Instead, the approach that we take is to have a notification instance per // scope. This makes some things harder, but we only plan on having 2 -// notifications at a given time: one in a Browser and one in the App. +// notification instances at a given time: one in a Browser and one in the App. // What about something like Telemetry, which lives outside of a Browser but // still cares about Browser-events (like .page_navigate)? When the Browser // notification is created, a `notification_created` event is raised in the @@ -59,6 +59,8 @@ pub const Notification = struct { page_created: List = .{}, page_navigate: List = .{}, page_navigated: List = .{}, + page_network_idle: List = .{}, + page_network_almost_idle: List = .{}, http_request_fail: List = .{}, http_request_start: List = .{}, http_request_intercept: List = .{}, @@ -74,6 +76,8 @@ pub const Notification = struct { page_created: *page.Page, page_navigate: *const PageNavigate, page_navigated: *const PageNavigated, + page_network_idle: *const PageNetworkIdle, + page_network_almost_idle: *const PageNetworkAlmostIdle, http_request_fail: *const RequestFail, http_request_start: *const RequestStart, http_request_intercept: *const RequestIntercept, @@ -98,6 +102,14 @@ pub const Notification = struct { url: []const u8, }; + pub const PageNetworkIdle = struct { + timestamp: u32, + }; + + pub const PageNetworkAlmostIdle = struct { + timestamp: u32, + }; + pub const RequestStart = struct { transfer: *Transfer, }; From 55ef0a5e9e64d05cb55ca5c4aa3bfab9357890e8 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 4 Sep 2025 16:44:00 +0800 Subject: [PATCH 2/2] fix some spelling in comments --- src/browser/page.zig | 8 ++++---- src/runtime/js.zig | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/browser/page.zig b/src/browser/page.zig index c65011d9..6362b620 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -336,7 +336,7 @@ pub const Page = struct { const http_active = http_client.active; if (http_active == 0 and exit_when_done) { - // we don't need to concider http_client.intercepted here + // we don't need to consider http_client.intercepted here // because exit_when_done is true, and that can only be // the case when interception isn't possible. std.debug.assert(http_client.intercepted == 0); @@ -823,12 +823,12 @@ pub const Page = struct { } // The transfer arena is useful and interesting, but has a weird lifetime. - // When we're transfering from one page to another (via delayed navigation) + // When we're transferring from one page to another (via delayed navigation) // we need things in memory: like the URL that we're navigating to and // optionally the body to POST. That cannot exist in the page.arena, because // the page that we have is going to be destroyed and a new page is going // to be created. If we used the page.arena, we'd wouldn't be able to reset - // it between navigations. + // it between navigation. // So the transfer arena is meant to exist between a navigation event. It's // freed when the main html navigation is complete, either in pageDoneCallback // or pageErrorCallback. It needs to exist for this long because, if we set @@ -1159,7 +1159,7 @@ pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) c return; } - // It's posisble for a script to be dynamically added without a src. + // It's possible for a script to be dynamically added without a src. // const s = document.createElement('script'); // document.getElementsByTagName('body')[0].appendChild(s); // The src can be set after. We handle that in HTMLScriptElement.set_src, diff --git a/src/runtime/js.zig b/src/runtime/js.zig index 0a82b6d5..b343b3eb 100644 --- a/src/runtime/js.zig +++ b/src/runtime/js.zig @@ -1395,7 +1395,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { }, .@"struct" => { // We don't want to duplicate the code for this, so we call - // the actual coversion function. + // the actual conversion function. const value = (try self.jsValueToStruct(named_function, T, js_value)) orelse { return .{ .invalid = {} }; }; @@ -1494,7 +1494,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // We were hoping to find the module in our cache, and thus used // the short-lived call_arena to create the normalized_specifier. - // But now this'll live for the lifetime of the context. + // But now this will live for the lifetime of the context. const arena = self.context_arena; const owned_specifier = try arena.dupe(u8, normalized_specifier); try self.module_cache.put(arena, owned_specifier, PersistentModule.init(self.isolate, m)); @@ -2098,7 +2098,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // a shorthand method to return either the entire stack message // or just the exception message // - in Debug mode return the stack if available - // - otherwhise return the exception if available + // - otherwise return the exception if available // the caller needs to deinit the string returned pub fn err(self: TryCatch, allocator: Allocator) !?[]const u8 { if (builtin.mode == .Debug) { @@ -2579,7 +2579,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { // If you're trying to implement setter, read: // https://groups.google.com/g/v8-users/c/8tahYBsHpgY/m/IteS7Wn2AAAJ // The issue I had was - // (a) where to attache it: does it go ont he instance_template + // (a) where to attache it: does it go on the instance_template // instead of the prototype? // (b) defining the getter or query to respond with the // PropertyAttribute to indicate if the property can be set @@ -2751,7 +2751,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { } if (T == Function) { - // we're returnig a callback + // we're returning a callback return value.func.toValue(); } @@ -2876,8 +2876,8 @@ pub fn Env(comptime State: type, comptime WebApis: type) type { } }; - // Callback called on global's property mssing. - // Return true to intercept the exectution or false to let the call + // Callback called on global's property missing. + // Return true to intercept the execution or false to let the call // continue the chain. pub const GlobalMissingCallback = struct { ptr: *anyopaque, @@ -2951,10 +2951,10 @@ fn isComplexAttributeType(ti: std.builtin.Type) bool { }; } -// Responsible for calling Zig functions from JS invokations. This could +// Responsible for calling Zig functions from JS invocations. This could // probably just contained in ExecutionWorld, but having this specific logic, which // is somewhat repetitive between constructors, functions, getters, etc contained -// here does feel like it makes it clenaer. +// here does feel like it makes it cleaner. fn Caller(comptime JsContext: type, comptime State: type) type { return struct { js_context: *JsContext, @@ -3356,7 +3356,7 @@ fn Caller(comptime JsContext: type, comptime State: type) type { var is_variadic = false; { - // This is going to get complicated. If the last Zig paremeter + // This is going to get complicated. If the last Zig parameter // is a slice AND the corresponding javascript parameter is // NOT an an array, then we'll treat it as a variadic.