Emit networkIdle and networkAlmostIdle Page.lifecycleEvent

Most CDP drivers have a mechanism to wait for idle network, or an almost idle
network (sometimes called networkIdle2). These are events the browser must emit.

The page will now emit `networkIdle` when we are reasonably sure there's no more
network activity (this requires some slight changes to request interception,
since, I believe, intercepted requests should be considered).

`networkAlmostIdle` is currently _always_ emitted prior to emitting
`networkIdle`. We should tweak this but I can't, at a glance, think of a great
heuristic for when this should be emitted.
This commit is contained in:
Karl Seguin
2025-09-04 16:28:12 +08:00
parent 7fdc857326
commit 5dda86bf4a
6 changed files with 163 additions and 21 deletions

View File

@@ -90,6 +90,11 @@ pub const Page = struct {
load_state: LoadState = .parsing,
// We should only emit these events once per page. To make sure of that, we
// track whether or not we've already emitted the notifications.
notified_network_idle: bool = false,
notified_network_almost_idle: bool = false,
const Mode = union(enum) {
pre: void,
err: anyerror,
@@ -329,7 +334,13 @@ pub const Page = struct {
return error.JsError;
}
if (http_client.active == 0 and exit_when_done) {
const http_active = http_client.active;
if (http_active == 0 and exit_when_done) {
// we don't need to concider http_client.intercepted here
// because exit_when_done is true, and that can only be
// the case when interception isn't possible.
std.debug.assert(http_client.intercepted == 0);
const ms = ms_to_next_task orelse blk: {
// TODO: when jsRunner is fully replaced with the
// htmlRunner, we can remove the first part of this
@@ -341,29 +352,34 @@ pub const Page = struct {
// background jobs.
break :blk 50;
}
// no http transfers, no cdp extra socket, no
// No http transfers, no cdp extra socket, no
// scheduled tasks, we're done.
return .done;
};
if (ms > ms_remaining) {
// same as above, except we have a scheduled task,
// it just happens to be too far into the future.s
// Same as above, except we have a scheduled task,
// it just happens to be too far into the future.
return .done;
}
// we have a task to run in the not-so-distant future.
// We have a task to run in the not-so-distant future.
// You might think we can just sleep until that task is
// ready, but we should continue to run lowPriority tasks
// in the meantime, and that could unblock things. So
// we'll just sleep for a bit, and then restart our wait
// loop to see what's changed
// loop to see if anything new can be processed.
std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20))));
} else {
if (self.notified_network_idle == false and http_active == 0 and http_client.intercepted == 0) {
self.notifyNetworkIdle();
}
// We're here because we either have active HTTP
// connections, of exit_when_done == false (aka, there's
// connections, or exit_when_done == false (aka, there's
// an extra_socket registered with the http client).
const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100);
// We should continue to run lowPriority tasks, so we
// minimize how long we'll poll for network I/O.
const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200));
if (try http_client.tick(ms_to_wait) == .extra_socket) {
// data on a socket we aren't handling, return to caller
return .extra_socket;
@@ -467,6 +483,31 @@ pub const Page = struct {
}
}
fn notifyNetworkIdle(self: *Page) void {
// caller should always check that we haven't sent this already;
std.debug.assert(self.notified_network_idle == false);
// if we're going to send networkIdle, we should first send networkAlmostIdle
// if it hasn't already been sent.
if (self.notified_network_almost_idle == false) {
self.notifyNetworkAlmostIdle();
}
self.notified_network_idle = true;
self.session.browser.notification.dispatch(.page_network_idle, &.{
.timestamp = timestamp(),
});
}
fn notifyNetworkAlmostIdle(self: *Page) void {
// caller should always check that we haven't sent this already;
std.debug.assert(self.notified_network_almost_idle == false);
self.notified_network_almost_idle = true;
self.session.browser.notification.dispatch(.page_network_almost_idle, &.{
.timestamp = timestamp(),
});
}
pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
var aw = std.Io.Writer.Allocating.init(arena);
try self.url.origin(&aw.writer);

View File

@@ -487,6 +487,18 @@ pub fn BrowserContext(comptime CDP_T: type) type {
self.cdp.browser.notification.unregister(.http_request_auth_required, self);
}
pub fn lifecycleEventsEnable(self: *Self) !void {
self.page_life_cycle_events = true;
try self.cdp.browser.notification.register(.page_network_idle, self, onPageNetworkIdle);
try self.cdp.browser.notification.register(.page_network_almost_idle, self, onPageNetworkAlmostIdle);
}
pub fn lifecycleEventsDisable(self: *Self) void {
self.page_life_cycle_events = false;
self.cdp.browser.notification.unregister(.page_network_idle, self);
self.cdp.browser.notification.unregister(.page_network_almost_idle, self);
}
pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
try @import("domains/page.zig").pageRemove(self);
@@ -508,6 +520,16 @@ pub fn BrowserContext(comptime CDP_T: type) type {
return @import("domains/page.zig").pageNavigated(self, msg);
}
pub fn onPageNetworkIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkIdle(self, msg);
}
pub fn onPageNetworkAlmostIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkAlmostIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkAlmostIdle(self, msg);
}
pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();

View File

@@ -261,7 +261,7 @@ fn continueRequest(cmd: anytype) !void {
transfer.req.body = body;
}
try bc.cdp.browser.http_client.process(transfer);
try bc.cdp.browser.http_client.continueTransfer(transfer);
return cmd.sendResult(null, .{});
}
@@ -311,7 +311,7 @@ fn continueWithAuth(cmd: anytype) !void {
);
transfer.reset();
try bc.cdp.browser.http_client.process(transfer);
try bc.cdp.browser.http_client.continueTransfer(transfer);
return cmd.sendResult(null, .{});
}
@@ -352,7 +352,7 @@ fn fulfillRequest(cmd: anytype) !void {
body = buf;
}
try transfer.fulfill(params.responseCode, params.responseHeaders orelse &.{}, body);
try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body);
return cmd.sendResult(null, .{});
}
@@ -368,7 +368,7 @@ fn failRequest(cmd: anytype) !void {
const request_id = try idFromRequestId(params.requestId);
const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound;
defer transfer.abort();
defer bc.cdp.browser.http_client.abortTransfer(transfer);
log.info(.cdp, "request intercept", .{
.state = "fail",

View File

@@ -78,12 +78,16 @@ fn getFrameTree(cmd: anytype) !void {
}
fn setLifecycleEventsEnabled(cmd: anytype) !void {
// const params = (try cmd.params(struct {
// enabled: bool,
// })) orelse return error.InvalidParams;
const params = (try cmd.params(struct {
enabled: bool,
})) orelse return error.InvalidParams;
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
bc.page_life_cycle_events = true;
if (params.enabled) {
try bc.lifecycleEventsEnable();
} else {
bc.lifecycleEventsDisable();
}
return cmd.sendResult(null, .{});
}
@@ -357,6 +361,27 @@ pub fn pageNavigated(bc: anytype, event: *const Notification.PageNavigated) !voi
}, .{ .session_id = session_id });
}
pub fn pageNetworkIdle(bc: anytype, event: *const Notification.PageNetworkIdle) !void {
return sendPageLifecycle(bc, "networkIdle", event.timestamp);
}
pub fn pageNetworkAlmostIdle(bc: anytype, event: *const Notification.PageNetworkAlmostIdle) !void {
return sendPageLifecycle(bc, "networkAlmostIdle", event.timestamp);
}
fn sendPageLifecycle(bc: anytype, name: []const u8, timestamp: u32) !void {
const loader_id = bc.loader_id;
const target_id = bc.target_id orelse unreachable;
const session_id = bc.session_id orelse unreachable;
return bc.cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.name = name,
.frameId = target_id,
.loaderId = loader_id,
.timestamp = timestamp,
}, .{ .session_id = session_id });
}
const LifecycleEvent = struct {
frameId: []const u8,
loaderId: ?[]const u8,

View File

@@ -50,9 +50,18 @@ const Method = Http.Method;
// those other http requests.
pub const Client = @This();
// count of active requests
// Count of active requests
active: usize,
// Count of intercepted requests. This is to help deal with intercepted requests.
// The client doesn't track intercepted transfers. If a request is intercepted,
// the client forgets about it and requires the interceptor to continue or abort
// it. That works well, except if we only rely on active, we might think there's
// no more network activity when, with interecepted requests, there might be more
// in the future. (We really only need this to properly emit a 'networkIdle' and
// 'networkAlmostIdle' Page.lifecycleEvent in CDP).
intercepted: usize,
// curl has 2 APIs: easy and multi. Multi is like a combination of some I/O block
// (e.g. epoll) and a bunch of pools. You add/remove easys to the multiple and
// then poll the multi.
@@ -115,6 +124,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie
client.* = .{
.queue = .{},
.active = 0,
.intercepted = 0,
.multi = multi,
.handles = handles,
.blocking = blocking,
@@ -191,6 +201,10 @@ pub fn request(self: *Client, req: Request) !void {
var wait_for_interception = false;
notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) {
self.intercepted += 1;
if (builtin.mode == .Debug) {
transfer._intercepted = true;
}
// The user is send an invitation to intercept this request.
return;
}
@@ -200,9 +214,9 @@ pub fn request(self: *Client, req: Request) !void {
}
// Above, request will not process if there's an interception request. In such
// cases, the interecptor is expected to call process to continue the transfer
// cases, the interecptor is expected to call resume to continue the transfer
// or transfer.abort() to abort it.
pub fn process(self: *Client, transfer: *Transfer) !void {
fn process(self: *Client, transfer: *Transfer) !void {
if (self.handles.getFreeHandle()) |handle| {
return self.makeRequest(handle, transfer);
}
@@ -210,6 +224,33 @@ pub fn process(self: *Client, transfer: *Transfer) !void {
self.queue.append(&transfer._node);
}
// For an intercepted request
pub fn continueTransfer(self: *Client, transfer: *Transfer) !void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
return self.process(transfer);
}
// For an intercepted request
pub fn abortTransfer(self: *Client, transfer: *Transfer) void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
transfer.abort();
}
// For an intercepted request
pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const Http.Header, body: ?[]const u8) !void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
return transfer.fulfill(status, headers, body);
}
// See ScriptManager.blockingGet
pub fn blockingRequest(self: *Client, req: Request) !void {
const transfer = try self.makeTransfer(req);
@@ -674,6 +715,7 @@ pub const Transfer = struct {
// for when a Transfer is queued in the client.queue
_node: std.DoublyLinkedList.Node = .{},
_intercepted: if (builtin.mode == .Debug) bool else void = if (builtin.mode == .Debug) false else {},
pub fn reset(self: *Transfer) void {
self._redirecting = false;

View File

@@ -32,9 +32,9 @@ const List = std.DoublyLinkedList;
// "scope". This would have a run-time cost and still require some coordination
// between components to share a common scope.
//
// Instead, the approach that we take is to have a notification per
// Instead, the approach that we take is to have a notification instance per
// scope. This makes some things harder, but we only plan on having 2
// notifications at a given time: one in a Browser and one in the App.
// notification instances at a given time: one in a Browser and one in the App.
// What about something like Telemetry, which lives outside of a Browser but
// still cares about Browser-events (like .page_navigate)? When the Browser
// notification is created, a `notification_created` event is raised in the
@@ -59,6 +59,8 @@ pub const Notification = struct {
page_created: List = .{},
page_navigate: List = .{},
page_navigated: List = .{},
page_network_idle: List = .{},
page_network_almost_idle: List = .{},
http_request_fail: List = .{},
http_request_start: List = .{},
http_request_intercept: List = .{},
@@ -74,6 +76,8 @@ pub const Notification = struct {
page_created: *page.Page,
page_navigate: *const PageNavigate,
page_navigated: *const PageNavigated,
page_network_idle: *const PageNetworkIdle,
page_network_almost_idle: *const PageNetworkAlmostIdle,
http_request_fail: *const RequestFail,
http_request_start: *const RequestStart,
http_request_intercept: *const RequestIntercept,
@@ -98,6 +102,14 @@ pub const Notification = struct {
url: []const u8,
};
pub const PageNetworkIdle = struct {
timestamp: u32,
};
pub const PageNetworkAlmostIdle = struct {
timestamp: u32,
};
pub const RequestStart = struct {
transfer: *Transfer,
};