Merge pull request #1008 from lightpanda-io/network_idle_page_lifecycle

Emit networkIdle and networkAlmostIdle Page.lifecycleEvent
This commit is contained in:
Karl Seguin
2025-09-04 17:48:02 +08:00
committed by GitHub
7 changed files with 176 additions and 34 deletions

View File

@@ -90,6 +90,11 @@ pub const Page = struct {
load_state: LoadState = .parsing,
// We should only emit these events once per page. To make sure of that, we
// track whether or not we've already emitted the notifications.
notified_network_idle: bool = false,
notified_network_almost_idle: bool = false,
const Mode = union(enum) {
pre: void,
err: anyerror,
@@ -329,7 +334,13 @@ pub const Page = struct {
return error.JsError;
}
if (http_client.active == 0 and exit_when_done) {
const http_active = http_client.active;
if (http_active == 0 and exit_when_done) {
// we don't need to consider http_client.intercepted here
// because exit_when_done is true, and that can only be
// the case when interception isn't possible.
std.debug.assert(http_client.intercepted == 0);
const ms = ms_to_next_task orelse blk: {
// TODO: when jsRunner is fully replaced with the
// htmlRunner, we can remove the first part of this
@@ -341,29 +352,34 @@ pub const Page = struct {
// background jobs.
break :blk 50;
}
// no http transfers, no cdp extra socket, no
// No http transfers, no cdp extra socket, no
// scheduled tasks, we're done.
return .done;
};
if (ms > ms_remaining) {
// same as above, except we have a scheduled task,
// it just happens to be too far into the future.s
// Same as above, except we have a scheduled task,
// it just happens to be too far into the future.
return .done;
}
// we have a task to run in the not-so-distant future.
// We have a task to run in the not-so-distant future.
// You might think we can just sleep until that task is
// ready, but we should continue to run lowPriority tasks
// in the meantime, and that could unblock things. So
// we'll just sleep for a bit, and then restart our wait
// loop to see what's changed
// loop to see if anything new can be processed.
std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20))));
} else {
if (self.notified_network_idle == false and http_active == 0 and http_client.intercepted == 0) {
self.notifyNetworkIdle();
}
// We're here because we either have active HTTP
// connections, of exit_when_done == false (aka, there's
// connections, or exit_when_done == false (aka, there's
// an extra_socket registered with the http client).
const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100);
// We should continue to run lowPriority tasks, so we
// minimize how long we'll poll for network I/O.
const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200));
if (try http_client.tick(ms_to_wait) == .extra_socket) {
// data on a socket we aren't handling, return to caller
return .extra_socket;
@@ -467,6 +483,31 @@ pub const Page = struct {
}
}
fn notifyNetworkIdle(self: *Page) void {
// caller should always check that we haven't sent this already;
std.debug.assert(self.notified_network_idle == false);
// if we're going to send networkIdle, we should first send networkAlmostIdle
// if it hasn't already been sent.
if (self.notified_network_almost_idle == false) {
self.notifyNetworkAlmostIdle();
}
self.notified_network_idle = true;
self.session.browser.notification.dispatch(.page_network_idle, &.{
.timestamp = timestamp(),
});
}
fn notifyNetworkAlmostIdle(self: *Page) void {
// caller should always check that we haven't sent this already;
std.debug.assert(self.notified_network_almost_idle == false);
self.notified_network_almost_idle = true;
self.session.browser.notification.dispatch(.page_network_almost_idle, &.{
.timestamp = timestamp(),
});
}
pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
var aw = std.Io.Writer.Allocating.init(arena);
try self.url.origin(&aw.writer);
@@ -782,12 +823,12 @@ pub const Page = struct {
}
// The transfer arena is useful and interesting, but has a weird lifetime.
// When we're transfering from one page to another (via delayed navigation)
// When we're transferring from one page to another (via delayed navigation)
// we need things in memory: like the URL that we're navigating to and
// optionally the body to POST. That cannot exist in the page.arena, because
// the page that we have is going to be destroyed and a new page is going
// to be created. If we used the page.arena, we'd wouldn't be able to reset
// it between navigations.
// it between navigation.
// So the transfer arena is meant to exist between a navigation event. It's
// freed when the main html navigation is complete, either in pageDoneCallback
// or pageErrorCallback. It needs to exist for this long because, if we set
@@ -1118,7 +1159,7 @@ pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) c
return;
}
// It's posisble for a script to be dynamically added without a src.
// It's possible for a script to be dynamically added without a src.
// const s = document.createElement('script');
// document.getElementsByTagName('body')[0].appendChild(s);
// The src can be set after. We handle that in HTMLScriptElement.set_src,

View File

@@ -487,6 +487,18 @@ pub fn BrowserContext(comptime CDP_T: type) type {
self.cdp.browser.notification.unregister(.http_request_auth_required, self);
}
pub fn lifecycleEventsEnable(self: *Self) !void {
self.page_life_cycle_events = true;
try self.cdp.browser.notification.register(.page_network_idle, self, onPageNetworkIdle);
try self.cdp.browser.notification.register(.page_network_almost_idle, self, onPageNetworkAlmostIdle);
}
pub fn lifecycleEventsDisable(self: *Self) void {
self.page_life_cycle_events = false;
self.cdp.browser.notification.unregister(.page_network_idle, self);
self.cdp.browser.notification.unregister(.page_network_almost_idle, self);
}
pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
try @import("domains/page.zig").pageRemove(self);
@@ -508,6 +520,16 @@ pub fn BrowserContext(comptime CDP_T: type) type {
return @import("domains/page.zig").pageNavigated(self, msg);
}
pub fn onPageNetworkIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkIdle(self, msg);
}
pub fn onPageNetworkAlmostIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkAlmostIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkAlmostIdle(self, msg);
}
pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
defer self.resetNotificationArena();

View File

@@ -261,7 +261,7 @@ fn continueRequest(cmd: anytype) !void {
transfer.req.body = body;
}
try bc.cdp.browser.http_client.process(transfer);
try bc.cdp.browser.http_client.continueTransfer(transfer);
return cmd.sendResult(null, .{});
}
@@ -311,7 +311,7 @@ fn continueWithAuth(cmd: anytype) !void {
);
transfer.reset();
try bc.cdp.browser.http_client.process(transfer);
try bc.cdp.browser.http_client.continueTransfer(transfer);
return cmd.sendResult(null, .{});
}
@@ -352,7 +352,7 @@ fn fulfillRequest(cmd: anytype) !void {
body = buf;
}
try transfer.fulfill(params.responseCode, params.responseHeaders orelse &.{}, body);
try bc.cdp.browser.http_client.fulfillTransfer(transfer, params.responseCode, params.responseHeaders orelse &.{}, body);
return cmd.sendResult(null, .{});
}
@@ -368,7 +368,7 @@ fn failRequest(cmd: anytype) !void {
const request_id = try idFromRequestId(params.requestId);
const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound;
defer transfer.abort();
defer bc.cdp.browser.http_client.abortTransfer(transfer);
log.info(.cdp, "request intercept", .{
.state = "fail",

View File

@@ -78,12 +78,16 @@ fn getFrameTree(cmd: anytype) !void {
}
fn setLifecycleEventsEnabled(cmd: anytype) !void {
// const params = (try cmd.params(struct {
// enabled: bool,
// })) orelse return error.InvalidParams;
const params = (try cmd.params(struct {
enabled: bool,
})) orelse return error.InvalidParams;
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
bc.page_life_cycle_events = true;
if (params.enabled) {
try bc.lifecycleEventsEnable();
} else {
bc.lifecycleEventsDisable();
}
return cmd.sendResult(null, .{});
}
@@ -357,6 +361,27 @@ pub fn pageNavigated(bc: anytype, event: *const Notification.PageNavigated) !voi
}, .{ .session_id = session_id });
}
pub fn pageNetworkIdle(bc: anytype, event: *const Notification.PageNetworkIdle) !void {
return sendPageLifecycle(bc, "networkIdle", event.timestamp);
}
pub fn pageNetworkAlmostIdle(bc: anytype, event: *const Notification.PageNetworkAlmostIdle) !void {
return sendPageLifecycle(bc, "networkAlmostIdle", event.timestamp);
}
fn sendPageLifecycle(bc: anytype, name: []const u8, timestamp: u32) !void {
const loader_id = bc.loader_id;
const target_id = bc.target_id orelse unreachable;
const session_id = bc.session_id orelse unreachable;
return bc.cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.name = name,
.frameId = target_id,
.loaderId = loader_id,
.timestamp = timestamp,
}, .{ .session_id = session_id });
}
const LifecycleEvent = struct {
frameId: []const u8,
loaderId: ?[]const u8,

View File

@@ -50,9 +50,18 @@ const Method = Http.Method;
// those other http requests.
pub const Client = @This();
// count of active requests
// Count of active requests
active: usize,
// Count of intercepted requests. This is to help deal with intercepted requests.
// The client doesn't track intercepted transfers. If a request is intercepted,
// the client forgets about it and requires the interceptor to continue or abort
// it. That works well, except if we only rely on active, we might think there's
// no more network activity when, with interecepted requests, there might be more
// in the future. (We really only need this to properly emit a 'networkIdle' and
// 'networkAlmostIdle' Page.lifecycleEvent in CDP).
intercepted: usize,
// curl has 2 APIs: easy and multi. Multi is like a combination of some I/O block
// (e.g. epoll) and a bunch of pools. You add/remove easys to the multiple and
// then poll the multi.
@@ -115,6 +124,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie
client.* = .{
.queue = .{},
.active = 0,
.intercepted = 0,
.multi = multi,
.handles = handles,
.blocking = blocking,
@@ -191,6 +201,10 @@ pub fn request(self: *Client, req: Request) !void {
var wait_for_interception = false;
notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) {
self.intercepted += 1;
if (builtin.mode == .Debug) {
transfer._intercepted = true;
}
// The user is send an invitation to intercept this request.
return;
}
@@ -200,9 +214,9 @@ pub fn request(self: *Client, req: Request) !void {
}
// Above, request will not process if there's an interception request. In such
// cases, the interecptor is expected to call process to continue the transfer
// cases, the interecptor is expected to call resume to continue the transfer
// or transfer.abort() to abort it.
pub fn process(self: *Client, transfer: *Transfer) !void {
fn process(self: *Client, transfer: *Transfer) !void {
if (self.handles.getFreeHandle()) |handle| {
return self.makeRequest(handle, transfer);
}
@@ -210,6 +224,33 @@ pub fn process(self: *Client, transfer: *Transfer) !void {
self.queue.append(&transfer._node);
}
// For an intercepted request
pub fn continueTransfer(self: *Client, transfer: *Transfer) !void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
return self.process(transfer);
}
// For an intercepted request
pub fn abortTransfer(self: *Client, transfer: *Transfer) void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
transfer.abort();
}
// For an intercepted request
pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const Http.Header, body: ?[]const u8) !void {
if (builtin.mode == .Debug) {
std.debug.assert(transfer._intercepted);
}
self.intercepted -= 1;
return transfer.fulfill(status, headers, body);
}
// See ScriptManager.blockingGet
pub fn blockingRequest(self: *Client, req: Request) !void {
const transfer = try self.makeTransfer(req);
@@ -674,6 +715,7 @@ pub const Transfer = struct {
// for when a Transfer is queued in the client.queue
_node: std.DoublyLinkedList.Node = .{},
_intercepted: if (builtin.mode == .Debug) bool else void = if (builtin.mode == .Debug) false else {},
pub fn reset(self: *Transfer) void {
self._redirecting = false;

View File

@@ -32,9 +32,9 @@ const List = std.DoublyLinkedList;
// "scope". This would have a run-time cost and still require some coordination
// between components to share a common scope.
//
// Instead, the approach that we take is to have a notification per
// Instead, the approach that we take is to have a notification instance per
// scope. This makes some things harder, but we only plan on having 2
// notifications at a given time: one in a Browser and one in the App.
// notification instances at a given time: one in a Browser and one in the App.
// What about something like Telemetry, which lives outside of a Browser but
// still cares about Browser-events (like .page_navigate)? When the Browser
// notification is created, a `notification_created` event is raised in the
@@ -59,6 +59,8 @@ pub const Notification = struct {
page_created: List = .{},
page_navigate: List = .{},
page_navigated: List = .{},
page_network_idle: List = .{},
page_network_almost_idle: List = .{},
http_request_fail: List = .{},
http_request_start: List = .{},
http_request_intercept: List = .{},
@@ -74,6 +76,8 @@ pub const Notification = struct {
page_created: *page.Page,
page_navigate: *const PageNavigate,
page_navigated: *const PageNavigated,
page_network_idle: *const PageNetworkIdle,
page_network_almost_idle: *const PageNetworkAlmostIdle,
http_request_fail: *const RequestFail,
http_request_start: *const RequestStart,
http_request_intercept: *const RequestIntercept,
@@ -98,6 +102,14 @@ pub const Notification = struct {
url: []const u8,
};
pub const PageNetworkIdle = struct {
timestamp: u32,
};
pub const PageNetworkAlmostIdle = struct {
timestamp: u32,
};
pub const RequestStart = struct {
transfer: *Transfer,
};

View File

@@ -1395,7 +1395,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
},
.@"struct" => {
// We don't want to duplicate the code for this, so we call
// the actual coversion function.
// the actual conversion function.
const value = (try self.jsValueToStruct(named_function, T, js_value)) orelse {
return .{ .invalid = {} };
};
@@ -1494,7 +1494,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// We were hoping to find the module in our cache, and thus used
// the short-lived call_arena to create the normalized_specifier.
// But now this'll live for the lifetime of the context.
// But now this will live for the lifetime of the context.
const arena = self.context_arena;
const owned_specifier = try arena.dupe(u8, normalized_specifier);
try self.module_cache.put(arena, owned_specifier, PersistentModule.init(self.isolate, m));
@@ -2098,7 +2098,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// a shorthand method to return either the entire stack message
// or just the exception message
// - in Debug mode return the stack if available
// - otherwhise return the exception if available
// - otherwise return the exception if available
// the caller needs to deinit the string returned
pub fn err(self: TryCatch, allocator: Allocator) !?[]const u8 {
if (builtin.mode == .Debug) {
@@ -2579,7 +2579,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
// If you're trying to implement setter, read:
// https://groups.google.com/g/v8-users/c/8tahYBsHpgY/m/IteS7Wn2AAAJ
// The issue I had was
// (a) where to attache it: does it go ont he instance_template
// (a) where to attache it: does it go on the instance_template
// instead of the prototype?
// (b) defining the getter or query to respond with the
// PropertyAttribute to indicate if the property can be set
@@ -2751,7 +2751,7 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
}
if (T == Function) {
// we're returnig a callback
// we're returning a callback
return value.func.toValue();
}
@@ -2876,8 +2876,8 @@ pub fn Env(comptime State: type, comptime WebApis: type) type {
}
};
// Callback called on global's property mssing.
// Return true to intercept the exectution or false to let the call
// Callback called on global's property missing.
// Return true to intercept the execution or false to let the call
// continue the chain.
pub const GlobalMissingCallback = struct {
ptr: *anyopaque,
@@ -2951,10 +2951,10 @@ fn isComplexAttributeType(ti: std.builtin.Type) bool {
};
}
// Responsible for calling Zig functions from JS invokations. This could
// Responsible for calling Zig functions from JS invocations. This could
// probably just contained in ExecutionWorld, but having this specific logic, which
// is somewhat repetitive between constructors, functions, getters, etc contained
// here does feel like it makes it clenaer.
// here does feel like it makes it cleaner.
fn Caller(comptime JsContext: type, comptime State: type) type {
return struct {
js_context: *JsContext,
@@ -3356,7 +3356,7 @@ fn Caller(comptime JsContext: type, comptime State: type) type {
var is_variadic = false;
{
// This is going to get complicated. If the last Zig paremeter
// This is going to get complicated. If the last Zig parameter
// is a slice AND the corresponding javascript parameter is
// NOT an an array, then we'll treat it as a variadic.