Files
browser/src/cdp/domains/network.zig
Karl Seguin 94a30b2167 HTTP request notification
- Add 2 internal notifications
  1 - http_request_start
  2 - http_request_complete

- When Network.enable CDP message is received, browser context registers for
  these 2 events (when Network.disable is called, it unregisters)

- On http_request_start, CDP will emit a Network.requestWillBeSent message.
  This _does not_ include all the fields, but what we have appears to be enough
  for puppeteer.waitForNetworkIdle.

- On http_request_complete, CDP will emit a Network.responseReceived message.
  This _does not_ include all the fields, but what we have appears to be enough
  for puppeteer.waitForNetworkIdle.

We currently don't emit any other new events, including any network-specific
lifecycleEvent (i.e. Chrome will emit a networkIdle and networkAlmostIdle).

To support this, the following other things were done:
- CDP now has a `notification_arena` which is re-used between browser contexts.
  Normally, CDP code runs based on a "cmd" which has its own message_arena, but
  these notifications happen out-of-band, so we needed a new arena which is
  valid for handling 1 notification.

- HTTP Client is notification-aware. The SessionState no longer includes the
  *http.Client directly. It instead includes an http.RequestFactory which is
  the combination of the client + a specific configuration (i.e. *Notification).
  This ensures that all requests made from that factory have the same settings.

- However, despite the above, _some_ requests do not appear to emit CDP events,
  such as loading a <script src="X">. So the page still deals directly with the
  *http.Client.

- Playwright and Puppeteer (but Playwright in particular) are very sensitive to
  event ordering. These new events have introduced additional sensitivity.
  The result sent to Page.navigate had to be moved to inside the navigate event
  handler, which meant passing some cdp-specific data (the input.id) into the
  NavigateOpts. This is the only way I found to keep both happy - the sequence
  of events is closer (but still pretty far) from what Chrome does.
2025-05-24 09:01:12 +08:00

147 lines
5.1 KiB
Zig

// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Notification = @import("../../notification.zig").Notification;
const Allocator = std.mem.Allocator;
/// Dispatches a `Network.*` CDP command based on `cmd.input.action`.
///
/// Returns `error.UnknownMethod` when the action is not one of the methods
/// handled here. `setCacheDisabled` is acknowledged with an empty result and
/// otherwise does nothing.
pub fn processMessage(cmd: anytype) !void {
    const Action = enum {
        enable,
        disable,
        setCacheDisabled,
    };

    const action = std.meta.stringToEnum(Action, cmd.input.action) orelse {
        return error.UnknownMethod;
    };

    return switch (action) {
        .enable => enable(cmd),
        .disable => disable(cmd),
        .setCacheDisabled => cmd.sendResult(null, .{}),
    };
}
/// Handles `Network.enable`: calls `networkEnable` on the command's browser
/// context, then replies with an empty result.
///
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context; nothing is sent in that case.
fn enable(cmd: anytype) !void {
    if (cmd.browser_context) |browser_context| {
        try browser_context.networkEnable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}
/// Handles `Network.disable`: calls `networkDisable` on the command's browser
/// context, then replies with an empty result.
///
/// Returns `error.BrowserContextNotLoaded` when the command has no browser
/// context; nothing is sent in that case.
fn disable(cmd: anytype) !void {
    if (cmd.browser_context) |browser_context| {
        browser_context.networkDisable();
        return cmd.sendResult(null, .{});
    }
    return error.BrowserContextNotLoaded;
}
/// Emits a CDP `Network.requestWillBeSent` event describing `request`.
///
/// Every string built here is allocated from `arena` and is not freed by this
/// function. The event is sent on the browser context's session and uses the
/// context's target id as the `frameId`.
///
/// The browser context must have a session id, a target id and a current
/// page; these are asserted rather than reported as errors. Allocation
/// failures and errors from `sendEvent` are propagated to the caller.
pub fn httpRequestStart(arena: Allocator, bc: anytype, request: *const Notification.RequestStart) !void {
    // Isn't possible to do a network request within a Browser (which our
    // notification is tied to), without a page.
    std.debug.assert(bc.session.page != null);

    var cdp = bc.cdp;

    // all unreachable because we _have_ to have a page.
    const session_id = bc.session_id orelse unreachable;
    const target_id = bc.target_id orelse unreachable;
    const page = bc.session.currentPage() orelse unreachable;

    // Everything in the URL except the fragment, which CDP reports separately.
    const without_fragment: std.Uri.WriteToStreamOptions = .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    };

    const document_url = try urlToString(arena, &page.url.uri, without_fragment);
    const request_url = try urlToString(arena, request.url, without_fragment);

    // `std.Uri.writeToStream` ignores the `fragment` option unless `path` is
    // also enabled, so asking it for the fragment alone always produces an
    // empty string. Format the component directly instead; CDP expects the
    // fragment to start with '#'. An absent fragment is reported as "".
    const request_fragment: []const u8 = if (request.url.fragment) |fragment|
        try std.fmt.allocPrint(arena, "#{fragment}", .{fragment})
    else
        "";

    // Capacity is reserved up front so the inserts below cannot fail.
    var headers: std.StringArrayHashMapUnmanaged([]const u8) = .empty;
    try headers.ensureTotalCapacity(arena, request.headers.len);
    for (request.headers) |header| {
        headers.putAssumeCapacity(header.name, header.value);
    }

    // We're missing a bunch of fields, but, for now, this seems like enough
    try cdp.sendEvent("Network.requestWillBeSent", .{
        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id}),
        .frameId = target_id,
        .loaderId = bc.loader_id,
        .documentUrl = document_url,
        .request = .{
            .url = request_url,
            .urlFragment = request_fragment,
            .method = @tagName(request.method),
            .hasPostData = request.has_body,
            .headers = std.json.ArrayHashMap([]const u8){ .map = headers },
        },
    }, .{ .session_id = session_id });
}
/// Emits a CDP `Network.responseReceived` event for a completed request.
///
/// Every string built here is allocated from `arena` and is not freed by this
/// function. The event is sent on the browser context's session and uses the
/// context's target id as the `frameId`.
///
/// The browser context must have a session id and a target id; these are
/// asserted rather than reported as errors. Allocation failures and errors
/// from `sendEvent` are propagated to the caller.
pub fn httpRequestComplete(arena: Allocator, bc: anytype, request: *const Notification.RequestComplete) !void {
    // A network request cannot happen inside a Browser (which this
    // notification is tied to) unless there is a page.
    std.debug.assert(bc.session.page != null);

    var cdp = bc.cdp;

    // Both must be set: we _have_ to have a page at this point.
    const session_id = bc.session_id.?;
    const frame_id = bc.target_id.?;

    // The response URL is reported without its fragment.
    const response_url = try urlToString(arena, request.url, .{
        .scheme = true,
        .authentication = true,
        .authority = true,
        .path = true,
        .query = true,
    });

    // Capacity is reserved up front so the inserts below cannot fail.
    var header_map: std.StringArrayHashMapUnmanaged([]const u8) = .empty;
    try header_map.ensureTotalCapacity(arena, request.headers.len);
    for (request.headers) |entry| {
        header_map.putAssumeCapacity(entry.name, entry.value);
    }

    const request_id = try std.fmt.allocPrint(arena, "REQ-{d}", .{request.id});

    // Plenty of fields are still missing, but this appears to be enough for now.
    try cdp.sendEvent("Network.responseReceived", .{
        .requestId = request_id,
        .frameId = frame_id,
        .loaderId = bc.loader_id,
        .response = .{
            .url = response_url,
            .status = request.status,
            .headers = std.json.ArrayHashMap([]const u8){ .map = header_map },
        },
    }, .{ .session_id = session_id });
}
/// Renders the parts of `url` selected by `opts` into a newly allocated string.
///
/// The returned slice is backed by memory from `arena` and is never freed
/// here. Errors from the underlying writer (allocation failure) are propagated.
fn urlToString(arena: Allocator, url: *const std.Uri, opts: std.Uri.WriteToStreamOptions) ![]const u8 {
    var out: std.ArrayListUnmanaged(u8) = .empty;
    const writer = out.writer(arena);
    try url.writeToStream(opts, writer);
    return out.items;
}