From cd33e9ad0eeae0c2101dbc6dfb0876255502df1a Mon Sep 17 00:00:00 2001
From: Karl Seguin
Date: Thu, 21 Aug 2025 10:32:03 +0800
Subject: [PATCH] Implement Network.getResponseBody

Add response_data event, CDP now captures the full body so that it can
respond to the Network.getResponseBody.

This isn't memory efficient, but I don't see another way to do it. At least
this way, it's only capturing/storing every response body when (a) CDP is
used and (b) Network.enable is called. That is, as opposed to baking this
into Http/Client.zig, which would force the memory consumption for all
use-cases.

There are arguably some optimizations we could make for XHR requests, which
also dupe/own the response. As of now, the response is dupe'd separately for
CDP and XHR.
---
 src/cdp/cdp.zig             | 57 ++++++++++++++++++++++++++-----------
 src/cdp/domains/network.zig | 47 +++++++++++++++----------------
 src/http/Client.zig         | 13 +++++++--
 src/notification.zig        | 13 +++++++--
 4 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig
index f0d3b682..536d4b99 100644
--- a/src/cdp/cdp.zig
+++ b/src/cdp/cdp.zig
@@ -344,6 +344,15 @@ pub fn BrowserContext(comptime CDP_T: type) type {
 
         intercept_state: InterceptState,
 
+        // When network is enabled, we'll capture the transfer.id -> body
+        // This is awfully memory intensive, but our underlying http client and
+        // its users (script manager and page) correctly do not hold the body
+        // memory longer than they have to. In fact, the main request is only
+        // ever streamed. So if CDP is the only thing that needs bodies in
+        // memory for an arbitrary amount of time, then that's where we're going
+        // to store them.
+        captured_responses: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(u8)),
+
         const Self = @This();
 
         fn init(self: *Self, id: []const u8, cdp: *CDP_T) !void {
@@ -374,6 +383,7 @@ pub fn BrowserContext(comptime CDP_T: type) type {
                 .inspector = inspector,
                 .notification_arena = cdp.notification_arena.allocator(),
                 .intercept_state = try InterceptState.init(allocator),
+                .captured_responses = .empty,
             };
             self.node_search_list = Node.Search.List.init(allocator, &self.node_registry);
             errdefer self.deinit();
@@ -454,15 +464,17 @@ pub fn BrowserContext(comptime CDP_T: type) type {
         pub fn networkEnable(self: *Self) !void {
             try self.cdp.browser.notification.register(.http_request_fail, self, onHttpRequestFail);
             try self.cdp.browser.notification.register(.http_request_start, self, onHttpRequestStart);
-            try self.cdp.browser.notification.register(.http_headers_done, self, onHttpHeadersDone);
             try self.cdp.browser.notification.register(.http_request_done, self, onHttpRequestDone);
+            try self.cdp.browser.notification.register(.http_response_data, self, onHttpResponseData);
+            try self.cdp.browser.notification.register(.http_response_header_done, self, onHttpResponseHeadersDone);
         }
 
         pub fn networkDisable(self: *Self) void {
             self.cdp.browser.notification.unregister(.http_request_fail, self);
             self.cdp.browser.notification.unregister(.http_request_start, self);
-            self.cdp.browser.notification.unregister(.http_headers_done, self);
             self.cdp.browser.notification.unregister(.http_request_done, self);
+            self.cdp.browser.notification.unregister(.http_response_data, self);
+            self.cdp.browser.notification.unregister(.http_response_header_done, self);
         }
 
         pub fn fetchEnable(self: *Self) !void {
@@ -483,45 +495,58 @@ pub fn BrowserContext(comptime CDP_T: type) type {
             return @import("domains/page.zig").pageCreated(self, page);
         }
 
-        pub fn onPageNavigate(ctx: *anyopaque, data: *const Notification.PageNavigate) !void {
+        pub fn onPageNavigate(ctx: *anyopaque, msg: *const Notification.PageNavigate) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            return @import("domains/page.zig").pageNavigate(self.notification_arena, self, data);
+            return @import("domains/page.zig").pageNavigate(self.notification_arena, self, msg);
         }
 
-        pub fn onPageNavigated(ctx: *anyopaque, data: *const Notification.PageNavigated) !void {
+        pub fn onPageNavigated(ctx: *anyopaque, msg: *const Notification.PageNavigated) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
-            return @import("domains/page.zig").pageNavigated(self, data);
+            return @import("domains/page.zig").pageNavigated(self, msg);
         }
 
-        pub fn onHttpRequestStart(ctx: *anyopaque, data: *const Notification.RequestStart) !void {
+        pub fn onHttpRequestStart(ctx: *anyopaque, msg: *const Notification.RequestStart) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            try @import("domains/network.zig").httpRequestStart(self.notification_arena, self, data);
+            try @import("domains/network.zig").httpRequestStart(self.notification_arena, self, msg);
         }
 
-        pub fn onHttpRequestIntercept(ctx: *anyopaque, data: *const Notification.RequestIntercept) !void {
+        pub fn onHttpRequestIntercept(ctx: *anyopaque, msg: *const Notification.RequestIntercept) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            try @import("domains/fetch.zig").requestIntercept(self.notification_arena, self, data);
+            try @import("domains/fetch.zig").requestIntercept(self.notification_arena, self, msg);
         }
 
-        pub fn onHttpRequestFail(ctx: *anyopaque, data: *const Notification.RequestFail) !void {
+        pub fn onHttpRequestFail(ctx: *anyopaque, msg: *const Notification.RequestFail) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            return @import("domains/network.zig").httpRequestFail(self.notification_arena, self, data);
+            return @import("domains/network.zig").httpRequestFail(self.notification_arena, self, msg);
         }
 
-        pub fn onHttpHeadersDone(ctx: *anyopaque, data: *const Notification.ResponseHeadersDone) !void {
+        pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            return @import("domains/network.zig").httpHeadersDone(self.notification_arena, self, data);
+            return @import("domains/network.zig").httpResponseHeaderDone(self.notification_arena, self, msg);
         }
 
-        pub fn onHttpRequestDone(ctx: *anyopaque, data: *const Notification.RequestDone) !void {
+        pub fn onHttpRequestDone(ctx: *anyopaque, msg: *const Notification.RequestDone) !void {
             const self: *Self = @alignCast(@ptrCast(ctx));
             defer self.resetNotificationArena();
-            return @import("domains/network.zig").httpRequestDone(self.notification_arena, self, data);
+            return @import("domains/network.zig").httpRequestDone(self.notification_arena, self, msg);
+        }
+
+        pub fn onHttpResponseData(ctx: *anyopaque, msg: *const Notification.ResponseData) !void {
+            const self: *Self = @alignCast(@ptrCast(ctx));
+            const arena = self.arena;
+
+            const id = msg.transfer.id;
+            const gop = try self.captured_responses.getOrPut(arena, id);
+            if (!gop.found_existing) {
+                gop.value_ptr.* = .empty;
+            }
+            // appendSlice copies the chunk into the list; no extra dupe needed.
+            try gop.value_ptr.appendSlice(arena, msg.data);
         }
 
         fn resetNotificationArena(self: *Self) void {
diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig
index a9c39b60..627ec6d6 100644
--- a/src/cdp/domains/network.zig
+++ b/src/cdp/domains/network.zig
@@ -54,16 +54,6 @@ pub fn processMessage(cmd: anytype) !void {
     }
 }
 
-const Response = struct {
-    status: u16,
-    headers: std.StringArrayHashMapUnmanaged([]const u8) = .empty,
-    // These may not be complete yet, but we only tell the client
-    // Network.responseReceived when all the headers are in.
-    // Later should store body as well to support getResponseBody which should
-    // only work once Network.loadingFinished is sent but the body itself would
-    // be loaded with each chunks as Network.dataReceiveds are coming in.
-};
-
 fn enable(cmd: anytype) !void {
     const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
     try bc.networkEnable();
@@ -209,15 +199,17 @@ fn getResponseBody(cmd: anytype) !void {
         requestId: []const u8, // "REQ-{d}"
     })) orelse return error.InvalidParams;
 
-    _ = params;
+    const request_id = try idFromRequestId(params.requestId);
+    const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
+    const buf = bc.captured_responses.getPtr(request_id) orelse return error.RequestNotFound;
 
     try cmd.sendResult(.{
-        .body = "TODO",
+        .body = buf.items,
         .base64Encoded = false,
     }, .{});
 }
 
-pub fn httpRequestFail(arena: Allocator, bc: anytype, data: *const Notification.RequestFail) !void {
+pub fn httpRequestFail(arena: Allocator, bc: anytype, msg: *const Notification.RequestFail) !void {
     // It's possible that the request failed because we aborted when the client
     // sent Target.closeTarget. In that case, bc.session_id will be cleared
     // already, and we can skip sending these messages to the client.
@@ -229,15 +221,15 @@ pub fn httpRequestFail(arena: Allocator, bc: anytype, data: *const Notification.
 
     // We're missing a bunch of fields, but, for now, this seems like enough
     try bc.cdp.sendEvent("Network.loadingFailed", .{
-        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.transfer.id}),
+        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{msg.transfer.id}),
         // Seems to be what chrome answers with. I assume it depends on the type of error?
         .type = "Ping",
-        .errorText = data.err,
+        .errorText = msg.err,
         .canceled = false,
     }, .{ .session_id = session_id });
 }
 
-pub fn httpRequestStart(arena: Allocator, bc: anytype, data: *const Notification.RequestStart) !void {
+pub fn httpRequestStart(arena: Allocator, bc: anytype, msg: *const Notification.RequestStart) !void {
     // Isn't possible to do a network request within a Browser (which our
     // notification is tied to), without a page.
     std.debug.assert(bc.session.page != null);
@@ -251,15 +243,15 @@ pub fn httpRequestStart(arena: Allocator, bc: anytype, data: *const Notification
 
     // Modify request with extra CDP headers
     for (bc.extra_headers.items) |extra| {
-        try data.transfer.req.headers.add(extra);
+        try msg.transfer.req.headers.add(extra);
     }
 
-    const transfer = data.transfer;
+    const transfer = msg.transfer;
     // We're missing a bunch of fields, but, for now, this seems like enough
     try cdp.sendEvent("Network.requestWillBeSent", .{ .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{transfer.id}), .frameId = target_id, .loaderId = bc.loader_id, .documentUrl = DocumentUrlWriter.init(&page.url.uri), .request = TransferAsRequestWriter.init(transfer) }, .{ .session_id = session_id });
 }
 
-pub fn httpHeadersDone(arena: Allocator, bc: anytype, data: *const Notification.ResponseHeadersDone) !void {
+pub fn httpResponseHeaderDone(arena: Allocator, bc: anytype, msg: *const Notification.ResponseHeaderDone) !void {
     // Isn't possible to do a network request within a Browser (which our
     // notification is tied to), without a page.
     std.debug.assert(bc.session.page != null);
@@ -272,14 +264,14 @@ pub fn httpHeadersDone(arena: Allocator, bc: anytype, data: *const Notification.
 
     // We're missing a bunch of fields, but, for now, this seems like enough
     try cdp.sendEvent("Network.responseReceived", .{
-        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.transfer.id}),
+        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{msg.transfer.id}),
         .loaderId = bc.loader_id,
         .frameId = target_id,
-        .response = TransferAsResponseWriter.init(data.transfer),
+        .response = TransferAsResponseWriter.init(msg.transfer),
     }, .{ .session_id = session_id });
 }
 
-pub fn httpRequestDone(arena: Allocator, bc: anytype, data: *const Notification.RequestDone) !void {
+pub fn httpRequestDone(arena: Allocator, bc: anytype, msg: *const Notification.RequestDone) !void {
     // Isn't possible to do a network request within a Browser (which our
     // notification is tied to), without a page.
     std.debug.assert(bc.session.page != null);
@@ -290,8 +282,8 @@ pub fn httpRequestDone(arena: Allocator, bc: anytype, data: *const Notification.
     const session_id = bc.session_id orelse unreachable;
 
     try cdp.sendEvent("Network.loadingFinished", .{
-        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.transfer.id}),
-        .encodedDataLength = data.transfer.bytes_received,
+        .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{msg.transfer.id}),
+        .encodedDataLength = msg.transfer.bytes_received,
     }, .{ .session_id = session_id });
 }
 
@@ -439,6 +431,13 @@ const DocumentUrlWriter = struct {
     }
 };
 
+fn idFromRequestId(request_id: []const u8) !u64 {
+    if (!std.mem.startsWith(u8, request_id, "REQ-")) {
+        return error.InvalidParams;
+    }
+    return std.fmt.parseInt(u64, request_id[4..], 10) catch return error.InvalidParams;
+}
+
 const testing = @import("../testing.zig");
 test "cdp.network setExtraHTTPHeaders" {
     var ctx = testing.context();
diff --git a/src/http/Client.zig b/src/http/Client.zig
index d6415ac8..b38525d4 100644
--- a/src/http/Client.zig
+++ b/src/http/Client.zig
@@ -757,7 +757,7 @@ pub const Transfer = struct {
         };
 
         if (transfer.client.notification) |notification| {
-            notification.dispatch(.http_headers_done, &.{
+            notification.dispatch(.http_response_header_done, &.{
                 .transfer = transfer,
             });
         }
@@ -780,10 +780,19 @@ pub const Transfer = struct {
         }
 
         transfer.bytes_received += chunk_len;
-        transfer.req.data_callback(transfer, buffer[0..chunk_len]) catch |err| {
+        const chunk = buffer[0..chunk_len];
+        transfer.req.data_callback(transfer, chunk) catch |err| {
             log.err(.http, "data_callback", .{ .err = err, .req = transfer });
             return c.CURL_WRITEFUNC_ERROR;
         };
+
+        if (transfer.client.notification) |notification| {
+            notification.dispatch(.http_response_data, &.{
+                .data = chunk,
+                .transfer = transfer,
+            });
+        }
+
         return chunk_len;
     }
 
diff --git a/src/notification.zig b/src/notification.zig
index 0ed650dc..43707e40 100644
--- a/src/notification.zig
+++ b/src/notification.zig
@@ -63,8 +63,9 @@ pub const Notification = struct {
         http_request_fail: List = .{},
         http_request_start: List = .{},
         http_request_intercept: List = .{},
-        http_headers_done: List = .{},
         http_request_done: List = .{},
+        http_response_data: List = .{},
+        http_response_header_done: List = .{},
         notification_created: List = .{},
     };
 
@@ -76,8 +77,9 @@ pub const Notification = struct {
         http_request_fail: *const RequestFail,
         http_request_start: *const RequestStart,
         http_request_intercept: *const RequestIntercept,
-        http_headers_done: *const ResponseHeadersDone,
         http_request_done: *const RequestDone,
+        http_response_data: *const ResponseData,
+        http_response_header_done: *const ResponseHeaderDone,
         notification_created: *Notification,
     };
     const EventType = std.meta.FieldEnum(Events);
@@ -104,7 +106,12 @@ pub const Notification = struct {
         wait_for_interception: *bool,
     };
 
-    pub const ResponseHeadersDone = struct {
+    pub const ResponseData = struct {
+        data: []const u8,
+        transfer: *Transfer,
+    };
+
+    pub const ResponseHeaderDone = struct {
         transfer: *Transfer,
     };
 