Create Client.Transfer earlier.

On client.request(req) we now immediately wrap the request into a Transfer. This
results in less copying of the Request object. It also makes the transfer.uri
available, so CDP no longer needs to std.Uri(request.url) anymore.

The main advantage is that it's easier to manage resources. There was a use-
after free before due to the sensitive nature of the tranfer's lifetime. There
were also corner cases where some resources might not be freed. This is
hopefully fixed with the lifetime of Transfer being extended.
This commit is contained in:
Karl Seguin
2025-08-13 18:05:00 +08:00
parent 2dc09c799f
commit ca9e850ac7
5 changed files with 144 additions and 108 deletions

View File

@@ -20,8 +20,8 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const Notification = @import("../../notification.zig").Notification;
const log = @import("../../log.zig");
const Request = @import("../../http/Client.zig").Request;
const Method = @import("../../http/Client.zig").Method;
const Transfer = @import("../../http/Client.zig").Transfer;
pub fn processMessage(cmd: anytype) !void {
const action = std.meta.stringToEnum(enum {
@@ -42,11 +42,11 @@ pub fn processMessage(cmd: anytype) !void {
// Stored in CDP
pub const InterceptState = struct {
const Self = @This();
waiting: std.AutoArrayHashMap(u64, Request),
waiting: std.AutoArrayHashMap(u64, *Transfer),
pub fn init(allocator: Allocator) !InterceptState {
return .{
.waiting = std.AutoArrayHashMap(u64, Request).init(allocator),
.waiting = std.AutoArrayHashMap(u64, *Transfer).init(allocator),
};
}
@@ -135,10 +135,11 @@ pub fn requestPaused(arena: Allocator, bc: anytype, intercept: *const Notificati
// NOTE: we assume whomever created the request created it with a lifetime of the Page.
// TODO: What to do when receiving replies for a previous page's requests?
try cdp.intercept_state.waiting.put(intercept.request.id.?, intercept.request.*);
const transfer = intercept.transfer;
try cdp.intercept_state.waiting.put(transfer.id, transfer);
// NOTE: .request data preparation is duped from network.zig
const full_request_url = try std.Uri.parse(intercept.request.url);
const full_request_url = transfer.uri;
const request_url = try @import("network.zig").urlToString(arena, &full_request_url, .{
.scheme = true,
.authentication = true,
@@ -149,21 +150,21 @@ pub fn requestPaused(arena: Allocator, bc: anytype, intercept: *const Notificati
const request_fragment = try @import("network.zig").urlToString(arena, &full_request_url, .{
.fragment = true,
});
const headers = try intercept.request.headers.asHashMap(arena);
const headers = try transfer.req.headers.asHashMap(arena);
// End of duped code
try cdp.sendEvent("Fetch.requestPaused", .{
.requestId = try std.fmt.allocPrint(arena, "INTERCEPT-{d}", .{intercept.request.id.?}),
.requestId = try std.fmt.allocPrint(arena, "INTERCEPT-{d}", .{transfer.id}),
.request = .{
.url = request_url,
.urlFragment = request_fragment,
.method = @tagName(intercept.request.method),
.hasPostData = intercept.request.body != null,
.method = @tagName(transfer.req.method),
.hasPostData = transfer.req.body != null,
.headers = std.json.ArrayHashMap([]const u8){ .map = headers },
},
.frameId = target_id,
.resourceType = ResourceType.Document, // TODO!
.networkId = try std.fmt.allocPrint(arena, "REQ-{d}", .{intercept.request.id.?}),
.networkId = try std.fmt.allocPrint(arena, "REQ-{d}", .{transfer.id}),
}, .{ .session_id = session_id });
// Await either continueRequest, failRequest or fulfillRequest
@@ -188,19 +189,20 @@ fn continueRequest(cmd: anytype) !void {
if (params.postData != null or params.headers != null or params.interceptResponse) return error.NotYetImplementedParams;
const request_id = try idFromRequestId(params.requestId);
var waiting_request = (bc.cdp.intercept_state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound).value;
const entry = bc.cdp.intercept_state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound;
const transfer = entry.value;
// Update the request with the new parameters
if (params.url) |url| {
// The request url must be modified in a way that's not observable by page. So page.url is not updated.
waiting_request.url = try bc.cdp.browser.page_arena.allocator().dupeZ(u8, url);
try transfer.updateURL(try bc.cdp.browser.page_arena.allocator().dupeZ(u8, url));
}
if (params.method) |method| {
waiting_request.method = std.meta.stringToEnum(Method, method) orelse return error.InvalidParams;
transfer.req.method = std.meta.stringToEnum(Method, method) orelse return error.InvalidParams;
}
log.info(.cdp, "Request continued by intercept", .{ .id = params.requestId });
try bc.cdp.browser.http_client.request(waiting_request);
try bc.cdp.browser.http_client.process(transfer);
return cmd.sendResult(null, .{});
}
@@ -214,7 +216,9 @@ fn failRequest(cmd: anytype) !void {
})) orelse return error.InvalidParams;
const request_id = try idFromRequestId(params.requestId);
if (state.waiting.fetchSwapRemove(request_id) == null) return error.RequestNotFound;
const entry = state.waiting.fetchSwapRemove(request_id) orelse return error.RequestNotFound;
// entry.value is the transfer
entry.value.abort();
log.info(.cdp, "Request aborted by intercept", .{ .reason = params.errorReason });
return cmd.sendResult(null, .{});

View File

@@ -215,7 +215,7 @@ pub fn httpRequestFail(arena: Allocator, bc: anytype, data: *const Notification.
// We're missing a bunch of fields, but, for now, this seems like enough
try bc.cdp.sendEvent("Network.loadingFailed", .{
.requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.request.id.?}),
.requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.transfer.id}),
// Seems to be what chrome answers with. I assume it depends on the type of error?
.type = "Ping",
.errorText = data.err,
@@ -251,7 +251,8 @@ pub fn httpRequestStart(arena: Allocator, bc: anytype, data: *const Notification
.query = true,
});
const full_request_url = try std.Uri.parse(data.request.url);
const transfer = data.transfer;
const full_request_url = transfer.uri;
const request_url = try urlToString(arena, &full_request_url, .{
.scheme = true,
.authentication = true,
@@ -263,19 +264,19 @@ pub fn httpRequestStart(arena: Allocator, bc: anytype, data: *const Notification
.fragment = true, // TODO since path is false, this likely does not work as intended
});
const headers = try data.request.headers.asHashMap(arena);
const headers = try transfer.req.headers.asHashMap(arena);
// We're missing a bunch of fields, but, for now, this seems like enough
try cdp.sendEvent("Network.requestWillBeSent", .{
.requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{data.request.id.?}),
.requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{transfer.id}),
.frameId = target_id,
.loaderId = bc.loader_id,
.documentUrl = document_url,
.request = .{
.url = request_url,
.urlFragment = request_fragment,
.method = @tagName(data.request.method),
.hasPostData = data.request.body != null,
.method = @tagName(transfer.req.method),
.hasPostData = transfer.req.body != null,
.headers = std.json.ArrayHashMap([]const u8){ .map = headers },
},
}, .{ .session_id = session_id });

View File

@@ -64,10 +64,10 @@ handles: Handles,
next_request_id: u64 = 0,
// When handles has no more available easys, requests get queued.
queue: RequestQueue,
queue: TransferQueue,
// Memory pool for Queue nodes.
queue_node_pool: std.heap.MemoryPool(RequestQueue.Node),
queue_node_pool: std.heap.MemoryPool(TransferQueue.Node),
// The main app allocator
allocator: Allocator,
@@ -93,13 +93,13 @@ arena: ArenaAllocator,
// restoring, this originally-configured value is what it goes to.
http_proxy: ?[:0]const u8 = null,
const RequestQueue = std.DoublyLinkedList(Request);
const TransferQueue = std.DoublyLinkedList(*Transfer);
pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Client {
var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
errdefer transfer_pool.deinit();
var queue_node_pool = std.heap.MemoryPool(RequestQueue.Node).init(allocator);
var queue_node_pool = std.heap.MemoryPool(TransferQueue.Node).init(allocator);
errdefer queue_node_pool.deinit();
const client = try allocator.create(Client);
@@ -151,8 +151,7 @@ pub fn abort(self: *Client) void {
log.err(.http, "get private info", .{ .err = err, .source = "abort" });
continue;
};
self.requestFailed(&transfer.req, error.Abort);
self.endTransfer(transfer);
transfer.abort();
}
std.debug.assert(self.active == 0);
@@ -193,46 +192,83 @@ pub fn tick(self: *Client, timeout_ms: usize) !void {
}
pub fn request(self: *Client, req: Request) !void {
var req_copy = req; // We need it mutable
const transfer = try self.makeTransfer(req);
if (req_copy.id == null) { // If the ID has already been set that means the request was previously intercepted
req_copy.id = self.next_request_id;
self.next_request_id += 1;
if (self.notification) |notification| {
notification.dispatch(.http_request_start, &.{ .request = &req_copy });
notification.dispatch(.http_request_start, &.{ .transfer = transfer });
var wait_for_interception = false;
notification.dispatch(.http_request_intercept, &.{ .request = &req_copy, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) return; // The user is send an invitation to intercept this request.
notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) {
// The user is send an invitation to intercept this request.
return;
}
}
return self.process(transfer);
}
// Above, request will not process if there's an interception request. In such
// cases, the interecptor is expected to call process to continue the transfer
// or transfer.abort() to abort it.
pub fn process(self: *Client, transfer: *Transfer) !void {
if (self.handles.getFreeHandle()) |handle| {
return self.makeRequest(handle, req_copy);
return self.makeRequest(handle, transfer);
}
const node = try self.queue_node_pool.create();
node.data = req_copy;
node.data = transfer;
self.queue.append(node);
}
// See ScriptManager.blockingGet
pub fn blockingRequest(self: *Client, req: Request) !void {
return self.makeRequest(&self.blocking, req);
const transfer = try self.makeTransfer(req);
return self.makeRequest(&self.blocking, transfer);
}
fn requestFailed(self: *Client, req: *Request, err: anyerror) void {
if (req._notified_fail) return;
req._notified_fail = true;
fn makeTransfer(self: *Client, req: Request) !*Transfer {
errdefer req.headers.deinit();
// we need this for cookies
const uri = std.Uri.parse(req.url) catch |err| {
log.warn(.http, "invalid url", .{ .err = err, .url = req.url });
return err;
};
const transfer = try self.transfer_pool.create();
errdefer self.transfer_pool.destroy(transfer);
const id = self.next_request_id + 1;
self.next_request_id = id;
transfer.* = .{
.id = id,
.uri = uri,
.req = req,
.ctx = req.ctx,
.client = self,
};
return transfer;
}
fn requestFailed(self: *Client, transfer: *Transfer, err: anyerror) void {
// this shoudln't happen, we'll crash in debug mode. But in release, we'll
// just noop this state.
std.debug.assert(transfer._notified_fail == false);
if (transfer._notified_fail) {
return;
}
transfer._notified_fail = true;
if (self.notification) |notification| {
notification.dispatch(.http_request_fail, &.{
.request = req,
.transfer = transfer,
.err = err,
});
}
req.error_callback(req.ctx, err);
transfer.req.error_callback(transfer.ctx, err);
}
// Restrictive since it'll only work if there are no inflight requests. In some
@@ -265,55 +301,41 @@ pub fn restoreOriginalProxy(self: *Client) !void {
try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy));
}
fn makeRequest(self: *Client, handle: *Handle, req: Request) !void {
fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void {
const conn = handle.conn;
const easy = conn.easy;
const req = &transfer.req;
// we need this for cookies
const uri = std.Uri.parse(req.url) catch |err| {
self.handles.release(handle);
log.warn(.http, "invalid url", .{ .err = err, .url = req.url });
return;
};
var header_list = req.headers;
{
errdefer self.handles.release(handle);
try conn.setMethod(req.method);
try conn.setURL(req.url);
transfer._handle = handle;
errdefer transfer.deinit();
try conn.setURL(req.url);
try conn.setMethod(req.method);
if (req.body) |b| {
try conn.setBody(b);
}
var header_list = req.headers;
try conn.secretHeaders(&header_list); // Add headers that must be hidden from intercepts
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list.headers));
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_PRIVATE, transfer));
}
{
errdefer self.handles.release(handle);
const transfer = try self.transfer_pool.create();
transfer.* = .{
.id = 0,
.uri = uri,
.req = req,
.ctx = req.ctx,
.handle = handle,
._request_header_list = header_list.headers,
};
errdefer self.transfer_pool.destroy(transfer);
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_PRIVATE, transfer));
// Once soon as this is called, our "perform" loop is responsible for
// cleaning things up. That's why the above code is in a block. If anything
// fails BEFORE `curl_multi_add_handle` suceeds, the we still need to do
// cleanup. But if things fail after `curl_multi_add_handle`, we expect
// perfom to pickup the failure and cleanup.
try errorMCheck(c.curl_multi_add_handle(self.multi, easy));
if (req.start_callback) |cb| {
cb(transfer) catch |err| {
try errorMCheck(c.curl_multi_remove_handle(self.multi, easy));
transfer.deinit();
return err;
};
}
}
self.active += 1;
return self.perform(0);
@@ -336,38 +358,36 @@ fn perform(self: *Client, timeout_ms: c_int) !void {
std.debug.assert(msg.msg == c.CURLMSG_DONE);
const easy = msg.easy_handle.?;
const transfer = try Transfer.fromEasy(easy);
const ctx = transfer.ctx;
const done_callback = transfer.req.done_callback;
// release it ASAP so that it's available; some done_callbacks
// will load more resources.
self.endTransfer(transfer);
defer transfer.deinit();
if (errorCheck(msg.data.result)) {
done_callback(ctx) catch |err| {
transfer.req.done_callback(transfer.ctx) catch |err| {
// transfer isn't valid at this point, don't use it.
log.err(.http, "done_callback", .{ .err = err });
self.requestFailed(&transfer.req, err);
self.requestFailed(transfer, err);
};
// self.requestComplete(transfer);
} else |err| {
self.requestFailed(&transfer.req, err);
self.requestFailed(transfer, err);
}
}
}
fn endTransfer(self: *Client, transfer: *Transfer) void {
const handle = transfer.handle;
transfer.deinit();
self.transfer_pool.destroy(transfer);
const handle = transfer._handle.?;
errorMCheck(c.curl_multi_remove_handle(self.multi, handle.conn.easy)) catch |err| {
log.fatal(.http, "Failed to remove handle", .{ .err = err });
};
self.handles.release(handle);
transfer._handle = null;
self.active -= 1;
}
@@ -497,50 +517,49 @@ pub const RequestCookie = struct {
};
pub const Request = struct {
id: ?u64 = null,
method: Method,
url: [:0]const u8,
headers: Headers,
body: ?[]const u8 = null,
cookie_jar: *storage.CookieJar,
_notified_fail: bool = false,
// arbitrary data that can be associated with this request
ctx: *anyopaque = undefined,
start_callback: ?*const fn (req: *Transfer) anyerror!void = null,
header_callback: ?*const fn (req: *Transfer, header: []const u8) anyerror!void = null,
header_done_callback: *const fn (req: *Transfer) anyerror!void,
data_callback: *const fn (req: *Transfer, data: []const u8) anyerror!void,
start_callback: ?*const fn (transfer: *Transfer) anyerror!void = null,
header_callback: ?*const fn (transfer: *Transfer, header: []const u8) anyerror!void = null,
header_done_callback: *const fn (transfer: *Transfer) anyerror!void,
data_callback: *const fn (transfer: *Transfer, data: []const u8) anyerror!void,
done_callback: *const fn (ctx: *anyopaque) anyerror!void,
error_callback: *const fn (ctx: *anyopaque, err: anyerror) void,
};
pub const Transfer = struct {
id: usize,
id: usize = 0,
req: Request,
ctx: *anyopaque,
uri: std.Uri, // used for setting/getting the cookie
ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers
client: *Client,
_notified_fail: bool = false,
// We'll store the response header here
response_header: ?Header = null,
handle: *Handle,
_handle: ?*Handle = null,
_redirecting: bool = false,
// needs to be freed when we're done
_request_header_list: ?*c.curl_slist = null,
fn deinit(self: *Transfer) void {
if (self._request_header_list) |list| {
c.curl_slist_free_all(list);
self.req.headers.deinit();
if (self._handle) |handle| {
self.client.handles.release(handle);
}
self.client.transfer_pool.destroy(self);
}
pub fn format(self: *const Transfer, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
const req = self.req;
return writer.print("[{d}] {s} {s}", .{ self.id, @tagName(req.method), req.url });
return writer.print("{s} {s}", .{ @tagName(req.method), req.url });
}
pub fn setBody(self: *Transfer, body: []const u8) !void {
@@ -553,8 +572,20 @@ pub const Transfer = struct {
self._request_header_list = c.curl_slist_append(self._request_header_list, value);
}
pub fn updateURL(self: *Transfer, url: [:0]const u8) !void {
// for cookies
self.uri = try std.Uri.parse(url);
// for the request itself
self.req.url = url;
}
pub fn abort(self: *Transfer) void {
self.handle.client.endTransfer(self);
self.client.requestFailed(self, error.Abort);
if (self._handle != null) {
self.client.endTransfer(self);
}
self.deinit();
}
fn headerCallback(buffer: [*]const u8, header_count: usize, buf_len: usize, data: *anyopaque) callconv(.c) usize {

View File

@@ -219,7 +219,7 @@ pub const Headers = struct {
return .{ .headers = header_list };
}
pub fn deinit(self: *Headers) void {
pub fn deinit(self: *const Headers) void {
c.curl_slist_free_all(self.headers);
}

View File

@@ -4,7 +4,7 @@ const log = @import("log.zig");
const URL = @import("url.zig").URL;
const page = @import("browser/page.zig");
const Http = @import("http/Http.zig");
const Request = @import("http/Client.zig").Request;
const Transfer = @import("http/Client.zig").Transfer;
const Allocator = std.mem.Allocator;
@@ -94,16 +94,16 @@ pub const Notification = struct {
};
pub const RequestStart = struct {
request: *Request,
transfer: *Transfer,
};
pub const RequestIntercept = struct {
request: *Request,
transfer: *Transfer,
wait_for_interception: *bool,
};
pub const RequestFail = struct {
request: *Request,
transfer: *Transfer,
err: anyerror,
};