From bc7e1e07f4f6d219568e79dbf14dffe84cb64016 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Aug 2025 11:59:30 +0200 Subject: [PATCH 01/10] typo fix --- src/http/Client.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index b38525d4..fad98e8b 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -359,7 +359,7 @@ fn perform(self: *Client, timeout_ms: c_int) !void { var messages_count: c_int = 0; while (c.curl_multi_info_read(multi, &messages_count)) |msg_| { const msg: *c.CURLMsg = @ptrCast(msg_); - // This is the only possible mesage type from CURL for now. + // This is the only possible message type from CURL for now. std.debug.assert(msg.msg == c.CURLMSG_DONE); const easy = msg.easy_handle.?; From 159bd06a569abc42eb02be0627601dac37ad78f7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Aug 2025 12:03:46 +0200 Subject: [PATCH 02/10] http: add use_proxy bool in connection --- src/http/Client.zig | 8 ++++++-- src/http/Http.zig | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index fad98e8b..094aedd8 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -278,9 +278,11 @@ fn requestFailed(self: *Client, transfer: *Transfer, err: anyerror) void { pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { try self.ensureNoActiveConnection(); - for (self.handles.handles) |h| { + for (self.handles.handles) |*h| { + h.conn.opts.use_proxy = true; try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } + self.blocking.conn.opts.use_proxy = true; try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } @@ -290,10 +292,12 @@ pub fn restoreOriginalProxy(self: *Client) !void { try self.ensureNoActiveConnection(); const proxy = if (self.http_proxy) |p| p.ptr else null; - for (self.handles.handles) |h| { + for (self.handles.handles) |*h| { + 
h.conn.opts.use_proxy = proxy != null; try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy)); } try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy)); + self.blocking.conn.opts.use_proxy = proxy != null; } fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { diff --git a/src/http/Http.zig b/src/http/Http.zig index 59b5b621..872350eb 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -94,6 +94,7 @@ pub const Connection = struct { opts: Connection.Opts, const Opts = struct { + use_proxy: bool, proxy_bearer_token: ?[:0]const u8, }; @@ -112,9 +113,11 @@ pub const Connection = struct { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_REDIR_PROTOCOLS_STR, "HTTP,HTTPS")); // remove FTP and FTPS from the default // proxy + var use_proxy = false; if (opts.http_proxy) |proxy| { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_PROXY, proxy.ptr)); try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_SUPPRESS_CONNECT_HEADERS, @as(c_long, 1))); + use_proxy = true; } // tls @@ -156,6 +159,7 @@ pub const Connection = struct { return .{ .easy = easy, .opts = .{ + .use_proxy = use_proxy, .proxy_bearer_token = opts.proxy_bearer_token, }, }; From 5e78a26e3daee2c19240ebdab815f2d844c55e57 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Aug 2025 14:01:33 +0200 Subject: [PATCH 03/10] http: refacto http header parsing --- src/http/Client.zig | 161 +++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 69 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index 094aedd8..3126d564 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -309,6 +309,9 @@ fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { transfer._handle = handle; errdefer transfer.deinit(); + // Store the proxy's information in transfer to ease headers parsing. 
+ transfer._use_proxy = conn.opts.use_proxy; + try conn.setURL(req.url); try conn.setMethod(req.method); if (req.body) |b| { @@ -574,6 +577,13 @@ pub const Transfer = struct { _redirecting: bool = false, + // use_proxy is set when the transfer has been associated to a given + // connection in makeRequest(). + _use_proxy: bool = undefined, + + // stateful variables used to parse responses headers. + _resp_header_status: enum { empty, first, next, end } = .empty, + fn deinit(self: *Transfer) void { self.req.headers.deinit(); if (self._handle) |handle| { @@ -684,88 +694,101 @@ pub const Transfer = struct { const header = buffer[0 .. buf_len - 2]; - if (transfer.response_header == null) { - if (transfer._redirecting and buf_len == 2) { - // parse and set cookies for the redirection. - redirectionCookies(transfer, easy) catch |err| { - log.debug(.http, "redirection cookies", .{ .err = err }); + // transition the status dependending the previous one. + transfer._resp_header_status = switch (transfer._resp_header_status) { + .empty => .first, + .first => .next, + .next => .next, + .end => .first, + }; + + // mark the end of parsing headers + if (buf_len == 2) transfer._resp_header_status = .end; + + log.debug(.http, "header parsing", .{ .status = transfer._resp_header_status }); + + switch (transfer._resp_header_status) { + .empty => unreachable, + .first => { + if (buf_len < 13) { + log.debug(.http, "invalid response line", .{ .line = header }); + return 0; + } + const version_start: usize = if (header[5] == '2') 7 else 9; + const version_end = version_start + 3; + + // a bit silly, but it makes sure that we don't change the length check + // above in a way that could break this. 
+ std.debug.assert(version_end < 13); + + const status = std.fmt.parseInt(u16, header[version_start..version_end], 10) catch { + log.debug(.http, "invalid status code", .{ .line = header }); return 0; }; - return buf_len; - } - if (buf_len < 13 or std.mem.startsWith(u8, header, "HTTP/") == false) { - if (transfer._redirecting) { + if (status >= 300 and status <= 399) { + transfer._redirecting = true; return buf_len; } - log.debug(.http, "invalid response line", .{ .line = header }); - return 0; - } - const version_start: usize = if (header[5] == '2') 7 else 9; - const version_end = version_start + 3; + transfer._redirecting = false; - // a bit silly, but it makes sure that we don't change the length check - // above in a way that could break this. - std.debug.assert(version_end < 13); + var url: [*c]u8 = undefined; + errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { + log.err(.http, "failed to get URL", .{ .err = err }); + return 0; + }; - const status = std.fmt.parseInt(u16, header[version_start..version_end], 10) catch { - log.debug(.http, "invalid status code", .{ .line = header }); - return 0; - }; + transfer.response_header = .{ + .url = url, + .status = status, + }; + }, + .next => {}, + .end => { + // If we are in a redirection, take care of cookies only. + if (transfer._redirecting) { + // parse and set cookies for the redirection. 
+ redirectionCookies(transfer, easy) catch |err| { + log.debug(.http, "redirection cookies", .{ .err = err }); + return 0; + }; + return buf_len; + } - if (status >= 300 and status <= 399) { - transfer._redirecting = true; - return buf_len; - } - transfer._redirecting = false; + if (getResponseHeader(easy, "content-type", 0)) |ct| { + var hdr = &transfer.response_header.?; + const value = ct.value; + const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); + hdr._content_type_len = len; + @memcpy(hdr._content_type[0..len], value[0..len]); + } - var url: [*c]u8 = undefined; - errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { - log.err(.http, "failed to get URL", .{ .err = err }); - return 0; - }; + var i: usize = 0; + while (true) { + const ct = getResponseHeader(easy, "set-cookie", i); + if (ct == null) break; + transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { + log.err(.http, "set cookie", .{ .err = err, .req = transfer }); + }; + i += 1; + if (i >= ct.?.amount) break; + } - transfer.response_header = .{ - .url = url, - .status = status, - }; - transfer.bytes_received = buf_len; - return buf_len; + transfer.req.header_callback(transfer) catch |err| { + log.err(.http, "header_callback", .{ .err = err, .req = transfer }); + // returning < buf_len terminates the request + return 0; + }; + + if (transfer.client.notification) |notification| { + notification.dispatch(.http_response_header_done, &.{ + .transfer = transfer, + }); + } + }, } transfer.bytes_received += buf_len; - if (buf_len == 2) { - if (getResponseHeader(easy, "content-type", 0)) |ct| { - var hdr = &transfer.response_header.?; - const value = ct.value; - const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); - hdr._content_type_len = len; - @memcpy(hdr._content_type[0..len], value[0..len]); - } - - var i: usize = 0; - while (true) { - const ct = getResponseHeader(easy, "set-cookie", i); - if (ct == null) break; - 
transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { - log.err(.http, "set cookie", .{ .err = err, .req = transfer }); - }; - i += 1; - if (i >= ct.?.amount) break; - } - - transfer.req.header_callback(transfer) catch |err| { - log.err(.http, "header_callback", .{ .err = err, .req = transfer }); - // returning < buf_len terminates the request - return 0; - }; - - if (transfer.client.notification) |notification| { - notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - } - } return buf_len; } From e2320ebe66b6d33665538b2c210e378f7d1de7b7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Aug 2025 15:09:07 +0200 Subject: [PATCH 04/10] http: handle proxy's request header callback --- src/http/Client.zig | 38 ++++++++++++++++++++++++++++++-------- src/http/Http.zig | 1 - 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index 3126d564..dce6bebb 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -569,6 +569,7 @@ pub const Transfer = struct { bytes_received: usize = 0, // We'll store the response header here + proxy_response_header: ?ResponseHeader = null, response_header: ?ResponseHeader = null, _notified_fail: bool = false, @@ -705,8 +706,6 @@ pub const Transfer = struct { // mark the end of parsing headers if (buf_len == 2) transfer._resp_header_status = .end; - log.debug(.http, "header parsing", .{ .status = transfer._resp_header_status }); - switch (transfer._resp_header_status) { .empty => unreachable, .first => { @@ -726,18 +725,36 @@ pub const Transfer = struct { return 0; }; - if (status >= 300 and status <= 399) { - transfer._redirecting = true; - return buf_len; - } - transfer._redirecting = false; - var url: [*c]u8 = undefined; errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { log.err(.http, "failed to get URL", .{ .err = err }); return 0; }; + // When using proxy, curl call the header 
function for all HTTP + // requests, including the CONNECT one used tunneling requests. + if (transfer._use_proxy and transfer.proxy_response_header == null) { + transfer.proxy_response_header = .{ + .url = "", + .status = status, + }; + + // We want to ignore the successful proxy's CONNECT request. + // But there is no proper way to detect if the current + // request is a proxy CONNECT one. + // We know curl uses a CONNECT when it establishes a TLS + // conn. + if (status == 200 and std.mem.startsWith(u8, std.mem.span(url), "https")) { + return buf_len; + } + } + + if (status >= 300 and status <= 399) { + transfer._redirecting = true; + return buf_len; + } + transfer._redirecting = false; + transfer.response_header = .{ .url = url, .status = status, @@ -755,6 +772,11 @@ pub const Transfer = struct { return buf_len; } + if (transfer._use_proxy and transfer.response_header == null) { + // we are in a successful CONNECT proxy request, ignore it. + return buf_len; + } + if (getResponseHeader(easy, "content-type", 0)) |ct| { var hdr = &transfer.response_header.?; const value = ct.value; diff --git a/src/http/Http.zig b/src/http/Http.zig index 872350eb..7cf6a011 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -116,7 +116,6 @@ pub const Connection = struct { var use_proxy = false; if (opts.http_proxy) |proxy| { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_PROXY, proxy.ptr)); - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_SUPPRESS_CONNECT_HEADERS, @as(c_long, 1))); use_proxy = true; } From fcd49c000f66e749bced91a5de52e98d39ba05db Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Aug 2025 16:34:03 +0200 Subject: [PATCH 05/10] page: avoid crash on empty body --- src/browser/page.zig | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/browser/page.zig b/src/browser/page.zig index a533091f..1f7b6684 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -756,7 +756,14 @@ pub const Page = struct { 
self.documentIsComplete(); } }, - else => unreachable, + .pre => { + // we didn't get any data. + self.documentIsComplete(); + }, + else => { + log.err(.app, "unreachable mode", .{ .mode = self.mode }); + unreachable; + }, } } From 25ad420f85d9753bb1522d0d4eaa6fcb9c19ff7e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Aug 2025 10:15:54 +0200 Subject: [PATCH 06/10] http: adjust header callback according to review --- src/http/Client.zig | 209 ++++++++++++++++++++++---------------------- 1 file changed, 104 insertions(+), 105 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index dce6bebb..b82aa51f 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -582,9 +582,6 @@ pub const Transfer = struct { // connection in makeRequest(). _use_proxy: bool = undefined, - // stateful variables used to parse responses headers. - _resp_header_status: enum { empty, first, next, end } = .empty, - fn deinit(self: *Transfer) void { self.req.headers.deinit(); if (self._handle) |handle| { @@ -695,122 +692,124 @@ pub const Transfer = struct { const header = buffer[0 .. buf_len - 2]; - // transition the status dependending the previous one. - transfer._resp_header_status = switch (transfer._resp_header_status) { - .empty => .first, - .first => .next, - .next => .next, - .end => .first, - }; - - // mark the end of parsing headers - if (buf_len == 2) transfer._resp_header_status = .end; - - switch (transfer._resp_header_status) { - .empty => unreachable, - .first => { - if (buf_len < 13) { - log.debug(.http, "invalid response line", .{ .line = header }); - return 0; - } - const version_start: usize = if (header[5] == '2') 7 else 9; - const version_end = version_start + 3; - - // a bit silly, but it makes sure that we don't change the length check - // above in a way that could break this. 
- std.debug.assert(version_end < 13); - - const status = std.fmt.parseInt(u16, header[version_start..version_end], 10) catch { - log.debug(.http, "invalid status code", .{ .line = header }); + if (transfer.response_header == null) { + if (transfer._redirecting and buf_len == 2) { + // parse and set cookies for the redirection. + redirectionCookies(transfer, easy) catch |err| { + log.debug(.http, "redirection cookies", .{ .err = err }); return 0; }; + return buf_len; + } - var url: [*c]u8 = undefined; - errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { - log.err(.http, "failed to get URL", .{ .err = err }); - return 0; - }; - - // When using proxy, curl call the header function for all HTTP - // requests, including the CONNECT one used tunneling requests. - if (transfer._use_proxy and transfer.proxy_response_header == null) { - transfer.proxy_response_header = .{ - .url = "", - .status = status, - }; - - // We want to ignore the successful proxy's CONNECT request. - // But there is no proper way to detect if the current - // request is a proxy CONNECT one. - // We know curl uses a CONNECT when it establishes a TLS - // conn. - if (status == 200 and std.mem.startsWith(u8, std.mem.span(url), "https")) { - return buf_len; - } - } - - if (status >= 300 and status <= 399) { - transfer._redirecting = true; - return buf_len; - } - transfer._redirecting = false; - - transfer.response_header = .{ - .url = url, - .status = status, - }; - }, - .next => {}, - .end => { - // If we are in a redirection, take care of cookies only. + if (buf_len < 13 or std.mem.startsWith(u8, header, "HTTP/") == false) { if (transfer._redirecting) { - // parse and set cookies for the redirection. 
- redirectionCookies(transfer, easy) catch |err| { - log.debug(.http, "redirection cookies", .{ .err = err }); - return 0; - }; return buf_len; } + log.debug(.http, "invalid response line", .{ .line = header }); + return 0; + } + const version_start: usize = if (header[5] == '2') 7 else 9; + const version_end = version_start + 3; - if (transfer._use_proxy and transfer.response_header == null) { - // we are in a successful CONNECT proxy request, ignore it. - return buf_len; - } + // a bit silly, but it makes sure that we don't change the length check + // above in a way that could break this. + std.debug.assert(version_end < 13); - if (getResponseHeader(easy, "content-type", 0)) |ct| { - var hdr = &transfer.response_header.?; - const value = ct.value; - const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); - hdr._content_type_len = len; - @memcpy(hdr._content_type[0..len], value[0..len]); - } + const status = std.fmt.parseInt(u16, header[version_start..version_end], 10) catch { + log.debug(.http, "invalid status code", .{ .line = header }); + return 0; + }; - var i: usize = 0; - while (true) { - const ct = getResponseHeader(easy, "set-cookie", i); - if (ct == null) break; - transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { - log.err(.http, "set cookie", .{ .err = err, .req = transfer }); - }; - i += 1; - if (i >= ct.?.amount) break; - } + if (status >= 300 and status <= 399) { + transfer._redirecting = true; + return buf_len; + } + transfer._redirecting = false; - transfer.req.header_callback(transfer) catch |err| { - log.err(.http, "header_callback", .{ .err = err, .req = transfer }); - // returning < buf_len terminates the request - return 0; - }; + var url: [*c]u8 = undefined; + errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { + log.err(.http, "failed to get URL", .{ .err = err }); + return 0; + }; - if (transfer.client.notification) |notification| { - 
notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - } - }, + transfer.response_header = .{ + .url = url, + .status = status, + }; + transfer.bytes_received = buf_len; + return buf_len; } transfer.bytes_received += buf_len; + + if (buf_len != 2) { + return buf_len; + } + + // Starting here, we get the last header line. + + // We're connecting to a proxy. Consider the first request to the + // proxy's result. + if (transfer._use_proxy and transfer.proxy_response_header == null) { + // We have to cases: + // 1. for http://, we have one request. So both + // proxy_response_header and response_header will have the same + // value. + // + // 2. for https://, we two successive requests, a CONNECT to the + // proxy and a final request. So proxy_response_header and + // response_header may have different values. + transfer.proxy_response_header = transfer.response_header; + + // Detect if the request is a CONNECT to the proxy. There might be + // a better way to detect this, but I didn't find a better one. + // When we don't force curl to always use tunneling, it uses + // CONNECT tunnel only for https requests. + const is_connect = std.mem.startsWith(u8, std.mem.span(transfer.proxy_response_header.?.url), "https"); + + // If the CONNECT is successful, curl will create a following + // request to the final target, so we reset + // transfer.response_header to get the "real" data. + if (is_connect and transfer.proxy_response_header.?.status == 200) { + transfer.response_header = null; + return buf_len; + } + + // If the CONNECT fails, use the request result as it would be our + // final request. 
+ } + + if (getResponseHeader(easy, "content-type", 0)) |ct| { + var hdr = &transfer.response_header.?; + const value = ct.value; + const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); + hdr._content_type_len = len; + @memcpy(hdr._content_type[0..len], value[0..len]); + } + + var i: usize = 0; + while (true) { + const ct = getResponseHeader(easy, "set-cookie", i); + if (ct == null) break; + transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { + log.err(.http, "set cookie", .{ .err = err, .req = transfer }); + }; + i += 1; + if (i >= ct.?.amount) break; + } + + transfer.req.header_callback(transfer) catch |err| { + log.err(.http, "header_callback", .{ .err = err, .req = transfer }); + // returning < buf_len terminates the request + return 0; + }; + + if (transfer.client.notification) |notification| { + notification.dispatch(.http_response_header_done, &.{ + .transfer = transfer, + }); + } return buf_len; } From e61d787ff0eef335f013b96d4deea26a95445efd Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Aug 2025 11:48:43 +0200 Subject: [PATCH 07/10] http: move header done callback in its own func And call it only after the headers are parsed, either from data callback or end of the request. --- src/http/Client.zig | 142 +++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 68 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index b82aa51f..9b1456ba 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -379,6 +379,15 @@ fn perform(self: *Client, timeout_ms: c_int) !void { defer transfer.deinit(); if (errorCheck(msg.data.result)) { + // In case of request w/o data, we need to call the header done + // callback now. 
+ if (!transfer._header_done_called) { + transfer.headerDoneCallback(easy) catch |err| { + log.err(.http, "header_done_callback", .{ .err = err }); + self.requestFailed(transfer, err); + continue; + }; + } transfer.req.done_callback(transfer.ctx) catch |err| { // transfer isn't valid at this point, don't use it. log.err(.http, "done_callback", .{ .err = err }); @@ -572,6 +581,9 @@ pub const Transfer = struct { proxy_response_header: ?ResponseHeader = null, response_header: ?ResponseHeader = null, + // track if the header done callback has been called. + _header_done_called: bool = false, + _notified_fail: bool = false, _handle: ?*Handle = null, @@ -678,6 +690,48 @@ pub const Transfer = struct { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_COOKIE, @as([*c]const u8, @ptrCast(cookies.items.ptr)))); } + // headerDoneCallback is called once the headers have been read. + // It can be called either from dataCallback or at the end of the request for those + // w/o body. + fn headerDoneCallback(transfer: *Transfer, easy: *c.CURL) !void { + std.debug.assert(transfer._header_done_called == false); + std.debug.assert(transfer.response_header != null); + + defer transfer._header_done_called = true; + + if (getResponseHeader(easy, "content-type", 0)) |ct| { + var hdr = &transfer.response_header.?; + const value = ct.value; + const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); + hdr._content_type_len = len; + @memcpy(hdr._content_type[0..len], value[0..len]); + } + + var i: usize = 0; + while (true) { + const ct = getResponseHeader(easy, "set-cookie", i); + if (ct == null) break; + transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { + log.err(.http, "set cookie", .{ .err = err, .req = transfer }); + return err; + }; + i += 1; + if (i >= ct.?.amount) break; + } + + transfer.req.header_callback(transfer) catch |err| { + log.err(.http, "header_callback", .{ .err = err, .req = transfer }); + return err; + }; + + if 
(transfer.client.notification) |notification| { + notification.dispatch(.http_response_header_done, &.{ + .transfer = transfer, + }); + } + } + + // headerCallback is called by curl on each request's header line read. fn headerCallback(buffer: [*]const u8, header_count: usize, buf_len: usize, data: *anyopaque) callconv(.c) usize { // libcurl should only ever emit 1 header at a time std.debug.assert(header_count == 1); @@ -692,20 +746,9 @@ pub const Transfer = struct { const header = buffer[0 .. buf_len - 2]; - if (transfer.response_header == null) { - if (transfer._redirecting and buf_len == 2) { - // parse and set cookies for the redirection. - redirectionCookies(transfer, easy) catch |err| { - log.debug(.http, "redirection cookies", .{ .err = err }); - return 0; - }; - return buf_len; - } - - if (buf_len < 13 or std.mem.startsWith(u8, header, "HTTP/") == false) { - if (transfer._redirecting) { - return buf_len; - } + // Is it the first header line? + if (std.mem.startsWith(u8, header, "HTTP/")) { + if (buf_len < 13) { log.debug(.http, "invalid response line", .{ .line = header }); return 0; } @@ -741,7 +784,9 @@ pub const Transfer = struct { return buf_len; } - transfer.bytes_received += buf_len; + if (transfer._redirecting == false) { + transfer.bytes_received += buf_len; + } if (buf_len != 2) { return buf_len; @@ -749,67 +794,21 @@ pub const Transfer = struct { // Starting here, we get the last header line. - // We're connecting to a proxy. Consider the first request to the + // We're connecting to a proxy. Consider the first request to be the // proxy's result. if (transfer._use_proxy and transfer.proxy_response_header == null) { - // We have to cases: - // 1. for http://, we have one request. So both - // proxy_response_header and response_header will have the same - // value. - // - // 2. for https://, we two successive requests, a CONNECT to the - // proxy and a final request. So proxy_response_header and - // response_header may have different values. 
transfer.proxy_response_header = transfer.response_header; - - // Detect if the request is a CONNECT to the proxy. There might be - // a better way to detect this, but I didn't find a better one. - // When we don't force curl to always use tunneling, it uses - // CONNECT tunnel only for https requests. - const is_connect = std.mem.startsWith(u8, std.mem.span(transfer.proxy_response_header.?.url), "https"); - - // If the CONNECT is successful, curl will create a following - // request to the final target, so we reset - // transfer.response_header to get the "real" data. - if (is_connect and transfer.proxy_response_header.?.status == 200) { - transfer.response_header = null; - return buf_len; - } - - // If the CONNECT fails, use the request result as it would be our - // final request. } - if (getResponseHeader(easy, "content-type", 0)) |ct| { - var hdr = &transfer.response_header.?; - const value = ct.value; - const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); - hdr._content_type_len = len; - @memcpy(hdr._content_type[0..len], value[0..len]); - } - - var i: usize = 0; - while (true) { - const ct = getResponseHeader(easy, "set-cookie", i); - if (ct == null) break; - transfer.req.cookie_jar.populateFromResponse(&transfer.uri, ct.?.value) catch |err| { - log.err(.http, "set cookie", .{ .err = err, .req = transfer }); + if (transfer._redirecting) { + // parse and set cookies for the redirection. 
+ redirectionCookies(transfer, easy) catch |err| { + log.debug(.http, "redirection cookies", .{ .err = err }); + return 0; }; - i += 1; - if (i >= ct.?.amount) break; + return buf_len; } - transfer.req.header_callback(transfer) catch |err| { - log.err(.http, "header_callback", .{ .err = err, .req = transfer }); - // returning < buf_len terminates the request - return 0; - }; - - if (transfer.client.notification) |notification| { - notification.dispatch(.http_response_header_done, &.{ - .transfer = transfer, - }); - } return buf_len; } @@ -827,6 +826,13 @@ pub const Transfer = struct { return chunk_len; } + if (!transfer._header_done_called) { + transfer.headerDoneCallback(easy) catch |err| { + log.err(.http, "header_done_callback", .{ .err = err, .req = transfer }); + return c.CURL_WRITEFUNC_ERROR; + }; + } + transfer.bytes_received += chunk_len; const chunk = buffer[0..chunk_len]; transfer.req.data_callback(transfer, chunk) catch |err| { From a7516061d0c23c7b644504046a83d5e571333cc1 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Aug 2025 11:58:48 +0200 Subject: [PATCH 08/10] http: move use_proxy from connection to client --- src/http/Client.zig | 14 +++++++------- src/http/Http.zig | 4 ---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index 9b1456ba..a74c85d8 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -89,6 +89,9 @@ notification: ?*Notification = null, // restoring, this originally-configured value is what it goes to. http_proxy: ?[:0]const u8 = null, +// does the client use a proxy? 
+use_proxy: bool = false, + const TransferQueue = std.DoublyLinkedList(*Transfer); pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Client { @@ -120,6 +123,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie .blocking = blocking, .allocator = allocator, .http_proxy = opts.http_proxy, + .use_proxy = opts.http_proxy != null, .transfer_pool = transfer_pool, .queue_node_pool = queue_node_pool, }; @@ -242,6 +246,7 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .req = req, .ctx = req.ctx, .client = self, + ._use_proxy = self.use_proxy, }; return transfer; } @@ -278,11 +283,10 @@ fn requestFailed(self: *Client, transfer: *Transfer, err: anyerror) void { pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { try self.ensureNoActiveConnection(); + self.use_proxy = true; for (self.handles.handles) |*h| { - h.conn.opts.use_proxy = true; try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } - self.blocking.conn.opts.use_proxy = true; try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } @@ -291,13 +295,12 @@ pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { pub fn restoreOriginalProxy(self: *Client) !void { try self.ensureNoActiveConnection(); + self.use_proxy = self.http_proxy != null; const proxy = if (self.http_proxy) |p| p.ptr else null; for (self.handles.handles) |*h| { - h.conn.opts.use_proxy = proxy != null; try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy)); } try errorCheck(c.curl_easy_setopt(self.blocking.conn.easy, c.CURLOPT_PROXY, proxy)); - self.blocking.conn.opts.use_proxy = proxy != null; } fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { @@ -309,9 +312,6 @@ fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { transfer._handle = handle; errdefer transfer.deinit(); - // Store the proxy's information in transfer to ease headers parsing. 
- transfer._use_proxy = conn.opts.use_proxy; - try conn.setURL(req.url); try conn.setMethod(req.method); if (req.body) |b| { diff --git a/src/http/Http.zig b/src/http/Http.zig index 7cf6a011..e661d766 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -94,7 +94,6 @@ pub const Connection = struct { opts: Connection.Opts, const Opts = struct { - use_proxy: bool, proxy_bearer_token: ?[:0]const u8, }; @@ -113,10 +112,8 @@ pub const Connection = struct { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_REDIR_PROTOCOLS_STR, "HTTP,HTTPS")); // remove FTP and FTPS from the default // proxy - var use_proxy = false; if (opts.http_proxy) |proxy| { try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_PROXY, proxy.ptr)); - use_proxy = true; } // tls @@ -158,7 +155,6 @@ pub const Connection = struct { return .{ .easy = easy, .opts = .{ - .use_proxy = use_proxy, .proxy_bearer_token = opts.proxy_bearer_token, }, }; From 7046e18d7ed7a226ac01927454ce310905a074a7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Aug 2025 15:25:43 +0200 Subject: [PATCH 09/10] http: simplify header parsing --- src/http/Client.zig | 73 +++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index a74c85d8..6ea56740 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -89,9 +89,6 @@ notification: ?*Notification = null, // restoring, this originally-configured value is what it goes to. http_proxy: ?[:0]const u8 = null, -// does the client use a proxy? 
-use_proxy: bool = false, - const TransferQueue = std.DoublyLinkedList(*Transfer); pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Client { @@ -123,7 +120,6 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie .blocking = blocking, .allocator = allocator, .http_proxy = opts.http_proxy, - .use_proxy = opts.http_proxy != null, .transfer_pool = transfer_pool, .queue_node_pool = queue_node_pool, }; @@ -246,7 +242,6 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .req = req, .ctx = req.ctx, .client = self, - ._use_proxy = self.use_proxy, }; return transfer; } @@ -283,7 +278,6 @@ fn requestFailed(self: *Client, transfer: *Transfer, err: anyerror) void { pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { try self.ensureNoActiveConnection(); - self.use_proxy = true; for (self.handles.handles) |*h| { try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy.ptr)); } @@ -295,7 +289,6 @@ pub fn changeProxy(self: *Client, proxy: [:0]const u8) !void { pub fn restoreOriginalProxy(self: *Client) !void { try self.ensureNoActiveConnection(); - self.use_proxy = self.http_proxy != null; const proxy = if (self.http_proxy) |p| p.ptr else null; for (self.handles.handles) |*h| { try errorCheck(c.curl_easy_setopt(h.conn.easy, c.CURLOPT_PROXY, proxy)); @@ -589,10 +582,7 @@ pub const Transfer = struct { _handle: ?*Handle = null, _redirecting: bool = false, - - // use_proxy is set when the transfer has been associated to a given - // connection in makeRequest(). 
- _use_proxy: bool = undefined, + _forbidden: bool = false, fn deinit(self: *Transfer) void { self.req.headers.deinit(); @@ -603,17 +593,34 @@ pub const Transfer = struct { self.client.transfer_pool.destroy(self); } + fn buildResponseHeader(self: *Transfer, easy: *c.CURL) !void { + std.debug.assert(self.response_header == null); + + var url: [*c]u8 = undefined; + try errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)); + + var status: c_long = undefined; + try errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_RESPONSE_CODE, &status)); + + self.response_header = .{ + .url = url, + .status = @intCast(status), + }; + + if (getResponseHeader(easy, "content-type", 0)) |ct| { + var hdr = &self.response_header.?; + const value = ct.value; + const len = @min(value.len, ResponseHeader.MAX_CONTENT_TYPE_LEN); + hdr._content_type_len = len; + @memcpy(hdr._content_type[0..len], value[0..len]); + } + } + pub fn format(self: *const Transfer, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { const req = self.req; return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); } - pub fn setBody(self: *Transfer, body: []const u8) !void { - const easy = self.handle.easy; - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_POSTFIELDS, body.ptr)); - try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_POSTFIELDSIZE, @as(c_long, @intCast(body.len)))); - } - pub fn addHeader(self: *Transfer, value: [:0]const u8) !void { self._request_header_list = c.curl_slist_append(self._request_header_list, value); } @@ -695,10 +702,10 @@ pub const Transfer = struct { // w/o body. 
fn headerDoneCallback(transfer: *Transfer, easy: *c.CURL) !void { std.debug.assert(transfer._header_done_called == false); - std.debug.assert(transfer.response_header != null); - defer transfer._header_done_called = true; + try transfer.buildResponseHeader(easy); + if (getResponseHeader(easy, "content-type", 0)) |ct| { var hdr = &transfer.response_header.?; const value = ct.value; @@ -746,8 +753,12 @@ pub const Transfer = struct { const header = buffer[0 .. buf_len - 2]; - // Is it the first header line? + // We need to parse the first line headers for each request b/c curl's + // CURLINFO_RESPONSE_CODE returns the status code of the final request. + // If a redirection or a proxy's CONNECT forbidden happens, we won't + // get this intermediary status code. if (std.mem.startsWith(u8, header, "HTTP/")) { + // Is it the first header line. if (buf_len < 13) { log.debug(.http, "invalid response line", .{ .line = header }); return 0; @@ -770,21 +781,17 @@ pub const Transfer = struct { } transfer._redirecting = false; - var url: [*c]u8 = undefined; - errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_EFFECTIVE_URL, &url)) catch |err| { - log.err(.http, "failed to get URL", .{ .err = err }); - return 0; - }; + if (status == 401 or status == 407) { + transfer._forbidden = true; + return buf_len; + } + transfer._forbidden = false; - transfer.response_header = .{ - .url = url, - .status = status, - }; transfer.bytes_received = buf_len; return buf_len; } - if (transfer._redirecting == false) { + if (transfer._redirecting == false and transfer._forbidden == false) { transfer.bytes_received += buf_len; } @@ -794,12 +801,6 @@ pub const Transfer = struct { // Starting here, we get the last header line. - // We're connecting to a proxy. Consider the first request to be the - // proxy's result. 
- if (transfer._use_proxy and transfer.proxy_response_header == null) { - transfer.proxy_response_header = transfer.response_header; - } - if (transfer._redirecting) { // parse and set cookies for the redirection. redirectionCookies(transfer, easy) catch |err| { From 7869159657a5420607790fd6670bb0fc5c49740d Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Aug 2025 11:29:08 +0200 Subject: [PATCH 10/10] add e2e test through proxy --- .github/workflows/e2e-test.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index f237abee..8833d382 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -96,6 +96,18 @@ jobs: go run runner/main.go --verbose kill `cat LPD.pid` + - name: build proxy + run: | + cd proxy + go build + + - name: run end to end tests through proxy + run: | + ./proxy/proxy & echo $! > PROXY.id + ./lightpanda serve --http_proxy 'http://127.0.0.1:3000' & echo $! > LPD.pid + go run runner/main.go --verbose + kill `cat LPD.pid` `cat PROXY.id` + cdp-and-hyperfine-bench: name: cdp-and-hyperfine-bench needs: zig-build-release