Move redirects handling from curl callbacks

This commit is contained in:
Nikolay Govorov
2026-03-17 17:18:23 +00:00
parent c6861829c3
commit f1a96bab5b
3 changed files with 178 additions and 269 deletions

View File

@@ -19,28 +19,27 @@
const std = @import("std"); const std = @import("std");
const builtin = @import("builtin"); const builtin = @import("builtin");
const posix = std.posix; const posix = std.posix;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const lp = @import("lightpanda"); const lp = @import("lightpanda");
const log = @import("../log.zig"); const URL = @import("URL.zig");
const Net = @import("../network/http.zig");
const Network = @import("../network/Runtime.zig");
const Config = @import("../Config.zig"); const Config = @import("../Config.zig");
const URL = @import("../browser/URL.zig");
const Notification = @import("../Notification.zig"); const Notification = @import("../Notification.zig");
const CookieJar = @import("../browser/webapi/storage/Cookie.zig").Jar; const CookieJar = @import("webapi/storage/Cookie.zig").Jar;
const http = @import("../network/http.zig");
const Runtime = @import("../network/Runtime.zig");
const Robots = @import("../network/Robots.zig"); const Robots = @import("../network/Robots.zig");
const RobotStore = Robots.RobotStore; const RobotStore = Robots.RobotStore;
const WebBotAuth = @import("../network/WebBotAuth.zig"); const WebBotAuth = @import("../network/WebBotAuth.zig");
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const IS_DEBUG = builtin.mode == .Debug; const IS_DEBUG = builtin.mode == .Debug;
pub const Method = Net.Method; pub const Method = http.Method;
pub const Headers = Net.Headers; pub const Headers = http.Headers;
pub const ResponseHead = Net.ResponseHead; pub const ResponseHead = http.ResponseHead;
pub const HeaderIterator = Net.HeaderIterator; pub const HeaderIterator = http.HeaderIterator;
// This is loosely tied to a browser Page. Loading all the <scripts>, doing // This is loosely tied to a browser Page. Loading all the <scripts>, doing
// XHR requests, and loading imports all happens through here. Since the app // XHR requests, and loading imports all happens through here. Since the app
@@ -68,7 +67,7 @@ active: usize,
intercepted: usize, intercepted: usize,
// Our curl multi handle. // Our curl multi handle.
handles: Net.Handles, handles: http.Handles,
// Connections currently in this client's curl_multi. // Connections currently in this client's curl_multi.
in_use: std.DoublyLinkedList = .{}, in_use: std.DoublyLinkedList = .{},
@@ -88,7 +87,7 @@ queue: TransferQueue,
// The main app allocator // The main app allocator
allocator: Allocator, allocator: Allocator,
network: *Network, network: *Runtime,
// Queue of requests that depend on a robots.txt. // Queue of requests that depend on a robots.txt.
// Allows us to fetch the robots.txt just once. // Allows us to fetch the robots.txt just once.
pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
@@ -134,14 +133,14 @@ pub const CDPClient = struct {
const TransferQueue = std.DoublyLinkedList; const TransferQueue = std.DoublyLinkedList;
pub fn init(allocator: Allocator, network: *Network) !*Client { pub fn init(allocator: Allocator, network: *Runtime) !*Client {
var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator); var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
errdefer transfer_pool.deinit(); errdefer transfer_pool.deinit();
const client = try allocator.create(Client); const client = try allocator.create(Client);
errdefer allocator.destroy(client); errdefer allocator.destroy(client);
var handles = try Net.Handles.init(network.config); var handles = try http.Handles.init(network.config);
errdefer handles.deinit(); errdefer handles.deinit();
const http_proxy = network.config.httpProxy(); const http_proxy = network.config.httpProxy();
@@ -178,8 +177,8 @@ pub fn deinit(self: *Client) void {
self.allocator.destroy(self); self.allocator.destroy(self);
} }
pub fn newHeaders(self: *const Client) !Net.Headers { pub fn newHeaders(self: *const Client) !http.Headers {
return Net.Headers.init(self.network.config.http_headers.user_agent_header); return http.Headers.init(self.network.config.http_headers.user_agent_header);
} }
pub fn abort(self: *Client) void { pub fn abort(self: *Client) void {
@@ -198,11 +197,11 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void {
var n = q.first; var n = q.first;
while (n) |node| { while (n) |node| {
n = node.next; n = node.next;
const conn: *Net.Connection = @fieldParentPtr("node", node); const conn: *http.Connection = @fieldParentPtr("node", node);
var transfer = Transfer.fromConnection(conn) catch |err| { var transfer = Transfer.fromConnection(conn) catch |err| {
// Let's cleanup what we can // Let's cleanup what we can
self.removeConn(conn); self.removeConn(conn);
log.err(.http, "get private info", .{ .err = err, .source = "abort" }); lp.log.err(.http, "get private info", .{ .err = err, .source = "abort" });
continue; continue;
}; };
if (comptime abort_all) { if (comptime abort_all) {
@@ -306,7 +305,7 @@ fn processRequest(self: *Client, req: Request) !void {
self.intercepted += 1; self.intercepted += 1;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted }); lp.log.debug(.http, "wait for interception", .{ .intercepted = self.intercepted });
} }
transfer._intercept_state = .pending; transfer._intercept_state = .pending;
@@ -351,7 +350,7 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
const headers = try self.newHeaders(); const headers = try self.newHeaders();
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); lp.log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
try self.processRequest(.{ try self.processRequest(.{
.ctx = ctx, .ctx = ctx,
.url = robots_url, .url = robots_url,
@@ -380,7 +379,7 @@ fn robotsHeaderCallback(transfer: *Transfer) !bool {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
if (transfer.response_header) |hdr| { if (transfer.response_header) |hdr| {
log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); lp.log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url });
ctx.status = hdr.status; ctx.status = hdr.status;
} }
@@ -409,7 +408,7 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
ctx.client.network.config.http_headers.user_agent, ctx.client.network.config.http_headers.user_agent,
ctx.buffer.items, ctx.buffer.items,
) catch blk: { ) catch blk: {
log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); lp.log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url });
// If we fail to parse, we just insert it as absent and ignore. // If we fail to parse, we just insert it as absent and ignore.
try ctx.client.network.robot_store.putAbsent(ctx.robots_url); try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
break :blk null; break :blk null;
@@ -423,12 +422,12 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
} }
}, },
404 => { 404 => {
log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); lp.log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
// If we get a 404, we just insert it as absent. // If we get a 404, we just insert it as absent.
try ctx.client.network.robot_store.putAbsent(ctx.robots_url); try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
}, },
else => { else => {
log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status }); lp.log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status });
// If we get an unexpected status, we just insert as absent. // If we get an unexpected status, we just insert as absent.
try ctx.client.network.robot_store.putAbsent(ctx.robots_url); try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
}, },
@@ -441,7 +440,7 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
for (queued.value.items) |queued_req| { for (queued.value.items) |queued_req| {
if (!allowed) { if (!allowed) {
log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); lp.log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
} else { } else {
ctx.client.processRequest(queued_req) catch |e| { ctx.client.processRequest(queued_req) catch |e| {
@@ -455,7 +454,7 @@ fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
defer ctx.deinit(); defer ctx.deinit();
log.warn(.http, "robots fetch failed", .{ .err = err }); lp.log.warn(.http, "robots fetch failed", .{ .err = err });
var queued = ctx.client.pending_robots_queue.fetchRemove( var queued = ctx.client.pending_robots_queue.fetchRemove(
ctx.robots_url, ctx.robots_url,
@@ -474,7 +473,7 @@ fn robotsShutdownCallback(ctx_ptr: *anyopaque) void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
defer ctx.deinit(); defer ctx.deinit();
log.debug(.http, "robots fetch shutdown", .{}); lp.log.debug(.http, "robots fetch shutdown", .{});
var queued = ctx.client.pending_robots_queue.fetchRemove( var queued = ctx.client.pending_robots_queue.fetchRemove(
ctx.robots_url, ctx.robots_url,
@@ -549,7 +548,7 @@ fn process(self: *Client, transfer: *Transfer) !void {
pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { pub fn continueTransfer(self: *Client, transfer: *Transfer) !void {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(transfer._intercept_state != .not_intercepted); std.debug.assert(transfer._intercept_state != .not_intercepted);
log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); lp.log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted });
} }
self.intercepted -= 1; self.intercepted -= 1;
@@ -563,7 +562,7 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void {
pub fn abortTransfer(self: *Client, transfer: *Transfer) void { pub fn abortTransfer(self: *Client, transfer: *Transfer) void {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(transfer._intercept_state != .not_intercepted); std.debug.assert(transfer._intercept_state != .not_intercepted);
log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); lp.log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted });
} }
self.intercepted -= 1; self.intercepted -= 1;
@@ -574,10 +573,10 @@ pub fn abortTransfer(self: *Client, transfer: *Transfer) void {
} }
// For an intercepted request // For an intercepted request
pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(transfer._intercept_state != .not_intercepted); std.debug.assert(transfer._intercept_state != .not_intercepted);
log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); lp.log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted });
} }
self.intercepted -= 1; self.intercepted -= 1;
@@ -671,13 +670,13 @@ pub fn setTlsVerify(self: *Client, verify: bool) !void {
var it = self.in_use.first; var it = self.in_use.first;
while (it) |node| : (it = node.next) { while (it) |node| : (it = node.next) {
const conn: *Net.Connection = @fieldParentPtr("node", node); const conn: *http.Connection = @fieldParentPtr("node", node);
try conn.setTlsVerify(verify, self.use_proxy); try conn.setTlsVerify(verify, self.use_proxy);
} }
self.tls_verify = verify; self.tls_verify = verify;
} }
fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *Transfer) anyerror!void { fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyerror!void {
const req = &transfer.req; const req = &transfer.req;
{ {
@@ -689,7 +688,8 @@ fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *Transfer) anyerr
} }
// Set callbacks and per-client settings on the pooled connection. // Set callbacks and per-client settings on the pooled connection.
try conn.setCallbacks(Transfer.headerCallback, Transfer.dataCallback); try conn.setCallbacks(Transfer.dataCallback);
try conn.setFollowLocation(false);
try conn.setProxy(self.http_proxy); try conn.setProxy(self.http_proxy);
try conn.setTlsVerify(self.tls_verify, self.use_proxy); try conn.setTlsVerify(self.tls_verify, self.use_proxy);
@@ -768,9 +768,9 @@ fn perform(self: *Client, timeout_ms: c_int) !PerformStatus {
// Process dirty connections — return them to Runtime pool. // Process dirty connections — return them to Runtime pool.
while (self.dirty.popFirst()) |node| { while (self.dirty.popFirst()) |node| {
const conn: *Net.Connection = @fieldParentPtr("node", node); const conn: *http.Connection = @fieldParentPtr("node", node);
self.handles.remove(conn) catch |err| { self.handles.remove(conn) catch |err| {
log.fatal(.http, "multi remove handle", .{ .err = err, .src = "perform" }); lp.log.fatal(.http, "multi remove handle", .{ .err = err, .src = "perform" });
@panic("multi_remove_handle"); @panic("multi_remove_handle");
}; };
self.releaseConn(conn); self.releaseConn(conn);
@@ -784,7 +784,7 @@ fn perform(self: *Client, timeout_ms: c_int) !PerformStatus {
var status = PerformStatus.normal; var status = PerformStatus.normal;
if (self.cdp_client) |cdp_client| { if (self.cdp_client) |cdp_client| {
var wait_fds = [_]Net.WaitFd{.{ var wait_fds = [_]http.WaitFd{.{
.fd = cdp_client.socket, .fd = cdp_client.socket,
.events = .{ .pollin = true }, .events = .{ .pollin = true },
.revents = .{}, .revents = .{},
@@ -806,6 +806,11 @@ fn processMessages(self: *Client) !bool {
while (self.handles.readMessage()) |msg| { while (self.handles.readMessage()) |msg| {
const transfer = try Transfer.fromConnection(&msg.conn); const transfer = try Transfer.fromConnection(&msg.conn);
// Detect auth challenge from response headers.
if (msg.err == null) {
transfer.detectAuthChallenge(&msg.conn);
}
// In case of auth challenge // In case of auth challenge
// TODO give a way to configure the number of auth retries. // TODO give a way to configure the number of auth retries.
if (transfer._auth_challenge != null and transfer._tries < 10) { if (transfer._auth_challenge != null and transfer._tries < 10) {
@@ -814,7 +819,7 @@ fn processMessages(self: *Client) !bool {
if (wait_for_interception) { if (wait_for_interception) {
self.intercepted += 1; self.intercepted += 1;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted }); lp.log.debug(.http, "wait for auth interception", .{ .intercepted = self.intercepted });
} }
transfer._intercept_state = .pending; transfer._intercept_state = .pending;
@@ -848,6 +853,23 @@ fn processMessages(self: *Client) !bool {
} }
} }
// Handle redirects: extract data from conn before releasing it.
if (msg.err == null) {
const status = try msg.conn.getResponseCode();
if (status >= 300 and status <= 399) {
transfer.handleRedirect(&msg.conn) catch |err| {
requestFailed(transfer, err, true);
self.endTransfer(transfer);
transfer.deinit();
continue;
};
self.endTransfer(transfer);
transfer.reset();
try self.process(transfer);
continue;
}
}
// release it ASAP so that it's available; some done_callbacks // release it ASAP so that it's available; some done_callbacks
// will load more resources. // will load more resources.
self.endTransfer(transfer); self.endTransfer(transfer);
@@ -866,7 +888,7 @@ fn processMessages(self: *Client) !bool {
// In case of request w/o data, we need to call the header done // In case of request w/o data, we need to call the header done
// callback now. // callback now.
const proceed = transfer.headerDoneCallback(&msg.conn) catch |err| { const proceed = transfer.headerDoneCallback(&msg.conn) catch |err| {
log.err(.http, "header_done_callback2", .{ .err = err }); lp.log.err(.http, "header_done_callback2", .{ .err = err });
requestFailed(transfer, err, true); requestFailed(transfer, err, true);
continue; continue;
}; };
@@ -877,7 +899,7 @@ fn processMessages(self: *Client) !bool {
} }
transfer.req.done_callback(transfer.ctx) catch |err| { transfer.req.done_callback(transfer.ctx) catch |err| {
// transfer isn't valid at this point, don't use it. // transfer isn't valid at this point, don't use it.
log.err(.http, "done_callback", .{ .err = err }); lp.log.err(.http, "done_callback", .{ .err = err });
requestFailed(transfer, err, true); requestFailed(transfer, err, true);
continue; continue;
}; };
@@ -898,7 +920,7 @@ fn endTransfer(self: *Client, transfer: *Transfer) void {
self.active -= 1; self.active -= 1;
} }
fn removeConn(self: *Client, conn: *Net.Connection) void { fn removeConn(self: *Client, conn: *http.Connection) void {
self.in_use.remove(&conn.node); self.in_use.remove(&conn.node);
if (self.handles.remove(conn)) { if (self.handles.remove(conn)) {
self.releaseConn(conn); self.releaseConn(conn);
@@ -909,7 +931,7 @@ fn removeConn(self: *Client, conn: *Net.Connection) void {
} }
} }
fn releaseConn(self: *Client, conn: *Net.Connection) void { fn releaseConn(self: *Client, conn: *http.Connection) void {
self.network.releaseConnection(conn); self.network.releaseConnection(conn);
} }
@@ -925,7 +947,7 @@ pub const RequestCookie = struct {
is_navigation: bool, is_navigation: bool,
origin: [:0]const u8, origin: [:0]const u8,
pub fn headersForRequest(self: *const RequestCookie, temp: Allocator, url: [:0]const u8, headers: *Net.Headers) !void { pub fn headersForRequest(self: *const RequestCookie, temp: Allocator, url: [:0]const u8, headers: *http.Headers) !void {
var arr: std.ArrayList(u8) = .{}; var arr: std.ArrayList(u8) = .{};
try self.jar.forRequest(url, arr.writer(temp), .{ try self.jar.forRequest(url, arr.writer(temp), .{
.is_http = self.is_http, .is_http = self.is_http,
@@ -944,7 +966,7 @@ pub const Request = struct {
frame_id: u32, frame_id: u32,
method: Method, method: Method,
url: [:0]const u8, url: [:0]const u8,
headers: Net.Headers, headers: http.Headers,
body: ?[]const u8 = null, body: ?[]const u8 = null,
cookie_jar: ?*CookieJar, cookie_jar: ?*CookieJar,
resource_type: ResourceType, resource_type: ResourceType,
@@ -990,7 +1012,7 @@ pub const Request = struct {
}; };
}; };
const AuthChallenge = Net.AuthChallenge; const AuthChallenge = http.AuthChallenge;
pub const Transfer = struct { pub const Transfer = struct {
arena: ArenaAllocator, arena: ArenaAllocator,
@@ -1015,15 +1037,15 @@ pub const Transfer = struct {
_notified_fail: bool = false, _notified_fail: bool = false,
_conn: ?*Net.Connection = null, _conn: ?*http.Connection = null,
_redirecting: bool = false,
_auth_challenge: ?AuthChallenge = null, _auth_challenge: ?AuthChallenge = null,
// number of times the transfer has been tried. // number of times the transfer has been tried.
// incremented by reset func. // incremented by reset func.
_tries: u8 = 0, _tries: u8 = 0,
_performing: bool = false, _performing: bool = false,
_redirect_count: u8 = 0,
// for when a Transfer is queued in the client.queue // for when a Transfer is queued in the client.queue
_node: std.DoublyLinkedList.Node = .{}, _node: std.DoublyLinkedList.Node = .{},
@@ -1038,22 +1060,10 @@ pub const Transfer = struct {
}; };
pub fn reset(self: *Transfer) void { pub fn reset(self: *Transfer) void {
// There's an assertion in ScriptManager that's failing. Seemingly because
// the headerCallback is being called multiple times. This shouldn't be
// possible (hence the assertion). Previously, this `reset` would set
// _header_done_called = false. That could have been how headerCallback
// was called multuple times (because _header_done_called is the guard
// against that, so resetting it would allow a 2nd call to headerCallback).
// But it should also be impossible for this to be true. So, I've added
// this assertion to try to narrow down what's going on.
lp.assert(self._header_done_called == false, "Transfer.reset header_done_called", .{});
self._redirecting = false;
self._auth_challenge = null; self._auth_challenge = null;
self._notified_fail = false; self._notified_fail = false;
self.response_header = null; self.response_header = null;
self.bytes_received = 0; self.bytes_received = 0;
self._tries += 1; self._tries += 1;
} }
@@ -1066,7 +1076,7 @@ pub const Transfer = struct {
self.client.transfer_pool.destroy(self); self.client.transfer_pool.destroy(self);
} }
fn buildResponseHeader(self: *Transfer, conn: *const Net.Connection) !void { fn buildResponseHeader(self: *Transfer, conn: *const http.Connection) !void {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(self.response_header == null); std.debug.assert(self.response_header == null);
} }
@@ -1081,7 +1091,7 @@ pub const Transfer = struct {
self.response_header = .{ self.response_header = .{
.url = url, .url = url,
.status = status, .status = status,
.redirect_count = try conn.getRedirectCount(), .redirect_count = self._redirect_count,
}; };
if (conn.getResponseHeader("content-type", 0)) |ct| { if (conn.getResponseHeader("content-type", 0)) |ct| {
@@ -1106,11 +1116,82 @@ pub const Transfer = struct {
self.req.url = url; self.req.url = url;
} }
/// Prepares `transfer` to follow the redirect described by the response
/// currently held by `conn`. Nothing is sent here; the request stored on the
/// transfer is only rewritten.
///
/// Steps, in order:
///   1. Increment the redirect counter and fail with `error.TooManyRedirects`
///      once it exceeds `config.httpMaxRedirects()`.
///   2. Feed every `set-cookie` header of the redirect response into the
///      request's cookie jar (when the request has one).
///   3. Resolve the `location` header against the connection's effective URL
///      and hand the result to `updateURL`.
///   4. For 301, 302 and 303, switch the method to GET and drop the body;
///      other statuses keep method and body.
///   5. Rebuild the cookie header for the new URL from the jar.
///
/// Errors: `error.TooManyRedirects`, `error.LocationNotFound` (no `location`
/// header), plus anything returned by the connection, the cookie jar, URL
/// resolution or the arena allocator.
///
/// All allocations (resolved URL, cookie header) come from the transfer arena.
fn handleRedirect(transfer: *Transfer, conn: *const http.Connection) !void {
    const req = &transfer.req;
    const arena = transfer.arena.allocator();

    // `_redirect_count` is a u8. Use a checked add so that reaching 255 is
    // reported as "too many redirects" instead of tripping integer overflow
    // when the configured maximum does not stop us first.
    const redirect_count = std.math.add(u8, transfer._redirect_count, 1) catch {
        return error.TooManyRedirects;
    };
    transfer._redirect_count = redirect_count;
    if (redirect_count > transfer.client.network.config.httpMaxRedirects()) {
        return error.TooManyRedirects;
    }

    // retrieve cookies from the redirect's response.
    if (req.cookie_jar) |jar| {
        var i: usize = 0;
        while (conn.getResponseHeader("set-cookie", i)) |hdr| {
            try jar.populateFromResponse(transfer.url, hdr.value);
            i += 1;
            // `amount` is the number of headers with this name; stop once
            // all of them have been consumed.
            if (i >= hdr.amount) break;
        }
    }

    // resolve the redirect target.
    const location = conn.getResponseHeader("location", 0) orelse {
        return error.LocationNotFound;
    };

    const base_url = try conn.getEffectiveUrl();
    const url = try URL.resolve(arena, std.mem.span(base_url), location.value, .{});
    try transfer.updateURL(url);

    // 301, 302, 303 → change to GET, drop body.
    // 307, 308 → keep method and body.
    // NOTE(review): this rewrites the method regardless of what the original
    // method was — confirm that is intended for non-POST requests.
    const status = try conn.getResponseCode();
    if (status == 301 or status == 302 or status == 303) {
        req.method = .GET;
        req.body = null;
    }

    // set cookies for the following request.
    if (req.cookie_jar) |jar| {
        var cookies: std.ArrayList(u8) = .{};
        try jar.forRequest(url, cookies.writer(arena), .{
            .is_http = true,
            .origin_url = url,
            // document loads are treated as navigations by the jar.
            .is_navigation = req.resource_type == .document,
        });

        if (cookies.items.len > 0) {
            try cookies.append(arena, 0); // null terminate
            req.headers.cookies = @ptrCast(cookies.items.ptr);
        } else {
            // Nothing applies to the new URL: make sure cookies computed for
            // the previous URL are not sent.
            req.headers.cookies = null;
        }
    }
}
/// Inspects the response held by `conn` and records on `transfer` whether it
/// is an authentication challenge.
///
/// - Status other than 401/407: `_auth_challenge` is cleared.
/// - 401/407 with a `WWW-Authenticate` header (looked up first) or a
///   `Proxy-Authenticate` header: `_auth_challenge` is set to the parsed
///   challenge. If parsing fails the error is logged and `_auth_challenge`
///   is set to null.
/// - 401/407 with neither header: `_auth_challenge` is set to a bare
///   challenge carrying only the status.
///
/// If the response code cannot be read, the error is logged and
/// `_auth_challenge` is left untouched.
fn detectAuthChallenge(transfer: *Transfer, conn: *const http.Connection) void {
    const status = conn.getResponseCode() catch |err| {
        lp.log.err(.http, "get response code", .{ .err = err, .source = "detect auth challenge" });
        return;
    };

    if (status != 401 and status != 407) {
        transfer._auth_challenge = null;
        return;
    }

    if (conn.getResponseHeader("WWW-Authenticate", 0)) |hdr| {
        transfer._auth_challenge = AuthChallenge.parse(status, .server, hdr.value) catch |err| blk: {
            // Keep the request going, but make the failure visible.
            lp.log.err(.http, "parse auth challenge", .{ .err = err, .header = hdr.value });
            break :blk null;
        };
    } else if (conn.getResponseHeader("Proxy-Authenticate", 0)) |hdr| {
        transfer._auth_challenge = AuthChallenge.parse(status, .proxy, hdr.value) catch |err| blk: {
            lp.log.err(.http, "parse auth challenge", .{ .err = err, .header = hdr.value });
            break :blk null;
        };
    } else {
        // Challenge status without a challenge header: remember the status only.
        transfer._auth_challenge = .{ .status = status, .source = null, .scheme = null, .realm = null };
    }
}
pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void {
self.req.credentials = userpwd; self.req.credentials = userpwd;
} }
pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const Net.Header) !void { pub fn replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const http.Header) !void {
self.req.headers.deinit(); self.req.headers.deinit();
var buf: std.ArrayList(u8) = .empty; var buf: std.ArrayList(u8) = .empty;
@@ -1172,7 +1253,7 @@ pub const Transfer = struct {
pub fn abortAuthChallenge(self: *Transfer) void { pub fn abortAuthChallenge(self: *Transfer) void {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(self._intercept_state != .not_intercepted); std.debug.assert(self._intercept_state != .not_intercepted);
log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); lp.log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted });
} }
self.client.intercepted -= 1; self.client.intercepted -= 1;
if (!self.req.blocking) { if (!self.req.blocking) {
@@ -1182,52 +1263,10 @@ pub const Transfer = struct {
self._intercept_state = .{ .abort = error.AbortAuthChallenge }; self._intercept_state = .{ .abort = error.AbortAuthChallenge };
} }
// redirectionCookies manages cookies during redirections handled by Curl.
// It sets the cookies from the current response to the cookie jar.
// It also immediately sets cookies for the following request.
fn redirectionCookies(transfer: *Transfer, conn: *const Net.Connection) !void {
const req = &transfer.req;
const arena = transfer.arena.allocator();
// retrieve cookies from the redirect's response.
if (req.cookie_jar) |jar| {
var i: usize = 0;
while (true) {
const ct = conn.getResponseHeader("set-cookie", i);
if (ct == null) break;
try jar.populateFromResponse(transfer.url, ct.?.value);
i += 1;
if (i >= ct.?.amount) break;
}
}
// set cookies for the following redirection's request.
const location = conn.getResponseHeader("location", 0) orelse {
return error.LocationNotFound;
};
const base_url = try conn.getEffectiveUrl();
const url = try URL.resolve(arena, std.mem.span(base_url), location.value, .{});
transfer.url = url;
if (req.cookie_jar) |jar| {
var cookies: std.ArrayList(u8) = .{};
try jar.forRequest(url, cookies.writer(arena), .{
.is_http = true,
.origin_url = url,
// used to enforce samesite cookie rules
.is_navigation = req.resource_type == .document,
});
try cookies.append(arena, 0); //null terminate
try conn.setCookies(@ptrCast(cookies.items.ptr));
}
}
// headerDoneCallback is called once the headers have been read. // headerDoneCallback is called once the headers have been read.
// It can be called either on dataCallback or once the request for those // It can be called either on dataCallback or once the request for those
// w/o body. // w/o body.
fn headerDoneCallback(transfer: *Transfer, conn: *const Net.Connection) !bool { fn headerDoneCallback(transfer: *Transfer, conn: *const http.Connection) !bool {
lp.assert(transfer._header_done_called == false, "Transfer.headerDoneCallback", .{}); lp.assert(transfer._header_done_called == false, "Transfer.headerDoneCallback", .{});
defer transfer._header_done_called = true; defer transfer._header_done_called = true;
@@ -1247,7 +1286,7 @@ pub const Transfer = struct {
const ct = conn.getResponseHeader("set-cookie", i); const ct = conn.getResponseHeader("set-cookie", i);
if (ct == null) break; if (ct == null) break;
jar.populateFromResponse(transfer.url, ct.?.value) catch |err| { jar.populateFromResponse(transfer.url, ct.?.value) catch |err| {
log.err(.http, "set cookie", .{ .err = err, .req = transfer }); lp.log.err(.http, "set cookie", .{ .err = err, .req = transfer });
return err; return err;
}; };
i += 1; i += 1;
@@ -1264,7 +1303,7 @@ pub const Transfer = struct {
} }
const proceed = transfer.req.header_callback(transfer) catch |err| { const proceed = transfer.req.header_callback(transfer) catch |err| {
log.err(.http, "header_callback", .{ .err = err, .req = transfer }); lp.log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return err; return err;
}; };
@@ -1275,147 +1314,32 @@ pub const Transfer = struct {
return proceed and transfer.aborted == false; return proceed and transfer.aborted == false;
} }
// headerCallback is called by curl on each request's header line read.
fn headerCallback(buffer: [*]const u8, header_count: usize, buf_len: usize, data: *anyopaque) usize {
// libcurl should only ever emit 1 header at a time
if (comptime IS_DEBUG) {
std.debug.assert(header_count == 1);
}
const conn: Net.Connection = .{ .easy = @ptrCast(@alignCast(data)) };
var transfer = fromConnection(&conn) catch |err| {
log.err(.http, "get private info", .{ .err = err, .source = "header callback" });
return 0;
};
if (comptime IS_DEBUG) {
// curl will allow header lines that end with either \r\n or just \n
std.debug.assert(buffer[buf_len - 1] == '\n');
}
if (buf_len < 3) {
// could be \r\n or \n.
// We get the last header line.
if (transfer._redirecting) {
// parse and set cookies for the redirection.
redirectionCookies(transfer, &conn) catch |err| {
if (comptime IS_DEBUG) {
log.debug(.http, "redirection cookies", .{ .err = err });
}
return 0;
};
}
return buf_len;
}
var header_len = buf_len - 2;
if (buffer[buf_len - 2] != '\r') {
// curl supports headers that just end with either \r\n or \n
header_len = buf_len - 1;
}
const header = buffer[0..header_len];
// We need to parse the first line headers for each request b/c curl's
// CURLINFO_RESPONSE_CODE returns the status code of the final request.
// If a redirection or a proxy's CONNECT forbidden happens, we won't
// get this intermediary status code.
if (std.mem.startsWith(u8, header, "HTTP/")) {
// Is it the first header line.
if (buf_len < 13) {
if (comptime IS_DEBUG) {
log.debug(.http, "invalid response line", .{ .line = header });
}
return 0;
}
const version_start: usize = if (header[5] == '2') 7 else 9;
const version_end = version_start + 3;
// a bit silly, but it makes sure that we don't change the length check
// above in a way that could break this.
if (comptime IS_DEBUG) {
std.debug.assert(version_end < 13);
}
const status = std.fmt.parseInt(u16, header[version_start..version_end], 10) catch {
if (comptime IS_DEBUG) {
log.debug(.http, "invalid status code", .{ .line = header });
}
return 0;
};
if (status >= 300 and status <= 399) {
transfer._redirecting = true;
return buf_len;
}
transfer._redirecting = false;
if (status == 401 or status == 407) {
// The auth challenge must be parsed from a following
// WWW-Authenticate or Proxy-Authenticate header.
transfer._auth_challenge = .{
.status = status,
.source = null,
.scheme = null,
.realm = null,
};
return buf_len;
}
transfer._auth_challenge = null;
transfer.bytes_received = buf_len;
return buf_len;
}
if (transfer._redirecting == false and transfer._auth_challenge != null) {
transfer.bytes_received += buf_len;
}
if (transfer._auth_challenge != null) {
// try to parse auth challenge.
if (std.ascii.startsWithIgnoreCase(header, "WWW-Authenticate") or
std.ascii.startsWithIgnoreCase(header, "Proxy-Authenticate"))
{
const ac = AuthChallenge.parse(
transfer._auth_challenge.?.status,
header,
) catch |err| {
// We can't parse the auth challenge
log.err(.http, "parse auth challenge", .{ .err = err, .header = header });
// Should we cancel the request? I don't think so.
return buf_len;
};
transfer._auth_challenge = ac;
}
}
return buf_len;
}
fn dataCallback(buffer: [*]const u8, chunk_count: usize, chunk_len: usize, data: *anyopaque) usize { fn dataCallback(buffer: [*]const u8, chunk_count: usize, chunk_len: usize, data: *anyopaque) usize {
// libcurl should only ever emit 1 chunk at a time // libcurl should only ever emit 1 chunk at a time
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(chunk_count == 1); std.debug.assert(chunk_count == 1);
} }
const conn: Net.Connection = .{ .easy = @ptrCast(@alignCast(data)) }; const conn: http.Connection = .{ .easy = @ptrCast(@alignCast(data)) };
var transfer = fromConnection(&conn) catch |err| { var transfer = fromConnection(&conn) catch |err| {
log.err(.http, "get private info", .{ .err = err, .source = "body callback" }); lp.log.err(.http, "get private info", .{ .err = err, .source = "body callback" });
return Net.writefunc_error; return http.writefunc_error;
}; };
if (transfer._redirecting or transfer._auth_challenge != null) { // Skip body for responses that will be retried (redirects, auth challenges).
const status = conn.getResponseCode() catch return http.writefunc_error;
if ((status >= 300 and status <= 399) or status == 401 or status == 407) {
return @intCast(chunk_len); return @intCast(chunk_len);
} }
if (!transfer._header_done_called) { if (!transfer._header_done_called) {
const proceed = transfer.headerDoneCallback(&conn) catch |err| { const proceed = transfer.headerDoneCallback(&conn) catch |err| {
log.err(.http, "header_done_callback", .{ .err = err, .req = transfer }); lp.log.err(.http, "header_done_callback", .{ .err = err, .req = transfer });
return Net.writefunc_error; return http.writefunc_error;
}; };
if (!proceed) { if (!proceed) {
// signal abort to libcurl // signal abort to libcurl
return Net.writefunc_error; return http.writefunc_error;
} }
} }
@@ -1423,14 +1347,14 @@ pub const Transfer = struct {
if (transfer.max_response_size) |max_size| { if (transfer.max_response_size) |max_size| {
if (transfer.bytes_received > max_size) { if (transfer.bytes_received > max_size) {
requestFailed(transfer, error.ResponseTooLarge, true); requestFailed(transfer, error.ResponseTooLarge, true);
return Net.writefunc_error; return http.writefunc_error;
} }
} }
const chunk = buffer[0..chunk_len]; const chunk = buffer[0..chunk_len];
transfer.req.data_callback(transfer, chunk) catch |err| { transfer.req.data_callback(transfer, chunk) catch |err| {
log.err(.http, "data_callback", .{ .err = err, .req = transfer }); lp.log.err(.http, "data_callback", .{ .err = err, .req = transfer });
return Net.writefunc_error; return http.writefunc_error;
}; };
transfer.req.notification.dispatch(.http_response_data, &.{ transfer.req.notification.dispatch(.http_response_data, &.{
@@ -1439,7 +1363,7 @@ pub const Transfer = struct {
}); });
if (transfer.aborted) { if (transfer.aborted) {
return Net.writefunc_error; return http.writefunc_error;
} }
return @intCast(chunk_len); return @intCast(chunk_len);
@@ -1458,12 +1382,12 @@ pub const Transfer = struct {
return .{ .list = .{ .list = self.response_header.?._injected_headers } }; return .{ .list = .{ .list = self.response_header.?._injected_headers } };
} }
/// Returns the `Transfer` stored as the private pointer of `conn`.
///
/// Any error reported by `conn.getPrivate()` is propagated to the caller.
/// The pointer is reinterpreted as `*Transfer` with no runtime type check.
/// NOTE(review): this presumably relies on the private pointer having been
/// set to a `Transfer` when the connection was configured — confirm at the
/// place where the private data is assigned.
pub fn fromConnection(conn: *const http.Connection) !*Transfer {
    const opaque_ptr = try conn.getPrivate();
    const transfer: *Transfer = @ptrCast(@alignCast(opaque_ptr));
    return transfer;
}
pub fn fulfill(transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { pub fn fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void {
if (transfer._conn != null) { if (transfer._conn != null) {
// should never happen, should have been intercepted/paused, and then // should never happen, should have been intercepted/paused, and then
// either continued, aborted or fulfilled once. // either continued, aborted or fulfilled once.
@@ -1477,7 +1401,7 @@ pub const Transfer = struct {
}; };
} }
fn _fulfill(transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void {
const req = &transfer.req; const req = &transfer.req;
if (req.start_callback) |cb| { if (req.start_callback) |cb| {
try cb(transfer); try cb(transfer);

View File

@@ -654,7 +654,6 @@ pub const Script = struct {
debug_transfer_aborted: bool = false, debug_transfer_aborted: bool = false,
debug_transfer_bytes_received: usize = 0, debug_transfer_bytes_received: usize = 0,
debug_transfer_notified_fail: bool = false, debug_transfer_notified_fail: bool = false,
debug_transfer_redirecting: bool = false,
debug_transfer_intercept_state: u8 = 0, debug_transfer_intercept_state: u8 = 0,
debug_transfer_auth_challenge: bool = false, debug_transfer_auth_challenge: bool = false,
debug_transfer_easy_id: usize = 0, debug_transfer_easy_id: usize = 0,
@@ -730,7 +729,6 @@ pub const Script = struct {
.a3 = self.debug_transfer_aborted, .a3 = self.debug_transfer_aborted,
.a4 = self.debug_transfer_bytes_received, .a4 = self.debug_transfer_bytes_received,
.a5 = self.debug_transfer_notified_fail, .a5 = self.debug_transfer_notified_fail,
.a6 = self.debug_transfer_redirecting,
.a7 = self.debug_transfer_intercept_state, .a7 = self.debug_transfer_intercept_state,
.a8 = self.debug_transfer_auth_challenge, .a8 = self.debug_transfer_auth_challenge,
.a9 = self.debug_transfer_easy_id, .a9 = self.debug_transfer_easy_id,
@@ -739,7 +737,6 @@ pub const Script = struct {
.b3 = transfer.aborted, .b3 = transfer.aborted,
.b4 = transfer.bytes_received, .b4 = transfer.bytes_received,
.b5 = transfer._notified_fail, .b5 = transfer._notified_fail,
.b6 = transfer._redirecting,
.b7 = @intFromEnum(transfer._intercept_state), .b7 = @intFromEnum(transfer._intercept_state),
.b8 = transfer._auth_challenge != null, .b8 = transfer._auth_challenge != null,
.b9 = if (transfer._conn) |c| @intFromPtr(c.easy) else 0, .b9 = if (transfer._conn) |c| @intFromPtr(c.easy) else 0,
@@ -750,7 +747,6 @@ pub const Script = struct {
self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_notified_fail = transfer._notified_fail; self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_redirecting = transfer._redirecting;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c.easy) else 0; self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c.easy) else 0;

View File

@@ -174,33 +174,24 @@ const HeaderValue = struct {
}; };
pub const AuthChallenge = struct { pub const AuthChallenge = struct {
const Source = enum { server, proxy };
const Scheme = enum { basic, digest };
status: u16, status: u16,
source: ?enum { server, proxy }, source: ?Source,
scheme: ?enum { basic, digest }, scheme: ?Scheme,
realm: ?[]const u8, realm: ?[]const u8,
pub fn parse(status: u16, header: []const u8) !AuthChallenge { pub fn parse(status: u16, source: Source, value: []const u8) !AuthChallenge {
var ac: AuthChallenge = .{ var ac: AuthChallenge = .{
.status = status, .status = status,
.source = null, .source = source,
.realm = null, .realm = null,
.scheme = null, .scheme = null,
}; };
const sep = std.mem.indexOfPos(u8, header, 0, ": ") orelse return error.InvalidHeader; const pos = std.mem.indexOfPos(u8, std.mem.trim(u8, value, std.ascii.whitespace[0..]), 0, " ") orelse value.len;
const hname = header[0..sep]; const _scheme = value[0..pos];
const hvalue = header[sep + 2 ..];
if (std.ascii.eqlIgnoreCase("WWW-Authenticate", hname)) {
ac.source = .server;
} else if (std.ascii.eqlIgnoreCase("Proxy-Authenticate", hname)) {
ac.source = .proxy;
} else {
return error.InvalidAuthChallenge;
}
const pos = std.mem.indexOfPos(u8, std.mem.trim(u8, hvalue, std.ascii.whitespace[0..]), 0, " ") orelse hvalue.len;
const _scheme = hvalue[0..pos];
if (std.ascii.eqlIgnoreCase(_scheme, "basic")) { if (std.ascii.eqlIgnoreCase(_scheme, "basic")) {
ac.scheme = .basic; ac.scheme = .basic;
} else if (std.ascii.eqlIgnoreCase(_scheme, "digest")) { } else if (std.ascii.eqlIgnoreCase(_scheme, "digest")) {
@@ -376,11 +367,8 @@ pub const Connection = struct {
/// Installs `data_cb` as the write callback of this connection's easy handle.
///
/// The easy handle itself is registered as the write data.
/// NOTE(review): presumably libcurl hands that value back to `data_cb` as its
/// user-data argument, letting the callback rebuild a `Connection` from it —
/// confirm against the callback implementation.
///
/// Returns the first error reported by `curl_easy_setopt`; in that case the
/// remaining option is left untouched.
pub fn setCallbacks(
    self: *const Connection,
    comptime data_cb: libcurl.CurlWriteFunction,
) !void {
    const easy = self.easy;

    // Keep this order: user data first, then the function that consumes it.
    try libcurl.curl_easy_setopt(easy, .write_data, easy);
    try libcurl.curl_easy_setopt(easy, .write_function, data_cb);
}
@@ -389,9 +377,6 @@ pub const Connection = struct {
try libcurl.curl_easy_setopt(self.easy, .proxy, null); try libcurl.curl_easy_setopt(self.easy, .proxy, null);
try libcurl.curl_easy_setopt(self.easy, .http_header, null); try libcurl.curl_easy_setopt(self.easy, .http_header, null);
try libcurl.curl_easy_setopt(self.easy, .header_data, null);
try libcurl.curl_easy_setopt(self.easy, .header_function, null);
try libcurl.curl_easy_setopt(self.easy, .write_data, null); try libcurl.curl_easy_setopt(self.easy, .write_data, null);
try libcurl.curl_easy_setopt(self.easy, .write_function, discardBody); try libcurl.curl_easy_setopt(self.easy, .write_function, discardBody);
} }
@@ -404,6 +389,10 @@ pub const Connection = struct {
try libcurl.curl_easy_setopt(self.easy, .proxy, if (proxy) |p| p.ptr else null); try libcurl.curl_easy_setopt(self.easy, .proxy, if (proxy) |p| p.ptr else null);
} }
/// Enables or disables the `.follow_location` option on this connection.
///
/// `follow == true` sets the option to 2, `follow == false` sets it to 0.
/// NOTE(review): 2 presumably corresponds to libcurl's CURLFOLLOW_OBEYCODE
/// mode (older libcurl versions treat any non-zero value as "follow") —
/// confirm against the libcurl version this project links.
///
/// Propagates any error returned by `curl_easy_setopt`.
pub fn setFollowLocation(self: *const Connection, follow: bool) !void {
    const follow_disabled: c_long = 0;
    const follow_enabled: c_long = 2;

    const mode: c_long = if (follow) follow_enabled else follow_disabled;
    try libcurl.curl_easy_setopt(self.easy, .follow_location, mode);
}
pub fn setTlsVerify(self: *const Connection, verify: bool, use_proxy: bool) !void { pub fn setTlsVerify(self: *const Connection, verify: bool, use_proxy: bool) !void {
try libcurl.curl_easy_setopt(self.easy, .ssl_verify_host, verify); try libcurl.curl_easy_setopt(self.easy, .ssl_verify_host, verify);
try libcurl.curl_easy_setopt(self.easy, .ssl_verify_peer, verify); try libcurl.curl_easy_setopt(self.easy, .ssl_verify_peer, verify);