// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier
// Pierre Tachoire
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");
const builtin = @import("builtin");
const posix = std.posix;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;

const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");

const log = @import("lightpanda").log;
const assert = @import("lightpanda").assert;

pub const ENABLE_DEBUG = false;
const IS_DEBUG = builtin.mode == .Debug;

pub const Blob = libcurl.CurlBlob;
pub const WaitFd = libcurl.CurlWaitFd;
pub const writefunc_error = libcurl.curl_writefunc_error;

const Error = libcurl.Error;
const ErrorMulti = libcurl.ErrorMulti;
const errorFromCode = libcurl.errorFromCode;
const errorMFromCode = libcurl.errorMFromCode;
const errorCheck = libcurl.errorCheck;
const errorMCheck = libcurl.errorMCheck;

/// Returns the version string reported by libcurl.
pub fn curl_version() [*c]const u8 {
    return libcurl.curl_version();
}

/// HTTP request methods supported by this wrapper.
pub const Method = enum(u8) {
    GET = 0,
    PUT = 1,
    POST = 2,
    DELETE = 3,
    HEAD = 4,
    OPTIONS = 5,
    PATCH = 6,
    PROPFIND = 7,
};

/// A single header, split into its name and value.
pub const Header = struct {
    name: []const u8,
    value: []const u8,
};

/// Request headers, stored as a curl string list, plus an optional cookie
/// string that the iterator exposes as a trailing "Cookie" header.
pub const Headers = struct {
    headers: ?*libcurl.CurlSList,
    cookies: ?[*c]const u8,

    /// Creates the list with `user_agent` as its first entry.
    /// Fails with `error.OutOfMemory` when curl cannot allocate the list.
    pub fn init(user_agent: [:0]const u8) !Headers {
        const list = libcurl.curl_slist_append(null, user_agent) orelse return error.OutOfMemory;
        return .{ .headers = list, .cookies = null };
    }

    /// Frees the underlying curl list, if there is one.
    pub fn deinit(self: *const Headers) void {
        const list = self.headers orelse return;
        libcurl.curl_slist_free_all(list);
    }

    /// Appends a raw header line to the list.
    /// Fails with `error.OutOfMemory` when curl cannot grow the list.
    pub fn add(self: *Headers, header: [*c]const u8) !void {
        // Copies the value
        const grown = libcurl.curl_slist_append(self.headers, header) orelse return error.OutOfMemory;
        self.headers = grown;
    }

    /// Splits a "Name: value" line at the first ':' and trims spaces and
    /// tabs from both halves. Returns null when the line has no ':'.
    fn parseHeader(header_str: []const u8) ?Header {
        const sep = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
        return .{
            .name = std.mem.trim(u8, header_str[0..sep], " \t"),
            .value = std.mem.trim(u8, header_str[sep + 1 ..], " \t"),
        };
    }

    /// Returns an iterator positioned at the first header of the list.
    pub fn iterator(self: *Headers) Iterator {
        return .{
            .header = self.headers,
            .cookies = self.cookies,
        };
    }

    const Iterator = struct {
        header: [*c]libcurl.CurlSList,
        cookies: ?[*c]const u8,

        /// Yields each list entry in order. Once the list is exhausted, the
        /// cookie string (if any) is yielded exactly once as a "Cookie"
        /// header; after that the iterator returns null.
        pub fn next(self: *Iterator) ?Header {
            const node = self.header orelse {
                // List is done: emit the cookies once, then clear them so the
                // following call ends the iteration.
                const cookie_str = self.cookies orelse return null;
                self.cookies = null;
                return .{
                    .name = "Cookie",
                    .value = std.mem.span(@as([*:0]const u8, cookie_str)),
                };
            };

            self.header = node.next;
            const line = std.mem.span(@as([*:0]const u8, @ptrCast(node.data)));
            return parseHeader(line);
        }
    };
};

// In normal cases, the header iterator comes from the curl linked list.
// But it's also possible to inject a response, via `transfer.fulfill`. In that
// case, the response headers are a list, []const Http.Header.
// This union is an iterator that exposes the same API for either case.
/// Iterator over response headers, backed either by a live curl easy handle
/// (`curl`) or by a plain slice of headers (`list`). Both variants expose the
/// same `next()` API.
pub const HeaderIterator = union(enum) {
    curl: CurlHeaderIterator,
    list: ListHeaderIterator,

    /// Returns the next header from whichever variant is active, or null
    /// when there are none left.
    pub fn next(self: *HeaderIterator) ?Header {
        switch (self.*) {
            inline else => |*it| return it.next(),
        }
    }

    const CurlHeaderIterator = struct {
        conn: *const Connection,
        prev: ?*libcurl.CurlHeader = null,

        /// Asks curl for the header following `prev` and remembers it so the
        /// next call continues from there.
        pub fn next(self: *CurlHeaderIterator) ?Header {
            const h = libcurl.curl_easy_nextheader(self.conn.easy, .header, -1, self.prev) orelse return null;
            self.prev = h;

            const header = h.*;
            return .{
                .name = std.mem.span(header.name),
                .value = std.mem.span(header.value),
            };
        }
    };

    const ListHeaderIterator = struct {
        index: usize = 0,
        list: []const Header,

        /// Returns `list[index]` and advances, or null once past the end.
        pub fn next(self: *ListHeaderIterator) ?Header {
            const idx = self.index;
            if (idx == self.list.len) {
                return null;
            }
            self.index = idx + 1;
            return self.list[idx];
        }
    };
};

/// Result of `Connection.getResponseHeader`: the header value and the
/// `amount` field as reported by curl for that header.
const HeaderValue = struct {
    value: []const u8,
    amount: usize,
};

/// A parsed `WWW-Authenticate` / `Proxy-Authenticate` challenge.
pub const AuthChallenge = struct {
    status: u16,
    source: ?enum { server, proxy },
    scheme: ?enum { basic, digest },
    realm: ?[]const u8,

    /// Parses a raw "Name: value" header line into an `AuthChallenge`.
    ///
    /// `source` is derived from the header name and `scheme` from the first
    /// token of the value. `realm` is not extracted and is always null.
    ///
    /// Errors:
    /// - `error.InvalidHeader`: no ": " separator in `header`.
    /// - `error.InvalidAuthChallenge`: the name is neither `WWW-Authenticate`
    ///   nor `Proxy-Authenticate` (case-insensitive).
    /// - `error.UnknownAuthChallengeScheme`: the scheme is neither `basic`
    ///   nor `digest` (case-insensitive).
    pub fn parse(status: u16, header: []const u8) !AuthChallenge {
        var ac: AuthChallenge = .{
            .status = status,
            .source = null,
            .realm = null,
            .scheme = null,
        };

        const sep = std.mem.indexOfPos(u8, header, 0, ": ") orelse return error.InvalidHeader;
        const hname = header[0..sep];

        // Trim once and use the trimmed slice for both the search and the
        // slicing. Searching the trimmed value but slicing the untrimmed one
        // mis-reads the scheme whenever the value has leading whitespace or a
        // trailing line ending (e.g. "Basic\r\n").
        const hvalue = std.mem.trim(u8, header[sep + 2 ..], std.ascii.whitespace[0..]);

        if (std.ascii.eqlIgnoreCase("WWW-Authenticate", hname)) {
            ac.source = .server;
        } else if (std.ascii.eqlIgnoreCase("Proxy-Authenticate", hname)) {
            ac.source = .proxy;
        } else {
            return error.InvalidAuthChallenge;
        }

        // The scheme is everything up to the first space, or the whole value
        // when the challenge carries no parameters.
        const scheme_end = std.mem.indexOfScalar(u8, hvalue, ' ') orelse hvalue.len;
        const scheme_name = hvalue[0..scheme_end];
        if (std.ascii.eqlIgnoreCase(scheme_name, "basic")) {
            ac.scheme = .basic;
        } else if (std.ascii.eqlIgnoreCase(scheme_name, "digest")) {
            ac.scheme = .digest;
        } else {
            return error.UnknownAuthChallengeScheme;
        }

        return ac;
    }
};

/// Summary of a response: status, URL, redirect count and content type.
pub const ResponseHead = struct {
    pub const MAX_CONTENT_TYPE_LEN = 64;

    status: u16,
    url: [*c]const u8,
    redirect_count: u32,

    // Only the first `_content_type_len` bytes of `_content_type` are valid.
    _content_type_len: usize = 0,
    _content_type: [MAX_CONTENT_TYPE_LEN]u8 = undefined,
    // this is normally an empty list, but if the response is being injected
    // then it'll be populated. It isn't meant to be used directly, but should
    // be used through the transfer.responseHeaderIterator() which abstracts
    // whether the headers are from a live curl easy handle, or injected.
    _injected_headers: []const Header = &.{},

    /// Returns the stored content type, or null when none has been recorded.
    pub fn contentType(self: *ResponseHead) ?[]u8 {
        if (self._content_type_len == 0) {
            return null;
        }
        return self._content_type[0..self._content_type_len];
    }
};

/// A thin wrapper around a single curl easy handle. `node` links the
/// connection into one of the `Handles` lists.
pub const Connection = struct {
    easy: *libcurl.Curl,
    node: Handles.HandleList.Node = .{},

    /// Creates an easy handle and applies the options that stay constant for
    /// its lifetime (timeouts, redirects, proxy, TLS, compression).
    ///
    /// When `ca_blob_` is null, TLS verification is disabled; in that case
    /// the config is asserted to have host verification turned off.
    /// The easy handle is cleaned up if any option fails to apply.
    pub fn init(
        ca_blob_: ?libcurl.CurlBlob,
        config: *const Config,
    ) !Connection {
        const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy;
        errdefer libcurl.curl_easy_cleanup(easy);

        // timeouts
        try libcurl.curl_easy_setopt(easy, .timeout_ms, config.httpTimeout());
        try libcurl.curl_easy_setopt(easy, .connect_timeout_ms, config.httpConnectTimeout());

        // redirect behavior
        try libcurl.curl_easy_setopt(easy, .max_redirs, config.httpMaxRedirects());
        try libcurl.curl_easy_setopt(easy, .follow_location, 2);
        try libcurl.curl_easy_setopt(easy, .redir_protocols_str, "HTTP,HTTPS"); // remove FTP and FTPS from the default

        // proxy
        const http_proxy = config.httpProxy();
        if (http_proxy) |proxy| {
            try libcurl.curl_easy_setopt(easy, .proxy, proxy.ptr);
        }

        // tls
        if (ca_blob_) |ca_blob| {
            try libcurl.curl_easy_setopt(easy, .ca_info_blob, ca_blob);
            if (http_proxy != null) {
                try libcurl.curl_easy_setopt(easy, .proxy_ca_info_blob, ca_blob);
            }
        } else {
            assert(config.tlsVerifyHost() == false, "Http.init tls_verify_host", .{});

            try libcurl.curl_easy_setopt(easy, .ssl_verify_host, false);
            try libcurl.curl_easy_setopt(easy, .ssl_verify_peer, false);

            if (http_proxy != null) {
                try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_host, false);
                try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_peer, false);
            }
        }

        // compression, don't remove this. CloudFront will send gzip content
        // even if we don't support it, and then it won't be decompressed.
        // empty string means: use whatever's available
        try libcurl.curl_easy_setopt(easy, .accept_encoding, "");

        // debug
        if (comptime ENABLE_DEBUG) {
            try libcurl.curl_easy_setopt(easy, .verbose, true);

            // Sometimes the default debug output hides some useful data. You can
            // uncomment the following line (BUT KEEP THE LINE ABOVE AS-IS), to
            // get more control over the data (specifically, the `CURLINFO_TEXT`
            // can include useful data).

            // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback);
        }

        return .{
            .easy = easy,
        };
    }

    /// Releases the easy handle.
    pub fn deinit(self: *const Connection) void {
        libcurl.curl_easy_cleanup(self.easy);
    }

    /// Sets the URL for the next transfer.
    pub fn setURL(self: *const Connection, url: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .url, url.ptr);
    }

    // a libcurl request has 2 methods. The first is the method that
    // controls how libcurl behaves. This specifically influences how redirects
    // are handled. For example, if you do a POST and get a 301, libcurl will
    // change that to a GET. But if you do a POST and get a 308, libcurl will
    // keep the POST (and re-send the body).
    // The second method is the actual string that's included in the request
    // headers.
    // These two methods can be different - you can tell curl to behave as though
    // you made a GET, but include "POST" in the request header.
    //
    // Here, we're only concerned about the 2nd method. If we want, we'll set
    // the first one based on whether or not we have a body.
    //
    // It's important that, for each use of this connection, we set the 2nd
    // method. Else, if we make a HEAD request and re-use the connection, but
    // DON'T reset this, it'll keep making HEAD requests.
    // (I don't know if it's as important to reset the 1st method, or if libcurl
    // can infer that based on the presence of the body, but we also reset it
    // to be safe);
    pub fn setMethod(self: *const Connection, method: Method) !void {
        const easy = self.easy;
        const m: [:0]const u8 = switch (method) {
            .GET => "GET",
            .POST => "POST",
            .PUT => "PUT",
            .DELETE => "DELETE",
            .HEAD => "HEAD",
            .OPTIONS => "OPTIONS",
            .PATCH => "PATCH",
            .PROPFIND => "PROPFIND",
        };
        try libcurl.curl_easy_setopt(easy, .custom_request, m.ptr);
    }

    /// Switches the handle to POST mode and attaches `body`.
    /// The size is set before the body, since `body` is not NUL-terminated.
    pub fn setBody(self: *const Connection, body: []const u8) !void {
        const easy = self.easy;
        try libcurl.curl_easy_setopt(easy, .post, true);
        try libcurl.curl_easy_setopt(easy, .post_field_size, body.len);
        try libcurl.curl_easy_setopt(easy, .copy_post_fields, body.ptr);
    }

    /// Switches the handle to GET mode.
    pub fn setGetMode(self: *const Connection) !void {
        try libcurl.curl_easy_setopt(self.easy, .http_get, true);
    }

    /// Points the handle at the curl list held by `headers`.
    pub fn setHeaders(self: *const Connection, headers: *Headers) !void {
        try libcurl.curl_easy_setopt(self.easy, .http_header, headers.headers);
    }

    /// Sets the cookie string sent with the request.
    pub fn setCookies(self: *const Connection, cookies: [*c]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .cookie, cookies);
    }

    /// Stores an opaque pointer on the handle; read it back with `getPrivate`.
    pub fn setPrivate(self: *const Connection, ptr: *anyopaque) !void {
        try libcurl.curl_easy_setopt(self.easy, .private, ptr);
    }

    /// Sets the proxy credentials string.
    pub fn setProxyCredentials(self: *const Connection, creds: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .proxy_user_pwd, creds.ptr);
    }

    /// Sets the server credentials string.
    pub fn setCredentials(self: *const Connection, creds: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .user_pwd, creds.ptr);
    }

    /// Installs the header and body callbacks. The easy handle itself is
    /// passed as the user data for both callbacks.
    pub fn setCallbacks(
        self: *const Connection,
        comptime header_cb: libcurl.CurlHeaderFunction,
        comptime data_cb: libcurl.CurlWriteFunction,
    ) !void {
        try libcurl.curl_easy_setopt(self.easy, .header_data, self.easy);
        try libcurl.curl_easy_setopt(self.easy, .header_function, header_cb);
        try libcurl.curl_easy_setopt(self.easy, .write_data, self.easy);
        try libcurl.curl_easy_setopt(self.easy, .write_function, data_cb);
    }

    /// Sets the proxy option; `proxy` may be null.
    pub fn setProxy(self: *const Connection, proxy: ?[*:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .proxy, proxy);
    }

    /// Enables or disables host and peer verification. The proxy
    /// verification options are only touched when `use_proxy` is true.
    pub fn setTlsVerify(self: *const Connection, verify: bool, use_proxy: bool) !void {
        try libcurl.curl_easy_setopt(self.easy, .ssl_verify_host, verify);
        try libcurl.curl_easy_setopt(self.easy, .ssl_verify_peer, verify);
        if (use_proxy) {
            try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_host, verify);
            try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_peer, verify);
        }
    }

    /// Returns the effective URL reported by curl for the last transfer.
    pub fn getEffectiveUrl(self: *const Connection) ![*c]const u8 {
        var url: [*c]u8 = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .effective_url, &url);
        return url;
    }

    /// Returns the response status code, or 0 when the value reported by
    /// curl does not fit in a `u16`.
    pub fn getResponseCode(self: *const Connection) !u16 {
        var status: c_long = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .response_code, &status);
        if (status < 0 or status > std.math.maxInt(u16)) {
            return 0;
        }
        return @intCast(status);
    }

    /// Returns the number of redirects followed, or 0 when the value
    /// reported by curl does not fit in a `u32`.
    pub fn getRedirectCount(self: *const Connection) !u32 {
        var count: c_long = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .redirect_count, &count);
        // Checked conversion: an unchecked @intCast would be illegal behavior
        // for a negative or oversized value. Mirrors getResponseCode.
        return std.math.cast(u32, count) orelse 0;
    }

    /// Looks up the `index`-th response header called `name`.
    /// Returns null when curl reports no such header, and also when the
    /// lookup fails (the failure is logged, not propagated).
    pub fn getResponseHeader(self: *const Connection, name: [:0]const u8, index: usize) ?HeaderValue {
        var hdr: ?*libcurl.CurlHeader = null;
        libcurl.curl_easy_header(self.easy, name, index, .header, -1, &hdr) catch |err| {
            // ErrorHeader includes OutOfMemory — rare but real errors from curl internals.
            // Logged and returned as null since callers don't expect errors.
            log.err(.http, "get response header", .{
                .name = name,
                .err = err,
            });
            return null;
        };
        const h = hdr orelse return null;
        return .{
            .amount = h.amount,
            .value = std.mem.span(h.value),
        };
    }

    /// Returns the opaque pointer stored on the handle.
    /// NOTE(review): `private` starts out undefined, so this presumably
    /// relies on `setPrivate` having been called first — confirm with callers.
    pub fn getPrivate(self: *const Connection) !*anyopaque {
        var private: *anyopaque = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .private, &private);
        return private;
    }

    // These are headers that may not be sent to the users for interception.
    pub fn secretHeaders(_: *const Connection, headers: *Headers, http_headers: *const Config.HttpHeaders) !void {
        if (http_headers.proxy_bearer_header) |hdr| {
            try headers.add(hdr);
        }
    }

    /// Performs a blocking request on this handle using the configured
    /// user-agent and secret headers, and returns the response status code.
    /// The URL and method must already be set on the handle.
    pub fn request(self: *const Connection, http_headers: *const Config.HttpHeaders) !u16 {
        var header_list = try Headers.init(http_headers.user_agent_header);
        defer header_list.deinit();
        try self.secretHeaders(&header_list, http_headers);
        try self.setHeaders(&header_list);

        // Add cookies.
        if (header_list.cookies) |cookies| {
            try self.setCookies(cookies);
        }

        try libcurl.curl_easy_perform(self.easy);
        return self.getResponseCode();
    }
};

/// A fixed-size pool of connections attached to one curl multi handle.
/// Every connection is linked into exactly one of `available`, `in_use` or
/// `dirty` through its `node` field.
pub const Handles = struct {
    connections: []Connection,
    // Connections whose removal from the multi handle failed; retried at the
    // end of `perform`.
    dirty: HandleList,
    in_use: HandleList,
    available: HandleList,
    multi: *libcurl.CurlM,
    // True while `perform` is running.
    performing: bool = false,

    pub const HandleList = std.DoublyLinkedList;

    /// Creates the multi handle and `config.httpMaxConcurrent()` connections,
    /// all initially available.
    ///
    /// Fails with `error.InvalidMaxConcurrent` when the configured count is 0
    /// and `error.FailedToInitializeMulti` when curl cannot create the multi
    /// handle. On any failure, everything created so far is released.
    pub fn init(
        allocator: Allocator,
        ca_blob: ?libcurl.CurlBlob,
        config: *const Config,
    ) !Handles {
        const count: usize = config.httpMaxConcurrent();
        if (count == 0) return error.InvalidMaxConcurrent;

        const multi = libcurl.curl_multi_init() orelse return error.FailedToInitializeMulti;
        errdefer libcurl.curl_multi_cleanup(multi) catch {};

        try libcurl.curl_multi_setopt(multi, .max_host_connections, config.httpMaxHostOpen());

        const connections = try allocator.alloc(Connection, count);
        errdefer allocator.free(connections);

        // Track how many connections were created so that a failure part-way
        // through the loop also cleans up the easy handles already made.
        // Freeing only the slice would leak them.
        var initialized: usize = 0;
        errdefer {
            for (connections[0..initialized]) |*conn| conn.deinit();
        }

        var available: HandleList = .{};
        for (connections) |*conn| {
            conn.* = try Connection.init(ca_blob, config);
            initialized += 1;
            available.append(&conn.node);
        }

        return .{
            .dirty = .{},
            .in_use = .{},
            .connections = connections,
            .available = available,
            .multi = multi,
        };
    }

    /// Cleans up every connection, frees the pool and the multi handle.
    /// `allocator` must be the one passed to `init`.
    pub fn deinit(self: *Handles, allocator: Allocator) void {
        for (self.connections) |*conn| {
            conn.deinit();
        }
        allocator.free(self.connections);
        libcurl.curl_multi_cleanup(self.multi) catch {};
    }

    /// Reports whether `get` would return a connection.
    pub fn hasAvailable(self: *const Handles) bool {
        return self.available.first != null;
    }

    /// Takes a connection from `available` and moves it to `in_use`.
    /// Returns null when the pool is exhausted.
    pub fn get(self: *Handles) ?*Connection {
        if (self.available.popFirst()) |node| {
            self.in_use.append(node);
            return @as(*Connection, @fieldParentPtr("node", node));
        }
        return null;
    }

    /// Adds the connection's easy handle to the multi handle.
    pub fn add(self: *Handles, conn: *const Connection) !void {
        try libcurl.curl_multi_add_handle(self.multi, conn.easy);
    }

    /// Removes the connection from the multi handle and returns it to
    /// `available`. If the removal fails, the connection is parked in `dirty`
    /// and the removal is retried at the end of the next `perform`.
    pub fn remove(self: *Handles, conn: *Connection) void {
        if (libcurl.curl_multi_remove_handle(self.multi, conn.easy)) {
            self.isAvailable(conn);
        } else |err| {
            // can happen if we're in a perform() call, so we'll queue this
            // for cleanup later.
            const node = &conn.node;
            self.in_use.remove(node);
            self.dirty.append(node);
            log.warn(.http, "multi remove handle", .{ .err = err });
        }
    }

    /// Moves the connection from `in_use` to `available`.
    pub fn isAvailable(self: *Handles, conn: *Connection) void {
        const node = &conn.node;
        self.in_use.remove(node);
        self.available.append(node);
    }

    /// Runs `curl_multi_perform` and returns the number of running transfers.
    /// Afterwards, every connection in `dirty` is removed from the multi
    /// handle and made available again; a failure at that point panics.
    pub fn perform(self: *Handles) !c_int {
        self.performing = true;
        defer self.performing = false;

        const multi = self.multi;
        var running: c_int = undefined;
        try libcurl.curl_multi_perform(self.multi, &running);

        {
            const list = &self.dirty;
            while (list.first) |node| {
                list.remove(node);
                const conn: *Connection = @fieldParentPtr("node", node);
                if (libcurl.curl_multi_remove_handle(multi, conn.easy)) {
                    self.available.append(node);
                } else |err| {
                    log.fatal(.http, "multi remove handle", .{ .err = err, .src = "perform" });
                    @panic("multi_remove_handle");
                }
            }
        }

        return running;
    }

    /// Polls the multi handle along with `extra_fds`, using `timeout_ms` as
    /// the timeout.
    pub fn poll(self: *Handles, extra_fds: []libcurl.CurlWaitFd, timeout_ms: c_int) !void {
        try libcurl.curl_multi_poll(self.multi, extra_fds, timeout_ms, null);
    }

    /// A completed transfer. `conn` is a fresh wrapper around the easy handle
    /// from the message, not a pointer into `connections`, and its `node` is
    /// not linked into any list.
    pub const MultiMessage = struct {
        conn: Connection,
        err: ?Error,
    };

    /// Reads one pending message from the multi handle, or returns null when
    /// there is none.
    pub fn readMessage(self: *Handles) ?MultiMessage {
        var messages_count: c_int = 0;
        const msg = libcurl.curl_multi_info_read(self.multi, &messages_count) orelse return null;
        return switch (msg.data) {
            .done => |err| .{
                .conn = .{ .easy = msg.easy_handle },
                .err = err,
            },
            // NOTE(review): assumes `.done` is the only message kind curl
            // ever delivers here — confirm against the libcurl binding.
            else => unreachable,
        };
    }
};

/// Debug callback that prints curl's informational text and the request and
/// response headers to stderr. Only referenced from a commented-out line in
/// `Connection.init`.
fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8, len: usize, _: *anyopaque) c_int {
    const data = raw[0..len];
    switch (msg_type) {
        .text => std.debug.print("libcurl [text]: {s}\n", .{data}),
        .header_out => std.debug.print("libcurl [req-h]: {s}\n", .{data}),
        .header_in => std.debug.print("libcurl [res-h]: {s}\n", .{data}),
        // .data_in => std.debug.print("libcurl [res-b]: {s}\n", .{data}),
        else => std.debug.print("libcurl ?? {d}\n", .{msg_type}),
    }
    return 0;
}