Prepare global NetworkRuntime module

This commit is contained in:
Nikolay Govorov
2026-03-04 21:55:50 +00:00
parent 0e4a65efb7
commit 8e59ce9e9f
26 changed files with 985 additions and 981 deletions

610
src/network/http.zig Normal file
View File

@@ -0,0 +1,610 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const builtin = @import("builtin");
const posix = std.posix;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");
const log = @import("lightpanda").log;
const assert = @import("lightpanda").assert;
// Compile-time switch. When set to true, Connection.init turns on libcurl's
// verbose output for every easy handle it creates.
pub const ENABLE_DEBUG = false;
// True when this module is compiled in Debug mode.
const IS_DEBUG = builtin.mode == .Debug;
// Re-exports of libcurl declarations, so users of this module don't have to
// import the libcurl bindings directly.
pub const Blob = libcurl.CurlBlob;
pub const WaitFd = libcurl.CurlWaitFd;
pub const writefunc_error = libcurl.curl_writefunc_error;
// Local shorthands for the error types and error helpers declared by the
// libcurl bindings.
const Error = libcurl.Error;
const ErrorMulti = libcurl.ErrorMulti;
const errorFromCode = libcurl.errorFromCode;
const errorMFromCode = libcurl.errorMFromCode;
const errorCheck = libcurl.errorCheck;
const errorMCheck = libcurl.errorMCheck;
/// Returns the version string reported by `libcurl.curl_version()`.
pub fn curl_version() [*c]const u8 {
    const version = libcurl.curl_version();
    return version;
}
/// HTTP request methods understood by this module.
///
/// The enum is backed by a `u8` and every tag carries an explicit value.
pub const Method = enum(u8) {
    GET = 0,
    PUT = 1,
    POST = 2,
    DELETE = 3,
    HEAD = 4,
    OPTIONS = 5,
    PATCH = 6,
    PROPFIND = 7,
};
/// A single HTTP header split into its name and value parts.
/// Both slices are borrowed; this struct does not own the bytes.
pub const Header = struct {
    /// Header field name, e.g. "Content-Type".
    name: []const u8,
    /// Header field value.
    value: []const u8,
};
/// Request headers, stored as a libcurl string list (`curl_slist`), plus an
/// optional cookie string that is kept outside of the list.
pub const Headers = struct {
    /// Head of the libcurl string list; each entry is a full "Name: value" line.
    headers: ?*libcurl.CurlSList,
    /// Optional NUL-terminated cookie string. Not owned by this struct:
    /// `deinit` does not free it.
    cookies: ?[*c]const u8,

    /// Creates a header list whose first entry is `user_agent`. The string is
    /// appended to the list verbatim, so it must be a complete header line.
    /// Returns `error.OutOfMemory` if libcurl cannot allocate the list.
    pub fn init(user_agent: [:0]const u8) !Headers {
        const header_list = libcurl.curl_slist_append(null, user_agent);
        if (header_list == null) {
            return error.OutOfMemory;
        }
        return .{ .headers = header_list, .cookies = null };
    }

    /// Frees the libcurl string list, if any.
    pub fn deinit(self: *const Headers) void {
        if (self.headers) |hdr| {
            libcurl.curl_slist_free_all(hdr);
        }
    }

    /// Appends a full header line to the list.
    /// Returns `error.OutOfMemory` if libcurl cannot grow the list; in that
    /// case the existing list is left untouched.
    pub fn add(self: *Headers, header: [*c]const u8) !void {
        // Copies the value
        const updated_headers = libcurl.curl_slist_append(self.headers, header);
        if (updated_headers == null) {
            return error.OutOfMemory;
        }
        self.headers = updated_headers;
    }

    /// Splits a "Name: value" line at the first ':' and trims spaces and tabs
    /// from both parts. Returns null when the line contains no ':'.
    fn parseHeader(header_str: []const u8) ?Header {
        const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
        const name = std.mem.trim(u8, header_str[0..colon_pos], " \t");
        const value = std.mem.trim(u8, header_str[colon_pos + 1 ..], " \t");
        return .{ .name = name, .value = value };
    }

    /// Returns an iterator over the list entries, followed by a synthetic
    /// "Cookie" header when `cookies` is set.
    pub fn iterator(self: *Headers) Iterator {
        return .{
            .header = self.headers,
            .cookies = self.cookies,
        };
    }

    const Iterator = struct {
        header: [*c]libcurl.CurlSList,
        cookies: ?[*c]const u8,

        /// Yields the next parsed header, or null when exhausted.
        ///
        /// List entries that cannot be parsed (no ':' in the line) are
        /// skipped instead of ending the iteration, so one odd entry does
        /// not hide the remaining headers or the trailing Cookie entry.
        pub fn next(self: *Iterator) ?Header {
            while (true) {
                const h = self.header orelse break;
                self.header = h.*.next;
                const line = std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)));
                if (parseHeader(line)) |parsed| {
                    return parsed;
                }
            }

            // The list is exhausted: emit the cookie string exactly once.
            const cookies = self.cookies orelse return null;
            self.cookies = null;
            return .{ .name = "Cookie", .value = std.mem.span(@as([*:0]const u8, cookies)) };
        }
    };
};
// In normal cases, the header iterator comes from the curl linked list.
// But it's also possible to inject a response, via `transfer.fulfill`. In that
// case, the response headers are a list, []const Http.Header.
// This union is an iterator that exposes the same API for either case.
/// Iterator over response headers, backed either by a live curl easy handle
/// or by a plain slice of headers.
pub const HeaderIterator = union(enum) {
    curl: CurlHeaderIterator,
    list: ListHeaderIterator,

    /// Returns the next header from whichever backing iterator is active,
    /// or null once it is exhausted.
    pub fn next(self: *HeaderIterator) ?Header {
        return switch (self.*) {
            inline else => |*inner| inner.next(),
        };
    }

    /// Walks the headers of a connection through `curl_easy_nextheader`.
    const CurlHeaderIterator = struct {
        conn: *const Connection,
        /// Last header handed out; passed back to libcurl to continue from it.
        prev: ?*libcurl.CurlHeader = null,

        pub fn next(self: *CurlHeaderIterator) ?Header {
            const entry = libcurl.curl_easy_nextheader(
                self.conn.easy,
                .header,
                -1,
                self.prev,
            ) orelse return null;
            self.prev = entry;

            const current = entry.*;
            const name = std.mem.span(current.name);
            const value = std.mem.span(current.value);
            return .{ .name = name, .value = value };
        }
    };

    /// Walks a borrowed slice of headers front to back.
    const ListHeaderIterator = struct {
        index: usize = 0,
        list: []const Header,

        pub fn next(self: *ListHeaderIterator) ?Header {
            if (self.index == self.list.len) return null;
            // Advance only after the current element has been read.
            defer self.index += 1;
            return self.list[self.index];
        }
    };
};
/// Result of a single response-header lookup (see
/// `Connection.getResponseHeader`).
const HeaderValue = struct {
    /// The header's value, as reported by libcurl.
    value: []const u8,
    /// The `amount` field libcurl reports for the header.
    amount: usize,
};
/// A parsed `WWW-Authenticate` / `Proxy-Authenticate` challenge.
pub const AuthChallenge = struct {
    /// HTTP status code of the response that carried the challenge.
    status: u16,
    /// Whether the challenge came from the origin server or from a proxy.
    source: ?enum { server, proxy },
    /// Authentication scheme requested by the challenge.
    scheme: ?enum { basic, digest },
    /// Not populated by `parse`; always null on a freshly parsed challenge.
    realm: ?[]const u8,

    /// Parses a raw header line of the form "Name: scheme [params]".
    ///
    /// The header name and the scheme are matched case-insensitively.
    /// Whitespace around the header value (including a trailing CRLF) is
    /// ignored when extracting the scheme.
    ///
    /// Errors:
    ///  - `error.InvalidHeader`: the line has no ": " separator.
    ///  - `error.InvalidAuthChallenge`: the header is neither
    ///    `WWW-Authenticate` nor `Proxy-Authenticate`.
    ///  - `error.UnknownAuthChallengeScheme`: the scheme is neither `basic`
    ///    nor `digest`.
    pub fn parse(status: u16, header: []const u8) !AuthChallenge {
        var ac: AuthChallenge = .{
            .status = status,
            .source = null,
            .realm = null,
            .scheme = null,
        };

        const sep = std.mem.indexOfPos(u8, header, 0, ": ") orelse return error.InvalidHeader;
        const hname = header[0..sep];
        // Trim once and use the trimmed slice for both finding and slicing
        // the scheme, so the index always refers to the slice it came from.
        const hvalue = std.mem.trim(u8, header[sep + 2 ..], std.ascii.whitespace[0..]);

        if (std.ascii.eqlIgnoreCase("WWW-Authenticate", hname)) {
            ac.source = .server;
        } else if (std.ascii.eqlIgnoreCase("Proxy-Authenticate", hname)) {
            ac.source = .proxy;
        } else {
            return error.InvalidAuthChallenge;
        }

        // The scheme is the first space-delimited token of the value.
        const pos = std.mem.indexOfScalar(u8, hvalue, ' ') orelse hvalue.len;
        const scheme_name = hvalue[0..pos];
        if (std.ascii.eqlIgnoreCase(scheme_name, "basic")) {
            ac.scheme = .basic;
        } else if (std.ascii.eqlIgnoreCase(scheme_name, "digest")) {
            ac.scheme = .digest;
        } else {
            return error.UnknownAuthChallengeScheme;
        }

        return ac;
    }
};
/// Summary of a response: status, URL, redirect count and a bounded copy of
/// the content type.
pub const ResponseHead = struct {
    /// Capacity of the inline content-type buffer.
    pub const MAX_CONTENT_TYPE_LEN = 64;

    status: u16,
    url: [*c]const u8,
    redirect_count: u32,
    /// Number of valid bytes in `_content_type`; 0 means "no content type".
    _content_type_len: usize = 0,
    _content_type: [MAX_CONTENT_TYPE_LEN]u8 = undefined,

    // This is normally an empty list, but if the response is being injected
    // then it'll be populated. It isn't meant to be used directly, but should
    // be used through the transfer.responseHeaderIterator() which abstracts
    // whether the headers are from a live curl easy handle, or injected.
    _injected_headers: []const Header = &.{},

    /// Returns the stored content type, or null when none has been recorded.
    pub fn contentType(self: *ResponseHead) ?[]u8 {
        const len = self._content_type_len;
        return if (len == 0) null else self._content_type[0..len];
    }
};
/// Thin wrapper around a single libcurl easy handle.
///
/// Most methods are one-to-one wrappers over `curl_easy_setopt` /
/// `curl_easy_getinfo` and propagate whatever error the libcurl bindings
/// return.
pub const Connection = struct {
    easy: *libcurl.Curl,
    /// Intrusive list node used by `Handles` to track this connection in its
    /// available / in_use / dirty lists.
    node: Handles.HandleList.Node = .{},

    /// Creates an easy handle and applies the settings from `config`:
    /// timeouts, redirect policy, proxy, TLS and compression.
    ///
    /// When `ca_blob_` is null, TLS verification is disabled; in that case
    /// `config.tlsVerifyHost()` is asserted to be false.
    /// The easy handle is cleaned up if any option fails to apply.
    pub fn init(
        ca_blob_: ?libcurl.CurlBlob,
        config: *const Config,
    ) !Connection {
        const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy;
        errdefer libcurl.curl_easy_cleanup(easy);

        // timeouts
        try libcurl.curl_easy_setopt(easy, .timeout_ms, config.httpTimeout());
        try libcurl.curl_easy_setopt(easy, .connect_timeout_ms, config.httpConnectTimeout());

        // redirect behavior
        try libcurl.curl_easy_setopt(easy, .max_redirs, config.httpMaxRedirects());
        try libcurl.curl_easy_setopt(easy, .follow_location, 2);
        try libcurl.curl_easy_setopt(easy, .redir_protocols_str, "HTTP,HTTPS"); // remove FTP and FTPS from the default

        // proxy
        const http_proxy = config.httpProxy();
        if (http_proxy) |proxy| {
            try libcurl.curl_easy_setopt(easy, .proxy, proxy.ptr);
        }

        // tls
        if (ca_blob_) |ca_blob| {
            try libcurl.curl_easy_setopt(easy, .ca_info_blob, ca_blob);
            if (http_proxy != null) {
                try libcurl.curl_easy_setopt(easy, .proxy_ca_info_blob, ca_blob);
            }
        } else {
            assert(config.tlsVerifyHost() == false, "Http.init tls_verify_host", .{});

            try libcurl.curl_easy_setopt(easy, .ssl_verify_host, false);
            try libcurl.curl_easy_setopt(easy, .ssl_verify_peer, false);

            if (http_proxy != null) {
                try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_host, false);
                try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_peer, false);
            }
        }

        // compression, don't remove this. CloudFront will send gzip content
        // even if we don't support it, and then it won't be decompressed.
        // empty string means: use whatever's available
        try libcurl.curl_easy_setopt(easy, .accept_encoding, "");

        // debug
        if (comptime ENABLE_DEBUG) {
            try libcurl.curl_easy_setopt(easy, .verbose, true);

            // Sometimes the default debug output hides some useful data. You can
            // uncomment the following line (BUT KEEP THE LINE ABOVE AS-IS), to
            // get more control over the data (specifically, the `CURLINFO_TEXT`
            // can include useful data).

            // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback);
        }

        return .{
            .easy = easy,
        };
    }

    /// Releases the underlying easy handle.
    pub fn deinit(self: *const Connection) void {
        libcurl.curl_easy_cleanup(self.easy);
    }

    /// Sets the request URL.
    pub fn setURL(self: *const Connection, url: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .url, url.ptr);
    }

    // a libcurl request has 2 methods. The first is the method that
    // controls how libcurl behaves. This specifically influences how redirects
    // are handled. For example, if you do a POST and get a 301, libcurl will
    // change that to a GET. But if you do a POST and get a 308, libcurl will
    // keep the POST (and re-send the body).
    // The second method is the actual string that's included in the request
    // headers.
    // These two methods can be different - you can tell curl to behave as though
    // you made a GET, but include "POST" in the request header.
    //
    // Here, we're only concerned about the 2nd method. If we want, we'll set
    // the first one based on whether or not we have a body.
    //
    // It's important that, for each use of this connection, we set the 2nd
    // method. Else, if we make a HEAD request and re-use the connection, but
    // DON'T reset this, it'll keep making HEAD requests.
    // (I don't know if it's as important to reset the 1st method, or if libcurl
    // can infer that based on the presence of the body, but we also reset it
    // to be safe);
    pub fn setMethod(self: *const Connection, method: Method) !void {
        const easy = self.easy;
        const m: [:0]const u8 = switch (method) {
            .GET => "GET",
            .POST => "POST",
            .PUT => "PUT",
            .DELETE => "DELETE",
            .HEAD => "HEAD",
            .OPTIONS => "OPTIONS",
            .PATCH => "PATCH",
            .PROPFIND => "PROPFIND",
        };
        try libcurl.curl_easy_setopt(easy, .custom_request, m.ptr);
    }

    /// Puts the handle in POST mode and attaches `body`.
    /// The size is set before `.copy_post_fields`, which is the option that
    /// is handed the body pointer.
    pub fn setBody(self: *const Connection, body: []const u8) !void {
        const easy = self.easy;
        try libcurl.curl_easy_setopt(easy, .post, true);
        try libcurl.curl_easy_setopt(easy, .post_field_size, body.len);
        try libcurl.curl_easy_setopt(easy, .copy_post_fields, body.ptr);
    }

    /// Puts the handle in GET mode (the "first method" described above
    /// `setMethod`).
    pub fn setGetMode(self: *const Connection) !void {
        try libcurl.curl_easy_setopt(self.easy, .http_get, true);
    }

    /// Points the handle at `headers`' string list. Only the list pointer is
    /// passed to libcurl here; nothing is copied by this function.
    pub fn setHeaders(self: *const Connection, headers: *Headers) !void {
        try libcurl.curl_easy_setopt(self.easy, .http_header, headers.headers);
    }

    /// Sets the cookie string sent with the request.
    pub fn setCookies(self: *const Connection, cookies: [*c]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .cookie, cookies);
    }

    /// Attaches an opaque pointer to the handle; read it back with
    /// `getPrivate`.
    pub fn setPrivate(self: *const Connection, ptr: *anyopaque) !void {
        try libcurl.curl_easy_setopt(self.easy, .private, ptr);
    }

    /// Sets the proxy credentials (`.proxy_user_pwd`).
    pub fn setProxyCredentials(self: *const Connection, creds: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .proxy_user_pwd, creds.ptr);
    }

    /// Sets the server credentials (`.user_pwd`).
    pub fn setCredentials(self: *const Connection, creds: [:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .user_pwd, creds.ptr);
    }

    /// Installs the header and body callbacks. Both callbacks receive the
    /// easy handle itself as their user-data pointer.
    pub fn setCallbacks(
        self: *const Connection,
        comptime header_cb: libcurl.CurlHeaderFunction,
        comptime data_cb: libcurl.CurlWriteFunction,
    ) !void {
        try libcurl.curl_easy_setopt(self.easy, .header_data, self.easy);
        try libcurl.curl_easy_setopt(self.easy, .header_function, header_cb);
        try libcurl.curl_easy_setopt(self.easy, .write_data, self.easy);
        try libcurl.curl_easy_setopt(self.easy, .write_function, data_cb);
    }

    /// Sets the `.proxy` option; `proxy` may be null.
    pub fn setProxy(self: *const Connection, proxy: ?[*:0]const u8) !void {
        try libcurl.curl_easy_setopt(self.easy, .proxy, proxy);
    }

    /// Enables or disables TLS host and peer verification. When `use_proxy`
    /// is true the same setting is applied to the proxy connection.
    pub fn setTlsVerify(self: *const Connection, verify: bool, use_proxy: bool) !void {
        try libcurl.curl_easy_setopt(self.easy, .ssl_verify_host, verify);
        try libcurl.curl_easy_setopt(self.easy, .ssl_verify_peer, verify);
        if (use_proxy) {
            try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_host, verify);
            try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_peer, verify);
        }
    }

    /// Returns the effective URL reported by libcurl for this handle.
    pub fn getEffectiveUrl(self: *const Connection) ![*c]const u8 {
        var url: [*c]u8 = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .effective_url, &url);
        return url;
    }

    /// Returns the HTTP response code, or 0 when the value reported by
    /// libcurl does not fit in a `u16`.
    pub fn getResponseCode(self: *const Connection) !u16 {
        var status: c_long = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .response_code, &status);
        if (status < 0 or status > std.math.maxInt(u16)) {
            return 0;
        }
        return @intCast(status);
    }

    /// Returns the number of redirects followed, or 0 when the value
    /// reported by libcurl does not fit in a `u32` (mirrors the range
    /// handling in `getResponseCode` instead of an unchecked cast).
    pub fn getRedirectCount(self: *const Connection) !u32 {
        var count: c_long = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .redirect_count, &count);
        return std.math.cast(u32, count) orelse 0;
    }

    /// Looks up the `index`-th response header called `name`.
    /// Returns null when the header is absent or when libcurl reports an
    /// error (the error is logged, not propagated).
    pub fn getResponseHeader(self: *const Connection, name: [:0]const u8, index: usize) ?HeaderValue {
        var hdr: ?*libcurl.CurlHeader = null;
        libcurl.curl_easy_header(self.easy, name, index, .header, -1, &hdr) catch |err| {
            // ErrorHeader includes OutOfMemory — rare but real errors from curl internals.
            // Logged and returned as null since callers don't expect errors.
            log.err(.http, "get response header", .{
                .name = name,
                .err = err,
            });
            return null;
        };
        const h = hdr orelse return null;
        return .{
            .amount = h.amount,
            .value = std.mem.span(h.value),
        };
    }

    /// Returns the pointer previously stored with `setPrivate`.
    pub fn getPrivate(self: *const Connection) !*anyopaque {
        var private: *anyopaque = undefined;
        try libcurl.curl_easy_getinfo(self.easy, .private, &private);
        return private;
    }

    // These are headers that may not be sent to the users for interception.
    pub fn secretHeaders(_: *const Connection, headers: *Headers, http_headers: *const Config.HttpHeaders) !void {
        if (http_headers.proxy_bearer_header) |hdr| {
            try headers.add(hdr);
        }
    }

    /// Performs a blocking request on this handle using the configured
    /// user-agent (plus the secret headers) and returns the response code.
    pub fn request(self: *const Connection, http_headers: *const Config.HttpHeaders) !u16 {
        var header_list = try Headers.init(http_headers.user_agent_header);
        // NOTE(review): the list is freed on return while the easy handle
        // still holds the pointer given to `.http_header` below. Presumably
        // every later use of this handle calls setHeaders again before
        // performing a transfer — confirm.
        defer header_list.deinit();
        try self.secretHeaders(&header_list, http_headers);
        try self.setHeaders(&header_list);

        // Add cookies.
        if (header_list.cookies) |cookies| {
            try self.setCookies(cookies);
        }

        try libcurl.curl_easy_perform(self.easy);
        return self.getResponseCode();
    }
};
/// A fixed pool of `Connection`s attached to one libcurl multi handle.
///
/// Every connection is linked, through its intrusive `node`, into exactly one
/// of three lists:
///  - `available`: free to be handed out by `get`,
///  - `in_use`: handed out and not yet released,
///  - `dirty`: could not be removed from the multi handle in `remove`;
///    `perform` retries the removal and then makes them available.
pub const Handles = struct {
    connections: []Connection,
    dirty: HandleList,
    in_use: HandleList,
    available: HandleList,
    multi: *libcurl.CurlM,
    /// True while `perform` is executing.
    performing: bool = false,

    pub const HandleList = std.DoublyLinkedList;

    /// Creates the multi handle and `config.httpMaxConcurrent()` connections.
    ///
    /// Returns `error.InvalidMaxConcurrent` when that count is 0. On any
    /// failure, everything created so far is released: the easy handles that
    /// were already initialized, the connection slice and the multi handle.
    pub fn init(
        allocator: Allocator,
        ca_blob: ?libcurl.CurlBlob,
        config: *const Config,
    ) !Handles {
        const count: usize = config.httpMaxConcurrent();
        if (count == 0) return error.InvalidMaxConcurrent;

        const multi = libcurl.curl_multi_init() orelse return error.FailedToInitializeMulti;
        errdefer libcurl.curl_multi_cleanup(multi) catch {};

        try libcurl.curl_multi_setopt(multi, .max_host_connections, config.httpMaxHostOpen());

        const connections = try allocator.alloc(Connection, count);
        errdefer allocator.free(connections);

        // Tracks how many connections hold a live easy handle, so a failure
        // midway through the loop cleans up exactly those. errdefers run in
        // reverse order: easy handles first, then the slice, then the multi.
        var initialized: usize = 0;
        errdefer {
            for (connections[0..initialized]) |*conn| {
                conn.deinit();
            }
        }

        var available: HandleList = .{};
        for (connections) |*conn| {
            conn.* = try Connection.init(ca_blob, config);
            initialized += 1;
            // The node lives inside the heap-allocated slice, so its address
            // stays valid after this function returns.
            available.append(&conn.node);
        }

        return .{
            .dirty = .{},
            .in_use = .{},
            .connections = connections,
            .available = available,
            .multi = multi,
        };
    }

    /// Cleans up every connection, frees the slice and releases the multi
    /// handle. `allocator` must be the one passed to `init`.
    pub fn deinit(self: *Handles, allocator: Allocator) void {
        for (self.connections) |*conn| {
            conn.deinit();
        }
        allocator.free(self.connections);
        libcurl.curl_multi_cleanup(self.multi) catch {};
    }

    /// Reports whether `get` would currently return a connection.
    pub fn hasAvailable(self: *const Handles) bool {
        return self.available.first != null;
    }

    /// Takes a connection from the available list and marks it in use.
    /// Returns null when none is available.
    pub fn get(self: *Handles) ?*Connection {
        if (self.available.popFirst()) |node| {
            self.in_use.append(node);
            return @as(*Connection, @fieldParentPtr("node", node));
        }
        return null;
    }

    /// Adds the connection's easy handle to the multi handle.
    pub fn add(self: *Handles, conn: *const Connection) !void {
        try libcurl.curl_multi_add_handle(self.multi, conn.easy);
    }

    /// Removes the connection from the multi handle and makes it available
    /// again. If libcurl refuses the removal, the connection is parked on the
    /// dirty list and the removal is retried by `perform`.
    pub fn remove(self: *Handles, conn: *Connection) void {
        if (libcurl.curl_multi_remove_handle(self.multi, conn.easy)) {
            self.isAvailable(conn);
        } else |err| {
            // can happen if we're in a perform() call, so we'll queue this
            // for cleanup later.
            const node = &conn.node;
            self.in_use.remove(node);
            self.dirty.append(node);
            log.warn(.http, "multi remove handle", .{ .err = err });
        }
    }

    /// Moves `conn` from the in_use list to the available list.
    /// Despite the name, this mutates state rather than querying it.
    pub fn isAvailable(self: *Handles, conn: *Connection) void {
        const node = &conn.node;
        self.in_use.remove(node);
        self.available.append(node);
    }

    /// Runs `curl_multi_perform` and returns the number of transfers still
    /// running. Afterwards, retries the removal of every dirty connection;
    /// a failure at that point is fatal (panics).
    pub fn perform(self: *Handles) !c_int {
        self.performing = true;
        defer self.performing = false;

        const multi = self.multi;
        var running: c_int = undefined;
        try libcurl.curl_multi_perform(multi, &running);

        {
            const list = &self.dirty;
            while (list.first) |node| {
                list.remove(node);
                const conn: *Connection = @fieldParentPtr("node", node);
                if (libcurl.curl_multi_remove_handle(multi, conn.easy)) {
                    self.available.append(node);
                } else |err| {
                    log.fatal(.http, "multi remove handle", .{ .err = err, .src = "perform" });
                    @panic("multi_remove_handle");
                }
            }
        }

        return running;
    }

    /// Waits on the multi handle (and `extra_fds`) via `curl_multi_poll`,
    /// passing `timeout_ms` through.
    pub fn poll(self: *Handles, extra_fds: []libcurl.CurlWaitFd, timeout_ms: c_int) !void {
        try libcurl.curl_multi_poll(self.multi, extra_fds, timeout_ms, null);
    }

    /// A completed transfer as reported by the multi handle.
    pub const MultiMessage = struct {
        /// A fresh `Connection` value wrapping the message's easy handle. It
        /// is NOT the pooled instance: its `node` is default-initialized.
        conn: Connection,
        /// Transfer error, or null on success.
        err: ?Error,
    };

    /// Reads the next message from the multi handle, or returns null when
    /// there is none. Only "done" messages are expected; any other message
    /// kind is treated as unreachable.
    pub fn readMessage(self: *Handles) ?MultiMessage {
        var messages_count: c_int = 0;
        const msg = libcurl.curl_multi_info_read(self.multi, &messages_count) orelse return null;
        return switch (msg.data) {
            .done => |err| .{
                .conn = .{ .easy = msg.easy_handle },
                .err = err,
            },
            else => unreachable,
        };
    }
};
/// Optional libcurl debug callback: prints text and header traffic through
/// `std.debug.print`. Other message kinds are reported with their numeric
/// type only. Always returns 0.
fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8, len: usize, _: *anyopaque) c_int {
    const bytes = raw[0..len];
    switch (msg_type) {
        .text => std.debug.print("libcurl [text]: {s}\n", .{bytes}),
        .header_out => std.debug.print("libcurl [req-h]: {s}\n", .{bytes}),
        .header_in => std.debug.print("libcurl [res-h]: {s}\n", .{bytes}),
        // .data_in => std.debug.print("libcurl [res-b]: {s}\n", .{bytes}),
        else => std.debug.print("libcurl ?? {d}\n", .{msg_type}),
    }
    return 0;
}