Merge pull request #635 from lightpanda-io/http_proxy

add direct http proxy support
This commit is contained in:
Karl Seguin
2025-05-15 12:58:54 +08:00
committed by GitHub
5 changed files with 121 additions and 52 deletions

View File

@@ -30,6 +30,7 @@ pub const App = struct {
run_mode: RunMode, run_mode: RunMode,
gc_hints: bool = false, gc_hints: bool = false,
tls_verify_host: bool = true, tls_verify_host: bool = true,
http_proxy: ?std.Uri = null,
}; };
pub fn init(allocator: Allocator, config: Config) !*App { pub fn init(allocator: Allocator, config: Config) !*App {
@@ -54,6 +55,7 @@ pub const App = struct {
.app_dir_path = app_dir_path, .app_dir_path = app_dir_path,
.notification = notification, .notification = notification,
.http_client = try HttpClient.init(allocator, 5, .{ .http_client = try HttpClient.init(allocator, 5, .{
.http_proxy = config.http_proxy,
.tls_verify_host = config.tls_verify_host, .tls_verify_host = config.tls_verify_host,
}), }),
.config = config, .config = config,

View File

@@ -370,7 +370,7 @@ pub const Page = struct {
var response = try request.sendSync(.{}); var response = try request.sendSync(.{});
// would be different than self.url in the case of a redirect // would be different than self.url in the case of a redirect
self.url = try URL.fromURI(arena, request.uri); self.url = try URL.fromURI(arena, request.request_uri);
const header = response.header; const header = response.header;
try session.cookie_jar.populateFromResponse(&self.url.uri, &header); try session.cookie_jar.populateFromResponse(&self.url.uri, &header);

View File

@@ -497,7 +497,7 @@ pub const XMLHttpRequest = struct {
self.state = .loading; self.state = .loading;
self.dispatchEvt("readystatechange"); self.dispatchEvt("readystatechange");
try self.cookie_jar.populateFromResponse(self.request.?.uri, &header); try self.cookie_jar.populateFromResponse(self.request.?.request_uri, &header);
} }
if (progress.data) |data| { if (progress.data) |data| {

View File

@@ -46,6 +46,7 @@ const MAX_HEADER_LINE_LEN = 4096;
pub const Client = struct { pub const Client = struct {
allocator: Allocator, allocator: Allocator,
state_pool: StatePool, state_pool: StatePool,
http_proxy: ?Uri,
root_ca: tls.config.CertBundle, root_ca: tls.config.CertBundle,
tls_verify_host: bool = true, tls_verify_host: bool = true,
idle_connections: IdleConnections, idle_connections: IdleConnections,
@@ -53,6 +54,7 @@ pub const Client = struct {
const Opts = struct { const Opts = struct {
tls_verify_host: bool = true, tls_verify_host: bool = true,
http_proxy: ?std.Uri = null,
max_idle_connection: usize = 10, max_idle_connection: usize = 10,
}; };
@@ -70,6 +72,7 @@ pub const Client = struct {
.root_ca = root_ca, .root_ca = root_ca,
.allocator = allocator, .allocator = allocator,
.state_pool = state_pool, .state_pool = state_pool,
.http_proxy = opts.http_proxy,
.idle_connections = idle_connections, .idle_connections = idle_connections,
.tls_verify_host = opts.tls_verify_host, .tls_verify_host = opts.tls_verify_host,
.connection_pool = std.heap.MemoryPool(Connection).init(allocator), .connection_pool = std.heap.MemoryPool(Connection).init(allocator),
@@ -150,10 +153,14 @@ pub const Request = struct {
method: Method, method: Method,
// The URI we're requested // The URI we're requested
uri: *const Uri, request_uri: *const Uri,
// The URI that we're connecting to. Can be different than request_uri when
// proxying is enabled
connect_uri: *const Uri,
// If we're redirecting, this is where we're redirecting to. The only reason // If we're redirecting, this is where we're redirecting to. The only reason
// we really have this is so that we can set self.uri = &self.redirect_url.? // we really have this is so that we can set self.request_uri = &self.redirect_url.?
redirect_uri: ?Uri = null, redirect_uri: ?Uri = null,
// Optional body // Optional body
@@ -174,9 +181,12 @@ pub const Request = struct {
// for other requests // for other requests
_keepalive: bool, _keepalive: bool,
_port: u16, // extracted from request_uri
_request_host: []const u8,
_host: []const u8, // extracted from connect_uri
_connect_port: u16,
_connect_host: []const u8,
// whether or not the socket comes from the connection pool. If it does, // whether or not the socket comes from the connection pool. If it does,
// and we get an error sending the header, we might retry on a new connection // and we get an error sending the header, we might retry on a new connection
@@ -222,16 +232,18 @@ pub const Request = struct {
}; };
fn init(client: *Client, state: *State, method: Method, uri: *const Uri) !Request { fn init(client: *Client, state: *State, method: Method, uri: *const Uri) !Request {
const secure, const host, const port = try decomposeURL(uri); const decomposed = try decomposeURL(client, uri);
return .{ return .{
.uri = uri, .request_uri = uri,
.connect_uri = decomposed.connect_uri,
.body = null, .body = null,
.headers = .{}, .headers = .{},
.method = method, .method = method,
.arena = state.arena.allocator(), .arena = state.arena.allocator(),
._secure = secure, ._secure = decomposed.secure,
._host = host, ._connect_host = decomposed.connect_host,
._port = port, ._connect_port = decomposed.connect_port,
._request_host = decomposed.request_host,
._state = state, ._state = state,
._client = client, ._client = client,
._connection = null, ._connection = null,
@@ -249,14 +261,28 @@ pub const Request = struct {
self._client.state_pool.release(self._state); self._client.state_pool.release(self._state);
} }
fn decomposeURL(uri: *const Uri) !struct { bool, []const u8, u16 } { const DecomposedURL = struct {
secure: bool,
connect_port: u16,
connect_host: []const u8,
connect_uri: *const std.Uri,
request_host: []const u8,
};
fn decomposeURL(client: *const Client, uri: *const Uri) !DecomposedURL {
if (uri.host == null) { if (uri.host == null) {
return error.UriMissingHost; return error.UriMissingHost;
} }
const request_host = uri.host.?.percent_encoded;
var connect_uri = uri;
var connect_host = request_host;
if (client.http_proxy) |*proxy| {
connect_uri = proxy;
connect_host = proxy.host.?.percent_encoded;
}
var secure: bool = undefined; var secure: bool = undefined;
const scheme = connect_uri.scheme;
const scheme = uri.scheme;
if (std.ascii.eqlIgnoreCase(scheme, "https")) { if (std.ascii.eqlIgnoreCase(scheme, "https")) {
secure = true; secure = true;
} else if (std.ascii.eqlIgnoreCase(scheme, "http")) { } else if (std.ascii.eqlIgnoreCase(scheme, "http")) {
@@ -264,11 +290,15 @@ pub const Request = struct {
} else { } else {
return error.UnsupportedUriScheme; return error.UnsupportedUriScheme;
} }
const connect_port: u16 = connect_uri.port orelse if (secure) 443 else 80;
const host = uri.host.?.percent_encoded; return .{
const port: u16 = uri.port orelse if (secure) 443 else 80; .secure = secure,
.connect_port = connect_port,
return .{ secure, host, port }; .connect_host = connect_host,
.connect_uri = connect_uri,
.request_host = request_host,
};
} }
// Called in deinit, but also called when we're redirecting to another page // Called in deinit, but also called when we're redirecting to another page
@@ -293,11 +323,11 @@ pub const Request = struct {
errdefer client.connection_pool.destroy(connection); errdefer client.connection_pool.destroy(connection);
connection.* = .{ connection.* = .{
.socket = socket,
.tls = null, .tls = null,
.port = self._port, .socket = socket,
.blocking = blocking, .blocking = blocking,
.host = try client.allocator.dupe(u8, self._host), .port = self._connect_port,
.host = try client.allocator.dupe(u8, self._connect_host),
}; };
return connection; return connection;
@@ -374,12 +404,10 @@ pub const Request = struct {
return err; return err;
}; };
errdefer self.destroyConnection(connection);
if (self._secure) { if (self._secure) {
connection.tls = .{ connection.tls = .{
.blocking = try tls.client(std.net.Stream{ .handle = socket }, .{ .blocking = try tls.client(std.net.Stream{ .handle = socket }, .{
.host = connection.host, .host = self._connect_host,
.root_ca = self._client.root_ca, .root_ca = self._client.root_ca,
.insecure_skip_verify = self._tls_verify_host == false, .insecure_skip_verify = self._tls_verify_host == false,
// .key_log_callback = tls.config.key_log.callback, // .key_log_callback = tls.config.key_log.callback,
@@ -391,11 +419,9 @@ pub const Request = struct {
self._connection_from_keepalive = false; self._connection_from_keepalive = false;
} }
errdefer self.destroyConnection(self._connection.?);
var handler = SyncHandler{ .request = self }; var handler = SyncHandler{ .request = self };
return handler.send() catch |err| { return handler.send() catch |err| {
log.warn("HTTP error: {any} ({any} {any} {d})", .{ err, self.method, self.uri, self._redirect_count }); log.warn("HTTP error: {any} ({any} {any} {d})", .{ err, self.method, self.request_uri, self._redirect_count });
return err; return err;
}; };
} }
@@ -461,7 +487,7 @@ pub const Request = struct {
if (self._secure) { if (self._secure) {
connection.tls = .{ connection.tls = .{
.nonblocking = try tls.nb.Client().init(self._client.allocator, .{ .nonblocking = try tls.nb.Client().init(self._client.allocator, .{
.host = connection.host, .host = self._connect_host,
.root_ca = self._client.root_ca, .root_ca = self._client.root_ca,
.insecure_skip_verify = self._tls_verify_host == false, .insecure_skip_verify = self._tls_verify_host == false,
.key_log_callback = tls.config.key_log.callback, .key_log_callback = tls.config.key_log.callback,
@@ -501,7 +527,7 @@ pub const Request = struct {
} }
if (!self._has_host_header) { if (!self._has_host_header) {
try self.headers.append(arena, .{ .name = "Host", .value = self._host }); try self.headers.append(arena, .{ .name = "Host", .value = self._request_host });
} }
try self.headers.append(arena, .{ .name = "User-Agent", .value = "Lightpanda/1.0" }); try self.headers.append(arena, .{ .name = "User-Agent", .value = "Lightpanda/1.0" });
@@ -512,7 +538,7 @@ pub const Request = struct {
self.releaseConnection(); self.releaseConnection();
// CANNOT reset the arena (╥﹏╥) // CANNOT reset the arena (╥﹏╥)
// We need it for self.uri (which we're about to use to resolve // We need it for self.request_uri (which we're about to use to resolve
// redirect.location, and it might own some/all headers) // redirect.location, and it might own some/all headers)
const redirect_count = self._redirect_count; const redirect_count = self._redirect_count;
@@ -522,14 +548,16 @@ pub const Request = struct {
var buf = try self.arena.alloc(u8, 2048); var buf = try self.arena.alloc(u8, 2048);
const previous_host = self._host; const previous_request_host = self._request_host;
self.redirect_uri = try self.uri.resolve_inplace(redirect.location, &buf); self.redirect_uri = try self.request_uri.resolve_inplace(redirect.location, &buf);
self.uri = &self.redirect_uri.?; self.request_uri = &self.redirect_uri.?;
const secure, const host, const port = try decomposeURL(self.uri); const decomposed = try decomposeURL(self._client, self.request_uri);
self._host = host; self.connect_uri = decomposed.connect_uri;
self._port = port; self._request_host = decomposed.request_host;
self._secure = secure; self._connect_host = decomposed.connect_host;
self._connect_port = decomposed.connect_port;
self._secure = decomposed.secure;
self._keepalive = false; self._keepalive = false;
self._redirect_count = redirect_count + 1; self._redirect_count = redirect_count + 1;
@@ -538,7 +566,7 @@ pub const Request = struct {
// to a GET. // to a GET.
self.method = .GET; self.method = .GET;
} }
log.info("redirecting to: {any} {any}", .{ self.method, self.uri }); log.info("redirecting to: {any} {any}", .{ self.method, self.request_uri });
if (self.body != null and self.method == .GET) { if (self.body != null and self.method == .GET) {
// If we have a body and the method is a GET, then we must be following // If we have a body and the method is a GET, then we must be following
@@ -553,10 +581,10 @@ pub const Request = struct {
} }
} }
if (std.mem.eql(u8, previous_host, host) == false) { if (std.mem.eql(u8, previous_request_host, self._request_host) == false) {
for (self.headers.items) |*hdr| { for (self.headers.items) |*hdr| {
if (std.mem.eql(u8, hdr.name, "Host")) { if (std.mem.eql(u8, hdr.name, "Host")) {
hdr.value = host; hdr.value = self._request_host;
break; break;
} }
} }
@@ -577,11 +605,11 @@ pub const Request = struct {
return null; return null;
} }
return self._client.idle_connections.get(self._secure, self._host, self._port, blocking); return self._client.idle_connections.get(self._secure, self._connect_host, self._connect_port, blocking);
} }
fn createSocket(self: *Request, blocking: bool) !struct { posix.socket_t, std.net.Address } { fn createSocket(self: *Request, blocking: bool) !struct { posix.socket_t, std.net.Address } {
const addresses = try std.net.getAddressList(self.arena, self._host, self._port); const addresses = try std.net.getAddressList(self.arena, self._connect_host, self._connect_port);
if (addresses.addrs.len == 0) { if (addresses.addrs.len == 0) {
return error.UnknownHostName; return error.UnknownHostName;
} }
@@ -600,13 +628,15 @@ pub const Request = struct {
} }
fn buildHeader(self: *Request) ![]const u8 { fn buildHeader(self: *Request) ![]const u8 {
const proxied = self.connect_uri != self.request_uri;
const buf = self._state.header_buf; const buf = self._state.header_buf;
var fbs = std.io.fixedBufferStream(buf); var fbs = std.io.fixedBufferStream(buf);
var writer = fbs.writer(); var writer = fbs.writer();
try writer.writeAll(@tagName(self.method)); try writer.writeAll(@tagName(self.method));
try writer.writeByte(' '); try writer.writeByte(' ');
try self.uri.writeToStream(.{ .path = true, .query = true }, writer); try self.request_uri.writeToStream(.{ .scheme = proxied, .authority = proxied, .path = true, .query = true }, writer);
try writer.writeAll(" HTTP/1.1\r\n"); try writer.writeAll(" HTTP/1.1\r\n");
for (self.headers.items) |header| { for (self.headers.items) |header| {
try writer.writeAll(header.name); try writer.writeAll(header.name);
@@ -906,7 +936,7 @@ fn AsyncHandler(comptime H: type, comptime L: type) type {
} }
fn handleError(self: *Self, comptime msg: []const u8, err: anyerror) void { fn handleError(self: *Self, comptime msg: []const u8, err: anyerror) void {
log.err(msg ++ ": {any} ({any} {any})", .{ err, self.request.method, self.request.uri }); log.err(msg ++ ": {any} ({any} {any})", .{ err, self.request.method, self.request.request_uri });
self.handler.onHttpResponse(err) catch {}; self.handler.onHttpResponse(err) catch {};
// just to be safe // just to be safe
self.request._keepalive = false; self.request._keepalive = false;
@@ -1127,7 +1157,7 @@ const SyncHandler = struct {
// See CompressedReader for an explanation. This isn't great code. Sorry. // See CompressedReader for an explanation. This isn't great code. Sorry.
if (reader.response.get("content-encoding")) |ce| { if (reader.response.get("content-encoding")) |ce| {
if (std.ascii.eqlIgnoreCase(ce, "gzip") == false) { if (std.ascii.eqlIgnoreCase(ce, "gzip") == false) {
log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.uri }); log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.request_uri });
return error.UnsupportedContentEncoding; return error.UnsupportedContentEncoding;
} }

View File

@@ -71,6 +71,7 @@ pub fn main() !void {
var app = try App.init(alloc, .{ var app = try App.init(alloc, .{
.run_mode = args.mode, .run_mode = args.mode,
.gc_hints = args.gcHints(), .gc_hints = args.gcHints(),
.http_proxy = args.httpProxy(),
.tls_verify_host = args.tlsVerifyHost(), .tls_verify_host = args.tlsVerifyHost(),
}); });
defer app.deinit(); defer app.deinit();
@@ -137,12 +138,18 @@ const Command = struct {
fn tlsVerifyHost(self: *const Command) bool { fn tlsVerifyHost(self: *const Command) bool {
return switch (self.mode) { return switch (self.mode) {
.serve => |opts| opts.tls_verify_host, inline .serve, .fetch => |opts| opts.tls_verify_host,
.fetch => |opts| opts.tls_verify_host,
else => true, else => true,
}; };
} }
/// Returns the HTTP proxy URI configured for the active command mode.
/// For `serve` and `fetch` this is the mode's `http_proxy` option; any other
/// mode yields null.
fn httpProxy(self: *const Command) ?std.Uri {
return switch (self.mode) {
inline .serve, .fetch => |opts| opts.http_proxy,
else => null,
};
}
const Mode = union(App.RunMode) { const Mode = union(App.RunMode) {
help: bool, // false when being printed because of an error help: bool, // false when being printed because of an error
fetch: Fetch, fetch: Fetch,
@@ -156,12 +163,14 @@ const Command = struct {
timeout: u16, timeout: u16,
gc_hints: bool, gc_hints: bool,
tls_verify_host: bool, tls_verify_host: bool,
http_proxy: ?std.Uri,
}; };
const Fetch = struct { const Fetch = struct {
url: []const u8, url: []const u8,
dump: bool = false, dump: bool = false,
tls_verify_host: bool, tls_verify_host: bool,
http_proxy: ?std.Uri,
}; };
fn printUsageAndExit(self: *const Command, success: bool) void { fn printUsageAndExit(self: *const Command, success: bool) void {
@@ -184,6 +193,9 @@ const Command = struct {
\\ set if you understand and accept the risk of \\ set if you understand and accept the risk of
\\ disabling host verification. \\ disabling host verification.
\\ \\
\\--http_proxy The HTTP proxy to use for all HTTP requests.
\\ Defaults to none.
\\
\\serve command \\serve command
\\Starts a websocket CDP server \\Starts a websocket CDP server
\\Example: {s} serve --host 127.0.0.1 --port 9222 \\Example: {s} serve --host 127.0.0.1 --port 9222
@@ -207,6 +219,9 @@ const Command = struct {
\\ set if you understand and accept the risk of \\ set if you understand and accept the risk of
\\ disabling host verification. \\ disabling host verification.
\\ \\
\\--http_proxy The HTTP proxy to use for all HTTP requests.
\\ Defaults to none.
\\
\\version command \\version command
\\Displays the version of {s} \\Displays the version of {s}
\\ \\
@@ -297,12 +312,13 @@ fn parseServeArgs(
var timeout: u16 = 3; var timeout: u16 = 3;
var gc_hints = false; var gc_hints = false;
var tls_verify_host = true; var tls_verify_host = true;
var http_proxy: ?std.Uri = null;
while (args.next()) |opt| { while (args.next()) |opt| {
if (std.mem.eql(u8, "--host", opt)) { if (std.mem.eql(u8, "--host", opt)) {
const str = args.next() orelse { const str = args.next() orelse {
log.err("--host argument requires an value", .{}); log.err("--host argument requires an value", .{});
return error.InvalidMissingHost; return error.InvalidArgument;
}; };
host = try allocator.dupe(u8, str); host = try allocator.dupe(u8, str);
continue; continue;
@@ -311,12 +327,12 @@ fn parseServeArgs(
if (std.mem.eql(u8, "--port", opt)) { if (std.mem.eql(u8, "--port", opt)) {
const str = args.next() orelse { const str = args.next() orelse {
log.err("--port argument requires an value", .{}); log.err("--port argument requires an value", .{});
return error.InvalidMissingPort; return error.InvalidArgument;
}; };
port = std.fmt.parseInt(u16, str, 10) catch |err| { port = std.fmt.parseInt(u16, str, 10) catch |err| {
log.err("--port value is invalid: {}", .{err}); log.err("--port value is invalid: {}", .{err});
return error.InvalidPort; return error.InvalidArgument;
}; };
continue; continue;
} }
@@ -324,12 +340,12 @@ fn parseServeArgs(
if (std.mem.eql(u8, "--timeout", opt)) { if (std.mem.eql(u8, "--timeout", opt)) {
const str = args.next() orelse { const str = args.next() orelse {
log.err("--timeout argument requires an value", .{}); log.err("--timeout argument requires an value", .{});
return error.MissingTimeout; return error.InvalidArgument;
}; };
timeout = std.fmt.parseInt(u16, str, 10) catch |err| { timeout = std.fmt.parseInt(u16, str, 10) catch |err| {
log.err("--timeout value is invalid: {}", .{err}); log.err("--timeout value is invalid: {}", .{err});
return error.InvalidTimeout; return error.InvalidArgument;
}; };
continue; continue;
} }
@@ -344,6 +360,15 @@ fn parseServeArgs(
continue; continue;
} }
if (std.mem.eql(u8, "--http_proxy", opt)) {
    const str = args.next() orelse {
        log.err("--http_proxy argument requires an value", .{});
        return error.InvalidArgument;
    };
    // std.Uri.parse returns slices into its input, so parse an owned copy
    // that outlives the argument iterator.
    const owned = try allocator.dupe(u8, str);
    const proxy = std.Uri.parse(owned) catch |err| {
        // Report bad values the same way the other options do instead of
        // surfacing a raw parse error.
        log.err("--http_proxy value is invalid: {}", .{err});
        return error.InvalidArgument;
    };
    // A value such as "localhost:8080" parses successfully but has no host
    // component. The HTTP client unwraps the proxy host unconditionally, so
    // reject it here rather than crash on the first request.
    if (proxy.host == null) {
        log.err("--http_proxy value is missing a host", .{});
        return error.InvalidArgument;
    }
    http_proxy = proxy;
    continue;
}
log.err("Unknown option to serve command: '{s}'", .{opt}); log.err("Unknown option to serve command: '{s}'", .{opt});
return error.UnkownOption; return error.UnkownOption;
} }
@@ -353,6 +378,7 @@ fn parseServeArgs(
.port = port, .port = port,
.timeout = timeout, .timeout = timeout,
.gc_hints = gc_hints, .gc_hints = gc_hints,
.http_proxy = http_proxy,
.tls_verify_host = tls_verify_host, .tls_verify_host = tls_verify_host,
}; };
} }
@@ -364,6 +390,7 @@ fn parseFetchArgs(
var dump: bool = false; var dump: bool = false;
var url: ?[]const u8 = null; var url: ?[]const u8 = null;
var tls_verify_host = true; var tls_verify_host = true;
var http_proxy: ?std.Uri = null;
while (args.next()) |opt| { while (args.next()) |opt| {
if (std.mem.eql(u8, "--dump", opt)) { if (std.mem.eql(u8, "--dump", opt)) {
@@ -376,6 +403,15 @@ fn parseFetchArgs(
continue; continue;
} }
if (std.mem.eql(u8, "--http_proxy", opt)) {
    const str = args.next() orelse {
        log.err("--http_proxy argument requires an value", .{});
        return error.InvalidArgument;
    };
    // std.Uri.parse returns slices into its input, so parse an owned copy
    // that outlives the argument iterator.
    const owned = try allocator.dupe(u8, str);
    const proxy = std.Uri.parse(owned) catch |err| {
        // Log the cause and return the same error as the other argument
        // failures instead of surfacing a raw parse error.
        log.err("--http_proxy value is invalid: {}", .{err});
        return error.InvalidArgument;
    };
    // A value such as "localhost:8080" parses successfully but has no host
    // component. The HTTP client unwraps the proxy host unconditionally, so
    // reject it here rather than crash on the first request.
    if (proxy.host == null) {
        log.err("--http_proxy value is missing a host", .{});
        return error.InvalidArgument;
    }
    http_proxy = proxy;
    continue;
}
if (std.mem.startsWith(u8, opt, "--")) { if (std.mem.startsWith(u8, opt, "--")) {
log.err("Unknown option to serve command: '{s}'", .{opt}); log.err("Unknown option to serve command: '{s}'", .{opt});
return error.UnkownOption; return error.UnkownOption;
@@ -396,6 +432,7 @@ fn parseFetchArgs(
return .{ return .{
.url = url.?, .url = url.?,
.dump = dump, .dump = dump,
.http_proxy = http_proxy,
.tls_verify_host = tls_verify_host, .tls_verify_host = tls_verify_host,
}; };
} }