diff --git a/src/app.zig b/src/app.zig index 14117612..260d8e52 100644 --- a/src/app.zig +++ b/src/app.zig @@ -30,6 +30,7 @@ pub const App = struct { run_mode: RunMode, gc_hints: bool = false, tls_verify_host: bool = true, + http_proxy: ?std.Uri = null, }; pub fn init(allocator: Allocator, config: Config) !*App { @@ -54,6 +55,7 @@ pub const App = struct { .app_dir_path = app_dir_path, .notification = notification, .http_client = try HttpClient.init(allocator, 5, .{ + .http_proxy = config.http_proxy, .tls_verify_host = config.tls_verify_host, }), .config = config, diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 45fc0343..bba25918 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -356,7 +356,7 @@ pub const Page = struct { var response = try request.sendSync(.{}); // would be different than self.url in the case of a redirect - self.url = try URL.fromURI(arena, request.uri); + self.url = try URL.fromURI(arena, request.request_uri); const header = response.header; try session.cookie_jar.populateFromResponse(&self.url.uri, &header); diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig index 3d532930..13c05d31 100644 --- a/src/browser/xhr/xhr.zig +++ b/src/browser/xhr/xhr.zig @@ -497,7 +497,7 @@ pub const XMLHttpRequest = struct { self.state = .loading; self.dispatchEvt("readystatechange"); - try self.cookie_jar.populateFromResponse(self.request.?.uri, &header); + try self.cookie_jar.populateFromResponse(self.request.?.request_uri, &header); } if (progress.data) |data| { diff --git a/src/http/client.zig b/src/http/client.zig index e6bf9cf3..353c8cd3 100644 --- a/src/http/client.zig +++ b/src/http/client.zig @@ -46,6 +46,7 @@ const MAX_HEADER_LINE_LEN = 4096; pub const Client = struct { allocator: Allocator, state_pool: StatePool, + http_proxy: ?Uri, root_ca: tls.config.CertBundle, tls_verify_host: bool = true, idle_connections: IdleConnections, @@ -53,6 +54,7 @@ pub const Client = struct { const Opts = struct { 
tls_verify_host: bool = true, + http_proxy: ?std.Uri = null, max_idle_connection: usize = 10, }; @@ -70,6 +72,7 @@ pub const Client = struct { .root_ca = root_ca, .allocator = allocator, .state_pool = state_pool, + .http_proxy = opts.http_proxy, .idle_connections = idle_connections, .tls_verify_host = opts.tls_verify_host, .connection_pool = std.heap.MemoryPool(Connection).init(allocator), @@ -150,10 +153,14 @@ pub const Request = struct { method: Method, // The URI we're requested - uri: *const Uri, + request_uri: *const Uri, + + // The URI that we're connecting to. Can be different than request_uri when + // proxying is enabled + connect_uri: *const Uri, // If we're redirecting, this is where we're redirecting to. The only reason - // we really have this is so that we can set self.uri = &self.redirect_url.? + // we really have this is so that we can set self.request_uri = &self.redirect_url.? redirect_uri: ?Uri = null, // Optional body @@ -174,9 +181,12 @@ pub const Request = struct { // for other requests _keepalive: bool, - _port: u16, + // extracted from request_uri + _request_host: []const u8, - _host: []const u8, + // extracted from connect_uri + _connect_port: u16, + _connect_host: []const u8, // whether or not the socket comes from the connection pool. 
If it does, // and we get an error sending the header, we might retry on a new connection @@ -222,16 +232,18 @@ pub const Request = struct { }; fn init(client: *Client, state: *State, method: Method, uri: *const Uri) !Request { - const secure, const host, const port = try decomposeURL(uri); + const decomposed = try decomposeURL(client, uri); return .{ - .uri = uri, + .request_uri = uri, + .connect_uri = decomposed.connect_uri, .body = null, .headers = .{}, .method = method, .arena = state.arena.allocator(), - ._secure = secure, - ._host = host, - ._port = port, + ._secure = decomposed.secure, + ._connect_host = decomposed.connect_host, + ._connect_port = decomposed.connect_port, + ._request_host = decomposed.request_host, ._state = state, ._client = client, ._connection = null, @@ -249,14 +261,28 @@ pub const Request = struct { self._client.state_pool.release(self._state); } - fn decomposeURL(uri: *const Uri) !struct { bool, []const u8, u16 } { + const DecomposedURL = struct { + secure: bool, + connect_port: u16, + connect_host: []const u8, + connect_uri: *const std.Uri, + request_host: []const u8, + }; + fn decomposeURL(client: *const Client, uri: *const Uri) !DecomposedURL { if (uri.host == null) { return error.UriMissingHost; } + const request_host = uri.host.?.percent_encoded; + + var connect_uri = uri; + var connect_host = request_host; + if (client.http_proxy) |*proxy| { + connect_uri = proxy; + connect_host = (proxy.host orelse return error.UriMissingHost).percent_encoded; + } var secure: bool = undefined; - - const scheme = uri.scheme; + const scheme = connect_uri.scheme; if (std.ascii.eqlIgnoreCase(scheme, "https")) { secure = true; } else if (std.ascii.eqlIgnoreCase(scheme, "http")) { @@ -264,11 +290,15 @@ pub const Request = struct { } else { return error.UnsupportedUriScheme; } + const connect_port: u16 = connect_uri.port orelse if (secure) 443 else 80; - const host = uri.host.?.percent_encoded; - const port: u16 = uri.port orelse if (secure) 443 else 80; - - return .{ secure, host, port 
}; + return .{ + .secure = secure, + .connect_port = connect_port, + .connect_host = connect_host, + .connect_uri = connect_uri, + .request_host = request_host, + }; } // Called in deinit, but also called when we're redirecting to another page @@ -293,11 +323,11 @@ pub const Request = struct { errdefer client.connection_pool.destroy(connection); connection.* = .{ - .socket = socket, .tls = null, - .port = self._port, + .socket = socket, .blocking = blocking, - .host = try client.allocator.dupe(u8, self._host), + .port = self._connect_port, + .host = try client.allocator.dupe(u8, self._connect_host), }; return connection; @@ -374,12 +404,10 @@ pub const Request = struct { return err; }; - errdefer self.destroyConnection(connection); - if (self._secure) { connection.tls = .{ .blocking = try tls.client(std.net.Stream{ .handle = socket }, .{ - .host = connection.host, + .host = self._connect_host, .root_ca = self._client.root_ca, .insecure_skip_verify = self._tls_verify_host == false, // .key_log_callback = tls.config.key_log.callback, @@ -391,11 +419,9 @@ pub const Request = struct { self._connection_from_keepalive = false; } - errdefer self.destroyConnection(self._connection.?); - var handler = SyncHandler{ .request = self }; return handler.send() catch |err| { - log.warn("HTTP error: {any} ({any} {any} {d})", .{ err, self.method, self.uri, self._redirect_count }); + log.warn("HTTP error: {any} ({any} {any} {d})", .{ err, self.method, self.request_uri, self._redirect_count }); return err; }; } @@ -461,7 +487,7 @@ pub const Request = struct { if (self._secure) { connection.tls = .{ .nonblocking = try tls.nb.Client().init(self._client.allocator, .{ - .host = connection.host, + .host = self._connect_host, .root_ca = self._client.root_ca, .insecure_skip_verify = self._tls_verify_host == false, .key_log_callback = tls.config.key_log.callback, @@ -501,7 +527,7 @@ pub const Request = struct { } if (!self._has_host_header) { - try self.headers.append(arena, .{ .name = 
"Host", .value = self._host }); + try self.headers.append(arena, .{ .name = "Host", .value = self._request_host }); } try self.headers.append(arena, .{ .name = "User-Agent", .value = "Lightpanda/1.0" }); @@ -512,7 +538,7 @@ pub const Request = struct { self.releaseConnection(); // CANNOT reset the arena (╥﹏╥) - // We need it for self.uri (which we're about to use to resolve + // We need it for self.request_uri (which we're about to use to resolve // redirect.location, and it might own some/all headers) const redirect_count = self._redirect_count; @@ -522,14 +548,16 @@ pub const Request = struct { var buf = try self.arena.alloc(u8, 2048); - const previous_host = self._host; - self.redirect_uri = try self.uri.resolve_inplace(redirect.location, &buf); + const previous_request_host = self._request_host; + self.redirect_uri = try self.request_uri.resolve_inplace(redirect.location, &buf); - self.uri = &self.redirect_uri.?; - const secure, const host, const port = try decomposeURL(self.uri); - self._host = host; - self._port = port; - self._secure = secure; + self.request_uri = &self.redirect_uri.?; + const decomposed = try decomposeURL(self._client, self.request_uri); + self.connect_uri = decomposed.connect_uri; + self._request_host = decomposed.request_host; + self._connect_host = decomposed.connect_host; + self._connect_port = decomposed.connect_port; + self._secure = decomposed.secure; self._keepalive = false; self._redirect_count = redirect_count + 1; @@ -538,7 +566,7 @@ pub const Request = struct { // to a GET. 
self.method = .GET; } - log.info("redirecting to: {any} {any}", .{ self.method, self.uri }); + log.info("redirecting to: {any} {any}", .{ self.method, self.request_uri }); if (self.body != null and self.method == .GET) { // If we have a body and the method is a GET, then we must be following @@ -553,10 +581,10 @@ pub const Request = struct { } } - if (std.mem.eql(u8, previous_host, host) == false) { + if (std.mem.eql(u8, previous_request_host, self._request_host) == false) { for (self.headers.items) |*hdr| { if (std.mem.eql(u8, hdr.name, "Host")) { - hdr.value = host; + hdr.value = self._request_host; break; } } @@ -577,11 +605,11 @@ pub const Request = struct { return null; } - return self._client.idle_connections.get(self._secure, self._host, self._port, blocking); + return self._client.idle_connections.get(self._secure, self._connect_host, self._connect_port, blocking); } fn createSocket(self: *Request, blocking: bool) !struct { posix.socket_t, std.net.Address } { - const addresses = try std.net.getAddressList(self.arena, self._host, self._port); + const addresses = try std.net.getAddressList(self.arena, self._connect_host, self._connect_port); if (addresses.addrs.len == 0) { return error.UnknownHostName; } @@ -600,13 +628,15 @@ pub const Request = struct { } fn buildHeader(self: *Request) ![]const u8 { + const proxied = self.connect_uri != self.request_uri; + const buf = self._state.header_buf; var fbs = std.io.fixedBufferStream(buf); var writer = fbs.writer(); try writer.writeAll(@tagName(self.method)); try writer.writeByte(' '); - try self.uri.writeToStream(.{ .path = true, .query = true }, writer); + try self.request_uri.writeToStream(.{ .scheme = proxied, .authority = proxied, .path = true, .query = true }, writer); try writer.writeAll(" HTTP/1.1\r\n"); for (self.headers.items) |header| { try writer.writeAll(header.name); @@ -906,7 +936,7 @@ fn AsyncHandler(comptime H: type, comptime L: type) type { } fn handleError(self: *Self, comptime msg: []const u8, 
err: anyerror) void { - log.err(msg ++ ": {any} ({any} {any})", .{ err, self.request.method, self.request.uri }); + log.err(msg ++ ": {any} ({any} {any})", .{ err, self.request.method, self.request.request_uri }); self.handler.onHttpResponse(err) catch {}; // just to be safe self.request._keepalive = false; @@ -1127,7 +1157,7 @@ const SyncHandler = struct { // See CompressedReader for an explanation. This isn't great code. Sorry. if (reader.response.get("content-encoding")) |ce| { if (std.ascii.eqlIgnoreCase(ce, "gzip") == false) { - log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.uri }); + log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.request_uri }); return error.UnsupportedContentEncoding; } diff --git a/src/main.zig b/src/main.zig index c6c163d5..f6b73bec 100644 --- a/src/main.zig +++ b/src/main.zig @@ -71,6 +71,7 @@ pub fn main() !void { var app = try App.init(alloc, .{ .run_mode = args.mode, .gc_hints = args.gcHints(), + .http_proxy = args.httpProxy(), .tls_verify_host = args.tlsVerifyHost(), }); defer app.deinit(); @@ -137,12 +138,18 @@ const Command = struct { fn tlsVerifyHost(self: *const Command) bool { return switch (self.mode) { - .serve => |opts| opts.tls_verify_host, - .fetch => |opts| opts.tls_verify_host, + inline .serve, .fetch => |opts| opts.tls_verify_host, else => true, }; } + fn httpProxy(self: *const Command) ?std.Uri { + return switch (self.mode) { + inline .serve, .fetch => |opts| opts.http_proxy, + else => null, + }; + } + const Mode = union(App.RunMode) { help: bool, // false when being printed because of an error fetch: Fetch, @@ -156,12 +163,14 @@ const Command = struct { timeout: u16, gc_hints: bool, tls_verify_host: bool, + http_proxy: ?std.Uri, }; const Fetch = struct { url: []const u8, dump: bool = false, tls_verify_host: bool, + http_proxy: ?std.Uri, }; fn printUsageAndExit(self: *const Command, success: bool) void { @@ -184,6 +193,9 @@ const Command = struct { \\ set if you understand and 
accept the risk of \\ disabling host verification. \\ + \\--http_proxy The HTTP proxy to use for all HTTP requests. + \\ Defaults to none. + \\ \\serve command \\Starts a websocket CDP server \\Example: {s} serve --host 127.0.0.1 --port 9222 @@ -207,6 +219,9 @@ const Command = struct { \\ set if you understand and accept the risk of \\ disabling host verification. \\ + \\--http_proxy The HTTP proxy to use for all HTTP requests. + \\ Defaults to none. + \\ \\version command \\Displays the version of {s} \\ @@ -297,12 +312,13 @@ fn parseServeArgs( var timeout: u16 = 3; var gc_hints = false; var tls_verify_host = true; + var http_proxy: ?std.Uri = null; while (args.next()) |opt| { if (std.mem.eql(u8, "--host", opt)) { const str = args.next() orelse { log.err("--host argument requires an value", .{}); - return error.InvalidMissingHost; + return error.InvalidArgument; }; host = try allocator.dupe(u8, str); continue; @@ -311,12 +327,12 @@ fn parseServeArgs( if (std.mem.eql(u8, "--port", opt)) { const str = args.next() orelse { log.err("--port argument requires an value", .{}); - return error.InvalidMissingPort; + return error.InvalidArgument; }; port = std.fmt.parseInt(u16, str, 10) catch |err| { log.err("--port value is invalid: {}", .{err}); - return error.InvalidPort; + return error.InvalidArgument; }; continue; } @@ -324,12 +340,12 @@ fn parseServeArgs( if (std.mem.eql(u8, "--timeout", opt)) { const str = args.next() orelse { log.err("--timeout argument requires an value", .{}); - return error.MissingTimeout; + return error.InvalidArgument; }; timeout = std.fmt.parseInt(u16, str, 10) catch |err| { log.err("--timeout value is invalid: {}", .{err}); - return error.InvalidTimeout; + return error.InvalidArgument; }; continue; } @@ -344,6 +360,15 @@ fn parseServeArgs( continue; } + if (std.mem.eql(u8, "--http_proxy", opt)) { + const str = args.next() orelse { + log.err("--http_proxy argument requires an value", .{}); + return error.InvalidArgument; + }; + http_proxy = try 
std.Uri.parse(try allocator.dupe(u8, str)); + continue; + } + log.err("Unknown option to serve command: '{s}'", .{opt}); return error.UnkownOption; } @@ -353,6 +378,7 @@ fn parseServeArgs( .port = port, .timeout = timeout, .gc_hints = gc_hints, + .http_proxy = http_proxy, .tls_verify_host = tls_verify_host, }; } @@ -364,6 +390,7 @@ fn parseFetchArgs( var dump: bool = false; var url: ?[]const u8 = null; var tls_verify_host = true; + var http_proxy: ?std.Uri = null; while (args.next()) |opt| { if (std.mem.eql(u8, "--dump", opt)) { @@ -376,6 +403,15 @@ fn parseFetchArgs( continue; } + if (std.mem.eql(u8, "--http_proxy", opt)) { + const str = args.next() orelse { + log.err("--http_proxy argument requires an value", .{}); + return error.InvalidArgument; + }; + http_proxy = try std.Uri.parse(try allocator.dupe(u8, str)); + continue; + } + if (std.mem.startsWith(u8, opt, "--")) { log.err("Unknown option to serve command: '{s}'", .{opt}); return error.UnkownOption; @@ -396,6 +432,7 @@ fn parseFetchArgs( return .{ .url = url.?, .dump = dump, + .http_proxy = http_proxy, .tls_verify_host = tls_verify_host, }; }