diff --git a/src/app.zig b/src/app.zig
index ee690e73..3b3772c4 100644
--- a/src/app.zig
+++ b/src/app.zig
@@ -36,6 +36,7 @@ pub const App = struct {
         http_connect_timeout_ms: ?u31 = null,
         http_max_host_open: ?u8 = null,
         http_max_concurrent: ?u8 = null,
+        user_agent: [:0]const u8,
     };
 
     pub fn init(allocator: Allocator, config: Config) !*App {
@@ -53,6 +54,7 @@ pub const App = struct {
             .http_proxy = config.http_proxy,
             .tls_verify_host = config.tls_verify_host,
             .proxy_bearer_token = config.proxy_bearer_token,
+            .user_agent = config.user_agent,
         });
         errdefer http.deinit();
 
diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig
index 7987aa88..e787c971 100644
--- a/src/browser/ScriptManager.zig
+++ b/src/browser/ScriptManager.zig
@@ -205,7 +205,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
 
     errdefer pending_script.deinit();
 
-    var headers = try Http.Headers.init();
+    var headers = try self.client.newHeaders();
     try page.requestCookie(.{}).headersForRequest(page.arena, remote_url.?, &headers);
 
     try self.client.request(.{
@@ -273,7 +273,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
         .buffer_pool = &self.buffer_pool,
     };
 
-    var headers = try Http.Headers.init();
+    var headers = try self.client.newHeaders();
     try self.page.requestCookie(.{}).headersForRequest(self.page.arena, url, &headers);
 
     var client = self.client;
diff --git a/src/browser/page.zig b/src/browser/page.zig
index ae426d21..fca79144 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -548,7 +548,7 @@ pub const Page = struct {
         const owned_url = try self.arena.dupeZ(u8, request_url);
         self.url = try URL.parse(owned_url, null);
 
-        var headers = try Http.Headers.init();
+        var headers = try self.http_client.newHeaders();
         if (opts.header) |hdr| try headers.add(hdr);
         try self.requestCookie(.{ .is_navigation = true }).headersForRequest(self.arena, owned_url, &headers);
 
diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig
index 3117e9e9..f374ed6d 100644
--- a/src/browser/xhr/xhr.zig
+++ b/src/browser/xhr/xhr.zig
@@ -370,7 +370,7 @@ pub const XMLHttpRequest = struct {
             }
         }
 
-        var headers = try Http.Headers.init();
+        var headers = try page.http_client.newHeaders();
         for (self.headers.items) |hdr| {
             try headers.add(hdr);
         }
diff --git a/src/http/Client.zig b/src/http/Client.zig
index 11e8512d..e603ae9b 100644
--- a/src/http/Client.zig
+++ b/src/http/Client.zig
@@ -96,6 +96,9 @@ notification: ?*Notification = null,
 // restoring, this originally-configured value is what it goes to.
 http_proxy: ?[:0]const u8 = null,
 
+// The complete User-Agent header line ("User-Agent: <value>"); newHeaders uses it to seed each new Http.Headers.
+user_agent: [:0]const u8,
+
 // libcurl can monitor arbitrary sockets. Currently, we ever [maybe] want to
 // monitor the CDP client socket, so we've done the simplest thing possible
 // by having this single optional field
@@ -130,6 +133,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Clie
         .blocking = blocking,
         .allocator = allocator,
         .http_proxy = opts.http_proxy,
+        .user_agent = opts.user_agent,
         .transfer_pool = transfer_pool,
     };
 
@@ -147,6 +151,10 @@ pub fn deinit(self: *Client) void {
     self.allocator.destroy(self);
 }
 
+pub fn newHeaders(self: *const Client) !Http.Headers {
+    return Http.Headers.init(self.user_agent);
+}
+
 pub fn abort(self: *Client) void {
     while (self.handles.in_use.first) |node| {
         const handle: *Handle = @fieldParentPtr("node", node);
@@ -796,7 +804,7 @@ pub const Transfer = struct {
         self.req.headers.deinit();
 
         var buf: std.ArrayListUnmanaged(u8) = .empty;
-        var new_headers = try Http.Headers.init();
+        var new_headers = try self.client.newHeaders();
         for (headers) |hdr| {
             // safe to re-use this buffer, because Headers.add because curl copies
             // the value we pass into curl_slist_append.
diff --git a/src/http/Http.zig b/src/http/Http.zig
index 21b88469..5de4d859 100644
--- a/src/http/Http.zig
+++ b/src/http/Http.zig
@@ -102,12 +102,18 @@ pub fn newConnection(self: *Http) !Connection {
     return Connection.init(self.ca_blob, &self.opts);
 }
 
+/// Creates a header list seeded with the configured User-Agent line; fails with error.OutOfMemory if curl cannot allocate it.
+pub fn newHeaders(self: *const Http) !Headers {
+    return Headers.init(self.opts.user_agent);
+}
+
 pub const Connection = struct {
     easy: *c.CURL,
     opts: Connection.Opts,
 
     const Opts = struct {
         proxy_bearer_token: ?[:0]const u8,
+        user_agent: [:0]const u8,
     };
 
     // pointer to opts is not stable, don't hold a reference to it!
@@ -168,6 +174,7 @@ pub const Connection = struct {
         return .{
             .easy = easy,
             .opts = .{
+                .user_agent = opts.user_agent,
                 .proxy_bearer_token = opts.proxy_bearer_token,
             },
         };
@@ -230,7 +237,7 @@ pub const Connection = struct {
     pub fn request(self: *const Connection) !u16 {
         const easy = self.easy;
 
-        var header_list = try Headers.init();
+        var header_list = try Headers.init(self.opts.user_agent);
         defer header_list.deinit();
         try self.secretHeaders(&header_list);
         try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HTTPHEADER, header_list.headers));
@@ -259,8 +266,9 @@ pub const Headers = struct {
     headers: *c.curl_slist,
     cookies: ?[*c]const u8,
 
-    pub fn init() !Headers {
-        const header_list = c.curl_slist_append(null, "User-Agent: Lightpanda/1.0");
+    /// Starts a header list whose first entry is `user_agent`, passed to curl as-is (a full line such as "User-Agent: Lightpanda/1.0").
+    pub fn init(user_agent: [:0]const u8) !Headers {
+        const header_list = c.curl_slist_append(null, user_agent);
         if (header_list == null) return error.OutOfMemory;
         return .{ .headers = header_list, .cookies = null };
     }
@@ -337,6 +345,7 @@ pub const Opts = struct {
     tls_verify_host: bool = true,
     http_proxy: ?[:0]const u8 = null,
     proxy_bearer_token: ?[:0]const u8 = null,
+    user_agent: [:0]const u8,
 };
 
 pub const Method = enum {
diff --git a/src/main.zig b/src/main.zig
index 7485e8fa..fdfc9142 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -108,6 +108,15 @@ fn run(alloc: Allocator) !void {
         log.opts.filter_scopes = lfs;
     }
 
+    // Full User-Agent header line; the optional --user_agent_suffix value is appended after a single space.
+    const user_agent = blk: {
+        const USER_AGENT = "User-Agent: Lightpanda/1.0";
+        if (args.userAgentSuffix()) |suffix| {
+            break :blk try std.fmt.allocPrintSentinel(args_arena.allocator(), "{s} {s}", .{ USER_AGENT, suffix }, 0);
+        }
+        break :blk USER_AGENT;
+    };
+
     // _app is global to handle graceful shutdown.
     _app = try App.init(alloc, .{
         .run_mode = args.mode,
@@ -118,6 +127,7 @@ fn run(alloc: Allocator) !void {
         .http_connect_timeout_ms = args.httpConnectTiemout(),
         .http_max_host_open = args.httpMaxHostOpen(),
         .http_max_concurrent = args.httpMaxConcurrent(),
+        .user_agent = user_agent,
     });
     const app = _app.?;
 
@@ -260,6 +270,14 @@ const Command = struct {
         };
     }
 
+    /// Returns the --user_agent_suffix value for the serve and fetch modes (null when not given); any other mode is unreachable.
+    fn userAgentSuffix(self: *const Command) ?[]const u8 {
+        return switch (self.mode) {
+            inline .serve, .fetch => |opts| opts.common.user_agent_suffix,
+            else => unreachable,
+        };
+    }
+
     const Mode = union(App.RunMode) {
         help: bool, // false when being printed because of an error
         fetch: Fetch,
@@ -293,6 +311,7 @@ const Command = struct {
         log_level: ?log.Level = null,
         log_format: ?log.Format = null,
         log_filter_scopes: ?[]log.Scope = null,
+        user_agent_suffix: ?[]const u8 = null,
     };
 
     fn printUsageAndExit(self: *const Command, success: bool) void {
@@ -339,6 +358,9 @@ const Command = struct {
             \\ Defaults to
         ++ (if (builtin.mode == .Debug) " pretty." else " logfmt.") ++
             \\
+            \\ --user_agent_suffix
+            \\ Suffix to append to the Lightpanda/X.Y User-Agent
+            \\
         ;
 
         // MAX_HELP_LEN|
@@ -713,6 +735,22 @@ fn parseCommonArg(
         return true;
     }
 
+    if (std.mem.eql(u8, "--user_agent_suffix", opt)) {
+        const str = args.next() orelse {
+            log.fatal(.app, "missing argument value", .{ .arg = "--user_agent_suffix" });
+            return error.InvalidArgument;
+        };
+        // Only printable ASCII is accepted: the suffix is embedded verbatim in the User-Agent header line.
+        for (str) |c| {
+            if (!std.ascii.isPrint(c)) {
+                log.fatal(.app, "not printable character", .{ .arg = "--user_agent_suffix" });
+                return error.InvalidArgument;
+            }
+        }
+        common.user_agent_suffix = try allocator.dupe(u8, str);
+        return true;
+    }
+
     return false;
 }
 
diff --git a/src/testing.zig b/src/testing.zig
index d8c894ce..1eac2f22 100644
--- a/src/testing.zig
+++ b/src/testing.zig
@@ -493,6 +493,7 @@ pub fn setup() !void {
     test_app = try App.init(gpa.allocator(), .{
         .run_mode = .serve,
         .tls_verify_host = false,
+        .user_agent = "User-Agent: Lightpanda/1.0 internal-tester",
     });
     errdefer test_app.deinit();
 