robots in the actual http client

This commit is contained in:
Muki Kiboigo
2026-01-31 18:41:55 -08:00
parent 48ebc46c5f
commit 1a246f2e38
9 changed files with 357 additions and 62 deletions

View File

@@ -57,6 +57,13 @@ pub fn tlsVerifyHost(self: *const Config) bool {
};
}
/// Reports whether the `obey_robots` flag is set in the common options of
/// the active mode.
///
/// Only the `.serve` and `.fetch` modes are handled; reaching this accessor
/// in any other mode hits `unreachable`.
pub fn obeyRobots(self: *const Config) bool {
    switch (self.mode) {
        // Both handled modes expose the shared option set through `common`.
        inline .serve, .fetch => |mode_opts| return mode_opts.common.obey_robots,
        else => unreachable,
    }
}
pub fn httpProxy(self: *const Config) ?[:0]const u8 {
return switch (self.mode) {
inline .serve, .fetch => |opts| opts.common.http_proxy,
@@ -158,6 +165,7 @@ pub const Fetch = struct {
};
pub const Common = struct {
obey_robots: bool = false,
proxy_bearer_token: ?[:0]const u8 = null,
http_proxy: ?[:0]const u8 = null,
http_max_concurrent: ?u8 = null,
@@ -223,6 +231,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\ advanced option which should only be set if you understand
\\ and accept the risk of disabling host verification.
\\
\\--obey_robots
\\ Fetches and obeys the robots.txt (if available) of the web pages
\\ we make requests towards.
\\ Defaults to false.
\\
\\--http_proxy The HTTP proxy to use for all HTTP requests.
\\ A username:password can be included for basic authentication.
\\ Defaults to none.
@@ -613,6 +626,11 @@ fn parseCommonArg(
return true;
}
if (std.mem.eql(u8, "--obey_robots", opt)) {
common.obey_robots = true;
return true;
}
if (std.mem.eql(u8, "--http_proxy", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--http_proxy" });