robots in the actual http client

This commit is contained in:
Muki Kiboigo
2026-01-31 18:41:55 -08:00
parent 48ebc46c5f
commit 1a246f2e38
9 changed files with 357 additions and 62 deletions

View File

@@ -25,6 +25,7 @@ const Config = @import("Config.zig");
const Snapshot = @import("browser/js/Snapshot.zig"); const Snapshot = @import("browser/js/Snapshot.zig");
const Platform = @import("browser/js/Platform.zig"); const Platform = @import("browser/js/Platform.zig");
const Telemetry = @import("telemetry/telemetry.zig").Telemetry; const Telemetry = @import("telemetry/telemetry.zig").Telemetry;
const RobotStore = @import("browser/Robots.zig").RobotStore;
pub const Http = @import("http/Http.zig"); pub const Http = @import("http/Http.zig");
pub const ArenaPool = @import("ArenaPool.zig"); pub const ArenaPool = @import("ArenaPool.zig");
@@ -38,6 +39,7 @@ snapshot: Snapshot,
telemetry: Telemetry, telemetry: Telemetry,
allocator: Allocator, allocator: Allocator,
arena_pool: ArenaPool, arena_pool: ArenaPool,
robots: RobotStore,
app_dir_path: ?[]const u8, app_dir_path: ?[]const u8,
shutdown: bool = false, shutdown: bool = false,
@@ -57,6 +59,8 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
app.snapshot = try Snapshot.load(); app.snapshot = try Snapshot.load();
errdefer app.snapshot.deinit(); errdefer app.snapshot.deinit();
app.robots = RobotStore.init(allocator);
app.app_dir_path = getAndMakeAppDir(allocator); app.app_dir_path = getAndMakeAppDir(allocator);
app.telemetry = try Telemetry.init(app, config.mode); app.telemetry = try Telemetry.init(app, config.mode);
@@ -79,6 +83,7 @@ pub fn deinit(self: *App) void {
self.app_dir_path = null; self.app_dir_path = null;
} }
self.telemetry.deinit(); self.telemetry.deinit();
self.robots.deinit();
self.http.deinit(); self.http.deinit();
self.snapshot.deinit(); self.snapshot.deinit();
self.platform.deinit(); self.platform.deinit();

View File

@@ -57,6 +57,13 @@ pub fn tlsVerifyHost(self: *const Config) bool {
}; };
} }
/// Reports whether the `obey_robots` common option is enabled for the current
/// mode. Only the `serve` and `fetch` modes carry common options; any other
/// mode is asserted to never reach this accessor.
pub fn obeyRobots(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch => |opts| return opts.common.obey_robots,
        else => unreachable,
    }
}
pub fn httpProxy(self: *const Config) ?[:0]const u8 { pub fn httpProxy(self: *const Config) ?[:0]const u8 {
return switch (self.mode) { return switch (self.mode) {
inline .serve, .fetch => |opts| opts.common.http_proxy, inline .serve, .fetch => |opts| opts.common.http_proxy,
@@ -158,6 +165,7 @@ pub const Fetch = struct {
}; };
pub const Common = struct { pub const Common = struct {
obey_robots: bool = false,
proxy_bearer_token: ?[:0]const u8 = null, proxy_bearer_token: ?[:0]const u8 = null,
http_proxy: ?[:0]const u8 = null, http_proxy: ?[:0]const u8 = null,
http_max_concurrent: ?u8 = null, http_max_concurrent: ?u8 = null,
@@ -223,6 +231,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\ advanced option which should only be set if you understand \\ advanced option which should only be set if you understand
\\ and accept the risk of disabling host verification. \\ and accept the risk of disabling host verification.
\\ \\
\\--obey_robots
\\ Fetches and obeys the robots.txt (if available) of the web pages
\\ we make requests towards.
\\ Defaults to false.
\\
\\--http_proxy The HTTP proxy to use for all HTTP requests. \\--http_proxy The HTTP proxy to use for all HTTP requests.
\\ A username:password can be included for basic authentication. \\ A username:password can be included for basic authentication.
\\ Defaults to none. \\ Defaults to none.
@@ -613,6 +626,11 @@ fn parseCommonArg(
return true; return true;
} }
if (std.mem.eql(u8, "--obey_robots", opt)) {
common.obey_robots = true;
return true;
}
if (std.mem.eql(u8, "--http_proxy", opt)) { if (std.mem.eql(u8, "--http_proxy", opt)) {
const str = args.next() orelse { const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--http_proxy" }); log.fatal(.app, "missing argument value", .{ .arg = "--http_proxy" });

View File

@@ -559,6 +559,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
.headers = headers, .headers = headers,
.body = opts.body, .body = opts.body,
.cookie_jar = &self._session.cookie_jar, .cookie_jar = &self._session.cookie_jar,
.robots = &self._session.browser.app.robots,
.resource_type = .document, .resource_type = .document,
.notification = self._session.notification, .notification = self._session.notification,
.header_callback = pageHeaderDoneCallback, .header_callback = pageHeaderDoneCallback,

View File

@@ -33,13 +33,80 @@ pub const Key = enum {
pub const Robots = @This(); pub const Robots = @This();
pub const empty: Robots = .{ .rules = &.{} }; pub const empty: Robots = .{ .rules = &.{} };
/// Cache of robots.txt lookups, keyed by robots.txt URL.
///
/// Keys are hashed and compared ASCII case-insensitively. The store owns its
/// keys (it copies the URL given to `put`/`putAbsent`) as well as every
/// `Robots` value handed to `put`; `deinit` releases all of them with the
/// allocator passed to `init`.
pub const RobotStore = struct {
    /// Outcome of an earlier robots.txt lookup.
    const RobotsEntry = union(enum) {
        /// A robots.txt was parsed and cached through `put`.
        present: Robots,
        /// The URL was recorded through `putAbsent`.
        absent,
    };

    pub const RobotsMap = std.HashMapUnmanaged([]const u8, RobotsEntry, struct {
        const Context = @This();

        // The unmanaged hash map works with 64-bit hashes, so hand back the
        // full Wyhash output instead of truncating it to 32 bits.
        pub fn hash(_: Context, value: []const u8) u64 {
            var hasher = std.hash.Wyhash.init(value.len);
            for (value) |c| {
                std.hash.autoHash(&hasher, std.ascii.toLower(c));
            }
            return hasher.final();
        }

        pub fn eql(_: Context, a: []const u8, b: []const u8) bool {
            // eqlIgnoreCase already rejects slices of different lengths.
            return std.ascii.eqlIgnoreCase(a, b);
        }
    }, 80);

    allocator: std.mem.Allocator,
    map: RobotsMap,

    /// Creates an empty store. Nothing is allocated until the first insert.
    pub fn init(allocator: std.mem.Allocator) RobotStore {
        return .{ .allocator = allocator, .map = .empty };
    }

    /// Frees every key, every cached `Robots` and the map itself.
    pub fn deinit(self: *RobotStore) void {
        var iter = self.map.iterator();
        while (iter.next()) |entry| {
            self.allocator.free(entry.key_ptr.*);
            releaseEntry(self.allocator, entry.value_ptr);
        }
        self.map.deinit(self.allocator);
    }

    /// Returns a copy of the cached entry for `url`, or null when nothing has
    /// been recorded for it yet.
    pub fn get(self: *RobotStore, url: []const u8) ?RobotsEntry {
        return self.map.get(url);
    }

    /// Parses `bytes` with the store's allocator. The result is not cached;
    /// pass it to `put` to hand ownership to the store.
    pub fn robotsFromBytes(self: *RobotStore, user_agent: []const u8, bytes: []const u8) !Robots {
        return try Robots.fromBytes(self.allocator, user_agent, bytes);
    }

    /// Caches `robots` for `url` and takes ownership of it on success. If the
    /// URL is already cached, the previous value is released and replaced.
    /// On error the store is unchanged and the caller still owns `robots`.
    pub fn put(self: *RobotStore, url: []const u8, robots: Robots) !void {
        return self.putEntry(url, .{ .present = robots });
    }

    /// Records `url` as absent, replacing (and releasing) any previously
    /// cached value for that URL.
    pub fn putAbsent(self: *RobotStore, url: []const u8) !void {
        return self.putEntry(url, .absent);
    }

    /// Shared insert path for `put` and `putAbsent`.
    fn putEntry(self: *RobotStore, url: []const u8, entry: RobotsEntry) !void {
        // An existing slot keeps the key it already owns. Only the value is
        // swapped, so neither a second key copy nor the old value can leak.
        if (self.map.getPtr(url)) |existing| {
            releaseEntry(self.allocator, existing);
            existing.* = entry;
            return;
        }

        const duped = try self.allocator.dupe(u8, url);
        // The map did not take the key if the insert fails.
        errdefer self.allocator.free(duped);
        try self.map.putNoClobber(self.allocator, duped, entry);
    }

    /// Releases whatever a cached entry owns.
    fn releaseEntry(allocator: std.mem.Allocator, entry: *RobotsEntry) void {
        switch (entry.*) {
            .present => |*robots| robots.deinit(allocator),
            .absent => {},
        }
    }
};
rules: []const Rule, rules: []const Rule,
const State = enum { const State = struct {
not_in_entry, entry: enum {
in_other_entry, not_in_entry,
in_our_entry, in_other_entry,
in_wildcard_entry, in_our_entry,
in_wildcard_entry,
},
has_rules: bool = false,
}; };
fn freeRulesInList(allocator: std.mem.Allocator, rules: []const Rule) void { fn freeRulesInList(allocator: std.mem.Allocator, rules: []const Rule) void {
@@ -62,7 +129,7 @@ fn parseRulesWithUserAgent(
var wildcard_rules: std.ArrayList(Rule) = .empty; var wildcard_rules: std.ArrayList(Rule) = .empty;
defer wildcard_rules.deinit(allocator); defer wildcard_rules.deinit(allocator);
var state: State = .not_in_entry; var state: State = .{ .entry = .not_in_entry, .has_rules = false };
var iter = std.mem.splitScalar(u8, bytes, '\n'); var iter = std.mem.splitScalar(u8, bytes, '\n');
while (iter.next()) |line| { while (iter.next()) |line| {
@@ -78,7 +145,6 @@ fn parseRulesWithUserAgent(
trimmed; trimmed;
if (true_line.len == 0) { if (true_line.len == 0) {
state = .not_in_entry;
continue; continue;
} }
@@ -94,55 +160,69 @@ fn parseRulesWithUserAgent(
const value = std.mem.trim(u8, true_line[colon_idx + 1 ..], &std.ascii.whitespace); const value = std.mem.trim(u8, true_line[colon_idx + 1 ..], &std.ascii.whitespace);
switch (key) { switch (key) {
.@"user-agent" => switch (state) { .@"user-agent" => {
.in_other_entry => { if (state.has_rules) {
if (std.ascii.eqlIgnoreCase(user_agent, value)) { state = .{ .entry = .not_in_entry, .has_rules = false };
state = .in_our_entry; }
}
}, switch (state.entry) {
.in_our_entry => {}, .in_other_entry => {
.in_wildcard_entry => { if (std.ascii.eqlIgnoreCase(user_agent, value)) {
if (std.ascii.eqlIgnoreCase(user_agent, value)) { state.entry = .in_our_entry;
state = .in_our_entry; }
} },
}, .in_our_entry => {},
.not_in_entry => { .in_wildcard_entry => {
if (std.ascii.eqlIgnoreCase(user_agent, value)) { if (std.ascii.eqlIgnoreCase(user_agent, value)) {
state = .in_our_entry; state.entry = .in_our_entry;
} else if (std.mem.eql(u8, "*", value)) { }
state = .in_wildcard_entry; },
} else { .not_in_entry => {
state = .in_other_entry; if (std.ascii.eqlIgnoreCase(user_agent, value)) {
} state.entry = .in_our_entry;
}, } else if (std.mem.eql(u8, "*", value)) {
state.entry = .in_wildcard_entry;
} else {
state.entry = .in_other_entry;
}
},
}
}, },
.allow => switch (state) { .allow => {
.in_our_entry => { defer state.has_rules = true;
const duped_value = try allocator.dupe(u8, value);
errdefer allocator.free(duped_value); switch (state.entry) {
try rules.append(allocator, .{ .allow = duped_value }); .in_our_entry => {
}, const duped_value = try allocator.dupe(u8, value);
.in_other_entry => {}, errdefer allocator.free(duped_value);
.in_wildcard_entry => { try rules.append(allocator, .{ .allow = duped_value });
const duped_value = try allocator.dupe(u8, value); },
errdefer allocator.free(duped_value); .in_other_entry => {},
try wildcard_rules.append(allocator, .{ .allow = duped_value }); .in_wildcard_entry => {
}, const duped_value = try allocator.dupe(u8, value);
.not_in_entry => return error.UnexpectedRule, errdefer allocator.free(duped_value);
try wildcard_rules.append(allocator, .{ .allow = duped_value });
},
.not_in_entry => return error.UnexpectedRule,
}
}, },
.disallow => switch (state) { .disallow => {
.in_our_entry => { defer state.has_rules = true;
const duped_value = try allocator.dupe(u8, value);
errdefer allocator.free(duped_value); switch (state.entry) {
try rules.append(allocator, .{ .disallow = duped_value }); .in_our_entry => {
}, const duped_value = try allocator.dupe(u8, value);
.in_other_entry => {}, errdefer allocator.free(duped_value);
.in_wildcard_entry => { try rules.append(allocator, .{ .disallow = duped_value });
const duped_value = try allocator.dupe(u8, value); },
errdefer allocator.free(duped_value); .in_other_entry => {},
try wildcard_rules.append(allocator, .{ .disallow = duped_value }); .in_wildcard_entry => {
}, const duped_value = try allocator.dupe(u8, value);
.not_in_entry => return error.UnexpectedRule, errdefer allocator.free(duped_value);
try wildcard_rules.append(allocator, .{ .disallow = duped_value });
},
.not_in_entry => return error.UnexpectedRule,
}
}, },
} }
} }
@@ -737,3 +817,54 @@ test "Robots: isAllowed - Google's real robots.txt" {
try std.testing.expect(twitterbot.isAllowed("/groups") == false); try std.testing.expect(twitterbot.isAllowed("/groups") == false);
try std.testing.expect(twitterbot.isAllowed("/m/") == false); try std.testing.expect(twitterbot.isAllowed("/m/") == false);
} }
// A User-agent line that follows rules opens a new group: Bot1 and Bot2 share
// the first group's rules, while Bot3 only gets the second group's.
test "Robots: user-agent after rules starts new entry" {
    const gpa = std.testing.allocator;

    const file =
        \\User-agent: Bot1
        \\User-agent: Bot2
        \\Disallow: /admin/
        \\Allow: /public/
        \\User-agent: Bot3
        \\Disallow: /private/
        \\
    ;

    const Case = struct {
        agent: []const u8,
        admin: bool,
        public: bool,
        private: bool,
    };

    const cases = [_]Case{
        .{ .agent = "Bot1", .admin = false, .public = true, .private = true },
        .{ .agent = "Bot2", .admin = false, .public = true, .private = true },
        .{ .agent = "Bot3", .admin = true, .public = true, .private = false },
    };

    for (cases) |case| {
        var parsed = try Robots.fromBytes(gpa, case.agent, file);
        defer parsed.deinit(gpa);

        try std.testing.expectEqual(case.admin, parsed.isAllowed("/admin/"));
        try std.testing.expectEqual(case.public, parsed.isAllowed("/public/"));
        try std.testing.expectEqual(case.private, parsed.isAllowed("/private/"));
    }
}
// Blank lines inside a group must not close it: the Allow rule that follows
// the empty lines still belongs to MyBot.
test "Robots: blank lines don't end entries" {
    const gpa = std.testing.allocator;

    const file =
        \\User-agent: MyBot
        \\Disallow: /admin/
        \\
        \\
        \\Allow: /public/
        \\
    ;

    var parsed = try Robots.fromBytes(gpa, "MyBot", file);
    defer parsed.deinit(gpa);

    try std.testing.expect(!parsed.isAllowed("/admin/"));
    try std.testing.expect(parsed.isAllowed("/public/"));
}

View File

@@ -265,6 +265,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
.headers = try self.getHeaders(url), .headers = try self.getHeaders(url),
.blocking = is_blocking, .blocking = is_blocking,
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
.robots = &page._session.browser.app.robots,
.resource_type = .script, .resource_type = .script,
.notification = page._session.notification, .notification = page._session.notification,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -380,6 +381,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
.method = .GET, .method = .GET,
.headers = try self.getHeaders(url), .headers = try self.getHeaders(url),
.cookie_jar = &self.page._session.cookie_jar, .cookie_jar = &self.page._session.cookie_jar,
.robots = &self.page._session.browser.app.robots,
.resource_type = .script, .resource_type = .script,
.notification = self.page._session.notification, .notification = self.page._session.notification,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -484,6 +486,7 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
.resource_type = .script, .resource_type = .script,
.cookie_jar = &self.page._session.cookie_jar, .cookie_jar = &self.page._session.cookie_jar,
.notification = self.page._session.notification, .notification = self.page._session.notification,
.robots = &self.page._session.browser.app.robots,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
.header_callback = Script.headerCallback, .header_callback = Script.headerCallback,
.data_callback = Script.dataCallback, .data_callback = Script.dataCallback,

View File

@@ -502,8 +502,8 @@ pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []cons
return buf.items[0 .. buf.items.len - 1 :0]; return buf.items[0 .. buf.items.len - 1 :0];
} }
pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) !?[:0]const u8 { pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 {
const origin = try getOrigin(arena, url) orelse return null; const origin = try getOrigin(arena, url) orelse return error.NoOrigin;
return try std.fmt.allocPrintSentinel( return try std.fmt.allocPrintSentinel(
arena, arena,
"{s}/robots.txt", "{s}/robots.txt",
@@ -795,24 +795,24 @@ test "URL: getRobotsUrl" {
{ {
const url = try getRobotsUrl(arena, "https://www.lightpanda.io"); const url = try getRobotsUrl(arena, "https://www.lightpanda.io");
try testing.expectEqual("https://www.lightpanda.io/robots.txt", url.?); try testing.expectEqual("https://www.lightpanda.io/robots.txt", url);
} }
{ {
const url = try getRobotsUrl(arena, "https://www.lightpanda.io/some/path"); const url = try getRobotsUrl(arena, "https://www.lightpanda.io/some/path");
try testing.expectString("https://www.lightpanda.io/robots.txt", url.?); try testing.expectString("https://www.lightpanda.io/robots.txt", url);
} }
{ {
const url = try getRobotsUrl(arena, "https://www.lightpanda.io:8080/page"); const url = try getRobotsUrl(arena, "https://www.lightpanda.io:8080/page");
try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url.?); try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url);
} }
{ {
const url = try getRobotsUrl(arena, "http://example.com/deep/nested/path?query=value#fragment"); const url = try getRobotsUrl(arena, "http://example.com/deep/nested/path?query=value#fragment");
try testing.expectString("http://example.com/robots.txt", url.?); try testing.expectString("http://example.com/robots.txt", url);
} }
{ {
const url = try getRobotsUrl(arena, "https://user:pass@example.com/page"); const url = try getRobotsUrl(arena, "https://user:pass@example.com/page");
try testing.expectString("https://example.com/robots.txt", url.?); try testing.expectString("https://example.com/robots.txt", url);
} }
} }

View File

@@ -79,6 +79,7 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise {
.resource_type = .fetch, .resource_type = .fetch,
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
.notification = page._session.notification, .notification = page._session.notification,
.robots = &page._session.browser.app.robots,
.start_callback = httpStartCallback, .start_callback = httpStartCallback,
.header_callback = httpHeaderDoneCallback, .header_callback = httpHeaderDoneCallback,
.data_callback = httpDataCallback, .data_callback = httpDataCallback,

View File

@@ -208,6 +208,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
.headers = headers, .headers = headers,
.body = self._request_body, .body = self._request_body,
.cookie_jar = &page._session.cookie_jar, .cookie_jar = &page._session.cookie_jar,
.robots = &page._session.browser.app.robots,
.resource_type = .xhr, .resource_type = .xhr,
.notification = page._session.notification, .notification = page._session.notification,
.start_callback = httpStartCallback, .start_callback = httpStartCallback,

View File

@@ -27,6 +27,8 @@ const Config = @import("../Config.zig");
const URL = @import("../browser/URL.zig"); const URL = @import("../browser/URL.zig");
const Notification = @import("../Notification.zig"); const Notification = @import("../Notification.zig");
const CookieJar = @import("../browser/webapi/storage/Cookie.zig").Jar; const CookieJar = @import("../browser/webapi/storage/Cookie.zig").Jar;
const Robots = @import("../browser/Robots.zig");
const RobotStore = Robots.RobotStore;
const c = Http.c; const c = Http.c;
const posix = std.posix; const posix = std.posix;
@@ -217,6 +219,36 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus {
} }
pub fn request(self: *Client, req: Request) !void { pub fn request(self: *Client, req: Request) !void {
if (self.config.obeyRobots()) {
const robots_url = try URL.getRobotsUrl(self.allocator, req.url);
// If we have this robots cached, we can take a fast path.
if (req.robots.get(robots_url)) |robot_entry| {
defer self.allocator.free(robots_url);
switch (robot_entry) {
// If we have a found robots entry, we check it.
.present => |robots| {
const path = URL.getPathname(req.url);
if (!robots.isAllowed(path)) {
req.error_callback(req.ctx, error.RobotsBlocked);
return;
}
},
// Otherwise, we assume we won't find it again.
.absent => {},
}
return self.processRequest(req);
}
return self.fetchRobotsThenProcessRequest(robots_url, req);
}
return self.processRequest(req);
}
fn processRequest(self: *Client, req: Request) !void {
const transfer = try self.makeTransfer(req); const transfer = try self.makeTransfer(req);
transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
@@ -246,6 +278,108 @@ pub fn request(self: *Client, req: Request) !void {
} }
} }
/// State carried through a robots.txt fetch while the request that triggered
/// it is held back. Created by fetchRobotsThenProcessRequest and destroyed by
/// robotsDoneCallback or robotsErrorCallback.
const RobotsRequestContext = struct {
// Client that issued the fetch; supplies the allocator and config, and is
// used to process `req` once the fetch has finished.
client: *Client,
// The original request, held until the robots.txt fetch finishes.
req: Request,
// URL of the robots.txt being fetched. Freed with the client's allocator in
// the done/error callbacks.
robots_url: [:0]const u8,
// Response body, filled by robotsDataCallback.
buffer: std.ArrayList(u8),
// HTTP status copied from the response header by robotsHeaderCallback;
// stays 0 until a header has been seen.
status: u16 = 0,
};
/// Starts a GET for `robots_url` and parks `req` in a heap-allocated
/// `RobotsRequestContext` until that fetch finishes. The original request is
/// resumed (or rejected) from `robotsDoneCallback` / `robotsErrorCallback`.
///
/// Takes ownership of `robots_url`, which must have been allocated with
/// `self.allocator`. It is released here if the fetch cannot be set up, and by
/// the robots callbacks otherwise.
fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void {
    const ctx = self.allocator.create(RobotsRequestContext) catch |err| {
        // Nobody else will ever see the URL, so release it before bailing out.
        self.allocator.free(robots_url);
        return err;
    };
    ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };

    const headers = self.newHeaders() catch |err| {
        // No transfer exists yet, so neither robots callback will run to
        // clean these up.
        self.allocator.free(robots_url);
        self.allocator.destroy(ctx);
        return err;
    };

    log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });

    // NOTE(review): if processRequest fails, `ctx` is deliberately left alone
    // here because it is not visible from this function whether the error
    // callback has already run (and freed it). Confirm, and release `ctx` and
    // `robots_url` on that path if it has not.
    try self.processRequest(.{
        .ctx = ctx,
        .url = robots_url,
        .method = .GET,
        .headers = headers,
        .blocking = false,
        .cookie_jar = req.cookie_jar,
        .notification = req.notification,
        .robots = req.robots,
        .resource_type = .fetch,
        .header_callback = robotsHeaderCallback,
        .data_callback = robotsDataCallback,
        .done_callback = robotsDoneCallback,
        .error_callback = robotsErrorCallback,
    });
}
/// Header callback for the robots.txt fetch: records the response status on
/// the context and reserves space for the body.
///
/// Always returns true. NOTE(review): presumably this tells the client to
/// continue the transfer; confirm against the header_callback contract.
fn robotsHeaderCallback(transfer: *Http.Transfer) !bool {
    // Content-Length comes from the remote server and is only a hint here.
    // Cap the up-front reservation so a bogus header cannot force a huge
    // allocation; larger bodies still grow the buffer as data arrives.
    const max_prealloc = 512 * 1024;

    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));

    if (transfer.response_header) |hdr| {
        log.debug(.browser, "robots status", .{ .status = hdr.status });
        ctx.status = hdr.status;
    }

    if (transfer.getContentLength()) |cl| {
        try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, @min(cl, max_prealloc));
    }

    return true;
}
/// Data callback for the robots.txt fetch: appends each received chunk to the
/// context's body buffer using the client's allocator.
fn robotsDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
    const robots_ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
    const allocator = robots_ctx.client.allocator;
    try robots_ctx.buffer.appendSlice(allocator, data);
}
/// Completion callback for the robots.txt fetch. Decides whether the parked
/// request may proceed, then either resumes it or rejects it with
/// `error.RobotsBlocked` through its own error callback.
///
/// The context is destroyed when this function returns, so no failure is
/// propagated to the caller: returning an error would leave the caller holding
/// a freed context and the original request unanswered. Problems with the
/// robots.txt itself fail open, matching `robotsErrorCallback`; a failure to
/// start the original request is reported to that request's error callback.
fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
    defer ctx.client.allocator.destroy(ctx);
    defer ctx.buffer.deinit(ctx.client.allocator);
    defer ctx.client.allocator.free(ctx.robots_url);

    if (!robotsAllowRequest(ctx)) {
        log.warn(.http, "blocked by robots", .{ .url = ctx.req.url });
        ctx.req.error_callback(ctx.req.ctx, error.RobotsBlocked);
        return;
    }

    // Now process the original request
    ctx.client.processRequest(ctx.req) catch |err| {
        ctx.req.error_callback(ctx.req.ctx, err);
    };
}

/// Evaluates a finished robots.txt fetch: caches the outcome in the request's
/// robot store when possible and returns whether the parked request's path is
/// allowed. Parse and caching failures are logged and never block the request.
fn robotsAllowRequest(ctx: *RobotsRequestContext) bool {
    const store = ctx.req.robots;

    // If not found, store as Not Found.
    if (ctx.status == 404) {
        log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
        store.putAbsent(ctx.robots_url) catch |err| {
            log.warn(.http, "robots cache failed", .{ .url = ctx.robots_url, .err = err });
        };
        return true;
    }

    // Only a non-empty 2xx/3xx body is parsed; everything else is allowed
    // without being cached.
    if (ctx.status < 200 or ctx.status >= 400 or ctx.buffer.items.len == 0) {
        return true;
    }

    var robots = store.robotsFromBytes(
        ctx.client.config.http_headers.user_agent,
        ctx.buffer.items,
    ) catch |err| {
        // A malformed robots.txt must not take the page load down with it.
        log.warn(.http, "robots parse failed", .{ .url = ctx.robots_url, .err = err });
        return true;
    };

    // Decide before handing the rules to the store.
    const allowed = robots.isAllowed(URL.getPathname(ctx.req.url));

    store.put(ctx.robots_url, robots) catch |err| {
        log.warn(.http, "robots cache failed", .{ .url = ctx.robots_url, .err = err });
        // The store did not take ownership, so release the rules here. They
        // were allocated with the store's allocator.
        robots.deinit(store.allocator);
    };

    return allowed;
}
/// Error callback for the robots.txt fetch. A failed fetch never blocks the
/// parked request: it is processed as if no robots.txt applied, and only a
/// failure to start it is forwarded to its own error callback.
fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
    const robots_ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
    const allocator = robots_ctx.client.allocator;
    // Runs on exit, after the parked request has been handed back to the
    // client: URL first, then the body buffer, then the context itself.
    defer {
        allocator.free(robots_ctx.robots_url);
        robots_ctx.buffer.deinit(allocator);
        allocator.destroy(robots_ctx);
    }

    log.warn(.http, "robots fetch failed", .{ .err = err });

    // On error, allow the request to proceed
    const parked = robots_ctx.req;
    robots_ctx.client.processRequest(parked) catch |process_err| {
        parked.error_callback(parked.ctx, process_err);
    };
}
fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
// The request was intercepted and is blocking. This is messy, but our // The request was intercepted and is blocking. This is messy, but our
// callers, the ScriptManager -> Page, don't have a great way to stop the // callers, the ScriptManager -> Page, don't have a great way to stop the
@@ -565,7 +699,7 @@ fn processMessages(self: *Client) !bool {
// In case of auth challenge // In case of auth challenge
// TODO give a way to configure the number of auth retries. // TODO give a way to configure the number of auth retries.
if (transfer._auth_challenge != null and transfer._tries < 10) { if (transfer._auth_challenge != null and transfer._tries < 10) {
var wait_for_interception = false; var wait_for_interception = false;
transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) { if (wait_for_interception) {
@@ -784,6 +918,7 @@ pub const Request = struct {
headers: Http.Headers, headers: Http.Headers,
body: ?[]const u8 = null, body: ?[]const u8 = null,
cookie_jar: *CookieJar, cookie_jar: *CookieJar,
robots: *RobotStore,
resource_type: ResourceType, resource_type: ResourceType,
credentials: ?[:0]const u8 = null, credentials: ?[:0]const u8 = null,
notification: *Notification, notification: *Notification,