pass robot store into Http init

This commit is contained in:
Muki Kiboigo
2026-02-04 11:49:52 -08:00
parent 50aeb9ff21
commit a7095d7dec
7 changed files with 14 additions and 18 deletions

View File

@@ -50,7 +50,9 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
app.config = config;
app.allocator = allocator;
app.http = try Http.init(allocator, config);
app.robots = RobotStore.init(allocator);
app.http = try Http.init(allocator, &app.robots, config);
errdefer app.http.deinit();
app.platform = try Platform.init();
@@ -59,8 +61,6 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
app.snapshot = try Snapshot.load();
errdefer app.snapshot.deinit();
app.robots = RobotStore.init(allocator);
app.app_dir_path = getAndMakeAppDir(allocator);
app.telemetry = try Telemetry.init(app, config.mode);

View File

@@ -559,7 +559,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
.headers = headers,
.body = opts.body,
.cookie_jar = &self._session.cookie_jar,
.robots = &self._session.browser.app.robots,
.resource_type = .document,
.notification = self._session.notification,
.header_callback = pageHeaderDoneCallback,

View File

@@ -265,7 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
.headers = try self.getHeaders(url),
.blocking = is_blocking,
.cookie_jar = &page._session.cookie_jar,
.robots = &page._session.browser.app.robots,
.resource_type = .script,
.notification = page._session.notification,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -381,7 +380,6 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
.method = .GET,
.headers = try self.getHeaders(url),
.cookie_jar = &self.page._session.cookie_jar,
.robots = &self.page._session.browser.app.robots,
.resource_type = .script,
.notification = self.page._session.notification,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -486,7 +484,6 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
.resource_type = .script,
.cookie_jar = &self.page._session.cookie_jar,
.notification = self.page._session.notification,
.robots = &self.page._session.browser.app.robots,
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
.header_callback = Script.headerCallback,
.data_callback = Script.dataCallback,

View File

@@ -79,7 +79,6 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise {
.resource_type = .fetch,
.cookie_jar = &page._session.cookie_jar,
.notification = page._session.notification,
.robots = &page._session.browser.app.robots,
.start_callback = httpStartCallback,
.header_callback = httpHeaderDoneCallback,
.data_callback = httpDataCallback,

View File

@@ -208,7 +208,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
.headers = headers,
.body = self._request_body,
.cookie_jar = &page._session.cookie_jar,
.robots = &page._session.browser.app.robots,
.resource_type = .xhr,
.notification = page._session.notification,
.start_callback = httpStartCallback,

View File

@@ -87,6 +87,8 @@ queue: TransferQueue,
// The main app allocator
allocator: Allocator,
// Reference to the App-owned Robot Store.
robot_store: *RobotStore,
// Queue of requests that depend on a robots.txt.
// Allows us to fetch the robots.txt just once.
pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
@@ -129,7 +131,7 @@ pub const CDPClient = struct {
const TransferQueue = std.DoublyLinkedList;
pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config) !*Client {
pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, robot_store: *RobotStore, config: *const Config) !*Client {
var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
errdefer transfer_pool.deinit();
@@ -153,6 +155,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config)
.multi = multi,
.handles = handles,
.allocator = allocator,
.robot_store = robot_store,
.http_proxy = http_proxy,
.use_proxy = http_proxy != null,
.config = config,
@@ -235,7 +238,7 @@ pub fn request(self: *Client, req: Request) !void {
errdefer self.allocator.free(robots_url);
// If this robots.txt is already cached, we can take a fast path.
if (req.robots.get(robots_url)) |robot_entry| {
if (self.robot_store.get(robots_url)) |robot_entry| {
defer self.allocator.free(robots_url);
switch (robot_entry) {
@@ -328,7 +331,6 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
.blocking = false,
.cookie_jar = req.cookie_jar,
.notification = req.notification,
.robots = req.robots,
.resource_type = .fetch,
.header_callback = robotsHeaderCallback,
.data_callback = robotsDataCallback,
@@ -370,18 +372,18 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
var allowed = true;
if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) {
const robots = try ctx.req.robots.robotsFromBytes(
const robots = try ctx.client.robot_store.robotsFromBytes(
ctx.client.config.http_headers.user_agent,
ctx.buffer.items,
);
try ctx.req.robots.put(ctx.robots_url, robots);
try ctx.client.robot_store.put(ctx.robots_url, robots);
const path = URL.getPathname(ctx.req.url);
allowed = robots.isAllowed(path);
} else if (ctx.status == 404) {
log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
try ctx.req.robots.putAbsent(ctx.robots_url);
try ctx.client.robot_store.putAbsent(ctx.robots_url);
}
const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
@@ -960,7 +962,6 @@ pub const Request = struct {
headers: Http.Headers,
body: ?[]const u8 = null,
cookie_jar: *CookieJar,
robots: *RobotStore,
resource_type: ResourceType,
credentials: ?[:0]const u8 = null,
notification: *Notification,

View File

@@ -30,6 +30,7 @@ pub const Transfer = Client.Transfer;
const log = @import("../log.zig");
const errors = @import("errors.zig");
const RobotStore = @import("../browser/Robots.zig").RobotStore;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
@@ -46,7 +47,7 @@ client: *Client,
ca_blob: ?c.curl_blob,
arena: ArenaAllocator,
pub fn init(allocator: Allocator, config: *const Config) !Http {
pub fn init(allocator: Allocator, robot_store: *RobotStore, config: *const Config) !Http {
try errorCheck(c.curl_global_init(c.CURL_GLOBAL_SSL));
errdefer c.curl_global_cleanup();
@@ -62,7 +63,7 @@ pub fn init(allocator: Allocator, config: *const Config) !Http {
ca_blob = try loadCerts(allocator, arena.allocator());
}
var client = try Client.init(allocator, ca_blob, config);
var client = try Client.init(allocator, ca_blob, robot_store, config);
errdefer client.deinit();
return .{