diff --git a/src/App.zig b/src/App.zig
index 76ffd396..a4ed0e8f 100644
--- a/src/App.zig
+++ b/src/App.zig
@@ -50,7 +50,9 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
     app.config = config;
     app.allocator = allocator;
 
-    app.http = try Http.init(allocator, config);
+    app.robots = RobotStore.init(allocator);
+
+    app.http = try Http.init(allocator, &app.robots, config);
     errdefer app.http.deinit();
 
     app.platform = try Platform.init();
@@ -59,8 +61,6 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
     app.snapshot = try Snapshot.load();
     errdefer app.snapshot.deinit();
 
-    app.robots = RobotStore.init(allocator);
-
     app.app_dir_path = getAndMakeAppDir(allocator);
 
     app.telemetry = try Telemetry.init(app, config.mode);
diff --git a/src/browser/Page.zig b/src/browser/Page.zig
index d879a813..8e86c47a 100644
--- a/src/browser/Page.zig
+++ b/src/browser/Page.zig
@@ -559,7 +559,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
         .headers = headers,
         .body = opts.body,
         .cookie_jar = &self._session.cookie_jar,
-        .robots = &self._session.browser.app.robots,
         .resource_type = .document,
         .notification = self._session.notification,
         .header_callback = pageHeaderDoneCallback,
diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig
index 01c56a81..344d6232 100644
--- a/src/browser/ScriptManager.zig
+++ b/src/browser/ScriptManager.zig
@@ -265,7 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
         .headers = try self.getHeaders(url),
         .blocking = is_blocking,
         .cookie_jar = &page._session.cookie_jar,
-        .robots = &page._session.browser.app.robots,
         .resource_type = .script,
         .notification = page._session.notification,
         .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -381,7 +380,6 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
         .method = .GET,
         .headers = try self.getHeaders(url),
         .cookie_jar = &self.page._session.cookie_jar,
-        .robots = &self.page._session.browser.app.robots,
         .resource_type = .script,
         .notification = self.page._session.notification,
         .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
@@ -486,7 +484,6 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
         .resource_type = .script,
         .cookie_jar = &self.page._session.cookie_jar,
         .notification = self.page._session.notification,
-        .robots = &self.page._session.browser.app.robots,
         .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
         .header_callback = Script.headerCallback,
         .data_callback = Script.dataCallback,
diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig
index 988e9a53..a66fb311 100644
--- a/src/browser/webapi/net/Fetch.zig
+++ b/src/browser/webapi/net/Fetch.zig
@@ -79,7 +79,6 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise {
         .resource_type = .fetch,
         .cookie_jar = &page._session.cookie_jar,
         .notification = page._session.notification,
-        .robots = &page._session.browser.app.robots,
         .start_callback = httpStartCallback,
         .header_callback = httpHeaderDoneCallback,
         .data_callback = httpDataCallback,
diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig
index 296048b3..7c266e1a 100644
--- a/src/browser/webapi/net/XMLHttpRequest.zig
+++ b/src/browser/webapi/net/XMLHttpRequest.zig
@@ -208,7 +208,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
         .headers = headers,
         .body = self._request_body,
         .cookie_jar = &page._session.cookie_jar,
-        .robots = &page._session.browser.app.robots,
         .resource_type = .xhr,
         .notification = page._session.notification,
         .start_callback = httpStartCallback,
diff --git a/src/http/Client.zig b/src/http/Client.zig
index 91ab02d1..d65e860f 100644
--- a/src/http/Client.zig
+++ b/src/http/Client.zig
@@ -87,6 +87,8 @@ queue: TransferQueue,
 // The main app allocator
 allocator: Allocator,
+// Reference to the App-owned Robot Store.
+robot_store: *RobotStore,
 
 // Queue of requests that depend on a robots.txt.
 // Allows us to fetch the robots.txt just once.
 pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
@@ -129,7 +131,7 @@ pub const CDPClient = struct {
 
 const TransferQueue = std.DoublyLinkedList;
 
-pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config) !*Client {
+pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, robot_store: *RobotStore, config: *const Config) !*Client {
     var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
     errdefer transfer_pool.deinit();
 
@@ -153,6 +155,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config)
         .multi = multi,
         .handles = handles,
         .allocator = allocator,
+        .robot_store = robot_store,
         .http_proxy = http_proxy,
         .use_proxy = http_proxy != null,
         .config = config,
@@ -235,7 +238,7 @@ pub fn request(self: *Client, req: Request) !void {
     errdefer self.allocator.free(robots_url);
 
     // If we have this robots cached, we can take a fast path.
-    if (req.robots.get(robots_url)) |robot_entry| {
+    if (self.robot_store.get(robots_url)) |robot_entry| {
         defer self.allocator.free(robots_url);
 
         switch (robot_entry) {
@@ -328,7 +331,6 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
         .blocking = false,
         .cookie_jar = req.cookie_jar,
         .notification = req.notification,
-        .robots = req.robots,
         .resource_type = .fetch,
         .header_callback = robotsHeaderCallback,
         .data_callback = robotsDataCallback,
@@ -370,18 +372,18 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
 
     var allowed = true;
     if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) {
-        const robots = try ctx.req.robots.robotsFromBytes(
+        const robots = try ctx.client.robot_store.robotsFromBytes(
             ctx.client.config.http_headers.user_agent,
             ctx.buffer.items,
         );
-        try ctx.req.robots.put(ctx.robots_url, robots);
+        try ctx.client.robot_store.put(ctx.robots_url, robots);
 
         const path = URL.getPathname(ctx.req.url);
         allowed = robots.isAllowed(path);
     } else if (ctx.status == 404) {
         log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
-        try ctx.req.robots.putAbsent(ctx.robots_url);
+        try ctx.client.robot_store.putAbsent(ctx.robots_url);
     }
 
     const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
 
@@ -960,7 +962,6 @@ pub const Request = struct {
     headers: Http.Headers,
     body: ?[]const u8 = null,
     cookie_jar: *CookieJar,
-    robots: *RobotStore,
     resource_type: ResourceType,
     credentials: ?[:0]const u8 = null,
     notification: *Notification,
diff --git a/src/http/Http.zig b/src/http/Http.zig
index 3d488f95..9d550148 100644
--- a/src/http/Http.zig
+++ b/src/http/Http.zig
@@ -30,6 +30,7 @@ pub const Transfer = Client.Transfer;
 
 const log = @import("../log.zig");
 const errors = @import("errors.zig");
+const RobotStore = @import("../browser/Robots.zig").RobotStore;
 
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;
@@ -46,7 +47,7 @@ client: *Client,
 ca_blob: ?c.curl_blob,
 arena: ArenaAllocator,
 
-pub fn init(allocator: Allocator, config: *const Config) !Http {
+pub fn init(allocator: Allocator, robot_store: *RobotStore, config: *const Config) !Http {
     try errorCheck(c.curl_global_init(c.CURL_GLOBAL_SSL));
     errdefer c.curl_global_cleanup();
 
@@ -62,7 +63,7 @@ pub fn init(allocator: Allocator, config: *const Config) !Http {
         ca_blob = try loadCerts(allocator, arena.allocator());
     }
 
-    var client = try Client.init(allocator, ca_blob, config);
+    var client = try Client.init(allocator, ca_blob, robot_store, config);
     errdefer client.deinit();
 
     return .{
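
Note for reviewers: the patch relies on RobotStore (src/browser/Robots.zig) exposing init, get, put, putAbsent, and robotsFromBytes, keyed by robots.txt URL. The sketch below is for orientation only; the Entry union, the Robots fields, ownership of the duped keys, and the parsing stub are all assumptions, not the actual implementation.

const std = @import("std");

// Illustrative stand-in for the parsed robots.txt rules. The real type
// lives in src/browser/Robots.zig and applies per-user-agent path rules.
const Robots = struct {
    allow_all: bool,

    pub fn isAllowed(self: Robots, path: []const u8) bool {
        _ = path; // the real implementation matches path against parsed rules
        return self.allow_all;
    }
};

// A cache entry is either parsed rules or a marker that the fetch 404'd,
// matching the two cases handled in Client.request() and robotsDoneCallback().
const Entry = union(enum) {
    absent,
    robots: Robots,
};

pub const RobotStore = struct {
    allocator: std.mem.Allocator,
    entries: std.StringHashMapUnmanaged(Entry) = .empty,

    pub fn init(allocator: std.mem.Allocator) RobotStore {
        return .{ .allocator = allocator };
    }

    pub fn deinit(self: *RobotStore) void {
        var it = self.entries.keyIterator();
        while (it.next()) |key| self.allocator.free(key.*);
        self.entries.deinit(self.allocator);
    }

    // Fast path in Client.request(): a hit means no robots.txt fetch is queued.
    pub fn get(self: *const RobotStore, robots_url: []const u8) ?Entry {
        return self.entries.get(robots_url);
    }

    pub fn put(self: *RobotStore, robots_url: []const u8, robots: Robots) !void {
        return self.putEntry(robots_url, .{ .robots = robots });
    }

    // Records a 404 so the same robots.txt is never re-fetched.
    pub fn putAbsent(self: *RobotStore, robots_url: []const u8) !void {
        return self.putEntry(robots_url, .absent);
    }

    fn putEntry(self: *RobotStore, robots_url: []const u8, entry: Entry) !void {
        // Dupe the key, assuming the caller keeps ownership of its URL buffer
        // (Client.request() frees robots_url on the cached path).
        const key = try self.allocator.dupe(u8, robots_url);
        errdefer self.allocator.free(key);
        const gop = try self.entries.getOrPut(self.allocator, key);
        if (gop.found_existing) self.allocator.free(key);
        gop.value_ptr.* = entry;
    }

    // Parsing is elided here; the real version builds rules for user_agent
    // from the fetched robots.txt bytes.
    pub fn robotsFromBytes(self: *RobotStore, user_agent: []const u8, bytes: []const u8) !Robots {
        _ = self;
        _ = user_agent;
        _ = bytes;
        return .{ .allow_all = true };
    }
};

With this shape, Client can consult self.robot_store.get(robots_url) directly, which is the point of the patch: each Request no longer carries its own *RobotStore, and the single App-owned store is threaded through Http.init into Client.init once.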