mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-02-05 14:57:11 +00:00
pass robot store into Http init
This commit is contained in:
@@ -50,7 +50,9 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
|
||||
app.config = config;
|
||||
app.allocator = allocator;
|
||||
|
||||
app.http = try Http.init(allocator, config);
|
||||
app.robots = RobotStore.init(allocator);
|
||||
|
||||
app.http = try Http.init(allocator, &app.robots, config);
|
||||
errdefer app.http.deinit();
|
||||
|
||||
app.platform = try Platform.init();
|
||||
@@ -59,8 +61,6 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
|
||||
app.snapshot = try Snapshot.load();
|
||||
errdefer app.snapshot.deinit();
|
||||
|
||||
app.robots = RobotStore.init(allocator);
|
||||
|
||||
app.app_dir_path = getAndMakeAppDir(allocator);
|
||||
|
||||
app.telemetry = try Telemetry.init(app, config.mode);
|
||||
|
||||
@@ -559,7 +559,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
|
||||
.headers = headers,
|
||||
.body = opts.body,
|
||||
.cookie_jar = &self._session.cookie_jar,
|
||||
.robots = &self._session.browser.app.robots,
|
||||
.resource_type = .document,
|
||||
.notification = self._session.notification,
|
||||
.header_callback = pageHeaderDoneCallback,
|
||||
|
||||
@@ -265,7 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
|
||||
.headers = try self.getHeaders(url),
|
||||
.blocking = is_blocking,
|
||||
.cookie_jar = &page._session.cookie_jar,
|
||||
.robots = &page._session.browser.app.robots,
|
||||
.resource_type = .script,
|
||||
.notification = page._session.notification,
|
||||
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
|
||||
@@ -381,7 +380,6 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
|
||||
.method = .GET,
|
||||
.headers = try self.getHeaders(url),
|
||||
.cookie_jar = &self.page._session.cookie_jar,
|
||||
.robots = &self.page._session.browser.app.robots,
|
||||
.resource_type = .script,
|
||||
.notification = self.page._session.notification,
|
||||
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
|
||||
@@ -486,7 +484,6 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
|
||||
.resource_type = .script,
|
||||
.cookie_jar = &self.page._session.cookie_jar,
|
||||
.notification = self.page._session.notification,
|
||||
.robots = &self.page._session.browser.app.robots,
|
||||
.start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null,
|
||||
.header_callback = Script.headerCallback,
|
||||
.data_callback = Script.dataCallback,
|
||||
|
||||
@@ -79,7 +79,6 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise {
|
||||
.resource_type = .fetch,
|
||||
.cookie_jar = &page._session.cookie_jar,
|
||||
.notification = page._session.notification,
|
||||
.robots = &page._session.browser.app.robots,
|
||||
.start_callback = httpStartCallback,
|
||||
.header_callback = httpHeaderDoneCallback,
|
||||
.data_callback = httpDataCallback,
|
||||
|
||||
@@ -208,7 +208,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
|
||||
.headers = headers,
|
||||
.body = self._request_body,
|
||||
.cookie_jar = &page._session.cookie_jar,
|
||||
.robots = &page._session.browser.app.robots,
|
||||
.resource_type = .xhr,
|
||||
.notification = page._session.notification,
|
||||
.start_callback = httpStartCallback,
|
||||
|
||||
@@ -87,6 +87,8 @@ queue: TransferQueue,
|
||||
// The main app allocator
|
||||
allocator: Allocator,
|
||||
|
||||
// Reference to the App-owned Robot Store.
|
||||
robot_store: *RobotStore,
|
||||
// Queue of requests that depend on a robots.txt.
|
||||
// Allows us to fetch the robots.txt just once.
|
||||
pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
|
||||
@@ -129,7 +131,7 @@ pub const CDPClient = struct {
|
||||
|
||||
const TransferQueue = std.DoublyLinkedList;
|
||||
|
||||
pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config) !*Client {
|
||||
pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, robot_store: *RobotStore, config: *const Config) !*Client {
|
||||
var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator);
|
||||
errdefer transfer_pool.deinit();
|
||||
|
||||
@@ -153,6 +155,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config)
|
||||
.multi = multi,
|
||||
.handles = handles,
|
||||
.allocator = allocator,
|
||||
.robot_store = robot_store,
|
||||
.http_proxy = http_proxy,
|
||||
.use_proxy = http_proxy != null,
|
||||
.config = config,
|
||||
@@ -235,7 +238,7 @@ pub fn request(self: *Client, req: Request) !void {
|
||||
errdefer self.allocator.free(robots_url);
|
||||
|
||||
// If this robots.txt is already cached, we can take a fast path.
|
||||
if (req.robots.get(robots_url)) |robot_entry| {
|
||||
if (self.robot_store.get(robots_url)) |robot_entry| {
|
||||
defer self.allocator.free(robots_url);
|
||||
|
||||
switch (robot_entry) {
|
||||
@@ -328,7 +331,6 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
|
||||
.blocking = false,
|
||||
.cookie_jar = req.cookie_jar,
|
||||
.notification = req.notification,
|
||||
.robots = req.robots,
|
||||
.resource_type = .fetch,
|
||||
.header_callback = robotsHeaderCallback,
|
||||
.data_callback = robotsDataCallback,
|
||||
@@ -370,18 +372,18 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
|
||||
var allowed = true;
|
||||
|
||||
if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) {
|
||||
const robots = try ctx.req.robots.robotsFromBytes(
|
||||
const robots = try ctx.client.robot_store.robotsFromBytes(
|
||||
ctx.client.config.http_headers.user_agent,
|
||||
ctx.buffer.items,
|
||||
);
|
||||
|
||||
try ctx.req.robots.put(ctx.robots_url, robots);
|
||||
try ctx.client.robot_store.put(ctx.robots_url, robots);
|
||||
|
||||
const path = URL.getPathname(ctx.req.url);
|
||||
allowed = robots.isAllowed(path);
|
||||
} else if (ctx.status == 404) {
|
||||
log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
|
||||
try ctx.req.robots.putAbsent(ctx.robots_url);
|
||||
try ctx.client.robot_store.putAbsent(ctx.robots_url);
|
||||
}
|
||||
|
||||
const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
|
||||
@@ -960,7 +962,6 @@ pub const Request = struct {
|
||||
headers: Http.Headers,
|
||||
body: ?[]const u8 = null,
|
||||
cookie_jar: *CookieJar,
|
||||
robots: *RobotStore,
|
||||
resource_type: ResourceType,
|
||||
credentials: ?[:0]const u8 = null,
|
||||
notification: *Notification,
|
||||
|
||||
@@ -30,6 +30,7 @@ pub const Transfer = Client.Transfer;
|
||||
|
||||
const log = @import("../log.zig");
|
||||
const errors = @import("errors.zig");
|
||||
const RobotStore = @import("../browser/Robots.zig").RobotStore;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||
@@ -46,7 +47,7 @@ client: *Client,
|
||||
ca_blob: ?c.curl_blob,
|
||||
arena: ArenaAllocator,
|
||||
|
||||
pub fn init(allocator: Allocator, config: *const Config) !Http {
|
||||
pub fn init(allocator: Allocator, robot_store: *RobotStore, config: *const Config) !Http {
|
||||
try errorCheck(c.curl_global_init(c.CURL_GLOBAL_SSL));
|
||||
errdefer c.curl_global_cleanup();
|
||||
|
||||
@@ -62,7 +63,7 @@ pub fn init(allocator: Allocator, config: *const Config) !Http {
|
||||
ca_blob = try loadCerts(allocator, arena.allocator());
|
||||
}
|
||||
|
||||
var client = try Client.init(allocator, ca_blob, config);
|
||||
var client = try Client.init(allocator, ca_blob, robot_store, config);
|
||||
errdefer client.deinit();
|
||||
|
||||
return .{
|
||||
|
||||
Reference in New Issue
Block a user