mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-02-05 06:47:11 +00:00
queue requests to run after robots is fetched
This commit is contained in:
@@ -87,6 +87,10 @@ queue: TransferQueue,
|
|||||||
// The main app allocator
|
// The main app allocator
|
||||||
allocator: Allocator,
|
allocator: Allocator,
|
||||||
|
|
||||||
|
// Queue of requests that depend on a robots.txt.
|
||||||
|
// Allows us to fetch the robots.txt just once.
|
||||||
|
pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
|
||||||
|
|
||||||
// Once we have a handle/easy to process a request with, we create a Transfer
|
// Once we have a handle/easy to process a request with, we create a Transfer
|
||||||
// which contains the Request as well as any state we need to process the
|
// which contains the Request as well as any state we need to process the
|
||||||
// request. These wil come and go with each request.
|
// request. These wil come and go with each request.
|
||||||
@@ -165,6 +169,13 @@ pub fn deinit(self: *Client) void {
|
|||||||
_ = c.curl_multi_cleanup(self.multi);
|
_ = c.curl_multi_cleanup(self.multi);
|
||||||
|
|
||||||
self.transfer_pool.deinit();
|
self.transfer_pool.deinit();
|
||||||
|
|
||||||
|
var robots_iter = self.pending_robots_queue.iterator();
|
||||||
|
while (robots_iter.next()) |entry| {
|
||||||
|
entry.value_ptr.deinit(self.allocator);
|
||||||
|
}
|
||||||
|
self.pending_robots_queue.deinit(self.allocator);
|
||||||
|
|
||||||
self.allocator.destroy(self);
|
self.allocator.destroy(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -254,7 +265,10 @@ fn processRequest(self: *Client, req: Request) !void {
|
|||||||
transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
|
transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
|
||||||
|
|
||||||
var wait_for_interception = false;
|
var wait_for_interception = false;
|
||||||
transfer.req.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
|
transfer.req.notification.dispatch(.http_request_intercept, &.{
|
||||||
|
.transfer = transfer,
|
||||||
|
.wait_for_interception = &wait_for_interception,
|
||||||
|
});
|
||||||
if (wait_for_interception == false) {
|
if (wait_for_interception == false) {
|
||||||
// request not intercepted, process it normally
|
// request not intercepted, process it normally
|
||||||
return self.process(transfer);
|
return self.process(transfer);
|
||||||
@@ -293,9 +307,15 @@ const RobotsRequestContext = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void {
|
fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void {
|
||||||
|
const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url);
|
||||||
|
|
||||||
|
if (!entry.found_existing) {
|
||||||
|
// If we aren't already fetching this robots,
|
||||||
|
// we want to create a new queue for it and add this request into it.
|
||||||
|
entry.value_ptr.* = .empty;
|
||||||
|
|
||||||
const ctx = try self.allocator.create(RobotsRequestContext);
|
const ctx = try self.allocator.create(RobotsRequestContext);
|
||||||
ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
|
ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
|
||||||
|
|
||||||
const headers = try self.newHeaders();
|
const headers = try self.newHeaders();
|
||||||
|
|
||||||
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
|
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
|
||||||
@@ -314,6 +334,9 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
|
|||||||
.done_callback = robotsDoneCallback,
|
.done_callback = robotsDoneCallback,
|
||||||
.error_callback = robotsErrorCallback,
|
.error_callback = robotsErrorCallback,
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
try entry.value_ptr.append(self.allocator, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn robotsHeaderCallback(transfer: *Http.Transfer) !bool {
|
fn robotsHeaderCallback(transfer: *Http.Transfer) !bool {
|
||||||
@@ -357,14 +380,22 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
|
|||||||
try ctx.req.robots.putAbsent(ctx.robots_url);
|
try ctx.req.robots.putAbsent(ctx.robots_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!allowed) {
|
const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
|
||||||
log.warn(.http, "blocked by robots", .{ .url = ctx.req.url });
|
defer {
|
||||||
ctx.req.error_callback(ctx.req.ctx, error.RobotsBlocked);
|
queued.deinit(ctx.client.allocator);
|
||||||
return;
|
_ = ctx.client.pending_robots_queue.remove(ctx.robots_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now process the original request
|
for (queued.items) |queued_req| {
|
||||||
try ctx.client.processRequest(ctx.req);
|
if (!allowed) {
|
||||||
|
log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
|
||||||
|
queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
|
||||||
|
} else {
|
||||||
|
ctx.client.processRequest(queued_req) catch |e| {
|
||||||
|
queued_req.error_callback(queued_req.ctx, e);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
|
fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
|
||||||
@@ -373,10 +404,18 @@ fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
|
|||||||
|
|
||||||
log.warn(.http, "robots fetch failed", .{ .err = err });
|
log.warn(.http, "robots fetch failed", .{ .err = err });
|
||||||
|
|
||||||
// On error, allow the request to proceed
|
const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
|
||||||
ctx.client.processRequest(ctx.req) catch |e| {
|
defer {
|
||||||
ctx.req.error_callback(ctx.req.ctx, e);
|
queued.deinit(ctx.client.allocator);
|
||||||
|
_ = ctx.client.pending_robots_queue.remove(ctx.robots_url);
|
||||||
|
}
|
||||||
|
|
||||||
|
// On error, allow all queued requests to proceed
|
||||||
|
for (queued.items) |queued_req| {
|
||||||
|
ctx.client.processRequest(queued_req) catch |e| {
|
||||||
|
queued_req.error_callback(queued_req.ctx, e);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
|
fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
|
||||||
|
|||||||
Reference in New Issue
Block a user