Properly handle failed parsing of robots.txt

This commit is contained in:
Muki Kiboigo
2026-02-10 20:09:32 -08:00
parent b408f88b8c
commit f02a37d3f0

View File

@@ -375,19 +375,36 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
var allowed = true; var allowed = true;
if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) { switch (ctx.status) {
const robots = try ctx.client.robot_store.robotsFromBytes( 200 => {
if (ctx.buffer.items.len > 0) {
const robots: ?Robots = ctx.client.robot_store.robotsFromBytes(
ctx.client.config.http_headers.user_agent, ctx.client.config.http_headers.user_agent,
ctx.buffer.items, ctx.buffer.items,
); ) catch blk: {
log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url });
try ctx.client.robot_store.put(ctx.robots_url, robots); // If we fail to parse, we just insert it as absent and ignore.
const path = URL.getPathname(ctx.req.url);
allowed = robots.isAllowed(path);
} else if (ctx.status == 404) {
log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
try ctx.client.robot_store.putAbsent(ctx.robots_url); try ctx.client.robot_store.putAbsent(ctx.robots_url);
break :blk null;
};
if (robots) |r| {
try ctx.client.robot_store.put(ctx.robots_url, r);
const path = URL.getPathname(ctx.req.url);
allowed = r.isAllowed(path);
}
}
},
404 => {
log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
// If we get a 404, we just insert it as absent.
try ctx.client.robot_store.putAbsent(ctx.robots_url);
},
else => {
log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status });
// If we get an unexpected status, we just insert as absent.
try ctx.client.robot_store.putAbsent(ctx.robots_url);
},
} }
var queued = ctx.client.pending_robots_queue.fetchRemove( var queued = ctx.client.pending_robots_queue.fetchRemove(