From 1a246f2e380f965a4602b4b2d9c5db8c72864dee Mon Sep 17 00:00:00 2001
From: Muki Kiboigo
Date: Sat, 31 Jan 2026 18:41:55 -0800
Subject: [PATCH] obey robots.txt in the http client

---
 src/App.zig                               |   5 +
 src/Config.zig                            |  18 ++
 src/browser/Page.zig                      |   1 +
 src/browser/Robots.zig                    | 239 +++++++++++++++++-----
 src/browser/ScriptManager.zig             |   3 +
 src/browser/URL.zig                       |  14 +-
 src/browser/webapi/net/Fetch.zig          |   1 +
 src/browser/webapi/net/XMLHttpRequest.zig |   1 +
 src/http/Client.zig                       | 137 ++++++++++++-
 9 files changed, 357 insertions(+), 62 deletions(-)

diff --git a/src/App.zig b/src/App.zig
index 21b0ecc6..76ffd396 100644
--- a/src/App.zig
+++ b/src/App.zig
@@ -25,6 +25,7 @@ const Config = @import("Config.zig");
 const Snapshot = @import("browser/js/Snapshot.zig");
 const Platform = @import("browser/js/Platform.zig");
 const Telemetry = @import("telemetry/telemetry.zig").Telemetry;
+const RobotStore = @import("browser/Robots.zig").RobotStore;
 
 pub const Http = @import("http/Http.zig");
 pub const ArenaPool = @import("ArenaPool.zig");
@@ -38,6 +39,7 @@ snapshot: Snapshot,
 telemetry: Telemetry,
 allocator: Allocator,
 arena_pool: ArenaPool,
+robots: RobotStore,
 app_dir_path: ?[]const u8,
 
 shutdown: bool = false,
@@ -57,6 +59,8 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
     app.snapshot = try Snapshot.load();
     errdefer app.snapshot.deinit();
 
+    app.robots = RobotStore.init(allocator);
+
     app.app_dir_path = getAndMakeAppDir(allocator);
 
     app.telemetry = try Telemetry.init(app, config.mode);
@@ -79,6 +83,7 @@ pub fn deinit(self: *App) void {
         self.app_dir_path = null;
     }
     self.telemetry.deinit();
+    self.robots.deinit();
     self.http.deinit();
     self.snapshot.deinit();
     self.platform.deinit();
diff --git a/src/Config.zig b/src/Config.zig
index fc4ebcdd..0f285f98 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -57,6 +57,13 @@ pub fn tlsVerifyHost(self: *const Config) bool {
     };
 }
 
+pub fn obeyRobots(self: *const Config) bool {
+    return switch (self.mode) {
+        inline .serve, .fetch => |opts| opts.common.obey_robots,
+        else => unreachable,
+    };
+}
+
 pub fn httpProxy(self: *const Config) ?[:0]const u8 {
     return switch (self.mode) {
         inline .serve, .fetch => |opts| opts.common.http_proxy,
@@ -158,6 +165,7 @@ pub const Fetch = struct {
 };
 
 pub const Common = struct {
+    obey_robots: bool = false,
     proxy_bearer_token: ?[:0]const u8 = null,
     http_proxy: ?[:0]const u8 = null,
     http_max_concurrent: ?u8 = null,
@@ -223,6 +231,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
         \\  advanced option which should only be set if you understand
         \\  and accept the risk of disabling host verification.
         \\
+        \\--obey_robots
+        \\  Fetches and obeys the robots.txt file (if available) of each
+        \\  host we make requests to.
+        \\  Defaults to false.
+        \\
         \\--http_proxy The HTTP proxy to use for all HTTP requests.
         \\  A username:password can be included for basic authentication.
         \\  Defaults to none.
@@ -613,6 +626,11 @@ fn parseCommonArg(
         return true;
     }
 
+    if (std.mem.eql(u8, "--obey_robots", opt)) {
+        common.obey_robots = true;
+        return true;
+    }
+
     if (std.mem.eql(u8, "--http_proxy", opt)) {
         const str = args.next() orelse {
             log.fatal(.app, "missing argument value", .{ .arg = "--http_proxy" });
diff --git a/src/browser/Page.zig b/src/browser/Page.zig
index 8e86c47a..d879a813 100644
--- a/src/browser/Page.zig
+++ b/src/browser/Page.zig
@@ -559,6 +559,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
         .headers = headers,
         .body = opts.body,
         .cookie_jar = &self._session.cookie_jar,
+        .robots = &self._session.browser.app.robots,
         .resource_type = .document,
         .notification = self._session.notification,
         .header_callback = pageHeaderDoneCallback,
diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig
index 5a4f9033..2aff774a 100644
--- a/src/browser/Robots.zig
+++ b/src/browser/Robots.zig
@@ -33,13 +33,80 @@ pub const Key = enum {
 pub const Robots = @This();
 pub const empty: Robots = .{ .rules = &.{} };
 
+/// App-wide cache of parsed robots.txt files, one entry per robots.txt URL.
+pub const RobotStore = struct {
+    const RobotsEntry = union(enum) {
+        present: Robots,
+        absent,
+    };
+
+    // Hashing and equality are case-insensitive, so differently-cased URLs
+    // for the same origin share a single entry.
+    pub const RobotsMap = std.HashMapUnmanaged([]const u8, RobotsEntry, struct {
+        const Context = @This();
+
+        pub fn hash(_: Context, value: []const u8) u64 {
+            var hasher = std.hash.Wyhash.init(value.len);
+            for (value) |c| {
+                std.hash.autoHash(&hasher, std.ascii.toLower(c));
+            }
+            return hasher.final();
+        }
+
+        pub fn eql(_: Context, a: []const u8, b: []const u8) bool {
+            if (a.len != b.len) return false;
+            return std.ascii.eqlIgnoreCase(a, b);
+        }
+    }, 80);
+
+    allocator: std.mem.Allocator,
+    map: RobotsMap,
+
+    pub fn init(allocator: std.mem.Allocator) RobotStore {
+        return .{ .allocator = allocator, .map = .empty };
+    }
+
+    pub fn deinit(self: *RobotStore) void {
+        var iter = self.map.iterator();
+
+        while (iter.next()) |entry| {
+            self.allocator.free(entry.key_ptr.*);
+
+            switch (entry.value_ptr.*) {
+                .present => |*robots| robots.deinit(self.allocator),
+                .absent => {},
+            }
+        }
+
+        self.map.deinit(self.allocator);
+    }
+
+    pub fn get(self: *RobotStore, url: []const u8) ?RobotsEntry {
+        return self.map.get(url);
+    }
+
+    pub fn robotsFromBytes(self: *RobotStore, user_agent: []const u8, bytes: []const u8) !Robots {
+        return try Robots.fromBytes(self.allocator, user_agent, bytes);
+    }
+
+    pub fn put(self: *RobotStore, url: []const u8, robots: Robots) !void {
+        const duped = try self.allocator.dupe(u8, url);
+        try self.map.put(self.allocator, duped, .{ .present = robots });
+    }
+
+    pub fn putAbsent(self: *RobotStore, url: []const u8) !void {
+        const duped = try self.allocator.dupe(u8, url);
+        try self.map.put(self.allocator, duped, .absent);
+    }
+};
+
 rules: []const Rule,
 
-const State = enum {
-    not_in_entry,
-    in_other_entry,
-    in_our_entry,
-    in_wildcard_entry,
+const State = struct {
+    entry: enum {
+        not_in_entry,
+        in_other_entry,
+        in_our_entry,
+        in_wildcard_entry,
+    },
+    // Set once the current group has emitted a rule; a later User-agent
+    // line then starts a new group instead of extending this one.
+    has_rules: bool = false,
 };
 
 fn freeRulesInList(allocator: std.mem.Allocator, rules: []const Rule) void {
@@ -62,7 +129,7 @@ fn parseRulesWithUserAgent(
     var wildcard_rules: std.ArrayList(Rule) = .empty;
     defer wildcard_rules.deinit(allocator);
 
-    var state: State = .not_in_entry;
+    var state: State = .{ .entry = .not_in_entry, .has_rules = false };
 
     var iter = std.mem.splitScalar(u8, bytes, '\n');
     while (iter.next()) |line| {
@@ -78,7 +145,6 @@ fn parseRulesWithUserAgent(
             trimmed;
 
         if (true_line.len == 0) {
-            state = .not_in_entry;
             continue;
         }
 
@@ -94,55 +160,69 @@ fn 
parseRulesWithUserAgent( const value = std.mem.trim(u8, true_line[colon_idx + 1 ..], &std.ascii.whitespace); switch (key) { - .@"user-agent" => switch (state) { - .in_other_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } - }, - .in_our_entry => {}, - .in_wildcard_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } - }, - .not_in_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } else if (std.mem.eql(u8, "*", value)) { - state = .in_wildcard_entry; - } else { - state = .in_other_entry; - } - }, + .@"user-agent" => { + if (state.has_rules) { + state = .{ .entry = .not_in_entry, .has_rules = false }; + } + + switch (state.entry) { + .in_other_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } + }, + .in_our_entry => {}, + .in_wildcard_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } + }, + .not_in_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } else if (std.mem.eql(u8, "*", value)) { + state.entry = .in_wildcard_entry; + } else { + state.entry = .in_other_entry; + } + }, + } }, - .allow => switch (state) { - .in_our_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try rules.append(allocator, .{ .allow = duped_value }); - }, - .in_other_entry => {}, - .in_wildcard_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try wildcard_rules.append(allocator, .{ .allow = duped_value }); - }, - .not_in_entry => return error.UnexpectedRule, + .allow => { + defer state.has_rules = true; + + switch (state.entry) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .allow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .allow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + } }, - .disallow => switch (state) { - .in_our_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try rules.append(allocator, .{ .disallow = duped_value }); - }, - .in_other_entry => {}, - .in_wildcard_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try wildcard_rules.append(allocator, .{ .disallow = duped_value }); - }, - .not_in_entry => return error.UnexpectedRule, + .disallow => { + defer state.has_rules = true; + + switch (state.entry) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .disallow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .disallow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + } }, } } @@ -737,3 +817,54 @@ test "Robots: isAllowed - Google's real robots.txt" { try std.testing.expect(twitterbot.isAllowed("/groups") == false); try std.testing.expect(twitterbot.isAllowed("/m/") == false); } + +test "Robots: user-agent after rules starts new entry" { + const allocator = 
std.testing.allocator; + + const file = + \\User-agent: Bot1 + \\User-agent: Bot2 + \\Disallow: /admin/ + \\Allow: /public/ + \\User-agent: Bot3 + \\Disallow: /private/ + \\ + ; + + var robots1 = try Robots.fromBytes(allocator, "Bot1", file); + defer robots1.deinit(allocator); + try std.testing.expect(robots1.isAllowed("/admin/") == false); + try std.testing.expect(robots1.isAllowed("/public/") == true); + try std.testing.expect(robots1.isAllowed("/private/") == true); + + var robots2 = try Robots.fromBytes(allocator, "Bot2", file); + defer robots2.deinit(allocator); + try std.testing.expect(robots2.isAllowed("/admin/") == false); + try std.testing.expect(robots2.isAllowed("/public/") == true); + try std.testing.expect(robots2.isAllowed("/private/") == true); + + var robots3 = try Robots.fromBytes(allocator, "Bot3", file); + defer robots3.deinit(allocator); + try std.testing.expect(robots3.isAllowed("/admin/") == true); + try std.testing.expect(robots3.isAllowed("/public/") == true); + try std.testing.expect(robots3.isAllowed("/private/") == false); +} + +test "Robots: blank lines don't end entries" { + const allocator = std.testing.allocator; + + const file = + \\User-agent: MyBot + \\Disallow: /admin/ + \\ + \\ + \\Allow: /public/ + \\ + ; + + var robots = try Robots.fromBytes(allocator, "MyBot", file); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/") == false); + try std.testing.expect(robots.isAllowed("/public/") == true); +} diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 344d6232..01c56a81 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,6 +265,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .headers = try self.getHeaders(url), .blocking = is_blocking, .cookie_jar = &page._session.cookie_jar, + .robots = &page._session.browser.app.robots, .resource_type = .script, .notification = page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, @@ -380,6 +381,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const .method = .GET, .headers = try self.getHeaders(url), .cookie_jar = &self.page._session.cookie_jar, + .robots = &self.page._session.browser.app.robots, .resource_type = .script, .notification = self.page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, @@ -484,6 +486,7 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C .resource_type = .script, .cookie_jar = &self.page._session.cookie_jar, .notification = self.page._session.notification, + .robots = &self.page._session.browser.app.robots, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 1e5d272a..716480b1 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -502,8 +502,8 @@ pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []cons return buf.items[0 .. 
buf.items.len - 1 :0]; } -pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) !?[:0]const u8 { - const origin = try getOrigin(arena, url) orelse return null; +pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 { + const origin = try getOrigin(arena, url) orelse return error.NoOrigin; return try std.fmt.allocPrintSentinel( arena, "{s}/robots.txt", @@ -795,24 +795,24 @@ test "URL: getRobotsUrl" { { const url = try getRobotsUrl(arena, "https://www.lightpanda.io"); - try testing.expectEqual("https://www.lightpanda.io/robots.txt", url.?); + try testing.expectEqual("https://www.lightpanda.io/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://www.lightpanda.io/some/path"); - try testing.expectString("https://www.lightpanda.io/robots.txt", url.?); + try testing.expectString("https://www.lightpanda.io/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://www.lightpanda.io:8080/page"); - try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url.?); + try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url); } { const url = try getRobotsUrl(arena, "http://example.com/deep/nested/path?query=value#fragment"); - try testing.expectString("http://example.com/robots.txt", url.?); + try testing.expectString("http://example.com/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://user:pass@example.com/page"); - try testing.expectString("https://example.com/robots.txt", url.?); + try testing.expectString("https://example.com/robots.txt", url); } } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index a66fb311..988e9a53 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -79,6 +79,7 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise { .resource_type = .fetch, .cookie_jar = &page._session.cookie_jar, .notification = page._session.notification, + .robots = &page._session.browser.app.robots, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 7c266e1a..296048b3 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -208,6 +208,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { .headers = headers, .body = self._request_body, .cookie_jar = &page._session.cookie_jar, + .robots = &page._session.browser.app.robots, .resource_type = .xhr, .notification = page._session.notification, .start_callback = httpStartCallback, diff --git a/src/http/Client.zig b/src/http/Client.zig index cc61b681..a9c21e0c 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -27,6 +27,8 @@ const Config = @import("../Config.zig"); const URL = @import("../browser/URL.zig"); const Notification = @import("../Notification.zig"); const CookieJar = @import("../browser/webapi/storage/Cookie.zig").Jar; +const Robots = @import("../browser/Robots.zig"); +const RobotStore = Robots.RobotStore; const c = Http.c; const posix = std.posix; @@ -217,6 +219,36 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { } pub fn request(self: *Client, req: Request) !void { + if (self.config.obeyRobots()) { + const robots_url = try URL.getRobotsUrl(self.allocator, req.url); + + // If we have this robots cached, we can take a fast path. 
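+        // A cache hit is either a parsed robots.txt or a recorded absence,
+        // so repeat requests to the same origin never refetch /robots.txt.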
+        if (req.robots.get(robots_url)) |robot_entry| {
+            defer self.allocator.free(robots_url);
+
+            switch (robot_entry) {
+                // A cached robots.txt exists; check the request path against it.
+                .present => |robots| {
+                    const path = URL.getPathname(req.url);
+                    if (!robots.isAllowed(path)) {
+                        req.error_callback(req.ctx, error.RobotsBlocked);
+                        return;
+                    }
+                },
+                // A previous fetch found no robots.txt here; nothing to check.
+                .absent => {},
+            }
+
+            return self.processRequest(req);
+        }
+
+        return self.fetchRobotsThenProcessRequest(robots_url, req);
+    }
+
+    return self.processRequest(req);
+}
+
+fn processRequest(self: *Client, req: Request) !void {
     const transfer = try self.makeTransfer(req);
 
     transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
@@ -246,6 +278,108 @@ pub fn request(self: *Client, req: Request) !void {
     }
 }
 
+const RobotsRequestContext = struct {
+    client: *Client,
+    req: Request,
+    robots_url: [:0]const u8,
+    buffer: std.ArrayList(u8),
+    status: u16 = 0,
+};
+
+fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void {
+    const ctx = try self.allocator.create(RobotsRequestContext);
+    ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
+
+    const headers = try self.newHeaders();
+
+    log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
+    try self.processRequest(.{
+        .ctx = ctx,
+        .url = robots_url,
+        .method = .GET,
+        .headers = headers,
+        .blocking = false,
+        .cookie_jar = req.cookie_jar,
+        .notification = req.notification,
+        .robots = req.robots,
+        .resource_type = .fetch,
+        .header_callback = robotsHeaderCallback,
+        .data_callback = robotsDataCallback,
+        .done_callback = robotsDoneCallback,
+        .error_callback = robotsErrorCallback,
+    });
+}
+
+fn robotsHeaderCallback(transfer: *Http.Transfer) !bool {
+    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
+
+    if (transfer.response_header) |hdr| {
+        log.debug(.browser, "robots status", .{ .status = hdr.status });
+        ctx.status = hdr.status;
+    }
+
+    if (transfer.getContentLength()) |cl| {
+        try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl);
+    }
+
+    return true;
+}
+
+fn robotsDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
+    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
+    try ctx.buffer.appendSlice(ctx.client.allocator, data);
+}
+
+fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
+    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
+    defer ctx.client.allocator.destroy(ctx);
+    defer ctx.buffer.deinit(ctx.client.allocator);
+    defer ctx.client.allocator.free(ctx.robots_url);
+
+    var allowed = true;
+
+    if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) {
+        const robots = try ctx.req.robots.robotsFromBytes(
+            ctx.client.config.http_headers.user_agent,
+            ctx.buffer.items,
+        );
+
+        try ctx.req.robots.put(ctx.robots_url, robots);
+
+        const path = URL.getPathname(ctx.req.url);
+        allowed = robots.isAllowed(path);
+    }
+
+    // No robots.txt at this origin; remember its absence.
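+    // RFC 9309 treats an unavailable robots.txt (e.g. a 404) as allow-all,
+    // so only the absence needs to be remembered, not any rules.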
+ if (ctx.status == 404) { + log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); + try ctx.req.robots.putAbsent(ctx.robots_url); + } + + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = ctx.req.url }); + ctx.req.error_callback(ctx.req.ctx, error.RobotsBlocked); + return; + } + + // Now process the original request + try ctx.client.processRequest(ctx.req); +} + +fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); + defer ctx.client.allocator.destroy(ctx); + defer ctx.buffer.deinit(ctx.client.allocator); + defer ctx.client.allocator.free(ctx.robots_url); + + log.warn(.http, "robots fetch failed", .{ .err = err }); + + // On error, allow the request to proceed + ctx.client.processRequest(ctx.req) catch |e| { + ctx.req.error_callback(ctx.req.ctx, e); + }; +} + fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -565,7 +699,7 @@ fn processMessages(self: *Client) !bool { // In case of auth challenge // TODO give a way to configure the number of auth retries. - if (transfer._auth_challenge != null and transfer._tries < 10) { + if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); if (wait_for_interception) { @@ -784,6 +918,7 @@ pub const Request = struct { headers: Http.Headers, body: ?[]const u8 = null, cookie_jar: *CookieJar, + robots: *RobotStore, resource_type: ResourceType, credentials: ?[:0]const u8 = null, notification: *Notification,
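
-- 
A minimal sketch of the cache flow this patch wires up, written as a Zig
test against the RobotStore API above. The bot name, origins, and rules are
hypothetical examples, and the import path assumes the sketch sits under src/:

    const std = @import("std");
    const Robots = @import("browser/Robots.zig");
    const RobotStore = Robots.RobotStore;

    test "RobotStore: cache flow sketch" {
        const allocator = std.testing.allocator;

        var store = RobotStore.init(allocator);
        defer store.deinit();

        const robots_url = "https://example.com/robots.txt";

        // First request to this origin: nothing cached yet, so the client
        // would fetch /robots.txt before the real request.
        try std.testing.expect(store.get(robots_url) == null);

        // Parse the fetched body and cache it under the robots.txt URL.
        const robots = try store.robotsFromBytes(
            "MyBot",
            "User-agent: MyBot\nDisallow: /admin/\n",
        );
        try store.put(robots_url, robots);

        // Every later request to this origin takes the fast path.
        switch (store.get(robots_url).?) {
            .present => |r| {
                try std.testing.expect(!r.isAllowed("/admin/"));
                try std.testing.expect(r.isAllowed("/public/"));
            },
            .absent => unreachable,
        }

        // A 404 is cached as an absence, so it too is fetched only once.
        try store.putAbsent("https://other.example/robots.txt");
        try std.testing.expect(store.get("https://other.example/robots.txt").? == .absent);
    }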