From e27803038c89712556c9a8425d7d8bfacc2cc402 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Fri, 23 Jan 2026 07:59:46 -0800 Subject: [PATCH 01/14] initial implementation of Robots --- src/browser/Robots.zig | 739 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 739 insertions(+) create mode 100644 src/browser/Robots.zig diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig new file mode 100644 index 00000000..5a4f9033 --- /dev/null +++ b/src/browser/Robots.zig @@ -0,0 +1,739 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const std = @import("std"); + +pub const Rule = union(enum) { + allow: []const u8, + disallow: []const u8, +}; + +pub const Key = enum { + @"user-agent", + allow, + disallow, +}; + +/// https://www.rfc-editor.org/rfc/rfc9309.html +pub const Robots = @This(); +pub const empty: Robots = .{ .rules = &.{} }; + +rules: []const Rule, + +const State = enum { + not_in_entry, + in_other_entry, + in_our_entry, + in_wildcard_entry, +}; + +fn freeRulesInList(allocator: std.mem.Allocator, rules: []const Rule) void { + for (rules) |rule| { + switch (rule) { + .allow => |value| allocator.free(value), + .disallow => |value| allocator.free(value), + } + } +} + +fn parseRulesWithUserAgent( + allocator: std.mem.Allocator, + user_agent: []const u8, + bytes: []const u8, +) ![]const Rule { + var rules: std.ArrayList(Rule) = .empty; + defer rules.deinit(allocator); + + var wildcard_rules: std.ArrayList(Rule) = .empty; + defer wildcard_rules.deinit(allocator); + + var state: State = .not_in_entry; + + var iter = std.mem.splitScalar(u8, bytes, '\n'); + while (iter.next()) |line| { + const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace); + + // Skip all comment lines. + if (std.mem.startsWith(u8, trimmed, "#")) continue; + + // Remove end of line comment. 
+ const true_line = if (std.mem.indexOfScalar(u8, trimmed, '#')) |pos| + std.mem.trimRight(u8, trimmed[0..pos], &std.ascii.whitespace) + else + trimmed; + + if (true_line.len == 0) { + state = .not_in_entry; + continue; + } + + const colon_idx = std.mem.indexOfScalar(u8, true_line, ':') orelse return error.MissingColon; + const key_str = try std.ascii.allocLowerString(allocator, true_line[0..colon_idx]); + defer allocator.free(key_str); + + const key = std.meta.stringToEnum(Key, key_str) orelse { + // log.warn(.browser, "robots key", .{ .key = key_str }); + continue; + }; + + const value = std.mem.trim(u8, true_line[colon_idx + 1 ..], &std.ascii.whitespace); + + switch (key) { + .@"user-agent" => switch (state) { + .in_other_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state = .in_our_entry; + } + }, + .in_our_entry => {}, + .in_wildcard_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state = .in_our_entry; + } + }, + .not_in_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state = .in_our_entry; + } else if (std.mem.eql(u8, "*", value)) { + state = .in_wildcard_entry; + } else { + state = .in_other_entry; + } + }, + }, + .allow => switch (state) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .allow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .allow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + }, + .disallow => switch (state) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .disallow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer 
allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .disallow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + }, + } + } + + if (rules.items.len > 0) { + freeRulesInList(allocator, wildcard_rules.items); + return try rules.toOwnedSlice(allocator); + } else { + freeRulesInList(allocator, rules.items); + return try wildcard_rules.toOwnedSlice(allocator); + } +} + +pub fn fromBytes(allocator: std.mem.Allocator, user_agent: []const u8, bytes: []const u8) !Robots { + const rules = try parseRulesWithUserAgent(allocator, user_agent, bytes); + return .{ .rules = rules }; +} + +pub fn deinit(self: *Robots, allocator: std.mem.Allocator) void { + freeRulesInList(allocator, self.rules); + allocator.free(self.rules); +} + +fn matchPatternRecursive(pattern: []const u8, path: []const u8, exact_match: bool) bool { + if (pattern.len == 0) return true; + + const star_pos = std.mem.indexOfScalar(u8, pattern, '*') orelse { + if (exact_match) { + // If we end in '$', we must be exactly equal. + return std.mem.eql(u8, path, pattern); + } else { + // Otherwise, we are just a prefix. + return std.mem.startsWith(u8, path, pattern); + } + }; + + // Ensure the prefix before the '*' matches. + if (!std.mem.startsWith(u8, path, pattern[0..star_pos])) { + return false; + } + + const suffix_pattern = pattern[star_pos + 1 ..]; + if (suffix_pattern.len == 0) return true; + + var i: usize = star_pos; + while (i <= path.len) : (i += 1) { + if (matchPatternRecursive(suffix_pattern, path[i..], exact_match)) { + return true; + } + } + + return false; +} + +/// There are rules for how the pattern in robots.txt should be matched. +/// +/// * should match 0 or more of any character. +/// $ should signify the end of a path, making it exact. +/// otherwise, it is a prefix path. 
+fn matchPattern(pattern: []const u8, path: []const u8) ?usize { + if (pattern.len == 0) return 0; + const exact_match = pattern[pattern.len - 1] == '$'; + const inner_pattern = if (exact_match) pattern[0 .. pattern.len - 1] else pattern; + + if (matchPatternRecursive( + inner_pattern, + path, + exact_match, + )) return pattern.len else return null; +} + +pub fn isAllowed(self: *const Robots, path: []const u8) bool { + const rules = self.rules; + + var longest_match_len: usize = 0; + var is_allowed_result = true; + + for (rules) |rule| { + switch (rule) { + .allow => |pattern| { + if (matchPattern(pattern, path)) |len| { + // Longest or Last Wins. + if (len >= longest_match_len) { + longest_match_len = len; + is_allowed_result = true; + } + } + }, + .disallow => |pattern| { + if (pattern.len == 0) continue; + + if (matchPattern(pattern, path)) |len| { + // Longest or Last Wins. + if (len >= longest_match_len) { + longest_match_len = len; + is_allowed_result = false; + } + } + }, + } + } + + return is_allowed_result; +} + +test "Robots: simple robots.txt" { + const allocator = std.testing.allocator; + + const file = + \\User-agent: * + \\Disallow: /private/ + \\Allow: /public/ + \\ + \\User-agent: Googlebot + \\Disallow: /admin/ + \\ + ; + + const rules = try parseRulesWithUserAgent(allocator, "GoogleBot", file); + defer { + freeRulesInList(allocator, rules); + allocator.free(rules); + } + + try std.testing.expectEqual(1, rules.len); + try std.testing.expectEqualStrings("/admin/", rules[0].disallow); +} + +test "Robots: matchPattern - simple prefix" { + try std.testing.expect(matchPattern("/admin", "/admin/page") != null); + try std.testing.expect(matchPattern("/admin", "/admin") != null); + try std.testing.expect(matchPattern("/admin", "/other") == null); + try std.testing.expect(matchPattern("/admin/page", "/admin") == null); +} + +test "Robots: matchPattern - single wildcard" { + try std.testing.expect(matchPattern("/admin/*", "/admin/") != null); + try 
std.testing.expect(matchPattern("/admin/*", "/admin/page") != null); + try std.testing.expect(matchPattern("/admin/*", "/admin/page/subpage") != null); + try std.testing.expect(matchPattern("/admin/*", "/other/page") == null); +} + +test "Robots: matchPattern - wildcard in middle" { + try std.testing.expect(matchPattern("/abc/*/xyz", "/abc/def/xyz") != null); + try std.testing.expect(matchPattern("/abc/*/xyz", "/abc/def/ghi/xyz") != null); + try std.testing.expect(matchPattern("/abc/*/xyz", "/abc/def") == null); + try std.testing.expect(matchPattern("/abc/*/xyz", "/other/def/xyz") == null); +} + +test "Robots: matchPattern - complex wildcard case" { + try std.testing.expect(matchPattern("/abc/*/def/xyz", "/abc/def/def/xyz") != null); + try std.testing.expect(matchPattern("/abc/*/def/xyz", "/abc/ANYTHING/def/xyz") != null); +} + +test "Robots: matchPattern - multiple wildcards" { + try std.testing.expect(matchPattern("/a/*/b/*/c", "/a/x/b/y/c") != null); + try std.testing.expect(matchPattern("/a/*/b/*/c", "/a/x/y/b/z/w/c") != null); + try std.testing.expect(matchPattern("/*.php", "/index.php") != null); + try std.testing.expect(matchPattern("/*.php", "/admin/index.php") != null); +} + +test "Robots: matchPattern - end anchor" { + try std.testing.expect(matchPattern("/*.php$", "/index.php") != null); + try std.testing.expect(matchPattern("/*.php$", "/index.php?param=value") == null); + try std.testing.expect(matchPattern("/admin$", "/admin") != null); + try std.testing.expect(matchPattern("/admin$", "/admin/") == null); + try std.testing.expect(matchPattern("/fish$", "/fish") != null); + try std.testing.expect(matchPattern("/fish$", "/fishheads") == null); +} + +test "Robots: matchPattern - wildcard with extension" { + try std.testing.expect(matchPattern("/fish*.php", "/fish.php") != null); + try std.testing.expect(matchPattern("/fish*.php", "/fishheads.php") != null); + try std.testing.expect(matchPattern("/fish*.php", "/fish/salmon.php") != null); + try 
std.testing.expect(matchPattern("/fish*.php", "/fish.asp") == null); +} + +test "Robots: matchPattern - empty and edge cases" { + try std.testing.expect(matchPattern("", "/anything") != null); + try std.testing.expect(matchPattern("/", "/") != null); + try std.testing.expect(matchPattern("*", "/anything") != null); + try std.testing.expect(matchPattern("/*", "/anything") != null); + try std.testing.expect(matchPattern("$", "") != null); +} + +test "Robots: matchPattern - real world examples" { + try std.testing.expect(matchPattern("/", "/anything") != null); + + try std.testing.expect(matchPattern("/admin/", "/admin/page") != null); + try std.testing.expect(matchPattern("/admin/", "/public/page") == null); + + try std.testing.expect(matchPattern("/*.pdf$", "/document.pdf") != null); + try std.testing.expect(matchPattern("/*.pdf$", "/document.pdf.bak") == null); + + try std.testing.expect(matchPattern("/*?", "/page?param=value") != null); + try std.testing.expect(matchPattern("/*?", "/page") == null); +} + +test "Robots: isAllowed - basic allow/disallow" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "MyBot", + \\User-agent: MyBot + \\Disallow: /admin/ + \\Allow: /public/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/") == true); + try std.testing.expect(robots.isAllowed("/public/page") == true); + try std.testing.expect(robots.isAllowed("/admin/secret") == false); + try std.testing.expect(robots.isAllowed("/other/page") == true); +} + +test "Robots: isAllowed - longest match wins" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "TestBot", + \\User-agent: TestBot + \\Disallow: /admin/ + \\Allow: /admin/public/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/secret") == false); + try std.testing.expect(robots.isAllowed("/admin/public/page") == true); + try 
std.testing.expect(robots.isAllowed("/admin/public/") == true); +} + +test "Robots: isAllowed - specific user-agent vs wildcard" { + const allocator = std.testing.allocator; + + var robots1 = try Robots.fromBytes(allocator, "Googlebot", + \\User-agent: Googlebot + \\Disallow: /private/ + \\ + \\User-agent: * + \\Disallow: /admin/ + \\ + ); + defer robots1.deinit(allocator); + + try std.testing.expect(robots1.isAllowed("/private/page") == false); + try std.testing.expect(robots1.isAllowed("/admin/page") == true); + + // Test with other bot (should use wildcard) + var robots2 = try Robots.fromBytes(allocator, "OtherBot", + \\User-agent: Googlebot + \\Disallow: /private/ + \\ + \\User-agent: * + \\Disallow: /admin/ + \\ + ); + defer robots2.deinit(allocator); + + try std.testing.expect(robots2.isAllowed("/private/page") == true); + try std.testing.expect(robots2.isAllowed("/admin/page") == false); +} + +test "Robots: isAllowed - case insensitive user-agent" { + const allocator = std.testing.allocator; + + var robots1 = try Robots.fromBytes(allocator, "googlebot", + \\User-agent: GoogleBot + \\Disallow: /private/ + \\ + ); + defer robots1.deinit(allocator); + try std.testing.expect(robots1.isAllowed("/private/") == false); + + var robots2 = try Robots.fromBytes(allocator, "GOOGLEBOT", + \\User-agent: GoogleBot + \\Disallow: /private/ + \\ + ); + defer robots2.deinit(allocator); + try std.testing.expect(robots2.isAllowed("/private/") == false); + + var robots3 = try Robots.fromBytes(allocator, "GoOgLeBoT", + \\User-agent: GoogleBot + \\Disallow: /private/ + \\ + ); + defer robots3.deinit(allocator); + try std.testing.expect(robots3.isAllowed("/private/") == false); +} + +test "Robots: isAllowed - merged rules for same agent" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Googlebot", + \\User-agent: Googlebot + \\Disallow: /admin/ + \\ + \\User-agent: Googlebot + \\Disallow: /private/ + \\ + ); + defer 
robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/page") == false); + try std.testing.expect(robots.isAllowed("/private/page") == false); + try std.testing.expect(robots.isAllowed("/public/page") == true); +} + +test "Robots: isAllowed - wildcards in patterns" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Bot", + \\User-agent: Bot + \\Disallow: /*.php$ + \\Allow: /index.php$ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/page.php") == false); + try std.testing.expect(robots.isAllowed("/index.php") == true); + try std.testing.expect(robots.isAllowed("/page.php?param=1") == true); + try std.testing.expect(robots.isAllowed("/page.html") == true); +} + +test "Robots: isAllowed - empty disallow allows everything" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Bot", + \\User-agent: Bot + \\Disallow: + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/anything") == true); + try std.testing.expect(robots.isAllowed("/") == true); +} + +test "Robots: isAllowed - no rules" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Bot", ""); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/anything") == true); +} + +test "Robots: isAllowed - disallow all" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Bot", + \\User-agent: Bot + \\Disallow: / + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/") == false); + try std.testing.expect(robots.isAllowed("/anything") == false); + try std.testing.expect(robots.isAllowed("/admin/page") == false); +} + +test "Robots: isAllowed - multiple user-agents in same entry" { + const allocator = std.testing.allocator; + + var robots1 = try Robots.fromBytes(allocator, "Googlebot", + 
\\User-agent: Googlebot + \\User-agent: Bingbot + \\Disallow: /private/ + \\ + ); + defer robots1.deinit(allocator); + try std.testing.expect(robots1.isAllowed("/private/") == false); + + var robots2 = try Robots.fromBytes(allocator, "Bingbot", + \\User-agent: Googlebot + \\User-agent: Bingbot + \\Disallow: /private/ + \\ + ); + defer robots2.deinit(allocator); + try std.testing.expect(robots2.isAllowed("/private/") == false); + + var robots3 = try Robots.fromBytes(allocator, "OtherBot", + \\User-agent: Googlebot + \\User-agent: Bingbot + \\Disallow: /private/ + \\ + ); + defer robots3.deinit(allocator); + try std.testing.expect(robots3.isAllowed("/private/") == true); +} + +test "Robots: isAllowed - wildcard fallback" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "UnknownBot", + \\User-agent: * + \\Disallow: /admin/ + \\Allow: /admin/public/ + \\ + \\User-agent: Googlebot + \\Disallow: /private/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/secret") == false); + try std.testing.expect(robots.isAllowed("/admin/public/page") == true); + try std.testing.expect(robots.isAllowed("/private/") == true); +} + +test "Robots: isAllowed - complex real-world example" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "MyBot", + \\User-agent: * + \\Disallow: /cgi-bin/ + \\Disallow: /tmp/ + \\Disallow: /private/ + \\ + \\User-agent: MyBot + \\Disallow: /admin/ + \\Disallow: /*.pdf$ + \\Allow: /public/*.pdf$ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/") == true); + try std.testing.expect(robots.isAllowed("/admin/dashboard") == false); + try std.testing.expect(robots.isAllowed("/docs/guide.pdf") == false); + try std.testing.expect(robots.isAllowed("/public/manual.pdf") == true); + try std.testing.expect(robots.isAllowed("/page.html") == true); + try 
std.testing.expect(robots.isAllowed("/cgi-bin/script.sh") == true); +} + +test "Robots: isAllowed - order doesn't matter for same length" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Bot", + \\User-agent: Bot + \\ # WOW!! + \\Allow: /page + \\Disallow: /page + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/page") == false); +} + +test "Robots: isAllowed - empty file uses wildcard defaults" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "MyBot", + \\User-agent: * # ABCDEF!!! + \\Disallow: /admin/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/") == false); + try std.testing.expect(robots.isAllowed("/public/") == true); +} +test "Robots: isAllowed - wildcard entry with multiple user-agents including specific" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Googlebot", + \\User-agent: * + \\User-agent: Googlebot + \\Disallow: /shared/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/shared/") == false); + try std.testing.expect(robots.isAllowed("/other/") == true); + + var robots2 = try Robots.fromBytes(allocator, "Bingbot", + \\User-agent: * + \\User-agent: Googlebot + \\Disallow: /shared/ + \\ + ); + defer robots2.deinit(allocator); + + try std.testing.expect(robots2.isAllowed("/shared/") == false); +} + +test "Robots: isAllowed - specific agent appears after wildcard in entry" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "MyBot", + \\User-agent: * + \\User-agent: MyBot + \\User-agent: Bingbot + \\Disallow: /admin/ + \\Allow: /admin/public/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/secret") == false); + try std.testing.expect(robots.isAllowed("/admin/public/page") == true); +} + +test "Robots: 
isAllowed - wildcard should not override specific entry" { + const allocator = std.testing.allocator; + + var robots = try Robots.fromBytes(allocator, "Googlebot", + \\User-agent: Googlebot + \\Disallow: /private/ + \\ + \\User-agent: * + \\User-agent: Googlebot + \\Disallow: /admin/ + \\ + ); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/private/") == false); + try std.testing.expect(robots.isAllowed("/admin/") == false); +} + +test "Robots: isAllowed - Google's real robots.txt" { + const allocator = std.testing.allocator; + + // Simplified version of google.com/robots.txt + const google_robots = + \\User-agent: * + \\User-agent: Yandex + \\Disallow: /search + \\Allow: /search/about + \\Allow: /search/howsearchworks + \\Disallow: /imgres + \\Disallow: /m? + \\Disallow: /m/ + \\Allow: /m/finance + \\Disallow: /maps/ + \\Allow: /maps/$ + \\Allow: /maps/@ + \\Allow: /maps/dir/ + \\Disallow: /shopping? + \\Allow: /shopping?udm=28$ + \\ + \\User-agent: AdsBot-Google + \\Disallow: /maps/api/js/ + \\Allow: /maps/api/js + \\Disallow: /maps/api/staticmap + \\ + \\User-agent: Yandex + \\Disallow: /about/careers/applications/jobs/results + \\ + \\User-agent: facebookexternalhit + \\User-agent: Twitterbot + \\Allow: /imgres + \\Allow: /search + \\Disallow: /groups + \\Disallow: /m/ + \\ + ; + + var regular_bot = try Robots.fromBytes(allocator, "Googlebot", google_robots); + defer regular_bot.deinit(allocator); + + try std.testing.expect(regular_bot.isAllowed("/") == true); + try std.testing.expect(regular_bot.isAllowed("/search") == false); + try std.testing.expect(regular_bot.isAllowed("/search/about") == true); + try std.testing.expect(regular_bot.isAllowed("/search/howsearchworks") == true); + try std.testing.expect(regular_bot.isAllowed("/imgres") == false); + try std.testing.expect(regular_bot.isAllowed("/m/finance") == true); + try std.testing.expect(regular_bot.isAllowed("/m/other") == false); + try 
std.testing.expect(regular_bot.isAllowed("/maps/") == true); + try std.testing.expect(regular_bot.isAllowed("/maps/@") == true); + try std.testing.expect(regular_bot.isAllowed("/shopping?udm=28") == true); + try std.testing.expect(regular_bot.isAllowed("/shopping?udm=28&extra") == false); + + var adsbot = try Robots.fromBytes(allocator, "AdsBot-Google", google_robots); + defer adsbot.deinit(allocator); + + try std.testing.expect(adsbot.isAllowed("/maps/api/js") == true); + try std.testing.expect(adsbot.isAllowed("/maps/api/js/") == false); + try std.testing.expect(adsbot.isAllowed("/maps/api/staticmap") == false); + + var twitterbot = try Robots.fromBytes(allocator, "Twitterbot", google_robots); + defer twitterbot.deinit(allocator); + + try std.testing.expect(twitterbot.isAllowed("/imgres") == true); + try std.testing.expect(twitterbot.isAllowed("/search") == true); + try std.testing.expect(twitterbot.isAllowed("/groups") == false); + try std.testing.expect(twitterbot.isAllowed("/m/") == false); +} From 48ebc46c5fa9bd8261c07e5cb8b5c7343b573ea5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Fri, 23 Jan 2026 16:29:41 -0800 Subject: [PATCH 02/14] add getRobotsUrl to URL --- src/browser/URL.zig | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/browser/URL.zig b/src/browser/URL.zig index d36673cc..1e5d272a 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -502,6 +502,16 @@ pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []cons return buf.items[0 .. 
buf.items.len - 1 :0]; } +pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) !?[:0]const u8 { + const origin = try getOrigin(arena, url) orelse return null; + return try std.fmt.allocPrintSentinel( + arena, + "{s}/robots.txt", + .{origin}, + 0, + ); +} + const testing = @import("../testing.zig"); test "URL: isCompleteHTTPUrl" { try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about")); @@ -778,3 +788,31 @@ test "URL: concatQueryString" { try testing.expectEqual("https://www.lightpanda.io/index?1=2&a=b", url); } } + +test "URL: getRobotsUrl" { + defer testing.reset(); + const arena = testing.arena_allocator; + + { + const url = try getRobotsUrl(arena, "https://www.lightpanda.io"); + try testing.expectEqual("https://www.lightpanda.io/robots.txt", url.?); + } + + { + const url = try getRobotsUrl(arena, "https://www.lightpanda.io/some/path"); + try testing.expectString("https://www.lightpanda.io/robots.txt", url.?); + } + + { + const url = try getRobotsUrl(arena, "https://www.lightpanda.io:8080/page"); + try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url.?); + } + { + const url = try getRobotsUrl(arena, "http://example.com/deep/nested/path?query=value#fragment"); + try testing.expectString("http://example.com/robots.txt", url.?); + } + { + const url = try getRobotsUrl(arena, "https://user:pass@example.com/page"); + try testing.expectString("https://example.com/robots.txt", url.?); + } +} From 1a246f2e380f965a4602b4b2d9c5db8c72864dee Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Sat, 31 Jan 2026 18:41:55 -0800 Subject: [PATCH 03/14] robots in the actual http client --- src/App.zig | 5 + src/Config.zig | 18 ++ src/browser/Page.zig | 1 + src/browser/Robots.zig | 239 +++++++++++++++++----- src/browser/ScriptManager.zig | 3 + src/browser/URL.zig | 14 +- src/browser/webapi/net/Fetch.zig | 1 + src/browser/webapi/net/XMLHttpRequest.zig | 1 + src/http/Client.zig | 137 ++++++++++++- 9 files changed, 357 insertions(+), 62 
deletions(-) diff --git a/src/App.zig b/src/App.zig index 21b0ecc6..76ffd396 100644 --- a/src/App.zig +++ b/src/App.zig @@ -25,6 +25,7 @@ const Config = @import("Config.zig"); const Snapshot = @import("browser/js/Snapshot.zig"); const Platform = @import("browser/js/Platform.zig"); const Telemetry = @import("telemetry/telemetry.zig").Telemetry; +const RobotStore = @import("browser/Robots.zig").RobotStore; pub const Http = @import("http/Http.zig"); pub const ArenaPool = @import("ArenaPool.zig"); @@ -38,6 +39,7 @@ snapshot: Snapshot, telemetry: Telemetry, allocator: Allocator, arena_pool: ArenaPool, +robots: RobotStore, app_dir_path: ?[]const u8, shutdown: bool = false, @@ -57,6 +59,8 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { app.snapshot = try Snapshot.load(); errdefer app.snapshot.deinit(); + app.robots = RobotStore.init(allocator); + app.app_dir_path = getAndMakeAppDir(allocator); app.telemetry = try Telemetry.init(app, config.mode); @@ -79,6 +83,7 @@ pub fn deinit(self: *App) void { self.app_dir_path = null; } self.telemetry.deinit(); + self.robots.deinit(); self.http.deinit(); self.snapshot.deinit(); self.platform.deinit(); diff --git a/src/Config.zig b/src/Config.zig index fc4ebcdd..0f285f98 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -57,6 +57,13 @@ pub fn tlsVerifyHost(self: *const Config) bool { }; } +pub fn obeyRobots(self: *const Config) bool { + return switch (self.mode) { + inline .serve, .fetch => |opts| opts.common.obey_robots, + else => unreachable, + }; +} + pub fn httpProxy(self: *const Config) ?[:0]const u8 { return switch (self.mode) { inline .serve, .fetch => |opts| opts.common.http_proxy, @@ -158,6 +165,7 @@ pub const Fetch = struct { }; pub const Common = struct { + obey_robots: bool = false, proxy_bearer_token: ?[:0]const u8 = null, http_proxy: ?[:0]const u8 = null, http_max_concurrent: ?u8 = null, @@ -223,6 +231,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ advanced option which 
should only be set if you understand \\ and accept the risk of disabling host verification. \\ + \\--obey_robots + \\ Fetches and obeys the robots.txt (if available) of the web pages + \\ we make requests towards. + \\ Defaults to false. + \\ \\--http_proxy The HTTP proxy to use for all HTTP requests. \\ A username:password can be included for basic authentication. \\ Defaults to none. @@ -613,6 +626,11 @@ fn parseCommonArg( return true; } + if (std.mem.eql(u8, "--obey_robots", opt)) { + common.obey_robots = true; + return true; + } + if (std.mem.eql(u8, "--http_proxy", opt)) { const str = args.next() orelse { log.fatal(.app, "missing argument value", .{ .arg = "--http_proxy" }); diff --git a/src/browser/Page.zig b/src/browser/Page.zig index 8e86c47a..d879a813 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -559,6 +559,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi .headers = headers, .body = opts.body, .cookie_jar = &self._session.cookie_jar, + .robots = &self._session.browser.app.robots, .resource_type = .document, .notification = self._session.notification, .header_callback = pageHeaderDoneCallback, diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig index 5a4f9033..2aff774a 100644 --- a/src/browser/Robots.zig +++ b/src/browser/Robots.zig @@ -33,13 +33,80 @@ pub const Key = enum { pub const Robots = @This(); pub const empty: Robots = .{ .rules = &.{} }; +pub const RobotStore = struct { + const RobotsEntry = union(enum) { + present: Robots, + absent, + }; + + pub const RobotsMap = std.HashMapUnmanaged([]const u8, RobotsEntry, struct { + const Context = @This(); + + pub fn hash(_: Context, value: []const u8) u32 { + var hasher = std.hash.Wyhash.init(value.len); + for (value) |c| { + std.hash.autoHash(&hasher, std.ascii.toLower(c)); + } + return @truncate(hasher.final()); + } + + pub fn eql(_: Context, a: []const u8, b: []const u8) bool { + if (a.len != b.len) return false; + return 
std.ascii.eqlIgnoreCase(a, b); + } + }, 80); + + allocator: std.mem.Allocator, + map: RobotsMap, + + pub fn init(allocator: std.mem.Allocator) RobotStore { + return .{ .allocator = allocator, .map = .empty }; + } + + pub fn deinit(self: *RobotStore) void { + var iter = self.map.iterator(); + + while (iter.next()) |entry| { + self.allocator.free(entry.key_ptr.*); + + switch (entry.value_ptr.*) { + .present => |*robots| robots.deinit(self.allocator), + .absent => {}, + } + } + + self.map.deinit(self.allocator); + } + + pub fn get(self: *RobotStore, url: []const u8) ?RobotsEntry { + return self.map.get(url); + } + + pub fn robotsFromBytes(self: *RobotStore, user_agent: []const u8, bytes: []const u8) !Robots { + return try Robots.fromBytes(self.allocator, user_agent, bytes); + } + + pub fn put(self: *RobotStore, url: []const u8, robots: Robots) !void { + const duped = try self.allocator.dupe(u8, url); + try self.map.put(self.allocator, duped, .{ .present = robots }); + } + + pub fn putAbsent(self: *RobotStore, url: []const u8) !void { + const duped = try self.allocator.dupe(u8, url); + try self.map.put(self.allocator, duped, .absent); + } +}; + rules: []const Rule, -const State = enum { - not_in_entry, - in_other_entry, - in_our_entry, - in_wildcard_entry, +const State = struct { + entry: enum { + not_in_entry, + in_other_entry, + in_our_entry, + in_wildcard_entry, + }, + has_rules: bool = false, }; fn freeRulesInList(allocator: std.mem.Allocator, rules: []const Rule) void { @@ -62,7 +129,7 @@ fn parseRulesWithUserAgent( var wildcard_rules: std.ArrayList(Rule) = .empty; defer wildcard_rules.deinit(allocator); - var state: State = .not_in_entry; + var state: State = .{ .entry = .not_in_entry, .has_rules = false }; var iter = std.mem.splitScalar(u8, bytes, '\n'); while (iter.next()) |line| { @@ -78,7 +145,6 @@ fn parseRulesWithUserAgent( trimmed; if (true_line.len == 0) { - state = .not_in_entry; continue; } @@ -94,55 +160,69 @@ fn parseRulesWithUserAgent( const value = 
std.mem.trim(u8, true_line[colon_idx + 1 ..], &std.ascii.whitespace); switch (key) { - .@"user-agent" => switch (state) { - .in_other_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } - }, - .in_our_entry => {}, - .in_wildcard_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } - }, - .not_in_entry => { - if (std.ascii.eqlIgnoreCase(user_agent, value)) { - state = .in_our_entry; - } else if (std.mem.eql(u8, "*", value)) { - state = .in_wildcard_entry; - } else { - state = .in_other_entry; - } - }, + .@"user-agent" => { + if (state.has_rules) { + state = .{ .entry = .not_in_entry, .has_rules = false }; + } + + switch (state.entry) { + .in_other_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } + }, + .in_our_entry => {}, + .in_wildcard_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } + }, + .not_in_entry => { + if (std.ascii.eqlIgnoreCase(user_agent, value)) { + state.entry = .in_our_entry; + } else if (std.mem.eql(u8, "*", value)) { + state.entry = .in_wildcard_entry; + } else { + state.entry = .in_other_entry; + } + }, + } }, - .allow => switch (state) { - .in_our_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try rules.append(allocator, .{ .allow = duped_value }); - }, - .in_other_entry => {}, - .in_wildcard_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try wildcard_rules.append(allocator, .{ .allow = duped_value }); - }, - .not_in_entry => return error.UnexpectedRule, + .allow => { + defer state.has_rules = true; + + switch (state.entry) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .allow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + 
const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .allow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + } }, - .disallow => switch (state) { - .in_our_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try rules.append(allocator, .{ .disallow = duped_value }); - }, - .in_other_entry => {}, - .in_wildcard_entry => { - const duped_value = try allocator.dupe(u8, value); - errdefer allocator.free(duped_value); - try wildcard_rules.append(allocator, .{ .disallow = duped_value }); - }, - .not_in_entry => return error.UnexpectedRule, + .disallow => { + defer state.has_rules = true; + + switch (state.entry) { + .in_our_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try rules.append(allocator, .{ .disallow = duped_value }); + }, + .in_other_entry => {}, + .in_wildcard_entry => { + const duped_value = try allocator.dupe(u8, value); + errdefer allocator.free(duped_value); + try wildcard_rules.append(allocator, .{ .disallow = duped_value }); + }, + .not_in_entry => return error.UnexpectedRule, + } }, } } @@ -737,3 +817,54 @@ test "Robots: isAllowed - Google's real robots.txt" { try std.testing.expect(twitterbot.isAllowed("/groups") == false); try std.testing.expect(twitterbot.isAllowed("/m/") == false); } + +test "Robots: user-agent after rules starts new entry" { + const allocator = std.testing.allocator; + + const file = + \\User-agent: Bot1 + \\User-agent: Bot2 + \\Disallow: /admin/ + \\Allow: /public/ + \\User-agent: Bot3 + \\Disallow: /private/ + \\ + ; + + var robots1 = try Robots.fromBytes(allocator, "Bot1", file); + defer robots1.deinit(allocator); + try std.testing.expect(robots1.isAllowed("/admin/") == false); + try std.testing.expect(robots1.isAllowed("/public/") == true); + try std.testing.expect(robots1.isAllowed("/private/") == true); + + var 
robots2 = try Robots.fromBytes(allocator, "Bot2", file); + defer robots2.deinit(allocator); + try std.testing.expect(robots2.isAllowed("/admin/") == false); + try std.testing.expect(robots2.isAllowed("/public/") == true); + try std.testing.expect(robots2.isAllowed("/private/") == true); + + var robots3 = try Robots.fromBytes(allocator, "Bot3", file); + defer robots3.deinit(allocator); + try std.testing.expect(robots3.isAllowed("/admin/") == true); + try std.testing.expect(robots3.isAllowed("/public/") == true); + try std.testing.expect(robots3.isAllowed("/private/") == false); +} + +test "Robots: blank lines don't end entries" { + const allocator = std.testing.allocator; + + const file = + \\User-agent: MyBot + \\Disallow: /admin/ + \\ + \\ + \\Allow: /public/ + \\ + ; + + var robots = try Robots.fromBytes(allocator, "MyBot", file); + defer robots.deinit(allocator); + + try std.testing.expect(robots.isAllowed("/admin/") == false); + try std.testing.expect(robots.isAllowed("/public/") == true); +} diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 344d6232..01c56a81 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -265,6 +265,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .headers = try self.getHeaders(url), .blocking = is_blocking, .cookie_jar = &page._session.cookie_jar, + .robots = &page._session.browser.app.robots, .resource_type = .script, .notification = page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, @@ -380,6 +381,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const .method = .GET, .headers = try self.getHeaders(url), .cookie_jar = &self.page._session.cookie_jar, + .robots = &self.page._session.browser.app.robots, .resource_type = .script, .notification = self.page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else 
null, @@ -484,6 +486,7 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C .resource_type = .script, .cookie_jar = &self.page._session.cookie_jar, .notification = self.page._session.notification, + .robots = &self.page._session.browser.app.robots, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 1e5d272a..716480b1 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -502,8 +502,8 @@ pub fn concatQueryString(arena: Allocator, url: []const u8, query_string: []cons return buf.items[0 .. buf.items.len - 1 :0]; } -pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) !?[:0]const u8 { - const origin = try getOrigin(arena, url) orelse return null; +pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 { + const origin = try getOrigin(arena, url) orelse return error.NoOrigin; return try std.fmt.allocPrintSentinel( arena, "{s}/robots.txt", @@ -795,24 +795,24 @@ test "URL: getRobotsUrl" { { const url = try getRobotsUrl(arena, "https://www.lightpanda.io"); - try testing.expectEqual("https://www.lightpanda.io/robots.txt", url.?); + try testing.expectEqual("https://www.lightpanda.io/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://www.lightpanda.io/some/path"); - try testing.expectString("https://www.lightpanda.io/robots.txt", url.?); + try testing.expectString("https://www.lightpanda.io/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://www.lightpanda.io:8080/page"); - try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url.?); + try testing.expectString("https://www.lightpanda.io:8080/robots.txt", url); } { const url = try getRobotsUrl(arena, "http://example.com/deep/nested/path?query=value#fragment"); - try testing.expectString("http://example.com/robots.txt", url.?); + try 
testing.expectString("http://example.com/robots.txt", url); } { const url = try getRobotsUrl(arena, "https://user:pass@example.com/page"); - try testing.expectString("https://example.com/robots.txt", url.?); + try testing.expectString("https://example.com/robots.txt", url); } } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index a66fb311..988e9a53 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -79,6 +79,7 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise { .resource_type = .fetch, .cookie_jar = &page._session.cookie_jar, .notification = page._session.notification, + .robots = &page._session.browser.app.robots, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 7c266e1a..296048b3 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -208,6 +208,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { .headers = headers, .body = self._request_body, .cookie_jar = &page._session.cookie_jar, + .robots = &page._session.browser.app.robots, .resource_type = .xhr, .notification = page._session.notification, .start_callback = httpStartCallback, diff --git a/src/http/Client.zig b/src/http/Client.zig index cc61b681..a9c21e0c 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -27,6 +27,8 @@ const Config = @import("../Config.zig"); const URL = @import("../browser/URL.zig"); const Notification = @import("../Notification.zig"); const CookieJar = @import("../browser/webapi/storage/Cookie.zig").Jar; +const Robots = @import("../browser/Robots.zig"); +const RobotStore = Robots.RobotStore; const c = Http.c; const posix = std.posix; @@ -217,6 +219,36 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { } pub fn request(self: *Client, req: 
Request) !void { + if (self.config.obeyRobots()) { + const robots_url = try URL.getRobotsUrl(self.allocator, req.url); + + // If we have this robots cached, we can take a fast path. + if (req.robots.get(robots_url)) |robot_entry| { + defer self.allocator.free(robots_url); + + switch (robot_entry) { + // If we have a found robots entry, we check it. + .present => |robots| { + const path = URL.getPathname(req.url); + if (!robots.isAllowed(path)) { + req.error_callback(req.ctx, error.RobotsBlocked); + return; + } + }, + // Otherwise, we assume we won't find it again. + .absent => {}, + } + + return self.processRequest(req); + } + + return self.fetchRobotsThenProcessRequest(robots_url, req); + } + + return self.processRequest(req); +} + +fn processRequest(self: *Client, req: Request) !void { const transfer = try self.makeTransfer(req); transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); @@ -246,6 +278,108 @@ pub fn request(self: *Client, req: Request) !void { } } +const RobotsRequestContext = struct { + client: *Client, + req: Request, + robots_url: [:0]const u8, + buffer: std.ArrayList(u8), + status: u16 = 0, +}; + +fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { + const ctx = try self.allocator.create(RobotsRequestContext); + ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; + + const headers = try self.newHeaders(); + + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + try self.processRequest(.{ + .ctx = ctx, + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .cookie_jar = req.cookie_jar, + .notification = req.notification, + .robots = req.robots, + .resource_type = .fetch, + .header_callback = robotsHeaderCallback, + .data_callback = robotsDataCallback, + .done_callback = robotsDoneCallback, + .error_callback = robotsErrorCallback, + }); +} + +fn robotsHeaderCallback(transfer: *Http.Transfer) 
!bool { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); + + if (transfer.response_header) |hdr| { + log.debug(.browser, "robots status", .{ .status = hdr.status }); + ctx.status = hdr.status; + } + + if (transfer.getContentLength()) |cl| { + try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl); + } + + return true; +} + +fn robotsDataCallback(transfer: *Http.Transfer, data: []const u8) !void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); + try ctx.buffer.appendSlice(ctx.client.allocator, data); +} + +fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); + defer ctx.client.allocator.destroy(ctx); + defer ctx.buffer.deinit(ctx.client.allocator); + defer ctx.client.allocator.free(ctx.robots_url); + + var allowed = true; + + if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) { + const robots = try ctx.req.robots.robotsFromBytes( + ctx.client.config.http_headers.user_agent, + ctx.buffer.items, + ); + + try ctx.req.robots.put(ctx.robots_url, robots); + + const path = URL.getPathname(ctx.req.url); + allowed = robots.isAllowed(path); + } + + // If not found, store as Not Found. 
+ if (ctx.status == 404) { + log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); + try ctx.req.robots.putAbsent(ctx.robots_url); + } + + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = ctx.req.url }); + ctx.req.error_callback(ctx.req.ctx, error.RobotsBlocked); + return; + } + + // Now process the original request + try ctx.client.processRequest(ctx.req); +} + +fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); + defer ctx.client.allocator.destroy(ctx); + defer ctx.buffer.deinit(ctx.client.allocator); + defer ctx.client.allocator.free(ctx.robots_url); + + log.warn(.http, "robots fetch failed", .{ .err = err }); + + // On error, allow the request to proceed + ctx.client.processRequest(ctx.req) catch |e| { + ctx.req.error_callback(ctx.req.ctx, e); + }; +} + fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the @@ -565,7 +699,7 @@ fn processMessages(self: *Client) !bool { // In case of auth challenge // TODO give a way to configure the number of auth retries. 
- if (transfer._auth_challenge != null and transfer._tries < 10) { + if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); if (wait_for_interception) { @@ -784,6 +918,7 @@ pub const Request = struct { headers: Http.Headers, body: ?[]const u8 = null, cookie_jar: *CookieJar, + robots: *RobotStore, resource_type: ResourceType, credentials: ?[:0]const u8 = null, notification: *Notification, From e4f250435d95d9f9475bb489d912b524c18a33f6 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:03:34 -0800 Subject: [PATCH 04/14] include robots url in debug log --- src/http/Client.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index a9c21e0c..d6d77271 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -314,7 +314,7 @@ fn robotsHeaderCallback(transfer: *Http.Transfer) !bool { const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); if (transfer.response_header) |hdr| { - log.debug(.browser, "robots status", .{ .status = hdr.status }); + log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); ctx.status = hdr.status; } From b6af5884b11c387b4905678d6d1e6feda7adcca2 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:05:24 -0800 Subject: [PATCH 05/14] use RobotsRequestContext deinit --- src/http/Client.zig | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index d6d77271..f3fa923f 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -284,6 +284,12 @@ const RobotsRequestContext = struct { robots_url: [:0]const u8, buffer: std.ArrayList(u8), status: u16 = 0, + + pub fn deinit(self: *RobotsRequestContext) void { + 
self.client.allocator.free(self.robots_url); + self.buffer.deinit(self.client.allocator); + self.client.allocator.destroy(self); + } }; fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { @@ -332,9 +338,7 @@ fn robotsDataCallback(transfer: *Http.Transfer, data: []const u8) !void { fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.client.allocator.destroy(ctx); - defer ctx.buffer.deinit(ctx.client.allocator); - defer ctx.client.allocator.free(ctx.robots_url); + defer ctx.deinit(); var allowed = true; @@ -348,10 +352,7 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { const path = URL.getPathname(ctx.req.url); allowed = robots.isAllowed(path); - } - - // If not found, store as Not Found. - if (ctx.status == 404) { + } else if (ctx.status == 404) { log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); try ctx.req.robots.putAbsent(ctx.robots_url); } @@ -368,9 +369,7 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr)); - defer ctx.client.allocator.destroy(ctx); - defer ctx.buffer.deinit(ctx.client.allocator); - defer ctx.client.allocator.free(ctx.robots_url); + defer ctx.deinit(); log.warn(.http, "robots fetch failed", .{ .err = err }); From f9104c71f6b55b6e7e9c156c0966c754cb667282 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:10:07 -0800 Subject: [PATCH 06/14] log instead of returning error on unexpected rule --- src/browser/Robots.zig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig index 2aff774a..a11026cd 100644 --- a/src/browser/Robots.zig +++ b/src/browser/Robots.zig @@ -17,6 +17,7 @@ // along with this program. If not, see . 
const std = @import("std"); +const log = @import("../log.zig"); pub const Rule = union(enum) { allow: []const u8, @@ -203,7 +204,10 @@ fn parseRulesWithUserAgent( errdefer allocator.free(duped_value); try wildcard_rules.append(allocator, .{ .allow = duped_value }); }, - .not_in_entry => return error.UnexpectedRule, + .not_in_entry => { + log.warn(.browser, "robots unexpected rule", .{ .rule = "allow" }); + continue; + }, } }, .disallow => { @@ -221,7 +225,10 @@ fn parseRulesWithUserAgent( errdefer allocator.free(duped_value); try wildcard_rules.append(allocator, .{ .disallow = duped_value }); }, - .not_in_entry => return error.UnexpectedRule, + .not_in_entry => { + log.warn(.browser, "robots unexpected rule", .{ .rule = "disallow" }); + continue; + }, } }, } From 29ee7d41f5cf62de729e3a96586df9e2eeef1119 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:30:27 -0800 Subject: [PATCH 07/14] queue requests to run after robots is fetched --- src/http/Client.zig | 99 +++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 30 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index f3fa923f..60b01047 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -87,6 +87,10 @@ queue: TransferQueue, // The main app allocator allocator: Allocator, +// Queue of requests that depend on a robots.txt. +// Allows us to fetch the robots.txt just once. +pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, + // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These wil come and go with each request. 
@@ -165,6 +169,13 @@ pub fn deinit(self: *Client) void { _ = c.curl_multi_cleanup(self.multi); self.transfer_pool.deinit(); + + var robots_iter = self.pending_robots_queue.iterator(); + while (robots_iter.next()) |entry| { + entry.value_ptr.deinit(self.allocator); + } + self.pending_robots_queue.deinit(self.allocator); + self.allocator.destroy(self); } @@ -254,7 +265,10 @@ fn processRequest(self: *Client, req: Request) !void { transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_intercept, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); + transfer.req.notification.dispatch(.http_request_intercept, &.{ + .transfer = transfer, + .wait_for_interception = &wait_for_interception, + }); if (wait_for_interception == false) { // request not intercepted, process it normally return self.process(transfer); @@ -293,27 +307,36 @@ const RobotsRequestContext = struct { }; fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void { - const ctx = try self.allocator.create(RobotsRequestContext); - ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; + const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url); - const headers = try self.newHeaders(); + if (!entry.found_existing) { + // If we aren't already fetching this robots, + // we want to create a new queue for it and add this request into it. 
+ entry.value_ptr.* = .empty; - log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); - try self.processRequest(.{ - .ctx = ctx, - .url = robots_url, - .method = .GET, - .headers = headers, - .blocking = false, - .cookie_jar = req.cookie_jar, - .notification = req.notification, - .robots = req.robots, - .resource_type = .fetch, - .header_callback = robotsHeaderCallback, - .data_callback = robotsDataCallback, - .done_callback = robotsDoneCallback, - .error_callback = robotsErrorCallback, - }); + const ctx = try self.allocator.create(RobotsRequestContext); + ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty }; + const headers = try self.newHeaders(); + + log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); + try self.processRequest(.{ + .ctx = ctx, + .url = robots_url, + .method = .GET, + .headers = headers, + .blocking = false, + .cookie_jar = req.cookie_jar, + .notification = req.notification, + .robots = req.robots, + .resource_type = .fetch, + .header_callback = robotsHeaderCallback, + .data_callback = robotsDataCallback, + .done_callback = robotsDoneCallback, + .error_callback = robotsErrorCallback, + }); + } + + try entry.value_ptr.append(self.allocator, req); } fn robotsHeaderCallback(transfer: *Http.Transfer) !bool { @@ -357,14 +380,22 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { try ctx.req.robots.putAbsent(ctx.robots_url); } - if (!allowed) { - log.warn(.http, "blocked by robots", .{ .url = ctx.req.url }); - ctx.req.error_callback(ctx.req.ctx, error.RobotsBlocked); - return; + const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable; + defer { + queued.deinit(ctx.client.allocator); + _ = ctx.client.pending_robots_queue.remove(ctx.robots_url); } - // Now process the original request - try ctx.client.processRequest(ctx.req); + for (queued.items) |queued_req| { + if (!allowed) { + log.warn(.http, "blocked by robots", .{ .url = queued_req.url }); 
+ queued_req.error_callback(queued_req.ctx, error.RobotsBlocked); + } else { + ctx.client.processRequest(queued_req) catch |e| { + queued_req.error_callback(queued_req.ctx, e); + }; + } + } } fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { @@ -373,10 +404,18 @@ fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void { log.warn(.http, "robots fetch failed", .{ .err = err }); - // On error, allow the request to proceed - ctx.client.processRequest(ctx.req) catch |e| { - ctx.req.error_callback(ctx.req.ctx, e); - }; + const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable; + defer { + queued.deinit(ctx.client.allocator); + _ = ctx.client.pending_robots_queue.remove(ctx.robots_url); + } + + // On error, allow all queued requests to proceed + for (queued.items) |queued_req| { + ctx.client.processRequest(queued_req) catch |e| { + queued_req.error_callback(queued_req.ctx, e); + }; + } } fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { From e620c28a1c2536bb86208c8a8a0767f4c42791c5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:35:48 -0800 Subject: [PATCH 08/14] stop leaking robots_url when in robot queue --- src/http/Client.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/http/Client.zig b/src/http/Client.zig index 60b01047..91ab02d1 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -232,6 +232,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { pub fn request(self: *Client, req: Request) !void { if (self.config.obeyRobots()) { const robots_url = try URL.getRobotsUrl(self.allocator, req.url); + errdefer self.allocator.free(robots_url); // If we have this robots cached, we can take a fast path. 
if (req.robots.get(robots_url)) |robot_entry| { @@ -334,6 +335,9 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R .done_callback = robotsDoneCallback, .error_callback = robotsErrorCallback, }); + } else { + // Not using our own robots URL, only using the one from the first request. + self.allocator.free(robots_url); } try entry.value_ptr.append(self.allocator, req); From 50aeb9ff21f571f33ca3f5116741f058b8bb81b5 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:39:39 -0800 Subject: [PATCH 09/14] add comment explaining rule choice in robots --- src/browser/Robots.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig index a11026cd..6b6062b1 100644 --- a/src/browser/Robots.zig +++ b/src/browser/Robots.zig @@ -234,6 +234,8 @@ fn parseRulesWithUserAgent( } } + // If we have rules for our specific User-Agent, we will use those rules. + // If we don't have any rules, we fallback to using the wildcard ("*") rules. 
if (rules.items.len > 0) { freeRulesInList(allocator, wildcard_rules.items); return try rules.toOwnedSlice(allocator); From a7095d7decfbe62af09cc945b8b51f93122d0edb Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Wed, 4 Feb 2026 11:49:52 -0800 Subject: [PATCH 10/14] pass robot store into Http init --- src/App.zig | 6 +++--- src/browser/Page.zig | 1 - src/browser/ScriptManager.zig | 3 --- src/browser/webapi/net/Fetch.zig | 1 - src/browser/webapi/net/XMLHttpRequest.zig | 1 - src/http/Client.zig | 15 ++++++++------- src/http/Http.zig | 5 +++-- 7 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/App.zig b/src/App.zig index 76ffd396..a4ed0e8f 100644 --- a/src/App.zig +++ b/src/App.zig @@ -50,7 +50,9 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { app.config = config; app.allocator = allocator; - app.http = try Http.init(allocator, config); + app.robots = RobotStore.init(allocator); + + app.http = try Http.init(allocator, &app.robots, config); errdefer app.http.deinit(); app.platform = try Platform.init(); @@ -59,8 +61,6 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { app.snapshot = try Snapshot.load(); errdefer app.snapshot.deinit(); - app.robots = RobotStore.init(allocator); - app.app_dir_path = getAndMakeAppDir(allocator); app.telemetry = try Telemetry.init(app, config.mode); diff --git a/src/browser/Page.zig b/src/browser/Page.zig index d879a813..8e86c47a 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -559,7 +559,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi .headers = headers, .body = opts.body, .cookie_jar = &self._session.cookie_jar, - .robots = &self._session.browser.app.robots, .resource_type = .document, .notification = self._session.notification, .header_callback = pageHeaderDoneCallback, diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 01c56a81..344d6232 100644 --- a/src/browser/ScriptManager.zig +++ 
b/src/browser/ScriptManager.zig @@ -265,7 +265,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .headers = try self.getHeaders(url), .blocking = is_blocking, .cookie_jar = &page._session.cookie_jar, - .robots = &page._session.browser.app.robots, .resource_type = .script, .notification = page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, @@ -381,7 +380,6 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const .method = .GET, .headers = try self.getHeaders(url), .cookie_jar = &self.page._session.cookie_jar, - .robots = &self.page._session.browser.app.robots, .resource_type = .script, .notification = self.page._session.notification, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, @@ -486,7 +484,6 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C .resource_type = .script, .cookie_jar = &self.page._session.cookie_jar, .notification = self.page._session.notification, - .robots = &self.page._session.browser.app.robots, .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, .header_callback = Script.headerCallback, .data_callback = Script.dataCallback, diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 988e9a53..a66fb311 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -79,7 +79,6 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise { .resource_type = .fetch, .cookie_jar = &page._session.cookie_jar, .notification = page._session.notification, - .robots = &page._session.browser.app.robots, .start_callback = httpStartCallback, .header_callback = httpHeaderDoneCallback, .data_callback = httpDataCallback, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 296048b3..7c266e1a 100644 --- 
a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -208,7 +208,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { .headers = headers, .body = self._request_body, .cookie_jar = &page._session.cookie_jar, - .robots = &page._session.browser.app.robots, .resource_type = .xhr, .notification = page._session.notification, .start_callback = httpStartCallback, diff --git a/src/http/Client.zig b/src/http/Client.zig index 91ab02d1..d65e860f 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -87,6 +87,8 @@ queue: TransferQueue, // The main app allocator allocator: Allocator, +// Reference to the App-owned Robot Store. +robot_store: *RobotStore, // Queue of requests that depend on a robots.txt. // Allows us to fetch the robots.txt just once. pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty, @@ -129,7 +131,7 @@ pub const CDPClient = struct { const TransferQueue = std.DoublyLinkedList; -pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config) !*Client { +pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, robot_store: *RobotStore, config: *const Config) !*Client { var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator); errdefer transfer_pool.deinit(); @@ -153,6 +155,7 @@ pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, config: *const Config) .multi = multi, .handles = handles, .allocator = allocator, + .robot_store = robot_store, .http_proxy = http_proxy, .use_proxy = http_proxy != null, .config = config, @@ -235,7 +238,7 @@ pub fn request(self: *Client, req: Request) !void { errdefer self.allocator.free(robots_url); // If we have this robots cached, we can take a fast path. 
- if (req.robots.get(robots_url)) |robot_entry| { + if (self.robot_store.get(robots_url)) |robot_entry| { defer self.allocator.free(robots_url); switch (robot_entry) { @@ -328,7 +331,6 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R .blocking = false, .cookie_jar = req.cookie_jar, .notification = req.notification, - .robots = req.robots, .resource_type = .fetch, .header_callback = robotsHeaderCallback, .data_callback = robotsDataCallback, @@ -370,18 +372,18 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { var allowed = true; if (ctx.status >= 200 and ctx.status < 400 and ctx.buffer.items.len > 0) { - const robots = try ctx.req.robots.robotsFromBytes( + const robots = try ctx.client.robot_store.robotsFromBytes( ctx.client.config.http_headers.user_agent, ctx.buffer.items, ); - try ctx.req.robots.put(ctx.robots_url, robots); + try ctx.client.robot_store.put(ctx.robots_url, robots); const path = URL.getPathname(ctx.req.url); allowed = robots.isAllowed(path); } else if (ctx.status == 404) { log.debug(.http, "robots not found", .{ .url = ctx.robots_url }); - try ctx.req.robots.putAbsent(ctx.robots_url); + try ctx.client.robot_store.putAbsent(ctx.robots_url); } const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable; @@ -960,7 +962,6 @@ pub const Request = struct { headers: Http.Headers, body: ?[]const u8 = null, cookie_jar: *CookieJar, - robots: *RobotStore, resource_type: ResourceType, credentials: ?[:0]const u8 = null, notification: *Notification, diff --git a/src/http/Http.zig b/src/http/Http.zig index 3d488f95..9d550148 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -30,6 +30,7 @@ pub const Transfer = Client.Transfer; const log = @import("../log.zig"); const errors = @import("errors.zig"); +const RobotStore = @import("../browser/Robots.zig").RobotStore; const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; @@ -46,7 +47,7 @@ client: *Client, ca_blob: 
?c.curl_blob, arena: ArenaAllocator, -pub fn init(allocator: Allocator, config: *const Config) !Http { +pub fn init(allocator: Allocator, robot_store: *RobotStore, config: *const Config) !Http { try errorCheck(c.curl_global_init(c.CURL_GLOBAL_SSL)); errdefer c.curl_global_cleanup(); @@ -62,7 +63,7 @@ pub fn init(allocator: Allocator, config: *const Config) !Http { ca_blob = try loadCerts(allocator, arena.allocator()); } - var client = try Client.init(allocator, ca_blob, config); + var client = try Client.init(allocator, ca_blob, robot_store, config); errdefer client.deinit(); return .{ From 34067a1d70414cc80b0e9c489cca65dc94180925 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Thu, 5 Feb 2026 08:02:35 -0800 Subject: [PATCH 11/14] only use eqlIgnoreCase for RobotStore --- src/browser/Robots.zig | 1 - 1 file changed, 1 deletion(-) diff --git a/src/browser/Robots.zig b/src/browser/Robots.zig index 6b6062b1..709481ee 100644 --- a/src/browser/Robots.zig +++ b/src/browser/Robots.zig @@ -52,7 +52,6 @@ pub const RobotStore = struct { } pub fn eql(_: Context, a: []const u8, b: []const u8) bool { - if (a.len != b.len) return false; return std.ascii.eqlIgnoreCase(a, b); } }, 80); From 46c73a05a9d24921d6b6a0686dbba28e8b82c2e3 Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 9 Feb 2026 05:35:32 -0800 Subject: [PATCH 12/14] panic instead of unreachable on robots callbacks --- src/http/Client.zig | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/http/Client.zig b/src/http/Client.zig index d65e860f..c69b0cf9 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -386,13 +386,12 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { try ctx.client.robot_store.putAbsent(ctx.robots_url); } - const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable; - defer { - queued.deinit(ctx.client.allocator); - _ = ctx.client.pending_robots_queue.remove(ctx.robots_url); - } + var queued = 
ctx.client.pending_robots_queue.fetchRemove(
+        ctx.robots_url,
+    ) orelse @panic("Client.robotsDoneCallback empty queue");
+    defer queued.value.deinit(ctx.client.allocator);
 
-    for (queued.items) |queued_req| {
+    for (queued.value.items) |queued_req| {
         if (!allowed) {
             log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
             queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
@@ -410,14 +409,13 @@ fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
 
     log.warn(.http, "robots fetch failed", .{ .err = err });
 
-    const queued = ctx.client.pending_robots_queue.getPtr(ctx.robots_url) orelse unreachable;
-    defer {
-        queued.deinit(ctx.client.allocator);
-        _ = ctx.client.pending_robots_queue.remove(ctx.robots_url);
-    }
+    var queued = ctx.client.pending_robots_queue.fetchRemove(
+        ctx.robots_url,
+    ) orelse @panic("Client.robotsErrorCallback empty queue");
+    defer queued.value.deinit(ctx.client.allocator);
 
     // On error, allow all queued requests to proceed
-    for (queued.items) |queued_req| {
+    for (queued.value.items) |queued_req| {
         ctx.client.processRequest(queued_req) catch |e| {
             queued_req.error_callback(queued_req.ctx, e);
         };

From 65c9b2a5f70d0cab99fbe5af5b3ea8dcf3525985 Mon Sep 17 00:00:00 2001
From: Muki Kiboigo
Date: Mon, 9 Feb 2026 05:51:42 -0800
Subject: [PATCH 13/14] add robotsShutdownCallback

---
 src/http/Client.zig | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/http/Client.zig b/src/http/Client.zig
index c69b0cf9..90bba041 100644
--- a/src/http/Client.zig
+++ b/src/http/Client.zig
@@ -314,11 +314,14 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
     const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url);
 
     if (!entry.found_existing) {
+        errdefer self.allocator.free(robots_url);
+
         // If we aren't already fetching this robots,
         // we want to create a new queue for it and add this request into it.
entry.value_ptr.* = .empty;
 
         const ctx = try self.allocator.create(RobotsRequestContext);
+        errdefer self.allocator.destroy(ctx);
         ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
 
         const headers = try self.newHeaders();
@@ -336,6 +339,7 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
             .data_callback = robotsDataCallback,
             .done_callback = robotsDoneCallback,
             .error_callback = robotsErrorCallback,
+            .shutdown_callback = robotsShutdownCallback,
         });
     } else {
         // Not using our own robots URL, only using the one from the first request.
@@ -422,6 +426,18 @@ fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
     }
 }
 
+fn robotsShutdownCallback(ctx_ptr: *anyopaque) void {
+    const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
+    defer ctx.deinit();
+
+    log.debug(.http, "robots fetch shutdown", .{});
+
+    var queued = ctx.client.pending_robots_queue.fetchRemove(
+        ctx.robots_url,
+    ) orelse @panic("Client.robotsShutdownCallback empty queue");
+    defer queued.value.deinit(ctx.client.allocator);
+}
+
 fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
     // The request was intercepted and is blocking. This is messy, but our
     // callers, the ScriptManager -> Page, don't have a great way to stop the

From e1850440b0ba9b2e7983246d5098c6a18765c0b0 Mon Sep 17 00:00:00 2001
From: Muki Kiboigo
Date: Mon, 9 Feb 2026 15:24:35 -0800
Subject: [PATCH 14/14] shutdown queued req on robots shutdown

---
 src/http/Client.zig | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/http/Client.zig b/src/http/Client.zig
index 90bba041..26419b6a 100644
--- a/src/http/Client.zig
+++ b/src/http/Client.zig
@@ -436,6 +436,12 @@ fn robotsShutdownCallback(ctx_ptr: *anyopaque) void {
         ctx.robots_url,
     ) orelse @panic("Client.robotsShutdownCallback empty queue");
     defer queued.value.deinit(ctx.client.allocator);
+
+    for (queued.value.items) |queued_req| {
+        if (queued_req.shutdown_callback) |shutdown_cb| {
+            shutdown_cb(queued_req.ctx);
+        }
+    }
 }
 
 fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {