switch to iterative match solving

This commit is contained in:
Muki Kiboigo
2026-02-12 22:54:35 -08:00
parent d0c381b3df
commit 576dbb7ce6

View File

@@ -355,51 +355,76 @@ fn matchPattern(compiled: CompiledPattern, path: []const u8) bool {
const pattern = compiled.pattern; const pattern = compiled.pattern;
const exact_match = pattern[pattern.len - 1] == '$'; const exact_match = pattern[pattern.len - 1] == '$';
const inner_pattern = if (exact_match) pattern[0 .. pattern.len - 1] else pattern; const inner_pattern = if (exact_match) pattern[0 .. pattern.len - 1] else pattern;
return matchPatternRecursive(inner_pattern, path, exact_match); return matchInnerPattern(inner_pattern, path, exact_match);
}, },
} }
} }
fn matchPatternRecursive(pattern: []const u8, path: []const u8, exact_match: bool) bool { fn matchInnerPattern(pattern: []const u8, path: []const u8, exact_match: bool) bool {
if (pattern.len == 0) return true; var pattern_idx: usize = 0;
var path_idx: usize = 0;
const star_pos = std.mem.indexOfScalar(u8, pattern, '*') orelse { var star_pattern_idx: ?usize = null;
if (exact_match) { var star_path_idx: ?usize = null;
// If we end in '$', we must be exactly equal.
return std.mem.eql(u8, path, pattern); while (pattern_idx < pattern.len or path_idx < path.len) {
} else { // 1: If pattern is consumed and we are doing prefix match, we matched.
// Otherwise, we are just a prefix. if (pattern_idx >= pattern.len and !exact_match) {
return std.mem.startsWith(u8, path, pattern); return true;
} }
};
// Ensure the prefix before the '*' matches. // 2: Current character is a wildcard
if (!std.mem.startsWith(u8, path, pattern[0..star_pos])) { if (pattern_idx < pattern.len and pattern[pattern_idx] == '*') {
star_pattern_idx = pattern_idx;
star_path_idx = path_idx;
pattern_idx += 1;
continue;
}
// 3: Characters match, advance both heads.
if (pattern_idx < pattern.len and path_idx < path.len and pattern[pattern_idx] == path[path_idx]) {
pattern_idx += 1;
path_idx += 1;
continue;
}
// 4: we have a previous wildcard, backtrack and try matching more.
if (star_pattern_idx) |star_p_idx| {
// if we have exhausted the path,
// we know we haven't matched.
if (star_path_idx.? > path.len) {
return false;
}
pattern_idx = star_p_idx + 1;
path_idx = star_path_idx.?;
star_path_idx.? += 1;
continue;
}
// Fallthrough: No match and no backtracking.
return false; return false;
} }
const suffix_pattern = pattern[star_pos + 1 ..]; // Handle trailing wildcards that can match 0 characters.
if (suffix_pattern.len == 0) return true; while (pattern_idx < pattern.len and pattern[pattern_idx] == '*') {
pattern_idx += 1;
var i: usize = star_pos;
while (i <= path.len) : (i += 1) {
if (matchPatternRecursive(suffix_pattern, path[i..], exact_match)) {
return true;
}
} }
return false; if (exact_match) {
// Both must be fully consumed.
return pattern_idx == pattern.len and path_idx == path.len;
}
// For prefix match, pattern must be completed.
return pattern_idx == pattern.len;
} }
pub fn isAllowed(self: *const Robots, path: []const u8) bool { pub fn isAllowed(self: *const Robots, path: []const u8) bool {
for (self.rules) |rule| { for (self.rules) |rule| {
switch (rule) { switch (rule) {
.allow => |compiled| { .allow => |compiled| if (matchPattern(compiled, path)) return true,
if (matchPattern(compiled, path)) return true; .disallow => |compiled| if (matchPattern(compiled, path)) return false,
},
.disallow => |compiled| {
if (matchPattern(compiled, path)) return false;
},
} }
} }