mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-22 12:44:43 +00:00
no more longest match tracking in robots
This commit is contained in:
@@ -122,7 +122,7 @@ fn parseRulesWithUserAgent(
|
|||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
user_agent: []const u8,
|
user_agent: []const u8,
|
||||||
raw_bytes: []const u8,
|
raw_bytes: []const u8,
|
||||||
) ![]const Rule {
|
) ![]Rule {
|
||||||
var rules: std.ArrayList(Rule) = .empty;
|
var rules: std.ArrayList(Rule) = .empty;
|
||||||
defer rules.deinit(allocator);
|
defer rules.deinit(allocator);
|
||||||
|
|
||||||
@@ -252,6 +252,39 @@ fn parseRulesWithUserAgent(
|
|||||||
|
|
||||||
pub fn fromBytes(allocator: std.mem.Allocator, user_agent: []const u8, bytes: []const u8) !Robots {
|
pub fn fromBytes(allocator: std.mem.Allocator, user_agent: []const u8, bytes: []const u8) !Robots {
|
||||||
const rules = try parseRulesWithUserAgent(allocator, user_agent, bytes);
|
const rules = try parseRulesWithUserAgent(allocator, user_agent, bytes);
|
||||||
|
|
||||||
|
// sort by order once.
|
||||||
|
std.mem.sort(Rule, rules, {}, struct {
|
||||||
|
fn lessThan(_: void, a: Rule, b: Rule) bool {
|
||||||
|
const a_len = switch (a) {
|
||||||
|
.allow => |p| p.len,
|
||||||
|
.disallow => |p| p.len,
|
||||||
|
};
|
||||||
|
|
||||||
|
const b_len = switch (b) {
|
||||||
|
.allow => |p| p.len,
|
||||||
|
.disallow => |p| p.len,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sort by length first.
|
||||||
|
if (a_len != b_len) {
|
||||||
|
return a_len > b_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, allow should beat disallow.
|
||||||
|
const a_is_allow = switch (a) {
|
||||||
|
.allow => true,
|
||||||
|
.disallow => false,
|
||||||
|
};
|
||||||
|
const b_is_allow = switch (b) {
|
||||||
|
.allow => true,
|
||||||
|
.disallow => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
return a_is_allow and !b_is_allow;
|
||||||
|
}
|
||||||
|
}.lessThan);
|
||||||
|
|
||||||
return .{ .rules = rules };
|
return .{ .rules = rules };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -309,37 +342,19 @@ fn matchPattern(pattern: []const u8, path: []const u8) ?usize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn isAllowed(self: *const Robots, path: []const u8) bool {
|
pub fn isAllowed(self: *const Robots, path: []const u8) bool {
|
||||||
const rules = self.rules;
|
for (self.rules) |rule| {
|
||||||
|
|
||||||
var longest_match_len: usize = 0;
|
|
||||||
var is_allowed_result = true;
|
|
||||||
|
|
||||||
for (rules) |rule| {
|
|
||||||
switch (rule) {
|
switch (rule) {
|
||||||
.allow => |pattern| {
|
.allow => |pattern| {
|
||||||
if (matchPattern(pattern, path)) |len| {
|
if (matchPattern(pattern, path) != null) return true;
|
||||||
// Longest or Last Wins.
|
|
||||||
if (len >= longest_match_len) {
|
|
||||||
longest_match_len = len;
|
|
||||||
is_allowed_result = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
.disallow => |pattern| {
|
.disallow => |pattern| {
|
||||||
if (pattern.len == 0) continue;
|
if (pattern.len == 0) continue;
|
||||||
|
if (matchPattern(pattern, path) != null) return false;
|
||||||
if (matchPattern(pattern, path)) |len| {
|
|
||||||
// Longest or Last Wins.
|
|
||||||
if (len >= longest_match_len) {
|
|
||||||
longest_match_len = len;
|
|
||||||
is_allowed_result = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return is_allowed_result;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Robots: simple robots.txt" {
|
test "Robots: simple robots.txt" {
|
||||||
@@ -675,7 +690,7 @@ test "Robots: isAllowed - complex real-world example" {
|
|||||||
try std.testing.expect(robots.isAllowed("/cgi-bin/script.sh") == true);
|
try std.testing.expect(robots.isAllowed("/cgi-bin/script.sh") == true);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Robots: isAllowed - order doesn't matter for same length" {
|
test "Robots: isAllowed - order doesn't matter + allow wins" {
|
||||||
const allocator = std.testing.allocator;
|
const allocator = std.testing.allocator;
|
||||||
|
|
||||||
var robots = try Robots.fromBytes(allocator, "Bot",
|
var robots = try Robots.fromBytes(allocator, "Bot",
|
||||||
@@ -687,7 +702,7 @@ test "Robots: isAllowed - order doesn't matter for same length" {
|
|||||||
);
|
);
|
||||||
defer robots.deinit(allocator);
|
defer robots.deinit(allocator);
|
||||||
|
|
||||||
try std.testing.expect(robots.isAllowed("/page") == false);
|
try std.testing.expect(robots.isAllowed("/page") == true);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Robots: isAllowed - empty file uses wildcard defaults" {
|
test "Robots: isAllowed - empty file uses wildcard defaults" {
|
||||||
|
|||||||
Reference in New Issue
Block a user