Add case-insensitivity support (`i` / `s` flags) to attribute selector parsing and matching

This commit is contained in:
Muki Kiboigo
2026-01-06 23:23:26 -08:00
parent c92903aae5
commit 622ca3121f
3 changed files with 64 additions and 24 deletions

View File

@@ -436,38 +436,64 @@ fn matchesPart(el: *Node.Element, part: Part, scope: *Node, page: *Page) bool {
} }
/// Reports whether `el` has the attribute named by `attr` and whether that
/// attribute's value satisfies `attr.matcher`.
///
/// When `attr.case_insensitive` is set, values are compared ignoring ASCII
/// case; otherwise the comparison is byte-exact. Returns `false` when the
/// attribute is absent.
fn matchesAttribute(el: *Node.Element, attr: Selector.Attribute) bool {
    // attr.name is already normalized to lowercase during parsing, so we can use the fast path
    const value = el.getAttributeSafe(attr.name) orelse return false;
    const ignore_case = attr.case_insensitive;

    switch (attr.matcher) {
        .presence => return true,
        .exact => |expected| {
            return if (ignore_case)
                std.ascii.eqlIgnoreCase(value, expected)
            else
                std.mem.eql(u8, value, expected);
        },
        .substring => |expected| {
            // Per the Selectors spec, `[attr*=""]` represents nothing.
            if (expected.len == 0) return false;
            return if (ignore_case)
                std.ascii.indexOfIgnoreCase(value, expected) != null
            else
                std.mem.indexOf(u8, value, expected) != null;
        },
        .starts_with => |expected| {
            // Per the Selectors spec, `[attr^=""]` represents nothing.
            if (expected.len == 0) return false;
            return if (ignore_case)
                std.ascii.startsWithIgnoreCase(value, expected)
            else
                std.mem.startsWith(u8, value, expected);
        },
        .ends_with => |expected| {
            // Per the Selectors spec, `[attr$=""]` represents nothing.
            if (expected.len == 0) return false;
            return if (ignore_case)
                std.ascii.endsWithIgnoreCase(value, expected)
            else
                std.mem.endsWith(u8, value, expected);
        },
        .word => |expected| {
            // Whitespace-separated word match (like class names). Split on any
            // ASCII whitespace, not just ' ', so tab- or newline-separated
            // values keep matching. Tokenizing never yields an empty word, so
            // an empty `expected` can never match.
            var words = std.mem.tokenizeAny(u8, value, &std.ascii.whitespace);
            while (words.next()) |word| {
                const same = if (ignore_case)
                    std.ascii.eqlIgnoreCase(word, expected)
                else
                    std.mem.eql(u8, word, expected);
                if (same) return true;
            }
            return false;
        },
        .prefix_dash => |expected| {
            // Matches value or value- prefix (for language codes like en, en-US)
            if (value.len < expected.len) return false;

            const head = value[0..expected.len];
            const head_matches = if (ignore_case)
                std.ascii.eqlIgnoreCase(head, expected)
            else
                std.mem.eql(u8, head, expected);
            if (!head_matches) return false;

            // Either the whole value is the prefix, or the prefix is
            // immediately followed by a dash.
            return value.len == expected.len or value[expected.len] == '-';
        },
    }
}

View File

@@ -904,11 +904,12 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
// Normalize the name to lowercase for fast matching (consistent with Attribute.normalizeNameForLookup) // Normalize the name to lowercase for fast matching (consistent with Attribute.normalizeNameForLookup)
const normalized = try Attribute.normalizeNameForLookup(attr_name, page); const normalized = try Attribute.normalizeNameForLookup(attr_name, page);
const name = try arena.dupe(u8, normalized); const name = try arena.dupe(u8, normalized);
var case_insensitive = false;
_ = self.skipSpaces(); _ = self.skipSpaces();
if (self.peek() == ']') { if (self.peek() == ']') {
self.input = self.input[1..]; self.input = self.input[1..];
return .{ .name = name, .matcher = .presence }; return .{ .name = name, .matcher = .presence, .case_insensitive = case_insensitive };
} }
const matcher_type = try self.attributeMatcher(); const matcher_type = try self.attributeMatcher();
@@ -918,6 +919,18 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
const value = try arena.dupe(u8, value_raw); const value = try arena.dupe(u8, value_raw);
_ = self.skipSpaces(); _ = self.skipSpaces();
// Parse optional case-sensitivity flag
if (std.ascii.toLower(self.peek()) == 'i') {
self.input = self.input[1..];
case_insensitive = true;
_ = self.skipSpaces();
} else if (std.ascii.toLower(self.peek()) == 's') {
// 's' flag means case-sensitive (explicit)
self.input = self.input[1..];
case_insensitive = false;
_ = self.skipSpaces();
}
if (self.peek() != ']') { if (self.peek() != ']') {
return error.InvalidAttributeSelector; return error.InvalidAttributeSelector;
} }
@@ -933,7 +946,7 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
.presence => unreachable, .presence => unreachable,
}; };
return .{ .name = name, .matcher = matcher }; return .{ .name = name, .matcher = matcher, .case_insensitive = case_insensitive };
} }
fn attributeName(self: *Parser) ![]const u8 { fn attributeName(self: *Parser) ![]const u8 {

View File

@@ -119,6 +119,7 @@ pub const Part = union(enum) {
/// A parsed attribute selector: the attribute to look up, how its value is
/// compared, and whether that comparison ignores case.
pub const Attribute = struct {
    /// Attribute name to look up on the element.
    name: []const u8,
    /// Comparison applied to the attribute's value.
    matcher: AttributeMatcher,
    /// True when the selector carried the `i` flag. Defaults to `false`
    /// (case-sensitive) so initializers that predate this field keep compiling.
    case_insensitive: bool = false,
};
pub const AttributeMatcher = union(enum) { pub const AttributeMatcher = union(enum) {