From 622ca3121f6c9b6dcc0efa2e6ae9f82131e1955c Mon Sep 17 00:00:00 2001
From: Muki Kiboigo
Date: Tue, 6 Jan 2026 23:23:26 -0800
Subject: [PATCH] add case insensitivity support to selector parsing

---
 src/browser/webapi/selector/List.zig     | 70 ++++++++++++++++--------
 src/browser/webapi/selector/Parser.zig   | 17 +++++-
 src/browser/webapi/selector/Selector.zig |  1 +
 3 files changed, 64 insertions(+), 24 deletions(-)

diff --git a/src/browser/webapi/selector/List.zig b/src/browser/webapi/selector/List.zig
index 5b1eb632..7d03c40c 100644
--- a/src/browser/webapi/selector/List.zig
+++ b/src/browser/webapi/selector/List.zig
@@ -436,38 +436,64 @@ fn matchesPart(el: *Node.Element, part: Part, scope: *Node, page: *Page) bool {
 }
 
 fn matchesAttribute(el: *Node.Element, attr: Selector.Attribute) bool {
-    // attr.name is already normalized to lowercase during parsing, so we can use the fast path
     const value = el.getAttributeSafe(attr.name) orelse {
         return false;
     };
 
     switch (attr.matcher) {
         .presence => return true,
-        .exact => |expected| return std.mem.eql(u8, value, expected),
-        .word => |needle| {
-            var it = std.mem.splitAny(u8, value, &std.ascii.whitespace);
-            while (it.next()) |world| {
-                if (std.mem.eql(u8, world, needle)) {
-                    return true;
+        .exact => |expected| {
+            return if (attr.case_insensitive)
+                std.ascii.eqlIgnoreCase(value, expected)
+            else
+                std.mem.eql(u8, value, expected);
+        },
+        .substring => |expected| {
+            return if (attr.case_insensitive)
+                std.ascii.indexOfIgnoreCase(value, expected) != null
+            else
+                std.mem.indexOf(u8, value, expected) != null;
+        },
+        .starts_with => |expected| {
+            return if (attr.case_insensitive)
+                std.ascii.startsWithIgnoreCase(value, expected)
+            else
+                std.mem.startsWith(u8, value, expected);
+        },
+        .ends_with => |expected| {
+            return if (attr.case_insensitive)
+                std.ascii.endsWithIgnoreCase(value, expected)
+            else
+                std.mem.endsWith(u8, value, expected);
+        },
+        .word => |expected| {
+            // Whitespace-separated word match (like class names); any ASCII whitespace (tab, newline, ...) separates words, not just ' '
+            var it = std.mem.tokenizeAny(u8, value, &std.ascii.whitespace);
+            while (it.next()) |word| {
+                const same = if (attr.case_insensitive)
+                    std.ascii.eqlIgnoreCase(word, expected)
+                else
+                    std.mem.eql(u8, word, expected);
+
+                if (same) return true;
+            }
+            return false;
+        },
+        .prefix_dash => |expected| {
+            // Matches value or value- prefix (for language codes like en, en-US)
+            if (attr.case_insensitive) {
+                if (std.ascii.eqlIgnoreCase(value, expected)) return true;
+                if (value.len > expected.len and value[expected.len] == '-') {
+                    return std.ascii.eqlIgnoreCase(value[0..expected.len], expected);
+                }
+            } else {
+                if (std.mem.eql(u8, value, expected)) return true;
+                if (value.len > expected.len and value[expected.len] == '-') {
+                    return std.mem.eql(u8, value[0..expected.len], expected);
                 }
             }
             return false;
         },
-        .prefix_dash => |prefix| {
-            if (std.mem.startsWith(u8, value, prefix) == false) {
-                return false;
-            }
-            if (value.len == prefix.len) {
-                return true;
-            }
-            if (value[prefix.len] == '-') {
-                return true;
-            }
-            return false;
-        },
-        .starts_with => |prefix| return std.mem.startsWith(u8, value, prefix),
-        .ends_with => |suffix| return std.mem.endsWith(u8, value, suffix),
-        .substring => |substr| return std.mem.indexOf(u8, value, substr) != null,
     }
 }
 
diff --git a/src/browser/webapi/selector/Parser.zig b/src/browser/webapi/selector/Parser.zig
index 02d9e1c7..18a8bdd5 100644
--- a/src/browser/webapi/selector/Parser.zig
+++ b/src/browser/webapi/selector/Parser.zig
@@ -904,11 +904,12 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
     // Normalize the name to lowercase for fast matching (consistent with Attribute.normalizeNameForLookup)
     const normalized = try Attribute.normalizeNameForLookup(attr_name, page);
     const name = try arena.dupe(u8, normalized);
+    var case_insensitive = false;
     _ = self.skipSpaces();
 
     if (self.peek() == ']') {
         self.input = self.input[1..];
-        return .{ .name = name, .matcher = .presence };
+        return .{ .name = name, .matcher = .presence, .case_insensitive = case_insensitive };
     }
 
     const matcher_type = try self.attributeMatcher();
@@ -918,6 +919,18 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
     const value = try arena.dupe(u8, value_raw);
     _ = self.skipSpaces();
 
+    // Parse optional case-sensitivity flag
+    if (std.ascii.toLower(self.peek()) == 'i') {
+        self.input = self.input[1..];
+        case_insensitive = true;
+        _ = self.skipSpaces();
+    } else if (std.ascii.toLower(self.peek()) == 's') {
+        // 's' flag means case-sensitive (explicit)
+        self.input = self.input[1..];
+        case_insensitive = false;
+        _ = self.skipSpaces();
+    }
+
     if (self.peek() != ']') {
         return error.InvalidAttributeSelector;
     }
@@ -933,7 +946,7 @@ fn attribute(self: *Parser, arena: Allocator, page: *Page) !Selector.Attribute {
         .presence => unreachable,
     };
 
-    return .{ .name = name, .matcher = matcher };
+    return .{ .name = name, .matcher = matcher, .case_insensitive = case_insensitive };
 }
 
 fn attributeName(self: *Parser) ![]const u8 {
diff --git a/src/browser/webapi/selector/Selector.zig b/src/browser/webapi/selector/Selector.zig
index 6f0869eb..1f1f1d53 100644
--- a/src/browser/webapi/selector/Selector.zig
+++ b/src/browser/webapi/selector/Selector.zig
@@ -119,6 +119,7 @@ pub const Part = union(enum) {
 pub const Attribute = struct {
     name: []const u8,
     matcher: AttributeMatcher,
+    case_insensitive: bool,
 };
 
 pub const AttributeMatcher = union(enum) {