diff --git a/src/browser/tests/document/query_selector.html b/src/browser/tests/document/query_selector.html index 91675b82..b333069e 100644 --- a/src/browser/tests/document/query_selector.html +++ b/src/browser/tests/document/query_selector.html @@ -270,3 +270,36 @@ testing.expectEqual('rect', document.querySelector('svg g rect').tagName); } + + + +
Non-ASCII class 1
+
Non-ASCII class 2
+Non-ASCII ID 1 +

Non-ASCII ID 2

+ + + +Punctuation test + + diff --git a/src/browser/webapi/selector/Parser.zig b/src/browser/webapi/selector/Parser.zig index a594f66f..e188c4e7 100644 --- a/src/browser/webapi/selector/Parser.zig +++ b/src/browser/webapi/selector/Parser.zig @@ -49,29 +49,71 @@ const ParseError = error{ StringTooLarge, }; +// CSS Syntax preprocessing: normalize line endings (CRLF → LF, CR → LF) +// https://drafts.csswg.org/css-syntax/#input-preprocessing +fn preprocessInput(arena: Allocator, input: []const u8) ![]const u8 { + var i = std.mem.indexOfScalar(u8, input, '\r') orelse return input; + + var result = try std.ArrayList(u8).initCapacity(arena, input.len); + result.appendSliceAssumeCapacity(input[0..i]); + + while (i < input.len) { + const c = input[i]; + if (c == '\r') { + result.appendAssumeCapacity('\n'); + i += 1; + if (i < input.len and input[i] == '\n') { + i += 1; + } + } else { + result.appendAssumeCapacity(c); + i += 1; + } + } + + return result.items; +} + pub fn parseList(arena: Allocator, input: []const u8, page: *Page) ParseError![]const Selector.Selector { + // Preprocess input to normalize line endings + const preprocessed = try preprocessInput(arena, input); + var selectors: std.ArrayList(Selector.Selector) = .empty; - var remaining = input; + var remaining = preprocessed; while (true) { const trimmed = std.mem.trimLeft(u8, remaining, &std.ascii.whitespace); if (trimmed.len == 0) break; var comma_pos: usize = trimmed.len; var depth: usize = 0; - for (trimmed, 0..) |c, i| { + var i: usize = 0; + while (i < trimmed.len) { + const c = trimmed[i]; switch (c) { - '(' => depth += 1, + '\\' => { + // Skip escape sequence (backslash + next character) + i += 1; + if (i < trimmed.len) i += 1; + }, + '(' => { + depth += 1; + i += 1; + }, ')' => { if (depth > 0) depth -= 1; + i += 1; }, ',' => { if (depth == 0) { comma_pos = i; break; } + i += 1; + }, + else => { + i += 1; }, - else => {}, } } @@ -237,8 +279,9 @@ fn parsePart(self: *Parser, arena: Allocator, page: *Page) !Part { }, '[' => .{ .attribute = try self.attribute(arena, page) }, ':' => .{ .pseudo_class = try self.pseudoClass(arena, page) }, - 'a'...'z', 'A'...'Z', '_' => blk: { - const tag_name = try self.tag(); + 'a'...'z', 'A'...'Z', '_', '\\', 0x80...0xFF => blk: { + // Use parseIdentifier for full escape support + const tag_name = try self.parseIdentifier(arena, error.InvalidTagSelector); if (tag_name.len > 256) { return error.InvalidTagSelector; }