Merge pull request #1523 from lightpanda-io/query_selector_edge

Support a few more selector edge cases
This commit is contained in:
Karl Seguin
2026-02-12 07:37:36 +08:00
committed by GitHub
2 changed files with 82 additions and 6 deletions

View File

@@ -270,3 +270,36 @@
testing.expectEqual('rect', document.querySelector('svg g rect').tagName);
}
</script>
<!-- Selectors that end in a lone backslash (the JS literal '\\' is a single
     backslash character). Each lookup below is expected to return null. -->
<script id=special>
testing.expectEqual(null, document.querySelector('\\'));
testing.expectEqual(null, document.querySelector('div\\'));
testing.expectEqual(null, document.querySelector('.test-class\\'));
testing.expectEqual(null, document.querySelector('#byId\\'));
</script>
<div class="café">Non-ASCII class 1</div>
<div class="日本語">Non-ASCII class 2</div>
<span id="niño">Non-ASCII ID 1</span>
<p id="🎨">Non-ASCII ID 2</p>
<!-- Class and ID selectors written with raw non-ASCII characters (accented
     Latin, CJK, emoji), both alone and prefixed with a tag name. Each is
     expected to match the corresponding element declared just above. -->
<script id=nonAsciiSelectors>
testing.expectEqual('Non-ASCII class 1', document.querySelector('.café').textContent);
testing.expectEqual('Non-ASCII class 2', document.querySelector('.日本語').textContent);
testing.expectEqual('Non-ASCII ID 1', document.querySelector('#niño').textContent);
testing.expectEqual('Non-ASCII ID 2', document.querySelector('#🎨').textContent);
testing.expectEqual('Non-ASCII class 1', document.querySelector('div.café').textContent);
testing.expectEqual('Non-ASCII ID 1', document.querySelector('span#niño').textContent);
</script>
<span id=".,:!">Punctuation test</span>
<!-- The JS string below doubles every backslash, so the selector text handed
     to querySelector has a single backslash before each of . , : and !
     The lookup is expected to find the element whose id is ".,:!" -->
<script id=escapedPunctuation>
{
// Test escaped punctuation in ID selectors
testing.expectEqual('Punctuation test', document.querySelector('#\\.\\,\\:\\!').textContent);
}
</script>

View File

@@ -49,29 +49,71 @@ const ParseError = error{
StringTooLarge,
};
// CSS Syntax input preprocessing, line-ending part only: every CRLF pair and
// every lone CR is rewritten to a single LF.
// https://drafts.csswg.org/css-syntax/#input-preprocessing
//
// When `input` contains no CR it is returned as-is (no copy, no allocation).
// Otherwise the normalized text is built in a buffer allocated from `arena`
// and a slice of that buffer is returned.
fn preprocessInput(arena: Allocator, input: []const u8) ![]const u8 {
    // Fast path: nothing to normalize.
    const first_cr = std.mem.indexOfScalar(u8, input, '\r') orelse return input;

    // Normalization only ever drops bytes (CRLF -> LF) or swaps one for
    // another (CR -> LF), so `input.len` bytes of capacity is always enough.
    var normalized = try std.ArrayList(u8).initCapacity(arena, input.len);

    // Everything before the first CR is copied verbatim.
    normalized.appendSliceAssumeCapacity(input[0..first_cr]);

    var cursor = first_cr;
    while (std.mem.indexOfScalarPos(u8, input, cursor, '\r')) |cr_index| {
        // Copy the untouched run leading up to this CR, then emit one LF
        // in place of the CR.
        normalized.appendSliceAssumeCapacity(input[cursor..cr_index]);
        normalized.appendAssumeCapacity('\n');

        cursor = cr_index + 1;
        // An LF directly after the CR belongs to the same line ending.
        if (cursor < input.len and input[cursor] == '\n') {
            cursor += 1;
        }
    }

    // Trailing bytes after the last CR (possibly none).
    normalized.appendSliceAssumeCapacity(input[cursor..]);
    return normalized.items;
}
pub fn parseList(arena: Allocator, input: []const u8, page: *Page) ParseError![]const Selector.Selector {
// Preprocess input to normalize line endings
const preprocessed = try preprocessInput(arena, input);
var selectors: std.ArrayList(Selector.Selector) = .empty;
var remaining = input;
var remaining = preprocessed;
while (true) {
const trimmed = std.mem.trimLeft(u8, remaining, &std.ascii.whitespace);
if (trimmed.len == 0) break;
var comma_pos: usize = trimmed.len;
var depth: usize = 0;
for (trimmed, 0..) |c, i| {
var i: usize = 0;
while (i < trimmed.len) {
const c = trimmed[i];
switch (c) {
'(' => depth += 1,
'\\' => {
// Skip escape sequence (backslash + next character)
i += 1;
if (i < trimmed.len) i += 1;
},
'(' => {
depth += 1;
i += 1;
},
')' => {
if (depth > 0) depth -= 1;
i += 1;
},
',' => {
if (depth == 0) {
comma_pos = i;
break;
}
i += 1;
},
else => {
i += 1;
},
else => {},
}
}
@@ -237,8 +279,9 @@ fn parsePart(self: *Parser, arena: Allocator, page: *Page) !Part {
},
'[' => .{ .attribute = try self.attribute(arena, page) },
':' => .{ .pseudo_class = try self.pseudoClass(arena, page) },
'a'...'z', 'A'...'Z', '_' => blk: {
const tag_name = try self.tag();
'a'...'z', 'A'...'Z', '_', '\\', 0x80...0xFF => blk: {
// Use parseIdentifier for full escape support
const tag_name = try self.parseIdentifier(arena, error.InvalidTagSelector);
if (tag_name.len > 256) {
return error.InvalidTagSelector;
}