mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-12-17 00:38:59 +00:00
918 lines
31 KiB
Zig
918 lines
31 KiB
Zig
// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
|
|
//
|
|
// Francis Bouvier <francis@lightpanda.io>
|
|
// Pierre Tachoire <pierre@lightpanda.io>
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as
|
|
// published by the Free Software Foundation, either version 3 of the
|
|
// License, or (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
// CSS Selector parser
|
|
// This file is a rewrite in Zig of Cascadia CSS Selector parser.
|
|
// see https://github.com/andybalholm/cascadia
|
|
// see https://github.com/andybalholm/cascadia/blob/master/parser.go
|
|
const std = @import("std");
|
|
const ascii = std.ascii;
|
|
|
|
const selector = @import("selector.zig");
|
|
const Selector = selector.Selector;
|
|
const PseudoClass = selector.PseudoClass;
|
|
const AttributeOP = selector.AttributeOP;
|
|
const Combinator = selector.Combinator;
|
|
|
|
// ParseError lists every failure the selector parser can report. It is the
// union of the parser's own errors with the error sets of PseudoClass,
// Combinator and the allocator.
pub const ParseError = error{
    ExpectedSelector,
    ExpectedIdentifier,
    ExpectedName,
    ExpectedIDSelector,
    ExpectedClassSelector,
    ExpectedAttributeSelector,
    ExpectedString,
    ExpectedRegexp,
    ExpectedPseudoClassSelector,
    ExpectedParenthesis,
    ExpectedParenthesisClose,
    ExpectedNthExpression,
    ExpectedInteger,

    InvalidEscape,
    EscapeLineEndingOutsideString,
    InvalidUnicode,
    UnicodeIsNotHandled,
    WriteError,

    PseudoElementNotAtSelectorEnd,
    PseudoElementNotUnique,
    PseudoElementDisabled,

    InvalidAttributeOperator,
    InvalidAttributeSelector,
    InvalidString,
    InvalidRegexp,
    InvalidPseudoClassSelector,
    EmptyPseudoClassSelector,
    InvalidPseudoClass,
    InvalidPseudoElement,
    UnmatchParenthesis,
    NotHandled,
    UnknownPseudoSelector,
    InvalidNthExpression,
} || PseudoClass.Error || Combinator.Error || std.mem.Allocator.Error;
|
|
|
|
// ParseOptions tunes what the parser accepts.
pub const ParseOptions = struct {
    // When false, meeting a pseudo-element while parsing a simple selector
    // sequence fails with ParseError.PseudoElementDisabled. Enabled by default.
    accept_pseudo_elts: bool = true,
};
|
|
|
|
pub const Parser = struct {
|
|
s: []const u8, // string to parse
|
|
i: usize = 0, // current position
|
|
|
|
opts: ParseOptions,
|
|
|
|
// parse is the public entry point: it parses a selector group starting at the
// parser's current position, allocating the result with alloc.
pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    const parsed = try p.parseSelectorGroup(alloc);
    return parsed;
}
|
|
|
|
// skipWhitespace consumes whitespace characters (as defined by
// ascii.isWhitespace) and `/* ... */` comments starting at the current
// position. An unterminated comment is not consumed.
// It returns true if there was actually anything to skip, in which case p.i is
// advanced past the skipped text; otherwise p.i is left untouched.
fn skipWhitespace(p: *Parser) bool {
    const comment_open = "/*";
    const comment_close = "*/";

    var i = p.i;
    while (i < p.s.len) {
        // Whitespaces.
        if (ascii.isWhitespace(p.s[i])) {
            i += 1;
            continue;
        }

        // Comments.
        const rest = p.s[i..];
        if (!std.mem.startsWith(u8, rest, comment_open)) break;

        // Search for the terminator only after the opener: otherwise the '*'
        // of "/*" followed by '/' (as in "/*/") would be mistaken for "*/".
        const body = rest[comment_open.len..];
        const end = std.mem.indexOf(u8, body, comment_close) orelse break;
        i += comment_open.len + end + comment_close.len;
    }

    if (i == p.i) return false;

    p.i = i;
    return true;
}
|
|
|
|
// parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element.
//
// It returns ParseError.ExpectedSelector at end of input. When exactly one
// simple selector was parsed and no pseudo-element was seen, that selector is
// returned directly; otherwise the result is a `.compound` selector holding
// the collected selectors and the optional pseudo-element.
// On error, every selector parsed so far is released with Selector.deinit.
fn parseSimpleSelectorSequence(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    if (p.i >= p.s.len) {
        return ParseError.ExpectedSelector;
    }

    var buf = std.ArrayList(Selector).init(alloc);
    defer buf.deinit();
    // The selectors stored in buf are owned by this function until it returns
    // successfully; release them if anything below fails.
    errdefer {
        for (buf.items) |*sel| sel.deinit(alloc);
    }

    switch (p.s[p.i]) {
        '*' => {
            // It's the universal selector. Just skip over it, since it
            // doesn't affect the meaning.
            p.i += 1;

            // other version of universal selector ("*|*"), also accepted when
            // it is the very last thing in the input.
            if (std.mem.startsWith(u8, p.s[p.i..], "|*")) {
                p.i += 2;
            }
        },
        '#', '.', '[', ':' => {
            // There's no type selector. Wait to process the other till the
            // main loop.
        },
        else => {
            var tag = try p.parseTypeSelector(alloc);
            // Not yet in buf: free it here if the append fails.
            errdefer tag.deinit(alloc);
            try buf.append(tag);
        },
    }

    var pseudo_elt: ?PseudoClass = null;

    loop: while (p.i < p.s.len) {
        var ns: Selector = switch (p.s[p.i]) {
            '#' => try p.parseIDSelector(alloc),
            '.' => try p.parseClassSelector(alloc),
            '[' => try p.parseAttributeSelector(alloc),
            ':' => try p.parsePseudoclassSelector(alloc),
            else => break :loop,
        };
        // Covers ns only until it is handed over to buf (no error can occur
        // in this iteration after a successful append).
        errdefer ns.deinit(alloc);

        // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
        // "Only one pseudo-element may appear per selector, and if present
        // it must appear after the sequence of simple selectors that
        // represents the subjects of the selector."
        switch (ns) {
            .pseudo_element => |e| {
                // We found a pseudo-element.
                // Only one pseudo-element is accepted per selector.
                if (pseudo_elt != null) return ParseError.PseudoElementNotUnique;
                if (!p.opts.accept_pseudo_elts) return ParseError.PseudoElementDisabled;

                pseudo_elt = e;
                ns.deinit(alloc);
            },
            else => {
                if (pseudo_elt != null) return ParseError.PseudoElementNotAtSelectorEnd;
                try buf.append(ns);
            },
        }
    }

    // no need wrap the selectors in compoundSelector
    if (buf.items.len == 1 and pseudo_elt == null) return buf.items[0];

    return .{ .compound = .{ .selectors = try buf.toOwnedSlice(), .pseudo_elt = pseudo_elt } };
}
|
|
|
|
// parseTypeSelector reads an identifier at the current position and returns it
// as a `.tag` selector (one that matches by tag name). The tag slice is
// allocated with alloc.
fn parseTypeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    var name = std.ArrayList(u8).init(alloc);
    defer name.deinit();

    const out = name.writer();
    try p.parseIdentifier(out);

    const tag = try name.toOwnedSlice();
    return .{ .tag = tag };
}
|
|
|
|
// parseIdentifier parses an identifier and writes it to w.
//
// Any number of leading '-' are accepted and copied to the output. The
// character that follows must be a name-start character or the backslash of
// an escape sequence; the rest is parsed by parseName.
// Returns ParseError.ExpectedSelector when the input ends or the first
// character cannot start an identifier, and ParseError.WriteError when w
// fails.
fn parseIdentifier(p: *Parser, w: anytype) ParseError!void {
    const prefix = '-';
    var num_prefix: usize = 0;

    while (p.i < p.s.len and p.s[p.i] == prefix) {
        p.i += 1;
        num_prefix += 1;
    }

    if (p.i >= p.s.len) {
        return ParseError.ExpectedSelector;
    }

    const c = p.s[p.i];
    // An identifier may begin with an escape: reject only characters that are
    // neither a name start nor a backslash.
    if (!(nameStart(c) or c == '\\')) {
        return ParseError.ExpectedSelector;
    }

    for (0..num_prefix) |_| {
        w.writeByte(prefix) catch return ParseError.WriteError;
    }
    try parseName(p, w);
}
|
|
|
|
// parseName parses a name (like an identifier, but without the extra
// restrictions on the first character) and writes it to w.
// Runs of name characters are copied verbatim; backslash escapes are decoded
// by parseEscape. Returns ParseError.ExpectedName when nothing was consumed.
fn parseName(p: *Parser, w: anytype) ParseError!void {
    var pos = p.i;
    var found = false;

    while (pos < p.s.len) {
        const c = p.s[pos];

        if (c == '\\') {
            // parseEscape works from p.i, so sync it before and after.
            p.i = pos;
            try p.parseEscape(w);
            pos = p.i;
            found = true;
            continue;
        }

        if (!nameChar(c)) break;

        const run_start = pos;
        while (pos < p.s.len and nameChar(p.s[pos])) pos += 1;
        w.writeAll(p.s[run_start..pos]) catch return ParseError.WriteError;
        found = true;
    }

    if (!found) return ParseError.ExpectedName;
    p.i = pos;
}
|
|
|
|
// parseEscape parses a backslash escape at the current position and writes
// the character it stands for to w.
//
// Two forms are handled:
//   - a backslash followed by one to six hex digits, optionally terminated by
//     a single whitespace ("\r\n" counts as one): the code point is written
//     UTF-8 encoded;
//   - a backslash followed by any other non-whitespace byte: that byte is
//     written as is.
// Errors: InvalidEscape (no backslash, or nothing after it),
// EscapeLineEndingOutsideString (backslash followed by whitespace),
// InvalidUnicode (value is not an encodable code point), WriteError.
fn parseEscape(p: *Parser, w: anytype) ParseError!void {
    if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
        return ParseError.InvalidEscape;
    }

    const start = p.i + 1;
    const c = p.s[start];
    if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;

    if (!ascii.isHex(c)) {
        // Return the literal character after the backslash.
        p.i += 2;
        w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
        return;
    }

    // unicode escape (hex)
    var i: usize = start;
    while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
        i += 1;
    }
    const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;

    // Swallow the optional whitespace terminating the escape. The escape may
    // also simply end at the end of the input.
    if (i < p.s.len) {
        switch (p.s[i]) {
            '\r' => {
                i += 1;
                if (i < p.s.len and p.s[i] == '\n') i += 1;
            },
            ' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
            else => {},
        }
    }

    p.i = i;
    var buf: [4]u8 = undefined;
    const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
    w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
}
|
|
|
|
// parseIDSelector parses "#name" and returns an `.id` selector (one that
// matches by id attribute). The id slice is allocated with alloc.
// Returns ParseError.ExpectedIDSelector unless the current character is '#'.
fn parseIDSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    const at_hash = p.i < p.s.len and p.s[p.i] == '#';
    if (!at_hash) return ParseError.ExpectedIDSelector;
    p.i += 1;

    var name = std.ArrayList(u8).init(alloc);
    defer name.deinit();
    try p.parseName(name.writer());

    const id = try name.toOwnedSlice();
    return .{ .id = id };
}
|
|
|
|
// parseClassSelector parses ".name" and returns a `.class` selector (one that
// matches by class attribute). The class slice is allocated with alloc.
// Returns ParseError.ExpectedClassSelector unless the current character is '.'.
fn parseClassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    const at_dot = p.i < p.s.len and p.s[p.i] == '.';
    if (!at_dot) return ParseError.ExpectedClassSelector;
    p.i += 1;

    var name = std.ArrayList(u8).init(alloc);
    defer name.deinit();
    try p.parseIdentifier(name.writer());

    const class = try name.toOwnedSlice();
    return .{ .class = class };
}
|
|
|
|
// parseAttributeSelector parses a selector that matches by attribute value:
// "[key]" or "[key op value]", optionally followed by an 'i'/'I' flag before
// the closing bracket. The key is lower-cased. For the regexp operator the
// right-hand side is read with parseRegex and stored in `.regexp`; for every
// other operator it is a quoted string or an identifier stored in `.val`.
fn parseAttributeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    if (p.i >= p.s.len or p.s[p.i] != '[') return ParseError.ExpectedAttributeSelector;

    p.i += 1;
    _ = p.skipWhitespace();

    var buf = std.ArrayList(u8).init(alloc);
    defer buf.deinit();

    try p.parseIdentifier(buf.writer());
    const key = try buf.toOwnedSlice();
    errdefer alloc.free(key);
    lowerstr(key);

    _ = p.skipWhitespace();
    if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

    // Presence-only form: "[key]".
    if (p.s[p.i] == ']') {
        p.i += 1;
        return .{ .attribute = .{ .key = key } };
    }

    if (p.i + 2 >= p.s.len) return ParseError.ExpectedAttributeSelector;

    const op = try parseAttributeOP(p.s[p.i .. p.i + 2]);
    p.i += op.len();

    _ = p.skipWhitespace();
    if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

    // buf was emptied by toOwnedSlice; reuse it for the right-hand side.
    buf.clearRetainingCapacity();
    const is_val = op != .regexp;
    if (is_val) {
        switch (p.s[p.i]) {
            '\'', '"' => try p.parseString(buf.writer()),
            else => try p.parseIdentifier(buf.writer()),
        }
    } else {
        try p.parseRegex(buf.writer());
    }

    _ = p.skipWhitespace();
    if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

    // check if the attribute contains an ignore case flag
    const ci = p.s[p.i] == 'i' or p.s[p.i] == 'I';
    if (ci) p.i += 1;

    _ = p.skipWhitespace();
    if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

    if (p.s[p.i] != ']') return ParseError.InvalidAttributeSelector;
    p.i += 1;

    const rhs = try buf.toOwnedSlice();
    return .{ .attribute = .{
        .key = key,
        .val = if (is_val) rhs else null,
        .regexp = if (is_val) null else rhs,
        .op = op,
        .ci = ci,
    } };
}
|
|
|
|
// parseString parses a single- or double-quoted string and writes its
// content (without the quotes, escapes decoded) to writer.
//
// The character at the current position is taken as the quote. A backslash
// followed by a line ending ("\r\n", "\r", "\n" or form feed) is a line
// continuation and produces nothing; any other backslash is decoded by
// parseEscape. A raw line ending inside the string, or reaching the end of
// input before the closing quote, yields ParseError.InvalidString.
fn parseString(p: *Parser, writer: anytype) ParseError!void {
    const ff = std.ascii.control_code.ff;

    var i = p.i;
    if (p.s.len < i + 2) return ParseError.ExpectedString;

    const quote = p.s[i];
    i += 1;

    loop: while (i < p.s.len) {
        switch (p.s[i]) {
            '\\' => {
                if (p.s.len > i + 1) {
                    switch (p.s[i + 1]) {
                        '\r' => {
                            const crlf = p.s.len > i + 2 and p.s[i + 2] == '\n';
                            const skip: usize = if (crlf) 3 else 2;
                            i += skip;
                            continue :loop;
                        },
                        '\n', ff => {
                            i += 2;
                            continue :loop;
                        },
                        else => {},
                    }
                }
                p.i = i;
                try p.parseEscape(writer);
                i = p.i;
            },
            '\r', '\n', ff => return ParseError.InvalidString,
            else => |c| {
                if (c == quote) break :loop;

                // Copy a run of ordinary characters. The run stops at the
                // quote, a backslash or a line ending so the outer loop can
                // deal with each of them (a raw line ending is an error).
                const start = i;
                while (i < p.s.len) : (i += 1) {
                    const cc = p.s[i];
                    if (cc == quote or cc == '\\' or cc == '\r' or cc == '\n' or cc == ff) break;
                }
                writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
            },
        }
    }

    if (i >= p.s.len) return ParseError.InvalidString;

    // Consume the final quote.
    i += 1;
    p.i = i;
}
|
|
|
|
// parseRegex copies the text of a regular expression to writer. The
// expression ends at the first closing ')' or ']' that has no matching opener;
// that character is not consumed. Returns ParseError.ExpectedRegexp when fewer
// than two bytes remain and ParseError.InvalidRegexp when the input ends
// before an unmatched closer is found.
fn parseRegex(p: *Parser, writer: anytype) ParseError!void {
    const begin = p.i;
    if (p.s.len < begin + 2) return ParseError.ExpectedRegexp;

    // Nesting depth of parens/brackets; going negative marks the end.
    var depth: isize = 0;
    var end = begin;
    while (end < p.s.len) : (end += 1) {
        const c = p.s[end];
        if (c == '(' or c == '[') {
            depth += 1;
        } else if (c == ')' or c == ']') {
            depth -= 1;
            if (depth < 0) break;
        }
    }

    if (end >= p.s.len) return ParseError.InvalidRegexp;
    writer.writeAll(p.s[begin..end]) catch return ParseError.WriteError;
    p.i = end;
}
|
|
|
|
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element.
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
// https://drafts.csswg.org/selectors-3/#pseudo-elements
//
// The name after the colon(s) is resolved with PseudoClass.parse. A name
// written with '::' must be a pseudo-element, otherwise
// ParseError.InvalidPseudoElement is returned. Functional pseudo-classes
// parse their parenthesised argument here; memory they need is taken from
// alloc.
fn parsePseudoclassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    if (p.i >= p.s.len) return ParseError.ExpectedPseudoClassSelector;
    if (p.s[p.i] != ':') return ParseError.ExpectedPseudoClassSelector;

    p.i += 1;

    if (p.i >= p.s.len) return ParseError.EmptyPseudoClassSelector;
    // A second ':' announces a pseudo-element.
    const must_pseudo_elt = p.s[p.i] == ':';
    if (must_pseudo_elt) p.i += 1;

    var buf = std.ArrayList(u8).init(alloc);
    defer buf.deinit();

    try p.parseIdentifier(buf.writer());

    const pseudo_class = try PseudoClass.parse(buf.items);

    // reset the buffer to reuse it.
    buf.clearRetainingCapacity();

    if (must_pseudo_elt and !pseudo_class.isPseudoElement()) return ParseError.InvalidPseudoElement;

    switch (pseudo_class) {
        .not, .has, .haschild => {
            if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;

            var sel = try p.parseSelectorGroup(alloc);
            // sel is owned here until it is moved into the returned selector.
            errdefer sel.deinit(alloc);
            if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

            const s = try alloc.create(Selector);
            errdefer alloc.destroy(s);
            s.* = sel;

            return .{ .pseudo_class_relative = .{ .pseudo_class = pseudo_class, .match = s } };
        },
        .contains, .containsown => {
            if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
            if (p.i == p.s.len) return ParseError.UnmatchParenthesis;

            // The argument is either a quoted string or a bare identifier.
            switch (p.s[p.i]) {
                '\'', '"' => try p.parseString(buf.writer()),
                else => try p.parseIdentifier(buf.writer()),
            }

            _ = p.skipWhitespace();
            if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
            if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

            const val = try buf.toOwnedSlice();
            errdefer alloc.free(val);

            lowerstr(val);

            return .{ .pseudo_class_contains = .{ .own = pseudo_class == .containsown, .val = val } };
        },
        .matches, .matchesown => {
            if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;

            try p.parseRegex(buf.writer());
            if (p.i >= p.s.len) return ParseError.InvalidPseudoClassSelector;
            if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

            return .{ .pseudo_class_regexp = .{ .own = pseudo_class == .matchesown, .regexp = try buf.toOwnedSlice() } };
        },
        .nth_child, .nth_last_child, .nth_of_type, .nth_last_of_type => {
            if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
            const nth = try p.parseNth(alloc);
            if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

            const last = pseudo_class == .nth_last_child or pseudo_class == .nth_last_of_type;
            const of_type = pseudo_class == .nth_of_type or pseudo_class == .nth_last_of_type;
            return .{ .pseudo_class_nth = .{ .a = nth[0], .b = nth[1], .of_type = of_type, .last = last } };
        },
        // The first/last variants are expressed as nth selectors with a = 0, b = 1.
        .first_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = false } },
        .last_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = true } },
        .first_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = false } },
        .last_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = true } },
        .only_child => return .{ .pseudo_class_only_child = false },
        .only_of_type => return .{ .pseudo_class_only_child = true },
        .input, .empty, .root, .link => return .{ .pseudo_class = pseudo_class },
        .enabled, .disabled, .checked => return .{ .pseudo_class = pseudo_class },
        .lang => {
            if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
            if (p.i == p.s.len) return ParseError.UnmatchParenthesis;

            try p.parseIdentifier(buf.writer());

            _ = p.skipWhitespace();
            if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
            if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

            const val = try buf.toOwnedSlice();
            errdefer alloc.free(val);
            lowerstr(val);

            return .{ .pseudo_class_lang = val };
        },
        .visited, .hover, .active, .focus, .target => {
            // Not applicable in a static context: never match.
            return .{ .never_match = pseudo_class };
        },
        .after, .backdrop, .before, .cue, .first_letter => return .{ .pseudo_element = pseudo_class },
        .first_line, .grammar_error, .marker, .placeholder => return .{ .pseudo_element = pseudo_class },
        .selection, .spelling_error => return .{ .pseudo_element = pseudo_class },
    }
}
|
|
|
|
// consumeParenthesis consumes an opening parenthesis at the current position
// together with any whitespace that follows it. It returns false, consuming
// nothing, when the current character is not '('.
fn consumeParenthesis(p: *Parser) bool {
    const at_open = p.i < p.s.len and p.s[p.i] == '(';
    if (!at_open) return false;

    p.i += 1;
    _ = p.skipWhitespace();
    return true;
}
|
|
|
|
// parseSelectorGroup parses a group of selectors, separated by commas.
//
// A single selector is returned as is; two or more are wrapped in a `.group`
// selector whose slice is allocated with alloc. If parsing fails part-way,
// the selectors already parsed are released with Selector.deinit.
fn parseSelectorGroup(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    var buf = std.ArrayList(Selector).init(alloc);
    defer buf.deinit();
    // Selectors stored in buf belong to this function until it succeeds.
    errdefer {
        for (buf.items) |*sel| sel.deinit(alloc);
    }

    {
        var s = try p.parseSelector(alloc);
        // Not yet in buf: free it here if the append fails.
        errdefer s.deinit(alloc);
        try buf.append(s);
    }

    while (p.i < p.s.len and p.s[p.i] == ',') {
        p.i += 1;
        var ss = try p.parseSelector(alloc);
        errdefer ss.deinit(alloc);
        try buf.append(ss);
    }

    if (buf.items.len == 1) return buf.items[0];

    return .{ .group = try buf.toOwnedSlice() };
}
|
|
|
|
// parseSelector parses a selector that may include combinators.
//
// Simple selector sequences are folded left to right into `.combined`
// selectors. Whitespace between two sequences is the descendant combinator
// unless an explicit '+', '>' or '~' follows. Parsing stops, leaving the
// character unconsumed, at ',' or ')' or at anything that is not preceded by
// a combinator.
// On error, what has been parsed so far is released with Selector.deinit
// (assumed to free everything a selector owns, as the other error paths of
// this parser already rely on).
fn parseSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
    _ = p.skipWhitespace();
    var s = try p.parseSimpleSelectorSequence(alloc);
    // Evaluated when the error occurs, i.e. with the latest value of s.
    errdefer s.deinit(alloc);

    while (true) {
        var combinator: Combinator = .empty;
        if (p.skipWhitespace()) {
            combinator = .descendant;
        }
        if (p.i >= p.s.len) {
            return s;
        }

        switch (p.s[p.i]) {
            '+', '>', '~' => {
                combinator = try Combinator.parse(p.s[p.i]);
                p.i += 1;
                _ = p.skipWhitespace();
            },
            // These characters can't begin a selector, but they can legally occur after one.
            ',', ')' => {
                return s;
            },
            else => {},
        }

        if (combinator == .empty) {
            return s;
        }

        var c = try p.parseSimpleSelectorSequence(alloc);
        // c is owned here until it is moved into the combined selector below.
        errdefer c.deinit(alloc);

        const first = try alloc.create(Selector);
        errdefer alloc.destroy(first);
        first.* = s;

        const second = try alloc.create(Selector);
        errdefer alloc.destroy(second);
        second.* = c;

        // From here on s owns first and second; nothing below can fail within
        // this iteration.
        s = Selector{ .combined = .{ .first = first, .second = second, .combinator = combinator } };
    }

    return s;
}
|
|
|
|
// consumeClosingParenthesis consumes a closing parenthesis and any whitespace
// preceding it. When no ')' follows the whitespace, the position is restored
// and false is returned.
fn consumeClosingParenthesis(p: *Parser) bool {
    const saved = p.i;
    _ = p.skipWhitespace();

    const at_close = p.i < p.s.len and p.s[p.i] == ')';
    if (!at_close) {
        p.i = saved;
        return false;
    }

    p.i += 1;
    return true;
}
|
|
|
|
// parseInteger parses an unsigned run of decimal digits at the current
// position and advances past it. Returns ParseError.ExpectedInteger when
// there is no digit or when the value does not fit in an isize.
fn parseInteger(p: *Parser) ParseError!isize {
    const start = p.i;
    var end = start;
    while (end < p.s.len and ascii.isDigit(p.s[end])) end += 1;

    if (end == start) return ParseError.ExpectedInteger;
    p.i = end;

    const digits = p.s[start..end];
    return std.fmt.parseUnsigned(isize, digits, 10) catch ParseError.ExpectedInteger;
}
|
|
|
|
// parseNthReadN continues an an+b expression after the 'n': it reads the
// optional "+ b" or "- b" part and returns .{ a, b }, with b = 0 when no sign
// follows.
fn parseNthReadN(p: *Parser, a: isize) ParseError![2]isize {
    _ = p.skipWhitespace();
    if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

    const sign = p.s[p.i];
    if (sign != '+' and sign != '-') return .{ a, 0 };

    p.i += 1;
    _ = p.skipWhitespace();
    const magnitude = try p.parseInteger();
    const b: isize = if (sign == '-') -magnitude else magnitude;
    return .{ a, b };
}
|
|
|
|
// parseNthReadA is called once an integer has been read. If an 'n' follows,
// the integer is the `a` coefficient and parsing continues with
// parseNthReadN; otherwise the integer is `b` and the result is .{ 0, a }.
fn parseNthReadA(p: *Parser, a: isize) ParseError![2]isize {
    if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

    const c = p.s[p.i];
    if (c == 'n' or c == 'N') {
        p.i += 1;
        return p.parseNthReadN(a);
    }
    return .{ 0, a };
}
|
|
|
|
// parseNthNegativeA handles what follows a leading '-' in an nth expression:
// either digits (negated, then handed to parseNthReadA) or a bare 'n', which
// stands for a = -1.
fn parseNthNegativeA(p: *Parser) ParseError![2]isize {
    if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

    switch (p.s[p.i]) {
        '0'...'9' => {
            const magnitude = try p.parseInteger();
            return p.parseNthReadA(-magnitude);
        },
        'n', 'N' => {
            p.i += 1;
            return p.parseNthReadN(-1);
        },
        else => return ParseError.InvalidNthExpression,
    }
}
|
|
|
|
// parseNthPositiveA handles an unsigned or '+'-prefixed start of an nth
// expression: either digits (handed to parseNthReadA) or a bare 'n', which
// stands for a = 1.
fn parseNthPositiveA(p: *Parser) ParseError![2]isize {
    if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

    switch (p.s[p.i]) {
        '0'...'9' => {
            const value = try p.parseInteger();
            return p.parseNthReadA(value);
        },
        'n', 'N' => {
            p.i += 1;
            return p.parseNthReadN(1);
        },
        else => return ParseError.InvalidNthExpression,
    }
}
|
|
|
|
// parseNth parses the argument for :nth-child (normally of the form an+b) and
// returns .{ a, b }. The keywords "odd" and "even" (case-insensitive) map to
// .{ 2, 1 } and .{ 2, 0 }. alloc is only used for the temporary buffer that
// holds the keyword.
fn parseNth(p: *Parser, alloc: std.mem.Allocator) ParseError![2]isize {
    // initial state
    if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

    switch (p.s[p.i]) {
        '-' => {
            p.i += 1;
            return p.parseNthNegativeA();
        },
        '+' => {
            p.i += 1;
            return p.parseNthPositiveA();
        },
        '0'...'9' => return p.parseNthPositiveA(),
        'n', 'N' => {
            p.i += 1;
            return p.parseNthReadN(1);
        },
        'o', 'O', 'e', 'E' => {
            var word = std.ArrayList(u8).init(alloc);
            defer word.deinit();

            try p.parseName(word.writer());

            if (std.ascii.eqlIgnoreCase("odd", word.items)) return .{ 2, 1 };
            if (std.ascii.eqlIgnoreCase("even", word.items)) return .{ 2, 0 };

            return ParseError.InvalidNthExpression;
        },
        else => return ParseError.InvalidNthExpression,
    }
}
|
|
};
|
|
|
|
// nameStart reports whether c can be the first character of an identifier
// (not counting an initial hyphen, or an escape sequence): an ASCII letter,
// '_' or any byte above 127.
fn nameStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => c > 127,
    };
}
|
|
|
|
// nameChar reports whether c can be a character within an identifier
// (not counting an escape sequence): an ASCII letter or digit, '_', '-' or
// any byte above 127.
fn nameChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => true,
        else => c > 127,
    };
}
|
|
|
|
// lowerstr lower-cases str in place, byte by byte, using std.ascii.toLower.
fn lowerstr(str: []u8) void {
    for (str) |*c| c.* = std.ascii.toLower(c.*);
}
|
|
|
|
// parseAttributeOP parses an AttributeOP from a string of 1 or 2 bytes.
// A leading '=' is the equality operator whatever the second byte is; every
// other operator is one character followed by '='. Anything else yields
// ParseError.InvalidAttributeOperator.
fn parseAttributeOP(s: []const u8) ParseError!AttributeOP {
    if (s.len == 0 or s.len > 2) return ParseError.InvalidAttributeOperator;

    // if the first sign is equal, we don't check anything else.
    if (s[0] == '=') return .eql;

    const ends_with_eq = s.len == 2 and s[1] == '=';
    if (!ends_with_eq) return ParseError.InvalidAttributeOperator;

    return switch (s[0]) {
        '!' => .not_eql,
        '~' => .one_of,
        '|' => .prefix_hyphen,
        '^' => .prefix,
        '$' => .suffix,
        '*' => .contains,
        '#' => .regexp,
        else => ParseError.InvalidAttributeOperator,
    };
}
|
|
|
|
// Table-driven check of Parser.skipWhitespace: for each input, the returned
// flag and the resulting position are compared with the expected ones.
test "parser.skipWhitespace" {
    const Case = struct {
        s: []const u8, // input
        i: usize, // expected position afterwards
        r: bool, // expected return value
    };

    const cases = [_]Case{
        .{ .s = "", .i = 0, .r = false },
        .{ .s = "foo", .i = 0, .r = false },
        .{ .s = " ", .i = 1, .r = true },
        .{ .s = " foo", .i = 1, .r = true },
        .{ .s = "/* foo */ bar", .i = 10, .r = true },
        .{ .s = "/* foo", .i = 0, .r = false },
    };

    for (cases) |case| {
        var parser = Parser{ .s = case.s, .opts = .{} };
        const skipped = parser.skipWhitespace();
        try std.testing.expectEqual(case.r, skipped);
        try std.testing.expectEqual(case.i, parser.i);
    }
}
|
|
|
|
// Table-driven check of Parser.parseIdentifier. Cases flagged with `err` are
// allowed to fail; for every other case the written output must equal `exp`.
test "parser.parseIdentifier" {
    const alloc = std.testing.allocator;

    const Case = struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    };

    const cases = [_]Case{
        .{ .s = "x", .exp = "x" },
        .{ .s = "96", .exp = "", .err = true },
        .{ .s = "-x", .exp = "-x" },
        .{ .s = "r\\e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9sumé", .exp = "résumé" },
        .{ .s = "a\\\"b", .exp = "a\"b" },
    };

    var out = std.ArrayList(u8).init(alloc);
    defer out.deinit();

    for (cases) |case| {
        out.clearRetainingCapacity();

        var parser = Parser{ .s = case.s, .opts = .{} };
        parser.parseIdentifier(out.writer()) catch |e| {
            // if error was expected, continue.
            if (case.err) continue;

            std.debug.print("test case {s}\n", .{case.s});
            return e;
        };
        std.testing.expectEqualDeep(case.exp, out.items) catch |e| {
            std.debug.print("test case {s} : {s}\n", .{ case.s, out.items });
            return e;
        };
    }
}
|
|
|
|
// Table-driven check of Parser.parseString. Cases flagged with `err` are
// allowed to fail; for every other case the written output must equal `exp`.
test "parser.parseString" {
    const alloc = std.testing.allocator;

    const Case = struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    };

    const cases = [_]Case{
        .{ .s = "\"x\"", .exp = "x" },
        .{ .s = "'x'", .exp = "x" },
        .{ .s = "'x", .exp = "", .err = true },
        .{ .s = "'x\\\r\nx'", .exp = "xx" },
        .{ .s = "\"r\\e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9sumé\"", .exp = "résumé" },
        .{ .s = "\"a\\\"b\"", .exp = "a\"b" },
        .{ .s = "\"\\\n\"", .exp = "" },
        .{ .s = "\"hello world\"", .exp = "hello world" },
    };

    var out = std.ArrayList(u8).init(alloc);
    defer out.deinit();

    for (cases) |case| {
        out.clearRetainingCapacity();

        var parser = Parser{ .s = case.s, .opts = .{} };
        parser.parseString(out.writer()) catch |e| {
            // if error was expected, continue.
            if (case.err) continue;

            std.debug.print("test case {s}\n", .{case.s});
            return e;
        };
        std.testing.expectEqualDeep(case.exp, out.items) catch |e| {
            std.debug.print("test case {s} : {s}\n", .{ case.s, out.items });
            return e;
        };
    }
}
|