// Copyright (C) 2023-2024  Lightpanda (Selecy SAS)
//
// Francis Bouvier
// Pierre Tachoire
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see https://www.gnu.org/licenses/.

// CSS Selector parser
// This file is a rewrite in Zig of Cascadia CSS Selector parser.
// see https://github.com/andybalholm/cascadia
// see https://github.com/andybalholm/cascadia/blob/master/parser.go
const std = @import("std");
const ascii = std.ascii;

const selector = @import("selector.zig");
const Selector = selector.Selector;
const PseudoClass = selector.PseudoClass;
const AttributeOP = selector.AttributeOP;
const Combinator = selector.Combinator;

// ParseError lists every error the parser can return, including the errors
// of the pseudo-class and combinator sub-parsers and allocation failures.
pub const ParseError = error{
    ExpectedSelector,
    ExpectedIdentifier,
    ExpectedName,
    ExpectedIDSelector,
    ExpectedClassSelector,
    ExpectedAttributeSelector,
    ExpectedString,
    ExpectedRegexp,
    ExpectedPseudoClassSelector,
    ExpectedParenthesis,
    ExpectedParenthesisClose,
    ExpectedNthExpression,
    ExpectedInteger,
    InvalidEscape,
    EscapeLineEndingOutsideString,
    InvalidUnicode,
    UnicodeIsNotHandled,
    WriteError,
    PseudoElementNotAtSelectorEnd,
    PseudoElementNotUnique,
    PseudoElementDisabled,
    InvalidAttributeOperator,
    InvalidAttributeSelector,
    InvalidString,
    InvalidRegexp,
    InvalidPseudoClassSelector,
    EmptyPseudoClassSelector,
    InvalidPseudoClass,
    InvalidPseudoElement,
    UnmatchParenthesis,
    NotHandled,
    UnknownPseudoSelector,
    InvalidNthExpression,
} || PseudoClass.Error || Combinator.Error || std.mem.Allocator.Error;

// ParseOptions tunes the parser behavior.
pub const ParseOptions = struct {
    // When false, any pseudo-element makes the parsing fail with
    // PseudoElementDisabled.
    accept_pseudo_elts: bool = true,
};

// Parser is a recursive descent parser over the selector string `s`.
// `i` is the cursor: every parse* method reads from p.s[p.i] and advances
// p.i past what it consumed.
pub const Parser = struct {
    s: []const u8, // string to parse
    i: usize = 0, // current position
    opts: ParseOptions,

    // parse parses the whole selector group starting at the current position.
    // Memory referenced by the returned Selector is allocated with alloc.
    pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        return p.parseSelectorGroup(alloc);
    }

    // skipWhitespace consumes whitespace characters and comments.
    // It returns true if there was actually anything to skip.
    // An unterminated comment is not consumed.
    fn skipWhitespace(p: *Parser) bool {
        const open = "/*";
        const close = "*/";

        var i = p.i;
        while (i < p.s.len) {
            // Whitespaces.
            if (ascii.isWhitespace(p.s[i])) {
                i += 1;
                continue;
            }

            // Comments.
            if (std.mem.startsWith(u8, p.s[i..], open)) {
                // Look for the terminator *after* the opening marker,
                // otherwise "/*/" would be seen as a complete comment.
                if (std.mem.indexOf(u8, p.s[i + open.len ..], close)) |end| {
                    i += open.len + end + close.len;
                    continue;
                }
            }

            break;
        }

        if (i > p.i) {
            p.i = i;
            return true;
        }
        return false;
    }

    // parseSimpleSelectorSequence parses a selector sequence that applies to
    // a single element.
    fn parseSimpleSelectorSequence(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) {
            return ParseError.ExpectedSelector;
        }

        var buf = std.ArrayList(Selector).init(alloc);
        defer buf.deinit();

        switch (p.s[p.i]) {
            '*' => {
                // It's the universal selector. Just skip over it, since it
                // doesn't affect the meaning.
                p.i += 1;

                // other version of universal selector: "*|*", which may
                // also be the very last thing in the input.
                if (std.mem.startsWith(u8, p.s[p.i..], "|*")) {
                    p.i += 2;
                }
            },
            '#', '.', '[', ':' => {
                // There's no type selector. Wait to process the other till the
                // main loop.
            },
            else => try buf.append(try p.parseTypeSelector(alloc)),
        }

        var pseudo_elt: ?PseudoClass = null;

        loop: while (p.i < p.s.len) {
            var ns: Selector = switch (p.s[p.i]) {
                '#' => try p.parseIDSelector(alloc),
                '.' => try p.parseClassSelector(alloc),
                '[' => try p.parseAttributeSelector(alloc),
                ':' => try p.parsePseudoclassSelector(alloc),
                else => break :loop,
            };
            errdefer ns.deinit(alloc);

            // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
            // "Only one pseudo-element may appear per selector, and if present
            // it must appear after the sequence of simple selectors that
            // represents the subjects of the selector."
            switch (ns) {
                .pseudo_element => |e| {
                    // We found a pseudo-element.
                    // Only one pseudo-element is accepted per selector.
                    if (pseudo_elt != null) return ParseError.PseudoElementNotUnique;
                    if (!p.opts.accept_pseudo_elts) return ParseError.PseudoElementDisabled;

                    pseudo_elt = e;
                    ns.deinit(alloc);
                },
                else => {
                    if (pseudo_elt != null) return ParseError.PseudoElementNotAtSelectorEnd;
                    try buf.append(ns);
                },
            }
        }

        // no need wrap the selectors in compoundSelector
        if (buf.items.len == 1 and pseudo_elt == null) return buf.items[0];

        return .{ .compound = .{ .selectors = try buf.toOwnedSlice(), .pseudo_elt = pseudo_elt } };
    }

    // parseTypeSelector parses a type selector (one that matches by tag name).
    fn parseTypeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();
        try p.parseIdentifier(buf.writer());

        return .{ .tag = try buf.toOwnedSlice() };
    }

    // parseIdentifier parses an identifier and writes its unescaped value
    // to w. Any number of leading hyphens is accepted; the first character
    // after them must be a name-start character or a backslash escape.
    fn parseIdentifier(p: *Parser, w: anytype) ParseError!void {
        const prefix = '-';
        var prefix_count: usize = 0;

        while (p.s.len > p.i and p.s[p.i] == prefix) {
            p.i += 1;
            prefix_count += 1;
        }

        if (p.s.len <= p.i) {
            return ParseError.ExpectedSelector;
        }

        const c = p.s[p.i];
        // An identifier may start with an escape sequence, the escape
        // itself is validated by parseName below.
        if (!(nameStart(c) or c == '\\')) {
            return ParseError.ExpectedSelector;
        }

        var written: usize = 0;
        while (written < prefix_count) : (written += 1) {
            w.writeByte(prefix) catch return ParseError.WriteError;
        }
        try parseName(p, w);
    }

    // parseName parses a name (which is like an identifier, but doesn't have
    // extra restrictions on the first character) and writes its unescaped
    // value to w. It returns ExpectedName if nothing could be consumed.
    fn parseName(p: *Parser, w: anytype) ParseError!void {
        var i = p.i;
        var ok = false;

        while (i < p.s.len) {
            const c = p.s[i];

            if (nameChar(c)) {
                // Copy the whole run of plain name characters at once.
                const start = i;
                while (i < p.s.len and nameChar(p.s[i])) i += 1;
                w.writeAll(p.s[start..i]) catch return ParseError.WriteError;
                ok = true;
            } else if (c == '\\') {
                p.i = i;
                try p.parseEscape(w);
                i = p.i;
                ok = true;
            } else {
                // default:
                break;
            }
        }

        if (!ok) return ParseError.ExpectedName;
        p.i = i;
    }

    // parseEscape parses a backslash escape starting at the current position
    // and writes the decoded bytes to w.
    // A hex escape (1 to 6 hex digits, optionally followed by one whitespace
    // terminator) is written UTF-8 encoded. Any other escaped character is
    // written literally.
    fn parseEscape(p: *Parser, w: anytype) ParseError!void {
        if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
            return ParseError.InvalidEscape;
        }

        const start = p.i + 1;
        const c = p.s[start];
        if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;

        // unicode escape (hex)
        if (ascii.isHex(c)) {
            var i: usize = start;
            while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
                i += 1;
            }
            const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;

            // Consume the optional whitespace terminating the escape.
            // The escape may also end exactly at the end of the input.
            if (p.s.len > i) {
                switch (p.s[i]) {
                    '\r' => {
                        i += 1;
                        if (p.s.len > i and p.s[i] == '\n') i += 1;
                    },
                    ' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
                    else => {},
                }
            }
            p.i = i;

            var buf: [4]u8 = undefined;
            const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
            w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
            return;
        }

        // Return the literal character after the backslash.
        p.i += 2;
        w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
    }

    // parseIDSelector parses a selector that matches by id attribute.
    fn parseIDSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) return ParseError.ExpectedIDSelector;
        if (p.s[p.i] != '#') return ParseError.ExpectedIDSelector;

        p.i += 1;

        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();

        try p.parseName(buf.writer());
        return .{ .id = try buf.toOwnedSlice() };
    }

    // parseClassSelector parses a selector that matches by class attribute.
    fn parseClassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) return ParseError.ExpectedClassSelector;
        if (p.s[p.i] != '.') return ParseError.ExpectedClassSelector;

        p.i += 1;

        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();

        try p.parseIdentifier(buf.writer());
        return .{ .class = try buf.toOwnedSlice() };
    }

    // parseAttributeSelector parses a selector that matches by attribute value.
    // The attribute key is lowercased. The value is either an identifier, a
    // quoted string or, for the regexp operator, a regular expression. An
    // optional trailing `i` flag requests a case insensitive match.
    fn parseAttributeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
        if (p.s[p.i] != '[') return ParseError.ExpectedAttributeSelector;

        p.i += 1;
        _ = p.skipWhitespace();

        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();

        try p.parseIdentifier(buf.writer());
        const key = try buf.toOwnedSlice();
        errdefer alloc.free(key);

        lowerstr(key);

        _ = p.skipWhitespace();
        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

        // Presence-only selector: [key]
        if (p.s[p.i] == ']') {
            p.i += 1;
            return .{ .attribute = .{ .key = key } };
        }

        if (p.i + 2 >= p.s.len) return ParseError.ExpectedAttributeSelector;

        const op = try parseAttributeOP(p.s[p.i .. p.i + 2]);
        p.i += op.len();

        _ = p.skipWhitespace();
        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

        buf.clearRetainingCapacity();
        // The regexp operator stores its operand in .regexp, every other
        // operator stores it in .val.
        const is_val = op != .regexp;
        if (is_val) {
            switch (p.s[p.i]) {
                '\'', '"' => try p.parseString(buf.writer()),
                else => try p.parseIdentifier(buf.writer()),
            }
        } else {
            try p.parseRegex(buf.writer());
        }

        _ = p.skipWhitespace();
        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

        // check if the attribute contains an ignore case flag
        var ci = false;
        if (p.s[p.i] == 'i' or p.s[p.i] == 'I') {
            ci = true;
            p.i += 1;
        }

        _ = p.skipWhitespace();
        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;

        if (p.s[p.i] != ']') return ParseError.InvalidAttributeSelector;
        p.i += 1;

        return .{ .attribute = .{
            .key = key,
            .val = if (is_val) try buf.toOwnedSlice() else null,
            .regexp = if (!is_val) try buf.toOwnedSlice() else null,
            .op = op,
            .ci = ci,
        } };
    }

    // parseString parses a single- or double-quoted string and writes its
    // unescaped content (without the quotes) to writer.
    // The character at the current position is used as the quote character.
    // A backslash followed by a line ending is a line continuation and
    // produces nothing; a raw line ending inside the string is an error.
    fn parseString(p: *Parser, writer: anytype) ParseError!void {
        var i = p.i;
        if (p.s.len < i + 2) return ParseError.ExpectedString;

        const quote = p.s[i];
        i += 1;

        loop: while (i < p.s.len) {
            switch (p.s[i]) {
                '\\' => {
                    if (p.s.len > i + 1) {
                        const c = p.s[i + 1];
                        switch (c) {
                            '\r' => {
                                if (p.s.len > i + 2 and p.s[i + 2] == '\n') {
                                    i += 3;
                                    continue :loop;
                                }
                                i += 2;
                                continue :loop;
                            },
                            '\n', std.ascii.control_code.ff => {
                                i += 2;
                                continue :loop;
                            },
                            else => {},
                        }
                    }
                    p.i = i;
                    try p.parseEscape(writer);
                    i = p.i;
                },
                '\r', '\n', std.ascii.control_code.ff => return ParseError.InvalidString,
                else => |c| {
                    if (c == quote) break :loop;

                    // Copy the run of plain characters. Stop on anything the
                    // outer loop must handle: closing quote, escape or a
                    // (forbidden) raw line ending.
                    const start = i;
                    while (i < p.s.len) {
                        const cc = p.s[i];
                        if (cc == quote or cc == '\\' or cc == '\r' or cc == '\n' or cc == std.ascii.control_code.ff) break;
                        i += 1;
                    }
                    writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
                },
            }
        }

        // The closing quote is missing.
        if (i >= p.s.len) return ParseError.InvalidString;

        // Consume the final quote.
        i += 1;
        p.i = i;
    }

    // parseRegex parses a regular expression; the end is defined by encountering an
    // unmatched closing ')' or ']' which is not consumed
    fn parseRegex(p: *Parser, writer: anytype) ParseError!void {
        var i = p.i;
        if (p.s.len < i + 2) return ParseError.ExpectedRegexp;

        // number of open parens or brackets;
        // when it becomes negative, finished parsing regex
        var open: isize = 0;

        loop: while (i < p.s.len) {
            switch (p.s[i]) {
                '(', '[' => open += 1,
                ')', ']' => {
                    open -= 1;
                    if (open < 0) break :loop;
                },
                else => {},
            }
            i += 1;
        }

        // No unmatched closing character has been found.
        if (i >= p.s.len) return ParseError.InvalidRegexp;
        writer.writeAll(p.s[p.i..i]) catch return ParseError.WriteError;
        p.i = i;
    }

    // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
    // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
    // https://drafts.csswg.org/selectors-3/#pseudo-elements
    fn parsePseudoclassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) return ParseError.ExpectedPseudoClassSelector;
        if (p.s[p.i] != ':') return ParseError.ExpectedPseudoClassSelector;

        p.i += 1;

        var must_pseudo_elt: bool = false;
        if (p.i >= p.s.len) return ParseError.EmptyPseudoClassSelector;
        if (p.s[p.i] == ':') {
            // we found a pseudo-element
            must_pseudo_elt = true;
            p.i += 1;
        }

        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();

        try p.parseIdentifier(buf.writer());

        const pseudo_class = try PseudoClass.parse(buf.items);

        // reset the buffer to reuse it.
        buf.clearRetainingCapacity();

        if (must_pseudo_elt and !pseudo_class.isPseudoElement()) return ParseError.InvalidPseudoElement;

        switch (pseudo_class) {
            .not, .has, .haschild => {
                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;

                const sel = try p.parseSelectorGroup(alloc);
                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

                const s = try alloc.create(Selector);
                errdefer alloc.destroy(s);
                s.* = sel;

                return .{ .pseudo_class_relative = .{ .pseudo_class = pseudo_class, .match = s } };
            },
            .contains, .containsown => {
                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
                if (p.i == p.s.len) return ParseError.UnmatchParenthesis;

                // The argument is either a quoted string or a bare identifier.
                switch (p.s[p.i]) {
                    '\'', '"' => try p.parseString(buf.writer()),
                    else => try p.parseIdentifier(buf.writer()),
                }

                _ = p.skipWhitespace();
                if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

                const val = try buf.toOwnedSlice();
                errdefer alloc.free(val);

                lowerstr(val);

                return .{ .pseudo_class_contains = .{ .own = pseudo_class == .containsown, .val = val } };
            },
            .matches, .matchesown => {
                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;

                try p.parseRegex(buf.writer());
                if (p.i >= p.s.len) return ParseError.InvalidPseudoClassSelector;
                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

                return .{ .pseudo_class_regexp = .{ .own = pseudo_class == .matchesown, .regexp = try buf.toOwnedSlice() } };
            },
            .nth_child, .nth_last_child, .nth_of_type, .nth_last_of_type => {
                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
                const nth = try p.parseNth(alloc);
                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

                const last = pseudo_class == .nth_last_child or pseudo_class == .nth_last_of_type;
                const of_type = pseudo_class == .nth_of_type or pseudo_class == .nth_last_of_type;
                return .{ .pseudo_class_nth = .{ .a = nth[0], .b = nth[1], .of_type = of_type, .last = last } };
            },
            // The first/last shortcuts are expressed as an+b with a=0, b=1.
            .first_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = false } },
            .last_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = true } },
            .first_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = false } },
            .last_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = true } },
            .only_child => return .{ .pseudo_class_only_child = false },
            .only_of_type => return .{ .pseudo_class_only_child = true },
            .input, .empty, .root, .link => return .{ .pseudo_class = pseudo_class },
            .enabled, .disabled, .checked => return .{ .pseudo_class = pseudo_class },
            .lang => {
                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
                if (p.i == p.s.len) return ParseError.UnmatchParenthesis;

                try p.parseIdentifier(buf.writer());

                _ = p.skipWhitespace();
                if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;

                const val = try buf.toOwnedSlice();
                errdefer alloc.free(val);
                lowerstr(val);

                return .{ .pseudo_class_lang = val };
            },
            .visited, .hover, .active, .focus, .target => {
                // Not applicable in a static context: never match.
                return .{ .never_match = pseudo_class };
            },
            .after, .backdrop, .before, .cue, .first_letter => return .{ .pseudo_element = pseudo_class },
            .first_line, .grammar_error, .marker, .placeholder => return .{ .pseudo_element = pseudo_class },
            .selection, .spelling_error => return .{ .pseudo_element = pseudo_class },
        }
    }

    // consumeParenthesis consumes an opening parenthesis and any following
    // whitespace. It returns true if there was actually a parenthesis to skip.
    fn consumeParenthesis(p: *Parser) bool {
        if (p.i < p.s.len and p.s[p.i] == '(') {
            p.i += 1;
            _ = p.skipWhitespace();
            return true;
        }
        return false;
    }

    // parseSelectorGroup parses a group of selectors, separated by commas.
    // A single selector is returned as is, without a group wrapper.
    fn parseSelectorGroup(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        const s = try p.parseSelector(alloc);

        var buf = std.ArrayList(Selector).init(alloc);
        defer buf.deinit();

        try buf.append(s);

        while (p.i < p.s.len) {
            if (p.s[p.i] != ',') break;
            p.i += 1;
            const ss = try p.parseSelector(alloc);
            try buf.append(ss);
        }

        if (buf.items.len == 1) return buf.items[0];

        return .{ .group = try buf.toOwnedSlice() };
    }

    // parseSelector parses a selector that may include combinators.
    fn parseSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        _ = p.skipWhitespace();
        var s = try p.parseSimpleSelectorSequence(alloc);

        while (true) {
            // Whitespace alone is the descendant combinator, unless an
            // explicit combinator follows it.
            var combinator: Combinator = .empty;
            if (p.skipWhitespace()) {
                combinator = .descendant;
            }
            if (p.i >= p.s.len) {
                return s;
            }

            switch (p.s[p.i]) {
                '+', '>', '~' => {
                    combinator = try Combinator.parse(p.s[p.i]);
                    p.i += 1;
                    _ = p.skipWhitespace();
                },
                // These characters can't begin a selector, but they can legally occur after one.
                ',', ')' => {
                    return s;
                },
                else => {},
            }

            if (combinator == .empty) {
                return s;
            }

            const c = try p.parseSimpleSelectorSequence(alloc);

            const first = try alloc.create(Selector);
            errdefer alloc.destroy(first);
            first.* = s;

            const second = try alloc.create(Selector);
            errdefer alloc.destroy(second);
            second.* = c;

            s = Selector{ .combined = .{ .first = first, .second = second, .combinator = combinator } };
        }

        return s;
    }

    // consumeClosingParenthesis consumes a closing parenthesis and any preceding
    // whitespace. It returns true if there was actually a parenthesis to skip.
    // When there is none, the position is left untouched.
    fn consumeClosingParenthesis(p: *Parser) bool {
        const i = p.i;
        _ = p.skipWhitespace();
        if (p.i < p.s.len and p.s[p.i] == ')') {
            p.i += 1;
            return true;
        }
        p.i = i;
        return false;
    }

    // parseInteger parses a decimal integer.
    fn parseInteger(p: *Parser) ParseError!isize {
        var i = p.i;
        const start = i;
        while (i < p.s.len and '0' <= p.s[i] and p.s[i] <= '9') i += 1;
        if (i == start) return ParseError.ExpectedInteger;
        p.i = i;

        return std.fmt.parseUnsigned(isize, p.s[start..i], 10) catch ParseError.ExpectedInteger;
    }

    // parseNthReadN parses what follows the `n` of an an+b expression: an
    // optional signed integer b. It returns { a, b }, b being 0 when absent.
    fn parseNthReadN(p: *Parser, a: isize) ParseError![2]isize {
        _ = p.skipWhitespace();
        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;

        return switch (p.s[p.i]) {
            '+' => {
                p.i += 1;
                _ = p.skipWhitespace();
                const b = try p.parseInteger();
                return .{ a, b };
            },
            '-' => {
                p.i += 1;
                _ = p.skipWhitespace();
                const b = try p.parseInteger();
                return .{ a, -b };
            },
            else => .{ a, 0 },
        };
    }

    // parseNthReadA parses what follows a leading integer: either `n`, then
    // the integer is a, or anything else, then the integer is b and a is 0.
    fn parseNthReadA(p: *Parser, a: isize) ParseError![2]isize {
        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
        return switch (p.s[p.i]) {
            'n', 'N' => {
                p.i += 1;
                return p.parseNthReadN(a);
            },
            else => .{ 0, a },
        };
    }

    // parseNthNegativeA parses what follows a leading '-'.
    fn parseNthNegativeA(p: *Parser) ParseError![2]isize {
        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
        const c = p.s[p.i];
        if (std.ascii.isDigit(c)) {
            const a = try p.parseInteger() * -1;
            return p.parseNthReadA(a);
        }
        if (c == 'n' or c == 'N') {
            p.i += 1;
            return p.parseNthReadN(-1);
        }

        return ParseError.InvalidNthExpression;
    }

    // parseNthPositiveA parses what follows a leading '+', or a leading
    // integer without sign.
    fn parseNthPositiveA(p: *Parser) ParseError![2]isize {
        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
        const c = p.s[p.i];
        if (std.ascii.isDigit(c)) {
            const a = try p.parseInteger();
            return p.parseNthReadA(a);
        }
        if (c == 'n' or c == 'N') {
            p.i += 1;
            return p.parseNthReadN(1);
        }

        return ParseError.InvalidNthExpression;
    }

    // parseNth parses the argument for :nth-child (normally of the form an+b).
    // It returns { a, b }. The keywords `odd` and `even` are accepted too.
    fn parseNth(p: *Parser, alloc: std.mem.Allocator) ParseError![2]isize {
        // initial state
        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
        return switch (p.s[p.i]) {
            '-' => {
                p.i += 1;
                return p.parseNthNegativeA();
            },
            '+' => {
                p.i += 1;
                return p.parseNthPositiveA();
            },
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => p.parseNthPositiveA(),
            'n', 'N' => {
                p.i += 1;
                return p.parseNthReadN(1);
            },
            'o', 'O', 'e', 'E' => {
                var buf = std.ArrayList(u8).init(alloc);
                defer buf.deinit();

                try p.parseName(buf.writer());

                if (std.ascii.eqlIgnoreCase("odd", buf.items)) return .{ 2, 1 };
                if (std.ascii.eqlIgnoreCase("even", buf.items)) return .{ 2, 0 };

                return ParseError.InvalidNthExpression;
            },
            else => ParseError.InvalidNthExpression,
        };
    }
};

// nameStart returns whether c can be the first character of an identifier
// (not counting an initial hyphen, or an escape sequence).
fn nameStart(c: u8) bool {
    return 'a' <= c and c <= 'z' or 'A' <= c and c <= 'Z' or c == '_' or c > 127;
}

// nameChar returns whether c can be a character within an identifier
// (not counting an escape sequence).
fn nameChar(c: u8) bool {
    return 'a' <= c and c <= 'z' or 'A' <= c and c <= 'Z' or c == '_' or c > 127 or
        c == '-' or '0' <= c and c <= '9';
}

// lowerstr lowercases the ASCII letters of str in place.
fn lowerstr(str: []u8) void {
    for (str, 0..) |c, i| {
        str[i] = std.ascii.toLower(c);
    }
}

// parseAttributeOP parses an AttributeOP from a string of 1 or 2 bytes.
fn parseAttributeOP(s: []const u8) ParseError!AttributeOP {
    if (s.len < 1 or s.len > 2) return ParseError.InvalidAttributeOperator;

    // if the first sign is equal, we don't check anything else.
    if (s[0] == '=') return .eql;

    if (s.len != 2 or s[1] != '=') return ParseError.InvalidAttributeOperator;

    return switch (s[0]) {
        '=' => .eql,
        '!' => .not_eql,
        '~' => .one_of,
        '|' => .prefix_hyphen,
        '^' => .prefix,
        '$' => .suffix,
        '*' => .contains,
        '#' => .regexp,
        else => ParseError.InvalidAttributeOperator,
    };
}

test "parser.skipWhitespace" {
    const testcases = [_]struct {
        s: []const u8,
        i: usize,
        r: bool,
    }{
        .{ .s = "", .i = 0, .r = false },
        .{ .s = "foo", .i = 0, .r = false },
        .{ .s = " ", .i = 1, .r = true },
        .{ .s = " foo", .i = 1, .r = true },
        .{ .s = "/* foo */ bar", .i = 10, .r = true },
        .{ .s = "/* foo", .i = 0, .r = false },
        // the comment opener can't be reused as its terminator.
        .{ .s = "/*/", .i = 0, .r = false },
        .{ .s = "/*/ foo */bar", .i = 10, .r = true },
    };

    for (testcases) |tc| {
        var p = Parser{ .s = tc.s, .opts = .{} };
        const res = p.skipWhitespace();
        try std.testing.expectEqual(tc.r, res);
        try std.testing.expectEqual(tc.i, p.i);
    }
}

test "parser.parseIdentifier" {
    const alloc = std.testing.allocator;

    const testcases = [_]struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    }{
        .{ .s = "x", .exp = "x" },
        .{ .s = "96", .exp = "", .err = true },
        .{ .s = "-x", .exp = "-x" },
        .{ .s = "r\\e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9sumé", .exp = "résumé" },
        .{ .s = "a\\\"b", .exp = "a\"b" },
        // hex escape ending exactly at the end of the input.
        .{ .s = "r\\e9", .exp = "ré" },
        // identifier starting with an escape.
        .{ .s = "\\e9 t", .exp = "ét" },
    };

    var buf = std.ArrayList(u8).init(alloc);
    defer buf.deinit();

    for (testcases) |tc| {
        buf.clearRetainingCapacity();

        var p = Parser{ .s = tc.s, .opts = .{} };
        p.parseIdentifier(buf.writer()) catch |e| {
            // if error was expected, continue.
            if (tc.err) continue;

            std.debug.print("test case {s}\n", .{tc.s});
            return e;
        };
        std.testing.expectEqualDeep(tc.exp, buf.items) catch |e| {
            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
            return e;
        };
    }
}

test "parser.parseString" {
    const alloc = std.testing.allocator;

    const testcases = [_]struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    }{
        .{ .s = "\"x\"", .exp = "x" },
        .{ .s = "'x'", .exp = "x" },
        .{ .s = "'x", .exp = "", .err = true },
        .{ .s = "'x\\\r\nx'", .exp = "xx" },
        .{ .s = "\"r\\e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9sumé\"", .exp = "résumé" },
        .{ .s = "\"a\\\"b\"", .exp = "a\"b" },
        .{ .s = "\"\\\n\"", .exp = "" },
        .{ .s = "\"hello world\"", .exp = "hello world" },
        // a raw line ending inside a string is invalid.
        .{ .s = "'x\ny'", .exp = "", .err = true },
    };

    var buf = std.ArrayList(u8).init(alloc);
    defer buf.deinit();

    for (testcases) |tc| {
        buf.clearRetainingCapacity();

        var p = Parser{ .s = tc.s, .opts = .{} };
        p.parseString(buf.writer()) catch |e| {
            // if error was expected, continue.
            if (tc.err) continue;

            std.debug.print("test case {s}\n", .{tc.s});
            return e;
        };
        std.testing.expectEqualDeep(tc.exp, buf.items) catch |e| {
            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
            return e;
        };
    }
}