From 97c80530101e73a44e7e8db2494d2d4055087774 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 14 Mar 2024 16:39:02 +0100 Subject: [PATCH 01/28] css: implement css query parser --- src/css/parser.zig | 1201 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1201 insertions(+) create mode 100644 src/css/parser.zig diff --git a/src/css/parser.zig b/src/css/parser.zig new file mode 100644 index 00000000..e34e5071 --- /dev/null +++ b/src/css/parser.zig @@ -0,0 +1,1201 @@
// CSS Selector parser
// This file is a rewrite in Zig of Cascadia CSS Selector parser.
// see https://github.com/andybalholm/cascadia
// see https://github.com/andybalholm/cascadia/blob/master/parser.go
const std = @import("std");
const ascii = std.ascii;

// AttributeOP is the comparison operator of an attribute selector
// (`[key OP value]`).
pub const AttributeOP = enum {
    eql, // =
    not_eql, // !=
    one_of, // ~=
    prefix_hyphen, // |=
    prefix, // ^=
    suffix, // $=
    contains, // *=
    regexp, // #=

    // len returns the number of source bytes the operator occupies:
    // 1 for `=`, 2 for every other operator.
    fn len(op: AttributeOP) u2 {
        return if (op == .eql) 1 else 2;
    }
};

// PseudoClass lists every pseudo-class and pseudo-element name the parser
// understands. The CSS spelling of a variant is its tag name with `_`
// replaced by `-` (e.g. `nth_child` is written `nth-child`).
pub const PseudoClass = enum {
    not,
    has,
    haschild,
    contains,
    containsown,
    matches,
    matchesown,
    nth_child,
    nth_last_child,
    nth_of_type,
    nth_last_of_type,
    first_child,
    last_child,
    first_of_type,
    last_of_type,
    only_child,
    only_of_type,
    input,
    empty,
    root,
    link,
    lang,
    enabled,
    disabled,
    checked,
    visited,
    hover,
    active,
    focus,
    target,
    after,
    backdrop,
    before,
    cue,
    first_letter,
    first_line,
    grammar_error,
    marker,
    placeholder,
    selection,
    spelling_error,

    // isPseudoElement reports whether pc names a pseudo-element rather than
    // a pseudo-class.
    fn isPseudoElement(pc: PseudoClass) bool {
        return switch (pc) {
            .after,
            .backdrop,
            .before,
            .cue,
            .first_letter,
            .first_line,
            .grammar_error,
            .marker,
            .placeholder,
            .selection,
            .spelling_error,
            => true,
            else => false,
        };
    }

    // parse resolves a pseudo-class name, ignoring ASCII case.
    // It returns ParseError.InvalidPseudoClass for unknown names.
    //
    // The lookup is derived from the enum fields so that adding a variant
    // automatically makes its hyphenated spelling parseable.
    fn parse(s: []const u8) ParseError!PseudoClass {
        inline for (std.meta.fields(PseudoClass)) |field| {
            if (matchesKeyword(s, field.name)) return @field(PseudoClass, field.name);
        }
        return ParseError.InvalidPseudoClass;
    }

    // matchesKeyword compares s, ignoring ASCII case, against the CSS
    // spelling of an enum tag name (lower-case, `_` standing for `-`).
    fn matchesKeyword(s: []const u8, tag_name: []const u8) bool {
        if (s.len != tag_name.len) return false;
        for (s, tag_name) |got, want| {
            const expected: u8 = if (want == '_') '-' else want;
            if (std.ascii.toLower(got) != expected) return false;
        }
        return true;
    }
};

// Selector is the parsed representation of a CSS selector.
// Every slice and pointer reachable from a Selector is owned by it and is
// released by deinit.
pub const Selector = union(enum) {
    // Sequence of simple selectors applying to one element, optionally
    // followed by a pseudo-element.
    compound: struct {
        selectors: []Selector,
        pseudo_elt: ?PseudoClass,
    },
    // Comma separated list of selectors.
    group: []Selector,
    tag: []const u8,
    id: []const u8,
    class: []const u8,
    attribute: struct {
        key: []const u8,
        val: ?[]const u8 = null,
        op: ?AttributeOP = null,
        regexp: ?[]const u8 = null,
        ci: bool = false,
    },
    // Two selectors joined by a combinator byte (' ', '+', '>' or '~').
    combined: struct {
        first: *Selector,
        second: *Selector,
        combinator: u8,
    },

    never_match: PseudoClass,

    pseudo_class: PseudoClass,
    // false for :only-child, true for :only-of-type.
    pseudo_class_only_child: bool,
    pseudo_class_lang: []const u8,
    pseudo_class_relative: struct {
        pseudo_class: PseudoClass,
        match: *Selector,
    },
    pseudo_class_contains: struct {
        own: bool,
        val: []const u8,
    },
    pseudo_class_regexp: struct {
        own: bool,
        regexp: []const u8,
    },
    pseudo_class_nth: struct {
        a: isize,
        b: isize,
        of_type: bool,
        last: bool,
    },
    pseudo_element: PseudoClass,

    // deinit recursively frees everything owned by sel. alloc must be the
    // allocator that was used to parse the selector.
    fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
        switch (sel) {
            .group => |v| {
                for (v) |vv| vv.deinit(alloc);
                alloc.free(v);
            },
            .compound => |v| {
                for (v.selectors) |vv| vv.deinit(alloc);
                alloc.free(v.selectors);
            },
            .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v),
            .attribute => |att| {
                alloc.free(att.key);
                if (att.val) |v| alloc.free(v);
                if (att.regexp) |v| alloc.free(v);
            },
            .combined => |c| {
                c.first.deinit(alloc);
                alloc.destroy(c.first);
                c.second.deinit(alloc);
                alloc.destroy(c.second);
            },
            .pseudo_class_relative => |v| {
                v.match.deinit(alloc);
                alloc.destroy(v.match);
            },
            .pseudo_class_contains => |v| alloc.free(v.val),
            .pseudo_class_regexp => |v| alloc.free(v.regexp),
            // Nothing is heap allocated for these variants.
            .pseudo_class, .pseudo_element, .never_match => {},
            .pseudo_class_nth, .pseudo_class_only_child => {},
        }
    }
};

pub const ParseError = error{
    ExpectedSelector,
    ExpectedIdentifier,
    ExpectedName,
    ExpectedIDSelector,
    ExpectedClassSelector,
    ExpectedAttributeSelector,
    ExpectedString,
    ExpectedRegexp,
    ExpectedPseudoClassSelector,
    ExpectedParenthesis,
    ExpectedParenthesisClose,
    ExpectedNthExpression,
    ExpectedInteger,
    InvalidEscape,
    EscapeLineEndingOutsideString,
    InvalidUnicode,
    UnicodeIsNotHandled,
    WriteError,
    PseudoElementNotAtSelectorEnd,
    PseudoElementNotUnique,
    PseudoElementDisabled,
    InvalidAttributeOperator,
    InvalidAttributeSelector,
    InvalidString,
    InvalidRegexp,
    InvalidPseudoClassSelector,
    EmptyPseudoClassSelector,
    InvalidPseudoClass,
    InvalidPseudoElement,
    UnmatchParenthesis,
    NotHandled,
    UnknownPseudoSelector,
    InvalidNthExpression,
} || std.mem.Allocator.Error;

pub const ParseOptions = struct {
    // When false, a pseudo-element in the selector is rejected with
    // ParseError.PseudoElementDisabled.
    accept_pseudo_elts: bool = true,
};

// Parse parses a selector string and returns the parsed result or an error.
// The result is allocated with alloc; the caller releases it by calling
// deinit on it with the same allocator.
pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: ParseOptions) ParseError!Selector {
    var p = Parser{ .s = s, .i = 0, .opts = opts };
    return p.parseSelector(alloc);
}

const Parser = struct {
    s: []const u8, // string to parse
    i: usize = 0, // current position

    opts: ParseOptions,

    // skipWhitespace consumes whitespace characters and comments.
    // It returns true if there was actually anything to skip.
    // An unterminated comment is left untouched.
    fn skipWhitespace(p: *Parser) bool {
        var i = p.i;
        while (i < p.s.len) {
            const c = p.s[i];

            // Whitespaces.
            if (ascii.isWhitespace(c)) {
                i += 1;
                continue;
            }

            // Comments. The terminator is searched for after the opening
            // "/*" so that the '*' of the opener is not mistaken for the
            // start of "*/" (as in "/*/ ... */").
            if (c == '/' and std.mem.startsWith(u8, p.s[i..], "/*")) {
                const body_start = i + "/*".len;
                if (std.mem.indexOf(u8, p.s[body_start..], "*/")) |end| {
                    i = body_start + end + "*/".len;
                    continue;
                }
            }
            break;
        }

        if (i > p.i) {
            p.i = i;
            return true;
        }

        return false;
    }

    // parseSimpleSelectorSequence parses a selector sequence that applies to
    // a single element.
    // On error nothing parsed so far is leaked.
    fn parseSimpleSelectorSequence(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        if (p.i >= p.s.len) {
            return ParseError.ExpectedSelector;
        }

        var buf = std.ArrayList(Selector).init(alloc);
        defer buf.deinit();
        // Selectors stored in buf are owned by this function until they are
        // returned; free them if we bail out with an error.
        errdefer {
            for (buf.items) |sel| sel.deinit(alloc);
        }

        switch (p.s[p.i]) {
            '*' => {
                // It's the universal selector. Just skip over it, since it
                // doesn't affect the meaning.
                p.i += 1;

                // other version of universal selector: "*|*"
                if (std.mem.startsWith(u8, p.s[p.i..], "|*")) {
                    p.i += 2;
                }
            },
            '#', '.', '[', ':' => {
                // There's no type selector. Wait to process the other till the
                // main loop.
            },
            else => {
                const tag = try p.parseTypeSelector(alloc);
                errdefer tag.deinit(alloc);
                try buf.append(tag);
            },
        }

        var pseudo_elt: ?PseudoClass = null;

        loop: while (p.i < p.s.len) {
            const ns: Selector = switch (p.s[p.i]) {
                '#' => try p.parseIDSelector(alloc),
                '.' => try p.parseClassSelector(alloc),
                '[' => try p.parseAttributeSelector(alloc),
                ':' => try p.parsePseudoclassSelector(alloc),
                else => break :loop,
            };
            // ns is not in buf yet: free it ourselves if this iteration fails.
            errdefer ns.deinit(alloc);

            // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
            // "Only one pseudo-element may appear per selector, and if present
            // it must appear after the sequence of simple selectors that
            // represents the subjects of the selector."
            switch (ns) {
                .pseudo_element => |e| {
                    // We found a pseudo-element.
                    // Only one pseudo-element is accepted per selector.
                    if (pseudo_elt != null) return ParseError.PseudoElementNotUnique;
                    if (!p.opts.accept_pseudo_elts) return ParseError.PseudoElementDisabled;

                    // A pseudo_element selector owns no memory, so only the
                    // tag is kept.
                    pseudo_elt = e;
                },
                else => {
                    if (pseudo_elt != null) return ParseError.PseudoElementNotAtSelectorEnd;
                    try buf.append(ns);
                },
            }
        }

        // no need wrap the selectors in compoundSelector
        if (buf.items.len == 1 and pseudo_elt == null) return buf.items[0];

        return .{ .compound = .{ .selectors = try buf.toOwnedSlice(), .pseudo_elt = pseudo_elt } };
    }

    // parseTypeSelector parses a type selector (one that matches by tag name).
    fn parseTypeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
        var buf = std.ArrayList(u8).init(alloc);
        defer buf.deinit();
        try p.parseIdentifier(buf.writer());

        return .{ .tag = try buf.toOwnedSlice() };
    }

    // parseIdentifier parses an identifier and writes it, unescaped, to w.
    // Leading hyphens are accepted; the first character after them must be
    // a name-start character or the beginning of an escape sequence.
    fn parseIdentifier(p: *Parser, w: anytype) ParseError!void {
        const prefix = '-';
        var num_prefix: usize = 0;

        while (p.i < p.s.len and p.s[p.i] == prefix) {
            p.i += 1;
            num_prefix += 1;
        }

        if (p.i >= p.s.len) {
            return ParseError.ExpectedSelector;
        }

        const c = p.s[p.i];
        if (!(nameStart(c) or c == '\\')) {
            return ParseError.ExpectedSelector;
        }

        for (0..num_prefix) |_| {
            w.writeByte(prefix) catch return ParseError.WriteError;
        }
        try p.parseName(w);
    }

    // parseName parses a name (which is like an identifier, but doesn't have
    // extra restrictions on the first character).
+ fn parseName(p: *Parser, w: anytype) ParseError!void { + var i = p.i; + var ok = false; + + while (i < p.s.len) { + const c = p.s[i]; + + if (nameChar(c)) { + const start = i; + while (i < p.s.len and nameChar(p.s[i])) i += 1; + w.writeAll(p.s[start..i]) catch return ParseError.WriteError; + ok = true; + } else if (c == '\\') { + p.i = i; + try p.parseEscape(w); + i = p.i; + ok = true; + } else { + // default: + break; + } + } + + if (!ok) return ParseError.ExpectedName; + p.i = i; + } + + // parseEscape parses a backslash escape. + // The returned string is owned by the caller. + fn parseEscape(p: *Parser, w: anytype) ParseError!void { + if (p.s.len < p.i + 2 or p.s[p.i] != '\\') { + return ParseError.InvalidEscape; + } + + const start = p.i + 1; + const c = p.s[start]; + if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString; + + // unicode escape (hex) + if (ascii.isHex(c)) { + var i: usize = start; + while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) { + i += 1; + } + const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode; + if (p.s.len > i) { + switch (p.s[i]) { + '\r' => { + i += 1; + if (p.s.len > i and p.s[i] == '\n') i += 1; + }, + ' ', '\t', '\n', std.ascii.control_code.ff => i += 1, + else => {}, + } + p.i = i; + var buf: [4]u8 = undefined; + const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode; + w.writeAll(buf[0..ln]) catch return ParseError.WriteError; + return; + } + } + + // Return the literal character after the backslash. + p.i += 2; + w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError; + } + + // parseIDSelector parses a selector that matches by id attribute. 
+ fn parseIDSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + if (p.i >= p.s.len) return ParseError.ExpectedIDSelector; + if (p.s[p.i] != '#') return ParseError.ExpectedIDSelector; + + p.i += 1; + + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try p.parseName(buf.writer()); + return .{ .id = try buf.toOwnedSlice() }; + } + + // parseClassSelector parses a selector that matches by class attribute. + fn parseClassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + if (p.i >= p.s.len) return ParseError.ExpectedClassSelector; + if (p.s[p.i] != '.') return ParseError.ExpectedClassSelector; + + p.i += 1; + + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try p.parseIdentifier(buf.writer()); + return .{ .class = try buf.toOwnedSlice() }; + } + + // parseAttributeSelector parses a selector that matches by attribute value. + fn parseAttributeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector; + if (p.s[p.i] != '[') return ParseError.ExpectedAttributeSelector; + + p.i += 1; + _ = p.skipWhitespace(); + + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try p.parseIdentifier(buf.writer()); + const key = try buf.toOwnedSlice(); + errdefer alloc.free(key); + + lowerstr(key); + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector; + if (p.s[p.i] == ']') { + p.i += 1; + return .{ .attribute = .{ .key = key } }; + } + + if (p.i + 2 >= p.s.len) return ParseError.ExpectedAttributeSelector; + + const op = try parseAttributeOP(p.s[p.i .. 
p.i + 2]); + p.i += op.len(); + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector; + + buf.clearRetainingCapacity(); + var is_val: bool = undefined; + if (op == .regexp) { + is_val = false; + try p.parseRegex(buf.writer()); + } else { + is_val = true; + switch (p.s[p.i]) { + '\'', '"' => try p.parseString(buf.writer()), + else => try p.parseIdentifier(buf.writer()), + } + } + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector; + + // check if the attribute contains an ignore case flag + var ci = false; + if (p.s[p.i] == 'i' or p.s[p.i] == 'I') { + ci = true; + p.i += 1; + } + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector; + + if (p.s[p.i] != ']') return ParseError.InvalidAttributeSelector; + p.i += 1; + + return .{ .attribute = .{ + .key = key, + .val = if (is_val) try buf.toOwnedSlice() else null, + .regexp = if (!is_val) try buf.toOwnedSlice() else null, + .op = op, + .ci = ci, + } }; + } + + // parseString parses a single- or double-quoted string. 
+ fn parseString(p: *Parser, writer: anytype) ParseError!void { + var i = p.i; + if (p.s.len < i + 2) return ParseError.ExpectedString; + + const quote = p.s[i]; + i += 1; + + loop: while (i < p.s.len) { + switch (p.s[i]) { + '\\' => { + if (p.s.len > i + 1) { + const c = p.s[i + 1]; + switch (c) { + '\r' => { + if (p.s.len > i + 2 and p.s[i + 2] == '\n') { + i += 3; + continue :loop; + } + i += 2; + continue :loop; + }, + '\n', std.ascii.control_code.ff => { + i += 2; + continue :loop; + }, + else => {}, + } + } + p.i = i; + try p.parseEscape(writer); + i = p.i; + }, + '\r', '\n', std.ascii.control_code.ff => return ParseError.InvalidString, + else => |c| { + if (c == quote) break :loop; + const start = i; + while (i < p.s.len) { + const cc = p.s[i]; + if (cc == quote or cc == '\\' or c == '\r' or c == '\n' or c == std.ascii.control_code.ff) break; + i += 1; + } + writer.writeAll(p.s[start..i]) catch return ParseError.WriteError; + }, + } + } + + if (i >= p.s.len) return ParseError.InvalidString; + + // Consume the final quote. + i += 1; + p.i = i; + } + + // parseRegex parses a regular expression; the end is defined by encountering an + // unmatched closing ')' or ']' which is not consumed + fn parseRegex(p: *Parser, writer: anytype) ParseError!void { + var i = p.i; + if (p.s.len < i + 2) return ParseError.ExpectedRegexp; + + // number of open parens or brackets; + // when it becomes negative, finished parsing regex + var open: isize = 0; + + loop: while (i < p.s.len) { + switch (p.s[i]) { + '(', '[' => open += 1, + ')', ']' => { + open -= 1; + if (open < 0) break :loop; + }, + else => {}, + } + i += 1; + } + + if (i >= p.s.len) return ParseError.InvalidRegexp; + writer.writeAll(p.s[p.i..i]) catch return ParseError.WriteError; + p.i = i; + } + + // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element + // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. 
+ // https://drafts.csswg.org/selectors-3/#pseudo-elements + fn parsePseudoclassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + if (p.i >= p.s.len) return ParseError.ExpectedPseudoClassSelector; + if (p.s[p.i] != ':') return ParseError.ExpectedPseudoClassSelector; + + p.i += 1; + + var must_pseudo_elt: bool = false; + if (p.i >= p.s.len) return ParseError.EmptyPseudoClassSelector; + if (p.s[p.i] == ':') { // we found a pseudo-element + must_pseudo_elt = true; + p.i += 1; + } + + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try p.parseIdentifier(buf.writer()); + + const pseudo_class = try PseudoClass.parse(buf.items); + + // reset the buffer to reuse it. + buf.clearRetainingCapacity(); + + if (must_pseudo_elt and !pseudo_class.isPseudoElement()) return ParseError.InvalidPseudoElement; + + switch (pseudo_class) { + .not, .has, .haschild => { + if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis; + + const sel = try p.parseSelectorGroup(alloc); + if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose; + + const s = try alloc.create(Selector); + errdefer alloc.destroy(s); + s.* = sel; + + return .{ .pseudo_class_relative = .{ .pseudo_class = pseudo_class, .match = s } }; + }, + .contains, .containsown => { + if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis; + if (p.i == p.s.len) return ParseError.UnmatchParenthesis; + + switch (p.s[p.i]) { + '\'', '"' => try p.parseString(buf.writer()), + else => try p.parseString(buf.writer()), + } + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.InvalidPseudoClass; + if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose; + + const val = try buf.toOwnedSlice(); + errdefer alloc.free(val); + + lowerstr(val); + + return .{ .pseudo_class_contains = .{ .own = pseudo_class == .containsown, .val = val } }; + }, + .matches, .matchesown => { + if (!p.consumeParenthesis()) return 
ParseError.ExpectedParenthesis; + + try p.parseRegex(buf.writer()); + if (p.i >= p.s.len) return ParseError.InvalidPseudoClassSelector; + if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose; + + return .{ .pseudo_class_regexp = .{ .own = pseudo_class == .matchesown, .regexp = try buf.toOwnedSlice() } }; + }, + .nth_child, .nth_last_child, .nth_of_type, .nth_last_of_type => { + if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis; + const nth = try p.parseNth(alloc); + if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose; + + const last = pseudo_class == .nth_last_child or pseudo_class == .nth_last_of_type; + const of_type = pseudo_class == .nth_of_type or pseudo_class == .nth_last_of_type; + return .{ .pseudo_class_nth = .{ .a = nth[0], .b = nth[1], .of_type = of_type, .last = last } }; + }, + .first_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = false } }, + .last_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = true } }, + .first_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = false } }, + .last_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = true } }, + .only_child => return .{ .pseudo_class_only_child = false }, + .only_of_type => return .{ .pseudo_class_only_child = true }, + .input, .empty, .root, .link => return .{ .pseudo_class = pseudo_class }, + .enabled, .disabled, .checked => return .{ .pseudo_class = pseudo_class }, + .lang => { + if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis; + if (p.i == p.s.len) return ParseError.UnmatchParenthesis; + + try p.parseIdentifier(buf.writer()); + + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.InvalidPseudoClass; + if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose; + + const val = try buf.toOwnedSlice(); + errdefer alloc.free(val); + 
lowerstr(val); + + return .{ .pseudo_class_lang = val }; + }, + .visited, .hover, .active, .focus, .target => { + // Not applicable in a static context: never match. + return .{ .never_match = pseudo_class }; + }, + .after, .backdrop, .before, .cue, .first_letter => return .{ .pseudo_element = pseudo_class }, + .first_line, .grammar_error, .marker, .placeholder => return .{ .pseudo_element = pseudo_class }, + .selection, .spelling_error => return .{ .pseudo_element = pseudo_class }, + } + } + + // consumeParenthesis consumes an opening parenthesis and any following + // whitespace. It returns true if there was actually a parenthesis to skip. + fn consumeParenthesis(p: *Parser) bool { + if (p.i < p.s.len and p.s[p.i] == '(') { + p.i += 1; + _ = p.skipWhitespace(); + return true; + } + return false; + } + + // parseSelectorGroup parses a group of selectors, separated by commas. + fn parseSelectorGroup(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + const s = try p.parseSelector(alloc); + + var buf = std.ArrayList(Selector).init(alloc); + defer buf.deinit(); + + try buf.append(s); + + while (p.i < p.s.len) { + if (p.s[p.i] != ',') break; + p.i += 1; + const ss = try p.parseSelector(alloc); + try buf.append(ss); + } + + return .{ .group = try buf.toOwnedSlice() }; + } + + // parseSelector parses a selector that may include combinators. + fn parseSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + _ = p.skipWhitespace(); + var s = try p.parseSimpleSelectorSequence(alloc); + + while (true) { + var combinator: u8 = undefined; + if (p.skipWhitespace()) { + combinator = ' '; + } + if (p.i >= p.s.len) { + return s; + } + + switch (p.s[p.i]) { + '+', '>', '~' => { + combinator = p.s[p.i]; + p.i += 1; + _ = p.skipWhitespace(); + }, + // These characters can't begin a selector, but they can legally occur after one. 
+ ',', ')' => return s, + else => {}, + } + + if (combinator == 0) { + return s; + } + + const c = try p.parseSimpleSelectorSequence(alloc); + + const first = try alloc.create(Selector); + errdefer alloc.destroy(first); + first.* = s; + + const second = try alloc.create(Selector); + errdefer alloc.destroy(second); + second.* = c; + + s = Selector{ .combined = .{ .first = first, .second = second, .combinator = combinator } }; + } + + return s; + } + + // consumeClosingParenthesis consumes a closing parenthesis and any preceding + // whitespace. It returns true if there was actually a parenthesis to skip. + fn consumeClosingParenthesis(p: *Parser) bool { + const i = p.i; + _ = p.skipWhitespace(); + if (p.i < p.s.len and p.s[p.i] == ')') { + p.i += 1; + return true; + } + p.i = i; + return false; + } + + // parseInteger parses a decimal integer. + fn parseInteger(p: *Parser) ParseError!isize { + var i = p.i; + const start = i; + while (i < p.s.len and '0' <= p.s[i] and p.s[i] <= '9') i += 1; + if (i == start) return ParseError.ExpectedInteger; + p.i = i; + + return std.fmt.parseUnsigned(isize, p.s[start..i], 10) catch ParseError.ExpectedInteger; + } + + fn parseNthReadN(p: *Parser, a: isize) ParseError![2]isize { + _ = p.skipWhitespace(); + if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; + + return switch (p.s[p.i]) { + '+' => { + p.i += 1; + _ = p.skipWhitespace(); + const b = try p.parseInteger(); + return .{ a, b }; + }, + '-' => { + p.i += 1; + _ = p.skipWhitespace(); + const b = try p.parseInteger(); + return .{ a, -b }; + }, + else => .{ a, 0 }, + }; + } + + fn parseNthReadA(p: *Parser, a: isize) ParseError![2]isize { + if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; + return switch (p.s[p.i]) { + 'n', 'N' => { + p.i += 1; + return p.parseNthReadN(a); + }, + else => .{ 0, a }, + }; + } + + fn parseNthNegativeA(p: *Parser) ParseError![2]isize { + if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; + const c = p.s[p.i]; + if 
(std.ascii.isDigit(c)) { + const a = try p.parseInteger() * -1; + return p.parseNthReadA(a); + } + if (c == 'n' or c == 'N') { + p.i += 1; + return p.parseNthReadN(-1); + } + + return ParseError.InvalidNthExpression; + } + + fn parseNthPositiveA(p: *Parser) ParseError![2]isize { + if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; + const c = p.s[p.i]; + if (std.ascii.isDigit(c)) { + const a = try p.parseInteger() * -1; + return p.parseNthReadA(a); + } + if (c == 'n' or c == 'N') { + p.i += 1; + return p.parseNthReadN(1); + } + + return ParseError.InvalidNthExpression; + } + + // parseNth parses the argument for :nth-child (normally of the form an+b). + fn parseNth(p: *Parser, alloc: std.mem.Allocator) ParseError![2]isize { + // initial state + if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; + return switch (p.s[p.i]) { + '-' => { + p.i += 1; + return p.parseNthNegativeA(); + }, + '+' => { + p.i += 1; + return p.parseNthPositiveA(); + }, + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => p.parseNthPositiveA(), + 'n', 'N' => { + p.i += 1; + return p.parseNthReadN(1); + }, + 'o', 'O', 'e', 'E' => { + var buf = std.ArrayList(u8).init(alloc); + defer buf.deinit(); + + try p.parseName(buf.writer()); + + if (std.ascii.eqlIgnoreCase("odd", buf.items)) return .{ 2, 1 }; + if (std.ascii.eqlIgnoreCase("even", buf.items)) return .{ 2, 0 }; + + return ParseError.InvalidNthExpression; + }, + else => ParseError.InvalidNthExpression, + }; + } +}; + +// nameStart returns whether c can be the first character of an identifier +// (not counting an initial hyphen, or an escape sequence). +fn nameStart(c: u8) bool { + return 'a' <= c and c <= 'z' or 'A' <= c and c <= 'Z' or c == '_' or c > 127; +} + +// nameChar returns whether c can be a character within an identifier +// (not counting an escape sequence). 
fn nameChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => true,
        // Any non-ASCII byte is accepted.
        else => c > 127,
    };
}

// lowerstr converts str to ASCII lower case in place.
fn lowerstr(str: []u8) void {
    for (str) |*c| c.* = std.ascii.toLower(c.*);
}

// parseAttributeOP parses an AttributeOP from a string of 1 or 2 bytes.
// A leading '=' always means `.eql`, whatever the second byte is; every
// other operator is a sign followed by '='.
fn parseAttributeOP(s: []const u8) ParseError!AttributeOP {
    if (s.len == 0 or s.len > 2) return ParseError.InvalidAttributeOperator;

    const sign = s[0];

    // if the first sign is equal, we don't check anything else.
    if (sign == '=') return .eql;

    if (s.len < 2 or s[1] != '=') return ParseError.InvalidAttributeOperator;

    return switch (sign) {
        '!' => .not_eql,
        '~' => .one_of,
        '|' => .prefix_hyphen,
        '^' => .prefix,
        '$' => .suffix,
        '*' => .contains,
        '#' => .regexp,
        else => ParseError.InvalidAttributeOperator,
    };
}

test "parser.skipWhitespace" {
    const Case = struct {
        s: []const u8, // input
        i: usize, // expected position afterwards
        r: bool, // expected return value
    };
    const testcases = [_]Case{
        .{ .s = "", .i = 0, .r = false },
        .{ .s = "foo", .i = 0, .r = false },
        .{ .s = " ", .i = 1, .r = true },
        .{ .s = " foo", .i = 1, .r = true },
        .{ .s = "/* foo */ bar", .i = 10, .r = true },
        .{ .s = "/* foo", .i = 0, .r = false },
    };

    for (testcases) |tc| {
        var p = Parser{ .s = tc.s, .opts = .{} };
        const skipped = p.skipWhitespace();
        try std.testing.expectEqual(tc.r, skipped);
        try std.testing.expectEqual(tc.i, p.i);
    }
}

test "parser.parseIdentifier" {
    const alloc = std.testing.allocator;

    const Case = struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    };
    const testcases = [_]Case{
        .{ .s = "x", .exp = "x" },
        .{ .s = "96", .exp = "", .err = true },
        .{ .s = "-x", .exp = "-x" },
        .{ .s = "r\\e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9 sumé", .exp = "résumé" },
        .{ .s = "r\\0000e9sumé", .exp = "résumé" },
        .{ .s = "a\\\"b", .exp = "a\"b" },
    };

    var out = std.ArrayList(u8).init(alloc);
    defer out.deinit();

    for (testcases) |tc| {
        out.clearRetainingCapacity();

        var p = Parser{ .s = tc.s, .opts = .{} };
        if (p.parseIdentifier(out.writer())) |_| {
            std.testing.expectEqualDeep(tc.exp, out.items) catch |e| {
                std.debug.print("test case {s} : {s}\n", .{ tc.s, out.items });
                return e;
            };
        } else |e| {
            // if error was expected, continue.
            if (!tc.err) {
                std.debug.print("test case {s}\n", .{tc.s});
                return e;
            }
        }
    }
}

test "parser.parseString" {
    const alloc = std.testing.allocator;

    const Case = struct {
        s: []const u8, // given value
        exp: []const u8, // expected value
        err: bool = false,
    };
    const testcases = [_]Case{
        .{ .s = "\"x\"", .exp = "x" },
        .{ .s = "'x'", .exp = "x" },
        .{ .s = "'x", .exp = "", .err = true },
        .{ .s = "'x\\\r\nx'", .exp = "xx" },
        .{ .s = "\"r\\e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9 sumé\"", .exp = "résumé" },
        .{ .s = "\"r\\0000e9sumé\"", .exp = "résumé" },
        .{ .s = "\"a\\\"b\"", .exp = "a\"b" },
        .{ .s = "\"\\\n\"", .exp = "" },
        .{ .s = "\"hello world\"", .exp = "hello world" },
    };

    var out = std.ArrayList(u8).init(alloc);
    defer out.deinit();

    for (testcases) |tc| {
        out.clearRetainingCapacity();

        var p = Parser{ .s = tc.s, .opts = .{} };
        if (p.parseString(out.writer())) |_| {
            std.testing.expectEqualDeep(tc.exp, out.items) catch |e| {
                std.debug.print("test case {s} : {s}\n", .{ tc.s, out.items });
                return e;
            };
        } else |e| {
            // if error was expected, continue.
            if (!tc.err) {
                std.debug.print("test case {s}\n", .{tc.s});
                return e;
            }
        }
    }
}

test "parser."
{ + const alloc = std.testing.allocator; + + const testcases = [_][]const u8{ + "address", + "*", + "#foo", + "li#t1", + "*#t4", + ".t1", + "p.t1", + "div.teST", + ".t1.fail", + "p.t1.t2", + "p.--t1", + "p.--t1.--t2", + "p[title]", + "div[class=\"red\" i]", + "address[title=\"foo\"]", + "address[title=\"FoOIgnoRECaSe\" i]", + "address[title!=\"foo\"]", + "address[title!=\"foo\" i]", + "p[title!=\"FooBarUFoo\" i]", + "[ \t title ~= foo ]", + "p[title~=\"FOO\" i]", + "p[title~=toofoo i]", + "[title~=\"hello world\"]", + "[title~=\"hello\" i]", + "[title~=\"hello\" I]", + "[lang|=\"en\"]", + "[lang|=\"EN\" i]", + "[lang|=\"EN\" i]", + "[title^=\"foo\"]", + "[title^=\"foo\" i]", + "[title$=\"bar\"]", + "[title$=\"BAR\" i]", + "[title*=\"bar\"]", + "[title*=\"BaRu\" i]", + "[title*=\"BaRu\" I]", + "p[class$=\" \"]", + "p[class$=\"\"]", + "p[class^=\" \"]", + "p[class^=\"\"]", + "p[class*=\" \"]", + "p[class*=\"\"]", + "input[name=Sex][value=F]", + "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", + ".t1:not(.t2)", + "div:not(.t1)", + "div:not([class=\"t2\"])", + "li:nth-child(odd)", + "li:nth-child(even)", + "li:nth-child(-n+2)", + "li:nth-child(3n+1)", + "li:nth-last-child(odd)", + "li:nth-last-child(even)", + "li:nth-last-child(-n+2)", + "li:nth-last-child(3n+1)", + "span:first-child", + "span:last-child", + "p:nth-of-type(2)", + "p:nth-last-of-type(2)", + "p:last-of-type", + "p:first-of-type", + "p:only-child", + "p:only-of-type", + ":empty", + "div p", + "div table p", + "div > p", + "p ~ p", + "p + p", + "li, p", + "p +/*This is a comment*/ p", + "p:contains(\"that wraps\")", + "p:containsOwn(\"that wraps\")", + ":containsOwn(\"inner\")", + "p:containsOwn(\"block\")", + "div:has(#p1)", + "div:has(:containsOwn(\"2\"))", + "body :has(:containsOwn(\"2\"))", + "body :haschild(:containsOwn(\"2\"))", + "p:matches([\\d])", + "p:matches([a-z])", + "p:matches([a-zA-Z])", + "p:matches([^\\d])", + "p:matches(^(0|a))", + "p:matches(^\\d+$)", + 
"p:not(:matches(^\\d+$))", + "div :matchesOwn(^\\d+$)", + "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", + "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", + ":input", + ":root", + "*:root", + "html:nth-child(1)", + "*:root:first-child", + "*:root:nth-child(1)", + "a:not(:root)", + "body > *:nth-child(3n+2)", + "input:disabled", + ":disabled", + ":enabled", + "div.class1, div.class2", + }; + + for (testcases) |tc| { + const s = Parse(alloc, tc, .{}) catch |e| { + std.debug.print("query {s}", .{tc}); + return e; + }; + defer s.deinit(alloc); + } +} From 6cf805360dab189ffceec50bee687b44799da86c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 15 Mar 2024 08:59:41 +0100 Subject: [PATCH 02/28] css: extract selector in its own file --- src/css/parser.zig | 200 ++----------------------------------------- src/css/selector.zig | 200 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 195 deletions(-) create mode 100644 src/css/selector.zig diff --git a/src/css/parser.zig b/src/css/parser.zig index e34e5071..d6905bec 100644 --- a/src/css/parser.zig +++ b/src/css/parser.zig @@ -5,200 +5,10 @@ const std = @import("std"); const ascii = std.ascii; -pub const AttributeOP = enum { - eql, // = - not_eql, // != - one_of, // ~= - prefix_hyphen, // |= - prefix, // ^= - suffix, // $= - contains, // *= - regexp, // #= - - fn len(op: AttributeOP) u2 { - if (op == .eql) return 1; - return 2; - } -}; - -pub const PseudoClass = enum { - not, - has, - haschild, - contains, - containsown, - matches, - matchesown, - nth_child, - nth_last_child, - nth_of_type, - nth_last_of_type, - first_child, - last_child, - first_of_type, - last_of_type, - only_child, - only_of_type, - input, - empty, - root, - link, - lang, - enabled, - disabled, - checked, - visited, - hover, - active, - focus, - target, - after, - backdrop, - before, - cue, - first_letter, - first_line, - grammar_error, - marker, - placeholder, - selection, - spelling_error, - - fn 
isPseudoElement(pc: PseudoClass) bool { - return switch (pc) { - .after, .backdrop, .before, .cue, .first_letter => true, - .first_line, .grammar_error, .marker, .placeholder => true, - .selection, .spelling_error => true, - else => false, - }; - } - - fn parse(s: []const u8) ParseError!PseudoClass { - if (std.ascii.eqlIgnoreCase(s, "not")) return .not; - if (std.ascii.eqlIgnoreCase(s, "has")) return .has; - if (std.ascii.eqlIgnoreCase(s, "haschild")) return .haschild; - if (std.ascii.eqlIgnoreCase(s, "contains")) return .contains; - if (std.ascii.eqlIgnoreCase(s, "containsown")) return .containsown; - if (std.ascii.eqlIgnoreCase(s, "matches")) return .matches; - if (std.ascii.eqlIgnoreCase(s, "matchesown")) return .matchesown; - if (std.ascii.eqlIgnoreCase(s, "nth-child")) return .nth_child; - if (std.ascii.eqlIgnoreCase(s, "nth-last-child")) return .nth_last_child; - if (std.ascii.eqlIgnoreCase(s, "nth-of-type")) return .nth_of_type; - if (std.ascii.eqlIgnoreCase(s, "nth-last-of-type")) return .nth_last_of_type; - if (std.ascii.eqlIgnoreCase(s, "first-child")) return .first_child; - if (std.ascii.eqlIgnoreCase(s, "last-child")) return .last_child; - if (std.ascii.eqlIgnoreCase(s, "first-of-type")) return .first_of_type; - if (std.ascii.eqlIgnoreCase(s, "last-of-type")) return .last_of_type; - if (std.ascii.eqlIgnoreCase(s, "only-child")) return .only_child; - if (std.ascii.eqlIgnoreCase(s, "only-of-type")) return .only_of_type; - if (std.ascii.eqlIgnoreCase(s, "input")) return .input; - if (std.ascii.eqlIgnoreCase(s, "empty")) return .empty; - if (std.ascii.eqlIgnoreCase(s, "root")) return .root; - if (std.ascii.eqlIgnoreCase(s, "link")) return .link; - if (std.ascii.eqlIgnoreCase(s, "lang")) return .lang; - if (std.ascii.eqlIgnoreCase(s, "enabled")) return .enabled; - if (std.ascii.eqlIgnoreCase(s, "disabled")) return .disabled; - if (std.ascii.eqlIgnoreCase(s, "checked")) return .checked; - if (std.ascii.eqlIgnoreCase(s, "visited")) return .visited; - if 
(std.ascii.eqlIgnoreCase(s, "hover")) return .hover; - if (std.ascii.eqlIgnoreCase(s, "active")) return .active; - if (std.ascii.eqlIgnoreCase(s, "focus")) return .focus; - if (std.ascii.eqlIgnoreCase(s, "target")) return .target; - if (std.ascii.eqlIgnoreCase(s, "after")) return .after; - if (std.ascii.eqlIgnoreCase(s, "backdrop")) return .backdrop; - if (std.ascii.eqlIgnoreCase(s, "before")) return .before; - if (std.ascii.eqlIgnoreCase(s, "cue")) return .cue; - if (std.ascii.eqlIgnoreCase(s, "first-letter")) return .first_letter; - if (std.ascii.eqlIgnoreCase(s, "first-line")) return .first_line; - if (std.ascii.eqlIgnoreCase(s, "grammar-error")) return .grammar_error; - if (std.ascii.eqlIgnoreCase(s, "marker")) return .marker; - if (std.ascii.eqlIgnoreCase(s, "placeholder")) return .placeholder; - if (std.ascii.eqlIgnoreCase(s, "selection")) return .selection; - if (std.ascii.eqlIgnoreCase(s, "spelling-error")) return .spelling_error; - return ParseError.InvalidPseudoClass; - } -}; - -pub const Selector = union(enum) { - compound: struct { - selectors: []Selector, - pseudo_elt: ?PseudoClass, - }, - group: []Selector, - tag: []const u8, - id: []const u8, - class: []const u8, - attribute: struct { - key: []const u8, - val: ?[]const u8 = null, - op: ?AttributeOP = null, - regexp: ?[]const u8 = null, - ci: bool = false, - }, - combined: struct { - first: *Selector, - second: *Selector, - combinator: u8, - }, - - never_match: PseudoClass, - - pseudo_class: PseudoClass, - pseudo_class_only_child: bool, - pseudo_class_lang: []const u8, - pseudo_class_relative: struct { - pseudo_class: PseudoClass, - match: *Selector, - }, - pseudo_class_contains: struct { - own: bool, - val: []const u8, - }, - pseudo_class_regexp: struct { - own: bool, - regexp: []const u8, - }, - pseudo_class_nth: struct { - a: isize, - b: isize, - of_type: bool, - last: bool, - }, - pseudo_element: PseudoClass, - - fn deinit(sel: Selector, alloc: std.mem.Allocator) void { - switch (sel) { - .group 
=> |v| { - for (v) |vv| vv.deinit(alloc); - alloc.free(v); - }, - .compound => |v| { - for (v.selectors) |vv| vv.deinit(alloc); - alloc.free(v.selectors); - }, - .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v), - .attribute => |att| { - alloc.free(att.key); - if (att.val) |v| alloc.free(v); - if (att.regexp) |v| alloc.free(v); - }, - .combined => |c| { - c.first.deinit(alloc); - alloc.destroy(c.first); - c.second.deinit(alloc); - alloc.destroy(c.second); - }, - .pseudo_class_relative => |v| { - v.match.deinit(alloc); - alloc.destroy(v.match); - }, - .pseudo_class_contains => |v| alloc.free(v.val), - .pseudo_class_regexp => |v| alloc.free(v.regexp), - .pseudo_class, .pseudo_element, .never_match => {}, - .pseudo_class_nth, .pseudo_class_only_child => {}, - } - } -}; +const selector = @import("selector.zig"); +const Selector = selector.Selector; +const PseudoClass = selector.PseudoClass; +const AttributeOP = selector.AttributeOP; pub const ParseError = error{ ExpectedSelector, @@ -234,7 +44,7 @@ pub const ParseError = error{ NotHandled, UnknownPseudoSelector, InvalidNthExpression, -} || std.mem.Allocator.Error; +} || PseudoClass.Error || std.mem.Allocator.Error; pub const ParseOptions = struct { accept_pseudo_elts: bool = true, diff --git a/src/css/selector.zig b/src/css/selector.zig new file mode 100644 index 00000000..f112d397 --- /dev/null +++ b/src/css/selector.zig @@ -0,0 +1,200 @@ +const std = @import("std"); + +pub const AttributeOP = enum { + eql, // = + not_eql, // != + one_of, // ~= + prefix_hyphen, // |= + prefix, // ^= + suffix, // $= + contains, // *= + regexp, // #= + + pub fn len(op: AttributeOP) u2 { + if (op == .eql) return 1; + return 2; + } +}; + +pub const PseudoClass = enum { + not, + has, + haschild, + contains, + containsown, + matches, + matchesown, + nth_child, + nth_last_child, + nth_of_type, + nth_last_of_type, + first_child, + last_child, + first_of_type, + last_of_type, + only_child, + only_of_type, + input, + empty, + root, + 
link, + lang, + enabled, + disabled, + checked, + visited, + hover, + active, + focus, + target, + after, + backdrop, + before, + cue, + first_letter, + first_line, + grammar_error, + marker, + placeholder, + selection, + spelling_error, + + pub const Error = error{ + InvalidPseudoClass, + }; + + pub fn isPseudoElement(pc: PseudoClass) bool { + return switch (pc) { + .after, .backdrop, .before, .cue, .first_letter => true, + .first_line, .grammar_error, .marker, .placeholder => true, + .selection, .spelling_error => true, + else => false, + }; + } + + pub fn parse(s: []const u8) Error!PseudoClass { + if (std.ascii.eqlIgnoreCase(s, "not")) return .not; + if (std.ascii.eqlIgnoreCase(s, "has")) return .has; + if (std.ascii.eqlIgnoreCase(s, "haschild")) return .haschild; + if (std.ascii.eqlIgnoreCase(s, "contains")) return .contains; + if (std.ascii.eqlIgnoreCase(s, "containsown")) return .containsown; + if (std.ascii.eqlIgnoreCase(s, "matches")) return .matches; + if (std.ascii.eqlIgnoreCase(s, "matchesown")) return .matchesown; + if (std.ascii.eqlIgnoreCase(s, "nth-child")) return .nth_child; + if (std.ascii.eqlIgnoreCase(s, "nth-last-child")) return .nth_last_child; + if (std.ascii.eqlIgnoreCase(s, "nth-of-type")) return .nth_of_type; + if (std.ascii.eqlIgnoreCase(s, "nth-last-of-type")) return .nth_last_of_type; + if (std.ascii.eqlIgnoreCase(s, "first-child")) return .first_child; + if (std.ascii.eqlIgnoreCase(s, "last-child")) return .last_child; + if (std.ascii.eqlIgnoreCase(s, "first-of-type")) return .first_of_type; + if (std.ascii.eqlIgnoreCase(s, "last-of-type")) return .last_of_type; + if (std.ascii.eqlIgnoreCase(s, "only-child")) return .only_child; + if (std.ascii.eqlIgnoreCase(s, "only-of-type")) return .only_of_type; + if (std.ascii.eqlIgnoreCase(s, "input")) return .input; + if (std.ascii.eqlIgnoreCase(s, "empty")) return .empty; + if (std.ascii.eqlIgnoreCase(s, "root")) return .root; + if (std.ascii.eqlIgnoreCase(s, "link")) return .link; + if 
(std.ascii.eqlIgnoreCase(s, "lang")) return .lang; + if (std.ascii.eqlIgnoreCase(s, "enabled")) return .enabled; + if (std.ascii.eqlIgnoreCase(s, "disabled")) return .disabled; + if (std.ascii.eqlIgnoreCase(s, "checked")) return .checked; + if (std.ascii.eqlIgnoreCase(s, "visited")) return .visited; + if (std.ascii.eqlIgnoreCase(s, "hover")) return .hover; + if (std.ascii.eqlIgnoreCase(s, "active")) return .active; + if (std.ascii.eqlIgnoreCase(s, "focus")) return .focus; + if (std.ascii.eqlIgnoreCase(s, "target")) return .target; + if (std.ascii.eqlIgnoreCase(s, "after")) return .after; + if (std.ascii.eqlIgnoreCase(s, "backdrop")) return .backdrop; + if (std.ascii.eqlIgnoreCase(s, "before")) return .before; + if (std.ascii.eqlIgnoreCase(s, "cue")) return .cue; + if (std.ascii.eqlIgnoreCase(s, "first-letter")) return .first_letter; + if (std.ascii.eqlIgnoreCase(s, "first-line")) return .first_line; + if (std.ascii.eqlIgnoreCase(s, "grammar-error")) return .grammar_error; + if (std.ascii.eqlIgnoreCase(s, "marker")) return .marker; + if (std.ascii.eqlIgnoreCase(s, "placeholder")) return .placeholder; + if (std.ascii.eqlIgnoreCase(s, "selection")) return .selection; + if (std.ascii.eqlIgnoreCase(s, "spelling-error")) return .spelling_error; + return Error.InvalidPseudoClass; + } +}; + +pub const Selector = union(enum) { + compound: struct { + selectors: []Selector, + pseudo_elt: ?PseudoClass, + }, + group: []Selector, + tag: []const u8, + id: []const u8, + class: []const u8, + attribute: struct { + key: []const u8, + val: ?[]const u8 = null, + op: ?AttributeOP = null, + regexp: ?[]const u8 = null, + ci: bool = false, + }, + combined: struct { + first: *Selector, + second: *Selector, + combinator: u8, + }, + + never_match: PseudoClass, + + pseudo_class: PseudoClass, + pseudo_class_only_child: bool, + pseudo_class_lang: []const u8, + pseudo_class_relative: struct { + pseudo_class: PseudoClass, + match: *Selector, + }, + pseudo_class_contains: struct { + own: bool, + 
val: []const u8, + }, + pseudo_class_regexp: struct { + own: bool, + regexp: []const u8, + }, + pseudo_class_nth: struct { + a: isize, + b: isize, + of_type: bool, + last: bool, + }, + pseudo_element: PseudoClass, + + pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void { + switch (sel) { + .group => |v| { + for (v) |vv| vv.deinit(alloc); + alloc.free(v); + }, + .compound => |v| { + for (v.selectors) |vv| vv.deinit(alloc); + alloc.free(v.selectors); + }, + .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v), + .attribute => |att| { + alloc.free(att.key); + if (att.val) |v| alloc.free(v); + if (att.regexp) |v| alloc.free(v); + }, + .combined => |c| { + c.first.deinit(alloc); + alloc.destroy(c.first); + c.second.deinit(alloc); + alloc.destroy(c.second); + }, + .pseudo_class_relative => |v| { + v.match.deinit(alloc); + alloc.destroy(v.match); + }, + .pseudo_class_contains => |v| alloc.free(v.val), + .pseudo_class_regexp => |v| alloc.free(v.regexp), + .pseudo_class, .pseudo_element, .never_match => {}, + .pseudo_class_nth, .pseudo_class_only_child => {}, + } + } +}; From d9c76aa13e4defe1e83b7ace2f926601a0fe9536 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 15 Mar 2024 09:06:34 +0100 Subject: [PATCH 03/28] css: extract public api on its own file --- src/css/css.zig | 127 +++++++++++++++++++++++++++++++++++++++++++++ src/css/parser.zig | 127 ++------------------------------------------- 2 files changed, 132 insertions(+), 122 deletions(-) create mode 100644 src/css/css.zig diff --git a/src/css/css.zig b/src/css/css.zig new file mode 100644 index 00000000..6092b1d0 --- /dev/null +++ b/src/css/css.zig @@ -0,0 +1,127 @@ +// CSS Selector parser and query +// This package is a rewrite in Zig of Cascadia CSS Selector parser. 
+// see https://github.com/andybalholm/cascadia +const std = @import("std"); +const Selector = @import("selector.zig").Selector; +const parser = @import("parser.zig"); + +// Parse parse a selector string and returns the parsed result or an error. +pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector { + var p = parser.Parser{ .s = s, .i = 0, .opts = opts }; + return p.parse(alloc); +} + +test "Parse" { + const alloc = std.testing.allocator; + + const testcases = [_][]const u8{ + "address", + "*", + "#foo", + "li#t1", + "*#t4", + ".t1", + "p.t1", + "div.teST", + ".t1.fail", + "p.t1.t2", + "p.--t1", + "p.--t1.--t2", + "p[title]", + "div[class=\"red\" i]", + "address[title=\"foo\"]", + "address[title=\"FoOIgnoRECaSe\" i]", + "address[title!=\"foo\"]", + "address[title!=\"foo\" i]", + "p[title!=\"FooBarUFoo\" i]", + "[ \t title ~= foo ]", + "p[title~=\"FOO\" i]", + "p[title~=toofoo i]", + "[title~=\"hello world\"]", + "[title~=\"hello\" i]", + "[title~=\"hello\" I]", + "[lang|=\"en\"]", + "[lang|=\"EN\" i]", + "[lang|=\"EN\" i]", + "[title^=\"foo\"]", + "[title^=\"foo\" i]", + "[title$=\"bar\"]", + "[title$=\"BAR\" i]", + "[title*=\"bar\"]", + "[title*=\"BaRu\" i]", + "[title*=\"BaRu\" I]", + "p[class$=\" \"]", + "p[class$=\"\"]", + "p[class^=\" \"]", + "p[class^=\"\"]", + "p[class*=\" \"]", + "p[class*=\"\"]", + "input[name=Sex][value=F]", + "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", + ".t1:not(.t2)", + "div:not(.t1)", + "div:not([class=\"t2\"])", + "li:nth-child(odd)", + "li:nth-child(even)", + "li:nth-child(-n+2)", + "li:nth-child(3n+1)", + "li:nth-last-child(odd)", + "li:nth-last-child(even)", + "li:nth-last-child(-n+2)", + "li:nth-last-child(3n+1)", + "span:first-child", + "span:last-child", + "p:nth-of-type(2)", + "p:nth-last-of-type(2)", + "p:last-of-type", + "p:first-of-type", + "p:only-child", + "p:only-of-type", + ":empty", + "div p", + "div table p", + "div > p", + "p ~ p", + "p + p", + 
"li, p", + "p +/*This is a comment*/ p", + "p:contains(\"that wraps\")", + "p:containsOwn(\"that wraps\")", + ":containsOwn(\"inner\")", + "p:containsOwn(\"block\")", + "div:has(#p1)", + "div:has(:containsOwn(\"2\"))", + "body :has(:containsOwn(\"2\"))", + "body :haschild(:containsOwn(\"2\"))", + "p:matches([\\d])", + "p:matches([a-z])", + "p:matches([a-zA-Z])", + "p:matches([^\\d])", + "p:matches(^(0|a))", + "p:matches(^\\d+$)", + "p:not(:matches(^\\d+$))", + "div :matchesOwn(^\\d+$)", + "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", + "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", + ":input", + ":root", + "*:root", + "html:nth-child(1)", + "*:root:first-child", + "*:root:nth-child(1)", + "a:not(:root)", + "body > *:nth-child(3n+2)", + "input:disabled", + ":disabled", + ":enabled", + "div.class1, div.class2", + }; + + for (testcases) |tc| { + const s = Parse(alloc, tc, .{}) catch |e| { + std.debug.print("query {s}", .{tc}); + return e; + }; + defer s.deinit(alloc); + } +} diff --git a/src/css/parser.zig b/src/css/parser.zig index d6905bec..6bec4cbd 100644 --- a/src/css/parser.zig +++ b/src/css/parser.zig @@ -50,18 +50,16 @@ pub const ParseOptions = struct { accept_pseudo_elts: bool = true, }; -// Parse parse a selector string and returns the parsed result or an error. -pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: ParseOptions) ParseError!Selector { - var p = Parser{ .s = s, .i = 0, .opts = opts }; - return p.parseSelector(alloc); -} - -const Parser = struct { +pub const Parser = struct { s: []const u8, // string to parse i: usize = 0, // current position opts: ParseOptions, + pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { + return p.parseSelector(alloc); + } + // skipWhitespace consumes whitespace characters and comments. // It returns true if there was actually anything to skip. fn skipWhitespace(p: *Parser) bool { @@ -894,118 +892,3 @@ test "parser.parseString" { }; } } - -test "parser." 
{ - const alloc = std.testing.allocator; - - const testcases = [_][]const u8{ - "address", - "*", - "#foo", - "li#t1", - "*#t4", - ".t1", - "p.t1", - "div.teST", - ".t1.fail", - "p.t1.t2", - "p.--t1", - "p.--t1.--t2", - "p[title]", - "div[class=\"red\" i]", - "address[title=\"foo\"]", - "address[title=\"FoOIgnoRECaSe\" i]", - "address[title!=\"foo\"]", - "address[title!=\"foo\" i]", - "p[title!=\"FooBarUFoo\" i]", - "[ \t title ~= foo ]", - "p[title~=\"FOO\" i]", - "p[title~=toofoo i]", - "[title~=\"hello world\"]", - "[title~=\"hello\" i]", - "[title~=\"hello\" I]", - "[lang|=\"en\"]", - "[lang|=\"EN\" i]", - "[lang|=\"EN\" i]", - "[title^=\"foo\"]", - "[title^=\"foo\" i]", - "[title$=\"bar\"]", - "[title$=\"BAR\" i]", - "[title*=\"bar\"]", - "[title*=\"BaRu\" i]", - "[title*=\"BaRu\" I]", - "p[class$=\" \"]", - "p[class$=\"\"]", - "p[class^=\" \"]", - "p[class^=\"\"]", - "p[class*=\" \"]", - "p[class*=\"\"]", - "input[name=Sex][value=F]", - "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", - ".t1:not(.t2)", - "div:not(.t1)", - "div:not([class=\"t2\"])", - "li:nth-child(odd)", - "li:nth-child(even)", - "li:nth-child(-n+2)", - "li:nth-child(3n+1)", - "li:nth-last-child(odd)", - "li:nth-last-child(even)", - "li:nth-last-child(-n+2)", - "li:nth-last-child(3n+1)", - "span:first-child", - "span:last-child", - "p:nth-of-type(2)", - "p:nth-last-of-type(2)", - "p:last-of-type", - "p:first-of-type", - "p:only-child", - "p:only-of-type", - ":empty", - "div p", - "div table p", - "div > p", - "p ~ p", - "p + p", - "li, p", - "p +/*This is a comment*/ p", - "p:contains(\"that wraps\")", - "p:containsOwn(\"that wraps\")", - ":containsOwn(\"inner\")", - "p:containsOwn(\"block\")", - "div:has(#p1)", - "div:has(:containsOwn(\"2\"))", - "body :has(:containsOwn(\"2\"))", - "body :haschild(:containsOwn(\"2\"))", - "p:matches([\\d])", - "p:matches([a-z])", - "p:matches([a-zA-Z])", - "p:matches([^\\d])", - "p:matches(^(0|a))", - "p:matches(^\\d+$)", - 
"p:not(:matches(^\\d+$))", - "div :matchesOwn(^\\d+$)", - "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", - "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", - ":input", - ":root", - "*:root", - "html:nth-child(1)", - "*:root:first-child", - "*:root:nth-child(1)", - "a:not(:root)", - "body > *:nth-child(3n+2)", - "input:disabled", - ":disabled", - ":enabled", - "div.class1, div.class2", - }; - - for (testcases) |tc| { - const s = Parse(alloc, tc, .{}) catch |e| { - std.debug.print("query {s}", .{tc}); - return e; - }; - defer s.deinit(alloc); - } -} From a131e96ed5a2816a5fd3e140f884a9a95ee3c70a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 15 Mar 2024 15:03:55 +0100 Subject: [PATCH 04/28] css: lower case parse function --- src/css/css.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/css/css.zig b/src/css/css.zig index 6092b1d0..67d00d89 100644 --- a/src/css/css.zig +++ b/src/css/css.zig @@ -5,13 +5,13 @@ const std = @import("std"); const Selector = @import("selector.zig").Selector; const parser = @import("parser.zig"); -// Parse parse a selector string and returns the parsed result or an error. -pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector { +// parse parse a selector string and returns the parsed result or an error. 
+pub fn parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector { var p = parser.Parser{ .s = s, .i = 0, .opts = opts }; return p.parse(alloc); } -test "Parse" { +test "parse" { const alloc = std.testing.allocator; const testcases = [_][]const u8{ @@ -118,7 +118,7 @@ test "Parse" { }; for (testcases) |tc| { - const s = Parse(alloc, tc, .{}) catch |e| { + const s = parse(alloc, tc, .{}) catch |e| { std.debug.print("query {s}", .{tc}); return e; }; From b59fd9b1fb314db38387f7a6742a946f164121ad Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 15 Mar 2024 16:09:16 +0100 Subject: [PATCH 05/28] css: matcher draft --- src/css/css.zig | 31 +++++++++++++++++++++ src/css/libdom.zig | 22 +++++++++++++++ src/css/match_test.zig | 61 ++++++++++++++++++++++++++++++++++++++++++ src/css/selector.zig | 7 +++++ 4 files changed, 121 insertions(+) create mode 100644 src/css/libdom.zig create mode 100644 src/css/match_test.zig diff --git a/src/css/css.zig b/src/css/css.zig index 67d00d89..9bc6cca2 100644 --- a/src/css/css.zig +++ b/src/css/css.zig @@ -11,6 +11,37 @@ pub fn parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) return p.parse(alloc); } +// matchFirst call m.match with the first node that matches the selector s, from the +// descendants of n and returns true. If none matches, it returns false. +pub fn matchFirst(s: Selector, node: anytype, m: anytype) !bool { + var c = try node.firstChild(); + while (true) { + if (c == null) break; + + if (try s.match(c.?)) { + try m.match(c.?); + return true; + } + + if (try matchFirst(s, c.?, m)) return true; + c = try c.?.nextSibling(); + } + return false; +} + +// matchAll call m.match with the all the nodes that matches the selector s, from the +// descendants of n. 
+pub fn matchAll(s: Selector, node: anytype, m: anytype) !void { + var c = try node.firstChild(); + while (true) { + if (c == null) break; + + if (try s.match(c.?)) try m.match(c.?); + try matchFirst(s, c.?, m); + c = try c.?.nextSibling(); + } +} + test "parse" { const alloc = std.testing.allocator; diff --git a/src/css/libdom.zig b/src/css/libdom.zig new file mode 100644 index 00000000..49623286 --- /dev/null +++ b/src/css/libdom.zig @@ -0,0 +1,22 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +// Node implementation with Netsurf Libdom C lib. +pub const Node = struct { + node: *parser.Node, + + pub fn firstChild(n: Node) !?Node { + const c = try parser.nodeFirstChild(n.node); + if (c) |cc| return .{ .node = cc }; + + return null; + } + + pub fn nextSibling(n: Node) ?Node { + const c = try parser.nodeNextSibling(n.node); + if (c) |cc| return .{ .node = cc }; + + return null; + } +}; diff --git a/src/css/match_test.zig b/src/css/match_test.zig new file mode 100644 index 00000000..ab7d725e --- /dev/null +++ b/src/css/match_test.zig @@ -0,0 +1,61 @@ +const std = @import("std"); +const css = @import("css.zig"); + +// Node mock implementation for test only. 
+pub const Node = struct { + child: ?*const Node = null, + sibling: ?*const Node = null, + + name: []const u8 = "", + + pub fn firstChild(n: *const Node) !?*const Node { + return n.child; + } + + pub fn nextSibling(n: *const Node) !?*const Node { + return n.sibling; + } + + pub fn tag(n: *const Node) ![]const u8 { + return n.name; + } +}; +const Matcher = struct { + const Nodes = std.ArrayList(*const Node); + + nodes: Nodes, + + fn init(alloc: std.mem.Allocator) Matcher { + return .{ .nodes = Nodes.init(alloc) }; + } + + fn deinit(m: *Matcher) void { + m.nodes.deinit(); + } + + fn reset(m: *Matcher) void { + m.nodes.clearRetainingCapacity(); + } + + pub fn match(m: *Matcher, n: *const Node) !void { + try m.nodes.append(n); + } +}; + +test "matchFirst" { + const alloc = std.testing.allocator; + + const s = try css.parse(alloc, "address", .{}); + defer s.deinit(alloc); + + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + const node: Node = .{ + .child = &.{ .name = "address" }, + }; + + _ = try css.matchFirst(s, &node, &matcher); + try std.testing.expect(1 == matcher.nodes.items.len); + try std.testing.expect(matcher.nodes.items[0] == node.child); +} diff --git a/src/css/selector.zig b/src/css/selector.zig index f112d397..30c0a147 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -165,6 +165,13 @@ pub const Selector = union(enum) { }, pseudo_element: PseudoClass, + pub fn match(s: Selector, n: anytype) !bool { + return switch (s) { + .tag => |v| std.ascii.eqlIgnoreCase(v, try n.tag()), + else => false, + }; + } + pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void { switch (sel) { .group => |v| { From 954a6935863f5fb5759fab5f8939c57b4165a443 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 09:49:12 +0100 Subject: [PATCH 06/28] css: add matcher test w/ libdom --- src/css/libdom.zig | 6 +++++- src/css/libdom_test.zig | 44 +++++++++++++++++++++++++++++++++++++++++ src/css/match_test.zig | 1 + src/run_tests.zig | 
14 +++++++++---- 4 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 src/css/libdom_test.zig diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 49623286..8fb23aad 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -13,10 +13,14 @@ pub const Node = struct { return null; } - pub fn nextSibling(n: Node) ?Node { + pub fn nextSibling(n: Node) !?Node { const c = try parser.nodeNextSibling(n.node); if (c) |cc| return .{ .node = cc }; return null; } + + pub fn tag(n: Node) ![]const u8 { + return try parser.nodeName(n.node); + } }; diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig new file mode 100644 index 00000000..f143c17b --- /dev/null +++ b/src/css/libdom_test.zig @@ -0,0 +1,44 @@ +const std = @import("std"); +const css = @import("css.zig"); +const Node = @import("libdom.zig").Node; +const parser = @import("../netsurf.zig"); + +const Matcher = struct { + const Nodes = std.ArrayList(Node); + + nodes: Nodes, + + fn init(alloc: std.mem.Allocator) Matcher { + return .{ .nodes = Nodes.init(alloc) }; + } + + fn deinit(m: *Matcher) void { + m.nodes.deinit(); + } + + fn reset(m: *Matcher) void { + m.nodes.clearRetainingCapacity(); + } + + pub fn match(m: *Matcher, n: Node) !void { + try m.nodes.append(n); + } +}; + +test "matchFirst" { + const alloc = std.testing.allocator; + + const s = try css.parse(alloc, "address", .{}); + defer s.deinit(alloc); + + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + const doc = try parser.documentHTMLParseFromStr("
This address...
"); + defer parser.documentHTMLClose(doc) catch {}; + + const node = Node{ .node = parser.documentHTMLToNode(doc) }; + + _ = try css.matchFirst(s, node, &matcher); + try std.testing.expect(1 == matcher.nodes.items.len); +} diff --git a/src/css/match_test.zig b/src/css/match_test.zig index ab7d725e..90c3cd1a 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -20,6 +20,7 @@ pub const Node = struct { return n.name; } }; + const Matcher = struct { const Nodes = std.ArrayList(*const Node); diff --git a/src/run_tests.zig b/src/run_tests.zig index 2f5c8d6b..9227a20d 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -98,11 +98,17 @@ pub fn main() !void { } test { - const AsyncTest = @import("async/test.zig"); - std.testing.refAllDecls(AsyncTest); + const asyncTest = @import("async/test.zig"); + std.testing.refAllDecls(asyncTest); - const DumpTest = @import("browser/dump.zig"); - std.testing.refAllDecls(DumpTest); + const dumpTest = @import("browser/dump.zig"); + std.testing.refAllDecls(dumpTest); + + const cssMatchTest = @import("css/match_test.zig"); + std.testing.refAllDecls(cssMatchTest); + + const cssLibdomTest = @import("css/libdom_test.zig"); + std.testing.refAllDecls(cssLibdomTest); } fn testJSRuntime() !void { From 7839f466ea52da16f95e81ac76fb0f5d96be5fb8 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 11:35:47 +0100 Subject: [PATCH 07/28] css: refacto test --- src/css/css.zig | 2 +- src/css/libdom_test.zig | 64 ++++++++++++++++++++++++++++++++++++----- src/css/match_test.zig | 54 +++++++++++++++++++++++++++++----- 3 files changed, 105 insertions(+), 15 deletions(-) diff --git a/src/css/css.zig b/src/css/css.zig index 9bc6cca2..fc5e8995 100644 --- a/src/css/css.zig +++ b/src/css/css.zig @@ -37,7 +37,7 @@ pub fn matchAll(s: Selector, node: anytype, m: anytype) !void { if (c == null) break; if (try s.match(c.?)) try m.match(c.?); - try matchFirst(s, c.?, m); + try matchAll(s, c.?, m); c = try c.?.nextSibling(); } } diff 
--git a/src/css/libdom_test.zig b/src/css/libdom_test.zig index f143c17b..5a952cfc 100644 --- a/src/css/libdom_test.zig +++ b/src/css/libdom_test.zig @@ -28,17 +28,67 @@ const Matcher = struct { test "matchFirst" { const alloc = std.testing.allocator; - const s = try css.parse(alloc, "address", .{}); - defer s.deinit(alloc); + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + const testcases = [_]struct { + q: []const u8, + html: []const u8, + exp: usize, + }{ + .{ + .q = "address", + .html = "
This address...
", + .exp = 1, + }, + }; + + for (testcases) |tc| { + matcher.reset(); + + const doc = try parser.documentHTMLParseFromStr(tc.html); + defer parser.documentHTMLClose(doc) catch {}; + + const s = try css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + const node = Node{ .node = parser.documentHTMLToNode(doc) }; + + _ = try css.matchFirst(s, node, &matcher); + try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + } +} + +test "matchAll" { + const alloc = std.testing.allocator; var matcher = Matcher.init(alloc); defer matcher.deinit(); - const doc = try parser.documentHTMLParseFromStr("
This address...
"); - defer parser.documentHTMLClose(doc) catch {}; + const testcases = [_]struct { + q: []const u8, + html: []const u8, + exp: usize, + }{ + .{ + .q = "address", + .html = "
This address...
", + .exp = 1, + }, + }; - const node = Node{ .node = parser.documentHTMLToNode(doc) }; + for (testcases) |tc| { + matcher.reset(); - _ = try css.matchFirst(s, node, &matcher); - try std.testing.expect(1 == matcher.nodes.items.len); + const doc = try parser.documentHTMLParseFromStr(tc.html); + defer parser.documentHTMLClose(doc) catch {}; + + const s = try css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + const node = Node{ .node = parser.documentHTMLToNode(doc) }; + + _ = try css.matchAll(s, node, &matcher); + try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + } } diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 90c3cd1a..5df71da6 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -46,17 +46,57 @@ const Matcher = struct { test "matchFirst" { const alloc = std.testing.allocator; - const s = try css.parse(alloc, "address", .{}); - defer s.deinit(alloc); + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + const testcases = [_]struct { + q: []const u8, + n: Node, + exp: usize, + }{ + .{ + .q = "address", + .n = .{ .name = "body", .child = &.{ .name = "address" } }, + .exp = 1, + }, + }; + + for (testcases) |tc| { + matcher.reset(); + + const s = try css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + _ = try css.matchFirst(s, &tc.n, &matcher); + try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + } +} + +test "matchAll" { + const alloc = std.testing.allocator; var matcher = Matcher.init(alloc); defer matcher.deinit(); - const node: Node = .{ - .child = &.{ .name = "address" }, + const testcases = [_]struct { + q: []const u8, + n: Node, + exp: usize, + }{ + .{ + .q = "address", + .n = .{ .name = "body", .child = &.{ .name = "address" } }, + .exp = 1, + }, }; - _ = try css.matchFirst(s, &node, &matcher); - try std.testing.expect(1 == matcher.nodes.items.len); - try std.testing.expect(matcher.nodes.items[0] == node.child); + for (testcases) |tc| { + matcher.reset(); + + const s = try 
css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + _ = try css.matchAll(s, &tc.n, &matcher); + try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + } } From 4629e8a9eb1964942c6d18bc135027f4cd4037aa Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 11:36:06 +0100 Subject: [PATCH 08/28] css: check if node is an html element --- src/css/libdom.zig | 5 +++++ src/css/match_test.zig | 4 ++++ src/css/selector.zig | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 8fb23aad..57d4af54 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -20,6 +20,11 @@ pub const Node = struct { return null; } + pub fn isElement(n: Node) bool { + const t = parser.nodeType(n.node) catch return false; + return t == .element; + } + pub fn tag(n: Node) ![]const u8 { return try parser.nodeName(n.node); } diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 5df71da6..a37de623 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -16,6 +16,10 @@ pub const Node = struct { return n.sibling; } + pub fn isElement(_: *const Node) bool { + return true; + } + pub fn tag(n: *const Node) ![]const u8 { return n.name; } diff --git a/src/css/selector.zig b/src/css/selector.zig index 30c0a147..0ce92099 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -167,7 +167,7 @@ pub const Selector = union(enum) { pub fn match(s: Selector, n: anytype) !bool { return switch (s) { - .tag => |v| std.ascii.eqlIgnoreCase(v, try n.tag()), + .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()), else => false, }; } From d64fffc5b3664eb5184ce1fc7f157696495dc4fc Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 12:48:03 +0100 Subject: [PATCH 09/28] css: implement id and class match selector --- src/css/libdom.zig | 4 ++++ src/css/libdom_test.zig | 18 ++++++++---------- src/css/match_test.zig | 39 +++++++++++++++++++++++++++++++++++++-- 
src/css/selector.zig | 12 ++++++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 57d4af54..318e401c 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -28,4 +28,8 @@ pub const Node = struct { pub fn tag(n: Node) ![]const u8 { return try parser.nodeName(n.node); } + + pub fn attr(n: Node, key: []const u8) !?[]const u8 { + return try parser.elementGetAttribute(parser.nodeToElement(n.node), key); + } }; diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig index 5a952cfc..0e5a255d 100644 --- a/src/css/libdom_test.zig +++ b/src/css/libdom_test.zig @@ -36,11 +36,10 @@ test "matchFirst" { html: []const u8, exp: usize, }{ - .{ - .q = "address", - .html = "
This address...
", - .exp = 1, - }, + .{ .q = "address", .html = "
This address...
", .exp = 1 }, + .{ .q = "#foo", .html = "

", .exp = 1 }, + .{ .q = ".t1", .html = "

  • ", .exp = 1 }, + .{ .q = ".t3", .html = "
    • ", .exp = 1 }, }; for (testcases) |tc| { @@ -70,11 +69,10 @@ test "matchAll" { html: []const u8, exp: usize, }{ - .{ - .q = "address", - .html = "
      This address...
      ", - .exp = 1, - }, + .{ .q = "address", .html = "
      This address...
      ", .exp = 1 }, + .{ .q = "#foo", .html = "

      ", .exp = 1 }, + .{ .q = ".t1", .html = "

      • ", .exp = 1 }, + .{ .q = ".t3", .html = "
        • ", .exp = 1 }, }; for (testcases) |tc| { diff --git a/src/css/match_test.zig b/src/css/match_test.zig index a37de623..6997a970 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -7,6 +7,7 @@ pub const Node = struct { sibling: ?*const Node = null, name: []const u8 = "", + att: ?[]const u8 = null, pub fn firstChild(n: *const Node) !?*const Node { return n.child; @@ -23,6 +24,10 @@ pub const Node = struct { pub fn tag(n: *const Node) ![]const u8 { return n.name; } + + pub fn attr(n: *const Node, _: []const u8) !?[]const u8 { + return n.att; + } }; const Matcher = struct { @@ -60,7 +65,22 @@ test "matchFirst" { }{ .{ .q = "address", - .n = .{ .name = "body", .child = &.{ .name = "address" } }, + .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } }, + .exp = 1, + }, + .{ + .q = "#foo", + .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } }, + .exp = 1, + }, + .{ + .q = ".t1", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } }, + .exp = 1, + }, + .{ + .q = ".t1", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } }, .exp = 1, }, }; @@ -89,7 +109,22 @@ test "matchAll" { }{ .{ .q = "address", - .n = .{ .name = "body", .child = &.{ .name = "address" } }, + .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } }, + .exp = 1, + }, + .{ + .q = "#foo", + .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } }, + .exp = 1, + }, + .{ + .q = ".t1", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } }, + .exp = 1, + }, + .{ + .q = ".t1", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } }, .exp = 1, }, }; diff --git a/src/css/selector.zig b/src/css/selector.zig index 0ce92099..06b95937 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -165,9 +165,21 @@ pub const Selector = union(enum) { }, pseudo_element: PseudoClass, + // returns 
true if s is a whitespace-separated list that includes val. + fn contains(haystack: []const u8, needle: []const u8) bool { + if (haystack.len == 0) return false; + var it = std.mem.splitAny(u8, haystack, " \t\r\n"); // TODO add \f + while (it.next()) |part| { + if (std.mem.eql(u8, part, needle)) return true; + } + return false; + } + pub fn match(s: Selector, n: anytype) !bool { return switch (s) { .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()), + .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false), + .class => |v| return n.isElement() and contains(try n.attr("class") orelse return false, v), else => false, }; } From 5e8ec4532dc0dbbb16944b7bc1129d637b05228a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 16:01:46 +0100 Subject: [PATCH 10/28] css: add attribute matcher --- src/css/match_test.zig | 140 +++++++++++++++++++++++++++++++++++++++++ src/css/selector.zig | 55 +++++++++++++++- 2 files changed, 192 insertions(+), 3 deletions(-) diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 6997a970..d0ac3c81 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -83,6 +83,76 @@ test "matchFirst" { .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } }, .exp = 1, }, + .{ + .q = "[foo]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } }, + .exp = 0, + }, + .{ + .q = "[foo]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo=baz]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 0, + }, + .{ + .q = "[foo!=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo!=baz]", + .n = .{ .child = &.{ .name = "p", 
.sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo~=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo~=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 0, + }, + .{ + .q = "[foo^=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo$=baz]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo*=rb]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } }, + .exp = 0, + }, }; for (testcases) |tc| { @@ -127,6 +197,76 @@ test "matchAll" { .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } }, .exp = 1, }, + .{ + .q = "[foo]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } }, + .exp = 0, + }, + .{ + .q = "[foo]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo=baz]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 0, + }, + .{ + .q = "[foo!=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo!=baz]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 2, + }, + 
.{ + .q = "[foo~=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo~=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 0, + }, + .{ + .q = "[foo^=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo$=baz]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo*=rb]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } }, + .exp = 1, + }, + .{ + .q = "[foo|=bar]", + .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } }, + .exp = 0, + }, }; for (testcases) |tc| { diff --git a/src/css/selector.zig b/src/css/selector.zig index 06b95937..8eae1aef 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -166,20 +166,69 @@ pub const Selector = union(enum) { pseudo_element: PseudoClass, // returns true if s is a whitespace-separated list that includes val. 
- fn contains(haystack: []const u8, needle: []const u8) bool { + fn word(haystack: []const u8, needle: []const u8, ci: bool) bool { if (haystack.len == 0) return false; var it = std.mem.splitAny(u8, haystack, " \t\r\n"); // TODO add \f while (it.next()) |part| { - if (std.mem.eql(u8, part, needle)) return true; + if (eql(part, needle, ci)) return true; } return false; } + fn eql(a: []const u8, b: []const u8, ci: bool) bool { + if (ci) return std.ascii.eqlIgnoreCase(a, b); + return std.mem.eql(u8, a, b); + } + + fn starts(haystack: []const u8, needle: []const u8, ci: bool) bool { + if (ci) return std.ascii.startsWithIgnoreCase(haystack, needle); + return std.mem.startsWith(u8, haystack, needle); + } + + fn ends(haystack: []const u8, needle: []const u8, ci: bool) bool { + if (ci) return std.ascii.endsWithIgnoreCase(haystack, needle); + return std.mem.endsWith(u8, haystack, needle); + } + + fn contains(haystack: []const u8, needle: []const u8, ci: bool) bool { + if (ci) return std.ascii.indexOfIgnoreCase(haystack, needle) != null; + return std.mem.indexOf(u8, haystack, needle) != null; + } + pub fn match(s: Selector, n: anytype) !bool { return switch (s) { .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()), .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false), - .class => |v| return n.isElement() and contains(try n.attr("class") orelse return false, v), + .class => |v| return n.isElement() and word(try n.attr("class") orelse return false, v, false), + .attribute => |v| { + const attr = try n.attr(v.key); + + if (v.op == null) return attr != null; + if (v.val == null or v.val.?.len == 0) return false; + + const val = v.val.?; + + return switch (v.op.?) 
{ + .eql => attr != null and eql(attr.?, val, v.ci), + .not_eql => attr == null or !eql(attr.?, val, v.ci), + .one_of => attr != null and word(attr.?, val, v.ci), + .prefix => attr != null and starts(attr.?, val, v.ci), + .suffix => attr != null and ends(attr.?, val, v.ci), + .contains => attr != null and contains(attr.?, val, v.ci), + .prefix_hyphen => { + if (attr == null) return false; + if (eql(attr.?, val, v.ci)) return true; + + if (attr.?.len <= val.len) return false; + + if (!starts(attr.?, val, v.ci)) return false; + + return attr.?[val.len] == '-'; + }, + .regexp => false, // TODO handle regexp attribute operator. + }; + }, + .never_match => return false, else => false, }; } From a2e747002b01ce6dde4f3099c555a86988553535 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 21:21:44 +0100 Subject: [PATCH 11/28] css: use parseSelectorGroup() with parse() --- src/css/parser.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/css/parser.zig b/src/css/parser.zig index 6bec4cbd..d2110883 100644 --- a/src/css/parser.zig +++ b/src/css/parser.zig @@ -57,7 +57,7 @@ pub const Parser = struct { opts: ParseOptions, pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector { - return p.parseSelector(alloc); + return p.parseSelectorGroup(alloc); } // skipWhitespace consumes whitespace characters and comments. 
@@ -583,6 +583,8 @@ pub const Parser = struct { try buf.append(ss); } + if (buf.items.len == 1) return buf.items[0]; + return .{ .group = try buf.toOwnedSlice() }; } From d0dbbacd690ce40a7e221bbf26127ae35548c4fb Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 16:05:06 +0100 Subject: [PATCH 12/28] css: enable all css tests in zig build test --- src/run_tests.zig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/run_tests.zig b/src/run_tests.zig index 9227a20d..84ca45fd 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -104,6 +104,12 @@ test { const dumpTest = @import("browser/dump.zig"); std.testing.refAllDecls(dumpTest); + const cssTest = @import("css/css.zig"); + std.testing.refAllDecls(cssTest); + + const cssParserTest = @import("css/parser.zig"); + std.testing.refAllDecls(cssParserTest); + const cssMatchTest = @import("css/match_test.zig"); std.testing.refAllDecls(cssMatchTest); From 75e80a47e6c22beee0215e75a64b48ebcb231b86 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 18 Mar 2024 21:21:28 +0100 Subject: [PATCH 13/28] css: implement group, compound and start combined match --- src/css/libdom.zig | 7 +++++ src/css/match_test.zig | 63 +++++++++++++++++++++++++++++++++++++++--- src/css/parser.zig | 15 ++++++---- src/css/selector.zig | 62 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 136 insertions(+), 11 deletions(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 318e401c..7c06cd1a 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -20,6 +20,13 @@ pub const Node = struct { return null; } + pub fn parent(n: Node) !?Node { + const c = try parser.nodeParentNode(n.node); + if (c) |cc| return .{ .node = cc }; + + return null; + } + pub fn isElement(n: Node) bool { const t = parser.nodeType(n.node) catch return false; return t == .element; diff --git a/src/css/match_test.zig b/src/css/match_test.zig index d0ac3c81..22694961 100644 --- a/src/css/match_test.zig +++ 
b/src/css/match_test.zig @@ -5,6 +5,7 @@ const css = @import("css.zig"); pub const Node = struct { child: ?*const Node = null, sibling: ?*const Node = null, + par: ?*const Node = null, name: []const u8 = "", att: ?[]const u8 = null, @@ -17,6 +18,10 @@ pub const Node = struct { return n.sibling; } + pub fn parent(n: *const Node) !?*const Node { + return n.par; + } + pub fn isElement(_: *const Node) bool { return true; } @@ -153,6 +158,24 @@ test "matchFirst" { .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } }, .exp = 0, }, + .{ + .q = "strong, a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, .sibling = &.{ .name = "a" } } }, + .exp = 1, + }, + .{ + .q = "p a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{ + .name = "a", + .par = &.{ .name = "span", .par = &.{ .name = "p" } }, + } } } }, + .exp = 1, + }, }; for (testcases) |tc| { @@ -161,8 +184,15 @@ test "matchFirst" { const s = try css.parse(alloc, tc.q, .{}); defer s.deinit(alloc); - _ = try css.matchFirst(s, &tc.n, &matcher); - try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + _ = css.matchFirst(s, &tc.n, &matcher) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; } } @@ -267,6 +297,24 @@ test "matchAll" { .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } }, .exp = 0, }, + .{ + .q = "strong, a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 2, + }, + .{ + .q = "p a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, 
.sibling = &.{ .name = "a" } } }, + .exp = 1, + }, + .{ + .q = "p a", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{ + .name = "a", + .par = &.{ .name = "span", .par = &.{ .name = "p" } }, + } } } }, + .exp = 1, + }, }; for (testcases) |tc| { @@ -275,7 +323,14 @@ test "matchAll" { const s = try css.parse(alloc, tc.q, .{}); defer s.deinit(alloc); - _ = try css.matchAll(s, &tc.n, &matcher); - try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + _ = css.matchAll(s, &tc.n, &matcher) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; } } diff --git a/src/css/parser.zig b/src/css/parser.zig index d2110883..f0da6504 100644 --- a/src/css/parser.zig +++ b/src/css/parser.zig @@ -9,6 +9,7 @@ const selector = @import("selector.zig"); const Selector = selector.Selector; const PseudoClass = selector.PseudoClass; const AttributeOP = selector.AttributeOP; +const Combinator = selector.Combinator; pub const ParseError = error{ ExpectedSelector, @@ -44,7 +45,7 @@ pub const ParseError = error{ NotHandled, UnknownPseudoSelector, InvalidNthExpression, -} || PseudoClass.Error || std.mem.Allocator.Error; +} || PseudoClass.Error || Combinator.Error || std.mem.Allocator.Error; pub const ParseOptions = struct { accept_pseudo_elts: bool = true, @@ -594,9 +595,9 @@ pub const Parser = struct { var s = try p.parseSimpleSelectorSequence(alloc); while (true) { - var combinator: u8 = undefined; + var combinator: Combinator = .empty; if (p.skipWhitespace()) { - combinator = ' '; + combinator = .descendant; } if (p.i >= p.s.len) { return s; @@ -604,16 +605,18 @@ pub const Parser = struct { switch (p.s[p.i]) { '+', '>', '~' => { - combinator = p.s[p.i]; + combinator = try Combinator.parse(p.s[p.i]); p.i += 1; _ = p.skipWhitespace(); }, // These 
characters can't begin a selector, but they can legally occur after one. - ',', ')' => return s, + ',', ')' => { + return s; + }, else => {}, } - if (combinator == 0) { + if (combinator == .empty) { return s; } diff --git a/src/css/selector.zig b/src/css/selector.zig index 8eae1aef..f94144b5 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -16,6 +16,28 @@ pub const AttributeOP = enum { } }; +pub const Combinator = enum { + empty, + descendant, // space + child, // > + next_sibling, // + + subsequent_sibling, // ~ + + pub const Error = error{ + InvalidCombinator, + }; + + pub fn parse(c: u8) Error!Combinator { + return switch (c) { + ' ' => .descendant, + '>' => .child, + '+' => .next_sibling, + '~' => .subsequent_sibling, + else => Error.InvalidCombinator, + }; + } +}; + pub const PseudoClass = enum { not, has, @@ -119,6 +141,10 @@ pub const PseudoClass = enum { }; pub const Selector = union(enum) { + pub const Error = error{ + UnknownCombinedCombinator, + }; + compound: struct { selectors: []Selector, pseudo_elt: ?PseudoClass, @@ -137,7 +163,7 @@ pub const Selector = union(enum) { combined: struct { first: *Selector, second: *Selector, - combinator: u8, + combinator: Combinator, }, never_match: PseudoClass, @@ -200,6 +226,40 @@ pub const Selector = union(enum) { .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()), .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false), .class => |v| return n.isElement() and word(try n.attr("class") orelse return false, v, false), + .group => |v| { + for (v) |sel| { + if (try sel.match(n)) return true; + } + return false; + }, + .compound => |v| { + if (v.selectors.len == 0) return n.isElement(); + + for (v.selectors) |sel| { + if (!try sel.match(n)) return false; + } + return true; + }, + .combined => |v| { + return switch (v.combinator) { + .empty => try v.first.match(n), + .descendant => { + if (!try v.second.match(n)) return false; + + // The first must 
match a ascendent. + var p = try n.parent(); + while (p != null) { + if (try v.first.match(p.?)) { + return true; + } + p = try p.?.parent(); + } + + return false; + }, + else => return Error.UnknownCombinedCombinator, + }; + }, .attribute => |v| { const attr = try n.attr(v.key); From 9c997ec86d5b667e30edfb14ef6cda1d3c4f71dd Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 19 Mar 2024 09:25:52 +0100 Subject: [PATCH 14/28] css: add pseudo class relative match --- src/css/match_test.zig | 50 ++++++++++++++++++++++++++++++++++++++++++ src/css/selector.zig | 32 +++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 22694961..254e9156 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -176,6 +176,31 @@ test "matchFirst" { } } } }, .exp = 1, }, + .{ + .q = ":not(p)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p:has(a)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p:has(strong)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 0, + }, + .{ + .q = "p:haschild(a)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p:haschild(strong)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 0, + }, }; for (testcases) |tc| { @@ -315,6 +340,31 @@ test "matchAll" { } } } }, .exp = 1, }, + .{ + .q = ":not(p)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 2, + }, + .{ + .q = "p:has(a)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p:has(strong)", + .n = 
.{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 0, + }, + .{ + .q = "p:haschild(a)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 1, + }, + .{ + .q = "p:haschild(strong)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, + .exp = 0, + }, }; for (testcases) |tc| { diff --git a/src/css/selector.zig b/src/css/selector.zig index f94144b5..df0788b0 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -143,6 +143,7 @@ pub const PseudoClass = enum { pub const Selector = union(enum) { pub const Error = error{ UnknownCombinedCombinator, + UnsupportedRelativePseudoClass, }; compound: struct { @@ -289,10 +290,41 @@ pub const Selector = union(enum) { }; }, .never_match => return false, + .pseudo_class_relative => |v| { + if (!n.isElement()) return false; + + return switch (v.pseudo_class) { + .not => !try v.match.match(n), + .has => try hasDescendantMatch(v.match, n), + .haschild => try hasChildMatch(v.match, n), + else => Error.UnsupportedRelativePseudoClass, + }; + }, else => false, }; } + fn hasDescendantMatch(s: *const Selector, n: anytype) anyerror!bool { + var c = try n.firstChild(); + while (c != null) { + if (try s.match(c.?)) return true; + if (c.?.isElement() and try hasDescendantMatch(s, c.?)) return true; + c = try c.?.nextSibling(); + } + + return false; + } + + fn hasChildMatch(s: *const Selector, n: anytype) anyerror!bool { + var c = try n.firstChild(); + while (c != null) { + if (try s.match(c.?)) return true; + c = try c.?.nextSibling(); + } + + return false; + } + pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void { switch (sel) { .group => |v| { From db5d9332853d75153912136c9d725659001a67bf Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 08:50:57 +0100 Subject: [PATCH 15/28] css: add nth- pseudo class --- src/css/libdom.zig | 18 ++++++ 
src/css/match_test.zig | 85 +++++++++++++++++++++++++++- src/css/parser.zig | 2 +- src/css/selector.zig | 124 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 225 insertions(+), 4 deletions(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 7c06cd1a..04c99a66 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -13,6 +13,13 @@ pub const Node = struct { return null; } + pub fn lastChild(n: Node) !?Node { + const c = try parser.nodeLastChild(n.node); + if (c) |cc| return .{ .node = cc }; + + return null; + } + pub fn nextSibling(n: Node) !?Node { const c = try parser.nodeNextSibling(n.node); if (c) |cc| return .{ .node = cc }; @@ -20,6 +27,13 @@ pub const Node = struct { return null; } + pub fn prevSibling(n: Node) !?Node { + const c = try parser.nodePreviousSibling(n.node); + if (c) |cc| return .{ .node = cc }; + + return null; + } + pub fn parent(n: Node) !?Node { const c = try parser.nodeParentNode(n.node); if (c) |cc| return .{ .node = cc }; @@ -39,4 +53,8 @@ pub const Node = struct { pub fn attr(n: Node, key: []const u8) !?[]const u8 { return try parser.elementGetAttribute(parser.nodeToElement(n.node), key); } + + pub fn eql(a: Node, b: Node) bool { + return a.node == b.node; + } }; diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 254e9156..9aaeedbd 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -4,7 +4,9 @@ const css = @import("css.zig"); // Node mock implementation for test only. 
pub const Node = struct { child: ?*const Node = null, + last: ?*const Node = null, sibling: ?*const Node = null, + prev: ?*const Node = null, par: ?*const Node = null, name: []const u8 = "", @@ -14,10 +16,18 @@ pub const Node = struct { return n.child; } + pub fn lastChild(n: *const Node) !?*const Node { + return n.last; + } + pub fn nextSibling(n: *const Node) !?*const Node { return n.sibling; } + pub fn prevSibling(n: *const Node) !?*const Node { + return n.prev; + } + pub fn parent(n: *const Node) !?*const Node { return n.par; } @@ -33,6 +43,10 @@ pub const Node = struct { pub fn attr(n: *const Node, _: []const u8) !?[]const u8 { return n.att; } + + pub fn eql(a: *const Node, b: *const Node) bool { + return a == b; + } }; const Matcher = struct { @@ -373,7 +387,7 @@ test "matchAll" { const s = try css.parse(alloc, tc.q, .{}); defer s.deinit(alloc); - _ = css.matchAll(s, &tc.n, &matcher) catch |e| { + css.matchAll(s, &tc.n, &matcher) catch |e| { std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); return e; }; @@ -384,3 +398,72 @@ test "matchAll" { }; } } + +test "nth pseudo class" { + const alloc = std.testing.allocator; + + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + var p1: Node = .{ .name = "p" }; + var p2: Node = .{ .name = "p" }; + + p1.sibling = &p2; + p2.prev = &p1; + + var root: Node = .{ .child = &p1, .last = &p2 }; + p1.par = &root; + p2.par = &root; + + const testcases = [_]struct { + q: []const u8, + n: Node, + exp: ?*const Node, + }{ + .{ .q = "a:nth-of-type(1)", .n = root, .exp = null }, + .{ .q = "p:nth-of-type(1)", .n = root, .exp = &p1 }, + .{ .q = "p:nth-of-type(2)", .n = root, .exp = &p2 }, + .{ .q = "p:nth-of-type(0)", .n = root, .exp = null }, + .{ .q = "p:nth-of-type(2n)", .n = root, .exp = &p2 }, + .{ .q = "p:nth-last-child(1)", .n = root, .exp = &p2 }, + .{ .q = "p:nth-last-child(2)", .n = root, .exp = &p1 }, + .{ .q = "p:nth-child(1)", .n = root, .exp = &p1 }, + .{ .q = "p:nth-child(2)", .n = root, 
.exp = &p2 }, + .{ .q = "p:nth-child(odd)", .n = root, .exp = &p1 }, + .{ .q = "p:nth-child(even)", .n = root, .exp = &p2 }, + .{ .q = "p:nth-child(n+2)", .n = root, .exp = &p2 }, + }; + + for (testcases) |tc| { + matcher.reset(); + + const s = try css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + css.matchAll(s, &tc.n, &matcher) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + if (tc.exp) |exp_n| { + const exp: usize = 1; + std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + std.testing.expectEqual(exp_n, matcher.nodes.items[0]) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + continue; + } + + const exp: usize = 0; + std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + } +} diff --git a/src/css/parser.zig b/src/css/parser.zig index f0da6504..b23991c1 100644 --- a/src/css/parser.zig +++ b/src/css/parser.zig @@ -711,7 +711,7 @@ pub const Parser = struct { if (p.i >= p.s.len) return ParseError.ExpectedNthExpression; const c = p.s[p.i]; if (std.ascii.isDigit(c)) { - const a = try p.parseInteger() * -1; + const a = try p.parseInteger(); return p.parseNthReadA(a); } if (c == 'n' or c == 'N') { diff --git a/src/css/selector.zig b/src/css/selector.zig index df0788b0..381b1d67 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -144,6 +144,9 @@ pub const Selector = union(enum) { pub const Error = error{ UnknownCombinedCombinator, UnsupportedRelativePseudoClass, + UnsupportedContainsPseudoClass, + UnsupportedRegexpPseudoClass, + UnsupportedAttrRegexpOperator, }; compound: struct { @@ -222,6 +225,7 @@ pub const Selector = union(enum) { return std.mem.indexOf(u8, haystack, needle) != null; } + // match returns true if the node matches 
the selector query. pub fn match(s: Selector, n: anytype) !bool { return switch (s) { .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()), @@ -286,7 +290,7 @@ pub const Selector = union(enum) { return attr.?[val.len] == '-'; }, - .regexp => false, // TODO handle regexp attribute operator. + .regexp => return Error.UnsupportedAttrRegexpOperator, // TODO handle regexp attribute operator. }; }, .never_match => return false, @@ -300,10 +304,126 @@ pub const Selector = union(enum) { else => Error.UnsupportedRelativePseudoClass, }; }, - else => false, + .pseudo_class_contains => return Error.UnsupportedContainsPseudoClass, // TODO, need mem allocation. + .pseudo_class_regexp => return Error.UnsupportedRegexpPseudoClass, // TODO need mem allocation. + .pseudo_class_nth => |v| { + if (v.a == 0) { + if (v.last) { + return simpleNthLastChildMatch(v.b, v.of_type, n); + } + return simpleNthChildMatch(v.b, v.of_type, n); + } + return nthChildMatch(v.a, v.b, v.last, v.of_type, n); + }, + .pseudo_class => return false, + .pseudo_class_only_child => return false, + .pseudo_class_lang => return false, + .pseudo_element => return false, }; } + // simpleNthLastChildMatch implements :nth-last-child(b). + // If ofType is true, implements :nth-last-of-type instead. + fn simpleNthLastChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool { + if (!n.isElement()) return false; + + const p = try n.parent(); + if (p == null) return false; + + const ntag = try n.tag(); + + var count: isize = 0; + var c = try p.?.lastChild(); + // loop hover all n siblings. + while (c != null) { + // ignore non elements or others tags if of-type is true. + if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) { + c = try c.?.prevSibling(); + continue; + } + + count += 1; + + if (n.eql(c.?)) return count == b; + if (count >= b) return false; + + c = try c.?.prevSibling(); + } + + return false; + } + + // simpleNthChildMatch implements :nth-child(b). 
+ // If ofType is true, implements :nth-of-type instead. + fn simpleNthChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool { + if (!n.isElement()) return false; + + const p = try n.parent(); + if (p == null) return false; + + const ntag = try n.tag(); + + var count: isize = 0; + var c = try p.?.firstChild(); + // loop hover all n siblings. + while (c != null) { + // ignore non elements or others tags if of-type is true. + if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) { + c = try c.?.nextSibling(); + continue; + } + + count += 1; + + if (n.eql(c.?)) return count == b; + if (count >= b) return false; + + c = try c.?.nextSibling(); + } + + return false; + } + + // nthChildMatch implements :nth-child(an+b). + // If last is true, implements :nth-last-child instead. + // If ofType is true, implements :nth-of-type instead. + fn nthChildMatch(a: isize, b: isize, last: bool, of_type: bool, n: anytype) anyerror!bool { + if (!n.isElement()) return false; + + const p = try n.parent(); + if (p == null) return false; + + const ntag = try n.tag(); + + var i: isize = -1; + var count: isize = 0; + var c = try p.?.firstChild(); + // loop hover all n siblings. + while (c != null) { + // ignore non elements or others tags if of-type is true. 
+ if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) { + c = try c.?.nextSibling(); + continue; + } + count += 1; + + if (n.eql(c.?)) { + i = count; + if (!last) break; + } + + c = try c.?.nextSibling(); + } + + if (i == -1) return false; + + if (last) i = count - i + 1; + + i -= b; + if (a == 0) return i == 0; + return @mod(i, a) == 0 and @divTrunc(i, a) >= 0; + } + fn hasDescendantMatch(s: *const Selector, n: anytype) anyerror!bool { var c = try n.firstChild(); while (c != null) { From bd899111d5f5837bdc3a834093b97ec5d68a01da Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 10:25:46 +0100 Subject: [PATCH 16/28] css: implement :only-child and :only-of-type --- src/css/match_test.zig | 64 ++++++++++++++++++++++++++++++++++++++++++ src/css/selector.zig | 53 ++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 9aaeedbd..8e659043 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -399,6 +399,70 @@ test "matchAll" { } } +test "pseudo class" { + const alloc = std.testing.allocator; + + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + var p1: Node = .{ .name = "p" }; + var p2: Node = .{ .name = "p" }; + var a1: Node = .{ .name = "a" }; + + p1.sibling = &p2; + p2.prev = &p1; + + p2.sibling = &a1; + a1.prev = &p2; + + var root: Node = .{ .child = &p1, .last = &a1 }; + p1.par = &root; + p2.par = &root; + a1.par = &root; + + const testcases = [_]struct { + q: []const u8, + n: Node, + exp: ?*const Node, + }{ + .{ .q = "p:only-child", .n = root, .exp = null }, + .{ .q = "a:only-of-type", .n = root, .exp = &a1 }, + }; + + for (testcases) |tc| { + matcher.reset(); + + const s = try css.parse(alloc, tc.q, .{}); + defer s.deinit(alloc); + + css.matchAll(s, &tc.n, &matcher) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + if (tc.exp) |exp_n| { + 
const exp: usize = 1; + std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + std.testing.expectEqual(exp_n, matcher.nodes.items[0]) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + + continue; + } + + const exp: usize = 0; + std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| { + std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s }); + return e; + }; + } +} + test "nth pseudo class" { const alloc = std.testing.allocator; diff --git a/src/css/selector.zig b/src/css/selector.zig index 381b1d67..a0c5d4d0 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -145,6 +145,7 @@ pub const Selector = union(enum) { UnknownCombinedCombinator, UnsupportedRelativePseudoClass, UnsupportedContainsPseudoClass, + UnsupportedPseudoClass, UnsupportedRegexpPseudoClass, UnsupportedAttrRegexpOperator, }; @@ -315,13 +316,61 @@ pub const Selector = union(enum) { } return nthChildMatch(v.a, v.b, v.last, v.of_type, n); }, - .pseudo_class => return false, - .pseudo_class_only_child => return false, + .pseudo_class => |v| { + switch (v) { + .input => return Error.UnsupportedPseudoClass, + .empty => return Error.UnsupportedPseudoClass, + .root => return Error.UnsupportedPseudoClass, + .link => return Error.UnsupportedPseudoClass, + .enabled => return Error.UnsupportedPseudoClass, + .disabled => return Error.UnsupportedPseudoClass, + .checked => return Error.UnsupportedPseudoClass, + .visited => return Error.UnsupportedPseudoClass, + .hover => return Error.UnsupportedPseudoClass, + .active => return Error.UnsupportedPseudoClass, + .focus => return Error.UnsupportedPseudoClass, + .target => return Error.UnsupportedPseudoClass, + + // all others pseudo class are handled by specialized + // pseudo_class_X selectors. 
+ else => return Error.UnsupportedPseudoClass, + } + }, + .pseudo_class_only_child => |v| onlyChildMatch(v, n), .pseudo_class_lang => return false, .pseudo_element => return false, }; } + // onlyChildMatch implements :only-child + // If `ofType` is true, it implements :only-of-type instead. + fn onlyChildMatch(of_type: bool, n: anytype) anyerror!bool { + if (!n.isElement()) return false; + + const p = try n.parent(); + if (p == null) return false; + + const ntag = try n.tag(); + + var count: usize = 0; + var c = try p.?.firstChild(); + // loop hover all n siblings. + while (c != null) { + // ignore non elements or others tags if of-type is true. + if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) { + c = try c.?.nextSibling(); + continue; + } + + count += 1; + if (count > 1) return false; + + c = try c.?.nextSibling(); + } + + return count == 1; + } + // simpleNthLastChildMatch implements :nth-last-child(b). // If ofType is true, implements :nth-last-of-type instead. 
fn simpleNthLastChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool { From 2671cda98f36467cca2202ee1e1b7764ed7efcd4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 11:43:32 +0100 Subject: [PATCH 17/28] css: implement :lang match --- src/css/match_test.zig | 20 ++++++++++++++++++++ src/css/selector.zig | 19 ++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 8e659043..47fbdb78 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -215,6 +215,16 @@ test "matchFirst" { .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, .exp = 0, }, + .{ + .q = "p:lang(en)", + .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } }, + .exp = 1, + }, + .{ + .q = "a:lang(en)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } }, + .exp = 1, + }, }; for (testcases) |tc| { @@ -379,6 +389,16 @@ test "matchAll" { .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } }, .exp = 0, }, + .{ + .q = "p:lang(en)", + .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } }, + .exp = 1, + }, + .{ + .q = "a:lang(en)", + .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } }, + .exp = 1, + }, }; for (testcases) |tc| { diff --git a/src/css/selector.zig b/src/css/selector.zig index a0c5d4d0..72485cac 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -337,11 +337,28 @@ pub const Selector = union(enum) { } }, .pseudo_class_only_child => |v| onlyChildMatch(v, n), - .pseudo_class_lang => return false, + .pseudo_class_lang => |v| langMatch(v, n), .pseudo_element => return false, }; } + fn langMatch(lang: []const u8, n: anytype) anyerror!bool { + if (try n.attr("lang")) |own| { + if (std.mem.eql(u8, own, lang)) return true; + + // check if the 
lang attr starts with lang+'-' + if (std.mem.startsWith(u8, own, lang)) { + if (own.len > lang.len and own[lang.len] == '-') return true; + } + } + + // if the tag doesn't match, try the parent. + const p = try n.parent(); + if (p == null) return false; + + return langMatch(lang, p.?); + } + // onlyChildMatch implements :only-child // If `ofType` is true, it implements :only-of-type instead. fn onlyChildMatch(of_type: bool, n: anytype) anyerror!bool { From de9d253dc99d01b3f6e09525eabd38962681cfdc Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 14:48:08 +0100 Subject: [PATCH 18/28] css: implement missing pseudo classes :input :empty :root :link :enabled :disabled :checked --- src/css/selector.zig | 171 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 156 insertions(+), 15 deletions(-) diff --git a/src/css/selector.zig b/src/css/selector.zig index 72485cac..b55199e0 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -146,6 +146,7 @@ pub const Selector = union(enum) { UnsupportedRelativePseudoClass, UnsupportedContainsPseudoClass, UnsupportedPseudoClass, + UnsupportedPseudoElement, UnsupportedRegexpPseudoClass, UnsupportedAttrRegexpOperator, }; @@ -317,31 +318,171 @@ pub const Selector = union(enum) { return nthChildMatch(v.a, v.b, v.last, v.of_type, n); }, .pseudo_class => |v| { - switch (v) { - .input => return Error.UnsupportedPseudoClass, - .empty => return Error.UnsupportedPseudoClass, - .root => return Error.UnsupportedPseudoClass, - .link => return Error.UnsupportedPseudoClass, - .enabled => return Error.UnsupportedPseudoClass, - .disabled => return Error.UnsupportedPseudoClass, - .checked => return Error.UnsupportedPseudoClass, - .visited => return Error.UnsupportedPseudoClass, - .hover => return Error.UnsupportedPseudoClass, - .active => return Error.UnsupportedPseudoClass, - .focus => return Error.UnsupportedPseudoClass, - .target => return Error.UnsupportedPseudoClass, + return switch (v) { + .input => { + if 
(!n.isElement()) return false; + const ntag = try n.tag(); + + return std.ascii.eqlIgnoreCase("input", ntag) or + std.ascii.eqlIgnoreCase("select", ntag) or + std.ascii.eqlIgnoreCase("button", ntag) or + std.ascii.eqlIgnoreCase("textarea", ntag); + }, + .empty => { + if (!n.isElement()) return false; + + var c = try n.firstChild(); + while (c != null) { + if (c.?.isElement()) return false; + + // TODO check text node content equals an empty + // string ("") + + c = try c.?.nextSibling(); + } + + return true; + }, + .root => { + if (!n.isElement()) return false; + + const p = try n.parent(); + return p == null; + }, + .link => { + const ntag = try n.tag(); + + return std.ascii.eqlIgnoreCase("a", ntag) or + std.ascii.eqlIgnoreCase("area", ntag) or + std.ascii.eqlIgnoreCase("link", ntag); + }, + .enabled => { + if (!n.isElement()) return false; + + const ntag = try n.tag(); + + if (std.ascii.eqlIgnoreCase("a", ntag) or + std.ascii.eqlIgnoreCase("area", ntag) or + std.ascii.eqlIgnoreCase("link", ntag)) + { + return try n.attr("href") != null; + } + + if (std.ascii.eqlIgnoreCase("optgroup", ntag) or + std.ascii.eqlIgnoreCase("menuitem", ntag) or + std.ascii.eqlIgnoreCase("fieldset", ntag)) + { + return try n.attr("disabled") == null; + } + + if (std.ascii.eqlIgnoreCase("input", ntag) or + std.ascii.eqlIgnoreCase("button", ntag) or + std.ascii.eqlIgnoreCase("select", ntag) or + std.ascii.eqlIgnoreCase("textarea", ntag) or + std.ascii.eqlIgnoreCase("option", ntag)) + { + return try n.attr("disabled") == null and + !try inDisabledFieldset(n); + } + + return false; + }, + .disabled => { + if (!n.isElement()) return false; + + const ntag = try n.tag(); + + if (std.ascii.eqlIgnoreCase("optgroup", ntag) or + std.ascii.eqlIgnoreCase("menuitem", ntag) or + std.ascii.eqlIgnoreCase("fieldset", ntag)) + { + return try n.attr("disabled") != null; + } + + if (std.ascii.eqlIgnoreCase("input", ntag) or + std.ascii.eqlIgnoreCase("button", ntag) or + std.ascii.eqlIgnoreCase("select", 
ntag) or + std.ascii.eqlIgnoreCase("textarea", ntag) or + std.ascii.eqlIgnoreCase("option", ntag)) + { + return try n.attr("disabled") != null or + try inDisabledFieldset(n); + } + + return false; + }, + .checked => { + if (!n.isElement()) return false; + + const ntag = try n.tag(); + + if (std.ascii.eqlIgnoreCase("intput", ntag)) { + const ntype = try n.attr("type"); + if (ntype == null) return false; + + if (std.mem.eql(u8, ntype.?, "checkbox") or + std.mem.eql(u8, ntype.?, "radio")) + { + return try n.attr("checked") != null; + } + + return false; + } + if (std.ascii.eqlIgnoreCase("option", ntag)) { + return try n.attr("selected") != null; + } + + return false; + }, + .visited => return false, + .hover => return false, + .active => return false, + .focus => return false, + // TODO implement using the url fragment. + // see https://developer.mozilla.org/en-US/docs/Web/CSS/:target + .target => return false, // all others pseudo class are handled by specialized // pseudo_class_X selectors. else => return Error.UnsupportedPseudoClass, - } + }; }, .pseudo_class_only_child => |v| onlyChildMatch(v, n), .pseudo_class_lang => |v| langMatch(v, n), - .pseudo_element => return false, + + // pseudo elements doesn't make sense in the matching process. + // > A CSS pseudo-element is a keyword added to a selector that + // > lets you style a specific part of the selected element(s). + // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-elements + .pseudo_element => return Error.UnsupportedPseudoElement, }; } + fn inDisabledFieldset(n: anytype) anyerror!bool { + const p = try n.parent(); + if (p == null) return false; + + const ptag = try p.?.tag(); + + if (std.ascii.eqlIgnoreCase("fieldset", ptag) and + try p.?.attr("disabled") != null) + { + return true; + } + + // TODO should we handle legend like cascadia does? + // The implemention below looks suspicious, I didn't find a test case + // in cascadia and I didn't find the reference about legend in the + // specs. 
For now I do prefer ignoring this part. + // + // ``` + // (n.DataAtom != atom.Legend || hasLegendInPreviousSiblings(n)) { + // ``` + // https://github.com/andybalholm/cascadia/blob/master/pseudo_classes.go#L434 + + return try inDisabledFieldset(p.?); + } + fn langMatch(lang: []const u8, n: anytype) anyerror!bool { if (try n.attr("lang")) |own| { if (std.mem.eql(u8, own, lang)) return true; From e7738744cb00eb986b0b4d3ef9978e4a5a0d4b60 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 15:39:59 +0100 Subject: [PATCH 19/28] css: add libdom tests --- src/css/libdom_test.zig | 231 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 223 insertions(+), 8 deletions(-) diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig index 0e5a255d..b8444f06 100644 --- a/src/css/libdom_test.zig +++ b/src/css/libdom_test.zig @@ -37,9 +37,107 @@ test "matchFirst" { exp: usize, }{ .{ .q = "address", .html = "
          This address...
          ", .exp = 1 }, + .{ .q = "*", .html = "text", .exp = 1 }, + .{ .q = "*", .html = "", .exp = 1 }, .{ .q = "#foo", .html = "

          ", .exp = 1 }, - .{ .q = ".t1", .html = "

          • ", .exp = 1 }, + .{ .q = "li#t1", .html = "
            • ", .exp = 1 }, .{ .q = ".t3", .html = "

              • ", .exp = 1 }, + .{ .q = "*#t4", .html = "
                1. ", .exp = 1 }, + .{ .q = ".t1", .html = "
                  • ", .exp = 1 }, + .{ .q = "p.t1", .html = "

                    ", .exp = 1 }, + .{ .q = "div.teST", .html = "

                    ", .exp = 0 }, + .{ .q = ".t1.fail", .html = "

                    ", .exp = 0 }, + .{ .q = "p.t1.t2", .html = "

                    ", .exp = 1 }, + .{ .q = "p.--t1", .html = "

                    ", .exp = 1 }, + .{ .q = "p.--t1.--t2", .html = "

                    ", .exp = 1 }, + .{ .q = "p[title]", .html = "

                    ", .exp = 1 }, + .{ .q = "div[class=\"red\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "address[title=\"foo\"]", .html = "
                    ", .exp = 1 }, + .{ .q = "address[title=\"FoOIgnoRECaSe\" i]", .html = "
                    ", .exp = 1 }, + .{ .q = "address[title!=\"foo\"]", .html = "
                    ", .exp = 1 }, + .{ .q = "address[title!=\"foo\" i]", .html = "
                    ", .exp = 1 }, + .{ .q = "p[title!=\"FooBarUFoo\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[ title ~= foo ]", .html = "

                    ", .exp = 1 }, + .{ .q = "p[title~=\"FOO\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "p[title~=toofoo i]", .html = "

                    ", .exp = 0 }, + .{ .q = "[title~=\"hello world\"]", .html = "

                    ", .exp = 0 }, + .{ .q = "[title~=\"hello\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title~=\"hello\" I]", .html = "

                    ", .exp = 1 }, + .{ .q = "[lang|=\"en\"]", .html = "

                    ", .exp = 1 }, + .{ .q = "[lang|=\"EN\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[lang|=\"EN\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title^=\"foo\"]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title^=\"foo\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title$=\"bar\"]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title$=\"BAR\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title*=\"bar\"]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title*=\"BaRu\" i]", .html = "

                    ", .exp = 1 }, + .{ .q = "[title*=\"BaRu\" I]", .html = "

                    ", .exp = 1 }, + .{ .q = "p[class$=\" \"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "p[class$=\"\"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "p[class^=\" \"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "p[class^=\"\"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "p[class*=\" \"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "p[class*=\"\"]", .html = "

                    This text should be green.

                    This text should be green.

                    ", .exp = 0 }, + .{ .q = "input[name=Sex][value=F]", .html = "", .exp = 1 }, + .{ .q = "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", .html = "aaa
                    ", .exp = 1 }, + .{ .q = ".t1:not(.t2)", .html = "

                    ", .exp = 0 }, + .{ .q = "div:not(.t1)", .html = "

                    ", .exp = 1 }, + .{ .q = "div:not([class=\"t2\"])", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-child(odd)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-child(even)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-child(-n+2)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-child(3n+1)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-last-child(odd)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-last-child(even)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-last-child(-n+2)", .html = "
                    ", .exp = 1 }, + .{ .q = "li:nth-last-child(3n+1)", .html = "
                    ", .exp = 1 }, + .{ .q = "span:first-child", .html = "

                    some text and a span and another

                    ", .exp = 1 }, + .{ .q = "span:last-child", .html = "a span and some text", .exp = 1 }, + .{ .q = "p:nth-of-type(2)", .html = "

                    ", .exp = 1 }, + .{ .q = "p:nth-last-of-type(2)", .html = "

                    ", .exp = 1 }, + .{ .q = "p:last-of-type", .html = "

                    ", .exp = 1 }, + .{ .q = "p:first-of-type", .html = "

                    ", .exp = 1 }, + .{ .q = "p:only-child", .html = "

                    ", .exp = 1 }, + .{ .q = "p:only-of-type", .html = "

                    ", .exp = 1 }, + .{ .q = ":empty", .html = "

                    Hello

                    ", .exp = 1 }, + .{ .q = "div p", .html = "

                    ", .exp = 1 }, + .{ .q = "div table p", .html = "

                    ", .exp = 1 }, + .{ .q = "div > p", .html = "

                    ", .exp = 1 }, + .{ .q = "p ~ p", .html = "

                    ", .exp = 1 }, + .{ .q = "p + p", .html = "

                    ", .exp = 1 }, + .{ .q = "li, p", .html = "

                    ", .exp = 1 }, + .{ .q = "p +/*This is a comment*/ p", .html = "

                    ", .exp = 1 }, + .{ .q = "p:contains(\"that wraps\")", .html = "

                    Text block that wraps inner text and continues

                    ", .exp = 1 }, + .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                    Text block that wraps inner text and continues

                    ", .exp = 0 }, + .{ .q = ":containsOwn(\"inner\")", .html = "

                    Text block that wraps inner text and continues

                    ", .exp = 1 }, + .{ .q = "p:containsOwn(\"block\")", .html = "

                    Text block that wraps inner text and continues

                    ", .exp = 1 }, + .{ .q = "div:has(#p1)", .html = "

                    text content

                    ", .exp = 1 }, + .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                    contents 1

                    contents 2

                    ", .exp = 1 }, + .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                    contents 1

                    contents 2

                    ", .exp = 1 }, + .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                    contents 1

                    contents 2

                    ", .exp = 1 }, + // .{ .q = "p:matches([\\d])", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:matches([a-z])", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:matches([a-zA-Z])", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:matches([^\\d])", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:matches(^(0|a))", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:matches(^\\d+$)", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "p:not(:matches(^\\d+$))", .html = "

                    0123456789

                    abcdef

                    0123ABCD

                    ", .exp = 1 }, + // .{ .q = "div :matchesOwn(^\\d+$)", .html = "

                    0123456789

                    ", .exp = 1 }, + // .{ .q = "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", .html = "", .exp = 1 }, + // .{ .q = "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", .html = "", .exp = 1 }, + .{ .q = ":input", .html = "
                    ", .exp = 1 }, + .{ .q = ":root", .html = "", .exp = 1 }, + .{ .q = "*:root", .html = "", .exp = 1 }, + .{ .q = "html:nth-child(1)", .html = "", .exp = 1 }, + .{ .q = "*:root:first-child", .html = "", .exp = 1 }, + .{ .q = "*:root:nth-child(1)", .html = "", .exp = 1 }, + .{ .q = "a:not(:root)", .html = "
                    ", .exp = 1 }, + .{ .q = "body > *:nth-child(3n+2)", .html = "

                    ", .exp = 1 }, + .{ .q = "input:disabled", .html = "
                    ", .exp = 1 }, + .{ .q = ":disabled", .html = "
                    ", .exp = 1 }, + .{ .q = ":enabled", .html = "
                    ", .exp = 1 }, + .{ .q = "div.class1, div.class2", .html = "
                    ", .exp = 1 }, }; for (testcases) |tc| { @@ -48,13 +146,23 @@ test "matchFirst" { const doc = try parser.documentHTMLParseFromStr(tc.html); defer parser.documentHTMLClose(doc) catch {}; - const s = try css.parse(alloc, tc.q, .{}); + const s = css.parse(alloc, tc.q, .{}) catch |e| { + std.debug.print("parse, query: {s}\n", .{tc.q}); + return e; + }; + defer s.deinit(alloc); const node = Node{ .node = parser.documentHTMLToNode(doc) }; - _ = try css.matchFirst(s, node, &matcher); - try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + _ = css.matchFirst(s, node, &matcher) catch |e| { + std.debug.print("match, query: {s}\n", .{tc.q}); + return e; + }; + std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| { + std.debug.print("expectation, query: {s}\n", .{tc.q}); + return e; + }; } } @@ -70,9 +178,107 @@ test "matchAll" { exp: usize, }{ .{ .q = "address", .html = "
                    This address...
                    ", .exp = 1 }, + .{ .q = "*", .html = "text", .exp = 3 }, + .{ .q = "*", .html = "", .exp = 3 }, .{ .q = "#foo", .html = "

                    ", .exp = 1 }, - .{ .q = ".t1", .html = "

                    • ", .exp = 1 }, + .{ .q = "li#t1", .html = "
                      • ", .exp = 1 }, .{ .q = ".t3", .html = "

                        • ", .exp = 1 }, + .{ .q = "*#t4", .html = "
                          1. ", .exp = 1 }, + .{ .q = ".t1", .html = "
                            • ", .exp = 1 }, + .{ .q = "p.t1", .html = "

                              ", .exp = 1 }, + .{ .q = "div.teST", .html = "

                              ", .exp = 0 }, + .{ .q = ".t1.fail", .html = "

                              ", .exp = 0 }, + .{ .q = "p.t1.t2", .html = "

                              ", .exp = 1 }, + .{ .q = "p.--t1", .html = "

                              ", .exp = 1 }, + .{ .q = "p.--t1.--t2", .html = "

                              ", .exp = 1 }, + .{ .q = "p[title]", .html = "

                              ", .exp = 1 }, + .{ .q = "div[class=\"red\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "address[title=\"foo\"]", .html = "
                              ", .exp = 1 }, + .{ .q = "address[title=\"FoOIgnoRECaSe\" i]", .html = "
                              ", .exp = 1 }, + .{ .q = "address[title!=\"foo\"]", .html = "
                              ", .exp = 2 }, + .{ .q = "address[title!=\"foo\" i]", .html = "
                              ", .exp = 2 }, + .{ .q = "p[title!=\"FooBarUFoo\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "[ title ~= foo ]", .html = "

                              ", .exp = 1 }, + .{ .q = "p[title~=\"FOO\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "p[title~=toofoo i]", .html = "

                              ", .exp = 0 }, + .{ .q = "[title~=\"hello world\"]", .html = "

                              ", .exp = 0 }, + .{ .q = "[title~=\"hello\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title~=\"hello\" I]", .html = "

                              ", .exp = 1 }, + .{ .q = "[lang|=\"en\"]", .html = "

                              ", .exp = 2 }, + .{ .q = "[lang|=\"EN\" i]", .html = "

                              ", .exp = 2 }, + .{ .q = "[lang|=\"EN\" i]", .html = "

                              ", .exp = 2 }, + .{ .q = "[title^=\"foo\"]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title^=\"foo\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title$=\"bar\"]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title$=\"BAR\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title*=\"bar\"]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title*=\"BaRu\" i]", .html = "

                              ", .exp = 1 }, + .{ .q = "[title*=\"BaRu\" I]", .html = "

                              ", .exp = 1 }, + .{ .q = "p[class$=\" \"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "p[class$=\"\"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "p[class^=\" \"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "p[class^=\"\"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "p[class*=\" \"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "p[class*=\"\"]", .html = "

                              This text should be green.

                              This text should be green.

                              ", .exp = 0 }, + .{ .q = "input[name=Sex][value=F]", .html = "", .exp = 1 }, + .{ .q = "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", .html = "aaa
                              ", .exp = 1 }, + .{ .q = ".t1:not(.t2)", .html = "

                              ", .exp = 0 }, + .{ .q = "div:not(.t1)", .html = "

                              ", .exp = 1 }, + .{ .q = "div:not([class=\"t2\"])", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-child(odd)", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-child(even)", .html = "
                              ", .exp = 1 }, + .{ .q = "li:nth-child(-n+2)", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-child(3n+1)", .html = "
                              ", .exp = 1 }, + .{ .q = "li:nth-last-child(odd)", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-last-child(even)", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-last-child(-n+2)", .html = "
                              ", .exp = 2 }, + .{ .q = "li:nth-last-child(3n+1)", .html = "
                              ", .exp = 2 }, + .{ .q = "span:first-child", .html = "

                              some text and a span and another

                              ", .exp = 1 }, + .{ .q = "span:last-child", .html = "a span and some text", .exp = 1 }, + .{ .q = "p:nth-of-type(2)", .html = "

                              ", .exp = 1 }, + .{ .q = "p:nth-last-of-type(2)", .html = "

                              ", .exp = 1 }, + .{ .q = "p:last-of-type", .html = "

                              ", .exp = 1 }, + .{ .q = "p:first-of-type", .html = "

                              ", .exp = 1 }, + .{ .q = "p:only-child", .html = "

                              ", .exp = 1 }, + .{ .q = "p:only-of-type", .html = "

                              ", .exp = 1 }, + .{ .q = ":empty", .html = "

                              Hello

                              ", .exp = 3 }, + .{ .q = "div p", .html = "

                              ", .exp = 2 }, + .{ .q = "div table p", .html = "

                              ", .exp = 1 }, + .{ .q = "div > p", .html = "

                              ", .exp = 2 }, + .{ .q = "p ~ p", .html = "

                              ", .exp = 2 }, + .{ .q = "p + p", .html = "

                              ", .exp = 1 }, + .{ .q = "li, p", .html = "

                              ", .exp = 3 }, + .{ .q = "p +/*This is a comment*/ p", .html = "

                              ", .exp = 1 }, + .{ .q = "p:contains(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 0 }, + .{ .q = ":containsOwn(\"inner\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + .{ .q = "p:containsOwn(\"block\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + .{ .q = "div:has(#p1)", .html = "

                              text content

                              ", .exp = 1 }, + .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 2 }, + .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "p:matches([\\d])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, + // .{ .q = "p:matches([a-z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, + // .{ .q = "p:matches([a-zA-Z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, + // .{ .q = "p:matches([^\\d])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, + // .{ .q = "p:matches(^(0|a))", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 3 }, + // .{ .q = "p:matches(^\\d+$)", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, + // .{ .q = "p:not(:matches(^\\d+$))", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, + // .{ .q = "div :matchesOwn(^\\d+$)", .html = "

                              0123456789

                              ", .exp = 2 }, + // .{ .q = "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", .html = "", .exp = 2 }, + // .{ .q = "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", .html = "", .exp = 1 }, + .{ .q = ":input", .html = "
                              ", .exp = 5 }, + .{ .q = ":root", .html = "", .exp = 1 }, + .{ .q = "*:root", .html = "", .exp = 1 }, + .{ .q = "html:nth-child(1)", .html = "", .exp = 1 }, + .{ .q = "*:root:first-child", .html = "", .exp = 1 }, + .{ .q = "*:root:nth-child(1)", .html = "", .exp = 1 }, + .{ .q = "a:not(:root)", .html = "
                              ", .exp = 1 }, + .{ .q = "body > *:nth-child(3n+2)", .html = "

                              ", .exp = 2 }, + .{ .q = "input:disabled", .html = "
                              ", .exp = 1 }, + .{ .q = ":disabled", .html = "
                              ", .exp = 1 }, + .{ .q = ":enabled", .html = "
                              ", .exp = 1 }, + .{ .q = "div.class1, div.class2", .html = "
                              ", .exp = 2 }, }; for (testcases) |tc| { @@ -81,12 +287,21 @@ test "matchAll" { const doc = try parser.documentHTMLParseFromStr(tc.html); defer parser.documentHTMLClose(doc) catch {}; - const s = try css.parse(alloc, tc.q, .{}); + const s = css.parse(alloc, tc.q, .{}) catch |e| { + std.debug.print("parse, query: {s}\n", .{tc.q}); + return e; + }; defer s.deinit(alloc); const node = Node{ .node = parser.documentHTMLToNode(doc) }; - _ = try css.matchAll(s, node, &matcher); - try std.testing.expectEqual(tc.exp, matcher.nodes.items.len); + _ = css.matchAll(s, node, &matcher) catch |e| { + std.debug.print("match, query: {s}\n", .{tc.q}); + return e; + }; + std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| { + std.debug.print("expectation, query: {s}\n", .{tc.q}); + return e; + }; } } From 565d612abbef54135b4860bb827b8add5abb0e41 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 15:40:23 +0100 Subject: [PATCH 20/28] css: trim attribute op value --- src/css/selector.zig | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/css/selector.zig b/src/css/selector.zig index b55199e0..ee637549 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -268,7 +268,7 @@ pub const Selector = union(enum) { }; }, .attribute => |v| { - const attr = try n.attr(v.key); + var attr = try n.attr(v.key); if (v.op == null) return attr != null; if (v.val == null or v.val.?.len == 0) return false; @@ -279,9 +279,30 @@ pub const Selector = union(enum) { .eql => attr != null and eql(attr.?, val, v.ci), .not_eql => attr == null or !eql(attr.?, val, v.ci), .one_of => attr != null and word(attr.?, val, v.ci), - .prefix => attr != null and starts(attr.?, val, v.ci), - .suffix => attr != null and ends(attr.?, val, v.ci), - .contains => attr != null and contains(attr.?, val, v.ci), + .prefix => { + if (attr == null) return false; + attr.? 
= std.mem.trim(u8, attr.?, &std.ascii.whitespace); + + if (attr.?.len == 0) return false; + + return starts(attr.?, val, v.ci); + }, + .suffix => { + if (attr == null) return false; + attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace); + + if (attr.?.len == 0) return false; + + return ends(attr.?, val, v.ci); + }, + .contains => { + if (attr == null) return false; + attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace); + + if (attr.?.len == 0) return false; + + return contains(attr.?, val, v.ci); + }, .prefix_hyphen => { if (attr == null) return false; if (eql(attr.?, val, v.ci)) return true; From dcc7e51556f56738151624bc7f75beb25338718e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 17:09:11 +0100 Subject: [PATCH 21/28] css: implement ~, + and > combinators --- src/css/selector.zig | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/css/selector.zig b/src/css/selector.zig index ee637549..696203d5 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -264,7 +264,34 @@ pub const Selector = union(enum) { return false; }, - else => return Error.UnknownCombinedCombinator, + .child => { + const p = try n.parent(); + if (p == null) return false; + + return try v.second.match(n) and try v.first.match(p.?); + }, + .next_sibling => { + if (!try v.second.match(n)) return false; + var c = try n.prevSibling(); + while (c != null) { + if (!c.?.isElement()) { // TODO must check text node or comment node instead. 
+ c = try c.?.prevSibling(); + continue; + } + return try v.first.match(c.?); + } + return false; + }, + .subsequent_sibling => { + if (!try v.second.match(n)) return false; + + var c = try n.prevSibling(); + while (c != null) { + if (try v.first.match(c.?)) return true; + c = try c.?.prevSibling(); + } + return false; + }, }; }, .attribute => |v| { From 8a918407839515ee6df9d6d495a69cced29b50a7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 17:09:55 +0100 Subject: [PATCH 22/28] css: comment :contains test --- src/css/libdom_test.zig | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig index b8444f06..ee651f2f 100644 --- a/src/css/libdom_test.zig +++ b/src/css/libdom_test.zig @@ -108,14 +108,14 @@ test "matchFirst" { .{ .q = "p + p", .html = "

                              ", .exp = 1 }, .{ .q = "li, p", .html = "

                              ", .exp = 1 }, .{ .q = "p +/*This is a comment*/ p", .html = "

                              ", .exp = 1 }, - .{ .q = "p:contains(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, - .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 0 }, - .{ .q = ":containsOwn(\"inner\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, - .{ .q = "p:containsOwn(\"block\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, - .{ .q = "div:has(#p1)", .html = "

                              text content

                              ", .exp = 1 }, - .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, - .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, - .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "p:contains(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 0 }, + // .{ .q = ":containsOwn(\"inner\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "p:containsOwn(\"block\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "div:has(#p1)", .html = "

                              text content

                              ", .exp = 1 }, + // .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, // .{ .q = "p:matches([\\d])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, // .{ .q = "p:matches([a-z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, // .{ .q = "p:matches([a-zA-Z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, @@ -249,14 +249,14 @@ test "matchAll" { .{ .q = "p + p", .html = "

                              ", .exp = 1 }, .{ .q = "li, p", .html = "

                              ", .exp = 3 }, .{ .q = "p +/*This is a comment*/ p", .html = "

                              ", .exp = 1 }, - .{ .q = "p:contains(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, - .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 0 }, - .{ .q = ":containsOwn(\"inner\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, - .{ .q = "p:containsOwn(\"block\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "p:contains(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "p:containsOwn(\"that wraps\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 0 }, + // .{ .q = ":containsOwn(\"inner\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, + // .{ .q = "p:containsOwn(\"block\")", .html = "

                              Text block that wraps inner text and continues

                              ", .exp = 1 }, .{ .q = "div:has(#p1)", .html = "

                              text content

                              ", .exp = 1 }, - .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, - .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 2 }, - .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "div:has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, + // .{ .q = "body :has(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 2 }, + // .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "

                              contents 1

                              contents 2

                              ", .exp = 1 }, // .{ .q = "p:matches([\\d])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, // .{ .q = "p:matches([a-z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 1 }, // .{ .q = "p:matches([a-zA-Z])", .html = "

                              0123456789

                              abcdef

                              0123ABCD

                              ", .exp = 2 }, From 2c7650cdb15ef39e8b84134034852a410fecc074 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 17:38:21 +0100 Subject: [PATCH 23/28] css: add isDocument, isText and isComment --- src/css/libdom.zig | 15 +++++++++++++++ src/css/match_test.zig | 12 ++++++++++++ src/css/selector.zig | 4 ++-- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 04c99a66..93acb790 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -46,6 +46,21 @@ pub const Node = struct { return t == .element; } + pub fn isDocument(n: Node) bool { + const t = parser.nodeType(n.node) catch return false; + return t == .document; + } + + pub fn isComment(n: Node) bool { + const t = parser.nodeType(n.node) catch return false; + return t == .comment; + } + + pub fn isText(n: Node) bool { + const t = parser.nodeType(n.node) catch return false; + return t == .text; + } + pub fn tag(n: Node) ![]const u8 { return try parser.nodeName(n.node); } diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 47fbdb78..796ee7d7 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -36,6 +36,18 @@ pub const Node = struct { return true; } + pub fn isDocument(_: *const Node) bool { + return false; + } + + pub fn isComment(_: *const Node) bool { + return false; + } + + pub fn isText(_: *const Node) bool { + return false; + } + pub fn tag(n: *const Node) ![]const u8 { return n.name; } diff --git a/src/css/selector.zig b/src/css/selector.zig index 696203d5..e678fb1d 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -274,7 +274,7 @@ pub const Selector = union(enum) { if (!try v.second.match(n)) return false; var c = try n.prevSibling(); while (c != null) { - if (!c.?.isElement()) { // TODO must check text node or comment node instead. 
+ if (c.?.isText() or c.?.isComment()) { c = try c.?.prevSibling(); continue; } @@ -395,7 +395,7 @@ pub const Selector = union(enum) { if (!n.isElement()) return false; const p = try n.parent(); - return p == null; + return (p != null and p.?.isDocument()); }, .link => { const ntag = try n.tag(); From 4e61a50946d638915b204513c8ef4d0538c73eed Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 17:45:19 +0100 Subject: [PATCH 24/28] css: add isEmptyText in node interface --- src/css/libdom.zig | 8 ++++++++ src/css/match_test.zig | 4 ++++ src/css/selector.zig | 6 ++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 93acb790..4166b216 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -61,6 +61,14 @@ pub const Node = struct { return t == .text; } + pub fn isEmptyText(n: Node) !bool { + const data = try parser.nodeTextContent(n.node); + if (data == null) return true; + if (data.?.len == 0) return true; + + return std.mem.trim(u8, data.?, &std.ascii.whitespace).len == 0; + } + pub fn tag(n: Node) ![]const u8 { return try parser.nodeName(n.node); } diff --git a/src/css/match_test.zig b/src/css/match_test.zig index 796ee7d7..f30f2e5b 100644 --- a/src/css/match_test.zig +++ b/src/css/match_test.zig @@ -48,6 +48,10 @@ pub const Node = struct { return false; } + pub fn isEmptyText(_: *const Node) !bool { + return false; + } + pub fn tag(n: *const Node) ![]const u8 { return n.name; } diff --git a/src/css/selector.zig b/src/css/selector.zig index e678fb1d..fbe782d3 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -383,8 +383,10 @@ pub const Selector = union(enum) { while (c != null) { if (c.?.isElement()) return false; - // TODO check text node content equals an empty - // string ("") + if (c.?.isText()) { + if (try c.?.isEmptyText()) continue; + return false; + } c = try c.?.nextSibling(); } From 4c50b2af1a2eb20936090e5422395c110b1c6136 Mon Sep 17 00:00:00 2001 From: Pierre 
Tachoire Date: Mon, 25 Mar 2024 17:55:30 +0100 Subject: [PATCH 25/28] css: implement legend siblings check for :disabled --- src/css/selector.zig | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/css/selector.zig b/src/css/selector.zig index fbe782d3..b5779899 100644 --- a/src/css/selector.zig +++ b/src/css/selector.zig @@ -508,14 +508,26 @@ pub const Selector = union(enum) { }; } + fn hasLegendInPreviousSiblings(n: anytype) anyerror!bool { + var c = try n.prevSibling(); + while (c != null) { + const ctag = try c.?.tag(); + if (std.ascii.eqlIgnoreCase("legend", ctag)) return true; + c = try c.?.prevSibling(); + } + return false; + } + fn inDisabledFieldset(n: anytype) anyerror!bool { const p = try n.parent(); if (p == null) return false; + const ntag = try n.tag(); const ptag = try p.?.tag(); if (std.ascii.eqlIgnoreCase("fieldset", ptag) and - try p.?.attr("disabled") != null) + try p.?.attr("disabled") != null and + (!std.ascii.eqlIgnoreCase("legend", ntag) or try hasLegendInPreviousSiblings(n))) { return true; } From 0fa49b99bfc4873bd9a611205ec31b84d771f6f4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 25 Mar 2024 18:35:28 +0100 Subject: [PATCH 26/28] css: add README --- src/css/README.md | 218 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 src/css/README.md diff --git a/src/css/README.md b/src/css/README.md new file mode 100644 index 00000000..fc2a7352 --- /dev/null +++ b/src/css/README.md @@ -0,0 +1,218 @@ +# css + +Lightpanda css implements CSS selectors parsing and matching in Zig. +This package is a port of the Go lib [andybalholm/cascadia](https://github.com/andybalholm/cascadia). + +## Usage + +### Query parser + +```zig +const css = @import("css.zig"); + +const selector = try css.parse(alloc, "h1", .{}); +defer selector.deinit(alloc); +``` + +### DOM tree match + +The lib expects a `Node` interface implementation to match your DOM tree. 
+ +```zig +pub const Node = struct { + pub fn firstChild(_: Node) !?Node { + return error.TODO; + } + + pub fn lastChild(_: Node) !?Node { + return error.TODO; + } + + pub fn nextSibling(_: Node) !?Node { + return error.TODO; + } + + pub fn prevSibling(_: Node) !?Node { + return error.TODO; + } + + pub fn parent(_: Node) !?Node { + return error.TODO; + } + + pub fn isElement(_: Node) bool { + return false; + } + + pub fn isDocument(_: Node) bool { + return false; + } + + pub fn isComment(_: Node) bool { + return false; + } + + pub fn isText(_: Node) bool { + return false; + } + + pub fn isEmptyText(_: Node) !bool { + return error.TODO; + } + + pub fn tag(_: Node) ![]const u8 { + return error.TODO; + } + + pub fn attr(_: Node, _: []const u8) !?[]const u8 { + return error.TODO; + } + + pub fn eql(_: Node, _: Node) bool { + return false; + } +}; +``` + +You also need to define a `Matcher` implementing a `match` function to +accumulate the results. + +```zig +const Matcher = struct { + const Nodes = std.ArrayList(Node); + + nodes: Nodes, + + fn init(alloc: std.mem.Allocator) Matcher { + return .{ .nodes = Nodes.init(alloc) }; + } + + fn deinit(m: *Matcher) void { + m.nodes.deinit(); + } + + pub fn match(m: *Matcher, n: Node) !void { + try m.nodes.append(n); + } +}; +``` + +Then you can use the lib itself. + +```zig +var matcher = Matcher.init(alloc); +defer matcher.deinit(); + +try css.matchAll(selector, node, &matcher); +_ = try css.matchFirst(selector, node, &matcher); // returns true if a node matched.
+``` + +## Features + +* [x] parse query selector +* [x] `matchAll` +* [x] `matchFirst` +* [ ] specificity + +### Selectors implemented + +#### Selectors + +* [x] Class selectors +* [x] Id selectors +* [x] Type selectors +* [x] Universal selectors +* [ ] Nesting selectors + +#### Combinators + +* [x] Child combinator +* [ ] Column combinator +* [x] Descendant combinator +* [ ] Namespace combinator +* [x] Next-sibling combinator +* [x] Selector list combinator +* [x] Subsequent-sibling combinator + +#### Attribute + +* [x] `[attr]` +* [x] `[attr=value]` +* [x] `[attr|=value]` +* [x] `[attr^=value]` +* [x] `[attr$=value]` +* [ ] `[attr*=value]` +* [x] `[attr operator value i]` +* [ ] `[attr operator value s]` + +#### Pseudo classes + +* [ ] `:active` +* [ ] `:any-link` +* [ ] `:autofill` +* [ ] `:blank Experimental` +* [x] `:checked` +* [ ] `:current Experimental` +* [ ] `:default` +* [ ] `:defined` +* [ ] `:dir() Experimental` +* [x] `:disabled` +* [x] `:empty` +* [x] `:enabled` +* [ ] `:first` +* [x] `:first-child` +* [x] `:first-of-type` +* [ ] `:focus` +* [ ] `:focus-visible` +* [ ] `:focus-within` +* [ ] `:fullscreen` +* [ ] `:future Experimental` +* [x] `:has() Experimental` +* [ ] `:host` +* [ ] `:host()` +* [ ] `:host-context() Experimental` +* [ ] `:hover` +* [ ] `:indeterminate` +* [ ] `:in-range` +* [ ] `:invalid` +* [ ] `:is()` +* [x] `:lang()` +* [x] `:last-child` +* [x] `:last-of-type` +* [ ] `:left` +* [x] `:link` +* [ ] `:local-link Experimental` +* [ ] `:modal` +* [x] `:not()` +* [x] `:nth-child()` +* [x] `:nth-last-child()` +* [x] `:nth-last-of-type()` +* [x] `:nth-of-type()` +* [x] `:only-child` +* [x] `:only-of-type` +* [ ] `:optional` +* [ ] `:out-of-range` +* [ ] `:past Experimental` +* [ ] `:paused` +* [ ] `:picture-in-picture` +* [ ] `:placeholder-shown` +* [ ] `:playing` +* [ ] `:read-only` +* [ ] `:read-write` +* [ ] `:required` +* [ ] `:right` +* [x] `:root` +* [ ] `:scope` +* [ ] `:state() Experimental` +* [ ] `:target` +* [ ] 
`:target-within Experimental` +* [ ] `:user-invalid Experimental` +* [ ] `:valid` +* [ ] `:visited` +* [ ] `:where()` +* [ ] `:contains()` +* [ ] `:containsown()` +* [ ] `:matches()` +* [ ] `:matchesown()` +* [x] `:root` + From 4d5f6d42fa9f9078d39b24f838c0aad4cc6056e8 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 26 Mar 2024 10:02:15 +0100 Subject: [PATCH 27/28] dom: use the css matcher for DOM --- src/dom/css.zig | 61 +++++++++++++++++++++++++++++++++++++++++++ src/dom/document.zig | 57 ++++++++++------------------------------ src/dom/element.zig | 61 ++++++++++++------------------------------- src/dom/node.zig | 2 +- src/dom/nodelist.zig | 2 +- src/html/document.zig | 2 +- 6 files changed, 94 insertions(+), 91 deletions(-) create mode 100644 src/dom/css.zig diff --git a/src/dom/css.zig b/src/dom/css.zig new file mode 100644 index 00000000..4e293c92 --- /dev/null +++ b/src/dom/css.zig @@ -0,0 +1,61 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +const css = @import("../css/css.zig"); +const Node = @import("../css/libdom.zig").Node; +const NodeList = @import("nodelist.zig").NodeList; + +const MatchFirst = struct { + n: ?*parser.Node = null, + + pub fn match(m: *MatchFirst, n: Node) !void { + m.n = n.node; + } +}; + +pub fn querySelector(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !?*parser.Node { + const ps = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true }); + defer ps.deinit(alloc); + + var m = MatchFirst{}; + + _ = try css.matchFirst(ps, Node{ .node = n }, &m); + return m.n; +} + +const MatchAll = struct { + alloc: std.mem.Allocator, + nl: NodeList, + + fn init(alloc: std.mem.Allocator) MatchAll { + return .{ + .alloc = alloc, + .nl = NodeList.init(), + }; + } + + fn deinit(m: *MatchAll) void { + m.nl.deinit(m.alloc); + } + + pub fn match(m: *MatchAll, n: Node) !void { + try m.nl.append(m.alloc, n.node); + } + + fn toOwnedList(m: *MatchAll) NodeList { + defer m.nl = NodeList.init(); +
return m.nl; + } +}; + +pub fn querySelectorAll(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !NodeList { + const ps = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true }); + defer ps.deinit(alloc); + + var m = MatchAll.init(alloc); + defer m.deinit(); + + try css.matchAll(ps, Node{ .node = n }, &m); + return m.toOwnedList(); +} diff --git a/src/dom/document.zig b/src/dom/document.zig index 7f3af5cf..c99cc5fc 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -13,6 +13,7 @@ const NodeUnion = @import("node.zig").Union; const Walker = @import("walker.zig").WalkerDepthFirst; const collection = @import("html_collection.zig"); +const css = @import("css.zig"); const Element = @import("element.zig").Element; const ElementUnion = @import("element.zig").Union; @@ -188,54 +189,18 @@ pub const Document = struct { return 1; } - // TODO netsurf doesn't handle query selectors. We have to implement a - // solution by ourselves. - // For now we handle only * and single id selector like `#foo`. - pub fn _querySelector(self: *parser.Document, selectors: []const u8) !?ElementUnion { - if (selectors.len == 0) return null; + pub fn _querySelector(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !?ElementUnion { + if (selector.len == 0) return null; - // catch-all, return the firstElementChild - if (selectors[0] == '*') return try get_firstElementChild(self); + const n = try css.querySelector(alloc, parser.documentToNode(self), selector); - // support only simple id selector. - if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null; + if (n == null) return null; - return try _getElementById(self, selectors[1..]); + return try Element.toInterface(parser.nodeToElement(n.?)); } - // TODO netsurf doesn't handle query selectors. We have to implement a - // solution by ourselves. - // We handle only * and single id selector like `#foo`. 
- pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selectors: []const u8) !NodeList { - var list = try NodeList.init(); - errdefer list.deinit(alloc); - - if (selectors.len == 0) return list; - - // catch-all, return all elements - if (selectors[0] == '*') { - // walk over the node tree fo find the node by id. - const root = parser.documentToNode(self); - const walker = Walker{}; - var next: ?*parser.Node = null; - while (true) { - next = try walker.get_next(root, next) orelse return list; - // ignore non-element nodes. - if (try parser.nodeType(next.?) != .element) { - continue; - } - try list.append(alloc, next.?); - } - } - - // support only simple id selector. - if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list; - - // walk over the node tree fo find the node by id. - const e = try parser.documentGetElementById(self, selectors[1..]) orelse return list; - try list.append(alloc, parser.elementToNode(e)); - - return list; + pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !NodeList { + return css.querySelectorAll(alloc, parser.documentToNode(self), selector); } // TODO according with https://dom.spec.whatwg.org/#parentnode, the @@ -426,6 +391,12 @@ pub fn testExecFn( .{ .src = "document.querySelector('*').nodeName", .ex = "HTML" }, .{ .src = "document.querySelector('#content').id", .ex = "content" }, .{ .src = "document.querySelector('#para').id", .ex = "para" }, + .{ .src = "document.querySelector('.ok').id", .ex = "link" }, + .{ .src = "document.querySelector('a ~ p').id", .ex = "para-empty" }, + .{ .src = "document.querySelector(':root').nodeName", .ex = "HTML" }, + + .{ .src = "document.querySelectorAll('p').length", .ex = "2" }, + .{ .src = "document.querySelectorAll('.ok').item(0).id", .ex = "link" }, }; try checkCases(js_env, &querySelector); diff --git a/src/dom/element.zig b/src/dom/element.zig index 7f58ea10..80d31992 100644 --- 
a/src/dom/element.zig +++ b/src/dom/element.zig @@ -9,6 +9,7 @@ const Variadic = jsruntime.Variadic; const collection = @import("html_collection.zig"); const writeNode = @import("../browser/dump.zig").writeNode; +const css = @import("css.zig"); const Node = @import("node.zig").Node; const Walker = @import("walker.zig").WalkerDepthFirst; @@ -263,56 +264,18 @@ pub const Element = struct { } } - // TODO netsurf doesn't handle query selectors. We have to implement a - // solution by ourselves. - // We handle only * and single id selector like `#foo`. - pub fn _querySelector(self: *parser.Element, selectors: []const u8) !?Union { - if (selectors.len == 0) return null; + pub fn _querySelector(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !?Union { + if (selector.len == 0) return null; - // catch-all, return the firstElementChild - if (selectors[0] == '*') return try get_firstElementChild(self); + const n = try css.querySelector(alloc, parser.elementToNode(self), selector); - // support only simple id selector. - if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null; + if (n == null) return null; - // walk over the node tree fo find the node by id. - const n = try getElementById(self, selectors[1..]) orelse return null; - return try toInterface(parser.nodeToElement(n)); + return try toInterface(parser.nodeToElement(n.?)); } - // TODO netsurf doesn't handle query selectors. We have to implement a - // solution by ourselves. - // We handle only * and single id selector like `#foo`. - pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selectors: []const u8) !NodeList { - var list = try NodeList.init(); - errdefer list.deinit(alloc); - - if (selectors.len == 0) return list; - - // catch-all, return all elements - if (selectors[0] == '*') { - // walk over the node tree fo find the node by id. 
- const root = parser.elementToNode(self); - const walker = Walker{}; - var next: ?*parser.Node = null; - while (true) { - next = try walker.get_next(root, next) orelse return list; - // ignore non-element nodes. - if (try parser.nodeType(next.?) != .element) { - continue; - } - try list.append(alloc, next.?); - } - } - - // support only simple id selector. - if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list; - - // walk over the node tree fo find the node by id. - const n = try getElementById(self, selectors[1..]) orelse return list; - try list.append(alloc, n); - - return list; + pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !NodeList { + return css.querySelectorAll(alloc, parser.elementToNode(self), selector); } // TODO according with https://dom.spec.whatwg.org/#parentnode, the @@ -433,6 +396,12 @@ pub fn testExecFn( .{ .src = "e.querySelector('#link').id", .ex = "link" }, .{ .src = "e.querySelector('#para').id", .ex = "para" }, .{ .src = "e.querySelector('*').id", .ex = "link" }, + .{ .src = "e.querySelector('')", .ex = "null" }, + .{ .src = "e.querySelector('*').id", .ex = "link" }, + .{ .src = "e.querySelector('#content')", .ex = "null" }, + .{ .src = "e.querySelector('#para').id", .ex = "para" }, + .{ .src = "e.querySelector('.ok').id", .ex = "link" }, + .{ .src = "e.querySelector('a ~ p').id", .ex = "para-empty" }, .{ .src = "e.querySelectorAll('foo').length", .ex = "0" }, .{ .src = "e.querySelectorAll('#foo').length", .ex = "0" }, @@ -441,6 +410,8 @@ pub fn testExecFn( .{ .src = "e.querySelectorAll('#para').length", .ex = "1" }, .{ .src = "e.querySelectorAll('#para').item(0).id", .ex = "para" }, .{ .src = "e.querySelectorAll('*').length", .ex = "4" }, + .{ .src = "e.querySelectorAll('p').length", .ex = "2" }, + .{ .src = "e.querySelectorAll('.ok').item(0).id", .ex = "link" }, }; try checkCases(js_env, &querySelector); diff --git a/src/dom/node.zig b/src/dom/node.zig index 
7e6aa383..920768ec 100644 --- a/src/dom/node.zig +++ b/src/dom/node.zig @@ -199,7 +199,7 @@ pub const Node = struct { } pub fn get_childNodes(self: *parser.Node, alloc: std.mem.Allocator) !NodeList { - var list = try NodeList.init(); + var list = NodeList.init(); errdefer list.deinit(alloc); var n = try parser.nodeFirstChild(self) orelse return list; diff --git a/src/dom/nodelist.zig b/src/dom/nodelist.zig index c685f3b5..19ca644a 100644 --- a/src/dom/nodelist.zig +++ b/src/dom/nodelist.zig @@ -26,7 +26,7 @@ pub const NodeList = struct { nodes: NodesArrayList, - pub fn init() !NodeList { + pub fn init() NodeList { return NodeList{ .nodes = NodesArrayList{}, }; diff --git a/src/html/document.zig b/src/html/document.zig index d463ab29..34943ac2 100644 --- a/src/html/document.zig +++ b/src/html/document.zig @@ -80,7 +80,7 @@ pub const HTMLDocument = struct { } pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList { - var list = try NodeList.init(); + var list = NodeList.init(); errdefer list.deinit(alloc); if (name.len == 0) return list; From 8eb4de9ccbc4d69c94704b00968011e990677b5d Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 26 Mar 2024 11:08:25 +0100 Subject: [PATCH 28/28] css: ensure node is an element before accessing to attr --- src/css/libdom.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/css/libdom.zig b/src/css/libdom.zig index 4166b216..e4e416ea 100644 --- a/src/css/libdom.zig +++ b/src/css/libdom.zig @@ -74,6 +74,7 @@ pub const Node = struct { } pub fn attr(n: Node, key: []const u8) !?[]const u8 { + if (!n.isElement()) return null; return try parser.elementGetAttribute(parser.nodeToElement(n.node), key); }