From 97c80530101e73a44e7e8db2494d2d4055087774 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Thu, 14 Mar 2024 16:39:02 +0100
Subject: [PATCH 01/28] css: implement css query parser

---
 src/css/parser.zig | 1201 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1201 insertions(+)
 create mode 100644 src/css/parser.zig

diff --git a/src/css/parser.zig b/src/css/parser.zig
new file mode 100644
index 00000000..e34e5071
--- /dev/null
+++ b/src/css/parser.zig
@@ -0,0 +1,1201 @@
+// CSS Selector parser
+// This file is a rewrite in Zig of Cascadia CSS Selector parser.
+// see https://github.com/andybalholm/cascadia
+// see https://github.com/andybalholm/cascadia/blob/master/parser.go
+const std = @import("std");
+const ascii = std.ascii;
+
+pub const AttributeOP = enum {
+    eql, // =
+    not_eql, // !=
+    one_of, // ~=
+    prefix_hyphen, // |=
+    prefix, // ^=
+    suffix, // $=
+    contains, // *=
+    regexp, // #=
+
+    fn len(op: AttributeOP) u2 {
+        if (op == .eql) return 1;
+        return 2;
+    }
+};
+
+pub const PseudoClass = enum {
+    not,
+    has,
+    haschild,
+    contains,
+    containsown,
+    matches,
+    matchesown,
+    nth_child,
+    nth_last_child,
+    nth_of_type,
+    nth_last_of_type,
+    first_child,
+    last_child,
+    first_of_type,
+    last_of_type,
+    only_child,
+    only_of_type,
+    input,
+    empty,
+    root,
+    link,
+    lang,
+    enabled,
+    disabled,
+    checked,
+    visited,
+    hover,
+    active,
+    focus,
+    target,
+    after,
+    backdrop,
+    before,
+    cue,
+    first_letter,
+    first_line,
+    grammar_error,
+    marker,
+    placeholder,
+    selection,
+    spelling_error,
+
+    fn isPseudoElement(pc: PseudoClass) bool {
+        return switch (pc) {
+            .after, .backdrop, .before, .cue, .first_letter => true,
+            .first_line, .grammar_error, .marker, .placeholder => true,
+            .selection, .spelling_error => true,
+            else => false,
+        };
+    }
+
+    fn parse(s: []const u8) ParseError!PseudoClass {
+        if (std.ascii.eqlIgnoreCase(s, "not")) return .not;
+        if (std.ascii.eqlIgnoreCase(s, "has")) return .has;
+        if (std.ascii.eqlIgnoreCase(s, "haschild")) return .haschild;
+        if (std.ascii.eqlIgnoreCase(s, "contains")) return .contains;
+        if (std.ascii.eqlIgnoreCase(s, "containsown")) return .containsown;
+        if (std.ascii.eqlIgnoreCase(s, "matches")) return .matches;
+        if (std.ascii.eqlIgnoreCase(s, "matchesown")) return .matchesown;
+        if (std.ascii.eqlIgnoreCase(s, "nth-child")) return .nth_child;
+        if (std.ascii.eqlIgnoreCase(s, "nth-last-child")) return .nth_last_child;
+        if (std.ascii.eqlIgnoreCase(s, "nth-of-type")) return .nth_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "nth-last-of-type")) return .nth_last_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "first-child")) return .first_child;
+        if (std.ascii.eqlIgnoreCase(s, "last-child")) return .last_child;
+        if (std.ascii.eqlIgnoreCase(s, "first-of-type")) return .first_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "last-of-type")) return .last_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "only-child")) return .only_child;
+        if (std.ascii.eqlIgnoreCase(s, "only-of-type")) return .only_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "input")) return .input;
+        if (std.ascii.eqlIgnoreCase(s, "empty")) return .empty;
+        if (std.ascii.eqlIgnoreCase(s, "root")) return .root;
+        if (std.ascii.eqlIgnoreCase(s, "link")) return .link;
+        if (std.ascii.eqlIgnoreCase(s, "lang")) return .lang;
+        if (std.ascii.eqlIgnoreCase(s, "enabled")) return .enabled;
+        if (std.ascii.eqlIgnoreCase(s, "disabled")) return .disabled;
+        if (std.ascii.eqlIgnoreCase(s, "checked")) return .checked;
+        if (std.ascii.eqlIgnoreCase(s, "visited")) return .visited;
+        if (std.ascii.eqlIgnoreCase(s, "hover")) return .hover;
+        if (std.ascii.eqlIgnoreCase(s, "active")) return .active;
+        if (std.ascii.eqlIgnoreCase(s, "focus")) return .focus;
+        if (std.ascii.eqlIgnoreCase(s, "target")) return .target;
+        if (std.ascii.eqlIgnoreCase(s, "after")) return .after;
+        if (std.ascii.eqlIgnoreCase(s, "backdrop")) return .backdrop;
+        if (std.ascii.eqlIgnoreCase(s, "before")) return .before;
+        if (std.ascii.eqlIgnoreCase(s, "cue")) return .cue;
+        if (std.ascii.eqlIgnoreCase(s, "first-letter")) return .first_letter;
+        if (std.ascii.eqlIgnoreCase(s, "first-line")) return .first_line;
+        if (std.ascii.eqlIgnoreCase(s, "grammar-error")) return .grammar_error;
+        if (std.ascii.eqlIgnoreCase(s, "marker")) return .marker;
+        if (std.ascii.eqlIgnoreCase(s, "placeholder")) return .placeholder;
+        if (std.ascii.eqlIgnoreCase(s, "selection")) return .selection;
+        if (std.ascii.eqlIgnoreCase(s, "spelling-error")) return .spelling_error;
+        return ParseError.InvalidPseudoClass;
+    }
+};
+
+pub const Selector = union(enum) {
+    compound: struct {
+        selectors: []Selector,
+        pseudo_elt: ?PseudoClass,
+    },
+    group: []Selector,
+    tag: []const u8,
+    id: []const u8,
+    class: []const u8,
+    attribute: struct {
+        key: []const u8,
+        val: ?[]const u8 = null,
+        op: ?AttributeOP = null,
+        regexp: ?[]const u8 = null,
+        ci: bool = false,
+    },
+    combined: struct {
+        first: *Selector,
+        second: *Selector,
+        combinator: u8,
+    },
+
+    never_match: PseudoClass,
+
+    pseudo_class: PseudoClass,
+    pseudo_class_only_child: bool,
+    pseudo_class_lang: []const u8,
+    pseudo_class_relative: struct {
+        pseudo_class: PseudoClass,
+        match: *Selector,
+    },
+    pseudo_class_contains: struct {
+        own: bool,
+        val: []const u8,
+    },
+    pseudo_class_regexp: struct {
+        own: bool,
+        regexp: []const u8,
+    },
+    pseudo_class_nth: struct {
+        a: isize,
+        b: isize,
+        of_type: bool,
+        last: bool,
+    },
+    pseudo_element: PseudoClass,
+
+    fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
+        switch (sel) {
+            .group => |v| {
+                for (v) |vv| vv.deinit(alloc);
+                alloc.free(v);
+            },
+            .compound => |v| {
+                for (v.selectors) |vv| vv.deinit(alloc);
+                alloc.free(v.selectors);
+            },
+            .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v),
+            .attribute => |att| {
+                alloc.free(att.key);
+                if (att.val) |v| alloc.free(v);
+                if (att.regexp) |v| alloc.free(v);
+            },
+            .combined => |c| {
+                c.first.deinit(alloc);
+                alloc.destroy(c.first);
+                c.second.deinit(alloc);
+                alloc.destroy(c.second);
+            },
+            .pseudo_class_relative => |v| {
+                v.match.deinit(alloc);
+                alloc.destroy(v.match);
+            },
+            .pseudo_class_contains => |v| alloc.free(v.val),
+            .pseudo_class_regexp => |v| alloc.free(v.regexp),
+            .pseudo_class, .pseudo_element, .never_match => {},
+            .pseudo_class_nth, .pseudo_class_only_child => {},
+        }
+    }
+};
+
+pub const ParseError = error{
+    ExpectedSelector,
+    ExpectedIdentifier,
+    ExpectedName,
+    ExpectedIDSelector,
+    ExpectedClassSelector,
+    ExpectedAttributeSelector,
+    ExpectedString,
+    ExpectedRegexp,
+    ExpectedPseudoClassSelector,
+    ExpectedParenthesis,
+    ExpectedParenthesisClose,
+    ExpectedNthExpression,
+    ExpectedInteger,
+    InvalidEscape,
+    EscapeLineEndingOutsideString,
+    InvalidUnicode,
+    UnicodeIsNotHandled,
+    WriteError,
+    PseudoElementNotAtSelectorEnd,
+    PseudoElementNotUnique,
+    PseudoElementDisabled,
+    InvalidAttributeOperator,
+    InvalidAttributeSelector,
+    InvalidString,
+    InvalidRegexp,
+    InvalidPseudoClassSelector,
+    EmptyPseudoClassSelector,
+    InvalidPseudoClass,
+    InvalidPseudoElement,
+    UnmatchParenthesis,
+    NotHandled,
+    UnknownPseudoSelector,
+    InvalidNthExpression,
+} || std.mem.Allocator.Error;
+
+pub const ParseOptions = struct {
+    accept_pseudo_elts: bool = true,
+};
+
+// Parse parse a selector string and returns the parsed result or an error.
+pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: ParseOptions) ParseError!Selector {
+    var p = Parser{ .s = s, .i = 0, .opts = opts };
+    return p.parseSelector(alloc);
+}
+
+const Parser = struct {
+    s: []const u8, // string to parse
+    i: usize = 0, // current position
+
+    opts: ParseOptions,
+
+    // skipWhitespace consumes whitespace characters and comments.
+    // It returns true if there was actually anything to skip.
+    fn skipWhitespace(p: *Parser) bool {
+        var i = p.i;
+        while (i < p.s.len) {
+            const c = p.s[i];
+            // Whitespaces.
+            if (ascii.isWhitespace(c)) {
+                i += 1;
+                continue;
+            }
+
+            // Comments.
+            if (c == '/') {
+                if (std.mem.startsWith(u8, p.s[i..], "/*")) {
+                    if (std.mem.indexOf(u8, p.s[i..], "*/")) |end| {
+                        i += end + "*/".len;
+                        continue;
+                    }
+                }
+            }
+            break;
+        }
+
+        if (i > p.i) {
+            p.i = i;
+            return true;
+        }
+
+        return false;
+    }
+
+    // parseSimpleSelectorSequence parses a selector sequence that applies to
+    // a single element.
+    fn parseSimpleSelectorSequence(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        if (p.i >= p.s.len) {
+            return ParseError.ExpectedSelector;
+        }
+
+        var buf = std.ArrayList(Selector).init(alloc);
+        defer buf.deinit();
+
+        switch (p.s[p.i]) {
+            '*' => {
+                // It's the universal selector. Just skip over it, since it
+                // doesn't affect the meaning.
+                p.i += 1;
+
+                // other version of universal selector
+                if (p.i + 2 < p.s.len and std.mem.eql(u8, "|*", p.s[p.i .. p.i + 2])) {
+                    p.i += 2;
+                }
+            },
+            '#', '.', '[', ':' => {
+                // There's no type selector. Wait to process the other till the
+                // main loop.
+            },
+            else => try buf.append(try p.parseTypeSelector(alloc)),
+        }
+
+        var pseudo_elt: ?PseudoClass = null;
+
+        loop: while (p.i < p.s.len) {
+            var ns: Selector = switch (p.s[p.i]) {
+                '#' => try p.parseIDSelector(alloc),
+                '.' => try p.parseClassSelector(alloc),
+                '[' => try p.parseAttributeSelector(alloc),
+                ':' => try p.parsePseudoclassSelector(alloc),
+                else => break :loop,
+            };
+            errdefer ns.deinit(alloc);
+
+            // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
+            // "Only one pseudo-element may appear per selector, and if present
+            // it must appear after the sequence of simple selectors that
+            // represents the subjects of the selector.""
+            switch (ns) {
+                .pseudo_element => |e| {
+                    //  We found a pseudo-element.
+                    //  Only one pseudo-element is accepted per selector.
+                    if (pseudo_elt != null) return ParseError.PseudoElementNotUnique;
+                    if (!p.opts.accept_pseudo_elts) return ParseError.PseudoElementDisabled;
+
+                    pseudo_elt = e;
+                    ns.deinit(alloc);
+                },
+                else => {
+                    if (pseudo_elt != null) return ParseError.PseudoElementNotAtSelectorEnd;
+                    try buf.append(ns);
+                },
+            }
+        }
+
+        // no need wrap the selectors in compoundSelector
+        if (buf.items.len == 1 and pseudo_elt == null) return buf.items[0];
+
+        return .{ .compound = .{ .selectors = try buf.toOwnedSlice(), .pseudo_elt = pseudo_elt } };
+    }
+
+    // parseTypeSelector parses a type selector (one that matches by tag name).
+    fn parseTypeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        var buf = std.ArrayList(u8).init(alloc);
+        defer buf.deinit();
+        try p.parseIdentifier(buf.writer());
+
+        return .{ .tag = try buf.toOwnedSlice() };
+    }
+
+    // parseIdentifier parses an identifier.
+    fn parseIdentifier(p: *Parser, w: anytype) ParseError!void {
+        const prefix = '-';
+        var numPrefix: usize = 0;
+
+        while (p.s.len > p.i and p.s[p.i] == prefix) {
+            p.i += 1;
+            numPrefix += 1;
+        }
+
+        if (p.s.len <= p.i) {
+            return ParseError.ExpectedSelector;
+        }
+
+        const c = p.s[p.i];
+        if (!nameStart(c) or c == '\\') {
+            return ParseError.ExpectedSelector;
+        }
+
+        var ii: usize = 0;
+        while (ii < numPrefix) {
+            w.writeByte(prefix) catch return ParseError.WriteError;
+            ii += 1;
+        }
+        try parseName(p, w);
+    }
+
+    // parseName parses a name (which is like an identifier, but doesn't have
+    // extra restrictions on the first character).
+    fn parseName(p: *Parser, w: anytype) ParseError!void {
+        var i = p.i;
+        var ok = false;
+
+        while (i < p.s.len) {
+            const c = p.s[i];
+
+            if (nameChar(c)) {
+                const start = i;
+                while (i < p.s.len and nameChar(p.s[i])) i += 1;
+                w.writeAll(p.s[start..i]) catch return ParseError.WriteError;
+                ok = true;
+            } else if (c == '\\') {
+                p.i = i;
+                try p.parseEscape(w);
+                i = p.i;
+                ok = true;
+            } else {
+                // default:
+                break;
+            }
+        }
+
+        if (!ok) return ParseError.ExpectedName;
+        p.i = i;
+    }
+
+    // parseEscape parses a backslash escape.
+    // The returned string is owned by the caller.
+    fn parseEscape(p: *Parser, w: anytype) ParseError!void {
+        if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
+            return ParseError.InvalidEscape;
+        }
+
+        const start = p.i + 1;
+        const c = p.s[start];
+        if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;
+
+        // unicode escape (hex)
+        if (ascii.isHex(c)) {
+            var i: usize = start;
+            while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
+                i += 1;
+            }
+            const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;
+            if (p.s.len > i) {
+                switch (p.s[i]) {
+                    '\r' => {
+                        i += 1;
+                        if (p.s.len > i and p.s[i] == '\n') i += 1;
+                    },
+                    ' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
+                    else => {},
+                }
+                p.i = i;
+                var buf: [4]u8 = undefined;
+                const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
+                w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
+                return;
+            }
+        }
+
+        // Return the literal character after the backslash.
+        p.i += 2;
+        w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
+    }
+
+    // parseIDSelector parses a selector that matches by id attribute.
+    fn parseIDSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        if (p.i >= p.s.len) return ParseError.ExpectedIDSelector;
+        if (p.s[p.i] != '#') return ParseError.ExpectedIDSelector;
+
+        p.i += 1;
+
+        var buf = std.ArrayList(u8).init(alloc);
+        defer buf.deinit();
+
+        try p.parseName(buf.writer());
+        return .{ .id = try buf.toOwnedSlice() };
+    }
+
+    // parseClassSelector parses a selector that matches by class attribute.
+    fn parseClassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        if (p.i >= p.s.len) return ParseError.ExpectedClassSelector;
+        if (p.s[p.i] != '.') return ParseError.ExpectedClassSelector;
+
+        p.i += 1;
+
+        var buf = std.ArrayList(u8).init(alloc);
+        defer buf.deinit();
+
+        try p.parseIdentifier(buf.writer());
+        return .{ .class = try buf.toOwnedSlice() };
+    }
+
+    // parseAttributeSelector parses a selector that matches by attribute value.
+    fn parseAttributeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+        if (p.s[p.i] != '[') return ParseError.ExpectedAttributeSelector;
+
+        p.i += 1;
+        _ = p.skipWhitespace();
+
+        var buf = std.ArrayList(u8).init(alloc);
+        defer buf.deinit();
+
+        try p.parseIdentifier(buf.writer());
+        const key = try buf.toOwnedSlice();
+        errdefer alloc.free(key);
+
+        lowerstr(key);
+
+        _ = p.skipWhitespace();
+        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+        if (p.s[p.i] == ']') {
+            p.i += 1;
+            return .{ .attribute = .{ .key = key } };
+        }
+
+        if (p.i + 2 >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+        const op = try parseAttributeOP(p.s[p.i .. p.i + 2]);
+        p.i += op.len();
+
+        _ = p.skipWhitespace();
+        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+        buf.clearRetainingCapacity();
+        var is_val: bool = undefined;
+        if (op == .regexp) {
+            is_val = false;
+            try p.parseRegex(buf.writer());
+        } else {
+            is_val = true;
+            switch (p.s[p.i]) {
+                '\'', '"' => try p.parseString(buf.writer()),
+                else => try p.parseIdentifier(buf.writer()),
+            }
+        }
+
+        _ = p.skipWhitespace();
+        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+        // check if the attribute contains an ignore case flag
+        var ci = false;
+        if (p.s[p.i] == 'i' or p.s[p.i] == 'I') {
+            ci = true;
+            p.i += 1;
+        }
+
+        _ = p.skipWhitespace();
+        if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+        if (p.s[p.i] != ']') return ParseError.InvalidAttributeSelector;
+        p.i += 1;
+
+        return .{ .attribute = .{
+            .key = key,
+            .val = if (is_val) try buf.toOwnedSlice() else null,
+            .regexp = if (!is_val) try buf.toOwnedSlice() else null,
+            .op = op,
+            .ci = ci,
+        } };
+    }
+
+    // parseString parses a single- or double-quoted string.
+    fn parseString(p: *Parser, writer: anytype) ParseError!void {
+        var i = p.i;
+        if (p.s.len < i + 2) return ParseError.ExpectedString;
+
+        const quote = p.s[i];
+        i += 1;
+
+        loop: while (i < p.s.len) {
+            switch (p.s[i]) {
+                '\\' => {
+                    if (p.s.len > i + 1) {
+                        const c = p.s[i + 1];
+                        switch (c) {
+                            '\r' => {
+                                if (p.s.len > i + 2 and p.s[i + 2] == '\n') {
+                                    i += 3;
+                                    continue :loop;
+                                }
+                                i += 2;
+                                continue :loop;
+                            },
+                            '\n', std.ascii.control_code.ff => {
+                                i += 2;
+                                continue :loop;
+                            },
+                            else => {},
+                        }
+                    }
+                    p.i = i;
+                    try p.parseEscape(writer);
+                    i = p.i;
+                },
+                '\r', '\n', std.ascii.control_code.ff => return ParseError.InvalidString,
+                else => |c| {
+                    if (c == quote) break :loop;
+                    const start = i;
+                    while (i < p.s.len) {
+                        const cc = p.s[i];
+                        if (cc == quote or cc == '\\' or c == '\r' or c == '\n' or c == std.ascii.control_code.ff) break;
+                        i += 1;
+                    }
+                    writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
+                },
+            }
+        }
+
+        if (i >= p.s.len) return ParseError.InvalidString;
+
+        // Consume the final quote.
+        i += 1;
+        p.i = i;
+    }
+
+    // parseRegex parses a regular expression; the end is defined by encountering an
+    // unmatched closing ')' or ']' which is not consumed
+    fn parseRegex(p: *Parser, writer: anytype) ParseError!void {
+        var i = p.i;
+        if (p.s.len < i + 2) return ParseError.ExpectedRegexp;
+
+        // number of open parens or brackets;
+        // when it becomes negative, finished parsing regex
+        var open: isize = 0;
+
+        loop: while (i < p.s.len) {
+            switch (p.s[i]) {
+                '(', '[' => open += 1,
+                ')', ']' => {
+                    open -= 1;
+                    if (open < 0) break :loop;
+                },
+                else => {},
+            }
+            i += 1;
+        }
+
+        if (i >= p.s.len) return ParseError.InvalidRegexp;
+        writer.writeAll(p.s[p.i..i]) catch return ParseError.WriteError;
+        p.i = i;
+    }
+
+    // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
+    // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
+    // https://drafts.csswg.org/selectors-3/#pseudo-elements
+    fn parsePseudoclassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        if (p.i >= p.s.len) return ParseError.ExpectedPseudoClassSelector;
+        if (p.s[p.i] != ':') return ParseError.ExpectedPseudoClassSelector;
+
+        p.i += 1;
+
+        var must_pseudo_elt: bool = false;
+        if (p.i >= p.s.len) return ParseError.EmptyPseudoClassSelector;
+        if (p.s[p.i] == ':') { // we found a pseudo-element
+            must_pseudo_elt = true;
+            p.i += 1;
+        }
+
+        var buf = std.ArrayList(u8).init(alloc);
+        defer buf.deinit();
+
+        try p.parseIdentifier(buf.writer());
+
+        const pseudo_class = try PseudoClass.parse(buf.items);
+
+        // reset the buffer to reuse it.
+        buf.clearRetainingCapacity();
+
+        if (must_pseudo_elt and !pseudo_class.isPseudoElement()) return ParseError.InvalidPseudoElement;
+
+        switch (pseudo_class) {
+            .not, .has, .haschild => {
+                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+
+                const sel = try p.parseSelectorGroup(alloc);
+                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+                const s = try alloc.create(Selector);
+                errdefer alloc.destroy(s);
+                s.* = sel;
+
+                return .{ .pseudo_class_relative = .{ .pseudo_class = pseudo_class, .match = s } };
+            },
+            .contains, .containsown => {
+                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+                if (p.i == p.s.len) return ParseError.UnmatchParenthesis;
+
+                switch (p.s[p.i]) {
+                    '\'', '"' => try p.parseString(buf.writer()),
+                    else => try p.parseString(buf.writer()),
+                }
+
+                _ = p.skipWhitespace();
+                if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
+                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+                const val = try buf.toOwnedSlice();
+                errdefer alloc.free(val);
+
+                lowerstr(val);
+
+                return .{ .pseudo_class_contains = .{ .own = pseudo_class == .containsown, .val = val } };
+            },
+            .matches, .matchesown => {
+                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+
+                try p.parseRegex(buf.writer());
+                if (p.i >= p.s.len) return ParseError.InvalidPseudoClassSelector;
+                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+                return .{ .pseudo_class_regexp = .{ .own = pseudo_class == .matchesown, .regexp = try buf.toOwnedSlice() } };
+            },
+            .nth_child, .nth_last_child, .nth_of_type, .nth_last_of_type => {
+                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+                const nth = try p.parseNth(alloc);
+                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+                const last = pseudo_class == .nth_last_child or pseudo_class == .nth_last_of_type;
+                const of_type = pseudo_class == .nth_of_type or pseudo_class == .nth_last_of_type;
+                return .{ .pseudo_class_nth = .{ .a = nth[0], .b = nth[1], .of_type = of_type, .last = last } };
+            },
+            .first_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = false } },
+            .last_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = true } },
+            .first_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = false } },
+            .last_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = true } },
+            .only_child => return .{ .pseudo_class_only_child = false },
+            .only_of_type => return .{ .pseudo_class_only_child = true },
+            .input, .empty, .root, .link => return .{ .pseudo_class = pseudo_class },
+            .enabled, .disabled, .checked => return .{ .pseudo_class = pseudo_class },
+            .lang => {
+                if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+                if (p.i == p.s.len) return ParseError.UnmatchParenthesis;
+
+                try p.parseIdentifier(buf.writer());
+
+                _ = p.skipWhitespace();
+                if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
+                if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+                const val = try buf.toOwnedSlice();
+                errdefer alloc.free(val);
+                lowerstr(val);
+
+                return .{ .pseudo_class_lang = val };
+            },
+            .visited, .hover, .active, .focus, .target => {
+                // Not applicable in a static context: never match.
+                return .{ .never_match = pseudo_class };
+            },
+            .after, .backdrop, .before, .cue, .first_letter => return .{ .pseudo_element = pseudo_class },
+            .first_line, .grammar_error, .marker, .placeholder => return .{ .pseudo_element = pseudo_class },
+            .selection, .spelling_error => return .{ .pseudo_element = pseudo_class },
+        }
+    }
+
+    // consumeParenthesis consumes an opening parenthesis and any following
+    // whitespace. It returns true if there was actually a parenthesis to skip.
+    fn consumeParenthesis(p: *Parser) bool {
+        if (p.i < p.s.len and p.s[p.i] == '(') {
+            p.i += 1;
+            _ = p.skipWhitespace();
+            return true;
+        }
+        return false;
+    }
+
+    // parseSelectorGroup parses a group of selectors, separated by commas.
+    fn parseSelectorGroup(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        const s = try p.parseSelector(alloc);
+
+        var buf = std.ArrayList(Selector).init(alloc);
+        defer buf.deinit();
+
+        try buf.append(s);
+
+        while (p.i < p.s.len) {
+            if (p.s[p.i] != ',') break;
+            p.i += 1;
+            const ss = try p.parseSelector(alloc);
+            try buf.append(ss);
+        }
+
+        return .{ .group = try buf.toOwnedSlice() };
+    }
+
+    // parseSelector parses a selector that may include combinators.
+    fn parseSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        _ = p.skipWhitespace();
+        var s = try p.parseSimpleSelectorSequence(alloc);
+
+        while (true) {
+            var combinator: u8 = undefined;
+            if (p.skipWhitespace()) {
+                combinator = ' ';
+            }
+            if (p.i >= p.s.len) {
+                return s;
+            }
+
+            switch (p.s[p.i]) {
+                '+', '>', '~' => {
+                    combinator = p.s[p.i];
+                    p.i += 1;
+                    _ = p.skipWhitespace();
+                },
+                // These characters can't begin a selector, but they can legally occur after one.
+                ',', ')' => return s,
+                else => {},
+            }
+
+            if (combinator == 0) {
+                return s;
+            }
+
+            const c = try p.parseSimpleSelectorSequence(alloc);
+
+            const first = try alloc.create(Selector);
+            errdefer alloc.destroy(first);
+            first.* = s;
+
+            const second = try alloc.create(Selector);
+            errdefer alloc.destroy(second);
+            second.* = c;
+
+            s = Selector{ .combined = .{ .first = first, .second = second, .combinator = combinator } };
+        }
+
+        return s;
+    }
+
+    // consumeClosingParenthesis consumes a closing parenthesis and any preceding
+    // whitespace. It returns true if there was actually a parenthesis to skip.
+    fn consumeClosingParenthesis(p: *Parser) bool {
+        const i = p.i;
+        _ = p.skipWhitespace();
+        if (p.i < p.s.len and p.s[p.i] == ')') {
+            p.i += 1;
+            return true;
+        }
+        p.i = i;
+        return false;
+    }
+
+    // parseInteger parses a  decimal integer.
+    fn parseInteger(p: *Parser) ParseError!isize {
+        var i = p.i;
+        const start = i;
+        while (i < p.s.len and '0' <= p.s[i] and p.s[i] <= '9') i += 1;
+        if (i == start) return ParseError.ExpectedInteger;
+        p.i = i;
+
+        return std.fmt.parseUnsigned(isize, p.s[start..i], 10) catch ParseError.ExpectedInteger;
+    }
+
+    fn parseNthReadN(p: *Parser, a: isize) ParseError![2]isize {
+        _ = p.skipWhitespace();
+        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+        return switch (p.s[p.i]) {
+            '+' => {
+                p.i += 1;
+                _ = p.skipWhitespace();
+                const b = try p.parseInteger();
+                return .{ a, b };
+            },
+            '-' => {
+                p.i += 1;
+                _ = p.skipWhitespace();
+                const b = try p.parseInteger();
+                return .{ a, -b };
+            },
+            else => .{ a, 0 },
+        };
+    }
+
+    fn parseNthReadA(p: *Parser, a: isize) ParseError![2]isize {
+        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+        return switch (p.s[p.i]) {
+            'n', 'N' => {
+                p.i += 1;
+                return p.parseNthReadN(a);
+            },
+            else => .{ 0, a },
+        };
+    }
+
+    fn parseNthNegativeA(p: *Parser) ParseError![2]isize {
+        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+        const c = p.s[p.i];
+        if (std.ascii.isDigit(c)) {
+            const a = try p.parseInteger() * -1;
+            return p.parseNthReadA(a);
+        }
+        if (c == 'n' or c == 'N') {
+            p.i += 1;
+            return p.parseNthReadN(-1);
+        }
+
+        return ParseError.InvalidNthExpression;
+    }
+
+    fn parseNthPositiveA(p: *Parser) ParseError![2]isize {
+        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+        const c = p.s[p.i];
+        if (std.ascii.isDigit(c)) {
+            const a = try p.parseInteger() * -1;
+            return p.parseNthReadA(a);
+        }
+        if (c == 'n' or c == 'N') {
+            p.i += 1;
+            return p.parseNthReadN(1);
+        }
+
+        return ParseError.InvalidNthExpression;
+    }
+
+    // parseNth parses the argument for :nth-child (normally of the form an+b).
+    fn parseNth(p: *Parser, alloc: std.mem.Allocator) ParseError![2]isize {
+        // initial state
+        if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+        return switch (p.s[p.i]) {
+            '-' => {
+                p.i += 1;
+                return p.parseNthNegativeA();
+            },
+            '+' => {
+                p.i += 1;
+                return p.parseNthPositiveA();
+            },
+            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' => p.parseNthPositiveA(),
+            'n', 'N' => {
+                p.i += 1;
+                return p.parseNthReadN(1);
+            },
+            'o', 'O', 'e', 'E' => {
+                var buf = std.ArrayList(u8).init(alloc);
+                defer buf.deinit();
+
+                try p.parseName(buf.writer());
+
+                if (std.ascii.eqlIgnoreCase("odd", buf.items)) return .{ 2, 1 };
+                if (std.ascii.eqlIgnoreCase("even", buf.items)) return .{ 2, 0 };
+
+                return ParseError.InvalidNthExpression;
+            },
+            else => ParseError.InvalidNthExpression,
+        };
+    }
+};
+
+// nameStart returns whether c can be the first character of an identifier
+// (not counting an initial hyphen, or an escape sequence).
+fn nameStart(c: u8) bool {
+    return 'a' <= c and c <= 'z' or 'A' <= c and c <= 'Z' or c == '_' or c > 127;
+}
+
+// nameChar returns whether c can be a character within an identifier
+// (not counting an escape sequence).
+fn nameChar(c: u8) bool {
+    return 'a' <= c and c <= 'z' or 'A' <= c and c <= 'Z' or c == '_' or c > 127 or
+        c == '-' or '0' <= c and c <= '9';
+}
+
+fn lowerstr(str: []u8) void {
+    for (str, 0..) |c, i| {
+        str[i] = std.ascii.toLower(c);
+    }
+}
+
+// parseAttributeOP parses an AttributeOP from a string of 1 or 2 bytes.
+fn parseAttributeOP(s: []const u8) ParseError!AttributeOP {
+    if (s.len < 1 or s.len > 2) return ParseError.InvalidAttributeOperator;
+
+    // if the first sign is equal, we don't check anything else.
+    if (s[0] == '=') return .eql;
+
+    if (s.len != 2 or s[1] != '=') return ParseError.InvalidAttributeOperator;
+
+    return switch (s[0]) {
+        '=' => .eql,
+        '!' => .not_eql,
+        '~' => .one_of,
+        '|' => .prefix_hyphen,
+        '^' => .prefix,
+        '$' => .suffix,
+        '*' => .contains,
+        '#' => .regexp,
+        else => ParseError.InvalidAttributeOperator,
+    };
+}
+
+test "parser.skipWhitespace" {
+    const testcases = [_]struct {
+        s: []const u8,
+        i: usize,
+        r: bool,
+    }{
+        .{ .s = "", .i = 0, .r = false },
+        .{ .s = "foo", .i = 0, .r = false },
+        .{ .s = " ", .i = 1, .r = true },
+        .{ .s = " foo", .i = 1, .r = true },
+        .{ .s = "/* foo */ bar", .i = 10, .r = true },
+        .{ .s = "/* foo", .i = 0, .r = false },
+    };
+
+    for (testcases) |tc| {
+        var p = Parser{ .s = tc.s, .opts = .{} };
+        const res = p.skipWhitespace();
+        try std.testing.expectEqual(tc.r, res);
+        try std.testing.expectEqual(tc.i, p.i);
+    }
+}
+
+test "parser.parseIdentifier" {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_]struct {
+        s: []const u8, // given value
+        exp: []const u8, // expected value
+        err: bool = false,
+    }{
+        .{ .s = "x", .exp = "x" },
+        .{ .s = "96", .exp = "", .err = true },
+        .{ .s = "-x", .exp = "-x" },
+        .{ .s = "r\\e9 sumé", .exp = "résumé" },
+        .{ .s = "r\\0000e9 sumé", .exp = "résumé" },
+        .{ .s = "r\\0000e9sumé", .exp = "résumé" },
+        .{ .s = "a\\\"b", .exp = "a\"b" },
+    };
+
+    var buf = std.ArrayList(u8).init(alloc);
+    defer buf.deinit();
+
+    for (testcases) |tc| {
+        buf.clearRetainingCapacity();
+
+        var p = Parser{ .s = tc.s, .opts = .{} };
+        p.parseIdentifier(buf.writer()) catch |e| {
+            // if error was expected, continue.
+            if (tc.err) continue;
+
+            std.debug.print("test case {s}\n", .{tc.s});
+            return e;
+        };
+        std.testing.expectEqualDeep(tc.exp, buf.items) catch |e| {
+            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
+            return e;
+        };
+    }
+}
+
+test "parser.parseString" {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_]struct {
+        s: []const u8, // given value
+        exp: []const u8, // expected value
+        err: bool = false,
+    }{
+        .{ .s = "\"x\"", .exp = "x" },
+        .{ .s = "'x'", .exp = "x" },
+        .{ .s = "'x", .exp = "", .err = true },
+        .{ .s = "'x\\\r\nx'", .exp = "xx" },
+        .{ .s = "\"r\\e9 sumé\"", .exp = "résumé" },
+        .{ .s = "\"r\\0000e9 sumé\"", .exp = "résumé" },
+        .{ .s = "\"r\\0000e9sumé\"", .exp = "résumé" },
+        .{ .s = "\"a\\\"b\"", .exp = "a\"b" },
+        .{ .s = "\"\\\n\"", .exp = "" },
+        .{ .s = "\"hello world\"", .exp = "hello world" },
+    };
+
+    var buf = std.ArrayList(u8).init(alloc);
+    defer buf.deinit();
+
+    for (testcases) |tc| {
+        buf.clearRetainingCapacity();
+
+        var p = Parser{ .s = tc.s, .opts = .{} };
+        p.parseString(buf.writer()) catch |e| {
+            // if error was expected, continue.
+            if (tc.err) continue;
+
+            std.debug.print("test case {s}\n", .{tc.s});
+            return e;
+        };
+        std.testing.expectEqualDeep(tc.exp, buf.items) catch |e| {
+            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
+            return e;
+        };
+    }
+}
+
+test "parser." {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_][]const u8{
+        "address",
+        "*",
+        "#foo",
+        "li#t1",
+        "*#t4",
+        ".t1",
+        "p.t1",
+        "div.teST",
+        ".t1.fail",
+        "p.t1.t2",
+        "p.--t1",
+        "p.--t1.--t2",
+        "p[title]",
+        "div[class=\"red\" i]",
+        "address[title=\"foo\"]",
+        "address[title=\"FoOIgnoRECaSe\" i]",
+        "address[title!=\"foo\"]",
+        "address[title!=\"foo\" i]",
+        "p[title!=\"FooBarUFoo\" i]",
+        "[  \t title        ~=       foo    ]",
+        "p[title~=\"FOO\" i]",
+        "p[title~=toofoo i]",
+        "[title~=\"hello world\"]",
+        "[title~=\"hello\" i]",
+        "[title~=\"hello\"          I]",
+        "[lang|=\"en\"]",
+        "[lang|=\"EN\" i]",
+        "[lang|=\"EN\"     i]",
+        "[title^=\"foo\"]",
+        "[title^=\"foo\" i]",
+        "[title$=\"bar\"]",
+        "[title$=\"BAR\" i]",
+        "[title*=\"bar\"]",
+        "[title*=\"BaRu\" i]",
+        "[title*=\"BaRu\" I]",
+        "p[class$=\" \"]",
+        "p[class$=\"\"]",
+        "p[class^=\" \"]",
+        "p[class^=\"\"]",
+        "p[class*=\" \"]",
+        "p[class*=\"\"]",
+        "input[name=Sex][value=F]",
+        "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]",
+        ".t1:not(.t2)",
+        "div:not(.t1)",
+        "div:not([class=\"t2\"])",
+        "li:nth-child(odd)",
+        "li:nth-child(even)",
+        "li:nth-child(-n+2)",
+        "li:nth-child(3n+1)",
+        "li:nth-last-child(odd)",
+        "li:nth-last-child(even)",
+        "li:nth-last-child(-n+2)",
+        "li:nth-last-child(3n+1)",
+        "span:first-child",
+        "span:last-child",
+        "p:nth-of-type(2)",
+        "p:nth-last-of-type(2)",
+        "p:last-of-type",
+        "p:first-of-type",
+        "p:only-child",
+        "p:only-of-type",
+        ":empty",
+        "div p",
+        "div table p",
+        "div > p",
+        "p ~ p",
+        "p + p",
+        "li, p",
+        "p +/*This is a comment*/ p",
+        "p:contains(\"that wraps\")",
+        "p:containsOwn(\"that wraps\")",
+        ":containsOwn(\"inner\")",
+        "p:containsOwn(\"block\")",
+        "div:has(#p1)",
+        "div:has(:containsOwn(\"2\"))",
+        "body :has(:containsOwn(\"2\"))",
+        "body :haschild(:containsOwn(\"2\"))",
+        "p:matches([\\d])",
+        "p:matches([a-z])",
+        "p:matches([a-zA-Z])",
+        "p:matches([^\\d])",
+        "p:matches(^(0|a))",
+        "p:matches(^\\d+$)",
+        "p:not(:matches(^\\d+$))",
+        "div :matchesOwn(^\\d+$)",
+        "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])",
+        "[href#=(^https:\\/\\/[^\\/]*\\/?news)]",
+        ":input",
+        ":root",
+        "*:root",
+        "html:nth-child(1)",
+        "*:root:first-child",
+        "*:root:nth-child(1)",
+        "a:not(:root)",
+        "body > *:nth-child(3n+2)",
+        "input:disabled",
+        ":disabled",
+        ":enabled",
+        "div.class1, div.class2",
+    };
+
+    for (testcases) |tc| {
+        const s = Parse(alloc, tc, .{}) catch |e| {
+            std.debug.print("query {s}", .{tc});
+            return e;
+        };
+        defer s.deinit(alloc);
+    }
+}

From 6cf805360dab189ffceec50bee687b44799da86c Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Fri, 15 Mar 2024 08:59:41 +0100
Subject: [PATCH 02/28] css: extract selector in its own file

---
 src/css/parser.zig   | 200 ++-----------------------------------------
 src/css/selector.zig | 200 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+), 195 deletions(-)
 create mode 100644 src/css/selector.zig

diff --git a/src/css/parser.zig b/src/css/parser.zig
index e34e5071..d6905bec 100644
--- a/src/css/parser.zig
+++ b/src/css/parser.zig
@@ -5,200 +5,10 @@
 const std = @import("std");
 const ascii = std.ascii;
 
-pub const AttributeOP = enum {
-    eql, // =
-    not_eql, // !=
-    one_of, // ~=
-    prefix_hyphen, // |=
-    prefix, // ^=
-    suffix, // $=
-    contains, // *=
-    regexp, // #=
-
-    fn len(op: AttributeOP) u2 {
-        if (op == .eql) return 1;
-        return 2;
-    }
-};
-
-pub const PseudoClass = enum {
-    not,
-    has,
-    haschild,
-    contains,
-    containsown,
-    matches,
-    matchesown,
-    nth_child,
-    nth_last_child,
-    nth_of_type,
-    nth_last_of_type,
-    first_child,
-    last_child,
-    first_of_type,
-    last_of_type,
-    only_child,
-    only_of_type,
-    input,
-    empty,
-    root,
-    link,
-    lang,
-    enabled,
-    disabled,
-    checked,
-    visited,
-    hover,
-    active,
-    focus,
-    target,
-    after,
-    backdrop,
-    before,
-    cue,
-    first_letter,
-    first_line,
-    grammar_error,
-    marker,
-    placeholder,
-    selection,
-    spelling_error,
-
-    fn isPseudoElement(pc: PseudoClass) bool {
-        return switch (pc) {
-            .after, .backdrop, .before, .cue, .first_letter => true,
-            .first_line, .grammar_error, .marker, .placeholder => true,
-            .selection, .spelling_error => true,
-            else => false,
-        };
-    }
-
-    fn parse(s: []const u8) ParseError!PseudoClass {
-        if (std.ascii.eqlIgnoreCase(s, "not")) return .not;
-        if (std.ascii.eqlIgnoreCase(s, "has")) return .has;
-        if (std.ascii.eqlIgnoreCase(s, "haschild")) return .haschild;
-        if (std.ascii.eqlIgnoreCase(s, "contains")) return .contains;
-        if (std.ascii.eqlIgnoreCase(s, "containsown")) return .containsown;
-        if (std.ascii.eqlIgnoreCase(s, "matches")) return .matches;
-        if (std.ascii.eqlIgnoreCase(s, "matchesown")) return .matchesown;
-        if (std.ascii.eqlIgnoreCase(s, "nth-child")) return .nth_child;
-        if (std.ascii.eqlIgnoreCase(s, "nth-last-child")) return .nth_last_child;
-        if (std.ascii.eqlIgnoreCase(s, "nth-of-type")) return .nth_of_type;
-        if (std.ascii.eqlIgnoreCase(s, "nth-last-of-type")) return .nth_last_of_type;
-        if (std.ascii.eqlIgnoreCase(s, "first-child")) return .first_child;
-        if (std.ascii.eqlIgnoreCase(s, "last-child")) return .last_child;
-        if (std.ascii.eqlIgnoreCase(s, "first-of-type")) return .first_of_type;
-        if (std.ascii.eqlIgnoreCase(s, "last-of-type")) return .last_of_type;
-        if (std.ascii.eqlIgnoreCase(s, "only-child")) return .only_child;
-        if (std.ascii.eqlIgnoreCase(s, "only-of-type")) return .only_of_type;
-        if (std.ascii.eqlIgnoreCase(s, "input")) return .input;
-        if (std.ascii.eqlIgnoreCase(s, "empty")) return .empty;
-        if (std.ascii.eqlIgnoreCase(s, "root")) return .root;
-        if (std.ascii.eqlIgnoreCase(s, "link")) return .link;
-        if (std.ascii.eqlIgnoreCase(s, "lang")) return .lang;
-        if (std.ascii.eqlIgnoreCase(s, "enabled")) return .enabled;
-        if (std.ascii.eqlIgnoreCase(s, "disabled")) return .disabled;
-        if (std.ascii.eqlIgnoreCase(s, "checked")) return .checked;
-        if (std.ascii.eqlIgnoreCase(s, "visited")) return .visited;
-        if (std.ascii.eqlIgnoreCase(s, "hover")) return .hover;
-        if (std.ascii.eqlIgnoreCase(s, "active")) return .active;
-        if (std.ascii.eqlIgnoreCase(s, "focus")) return .focus;
-        if (std.ascii.eqlIgnoreCase(s, "target")) return .target;
-        if (std.ascii.eqlIgnoreCase(s, "after")) return .after;
-        if (std.ascii.eqlIgnoreCase(s, "backdrop")) return .backdrop;
-        if (std.ascii.eqlIgnoreCase(s, "before")) return .before;
-        if (std.ascii.eqlIgnoreCase(s, "cue")) return .cue;
-        if (std.ascii.eqlIgnoreCase(s, "first-letter")) return .first_letter;
-        if (std.ascii.eqlIgnoreCase(s, "first-line")) return .first_line;
-        if (std.ascii.eqlIgnoreCase(s, "grammar-error")) return .grammar_error;
-        if (std.ascii.eqlIgnoreCase(s, "marker")) return .marker;
-        if (std.ascii.eqlIgnoreCase(s, "placeholder")) return .placeholder;
-        if (std.ascii.eqlIgnoreCase(s, "selection")) return .selection;
-        if (std.ascii.eqlIgnoreCase(s, "spelling-error")) return .spelling_error;
-        return ParseError.InvalidPseudoClass;
-    }
-};
-
-pub const Selector = union(enum) {
-    compound: struct {
-        selectors: []Selector,
-        pseudo_elt: ?PseudoClass,
-    },
-    group: []Selector,
-    tag: []const u8,
-    id: []const u8,
-    class: []const u8,
-    attribute: struct {
-        key: []const u8,
-        val: ?[]const u8 = null,
-        op: ?AttributeOP = null,
-        regexp: ?[]const u8 = null,
-        ci: bool = false,
-    },
-    combined: struct {
-        first: *Selector,
-        second: *Selector,
-        combinator: u8,
-    },
-
-    never_match: PseudoClass,
-
-    pseudo_class: PseudoClass,
-    pseudo_class_only_child: bool,
-    pseudo_class_lang: []const u8,
-    pseudo_class_relative: struct {
-        pseudo_class: PseudoClass,
-        match: *Selector,
-    },
-    pseudo_class_contains: struct {
-        own: bool,
-        val: []const u8,
-    },
-    pseudo_class_regexp: struct {
-        own: bool,
-        regexp: []const u8,
-    },
-    pseudo_class_nth: struct {
-        a: isize,
-        b: isize,
-        of_type: bool,
-        last: bool,
-    },
-    pseudo_element: PseudoClass,
-
-    fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
-        switch (sel) {
-            .group => |v| {
-                for (v) |vv| vv.deinit(alloc);
-                alloc.free(v);
-            },
-            .compound => |v| {
-                for (v.selectors) |vv| vv.deinit(alloc);
-                alloc.free(v.selectors);
-            },
-            .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v),
-            .attribute => |att| {
-                alloc.free(att.key);
-                if (att.val) |v| alloc.free(v);
-                if (att.regexp) |v| alloc.free(v);
-            },
-            .combined => |c| {
-                c.first.deinit(alloc);
-                alloc.destroy(c.first);
-                c.second.deinit(alloc);
-                alloc.destroy(c.second);
-            },
-            .pseudo_class_relative => |v| {
-                v.match.deinit(alloc);
-                alloc.destroy(v.match);
-            },
-            .pseudo_class_contains => |v| alloc.free(v.val),
-            .pseudo_class_regexp => |v| alloc.free(v.regexp),
-            .pseudo_class, .pseudo_element, .never_match => {},
-            .pseudo_class_nth, .pseudo_class_only_child => {},
-        }
-    }
-};
+const selector = @import("selector.zig");
+const Selector = selector.Selector;
+const PseudoClass = selector.PseudoClass;
+const AttributeOP = selector.AttributeOP;
 
 pub const ParseError = error{
     ExpectedSelector,
@@ -234,7 +44,7 @@ pub const ParseError = error{
     NotHandled,
     UnknownPseudoSelector,
     InvalidNthExpression,
-} || std.mem.Allocator.Error;
+} || PseudoClass.Error || std.mem.Allocator.Error;
 
 pub const ParseOptions = struct {
     accept_pseudo_elts: bool = true,
diff --git a/src/css/selector.zig b/src/css/selector.zig
new file mode 100644
index 00000000..f112d397
--- /dev/null
+++ b/src/css/selector.zig
@@ -0,0 +1,200 @@
+const std = @import("std");
+
+pub const AttributeOP = enum {
+    eql, // =
+    not_eql, // !=
+    one_of, // ~=
+    prefix_hyphen, // |=
+    prefix, // ^=
+    suffix, // $=
+    contains, // *=
+    regexp, // #=
+
+    pub fn len(op: AttributeOP) u2 {
+        if (op == .eql) return 1;
+        return 2;
+    }
+};
+
+pub const PseudoClass = enum {
+    not,
+    has,
+    haschild,
+    contains,
+    containsown,
+    matches,
+    matchesown,
+    nth_child,
+    nth_last_child,
+    nth_of_type,
+    nth_last_of_type,
+    first_child,
+    last_child,
+    first_of_type,
+    last_of_type,
+    only_child,
+    only_of_type,
+    input,
+    empty,
+    root,
+    link,
+    lang,
+    enabled,
+    disabled,
+    checked,
+    visited,
+    hover,
+    active,
+    focus,
+    target,
+    after,
+    backdrop,
+    before,
+    cue,
+    first_letter,
+    first_line,
+    grammar_error,
+    marker,
+    placeholder,
+    selection,
+    spelling_error,
+
+    pub const Error = error{
+        InvalidPseudoClass,
+    };
+
+    pub fn isPseudoElement(pc: PseudoClass) bool {
+        return switch (pc) {
+            .after, .backdrop, .before, .cue, .first_letter => true,
+            .first_line, .grammar_error, .marker, .placeholder => true,
+            .selection, .spelling_error => true,
+            else => false,
+        };
+    }
+
+    pub fn parse(s: []const u8) Error!PseudoClass {
+        if (std.ascii.eqlIgnoreCase(s, "not")) return .not;
+        if (std.ascii.eqlIgnoreCase(s, "has")) return .has;
+        if (std.ascii.eqlIgnoreCase(s, "haschild")) return .haschild;
+        if (std.ascii.eqlIgnoreCase(s, "contains")) return .contains;
+        if (std.ascii.eqlIgnoreCase(s, "containsown")) return .containsown;
+        if (std.ascii.eqlIgnoreCase(s, "matches")) return .matches;
+        if (std.ascii.eqlIgnoreCase(s, "matchesown")) return .matchesown;
+        if (std.ascii.eqlIgnoreCase(s, "nth-child")) return .nth_child;
+        if (std.ascii.eqlIgnoreCase(s, "nth-last-child")) return .nth_last_child;
+        if (std.ascii.eqlIgnoreCase(s, "nth-of-type")) return .nth_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "nth-last-of-type")) return .nth_last_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "first-child")) return .first_child;
+        if (std.ascii.eqlIgnoreCase(s, "last-child")) return .last_child;
+        if (std.ascii.eqlIgnoreCase(s, "first-of-type")) return .first_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "last-of-type")) return .last_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "only-child")) return .only_child;
+        if (std.ascii.eqlIgnoreCase(s, "only-of-type")) return .only_of_type;
+        if (std.ascii.eqlIgnoreCase(s, "input")) return .input;
+        if (std.ascii.eqlIgnoreCase(s, "empty")) return .empty;
+        if (std.ascii.eqlIgnoreCase(s, "root")) return .root;
+        if (std.ascii.eqlIgnoreCase(s, "link")) return .link;
+        if (std.ascii.eqlIgnoreCase(s, "lang")) return .lang;
+        if (std.ascii.eqlIgnoreCase(s, "enabled")) return .enabled;
+        if (std.ascii.eqlIgnoreCase(s, "disabled")) return .disabled;
+        if (std.ascii.eqlIgnoreCase(s, "checked")) return .checked;
+        if (std.ascii.eqlIgnoreCase(s, "visited")) return .visited;
+        if (std.ascii.eqlIgnoreCase(s, "hover")) return .hover;
+        if (std.ascii.eqlIgnoreCase(s, "active")) return .active;
+        if (std.ascii.eqlIgnoreCase(s, "focus")) return .focus;
+        if (std.ascii.eqlIgnoreCase(s, "target")) return .target;
+        if (std.ascii.eqlIgnoreCase(s, "after")) return .after;
+        if (std.ascii.eqlIgnoreCase(s, "backdrop")) return .backdrop;
+        if (std.ascii.eqlIgnoreCase(s, "before")) return .before;
+        if (std.ascii.eqlIgnoreCase(s, "cue")) return .cue;
+        if (std.ascii.eqlIgnoreCase(s, "first-letter")) return .first_letter;
+        if (std.ascii.eqlIgnoreCase(s, "first-line")) return .first_line;
+        if (std.ascii.eqlIgnoreCase(s, "grammar-error")) return .grammar_error;
+        if (std.ascii.eqlIgnoreCase(s, "marker")) return .marker;
+        if (std.ascii.eqlIgnoreCase(s, "placeholder")) return .placeholder;
+        if (std.ascii.eqlIgnoreCase(s, "selection")) return .selection;
+        if (std.ascii.eqlIgnoreCase(s, "spelling-error")) return .spelling_error;
+        return Error.InvalidPseudoClass;
+    }
+};
+
+pub const Selector = union(enum) {
+    compound: struct {
+        selectors: []Selector,
+        pseudo_elt: ?PseudoClass,
+    },
+    group: []Selector,
+    tag: []const u8,
+    id: []const u8,
+    class: []const u8,
+    attribute: struct {
+        key: []const u8,
+        val: ?[]const u8 = null,
+        op: ?AttributeOP = null,
+        regexp: ?[]const u8 = null,
+        ci: bool = false,
+    },
+    combined: struct {
+        first: *Selector,
+        second: *Selector,
+        combinator: u8,
+    },
+
+    never_match: PseudoClass,
+
+    pseudo_class: PseudoClass,
+    pseudo_class_only_child: bool,
+    pseudo_class_lang: []const u8,
+    pseudo_class_relative: struct {
+        pseudo_class: PseudoClass,
+        match: *Selector,
+    },
+    pseudo_class_contains: struct {
+        own: bool,
+        val: []const u8,
+    },
+    pseudo_class_regexp: struct {
+        own: bool,
+        regexp: []const u8,
+    },
+    pseudo_class_nth: struct {
+        a: isize,
+        b: isize,
+        of_type: bool,
+        last: bool,
+    },
+    pseudo_element: PseudoClass,
+
+    pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
+        switch (sel) {
+            .group => |v| {
+                for (v) |vv| vv.deinit(alloc);
+                alloc.free(v);
+            },
+            .compound => |v| {
+                for (v.selectors) |vv| vv.deinit(alloc);
+                alloc.free(v.selectors);
+            },
+            .tag, .id, .class, .pseudo_class_lang => |v| alloc.free(v),
+            .attribute => |att| {
+                alloc.free(att.key);
+                if (att.val) |v| alloc.free(v);
+                if (att.regexp) |v| alloc.free(v);
+            },
+            .combined => |c| {
+                c.first.deinit(alloc);
+                alloc.destroy(c.first);
+                c.second.deinit(alloc);
+                alloc.destroy(c.second);
+            },
+            .pseudo_class_relative => |v| {
+                v.match.deinit(alloc);
+                alloc.destroy(v.match);
+            },
+            .pseudo_class_contains => |v| alloc.free(v.val),
+            .pseudo_class_regexp => |v| alloc.free(v.regexp),
+            .pseudo_class, .pseudo_element, .never_match => {},
+            .pseudo_class_nth, .pseudo_class_only_child => {},
+        }
+    }
+};

From d9c76aa13e4defe1e83b7ace2f926601a0fe9536 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Fri, 15 Mar 2024 09:06:34 +0100
Subject: [PATCH 03/28] css: extract public api on its own file

---
 src/css/css.zig    | 127 +++++++++++++++++++++++++++++++++++++++++++++
 src/css/parser.zig | 127 ++-------------------------------------------
 2 files changed, 132 insertions(+), 122 deletions(-)
 create mode 100644 src/css/css.zig

diff --git a/src/css/css.zig b/src/css/css.zig
new file mode 100644
index 00000000..6092b1d0
--- /dev/null
+++ b/src/css/css.zig
@@ -0,0 +1,127 @@
+// CSS Selector parser and query
+// This package is a rewrite in Zig of Cascadia CSS Selector parser.
+// see https://github.com/andybalholm/cascadia
+const std = @import("std");
+const Selector = @import("selector.zig").Selector;
+const parser = @import("parser.zig");
+
+// Parse parse a selector string and returns the parsed result or an error.
+pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector {
+    var p = parser.Parser{ .s = s, .i = 0, .opts = opts };
+    return p.parse(alloc);
+}
+
+test "Parse" {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_][]const u8{
+        "address",
+        "*",
+        "#foo",
+        "li#t1",
+        "*#t4",
+        ".t1",
+        "p.t1",
+        "div.teST",
+        ".t1.fail",
+        "p.t1.t2",
+        "p.--t1",
+        "p.--t1.--t2",
+        "p[title]",
+        "div[class=\"red\" i]",
+        "address[title=\"foo\"]",
+        "address[title=\"FoOIgnoRECaSe\" i]",
+        "address[title!=\"foo\"]",
+        "address[title!=\"foo\" i]",
+        "p[title!=\"FooBarUFoo\" i]",
+        "[  \t title        ~=       foo    ]",
+        "p[title~=\"FOO\" i]",
+        "p[title~=toofoo i]",
+        "[title~=\"hello world\"]",
+        "[title~=\"hello\" i]",
+        "[title~=\"hello\"          I]",
+        "[lang|=\"en\"]",
+        "[lang|=\"EN\" i]",
+        "[lang|=\"EN\"     i]",
+        "[title^=\"foo\"]",
+        "[title^=\"foo\" i]",
+        "[title$=\"bar\"]",
+        "[title$=\"BAR\" i]",
+        "[title*=\"bar\"]",
+        "[title*=\"BaRu\" i]",
+        "[title*=\"BaRu\" I]",
+        "p[class$=\" \"]",
+        "p[class$=\"\"]",
+        "p[class^=\" \"]",
+        "p[class^=\"\"]",
+        "p[class*=\" \"]",
+        "p[class*=\"\"]",
+        "input[name=Sex][value=F]",
+        "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]",
+        ".t1:not(.t2)",
+        "div:not(.t1)",
+        "div:not([class=\"t2\"])",
+        "li:nth-child(odd)",
+        "li:nth-child(even)",
+        "li:nth-child(-n+2)",
+        "li:nth-child(3n+1)",
+        "li:nth-last-child(odd)",
+        "li:nth-last-child(even)",
+        "li:nth-last-child(-n+2)",
+        "li:nth-last-child(3n+1)",
+        "span:first-child",
+        "span:last-child",
+        "p:nth-of-type(2)",
+        "p:nth-last-of-type(2)",
+        "p:last-of-type",
+        "p:first-of-type",
+        "p:only-child",
+        "p:only-of-type",
+        ":empty",
+        "div p",
+        "div table p",
+        "div > p",
+        "p ~ p",
+        "p + p",
+        "li, p",
+        "p +/*This is a comment*/ p",
+        "p:contains(\"that wraps\")",
+        "p:containsOwn(\"that wraps\")",
+        ":containsOwn(\"inner\")",
+        "p:containsOwn(\"block\")",
+        "div:has(#p1)",
+        "div:has(:containsOwn(\"2\"))",
+        "body :has(:containsOwn(\"2\"))",
+        "body :haschild(:containsOwn(\"2\"))",
+        "p:matches([\\d])",
+        "p:matches([a-z])",
+        "p:matches([a-zA-Z])",
+        "p:matches([^\\d])",
+        "p:matches(^(0|a))",
+        "p:matches(^\\d+$)",
+        "p:not(:matches(^\\d+$))",
+        "div :matchesOwn(^\\d+$)",
+        "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])",
+        "[href#=(^https:\\/\\/[^\\/]*\\/?news)]",
+        ":input",
+        ":root",
+        "*:root",
+        "html:nth-child(1)",
+        "*:root:first-child",
+        "*:root:nth-child(1)",
+        "a:not(:root)",
+        "body > *:nth-child(3n+2)",
+        "input:disabled",
+        ":disabled",
+        ":enabled",
+        "div.class1, div.class2",
+    };
+
+    for (testcases) |tc| {
+        const s = Parse(alloc, tc, .{}) catch |e| {
+            std.debug.print("query {s}", .{tc});
+            return e;
+        };
+        defer s.deinit(alloc);
+    }
+}
diff --git a/src/css/parser.zig b/src/css/parser.zig
index d6905bec..6bec4cbd 100644
--- a/src/css/parser.zig
+++ b/src/css/parser.zig
@@ -50,18 +50,16 @@ pub const ParseOptions = struct {
     accept_pseudo_elts: bool = true,
 };
 
-// Parse parse a selector string and returns the parsed result or an error.
-pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: ParseOptions) ParseError!Selector {
-    var p = Parser{ .s = s, .i = 0, .opts = opts };
-    return p.parseSelector(alloc);
-}
-
-const Parser = struct {
+pub const Parser = struct {
     s: []const u8, // string to parse
     i: usize = 0, // current position
 
     opts: ParseOptions,
 
+    pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+        return p.parseSelector(alloc);
+    }
+
     // skipWhitespace consumes whitespace characters and comments.
     // It returns true if there was actually anything to skip.
     fn skipWhitespace(p: *Parser) bool {
@@ -894,118 +892,3 @@ test "parser.parseString" {
         };
     }
 }
-
-test "parser." {
-    const alloc = std.testing.allocator;
-
-    const testcases = [_][]const u8{
-        "address",
-        "*",
-        "#foo",
-        "li#t1",
-        "*#t4",
-        ".t1",
-        "p.t1",
-        "div.teST",
-        ".t1.fail",
-        "p.t1.t2",
-        "p.--t1",
-        "p.--t1.--t2",
-        "p[title]",
-        "div[class=\"red\" i]",
-        "address[title=\"foo\"]",
-        "address[title=\"FoOIgnoRECaSe\" i]",
-        "address[title!=\"foo\"]",
-        "address[title!=\"foo\" i]",
-        "p[title!=\"FooBarUFoo\" i]",
-        "[  \t title        ~=       foo    ]",
-        "p[title~=\"FOO\" i]",
-        "p[title~=toofoo i]",
-        "[title~=\"hello world\"]",
-        "[title~=\"hello\" i]",
-        "[title~=\"hello\"          I]",
-        "[lang|=\"en\"]",
-        "[lang|=\"EN\" i]",
-        "[lang|=\"EN\"     i]",
-        "[title^=\"foo\"]",
-        "[title^=\"foo\" i]",
-        "[title$=\"bar\"]",
-        "[title$=\"BAR\" i]",
-        "[title*=\"bar\"]",
-        "[title*=\"BaRu\" i]",
-        "[title*=\"BaRu\" I]",
-        "p[class$=\" \"]",
-        "p[class$=\"\"]",
-        "p[class^=\" \"]",
-        "p[class^=\"\"]",
-        "p[class*=\" \"]",
-        "p[class*=\"\"]",
-        "input[name=Sex][value=F]",
-        "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]",
-        ".t1:not(.t2)",
-        "div:not(.t1)",
-        "div:not([class=\"t2\"])",
-        "li:nth-child(odd)",
-        "li:nth-child(even)",
-        "li:nth-child(-n+2)",
-        "li:nth-child(3n+1)",
-        "li:nth-last-child(odd)",
-        "li:nth-last-child(even)",
-        "li:nth-last-child(-n+2)",
-        "li:nth-last-child(3n+1)",
-        "span:first-child",
-        "span:last-child",
-        "p:nth-of-type(2)",
-        "p:nth-last-of-type(2)",
-        "p:last-of-type",
-        "p:first-of-type",
-        "p:only-child",
-        "p:only-of-type",
-        ":empty",
-        "div p",
-        "div table p",
-        "div > p",
-        "p ~ p",
-        "p + p",
-        "li, p",
-        "p +/*This is a comment*/ p",
-        "p:contains(\"that wraps\")",
-        "p:containsOwn(\"that wraps\")",
-        ":containsOwn(\"inner\")",
-        "p:containsOwn(\"block\")",
-        "div:has(#p1)",
-        "div:has(:containsOwn(\"2\"))",
-        "body :has(:containsOwn(\"2\"))",
-        "body :haschild(:containsOwn(\"2\"))",
-        "p:matches([\\d])",
-        "p:matches([a-z])",
-        "p:matches([a-zA-Z])",
-        "p:matches([^\\d])",
-        "p:matches(^(0|a))",
-        "p:matches(^\\d+$)",
-        "p:not(:matches(^\\d+$))",
-        "div :matchesOwn(^\\d+$)",
-        "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])",
-        "[href#=(^https:\\/\\/[^\\/]*\\/?news)]",
-        ":input",
-        ":root",
-        "*:root",
-        "html:nth-child(1)",
-        "*:root:first-child",
-        "*:root:nth-child(1)",
-        "a:not(:root)",
-        "body > *:nth-child(3n+2)",
-        "input:disabled",
-        ":disabled",
-        ":enabled",
-        "div.class1, div.class2",
-    };
-
-    for (testcases) |tc| {
-        const s = Parse(alloc, tc, .{}) catch |e| {
-            std.debug.print("query {s}", .{tc});
-            return e;
-        };
-        defer s.deinit(alloc);
-    }
-}

From a131e96ed5a2816a5fd3e140f884a9a95ee3c70a Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Fri, 15 Mar 2024 15:03:55 +0100
Subject: [PATCH 04/28] css: lower case parse function

---
 src/css/css.zig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/css/css.zig b/src/css/css.zig
index 6092b1d0..67d00d89 100644
--- a/src/css/css.zig
+++ b/src/css/css.zig
@@ -5,13 +5,13 @@ const std = @import("std");
 const Selector = @import("selector.zig").Selector;
 const parser = @import("parser.zig");
 
-// Parse parse a selector string and returns the parsed result or an error.
-pub fn Parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector {
+// parse parse a selector string and returns the parsed result or an error.
+pub fn parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector {
     var p = parser.Parser{ .s = s, .i = 0, .opts = opts };
     return p.parse(alloc);
 }
 
-test "Parse" {
+test "parse" {
     const alloc = std.testing.allocator;
 
     const testcases = [_][]const u8{
@@ -118,7 +118,7 @@ test "Parse" {
     };
 
     for (testcases) |tc| {
-        const s = Parse(alloc, tc, .{}) catch |e| {
+        const s = parse(alloc, tc, .{}) catch |e| {
             std.debug.print("query {s}", .{tc});
             return e;
         };

From b59fd9b1fb314db38387f7a6742a946f164121ad Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Fri, 15 Mar 2024 16:09:16 +0100
Subject: [PATCH 05/28] css: matcher draft

---
 src/css/css.zig        | 31 +++++++++++++++++++++
 src/css/libdom.zig     | 22 +++++++++++++++
 src/css/match_test.zig | 61 ++++++++++++++++++++++++++++++++++++++++++
 src/css/selector.zig   |  7 +++++
 4 files changed, 121 insertions(+)
 create mode 100644 src/css/libdom.zig
 create mode 100644 src/css/match_test.zig

diff --git a/src/css/css.zig b/src/css/css.zig
index 67d00d89..9bc6cca2 100644
--- a/src/css/css.zig
+++ b/src/css/css.zig
@@ -11,6 +11,37 @@ pub fn parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions)
     return p.parse(alloc);
 }
 
+// matchFirst call m.match with the first node that matches the selector s, from the
+// descendants of n and returns true. If none matches, it returns false.
+pub fn matchFirst(s: Selector, node: anytype, m: anytype) !bool {
+    var c = try node.firstChild();
+    while (true) {
+        if (c == null) break;
+
+        if (try s.match(c.?)) {
+            try m.match(c.?);
+            return true;
+        }
+
+        if (try matchFirst(s, c.?, m)) return true;
+        c = try c.?.nextSibling();
+    }
+    return false;
+}
+
+// matchAll call m.match with the all the nodes that matches the selector s, from the
+// descendants of n.
+pub fn matchAll(s: Selector, node: anytype, m: anytype) !void {
+    var c = try node.firstChild();
+    while (true) {
+        if (c == null) break;
+
+        if (try s.match(c.?)) try m.match(c.?);
+        try matchFirst(s, c.?, m);
+        c = try c.?.nextSibling();
+    }
+}
+
 test "parse" {
     const alloc = std.testing.allocator;
 
diff --git a/src/css/libdom.zig b/src/css/libdom.zig
new file mode 100644
index 00000000..49623286
--- /dev/null
+++ b/src/css/libdom.zig
@@ -0,0 +1,22 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+// Node implementation with Netsurf Libdom C lib.
+pub const Node = struct {
+    node: *parser.Node,
+
+    pub fn firstChild(n: Node) !?Node {
+        const c = try parser.nodeFirstChild(n.node);
+        if (c) |cc| return .{ .node = cc };
+
+        return null;
+    }
+
+    pub fn nextSibling(n: Node) ?Node {
+        const c = try parser.nodeNextSibling(n.node);
+        if (c) |cc| return .{ .node = cc };
+
+        return null;
+    }
+};
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
new file mode 100644
index 00000000..ab7d725e
--- /dev/null
+++ b/src/css/match_test.zig
@@ -0,0 +1,61 @@
+const std = @import("std");
+const css = @import("css.zig");
+
+// Node mock implementation for test only.
+pub const Node = struct {
+    child: ?*const Node = null,
+    sibling: ?*const Node = null,
+
+    name: []const u8 = "",
+
+    pub fn firstChild(n: *const Node) !?*const Node {
+        return n.child;
+    }
+
+    pub fn nextSibling(n: *const Node) !?*const Node {
+        return n.sibling;
+    }
+
+    pub fn tag(n: *const Node) ![]const u8 {
+        return n.name;
+    }
+};
+const Matcher = struct {
+    const Nodes = std.ArrayList(*const Node);
+
+    nodes: Nodes,
+
+    fn init(alloc: std.mem.Allocator) Matcher {
+        return .{ .nodes = Nodes.init(alloc) };
+    }
+
+    fn deinit(m: *Matcher) void {
+        m.nodes.deinit();
+    }
+
+    fn reset(m: *Matcher) void {
+        m.nodes.clearRetainingCapacity();
+    }
+
+    pub fn match(m: *Matcher, n: *const Node) !void {
+        try m.nodes.append(n);
+    }
+};
+
+test "matchFirst" {
+    const alloc = std.testing.allocator;
+
+    const s = try css.parse(alloc, "address", .{});
+    defer s.deinit(alloc);
+
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    const node: Node = .{
+        .child = &.{ .name = "address" },
+    };
+
+    _ = try css.matchFirst(s, &node, &matcher);
+    try std.testing.expect(1 == matcher.nodes.items.len);
+    try std.testing.expect(matcher.nodes.items[0] == node.child);
+}
diff --git a/src/css/selector.zig b/src/css/selector.zig
index f112d397..30c0a147 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -165,6 +165,13 @@ pub const Selector = union(enum) {
     },
     pseudo_element: PseudoClass,
 
+    pub fn match(s: Selector, n: anytype) !bool {
+        return switch (s) {
+            .tag => |v| std.ascii.eqlIgnoreCase(v, try n.tag()),
+            else => false,
+        };
+    }
+
     pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
         switch (sel) {
             .group => |v| {

From 954a6935863f5fb5759fab5f8939c57b4165a443 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 09:49:12 +0100
Subject: [PATCH 06/28] css: add matcher test w/ libdom

---
 src/css/libdom.zig      |  6 +++++-
 src/css/libdom_test.zig | 44 +++++++++++++++++++++++++++++++++++++++++
 src/css/match_test.zig  |  1 +
 src/run_tests.zig       | 14 +++++++++----
 4 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 src/css/libdom_test.zig

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 49623286..8fb23aad 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -13,10 +13,14 @@ pub const Node = struct {
         return null;
     }
 
-    pub fn nextSibling(n: Node) ?Node {
+    pub fn nextSibling(n: Node) !?Node {
         const c = try parser.nodeNextSibling(n.node);
         if (c) |cc| return .{ .node = cc };
 
         return null;
     }
+
+    pub fn tag(n: Node) ![]const u8 {
+        return try parser.nodeName(n.node);
+    }
 };
diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig
new file mode 100644
index 00000000..f143c17b
--- /dev/null
+++ b/src/css/libdom_test.zig
@@ -0,0 +1,44 @@
+const std = @import("std");
+const css = @import("css.zig");
+const Node = @import("libdom.zig").Node;
+const parser = @import("../netsurf.zig");
+
+const Matcher = struct {
+    const Nodes = std.ArrayList(Node);
+
+    nodes: Nodes,
+
+    fn init(alloc: std.mem.Allocator) Matcher {
+        return .{ .nodes = Nodes.init(alloc) };
+    }
+
+    fn deinit(m: *Matcher) void {
+        m.nodes.deinit();
+    }
+
+    fn reset(m: *Matcher) void {
+        m.nodes.clearRetainingCapacity();
+    }
+
+    pub fn match(m: *Matcher, n: Node) !void {
+        try m.nodes.append(n);
+    }
+};
+
+test "matchFirst" {
+    const alloc = std.testing.allocator;
+
+    const s = try css.parse(alloc, "address", .{});
+    defer s.deinit(alloc);
+
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    const doc = try parser.documentHTMLParseFromStr("<body><address>This address...</address></body>");
+    defer parser.documentHTMLClose(doc) catch {};
+
+    const node = Node{ .node = parser.documentHTMLToNode(doc) };
+
+    _ = try css.matchFirst(s, node, &matcher);
+    try std.testing.expect(1 == matcher.nodes.items.len);
+}
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index ab7d725e..90c3cd1a 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -20,6 +20,7 @@ pub const Node = struct {
         return n.name;
     }
 };
+
 const Matcher = struct {
     const Nodes = std.ArrayList(*const Node);
 
diff --git a/src/run_tests.zig b/src/run_tests.zig
index 2f5c8d6b..9227a20d 100644
--- a/src/run_tests.zig
+++ b/src/run_tests.zig
@@ -98,11 +98,17 @@ pub fn main() !void {
 }
 
 test {
-    const AsyncTest = @import("async/test.zig");
-    std.testing.refAllDecls(AsyncTest);
+    const asyncTest = @import("async/test.zig");
+    std.testing.refAllDecls(asyncTest);
 
-    const DumpTest = @import("browser/dump.zig");
-    std.testing.refAllDecls(DumpTest);
+    const dumpTest = @import("browser/dump.zig");
+    std.testing.refAllDecls(dumpTest);
+
+    const cssMatchTest = @import("css/match_test.zig");
+    std.testing.refAllDecls(cssMatchTest);
+
+    const cssLibdomTest = @import("css/libdom_test.zig");
+    std.testing.refAllDecls(cssLibdomTest);
 }
 
 fn testJSRuntime() !void {

From 7839f466ea52da16f95e81ac76fb0f5d96be5fb8 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 11:35:47 +0100
Subject: [PATCH 07/28] css: refacto test

---
 src/css/css.zig         |  2 +-
 src/css/libdom_test.zig | 64 ++++++++++++++++++++++++++++++++++++-----
 src/css/match_test.zig  | 54 +++++++++++++++++++++++++++++-----
 3 files changed, 105 insertions(+), 15 deletions(-)

diff --git a/src/css/css.zig b/src/css/css.zig
index 9bc6cca2..fc5e8995 100644
--- a/src/css/css.zig
+++ b/src/css/css.zig
@@ -37,7 +37,7 @@ pub fn matchAll(s: Selector, node: anytype, m: anytype) !void {
         if (c == null) break;
 
         if (try s.match(c.?)) try m.match(c.?);
-        try matchFirst(s, c.?, m);
+        try matchAll(s, c.?, m);
         c = try c.?.nextSibling();
     }
 }
diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig
index f143c17b..5a952cfc 100644
--- a/src/css/libdom_test.zig
+++ b/src/css/libdom_test.zig
@@ -28,17 +28,67 @@ const Matcher = struct {
 test "matchFirst" {
     const alloc = std.testing.allocator;
 
-    const s = try css.parse(alloc, "address", .{});
-    defer s.deinit(alloc);
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    const testcases = [_]struct {
+        q: []const u8,
+        html: []const u8,
+        exp: usize,
+    }{
+        .{
+            .q = "address",
+            .html = "<body><address>This address...</address></body>",
+            .exp = 1,
+        },
+    };
+
+    for (testcases) |tc| {
+        matcher.reset();
+
+        const doc = try parser.documentHTMLParseFromStr(tc.html);
+        defer parser.documentHTMLClose(doc) catch {};
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        const node = Node{ .node = parser.documentHTMLToNode(doc) };
+
+        _ = try css.matchFirst(s, node, &matcher);
+        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+    }
+}
+
+test "matchAll" {
+    const alloc = std.testing.allocator;
 
     var matcher = Matcher.init(alloc);
     defer matcher.deinit();
 
-    const doc = try parser.documentHTMLParseFromStr("<body><address>This address...</address></body>");
-    defer parser.documentHTMLClose(doc) catch {};
+    const testcases = [_]struct {
+        q: []const u8,
+        html: []const u8,
+        exp: usize,
+    }{
+        .{
+            .q = "address",
+            .html = "<body><address>This address...</address></body>",
+            .exp = 1,
+        },
+    };
 
-    const node = Node{ .node = parser.documentHTMLToNode(doc) };
+    for (testcases) |tc| {
+        matcher.reset();
 
-    _ = try css.matchFirst(s, node, &matcher);
-    try std.testing.expect(1 == matcher.nodes.items.len);
+        const doc = try parser.documentHTMLParseFromStr(tc.html);
+        defer parser.documentHTMLClose(doc) catch {};
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        const node = Node{ .node = parser.documentHTMLToNode(doc) };
+
+        _ = try css.matchAll(s, node, &matcher);
+        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+    }
 }
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 90c3cd1a..5df71da6 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -46,17 +46,57 @@ const Matcher = struct {
 test "matchFirst" {
     const alloc = std.testing.allocator;
 
-    const s = try css.parse(alloc, "address", .{});
-    defer s.deinit(alloc);
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    const testcases = [_]struct {
+        q: []const u8,
+        n: Node,
+        exp: usize,
+    }{
+        .{
+            .q = "address",
+            .n = .{ .name = "body", .child = &.{ .name = "address" } },
+            .exp = 1,
+        },
+    };
+
+    for (testcases) |tc| {
+        matcher.reset();
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        _ = try css.matchFirst(s, &tc.n, &matcher);
+        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+    }
+}
+
+test "matchAll" {
+    const alloc = std.testing.allocator;
 
     var matcher = Matcher.init(alloc);
     defer matcher.deinit();
 
-    const node: Node = .{
-        .child = &.{ .name = "address" },
+    const testcases = [_]struct {
+        q: []const u8,
+        n: Node,
+        exp: usize,
+    }{
+        .{
+            .q = "address",
+            .n = .{ .name = "body", .child = &.{ .name = "address" } },
+            .exp = 1,
+        },
     };
 
-    _ = try css.matchFirst(s, &node, &matcher);
-    try std.testing.expect(1 == matcher.nodes.items.len);
-    try std.testing.expect(matcher.nodes.items[0] == node.child);
+    for (testcases) |tc| {
+        matcher.reset();
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        _ = try css.matchAll(s, &tc.n, &matcher);
+        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+    }
 }

From 4629e8a9eb1964942c6d18bc135027f4cd4037aa Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 11:36:06 +0100
Subject: [PATCH 08/28] css: check if node is an html element

---
 src/css/libdom.zig     | 5 +++++
 src/css/match_test.zig | 4 ++++
 src/css/selector.zig   | 2 +-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 8fb23aad..57d4af54 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -20,6 +20,11 @@ pub const Node = struct {
         return null;
     }
 
+    pub fn isElement(n: Node) bool {
+        const t = parser.nodeType(n.node) catch return false;
+        return t == .element;
+    }
+
     pub fn tag(n: Node) ![]const u8 {
         return try parser.nodeName(n.node);
     }
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 5df71da6..a37de623 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -16,6 +16,10 @@ pub const Node = struct {
         return n.sibling;
     }
 
+    pub fn isElement(_: *const Node) bool {
+        return true;
+    }
+
     pub fn tag(n: *const Node) ![]const u8 {
         return n.name;
     }
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 30c0a147..0ce92099 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -167,7 +167,7 @@ pub const Selector = union(enum) {
 
     pub fn match(s: Selector, n: anytype) !bool {
         return switch (s) {
-            .tag => |v| std.ascii.eqlIgnoreCase(v, try n.tag()),
+            .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
             else => false,
         };
     }

From d64fffc5b3664eb5184ce1fc7f157696495dc4fc Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 12:48:03 +0100
Subject: [PATCH 09/28] css: implement id and class match selector

---
 src/css/libdom.zig      |  4 ++++
 src/css/libdom_test.zig | 18 ++++++++----------
 src/css/match_test.zig  | 39 +++++++++++++++++++++++++++++++++++++--
 src/css/selector.zig    | 12 ++++++++++++
 4 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 57d4af54..318e401c 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -28,4 +28,8 @@ pub const Node = struct {
     pub fn tag(n: Node) ![]const u8 {
         return try parser.nodeName(n.node);
     }
+
+    pub fn attr(n: Node, key: []const u8) !?[]const u8 {
+        return try parser.elementGetAttribute(parser.nodeToElement(n.node), key);
+    }
 };
diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig
index 5a952cfc..0e5a255d 100644
--- a/src/css/libdom_test.zig
+++ b/src/css/libdom_test.zig
@@ -36,11 +36,10 @@ test "matchFirst" {
         html: []const u8,
         exp: usize,
     }{
-        .{
-            .q = "address",
-            .html = "<body><address>This address...</address></body>",
-            .exp = 1,
-        },
+        .{ .q = "address", .html = "<body><address>This address...</address></body>", .exp = 1 },
+        .{ .q = "#foo", .html = "<p id=\"foo\"><p id=\"bar\">", .exp = 1 },
+        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = ".t3", .html = "<ul><li class=\"t1\"><li class=\"t2 t3\">", .exp = 1 },
     };
 
     for (testcases) |tc| {
@@ -70,11 +69,10 @@ test "matchAll" {
         html: []const u8,
         exp: usize,
     }{
-        .{
-            .q = "address",
-            .html = "<body><address>This address...</address></body>",
-            .exp = 1,
-        },
+        .{ .q = "address", .html = "<body><address>This address...</address></body>", .exp = 1 },
+        .{ .q = "#foo", .html = "<p id=\"foo\"><p id=\"bar\">", .exp = 1 },
+        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = ".t3", .html = "<ul><li class=\"t1\"><li class=\"t2 t3\">", .exp = 1 },
     };
 
     for (testcases) |tc| {
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index a37de623..6997a970 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -7,6 +7,7 @@ pub const Node = struct {
     sibling: ?*const Node = null,
 
     name: []const u8 = "",
+    att: ?[]const u8 = null,
 
     pub fn firstChild(n: *const Node) !?*const Node {
         return n.child;
@@ -23,6 +24,10 @@ pub const Node = struct {
     pub fn tag(n: *const Node) ![]const u8 {
         return n.name;
     }
+
+    pub fn attr(n: *const Node, _: []const u8) !?[]const u8 {
+        return n.att;
+    }
 };
 
 const Matcher = struct {
@@ -60,7 +65,22 @@ test "matchFirst" {
     }{
         .{
             .q = "address",
-            .n = .{ .name = "body", .child = &.{ .name = "address" } },
+            .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "#foo",
+            .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } },
+            .exp = 1,
+        },
+        .{
+            .q = ".t1",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } },
+            .exp = 1,
+        },
+        .{
+            .q = ".t1",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
             .exp = 1,
         },
     };
@@ -89,7 +109,22 @@ test "matchAll" {
     }{
         .{
             .q = "address",
-            .n = .{ .name = "body", .child = &.{ .name = "address" } },
+            .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "#foo",
+            .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } },
+            .exp = 1,
+        },
+        .{
+            .q = ".t1",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } },
+            .exp = 1,
+        },
+        .{
+            .q = ".t1",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
             .exp = 1,
         },
     };
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 0ce92099..06b95937 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -165,9 +165,21 @@ pub const Selector = union(enum) {
     },
     pseudo_element: PseudoClass,
 
+    // returns true if s is a whitespace-separated list that includes val.
+    fn contains(haystack: []const u8, needle: []const u8) bool {
+        if (haystack.len == 0) return false;
+        var it = std.mem.splitAny(u8, haystack, " \t\r\n"); // TODO add \f
+        while (it.next()) |part| {
+            if (std.mem.eql(u8, part, needle)) return true;
+        }
+        return false;
+    }
+
     pub fn match(s: Selector, n: anytype) !bool {
         return switch (s) {
             .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
+            .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false),
+            .class => |v| return n.isElement() and contains(try n.attr("class") orelse return false, v),
             else => false,
         };
     }

From 5e8ec4532dc0dbbb16944b7bc1129d637b05228a Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 16:01:46 +0100
Subject: [PATCH 10/28] css: add attribute matcher

---
 src/css/match_test.zig | 140 +++++++++++++++++++++++++++++++++++++++++
 src/css/selector.zig   |  55 +++++++++++++++-
 2 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 6997a970..d0ac3c81 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -83,6 +83,76 @@ test "matchFirst" {
             .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
             .exp = 1,
         },
+        .{
+            .q = "[foo]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo!=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo!=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo~=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo~=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo^=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo$=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo*=rb]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
+            .exp = 0,
+        },
     };
 
     for (testcases) |tc| {
@@ -127,6 +197,76 @@ test "matchAll" {
             .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
             .exp = 1,
         },
+        .{
+            .q = "[foo]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo!=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo!=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 2,
+        },
+        .{
+            .q = "[foo~=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo~=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "[foo^=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo$=baz]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo*=rb]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "[foo|=bar]",
+            .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
+            .exp = 0,
+        },
     };
 
     for (testcases) |tc| {
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 06b95937..8eae1aef 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -166,20 +166,69 @@ pub const Selector = union(enum) {
     pseudo_element: PseudoClass,
 
     // returns true if s is a whitespace-separated list that includes val.
-    fn contains(haystack: []const u8, needle: []const u8) bool {
+    fn word(haystack: []const u8, needle: []const u8, ci: bool) bool {
         if (haystack.len == 0) return false;
         var it = std.mem.splitAny(u8, haystack, " \t\r\n"); // TODO add \f
         while (it.next()) |part| {
-            if (std.mem.eql(u8, part, needle)) return true;
+            if (eql(part, needle, ci)) return true;
         }
         return false;
     }
 
+    fn eql(a: []const u8, b: []const u8, ci: bool) bool {
+        if (ci) return std.ascii.eqlIgnoreCase(a, b);
+        return std.mem.eql(u8, a, b);
+    }
+
+    fn starts(haystack: []const u8, needle: []const u8, ci: bool) bool {
+        if (ci) return std.ascii.startsWithIgnoreCase(haystack, needle);
+        return std.mem.startsWith(u8, haystack, needle);
+    }
+
+    fn ends(haystack: []const u8, needle: []const u8, ci: bool) bool {
+        if (ci) return std.ascii.endsWithIgnoreCase(haystack, needle);
+        return std.mem.endsWith(u8, haystack, needle);
+    }
+
+    fn contains(haystack: []const u8, needle: []const u8, ci: bool) bool {
+        if (ci) return std.ascii.indexOfIgnoreCase(haystack, needle) != null;
+        return std.mem.indexOf(u8, haystack, needle) != null;
+    }
+
     pub fn match(s: Selector, n: anytype) !bool {
         return switch (s) {
             .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
             .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false),
-            .class => |v| return n.isElement() and contains(try n.attr("class") orelse return false, v),
+            .class => |v| return n.isElement() and word(try n.attr("class") orelse return false, v, false),
+            .attribute => |v| {
+                const attr = try n.attr(v.key);
+
+                if (v.op == null) return attr != null;
+                if (v.val == null or v.val.?.len == 0) return false;
+
+                const val = v.val.?;
+
+                return switch (v.op.?) {
+                    .eql => attr != null and eql(attr.?, val, v.ci),
+                    .not_eql => attr == null or !eql(attr.?, val, v.ci),
+                    .one_of => attr != null and word(attr.?, val, v.ci),
+                    .prefix => attr != null and starts(attr.?, val, v.ci),
+                    .suffix => attr != null and ends(attr.?, val, v.ci),
+                    .contains => attr != null and contains(attr.?, val, v.ci),
+                    .prefix_hyphen => {
+                        if (attr == null) return false;
+                        if (eql(attr.?, val, v.ci)) return true;
+
+                        if (attr.?.len <= val.len) return false;
+
+                        if (!starts(attr.?, val, v.ci)) return false;
+
+                        return attr.?[val.len] == '-';
+                    },
+                    .regexp => false, // TODO handle regexp attribute operator.
+                };
+            },
+            .never_match => return false,
             else => false,
         };
     }

From a2e747002b01ce6dde4f3099c555a86988553535 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 21:21:44 +0100
Subject: [PATCH 11/28] css: use parseSelectorGroup() with parse()

---
 src/css/parser.zig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/css/parser.zig b/src/css/parser.zig
index 6bec4cbd..d2110883 100644
--- a/src/css/parser.zig
+++ b/src/css/parser.zig
@@ -57,7 +57,7 @@ pub const Parser = struct {
     opts: ParseOptions,
 
     pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
-        return p.parseSelector(alloc);
+        return p.parseSelectorGroup(alloc);
     }
 
     // skipWhitespace consumes whitespace characters and comments.
@@ -583,6 +583,8 @@ pub const Parser = struct {
             try buf.append(ss);
         }
 
+        if (buf.items.len == 1) return buf.items[0];
+
         return .{ .group = try buf.toOwnedSlice() };
     }
 

From d0dbbacd690ce40a7e221bbf26127ae35548c4fb Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 16:05:06 +0100
Subject: [PATCH 12/28] css: enable all css tests in zig build test

---
 src/run_tests.zig | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/run_tests.zig b/src/run_tests.zig
index 9227a20d..84ca45fd 100644
--- a/src/run_tests.zig
+++ b/src/run_tests.zig
@@ -104,6 +104,12 @@ test {
     const dumpTest = @import("browser/dump.zig");
     std.testing.refAllDecls(dumpTest);
 
+    const cssTest = @import("css/css.zig");
+    std.testing.refAllDecls(cssTest);
+
+    const cssParserTest = @import("css/parser.zig");
+    std.testing.refAllDecls(cssParserTest);
+
     const cssMatchTest = @import("css/match_test.zig");
     std.testing.refAllDecls(cssMatchTest);
 

From 75e80a47e6c22beee0215e75a64b48ebcb231b86 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 18 Mar 2024 21:21:28 +0100
Subject: [PATCH 13/28] css: implement group, compound and start combined match

---
 src/css/libdom.zig     |  7 +++++
 src/css/match_test.zig | 63 +++++++++++++++++++++++++++++++++++++++---
 src/css/parser.zig     | 15 ++++++----
 src/css/selector.zig   | 62 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 318e401c..7c06cd1a 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -20,6 +20,13 @@ pub const Node = struct {
         return null;
     }
 
+    pub fn parent(n: Node) !?Node {
+        const c = try parser.nodeParentNode(n.node);
+        if (c) |cc| return .{ .node = cc };
+
+        return null;
+    }
+
     pub fn isElement(n: Node) bool {
         const t = parser.nodeType(n.node) catch return false;
         return t == .element;
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index d0ac3c81..22694961 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -5,6 +5,7 @@ const css = @import("css.zig");
 pub const Node = struct {
     child: ?*const Node = null,
     sibling: ?*const Node = null,
+    par: ?*const Node = null,
 
     name: []const u8 = "",
     att: ?[]const u8 = null,
@@ -17,6 +18,10 @@ pub const Node = struct {
         return n.sibling;
     }
 
+    pub fn parent(n: *const Node) !?*const Node {
+        return n.par;
+    }
+
     pub fn isElement(_: *const Node) bool {
         return true;
     }
@@ -153,6 +158,24 @@ test "matchFirst" {
             .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
             .exp = 0,
         },
+        .{
+            .q = "strong, a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, .sibling = &.{ .name = "a" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{
+                .name = "a",
+                .par = &.{ .name = "span", .par = &.{ .name = "p" } },
+            } } } },
+            .exp = 1,
+        },
     };
 
     for (testcases) |tc| {
@@ -161,8 +184,15 @@ test "matchFirst" {
         const s = try css.parse(alloc, tc.q, .{});
         defer s.deinit(alloc);
 
-        _ = try css.matchFirst(s, &tc.n, &matcher);
-        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+        _ = css.matchFirst(s, &tc.n, &matcher) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+
+        std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
     }
 }
 
@@ -267,6 +297,24 @@ test "matchAll" {
             .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
             .exp = 0,
         },
+        .{
+            .q = "strong, a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 2,
+        },
+        .{
+            .q = "p a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, .sibling = &.{ .name = "a" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p a",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{
+                .name = "a",
+                .par = &.{ .name = "span", .par = &.{ .name = "p" } },
+            } } } },
+            .exp = 1,
+        },
     };
 
     for (testcases) |tc| {
@@ -275,7 +323,14 @@ test "matchAll" {
         const s = try css.parse(alloc, tc.q, .{});
         defer s.deinit(alloc);
 
-        _ = try css.matchAll(s, &tc.n, &matcher);
-        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+        _ = css.matchAll(s, &tc.n, &matcher) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+
+        std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
     }
 }
diff --git a/src/css/parser.zig b/src/css/parser.zig
index d2110883..f0da6504 100644
--- a/src/css/parser.zig
+++ b/src/css/parser.zig
@@ -9,6 +9,7 @@ const selector = @import("selector.zig");
 const Selector = selector.Selector;
 const PseudoClass = selector.PseudoClass;
 const AttributeOP = selector.AttributeOP;
+const Combinator = selector.Combinator;
 
 pub const ParseError = error{
     ExpectedSelector,
@@ -44,7 +45,7 @@ pub const ParseError = error{
     NotHandled,
     UnknownPseudoSelector,
     InvalidNthExpression,
-} || PseudoClass.Error || std.mem.Allocator.Error;
+} || PseudoClass.Error || Combinator.Error || std.mem.Allocator.Error;
 
 pub const ParseOptions = struct {
     accept_pseudo_elts: bool = true,
@@ -594,9 +595,9 @@ pub const Parser = struct {
         var s = try p.parseSimpleSelectorSequence(alloc);
 
         while (true) {
-            var combinator: u8 = undefined;
+            var combinator: Combinator = .empty;
             if (p.skipWhitespace()) {
-                combinator = ' ';
+                combinator = .descendant;
             }
             if (p.i >= p.s.len) {
                 return s;
@@ -604,16 +605,18 @@ pub const Parser = struct {
 
             switch (p.s[p.i]) {
                 '+', '>', '~' => {
-                    combinator = p.s[p.i];
+                    combinator = try Combinator.parse(p.s[p.i]);
                     p.i += 1;
                     _ = p.skipWhitespace();
                 },
                 // These characters can't begin a selector, but they can legally occur after one.
-                ',', ')' => return s,
+                ',', ')' => {
+                    return s;
+                },
                 else => {},
             }
 
-            if (combinator == 0) {
+            if (combinator == .empty) {
                 return s;
             }
 
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 8eae1aef..f94144b5 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -16,6 +16,28 @@ pub const AttributeOP = enum {
     }
 };
 
+pub const Combinator = enum {
+    empty,
+    descendant, // space
+    child, // >
+    next_sibling, // +
+    subsequent_sibling, // ~
+
+    pub const Error = error{
+        InvalidCombinator,
+    };
+
+    pub fn parse(c: u8) Error!Combinator {
+        return switch (c) {
+            ' ' => .descendant,
+            '>' => .child,
+            '+' => .next_sibling,
+            '~' => .subsequent_sibling,
+            else => Error.InvalidCombinator,
+        };
+    }
+};
+
 pub const PseudoClass = enum {
     not,
     has,
@@ -119,6 +141,10 @@ pub const PseudoClass = enum {
 };
 
 pub const Selector = union(enum) {
+    pub const Error = error{
+        UnknownCombinedCombinator,
+    };
+
     compound: struct {
         selectors: []Selector,
         pseudo_elt: ?PseudoClass,
@@ -137,7 +163,7 @@ pub const Selector = union(enum) {
     combined: struct {
         first: *Selector,
         second: *Selector,
-        combinator: u8,
+        combinator: Combinator,
     },
 
     never_match: PseudoClass,
@@ -200,6 +226,40 @@ pub const Selector = union(enum) {
             .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
             .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false),
             .class => |v| return n.isElement() and word(try n.attr("class") orelse return false, v, false),
+            .group => |v| {
+                for (v) |sel| {
+                    if (try sel.match(n)) return true;
+                }
+                return false;
+            },
+            .compound => |v| {
+                if (v.selectors.len == 0) return n.isElement();
+
+                for (v.selectors) |sel| {
+                    if (!try sel.match(n)) return false;
+                }
+                return true;
+            },
+            .combined => |v| {
+                return switch (v.combinator) {
+                    .empty => try v.first.match(n),
+                    .descendant => {
+                        if (!try v.second.match(n)) return false;
+
+                        // The first must match a ascendent.
+                        var p = try n.parent();
+                        while (p != null) {
+                            if (try v.first.match(p.?)) {
+                                return true;
+                            }
+                            p = try p.?.parent();
+                        }
+
+                        return false;
+                    },
+                    else => return Error.UnknownCombinedCombinator,
+                };
+            },
             .attribute => |v| {
                 const attr = try n.attr(v.key);
 

From 9c997ec86d5b667e30edfb14ef6cda1d3c4f71dd Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Tue, 19 Mar 2024 09:25:52 +0100
Subject: [PATCH 14/28] css: add pseudo class relative match

---
 src/css/match_test.zig | 50 ++++++++++++++++++++++++++++++++++++++++++
 src/css/selector.zig   | 32 +++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 22694961..254e9156 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -176,6 +176,31 @@ test "matchFirst" {
             } } } },
             .exp = 1,
         },
+        .{
+            .q = ":not(p)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p:has(a)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p:has(strong)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "p:haschild(a)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p:haschild(strong)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 0,
+        },
     };
 
     for (testcases) |tc| {
@@ -315,6 +340,31 @@ test "matchAll" {
             } } } },
             .exp = 1,
         },
+        .{
+            .q = ":not(p)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 2,
+        },
+        .{
+            .q = "p:has(a)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p:has(strong)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 0,
+        },
+        .{
+            .q = "p:haschild(a)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "p:haschild(strong)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+            .exp = 0,
+        },
     };
 
     for (testcases) |tc| {
diff --git a/src/css/selector.zig b/src/css/selector.zig
index f94144b5..df0788b0 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -143,6 +143,7 @@ pub const PseudoClass = enum {
 pub const Selector = union(enum) {
     pub const Error = error{
         UnknownCombinedCombinator,
+        UnsupportedRelativePseudoClass,
     };
 
     compound: struct {
@@ -289,10 +290,41 @@ pub const Selector = union(enum) {
                 };
             },
             .never_match => return false,
+            .pseudo_class_relative => |v| {
+                if (!n.isElement()) return false;
+
+                return switch (v.pseudo_class) {
+                    .not => !try v.match.match(n),
+                    .has => try hasDescendantMatch(v.match, n),
+                    .haschild => try hasChildMatch(v.match, n),
+                    else => Error.UnsupportedRelativePseudoClass,
+                };
+            },
             else => false,
         };
     }
 
+    fn hasDescendantMatch(s: *const Selector, n: anytype) anyerror!bool {
+        var c = try n.firstChild();
+        while (c != null) {
+            if (try s.match(c.?)) return true;
+            if (c.?.isElement() and try hasDescendantMatch(s, c.?)) return true;
+            c = try c.?.nextSibling();
+        }
+
+        return false;
+    }
+
+    fn hasChildMatch(s: *const Selector, n: anytype) anyerror!bool {
+        var c = try n.firstChild();
+        while (c != null) {
+            if (try s.match(c.?)) return true;
+            c = try c.?.nextSibling();
+        }
+
+        return false;
+    }
+
     pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
         switch (sel) {
             .group => |v| {

From db5d9332853d75153912136c9d725659001a67bf Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 08:50:57 +0100
Subject: [PATCH 15/28] css: add nth- pseudo class

---
 src/css/libdom.zig     |  18 ++++++
 src/css/match_test.zig |  85 +++++++++++++++++++++++++++-
 src/css/parser.zig     |   2 +-
 src/css/selector.zig   | 124 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 225 insertions(+), 4 deletions(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 7c06cd1a..04c99a66 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -13,6 +13,13 @@ pub const Node = struct {
         return null;
     }
 
+    pub fn lastChild(n: Node) !?Node {
+        const c = try parser.nodeLastChild(n.node);
+        if (c) |cc| return .{ .node = cc };
+
+        return null;
+    }
+
     pub fn nextSibling(n: Node) !?Node {
         const c = try parser.nodeNextSibling(n.node);
         if (c) |cc| return .{ .node = cc };
@@ -20,6 +27,13 @@ pub const Node = struct {
         return null;
     }
 
+    pub fn prevSibling(n: Node) !?Node {
+        const c = try parser.nodePreviousSibling(n.node);
+        if (c) |cc| return .{ .node = cc };
+
+        return null;
+    }
+
     pub fn parent(n: Node) !?Node {
         const c = try parser.nodeParentNode(n.node);
         if (c) |cc| return .{ .node = cc };
@@ -39,4 +53,8 @@ pub const Node = struct {
     pub fn attr(n: Node, key: []const u8) !?[]const u8 {
         return try parser.elementGetAttribute(parser.nodeToElement(n.node), key);
     }
+
+    pub fn eql(a: Node, b: Node) bool {
+        return a.node == b.node;
+    }
 };
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 254e9156..9aaeedbd 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -4,7 +4,9 @@ const css = @import("css.zig");
 // Node mock implementation for test only.
 pub const Node = struct {
     child: ?*const Node = null,
+    last: ?*const Node = null,
     sibling: ?*const Node = null,
+    prev: ?*const Node = null,
     par: ?*const Node = null,
 
     name: []const u8 = "",
@@ -14,10 +16,18 @@ pub const Node = struct {
         return n.child;
     }
 
+    pub fn lastChild(n: *const Node) !?*const Node {
+        return n.last;
+    }
+
     pub fn nextSibling(n: *const Node) !?*const Node {
         return n.sibling;
     }
 
+    pub fn prevSibling(n: *const Node) !?*const Node {
+        return n.prev;
+    }
+
     pub fn parent(n: *const Node) !?*const Node {
         return n.par;
     }
@@ -33,6 +43,10 @@ pub const Node = struct {
     pub fn attr(n: *const Node, _: []const u8) !?[]const u8 {
         return n.att;
     }
+
+    pub fn eql(a: *const Node, b: *const Node) bool {
+        return a == b;
+    }
 };
 
 const Matcher = struct {
@@ -373,7 +387,7 @@ test "matchAll" {
         const s = try css.parse(alloc, tc.q, .{});
         defer s.deinit(alloc);
 
-        _ = css.matchAll(s, &tc.n, &matcher) catch |e| {
+        css.matchAll(s, &tc.n, &matcher) catch |e| {
             std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
             return e;
         };
@@ -384,3 +398,72 @@ test "matchAll" {
         };
     }
 }
+
+test "nth pseudo class" {
+    const alloc = std.testing.allocator;
+
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    var p1: Node = .{ .name = "p" };
+    var p2: Node = .{ .name = "p" };
+
+    p1.sibling = &p2;
+    p2.prev = &p1;
+
+    var root: Node = .{ .child = &p1, .last = &p2 };
+    p1.par = &root;
+    p2.par = &root;
+
+    const testcases = [_]struct {
+        q: []const u8,
+        n: Node,
+        exp: ?*const Node,
+    }{
+        .{ .q = "a:nth-of-type(1)", .n = root, .exp = null },
+        .{ .q = "p:nth-of-type(1)", .n = root, .exp = &p1 },
+        .{ .q = "p:nth-of-type(2)", .n = root, .exp = &p2 },
+        .{ .q = "p:nth-of-type(0)", .n = root, .exp = null },
+        .{ .q = "p:nth-of-type(2n)", .n = root, .exp = &p2 },
+        .{ .q = "p:nth-last-child(1)", .n = root, .exp = &p2 },
+        .{ .q = "p:nth-last-child(2)", .n = root, .exp = &p1 },
+        .{ .q = "p:nth-child(1)", .n = root, .exp = &p1 },
+        .{ .q = "p:nth-child(2)", .n = root, .exp = &p2 },
+        .{ .q = "p:nth-child(odd)", .n = root, .exp = &p1 },
+        .{ .q = "p:nth-child(even)", .n = root, .exp = &p2 },
+        .{ .q = "p:nth-child(n+2)", .n = root, .exp = &p2 },
+    };
+
+    for (testcases) |tc| {
+        matcher.reset();
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        css.matchAll(s, &tc.n, &matcher) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+
+        if (tc.exp) |exp_n| {
+            const exp: usize = 1;
+            std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| {
+                std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                return e;
+            };
+
+            std.testing.expectEqual(exp_n, matcher.nodes.items[0]) catch |e| {
+                std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                return e;
+            };
+
+            continue;
+        }
+
+        const exp: usize = 0;
+        std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+    }
+}
diff --git a/src/css/parser.zig b/src/css/parser.zig
index f0da6504..b23991c1 100644
--- a/src/css/parser.zig
+++ b/src/css/parser.zig
@@ -711,7 +711,7 @@ pub const Parser = struct {
         if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
         const c = p.s[p.i];
         if (std.ascii.isDigit(c)) {
-            const a = try p.parseInteger() * -1;
+            const a = try p.parseInteger();
             return p.parseNthReadA(a);
         }
         if (c == 'n' or c == 'N') {
diff --git a/src/css/selector.zig b/src/css/selector.zig
index df0788b0..381b1d67 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -144,6 +144,9 @@ pub const Selector = union(enum) {
     pub const Error = error{
         UnknownCombinedCombinator,
         UnsupportedRelativePseudoClass,
+        UnsupportedContainsPseudoClass,
+        UnsupportedRegexpPseudoClass,
+        UnsupportedAttrRegexpOperator,
     };
 
     compound: struct {
@@ -222,6 +225,7 @@ pub const Selector = union(enum) {
         return std.mem.indexOf(u8, haystack, needle) != null;
     }
 
+    // match returns true if the node matches the selector query.
     pub fn match(s: Selector, n: anytype) !bool {
         return switch (s) {
             .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
@@ -286,7 +290,7 @@ pub const Selector = union(enum) {
 
                         return attr.?[val.len] == '-';
                     },
-                    .regexp => false, // TODO handle regexp attribute operator.
+                    .regexp => return Error.UnsupportedAttrRegexpOperator, // TODO handle regexp attribute operator.
                 };
             },
             .never_match => return false,
@@ -300,10 +304,126 @@ pub const Selector = union(enum) {
                     else => Error.UnsupportedRelativePseudoClass,
                 };
             },
-            else => false,
+            .pseudo_class_contains => return Error.UnsupportedContainsPseudoClass, // TODO, need mem allocation.
+            .pseudo_class_regexp => return Error.UnsupportedRegexpPseudoClass, // TODO need mem allocation.
+            .pseudo_class_nth => |v| {
+                if (v.a == 0) {
+                    if (v.last) {
+                        return simpleNthLastChildMatch(v.b, v.of_type, n);
+                    }
+                    return simpleNthChildMatch(v.b, v.of_type, n);
+                }
+                return nthChildMatch(v.a, v.b, v.last, v.of_type, n);
+            },
+            .pseudo_class => return false,
+            .pseudo_class_only_child => return false,
+            .pseudo_class_lang => return false,
+            .pseudo_element => return false,
         };
     }
 
+    // simpleNthLastChildMatch implements :nth-last-child(b).
+    // If ofType is true, implements :nth-last-of-type instead.
+    fn simpleNthLastChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool {
+        if (!n.isElement()) return false;
+
+        const p = try n.parent();
+        if (p == null) return false;
+
+        const ntag = try n.tag();
+
+        var count: isize = 0;
+        var c = try p.?.lastChild();
+        // loop hover all n siblings.
+        while (c != null) {
+            // ignore non elements or others tags if of-type is true.
+            if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) {
+                c = try c.?.prevSibling();
+                continue;
+            }
+
+            count += 1;
+
+            if (n.eql(c.?)) return count == b;
+            if (count >= b) return false;
+
+            c = try c.?.prevSibling();
+        }
+
+        return false;
+    }
+
+    // simpleNthChildMatch implements :nth-child(b).
+    // If ofType is true, implements :nth-of-type instead.
+    fn simpleNthChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool {
+        if (!n.isElement()) return false;
+
+        const p = try n.parent();
+        if (p == null) return false;
+
+        const ntag = try n.tag();
+
+        var count: isize = 0;
+        var c = try p.?.firstChild();
+        // loop hover all n siblings.
+        while (c != null) {
+            // ignore non elements or others tags if of-type is true.
+            if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) {
+                c = try c.?.nextSibling();
+                continue;
+            }
+
+            count += 1;
+
+            if (n.eql(c.?)) return count == b;
+            if (count >= b) return false;
+
+            c = try c.?.nextSibling();
+        }
+
+        return false;
+    }
+
+    // nthChildMatch implements :nth-child(an+b).
+    // If last is true, implements :nth-last-child instead.
+    // If ofType is true, implements :nth-of-type instead.
+    fn nthChildMatch(a: isize, b: isize, last: bool, of_type: bool, n: anytype) anyerror!bool {
+        if (!n.isElement()) return false;
+
+        const p = try n.parent();
+        if (p == null) return false;
+
+        const ntag = try n.tag();
+
+        var i: isize = -1;
+        var count: isize = 0;
+        var c = try p.?.firstChild();
+        // loop hover all n siblings.
+        while (c != null) {
+            // ignore non elements or others tags if of-type is true.
+            if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) {
+                c = try c.?.nextSibling();
+                continue;
+            }
+            count += 1;
+
+            if (n.eql(c.?)) {
+                i = count;
+                if (!last) break;
+            }
+
+            c = try c.?.nextSibling();
+        }
+
+        if (i == -1) return false;
+
+        if (last) i = count - i + 1;
+
+        i -= b;
+        if (a == 0) return i == 0;
+        return @mod(i, a) == 0 and @divTrunc(i, a) >= 0;
+    }
+
     fn hasDescendantMatch(s: *const Selector, n: anytype) anyerror!bool {
         var c = try n.firstChild();
         while (c != null) {

From bd899111d5f5837bdc3a834093b97ec5d68a01da Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 10:25:46 +0100
Subject: [PATCH 16/28] css: implement :only-child and :only-of-type

---
 src/css/match_test.zig | 64 ++++++++++++++++++++++++++++++++++++++++++
 src/css/selector.zig   | 53 ++++++++++++++++++++++++++++++++--
 2 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 9aaeedbd..8e659043 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -399,6 +399,70 @@ test "matchAll" {
     }
 }
 
+test "pseudo class" {
+    const alloc = std.testing.allocator;
+
+    var matcher = Matcher.init(alloc);
+    defer matcher.deinit();
+
+    var p1: Node = .{ .name = "p" };
+    var p2: Node = .{ .name = "p" };
+    var a1: Node = .{ .name = "a" };
+
+    p1.sibling = &p2;
+    p2.prev = &p1;
+
+    p2.sibling = &a1;
+    a1.prev = &p2;
+
+    var root: Node = .{ .child = &p1, .last = &a1 };
+    p1.par = &root;
+    p2.par = &root;
+    a1.par = &root;
+
+    const testcases = [_]struct {
+        q: []const u8,
+        n: Node,
+        exp: ?*const Node,
+    }{
+        .{ .q = "p:only-child", .n = root, .exp = null },
+        .{ .q = "a:only-of-type", .n = root, .exp = &a1 },
+    };
+
+    for (testcases) |tc| {
+        matcher.reset();
+
+        const s = try css.parse(alloc, tc.q, .{});
+        defer s.deinit(alloc);
+
+        css.matchAll(s, &tc.n, &matcher) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+
+        if (tc.exp) |exp_n| {
+            const exp: usize = 1;
+            std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| {
+                std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                return e;
+            };
+
+            std.testing.expectEqual(exp_n, matcher.nodes.items[0]) catch |e| {
+                std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                return e;
+            };
+
+            continue;
+        }
+
+        const exp: usize = 0;
+        std.testing.expectEqual(exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+            return e;
+        };
+    }
+}
+
 test "nth pseudo class" {
     const alloc = std.testing.allocator;
 
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 381b1d67..a0c5d4d0 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -145,6 +145,7 @@ pub const Selector = union(enum) {
         UnknownCombinedCombinator,
         UnsupportedRelativePseudoClass,
         UnsupportedContainsPseudoClass,
+        UnsupportedPseudoClass,
         UnsupportedRegexpPseudoClass,
         UnsupportedAttrRegexpOperator,
     };
@@ -315,13 +316,61 @@ pub const Selector = union(enum) {
                 }
                 return nthChildMatch(v.a, v.b, v.last, v.of_type, n);
             },
-            .pseudo_class => return false,
-            .pseudo_class_only_child => return false,
+            .pseudo_class => |v| {
+                switch (v) {
+                    .input => return Error.UnsupportedPseudoClass,
+                    .empty => return Error.UnsupportedPseudoClass,
+                    .root => return Error.UnsupportedPseudoClass,
+                    .link => return Error.UnsupportedPseudoClass,
+                    .enabled => return Error.UnsupportedPseudoClass,
+                    .disabled => return Error.UnsupportedPseudoClass,
+                    .checked => return Error.UnsupportedPseudoClass,
+                    .visited => return Error.UnsupportedPseudoClass,
+                    .hover => return Error.UnsupportedPseudoClass,
+                    .active => return Error.UnsupportedPseudoClass,
+                    .focus => return Error.UnsupportedPseudoClass,
+                    .target => return Error.UnsupportedPseudoClass,
+
+                    // all others pseudo class are handled by specialized
+                    // pseudo_class_X selectors.
+                    else => return Error.UnsupportedPseudoClass,
+                }
+            },
+            .pseudo_class_only_child => |v| onlyChildMatch(v, n),
             .pseudo_class_lang => return false,
             .pseudo_element => return false,
         };
     }
 
+    // onlyChildMatch implements :only-child
+    //  If `ofType` is true, it implements :only-of-type instead.
+    fn onlyChildMatch(of_type: bool, n: anytype) anyerror!bool {
+        if (!n.isElement()) return false;
+
+        const p = try n.parent();
+        if (p == null) return false;
+
+        const ntag = try n.tag();
+
+        var count: usize = 0;
+        var c = try p.?.firstChild();
+        // loop hover all n siblings.
+        while (c != null) {
+            // ignore non elements or others tags if of-type is true.
+            if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) {
+                c = try c.?.nextSibling();
+                continue;
+            }
+
+            count += 1;
+            if (count > 1) return false;
+
+            c = try c.?.nextSibling();
+        }
+
+        return count == 1;
+    }
+
     // simpleNthLastChildMatch implements :nth-last-child(b).
     // If ofType is true, implements :nth-last-of-type instead.
     fn simpleNthLastChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool {

From 2671cda98f36467cca2202ee1e1b7764ed7efcd4 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 11:43:32 +0100
Subject: [PATCH 17/28] css: implement :lang match

---
 src/css/match_test.zig | 20 ++++++++++++++++++++
 src/css/selector.zig   | 19 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 8e659043..47fbdb78 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -215,6 +215,16 @@ test "matchFirst" {
             .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
             .exp = 0,
         },
+        .{
+            .q = "p:lang(en)",
+            .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "a:lang(en)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } },
+            .exp = 1,
+        },
     };
 
     for (testcases) |tc| {
@@ -379,6 +389,16 @@ test "matchAll" {
             .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
             .exp = 0,
         },
+        .{
+            .q = "p:lang(en)",
+            .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } },
+            .exp = 1,
+        },
+        .{
+            .q = "a:lang(en)",
+            .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } },
+            .exp = 1,
+        },
     };
 
     for (testcases) |tc| {
diff --git a/src/css/selector.zig b/src/css/selector.zig
index a0c5d4d0..72485cac 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -337,11 +337,28 @@ pub const Selector = union(enum) {
                 }
             },
             .pseudo_class_only_child => |v| onlyChildMatch(v, n),
-            .pseudo_class_lang => return false,
+            .pseudo_class_lang => |v| langMatch(v, n),
             .pseudo_element => return false,
         };
     }
 
+    fn langMatch(lang: []const u8, n: anytype) anyerror!bool {
+        if (try n.attr("lang")) |own| {
+            if (std.mem.eql(u8, own, lang)) return true;
+
+            // check if the lang attr starts with lang+'-'
+            if (std.mem.startsWith(u8, own, lang)) {
+                if (own.len > lang.len and own[lang.len] == '-') return true;
+            }
+        }
+
+        // if the tag doesn't match, try the parent.
+        const p = try n.parent();
+        if (p == null) return false;
+
+        return langMatch(lang, p.?);
+    }
+
     // onlyChildMatch implements :only-child
     //  If `ofType` is true, it implements :only-of-type instead.
     fn onlyChildMatch(of_type: bool, n: anytype) anyerror!bool {

From de9d253dc99d01b3f6e09525eabd38962681cfdc Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 14:48:08 +0100
Subject: [PATCH 18/28] css: implement missing pseudo classes

:input :empty :root :link :enabled :disabled :checked
---
 src/css/selector.zig | 171 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 156 insertions(+), 15 deletions(-)

diff --git a/src/css/selector.zig b/src/css/selector.zig
index 72485cac..b55199e0 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -146,6 +146,7 @@ pub const Selector = union(enum) {
         UnsupportedRelativePseudoClass,
         UnsupportedContainsPseudoClass,
         UnsupportedPseudoClass,
+        UnsupportedPseudoElement,
         UnsupportedRegexpPseudoClass,
         UnsupportedAttrRegexpOperator,
     };
@@ -317,31 +318,171 @@ pub const Selector = union(enum) {
                 return nthChildMatch(v.a, v.b, v.last, v.of_type, n);
             },
             .pseudo_class => |v| {
-                switch (v) {
-                    .input => return Error.UnsupportedPseudoClass,
-                    .empty => return Error.UnsupportedPseudoClass,
-                    .root => return Error.UnsupportedPseudoClass,
-                    .link => return Error.UnsupportedPseudoClass,
-                    .enabled => return Error.UnsupportedPseudoClass,
-                    .disabled => return Error.UnsupportedPseudoClass,
-                    .checked => return Error.UnsupportedPseudoClass,
-                    .visited => return Error.UnsupportedPseudoClass,
-                    .hover => return Error.UnsupportedPseudoClass,
-                    .active => return Error.UnsupportedPseudoClass,
-                    .focus => return Error.UnsupportedPseudoClass,
-                    .target => return Error.UnsupportedPseudoClass,
+                return switch (v) {
+                    .input => {
+                        if (!n.isElement()) return false;
+                        const ntag = try n.tag();
+
+                        return std.ascii.eqlIgnoreCase("input", ntag) or
+                            std.ascii.eqlIgnoreCase("select", ntag) or
+                            std.ascii.eqlIgnoreCase("button", ntag) or
+                            std.ascii.eqlIgnoreCase("textarea", ntag);
+                    },
+                    .empty => {
+                        if (!n.isElement()) return false;
+
+                        var c = try n.firstChild();
+                        while (c != null) {
+                            if (c.?.isElement()) return false;
+
+                            // TODO check text node content equals an empty
+                            // string ("")
+
+                            c = try c.?.nextSibling();
+                        }
+
+                        return true;
+                    },
+                    .root => {
+                        if (!n.isElement()) return false;
+
+                        const p = try n.parent();
+                        return p == null;
+                    },
+                    .link => {
+                        const ntag = try n.tag();
+
+                        return std.ascii.eqlIgnoreCase("a", ntag) or
+                            std.ascii.eqlIgnoreCase("area", ntag) or
+                            std.ascii.eqlIgnoreCase("link", ntag);
+                    },
+                    .enabled => {
+                        if (!n.isElement()) return false;
+
+                        const ntag = try n.tag();
+
+                        if (std.ascii.eqlIgnoreCase("a", ntag) or
+                            std.ascii.eqlIgnoreCase("area", ntag) or
+                            std.ascii.eqlIgnoreCase("link", ntag))
+                        {
+                            return try n.attr("href") != null;
+                        }
+
+                        if (std.ascii.eqlIgnoreCase("optgroup", ntag) or
+                            std.ascii.eqlIgnoreCase("menuitem", ntag) or
+                            std.ascii.eqlIgnoreCase("fieldset", ntag))
+                        {
+                            return try n.attr("disabled") == null;
+                        }
+
+                        if (std.ascii.eqlIgnoreCase("input", ntag) or
+                            std.ascii.eqlIgnoreCase("button", ntag) or
+                            std.ascii.eqlIgnoreCase("select", ntag) or
+                            std.ascii.eqlIgnoreCase("textarea", ntag) or
+                            std.ascii.eqlIgnoreCase("option", ntag))
+                        {
+                            return try n.attr("disabled") == null and
+                                !try inDisabledFieldset(n);
+                        }
+
+                        return false;
+                    },
+                    .disabled => {
+                        if (!n.isElement()) return false;
+
+                        const ntag = try n.tag();
+
+                        if (std.ascii.eqlIgnoreCase("optgroup", ntag) or
+                            std.ascii.eqlIgnoreCase("menuitem", ntag) or
+                            std.ascii.eqlIgnoreCase("fieldset", ntag))
+                        {
+                            return try n.attr("disabled") != null;
+                        }
+
+                        if (std.ascii.eqlIgnoreCase("input", ntag) or
+                            std.ascii.eqlIgnoreCase("button", ntag) or
+                            std.ascii.eqlIgnoreCase("select", ntag) or
+                            std.ascii.eqlIgnoreCase("textarea", ntag) or
+                            std.ascii.eqlIgnoreCase("option", ntag))
+                        {
+                            return try n.attr("disabled") != null or
+                                try inDisabledFieldset(n);
+                        }
+
+                        return false;
+                    },
+                    .checked => {
+                        if (!n.isElement()) return false;
+
+                        const ntag = try n.tag();
+
+                        if (std.ascii.eqlIgnoreCase("intput", ntag)) {
+                            const ntype = try n.attr("type");
+                            if (ntype == null) return false;
+
+                            if (std.mem.eql(u8, ntype.?, "checkbox") or
+                                std.mem.eql(u8, ntype.?, "radio"))
+                            {
+                                return try n.attr("checked") != null;
+                            }
+
+                            return false;
+                        }
+                        if (std.ascii.eqlIgnoreCase("option", ntag)) {
+                            return try n.attr("selected") != null;
+                        }
+
+                        return false;
+                    },
+                    .visited => return false,
+                    .hover => return false,
+                    .active => return false,
+                    .focus => return false,
+                    // TODO implement using the url fragment.
+                    // see https://developer.mozilla.org/en-US/docs/Web/CSS/:target
+                    .target => return false,
 
                     // all others pseudo class are handled by specialized
                     // pseudo_class_X selectors.
                     else => return Error.UnsupportedPseudoClass,
-                }
+                };
             },
             .pseudo_class_only_child => |v| onlyChildMatch(v, n),
             .pseudo_class_lang => |v| langMatch(v, n),
-            .pseudo_element => return false,
+
+            // pseudo elements doesn't make sense in the matching process.
+            // > A CSS pseudo-element is a keyword added to a selector that
+            // > lets you style a specific part of the selected element(s).
+            // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-elements
+            .pseudo_element => return Error.UnsupportedPseudoElement,
         };
     }
 
+    fn inDisabledFieldset(n: anytype) anyerror!bool {
+        const p = try n.parent();
+        if (p == null) return false;
+
+        const ptag = try p.?.tag();
+
+        if (std.ascii.eqlIgnoreCase("fieldset", ptag) and
+            try p.?.attr("disabled") != null)
+        {
+            return true;
+        }
+
+        // TODO should we handle legend like cascadia does?
+        // The implemention below looks suspicious, I didn't find a test case
+        // in cascadia and I didn't find the reference about legend in the
+        // specs. For now I do prefer ignoring this part.
+        //
+        // ```
+        // (n.DataAtom != atom.Legend || hasLegendInPreviousSiblings(n)) {
+        // ```
+        // https://github.com/andybalholm/cascadia/blob/master/pseudo_classes.go#L434
+
+        return try inDisabledFieldset(p.?);
+    }
+
     fn langMatch(lang: []const u8, n: anytype) anyerror!bool {
         if (try n.attr("lang")) |own| {
             if (std.mem.eql(u8, own, lang)) return true;

From e7738744cb00eb986b0b4d3ef9978e4a5a0d4b60 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 15:39:59 +0100
Subject: [PATCH 19/28] css: add libdom tests

---
 src/css/libdom_test.zig | 231 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 223 insertions(+), 8 deletions(-)

diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig
index 0e5a255d..b8444f06 100644
--- a/src/css/libdom_test.zig
+++ b/src/css/libdom_test.zig
@@ -37,9 +37,107 @@ test "matchFirst" {
         exp: usize,
     }{
         .{ .q = "address", .html = "<body><address>This address...</address></body>", .exp = 1 },
+        .{ .q = "*", .html = "<!-- comment --><html><head></head><body>text</body></html>", .exp = 1 },
+        .{ .q = "*", .html = "<html><head></head><body></body></html>", .exp = 1 },
         .{ .q = "#foo", .html = "<p id=\"foo\"><p id=\"bar\">", .exp = 1 },
-        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = "li#t1", .html = "<ul><li id=\"t1\"><p id=\"t1\">", .exp = 1 },
         .{ .q = ".t3", .html = "<ul><li class=\"t1\"><li class=\"t2 t3\">", .exp = 1 },
+        .{ .q = "*#t4", .html = "<ol><li id=\"t4\"><li id=\"t44\">", .exp = 1 },
+        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = "p.t1", .html = "<p class=\"t1 t2\">", .exp = 1 },
+        .{ .q = "div.teST", .html = "<div class=\"test\">", .exp = 0 },
+        .{ .q = ".t1.fail", .html = "<p class=\"t1 t2\">", .exp = 0 },
+        .{ .q = "p.t1.t2", .html = "<p class=\"t1 t2\">", .exp = 1 },
+        .{ .q = "p.--t1", .html = "<p class=\"--t1 --t2\">", .exp = 1 },
+        .{ .q = "p.--t1.--t2", .html = "<p class=\"--t1 --t2\">", .exp = 1 },
+        .{ .q = "p[title]", .html = "<p><p title=\"title\">", .exp = 1 },
+        .{ .q = "div[class=\"red\" i]", .html = "<div><div class=\"Red\">", .exp = 1 },
+        .{ .q = "address[title=\"foo\"]", .html = "<address><address title=\"foo\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "address[title=\"FoOIgnoRECaSe\" i]", .html = "<address><address title=\"fooIgnoreCase\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "address[title!=\"foo\"]", .html = "<address><address title=\"foo\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "address[title!=\"foo\" i]", .html = "<address><address title=\"FOO\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "p[title!=\"FooBarUFoo\" i]", .html = "<p title=\"fooBARuFOO\"><p title=\"varfoo\">", .exp = 1 },
+        .{ .q = "[   title        ~=       foo    ]", .html = "<p title=\"tot foo bar\">", .exp = 1 },
+        .{ .q = "p[title~=\"FOO\" i]", .html = "<p title=\"tot foo bar\">", .exp = 1 },
+        .{ .q = "p[title~=toofoo i]", .html = "<p title=\"tot foo bar\">", .exp = 0 },
+        .{ .q = "[title~=\"hello world\"]", .html = "<p title=\"hello world\">", .exp = 0 },
+        .{ .q = "[title~=\"hello\" i]", .html = "<p title=\"HELLO world\">", .exp = 1 },
+        .{ .q = "[title~=\"hello\"          I]", .html = "<p title=\"HELLO world\">", .exp = 1 },
+        .{ .q = "[lang|=\"en\"]", .html = "<p lang=\"en\"><p lang=\"en-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 1 },
+        .{ .q = "[lang|=\"EN\" i]", .html = "<p lang=\"en\"><p lang=\"En-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 1 },
+        .{ .q = "[lang|=\"EN\"     i]", .html = "<p lang=\"en\"><p lang=\"En-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 1 },
+        .{ .q = "[title^=\"foo\"]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title^=\"foo\" i]", .html = "<p title=\"FooBAR\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title$=\"bar\"]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title$=\"BAR\" i]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title*=\"bar\"]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "[title*=\"BaRu\" i]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "[title*=\"BaRu\" I]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "p[class$=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class$=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class^=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class^=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class*=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class*=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "input[name=Sex][value=F]", .html = "<input type=\"radio\" name=\"Sex\" value=\"F\"/>", .exp = 1 },
+        .{ .q = "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", .html = "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" style=\"table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF\"><tr style=\"height:64px\">aaa</tr></table>", .exp = 1 },
+        .{ .q = ".t1:not(.t2)", .html = "<p class=\"t1 t2\">", .exp = 0 },
+        .{ .q = "div:not(.t1)", .html = "<div class=\"t3\">", .exp = 1 },
+        .{ .q = "div:not([class=\"t2\"])", .html = "<div><div class=\"t2\"><div class=\"t3\">", .exp = 1 },
+        .{ .q = "li:nth-child(odd)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-child(even)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-child(-n+2)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-child(3n+1)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-last-child(odd)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 1 },
+        .{ .q = "li:nth-last-child(even)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 1 },
+        .{ .q = "li:nth-last-child(-n+2)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 1 },
+        .{ .q = "li:nth-last-child(3n+1)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 1 },
+        .{ .q = "span:first-child", .html = "<p>some text <span id=\"1\">and a span</span><span id=\"2\"> and another</span></p>", .exp = 1 },
+        .{ .q = "span:last-child", .html = "<span>a span</span> and some text", .exp = 1 },
+        .{ .q = "p:nth-of-type(2)", .html = "<address></address><p id=1><p id=2>", .exp = 1 },
+        .{ .q = "p:nth-last-of-type(2)", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:last-of-type", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:first-of-type", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:only-child", .html = "<div><p id=\"1\"></p><a></a></div><div><p id=\"2\"></p></div>", .exp = 1 },
+        .{ .q = "p:only-of-type", .html = "<div><p id=\"1\"></p><a></a></div><div><p id=\"2\"></p><p id=\"3\"></p></div>", .exp = 1 },
+        .{ .q = ":empty", .html = "<p id=\"1\"><!-- --><p id=\"2\">Hello<p id=\"3\"><span>", .exp = 1 },
+        .{ .q = "div p", .html = "<div><p id=\"1\"><table><tr><td><p id=\"2\"></table></div><p id=\"3\">", .exp = 1 },
+        .{ .q = "div table p", .html = "<div><p id=\"1\"><table><tr><td><p id=\"2\"></table></div><p id=\"3\">", .exp = 1 },
+        .{ .q = "div > p", .html = "<div><p id=\"1\"><div><p id=\"2\"></div><table><tr><td><p id=\"3\"></table></div>", .exp = 1 },
+        .{ .q = "p ~ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
+        .{ .q = "p + p", .html = "<p id=\"1\"></p> <!--comment--> <p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
+        .{ .q = "li, p", .html = "<ul><li></li><li></li></ul><p>", .exp = 1 },
+        .{ .q = "p +/*This is a comment*/ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
+        .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
+        .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "div:has(#p1)", .html = "<div id=\"d1\"><p id=\"p1\"><span>text content</span></p></div><div id=\"d2\"/>", .exp = 1 },
+        .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
+        .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        // .{ .q = "p:matches([\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches([a-z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches([a-zA-Z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches([^\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches(^(0|a))", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches(^\\d+$)", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:not(:matches(^\\d+$))", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "div :matchesOwn(^\\d+$)", .html = "<div><p id=\"p1\">01234<em>567</em>89</p><div>", .exp = 1 },
+        // .{ .q = "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", .html = "<ul> <li><a id=\"a1\" href=\"http://www.google.com/finance\"></a> <li><a id=\"a2\" href=\"http://finance.yahoo.com/\"></a> <li><a id=\"a2\" href=\"http://finance.untrusted.com/\"/> <li><a id=\"a3\" href=\"https://www.google.com/news\"/> <li><a id=\"a4\" href=\"http://news.yahoo.com\"/> </ul>", .exp = 1 },
+        // .{ .q = "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", .html = "<ul> <li><a id=\"a1\" href=\"http://www.google.com/finance\"/> <li><a id=\"a2\" href=\"http://finance.yahoo.com/\"/> <li><a id=\"a3\" href=\"https://www.google.com/news\"></a> <li><a id=\"a4\" href=\"http://news.yahoo.com\"/> </ul>", .exp = 1 },
+        .{ .q = ":input", .html = "<form> <label>Username <input type=\"text\" name=\"username\" /></label> <label>Password <input type=\"password\" name=\"password\" /></label> <label>Country <select name=\"country\"> <option value=\"ca\">Canada</option> <option value=\"us\">United States</option> </select> </label> <label>Bio <textarea name=\"bio\"></textarea></label> <button>Sign up</button> </form>", .exp = 1 },
+        .{ .q = ":root", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "html:nth-child(1)", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root:first-child", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root:nth-child(1)", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "a:not(:root)", .html = "<html><head></head><body><a href=\"http://www.foo.com\"></a></body></html>", .exp = 1 },
+        .{ .q = "body > *:nth-child(3n+2)", .html = "<html><head></head><body><p></p><div></div><span></span><a></a><form></form></body></html>", .exp = 1 },
+        .{ .q = "input:disabled", .html = "<html><head></head><body><fieldset disabled><legend id=\"1\"><input id=\"i1\"/></legend><legend id=\"2\"><input id=\"i2\"/></legend></fieldset></body></html>", .exp = 1 },
+        .{ .q = ":disabled", .html = "<html><head></head><body><fieldset disabled></fieldset></body></html>", .exp = 1 },
+        .{ .q = ":enabled", .html = "<html><head></head><body><fieldset></fieldset></body></html>", .exp = 1 },
+        .{ .q = "div.class1, div.class2", .html = "<div class=class1></div><div class=class2></div><div class=class3></div>", .exp = 1 },
     };
 
     for (testcases) |tc| {
@@ -48,13 +146,23 @@ test "matchFirst" {
         const doc = try parser.documentHTMLParseFromStr(tc.html);
         defer parser.documentHTMLClose(doc) catch {};
 
-        const s = try css.parse(alloc, tc.q, .{});
+        const s = css.parse(alloc, tc.q, .{}) catch |e| {
+            std.debug.print("parse, query: {s}\n", .{tc.q});
+            return e;
+        };
+
         defer s.deinit(alloc);
 
         const node = Node{ .node = parser.documentHTMLToNode(doc) };
 
-        _ = try css.matchFirst(s, node, &matcher);
-        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+        _ = css.matchFirst(s, node, &matcher) catch |e| {
+            std.debug.print("match, query: {s}\n", .{tc.q});
+            return e;
+        };
+        std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("expectation, query: {s}\n", .{tc.q});
+            return e;
+        };
     }
 }
 
@@ -70,9 +178,107 @@ test "matchAll" {
         exp: usize,
     }{
         .{ .q = "address", .html = "<body><address>This address...</address></body>", .exp = 1 },
+        .{ .q = "*", .html = "<!-- comment --><html><head></head><body>text</body></html>", .exp = 3 },
+        .{ .q = "*", .html = "<html><head></head><body></body></html>", .exp = 3 },
         .{ .q = "#foo", .html = "<p id=\"foo\"><p id=\"bar\">", .exp = 1 },
-        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = "li#t1", .html = "<ul><li id=\"t1\"><p id=\"t1\">", .exp = 1 },
         .{ .q = ".t3", .html = "<ul><li class=\"t1\"><li class=\"t2 t3\">", .exp = 1 },
+        .{ .q = "*#t4", .html = "<ol><li id=\"t4\"><li id=\"t44\">", .exp = 1 },
+        .{ .q = ".t1", .html = "<ul><li class=\"t1\"><li class=\"t2\">", .exp = 1 },
+        .{ .q = "p.t1", .html = "<p class=\"t1 t2\">", .exp = 1 },
+        .{ .q = "div.teST", .html = "<div class=\"test\">", .exp = 0 },
+        .{ .q = ".t1.fail", .html = "<p class=\"t1 t2\">", .exp = 0 },
+        .{ .q = "p.t1.t2", .html = "<p class=\"t1 t2\">", .exp = 1 },
+        .{ .q = "p.--t1", .html = "<p class=\"--t1 --t2\">", .exp = 1 },
+        .{ .q = "p.--t1.--t2", .html = "<p class=\"--t1 --t2\">", .exp = 1 },
+        .{ .q = "p[title]", .html = "<p><p title=\"title\">", .exp = 1 },
+        .{ .q = "div[class=\"red\" i]", .html = "<div><div class=\"Red\">", .exp = 1 },
+        .{ .q = "address[title=\"foo\"]", .html = "<address><address title=\"foo\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "address[title=\"FoOIgnoRECaSe\" i]", .html = "<address><address title=\"fooIgnoreCase\"><address title=\"bar\">", .exp = 1 },
+        .{ .q = "address[title!=\"foo\"]", .html = "<address><address title=\"foo\"><address title=\"bar\">", .exp = 2 },
+        .{ .q = "address[title!=\"foo\" i]", .html = "<address><address title=\"FOO\"><address title=\"bar\">", .exp = 2 },
+        .{ .q = "p[title!=\"FooBarUFoo\" i]", .html = "<p title=\"fooBARuFOO\"><p title=\"varfoo\">", .exp = 1 },
+        .{ .q = "[   title        ~=       foo    ]", .html = "<p title=\"tot foo bar\">", .exp = 1 },
+        .{ .q = "p[title~=\"FOO\" i]", .html = "<p title=\"tot foo bar\">", .exp = 1 },
+        .{ .q = "p[title~=toofoo i]", .html = "<p title=\"tot foo bar\">", .exp = 0 },
+        .{ .q = "[title~=\"hello world\"]", .html = "<p title=\"hello world\">", .exp = 0 },
+        .{ .q = "[title~=\"hello\" i]", .html = "<p title=\"HELLO world\">", .exp = 1 },
+        .{ .q = "[title~=\"hello\"          I]", .html = "<p title=\"HELLO world\">", .exp = 1 },
+        .{ .q = "[lang|=\"en\"]", .html = "<p lang=\"en\"><p lang=\"en-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 2 },
+        .{ .q = "[lang|=\"EN\" i]", .html = "<p lang=\"en\"><p lang=\"En-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 2 },
+        .{ .q = "[lang|=\"EN\"     i]", .html = "<p lang=\"en\"><p lang=\"En-gb\"><p lang=\"enough\"><p lang=\"fr-en\">", .exp = 2 },
+        .{ .q = "[title^=\"foo\"]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title^=\"foo\" i]", .html = "<p title=\"FooBAR\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title$=\"bar\"]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title$=\"BAR\" i]", .html = "<p title=\"foobar\"><p title=\"barfoo\">", .exp = 1 },
+        .{ .q = "[title*=\"bar\"]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "[title*=\"BaRu\" i]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "[title*=\"BaRu\" I]", .html = "<p title=\"foobarufoo\">", .exp = 1 },
+        .{ .q = "p[class$=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class$=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class^=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class^=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class*=\" \"]", .html = "<p class=\" \">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "p[class*=\"\"]", .html = "<p class=\"\">This text should be green.</p><p>This text should be green.</p>", .exp = 0 },
+        .{ .q = "input[name=Sex][value=F]", .html = "<input type=\"radio\" name=\"Sex\" value=\"F\"/>", .exp = 1 },
+        .{ .q = "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]", .html = "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" style=\"table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF\"><tr style=\"height:64px\">aaa</tr></table>", .exp = 1 },
+        .{ .q = ".t1:not(.t2)", .html = "<p class=\"t1 t2\">", .exp = 0 },
+        .{ .q = "div:not(.t1)", .html = "<div class=\"t3\">", .exp = 1 },
+        .{ .q = "div:not([class=\"t2\"])", .html = "<div><div class=\"t2\"><div class=\"t3\">", .exp = 2 },
+        .{ .q = "li:nth-child(odd)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 2 },
+        .{ .q = "li:nth-child(even)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-child(-n+2)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 2 },
+        .{ .q = "li:nth-child(3n+1)", .html = "<ol><li id=1><li id=2><li id=3></ol>", .exp = 1 },
+        .{ .q = "li:nth-last-child(odd)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 2 },
+        .{ .q = "li:nth-last-child(even)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 2 },
+        .{ .q = "li:nth-last-child(-n+2)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 2 },
+        .{ .q = "li:nth-last-child(3n+1)", .html = "<ol><li id=1><li id=2><li id=3><li id=4></ol>", .exp = 2 },
+        .{ .q = "span:first-child", .html = "<p>some text <span id=\"1\">and a span</span><span id=\"2\"> and another</span></p>", .exp = 1 },
+        .{ .q = "span:last-child", .html = "<span>a span</span> and some text", .exp = 1 },
+        .{ .q = "p:nth-of-type(2)", .html = "<address></address><p id=1><p id=2>", .exp = 1 },
+        .{ .q = "p:nth-last-of-type(2)", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:last-of-type", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:first-of-type", .html = "<address></address><p id=1><p id=2></p><a>", .exp = 1 },
+        .{ .q = "p:only-child", .html = "<div><p id=\"1\"></p><a></a></div><div><p id=\"2\"></p></div>", .exp = 1 },
+        .{ .q = "p:only-of-type", .html = "<div><p id=\"1\"></p><a></a></div><div><p id=\"2\"></p><p id=\"3\"></p></div>", .exp = 1 },
+        .{ .q = ":empty", .html = "<p id=\"1\"><!-- --><p id=\"2\">Hello<p id=\"3\"><span>", .exp = 3 },
+        .{ .q = "div p", .html = "<div><p id=\"1\"><table><tr><td><p id=\"2\"></table></div><p id=\"3\">", .exp = 2 },
+        .{ .q = "div table p", .html = "<div><p id=\"1\"><table><tr><td><p id=\"2\"></table></div><p id=\"3\">", .exp = 1 },
+        .{ .q = "div > p", .html = "<div><p id=\"1\"><div><p id=\"2\"></div><table><tr><td><p id=\"3\"></table></div>", .exp = 2 },
+        .{ .q = "p ~ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 2 },
+        .{ .q = "p + p", .html = "<p id=\"1\"></p> <!--comment--> <p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
+        .{ .q = "li, p", .html = "<ul><li></li><li></li></ul><p>", .exp = 3 },
+        .{ .q = "p +/*This is a comment*/ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
+        .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
+        .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        .{ .q = "div:has(#p1)", .html = "<div id=\"d1\"><p id=\"p1\"><span>text content</span></p></div><div id=\"d2\"/>", .exp = 1 },
+        .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
+        .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 2 },
+        .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        // .{ .q = "p:matches([\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },
+        // .{ .q = "p:matches([a-z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:matches([a-zA-Z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },
+        // .{ .q = "p:matches([^\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },
+        // .{ .q = "p:matches(^(0|a))", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 3 },
+        // .{ .q = "p:matches(^\\d+$)", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
+        // .{ .q = "p:not(:matches(^\\d+$))", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },
+        // .{ .q = "div :matchesOwn(^\\d+$)", .html = "<div><p id=\"p1\">01234<em>567</em>89</p><div>", .exp = 2 },
+        // .{ .q = "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", .html = "<ul> <li><a id=\"a1\" href=\"http://www.google.com/finance\"></a> <li><a id=\"a2\" href=\"http://finance.yahoo.com/\"></a> <li><a id=\"a2\" href=\"http://finance.untrusted.com/\"/> <li><a id=\"a3\" href=\"https://www.google.com/news\"/> <li><a id=\"a4\" href=\"http://news.yahoo.com\"/> </ul>", .exp = 2 },
+        // .{ .q = "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", .html = "<ul> <li><a id=\"a1\" href=\"http://www.google.com/finance\"/> <li><a id=\"a2\" href=\"http://finance.yahoo.com/\"/> <li><a id=\"a3\" href=\"https://www.google.com/news\"></a> <li><a id=\"a4\" href=\"http://news.yahoo.com\"/> </ul>", .exp = 1 },
+        .{ .q = ":input", .html = "<form> <label>Username <input type=\"text\" name=\"username\" /></label> <label>Password <input type=\"password\" name=\"password\" /></label> <label>Country <select name=\"country\"> <option value=\"ca\">Canada</option> <option value=\"us\">United States</option> </select> </label> <label>Bio <textarea name=\"bio\"></textarea></label> <button>Sign up</button> </form>", .exp = 5 },
+        .{ .q = ":root", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "html:nth-child(1)", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root:first-child", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "*:root:nth-child(1)", .html = "<html><head></head><body></body></html>", .exp = 1 },
+        .{ .q = "a:not(:root)", .html = "<html><head></head><body><a href=\"http://www.foo.com\"></a></body></html>", .exp = 1 },
+        .{ .q = "body > *:nth-child(3n+2)", .html = "<html><head></head><body><p></p><div></div><span></span><a></a><form></form></body></html>", .exp = 2 },
+        .{ .q = "input:disabled", .html = "<html><head></head><body><fieldset disabled><legend id=\"1\"><input id=\"i1\"/></legend><legend id=\"2\"><input id=\"i2\"/></legend></fieldset></body></html>", .exp = 1 },
+        .{ .q = ":disabled", .html = "<html><head></head><body><fieldset disabled></fieldset></body></html>", .exp = 1 },
+        .{ .q = ":enabled", .html = "<html><head></head><body><fieldset></fieldset></body></html>", .exp = 1 },
+        .{ .q = "div.class1, div.class2", .html = "<div class=class1></div><div class=class2></div><div class=class3></div>", .exp = 2 },
     };
 
     for (testcases) |tc| {
@@ -81,12 +287,21 @@ test "matchAll" {
         const doc = try parser.documentHTMLParseFromStr(tc.html);
         defer parser.documentHTMLClose(doc) catch {};
 
-        const s = try css.parse(alloc, tc.q, .{});
+        const s = css.parse(alloc, tc.q, .{}) catch |e| {
+            std.debug.print("parse, query: {s}\n", .{tc.q});
+            return e;
+        };
         defer s.deinit(alloc);
 
         const node = Node{ .node = parser.documentHTMLToNode(doc) };
 
-        _ = try css.matchAll(s, node, &matcher);
-        try std.testing.expectEqual(tc.exp, matcher.nodes.items.len);
+        _ = css.matchAll(s, node, &matcher) catch |e| {
+            std.debug.print("match, query: {s}\n", .{tc.q});
+            return e;
+        };
+        std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+            std.debug.print("expectation, query: {s}\n", .{tc.q});
+            return e;
+        };
     }
 }

From 565d612abbef54135b4860bb827b8add5abb0e41 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 15:40:23 +0100
Subject: [PATCH 20/28] css: trim attribute op value

---
 src/css/selector.zig | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/css/selector.zig b/src/css/selector.zig
index b55199e0..ee637549 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -268,7 +268,7 @@ pub const Selector = union(enum) {
                 };
             },
             .attribute => |v| {
-                const attr = try n.attr(v.key);
+                var attr = try n.attr(v.key);
 
                 if (v.op == null) return attr != null;
                 if (v.val == null or v.val.?.len == 0) return false;
@@ -279,9 +279,30 @@ pub const Selector = union(enum) {
                     .eql => attr != null and eql(attr.?, val, v.ci),
                     .not_eql => attr == null or !eql(attr.?, val, v.ci),
                     .one_of => attr != null and word(attr.?, val, v.ci),
-                    .prefix => attr != null and starts(attr.?, val, v.ci),
-                    .suffix => attr != null and ends(attr.?, val, v.ci),
-                    .contains => attr != null and contains(attr.?, val, v.ci),
+                    .prefix => {
+                        if (attr == null) return false;
+                        attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                        if (attr.?.len == 0) return false;
+
+                        return starts(attr.?, val, v.ci);
+                    },
+                    .suffix => {
+                        if (attr == null) return false;
+                        attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                        if (attr.?.len == 0) return false;
+
+                        return ends(attr.?, val, v.ci);
+                    },
+                    .contains => {
+                        if (attr == null) return false;
+                        attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                        if (attr.?.len == 0) return false;
+
+                        return contains(attr.?, val, v.ci);
+                    },
                     .prefix_hyphen => {
                         if (attr == null) return false;
                         if (eql(attr.?, val, v.ci)) return true;

From dcc7e51556f56738151624bc7f75beb25338718e Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 17:09:11 +0100
Subject: [PATCH 21/28] css: implement ~, + and > combinators

---
 src/css/selector.zig | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/css/selector.zig b/src/css/selector.zig
index ee637549..696203d5 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -264,7 +264,34 @@ pub const Selector = union(enum) {
 
                         return false;
                     },
-                    else => return Error.UnknownCombinedCombinator,
+                    .child => {
+                        const p = try n.parent();
+                        if (p == null) return false;
+
+                        return try v.second.match(n) and try v.first.match(p.?);
+                    },
+                    .next_sibling => {
+                        if (!try v.second.match(n)) return false;
+                        var c = try n.prevSibling();
+                        while (c != null) {
+                            if (!c.?.isElement()) { // TODO must check text node or comment node instead.
+                                c = try c.?.prevSibling();
+                                continue;
+                            }
+                            return try v.first.match(c.?);
+                        }
+                        return false;
+                    },
+                    .subsequent_sibling => {
+                        if (!try v.second.match(n)) return false;
+
+                        var c = try n.prevSibling();
+                        while (c != null) {
+                            if (try v.first.match(c.?)) return true;
+                            c = try c.?.prevSibling();
+                        }
+                        return false;
+                    },
                 };
             },
             .attribute => |v| {

From 8a918407839515ee6df9d6d495a69cced29b50a7 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 17:09:55 +0100
Subject: [PATCH 22/28] css: comment :contains test

---
 src/css/libdom_test.zig | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig
index b8444f06..ee651f2f 100644
--- a/src/css/libdom_test.zig
+++ b/src/css/libdom_test.zig
@@ -108,14 +108,14 @@ test "matchFirst" {
         .{ .q = "p + p", .html = "<p id=\"1\"></p> <!--comment--> <p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
         .{ .q = "li, p", .html = "<ul><li></li><li></li></ul><p>", .exp = 1 },
         .{ .q = "p +/*This is a comment*/ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
-        .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
-        .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
-        .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
-        .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
-        .{ .q = "div:has(#p1)", .html = "<div id=\"d1\"><p id=\"p1\"><span>text content</span></p></div><div id=\"d2\"/>", .exp = 1 },
-        .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
-        .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
-        .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        // .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
+        // .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "div:has(#p1)", .html = "<div id=\"d1\"><p id=\"p1\"><span>text content</span></p></div><div id=\"d2\"/>", .exp = 1 },
+        // .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
+        // .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        // .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
         // .{ .q = "p:matches([\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
         // .{ .q = "p:matches([a-z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
         // .{ .q = "p:matches([a-zA-Z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
@@ -249,14 +249,14 @@ test "matchAll" {
         .{ .q = "p + p", .html = "<p id=\"1\"></p> <!--comment--> <p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
         .{ .q = "li, p", .html = "<ul><li></li><li></li></ul><p>", .exp = 3 },
         .{ .q = "p +/*This is a comment*/ p", .html = "<p id=\"1\"><p id=\"2\"></p><address></address><p id=\"3\">", .exp = 1 },
-        .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
-        .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
-        .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
-        .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "p:contains(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "p:containsOwn(\"that wraps\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 0 },
+        // .{ .q = ":containsOwn(\"inner\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
+        // .{ .q = "p:containsOwn(\"block\")", .html = "<p>Text block that <span>wraps inner text</span> and continues</p>", .exp = 1 },
         .{ .q = "div:has(#p1)", .html = "<div id=\"d1\"><p id=\"p1\"><span>text content</span></p></div><div id=\"d2\"/>", .exp = 1 },
-        .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
-        .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 2 },
-        .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
+        // .{ .q = "div:has(:containsOwn(\"2\"))", .html = "<div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p>contents <em>2</em></p></div>", .exp = 1 },
+        // .{ .q = "body :has(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 2 },
+        // .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "<body><div id=\"d1\"><p id=\"p1\"><span>contents 1</span></p></div> <div id=\"d2\"><p id=\"p2\">contents <em>2</em></p></div></body>", .exp = 1 },
         // .{ .q = "p:matches([\\d])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },
         // .{ .q = "p:matches([a-z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 1 },
         // .{ .q = "p:matches([a-zA-Z])", .html = "<p id=\"p1\">0123456789</p><p id=\"p2\">abcdef</p><p id=\"p3\">0123ABCD</p>", .exp = 2 },

From 2c7650cdb15ef39e8b84134034852a410fecc074 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 17:38:21 +0100
Subject: [PATCH 23/28] css: add isDocument, isText and isComment

---
 src/css/libdom.zig     | 15 +++++++++++++++
 src/css/match_test.zig | 12 ++++++++++++
 src/css/selector.zig   |  4 ++--
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 04c99a66..93acb790 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -46,6 +46,21 @@ pub const Node = struct {
         return t == .element;
     }
 
+    pub fn isDocument(n: Node) bool {
+        const t = parser.nodeType(n.node) catch return false;
+        return t == .document;
+    }
+
+    pub fn isComment(n: Node) bool {
+        const t = parser.nodeType(n.node) catch return false;
+        return t == .comment;
+    }
+
+    pub fn isText(n: Node) bool {
+        const t = parser.nodeType(n.node) catch return false;
+        return t == .text;
+    }
+
     pub fn tag(n: Node) ![]const u8 {
         return try parser.nodeName(n.node);
     }
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 47fbdb78..796ee7d7 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -36,6 +36,18 @@ pub const Node = struct {
         return true;
     }
 
+    pub fn isDocument(_: *const Node) bool {
+        return false;
+    }
+
+    pub fn isComment(_: *const Node) bool {
+        return false;
+    }
+
+    pub fn isText(_: *const Node) bool {
+        return false;
+    }
+
     pub fn tag(n: *const Node) ![]const u8 {
         return n.name;
     }
diff --git a/src/css/selector.zig b/src/css/selector.zig
index 696203d5..e678fb1d 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -274,7 +274,7 @@ pub const Selector = union(enum) {
                         if (!try v.second.match(n)) return false;
                         var c = try n.prevSibling();
                         while (c != null) {
-                            if (!c.?.isElement()) { // TODO must check text node or comment node instead.
+                            if (c.?.isText() or c.?.isComment()) {
                                 c = try c.?.prevSibling();
                                 continue;
                             }
@@ -395,7 +395,7 @@ pub const Selector = union(enum) {
                         if (!n.isElement()) return false;
 
                         const p = try n.parent();
-                        return p == null;
+                        return (p != null and p.?.isDocument());
                     },
                     .link => {
                         const ntag = try n.tag();

From 4e61a50946d638915b204513c8ef4d0538c73eed Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 17:45:19 +0100
Subject: [PATCH 24/28] css: add isEmptyText in node interface

---
 src/css/libdom.zig     | 8 ++++++++
 src/css/match_test.zig | 4 ++++
 src/css/selector.zig   | 6 ++++--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 93acb790..4166b216 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -61,6 +61,14 @@ pub const Node = struct {
         return t == .text;
     }
 
+    pub fn isEmptyText(n: Node) !bool {
+        const data = try parser.nodeTextContent(n.node);
+        if (data == null) return true;
+        if (data.?.len == 0) return true;
+
+        return std.mem.trim(u8, data.?, &std.ascii.whitespace).len == 0;
+    }
+
     pub fn tag(n: Node) ![]const u8 {
         return try parser.nodeName(n.node);
     }
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
index 796ee7d7..f30f2e5b 100644
--- a/src/css/match_test.zig
+++ b/src/css/match_test.zig
@@ -48,6 +48,10 @@ pub const Node = struct {
         return false;
     }
 
+    pub fn isEmptyText(_: *const Node) !bool {
+        return false;
+    }
+
     pub fn tag(n: *const Node) ![]const u8 {
         return n.name;
     }
diff --git a/src/css/selector.zig b/src/css/selector.zig
index e678fb1d..fbe782d3 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -383,8 +383,10 @@ pub const Selector = union(enum) {
                         while (c != null) {
                             if (c.?.isElement()) return false;
 
-                            // TODO check text node content equals an empty
-                            // string ("")
+                            if (c.?.isText()) {
+                                if (try c.?.isEmptyText()) continue;
+                                return false;
+                            }
 
                             c = try c.?.nextSibling();
                         }

From 4c50b2af1a2eb20936090e5422395c110b1c6136 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 17:55:30 +0100
Subject: [PATCH 25/28] css: implement legend siblings check for :disabled

---
 src/css/selector.zig | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/css/selector.zig b/src/css/selector.zig
index fbe782d3..b5779899 100644
--- a/src/css/selector.zig
+++ b/src/css/selector.zig
@@ -508,14 +508,26 @@ pub const Selector = union(enum) {
         };
     }
 
+    fn hasLegendInPreviousSiblings(n: anytype) anyerror!bool {
+        var c = try n.prevSibling();
+        while (c != null) {
+            const ctag = try c.?.tag();
+            if (std.ascii.eqlIgnoreCase("legend", ctag)) return true;
+            c = try c.?.prevSibling();
+        }
+        return false;
+    }
+
     fn inDisabledFieldset(n: anytype) anyerror!bool {
         const p = try n.parent();
         if (p == null) return false;
 
+        const ntag = try n.tag();
         const ptag = try p.?.tag();
 
         if (std.ascii.eqlIgnoreCase("fieldset", ptag) and
-            try p.?.attr("disabled") != null)
+            try p.?.attr("disabled") != null and
+            (!std.ascii.eqlIgnoreCase("legend", ntag) or try hasLegendInPreviousSiblings(n)))
         {
             return true;
         }

From 0fa49b99bfc4873bd9a611205ec31b84d771f6f4 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Mon, 25 Mar 2024 18:35:28 +0100
Subject: [PATCH 26/28] css: add README

---
 src/css/README.md | 218 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100644 src/css/README.md

diff --git a/src/css/README.md b/src/css/README.md
new file mode 100644
index 00000000..fc2a7352
--- /dev/null
+++ b/src/css/README.md
@@ -0,0 +1,218 @@
+# css
+
+Lightpanda css implements CSS selectors parsing and matching in Zig.
+This package is a port of the Go lib [andybalholm/cascadia](https://github.com/andybalholm/cascadia).
+
+## Usage
+
+### Query parser
+
+```zig
+const css = @import("css.zig");
+
+const selector = try css.parse(alloc, "h1", .{});
+defer selector.deinit(alloc);
+```
+
+### DOM tree match
+
+The lib expects a `Node` interface implementation to match your DOM tree.
+
+```zig
+pub const Node = struct {
+    pub fn firstChild(_: Node) !?Node {
+        return error.TODO;
+    }
+
+    pub fn lastChild(_: Node) !?Node {
+        return error.TODO;
+    }
+
+    pub fn nextSibling(_: Node) !?Node {
+        return error.TODO;
+    }
+
+    pub fn prevSibling(_: Node) !?Node {
+        return error.TODO;
+    }
+
+    pub fn parent(_: Node) !?Node {
+        return error.TODO;
+    }
+
+    pub fn isElement(_: Node) bool {
+        return false;
+    }
+
+    pub fn isDocument(_: Node) bool {
+        return false;
+    }
+
+    pub fn isComment(_: Node) bool {
+        return false;
+    }
+
+    pub fn isText(_: Node) bool {
+        return false;
+    }
+
+    pub fn isEmptyText(_: Node) !bool {
+        return error.TODO;
+    }
+
+    pub fn tag(_: Node) ![]const u8 {
+        return error.TODO;
+    }
+
+    pub fn attr(_: Node, _: []const u8) !?[]const u8 {
+        return error.TODO;
+    }
+
+    pub fn eql(_: Node, _: Node) bool {
+        return false;
+    }
+};
+```
+
+You also need do define a `Matcher` implementing a `match` function to
+accumulate the results.
+
+```zig
+const Matcher = struct {
+    const Nodes = std.ArrayList(Node);
+
+    nodes: Nodes,
+
+    fn init(alloc: std.mem.Allocator) Matcher {
+        return .{ .nodes = Nodes.init(alloc) };
+    }
+
+    fn deinit(m: *Matcher) void {
+        m.nodes.deinit();
+    }
+
+    pub fn match(m: *Matcher, n: Node) !void {
+        try m.nodes.append(n);
+    }
+};
+```
+
+Then you can use the lib itself.
+
+```zig
+var matcher = Matcher.init(alloc);
+defer matcher.deinit();
+
+try css.matchAll(selector, node, &matcher);
+_ = try css.matchFirst(selector, node, &matcher); // returns true if a node matched.
+```
+
+## Features
+
+* [x] parse query selector
+* [x] `matchAll`
+* [x] `matchFirst`
+* [ ] specificity
+
+### Selectors implemented
+
+#### Selectors
+
+* [x] Class selectors
+* [x] Id selectors
+* [x] Type selectors
+* [x] Universal selectors
+* [ ] Nesting selectors
+
+#### Combinators
+
+* [x] Child combinator
+* [ ] Column combinator
+* [x] Descendant combinator
+* [ ] Namespace combinator
+* [x] Next-sibling combinator
+* [x] Selector list combinator
+* [x] Subsequent-sibling combinator
+
+#### Attribute
+
+* [x] `[attr]`
+* [x] `[attr=value]`
+* [x] `[attr|=value]`
+* [x] `[attr^=value]`
+* [x] `[attr$=value]`
+* [ ] `[attr*=value]`
+* [x] `[attr operator value i]`
+* [ ] `[attr operator value s]`
+
+#### Pseudo classes
+
+* [ ] `:active`
+* [ ] `:any-link`
+* [ ] `:autofill`
+* [ ] `:blank Experimental`
+* [x] `:checked`
+* [ ] `:current Experimental`
+* [ ] `:default`
+* [ ] `:defined`
+* [ ] `:dir() Experimental`
+* [x] `:disabled`
+* [x] `:empty`
+* [x] `:enabled`
+* [ ] `:first`
+* [x] `:first-child`
+* [x] `:first-of-type`
+* [ ] `:focus`
+* [ ] `:focus-visible`
+* [ ] `:focus-within`
+* [ ] `:fullscreen`
+* [ ] `:future Experimental`
+* [x] `:has() Experimental`
+* [ ] `:host`
+* [ ] `:host()`
+* [ ] `:host-context() Experimental`
+* [ ] `:hover`
+* [ ] `:indeterminate`
+* [ ] `:in-range`
+* [ ] `:invalid`
+* [ ] `:is()`
+* [x] `:lang()`
+* [x] `:last-child`
+* [x] `:last-of-type`
+* [ ] `:left`
+* [x] `:link`
+* [ ] `:local-link Experimental`
+* [ ] `:modal`
+* [x] `:not()`
+* [x] `:nth-child()`
+* [x] `:nth-last-child()`
+* [x] `:nth-last-of-type()`
+* [x] `:nth-of-type()`
+* [x] `:only-child`
+* [x] `:only-of-type`
+* [ ] `:optional`
+* [ ] `:out-of-range`
+* [ ] `:past Experimental`
+* [ ] `:paused`
+* [ ] `:picture-in-picture`
+* [ ] `:placeholder-shown`
+* [ ] `:playing`
+* [ ] `:read-only`
+* [ ] `:read-write`
+* [ ] `:required`
+* [ ] `:right`
+* [x] `:root`
+* [ ] `:scope`
+* [ ] `:state() Experimental`
+* [ ] `:target`
+* [ ] `:target-within Experimental`
+* [ ] `:user-invalid Experimental`
+* [ ] `:valid`
+* [ ] `:visited`
+* [ ] `:where()`
+* [ ] `:contains()`
+* [ ] `:containsown()`
+* [ ] `:matched()`
+* [ ] `:matchesown()`
+* [x] `:root`
+

From 4d5f6d42fa9f9078d39b24f838c0aad4cc6056e8 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Tue, 26 Mar 2024 10:02:15 +0100
Subject: [PATCH 27/28] dom: use the css matcher for DOM

---
 src/dom/css.zig       | 61 +++++++++++++++++++++++++++++++++++++++++++
 src/dom/document.zig  | 57 ++++++++++------------------------------
 src/dom/element.zig   | 61 ++++++++++++-------------------------------
 src/dom/node.zig      |  2 +-
 src/dom/nodelist.zig  |  2 +-
 src/html/document.zig |  2 +-
 6 files changed, 94 insertions(+), 91 deletions(-)
 create mode 100644 src/dom/css.zig

diff --git a/src/dom/css.zig b/src/dom/css.zig
new file mode 100644
index 00000000..4e293c92
--- /dev/null
+++ b/src/dom/css.zig
@@ -0,0 +1,61 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+const css = @import("../css/css.zig");
+const Node = @import("../css/libdom.zig").Node;
+const NodeList = @import("nodelist.zig").NodeList;
+
+const MatchFirst = struct {
+    n: ?*parser.Node = null,
+
+    pub fn match(m: *MatchFirst, n: Node) !void {
+        m.n = n.node;
+    }
+};
+
+pub fn querySelector(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !?*parser.Node {
+    const ps = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
+    defer ps.deinit(alloc);
+
+    var m = MatchFirst{};
+
+    _ = try css.matchFirst(ps, Node{ .node = n }, &m);
+    return m.n;
+}
+
+const MatchAll = struct {
+    alloc: std.mem.Allocator,
+    nl: NodeList,
+
+    fn init(alloc: std.mem.Allocator) MatchAll {
+        return .{
+            .alloc = alloc,
+            .nl = NodeList.init(),
+        };
+    }
+
+    fn deinit(m: *MatchAll) void {
+        m.nl.deinit(m.alloc);
+    }
+
+    pub fn match(m: *MatchAll, n: Node) !void {
+        try m.nl.append(m.alloc, n.node);
+    }
+
+    fn toOwnedList(m: *MatchAll) NodeList {
+        defer m.nl = NodeList.init();
+        return m.nl;
+    }
+};
+
+pub fn querySelectorAll(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !NodeList {
+    const ps = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
+    defer ps.deinit(alloc);
+
+    var m = MatchAll.init(alloc);
+    defer m.deinit();
+
+    try css.matchAll(ps, Node{ .node = n }, &m);
+    return m.toOwnedList();
+}
diff --git a/src/dom/document.zig b/src/dom/document.zig
index 7f3af5cf..c99cc5fc 100644
--- a/src/dom/document.zig
+++ b/src/dom/document.zig
@@ -13,6 +13,7 @@ const NodeUnion = @import("node.zig").Union;
 
 const Walker = @import("walker.zig").WalkerDepthFirst;
 const collection = @import("html_collection.zig");
+const css = @import("css.zig");
 
 const Element = @import("element.zig").Element;
 const ElementUnion = @import("element.zig").Union;
@@ -188,54 +189,18 @@ pub const Document = struct {
         return 1;
     }
 
-    // TODO netsurf doesn't handle query selectors. We have to implement a
-    // solution by ourselves.
-    // For now we handle only * and single id selector like `#foo`.
-    pub fn _querySelector(self: *parser.Document, selectors: []const u8) !?ElementUnion {
-        if (selectors.len == 0) return null;
+    pub fn _querySelector(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !?ElementUnion {
+        if (selector.len == 0) return null;
 
-        // catch-all, return the firstElementChild
-        if (selectors[0] == '*') return try get_firstElementChild(self);
+        const n = try css.querySelector(alloc, parser.documentToNode(self), selector);
 
-        // support only simple id selector.
-        if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
+        if (n == null) return null;
 
-        return try _getElementById(self, selectors[1..]);
+        return try Element.toInterface(parser.nodeToElement(n.?));
     }
 
-    // TODO netsurf doesn't handle query selectors. We have to implement a
-    // solution by ourselves.
-    // We handle only * and single id selector like `#foo`.
-    pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
-        var list = try NodeList.init();
-        errdefer list.deinit(alloc);
-
-        if (selectors.len == 0) return list;
-
-        // catch-all, return all elements
-        if (selectors[0] == '*') {
-            // walk over the node tree fo find the node by id.
-            const root = parser.documentToNode(self);
-            const walker = Walker{};
-            var next: ?*parser.Node = null;
-            while (true) {
-                next = try walker.get_next(root, next) orelse return list;
-                // ignore non-element nodes.
-                if (try parser.nodeType(next.?) != .element) {
-                    continue;
-                }
-                try list.append(alloc, next.?);
-            }
-        }
-
-        // support only simple id selector.
-        if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
-
-        // walk over the node tree fo find the node by id.
-        const e = try parser.documentGetElementById(self, selectors[1..]) orelse return list;
-        try list.append(alloc, parser.elementToNode(e));
-
-        return list;
+    pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
+        return css.querySelectorAll(alloc, parser.documentToNode(self), selector);
     }
 
     // TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -426,6 +391,12 @@ pub fn testExecFn(
         .{ .src = "document.querySelector('*').nodeName", .ex = "HTML" },
         .{ .src = "document.querySelector('#content').id", .ex = "content" },
         .{ .src = "document.querySelector('#para').id", .ex = "para" },
+        .{ .src = "document.querySelector('.ok').id", .ex = "link" },
+        .{ .src = "document.querySelector('a ~ p').id", .ex = "para-empty" },
+        .{ .src = "document.querySelector(':root').nodeName", .ex = "HTML" },
+
+        .{ .src = "document.querySelectorAll('p').length", .ex = "2" },
+        .{ .src = "document.querySelectorAll('.ok').item(0).id", .ex = "link" },
     };
     try checkCases(js_env, &querySelector);
 
diff --git a/src/dom/element.zig b/src/dom/element.zig
index 7f58ea10..80d31992 100644
--- a/src/dom/element.zig
+++ b/src/dom/element.zig
@@ -9,6 +9,7 @@ const Variadic = jsruntime.Variadic;
 
 const collection = @import("html_collection.zig");
 const writeNode = @import("../browser/dump.zig").writeNode;
+const css = @import("css.zig");
 
 const Node = @import("node.zig").Node;
 const Walker = @import("walker.zig").WalkerDepthFirst;
@@ -263,56 +264,18 @@ pub const Element = struct {
         }
     }
 
-    // TODO netsurf doesn't handle query selectors. We have to implement a
-    // solution by ourselves.
-    // We handle only * and single id selector like `#foo`.
-    pub fn _querySelector(self: *parser.Element, selectors: []const u8) !?Union {
-        if (selectors.len == 0) return null;
+    pub fn _querySelector(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !?Union {
+        if (selector.len == 0) return null;
 
-        // catch-all, return the firstElementChild
-        if (selectors[0] == '*') return try get_firstElementChild(self);
+        const n = try css.querySelector(alloc, parser.elementToNode(self), selector);
 
-        // support only simple id selector.
-        if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
+        if (n == null) return null;
 
-        // walk over the node tree fo find the node by id.
-        const n = try getElementById(self, selectors[1..]) orelse return null;
-        return try toInterface(parser.nodeToElement(n));
+        return try toInterface(parser.nodeToElement(n.?));
     }
 
-    // TODO netsurf doesn't handle query selectors. We have to implement a
-    // solution by ourselves.
-    // We handle only * and single id selector like `#foo`.
-    pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
-        var list = try NodeList.init();
-        errdefer list.deinit(alloc);
-
-        if (selectors.len == 0) return list;
-
-        // catch-all, return all elements
-        if (selectors[0] == '*') {
-            // walk over the node tree fo find the node by id.
-            const root = parser.elementToNode(self);
-            const walker = Walker{};
-            var next: ?*parser.Node = null;
-            while (true) {
-                next = try walker.get_next(root, next) orelse return list;
-                // ignore non-element nodes.
-                if (try parser.nodeType(next.?) != .element) {
-                    continue;
-                }
-                try list.append(alloc, next.?);
-            }
-        }
-
-        // support only simple id selector.
-        if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
-
-        // walk over the node tree fo find the node by id.
-        const n = try getElementById(self, selectors[1..]) orelse return list;
-        try list.append(alloc, n);
-
-        return list;
+    pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
+        return css.querySelectorAll(alloc, parser.elementToNode(self), selector);
     }
 
     // TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -433,6 +396,12 @@ pub fn testExecFn(
         .{ .src = "e.querySelector('#link').id", .ex = "link" },
         .{ .src = "e.querySelector('#para').id", .ex = "para" },
         .{ .src = "e.querySelector('*').id", .ex = "link" },
+        .{ .src = "e.querySelector('')", .ex = "null" },
+        .{ .src = "e.querySelector('*').id", .ex = "link" },
+        .{ .src = "e.querySelector('#content')", .ex = "null" },
+        .{ .src = "e.querySelector('#para').id", .ex = "para" },
+        .{ .src = "e.querySelector('.ok').id", .ex = "link" },
+        .{ .src = "e.querySelector('a ~ p').id", .ex = "para-empty" },
 
         .{ .src = "e.querySelectorAll('foo').length", .ex = "0" },
         .{ .src = "e.querySelectorAll('#foo').length", .ex = "0" },
@@ -441,6 +410,8 @@ pub fn testExecFn(
         .{ .src = "e.querySelectorAll('#para').length", .ex = "1" },
         .{ .src = "e.querySelectorAll('#para').item(0).id", .ex = "para" },
         .{ .src = "e.querySelectorAll('*').length", .ex = "4" },
+        .{ .src = "e.querySelectorAll('p').length", .ex = "2" },
+        .{ .src = "e.querySelectorAll('.ok').item(0).id", .ex = "link" },
     };
     try checkCases(js_env, &querySelector);
 
diff --git a/src/dom/node.zig b/src/dom/node.zig
index 7e6aa383..920768ec 100644
--- a/src/dom/node.zig
+++ b/src/dom/node.zig
@@ -199,7 +199,7 @@ pub const Node = struct {
     }
 
     pub fn get_childNodes(self: *parser.Node, alloc: std.mem.Allocator) !NodeList {
-        var list = try NodeList.init();
+        var list = NodeList.init();
         errdefer list.deinit(alloc);
 
         var n = try parser.nodeFirstChild(self) orelse return list;
diff --git a/src/dom/nodelist.zig b/src/dom/nodelist.zig
index c685f3b5..19ca644a 100644
--- a/src/dom/nodelist.zig
+++ b/src/dom/nodelist.zig
@@ -26,7 +26,7 @@ pub const NodeList = struct {
 
     nodes: NodesArrayList,
 
-    pub fn init() !NodeList {
+    pub fn init() NodeList {
         return NodeList{
             .nodes = NodesArrayList{},
         };
diff --git a/src/html/document.zig b/src/html/document.zig
index d463ab29..34943ac2 100644
--- a/src/html/document.zig
+++ b/src/html/document.zig
@@ -80,7 +80,7 @@ pub const HTMLDocument = struct {
     }
 
     pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList {
-        var list = try NodeList.init();
+        var list = NodeList.init();
         errdefer list.deinit(alloc);
 
         if (name.len == 0) return list;

From 8eb4de9ccbc4d69c94704b00968011e990677b5d Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Tue, 26 Mar 2024 11:08:25 +0100
Subject: [PATCH 28/28] css: ensure node is an element before accessing to attr

---
 src/css/libdom.zig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/css/libdom.zig b/src/css/libdom.zig
index 4166b216..e4e416ea 100644
--- a/src/css/libdom.zig
+++ b/src/css/libdom.zig
@@ -74,6 +74,7 @@ pub const Node = struct {
     }
 
     pub fn attr(n: Node, key: []const u8) !?[]const u8 {
+        if (!n.isElement()) return null;
         return try parser.elementGetAttribute(parser.nodeToElement(n.node), key);
     }