browser/src/browser/webapi/selector/List.zig

// Copyright (C) 2023-2025  Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");

const Page = @import("../../Page.zig");

const Node = @import("../Node.zig");
const Part = @import("Selector.zig").Part;
const Selector = @import("Selector.zig");
const TreeWalker = @import("../TreeWalker.zig").Full;
const GenericIterator = @import("../collections/iterator.zig").Entry;

const Allocator = std.mem.Allocator;

const List = @This();

_nodes: []const *Node,
_arena: Allocator,
// For the [somewhat common] case where we just have an #id selector
// we can avoid allocating a slice and just use this.
_single_node: [1]*Node = undefined,

pub const EntryIterator = GenericIterator(Iterator, null);
pub const KeyIterator = GenericIterator(Iterator, "0");
pub const ValueIterator = GenericIterator(Iterator, "1");

pub fn collect(
    allocator: std.mem.Allocator,
    root: *Node,
    selector: Selector.Selector,
    nodes: *std.AutoArrayHashMapUnmanaged(*Node, void),
    page: *Page,
) !void {
    if (optimizeSelector(root, &selector, page)) |result| {
        var tw = TreeWalker.init(result.root, .{});
        if (result.exclude_root) {
            _ = tw.next();
        }

        while (tw.next()) |node| {
            if (matches(node, result.selector, root, page)) {
                try nodes.put(allocator, node, {});
            }
        }
    }
}

// used internally to find the first match
pub fn initOne(root: *Node, selector: Selector.Selector, page: *Page) ?*Node {
    const result = optimizeSelector(root, &selector, page) orelse return null;

    var tw = TreeWalker.init(result.root, .{});
    if (result.exclude_root) {
        _ = tw.next();
    }
    while (tw.next()) |node| {
        if (matches(node, result.selector, root, page)) {
            return node;
        }
    }
    return null;
}

const OptimizeResult = struct {
    root: *Node,
    exclude_root: bool,
    selector: Selector.Selector,
};

fn optimizeSelector(root: *Node, selector: *const Selector.Selector, page: *Page) ?OptimizeResult {
    const anchor = findIdSelector(selector) orelse return .{
        .root = root,
        .selector = selector.*,
        // Always exclude root - querySelector only returns descendants
        .exclude_root = true,
    };

    // If we have a selector with an #id, we can make a pretty easy and
    // powerful optimization. We can use the node for that id as the new
    // root, and only match the selectors after it. However, we'll need to
    // make sure that node matches the selectors before it (the prefix).
    const id = anchor.id;
    const segment_index = anchor.segment_index;

    // Look up the element by ID (O(1) hash map lookup)
    const id_element = page.getElementByIdFromNode(root, id) orelse return null;
    const id_node = id_element.asNode();

    if (!root.contains(id_node)) {
        return null;
    }

    // If the ID is in the first compound
    if (segment_index == null) {
        // Check if there are any segments after the ID
        if (selector.segments.len == 0) {
            // Just '#id', return the node itself
            return .{
                .root = id_node,
                .selector = .{
                    .first = selector.first,
                    .segments = selector.segments,
                },
                .exclude_root = false,
            };
        }

        // Check the combinator of the first segment
        const first_combinator = selector.segments[0].combinator;
        if (first_combinator == .next_sibling or first_combinator == .subsequent_sibling) {
            // Cannot optimize: matches are siblings, not descendants of the ID node
            // Fall back to searching the entire tree
            return .{
                .root = root,
                .selector = selector.*,
                .exclude_root = true,
            };
        }

        // Safe to optimize for descendant/child combinators
        return .{
            .root = id_node,
            .selector = .{
                .first = selector.first,
                .segments = selector.segments,
            },
            .exclude_root = true,
        };
    }

    // ID is in one of the segments
    const seg_idx = segment_index.?;

    // Check if there are segments after the ID
    if (seg_idx + 1 < selector.segments.len) {
        // Check the combinator of the segment after the ID
        const next_combinator = selector.segments[seg_idx + 1].combinator;
        if (next_combinator == .next_sibling or next_combinator == .subsequent_sibling) {
            // Cannot optimize: matches are siblings, not descendants
            return .{
                .root = root,
                .selector = selector.*,
                .exclude_root = true,
            };
        }
    }

    // If there's a prefix selector, we need to verify that the id_node's
    // ancestors match it. We construct a selector up to and including the ID segment.
    const prefix_selector = Selector.Selector{
        .first = selector.first,
        .segments = selector.segments[0 .. seg_idx + 1],
    };

    if (!matches(id_node, prefix_selector, id_node, page)) {
        return null;
    }

    // Return a selector starting from the segments after the ID
    return .{
        .root = id_node,
        .selector = .{
            .first = selector.segments[seg_idx].compound,
            .segments = selector.segments[seg_idx + 1 ..],
        },
        .exclude_root = false,
    };
}

pub fn getLength(self: *const List) usize {
    return self._nodes.len;
}

pub fn keys(self: *List, page: *Page) !*KeyIterator {
    return .init(.{ .list = self }, page);
}

pub fn values(self: *List, page: *Page) !*ValueIterator {
    return .init(.{ .list = self }, page);
}

pub fn entries(self: *List, page: *Page) !*EntryIterator {
    return .init(.{ .list = self }, page);
}

pub fn getAtIndex(self: *const List, index: usize) !?*Node {
    if (index >= self._nodes.len) {
        return null;
    }
    return self._nodes[index];
}

const NodeList = @import("../collections/NodeList.zig");
pub fn runtimeGenericWrap(self: *List, page: *Page) !*NodeList {
    return page._factory.create(NodeList{ .data = .{ .selector_list = self } });
}

const IdAnchor = struct {
    id: []const u8,
    segment_index: ?usize, // null if ID is in first compound
};

// Rightmost (last) is best because it minimizes the subtree we need to search
fn findIdSelector(selector: *const Selector.Selector) ?IdAnchor {
    // Check segments from right to left
    var i = selector.segments.len;
    while (i > 0) {
        i -= 1;
        const compound = selector.segments[i].compound.parts;
        if (compound.len != 1) {
            continue;
        }
        const part = compound[0];
        if (part == .id) {
            return .{ .id = part.id, .segment_index = i };
        }
    }

    // Check the first compound
    if (selector.first.parts.len == 1) {
        const part = selector.first.parts[0];
        if (part == .id) {
            return .{ .id = part.id, .segment_index = null };
        }
    }

    return null;
}

pub fn matches(node: *Node, selector: Selector.Selector, scope: *Node, page: *Page) bool {
    const el = node.is(Node.Element) orelse return false;

    if (selector.segments.len == 0) {
        return matchesCompound(el, selector.first, scope, page);
    }

    const last_segment = selector.segments[selector.segments.len - 1];
    if (!matchesCompound(el, last_segment.compound, scope, page)) {
        return false;
    }

    return matchSegments(node, selector, selector.segments.len - 1, null, scope, page);
}

// Match segments backward, with support for backtracking on subsequent_sibling
fn matchSegments(node: *Node, selector: Selector.Selector, segment_index: usize, root: ?*Node, scope: *Node, page: *Page) bool {
    const segment = selector.segments[segment_index];
    const target_compound = if (segment_index == 0)
        selector.first
    else
        selector.segments[segment_index - 1].compound;

    const matched: ?*Node = switch (segment.combinator) {
        .descendant => matchDescendant(node, target_compound, root, scope, page),
        .child => matchChild(node, target_compound, root, scope, page),
        .next_sibling => matchNextSibling(node, target_compound, scope, page),
        .subsequent_sibling => {
            // For subsequent_sibling, try all matching siblings with backtracking
            var sibling = node.previousSibling();
            while (sibling) |s| {
                const sibling_el = s.is(Node.Element) orelse {
                    sibling = s.previousSibling();
                    continue;
                };

                if (matchesCompound(sibling_el, target_compound, scope, page)) {
                    // If we're at the first segment, we found a match
                    if (segment_index == 0) {
                        return true;
                    }
                    // Try to match remaining segments from this sibling
                    if (matchSegments(s, selector, segment_index - 1, root, scope, page)) {
                        return true;
                    }
                    // This sibling didn't work, try the next one
                }
                sibling = s.previousSibling();
            }
            return false;
        },
    };

    // For non-subsequent_sibling combinators, matched is either the node or null
    if (segment.combinator != .subsequent_sibling) {
        const current = matched orelse return false;
        if (segment_index == 0) {
            return true;
        }
        return matchSegments(current, selector, segment_index - 1, root, scope, page);
    }

    // subsequent_sibling already handled its recursion above
    return false;
}

// Find an ancestor that matches the compound (any distance up the tree)
fn matchDescendant(node: *Node, compound: Selector.Compound, root: ?*Node, scope: *Node, page: *Page) ?*Node {
    var current = node._parent;

    while (current) |ancestor| {
        if (ancestor.is(Node.Element)) |ancestor_el| {
            if (matchesCompound(ancestor_el, compound, scope, page)) {
                return ancestor;
            }
        }

        // Stop if we've reached the boundary
        if (root) |boundary| {
            if (ancestor == boundary) {
                return null;
            }
        }

        current = ancestor._parent;
    }

    return null;
}

// Find the direct parent if it matches the compound
fn matchChild(node: *Node, compound: Selector.Compound, root: ?*Node, scope: *Node, page: *Page) ?*Node {
    const parent = node._parent orelse return null;

    // Don't match beyond the root boundary
    // If there's a boundary, check if parent is outside (an ancestor of) the boundary
    if (root) |boundary| {
        if (!boundary.contains(parent)) {
            return null;
        }
    }

    const parent_el = parent.is(Node.Element) orelse return null;

    if (matchesCompound(parent_el, compound, scope, page)) {
        return parent;
    }

    return null;
}

// Find the immediately preceding sibling if it matches the compound
fn matchNextSibling(node: *Node, compound: Selector.Compound, scope: *Node, page: *Page) ?*Node {
    var sibling = node.previousSibling();

    // For next_sibling (+), we need the immediately preceding element sibling
    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            // Skip non-element nodes
            sibling = s.previousSibling();
            continue;
        };

        // Found an element - check if it matches
        if (matchesCompound(sibling_el, compound, scope, page)) {
            return s;
        }
        // we found an element, it wasn't a match, we're done
        return null;
    }

    return null;
}

// Find any preceding sibling that matches the compound
fn matchSubsequentSibling(node: *Node, compound: Selector.Compound, scope: *Node, page: *Page) ?*Node {
    var sibling = node.previousSibling();

    // For subsequent_sibling (~), check all preceding element siblings
    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            // Skip non-element nodes
            sibling = s.previousSibling();
            continue;
        };

        if (matchesCompound(sibling_el, compound, scope, page)) {
            return s;
        }

        sibling = s.previousSibling();
    }

    return null;
}

fn matchesCompound(el: *Node.Element, compound: Selector.Compound, scope: *Node, page: *Page) bool {
    // For compound selectors, ALL parts must match
    for (compound.parts) |part| {
        if (!matchesPart(el, part, scope, page)) {
            return false;
        }
    }
    return true;
}

fn matchesPart(el: *Node.Element, part: Part, scope: *Node, page: *Page) bool {
    switch (part) {
        .id => |id| {
            const element_id = el.getAttributeSafe("id") orelse return false;
            return std.mem.eql(u8, element_id, id);
        },
        .class => |cls| {
            const class_attr = el.getAttributeSafe("class") orelse return false;
            return Selector.classAttributeContains(class_attr, cls);
        },
        .tag => |tag| {
            // Optimized: compare enum directly
            return el.getTag() == tag;
        },
        .tag_name => |tag_name| {
            // Fallback for custom/unknown tags
            // Both are lowercase, so we can use fast string comparison
            const element_tag = el.getTagNameLower();
            return std.mem.eql(u8, element_tag, tag_name);
        },
        .universal => return true,
        .pseudo_class => |pseudo| return matchesPseudoClass(el, pseudo, scope, page),
        .attribute => |attr| return matchesAttribute(el, attr),
    }
}

fn matchesAttribute(el: *Node.Element, attr: Selector.Attribute) bool {
    const value = el.getAttributeSafe(attr.name) orelse {
        return false;
    };

    switch (attr.matcher) {
        .presence => return true,
        .exact => |expected| {
            return if (attr.case_insensitive)
                std.ascii.eqlIgnoreCase(value, expected)
            else
                std.mem.eql(u8, value, expected);
        },
        .substring => |expected| {
            return if (attr.case_insensitive)
                std.ascii.indexOfIgnoreCase(value, expected) != null
            else
                std.mem.indexOf(u8, value, expected) != null;
        },
        .starts_with => |expected| {
            return if (attr.case_insensitive)
                std.ascii.startsWithIgnoreCase(value, expected)
            else
                std.mem.startsWith(u8, value, expected);
        },
        .ends_with => |expected| {
            return if (attr.case_insensitive)
                std.ascii.endsWithIgnoreCase(value, expected)
            else
                std.mem.endsWith(u8, value, expected);
        },
        .word => |expected| {
            // Space-separated word match (like class names)
            var it = std.mem.tokenizeAny(u8, value, &std.ascii.whitespace);
            while (it.next()) |word| {
                const same = if (attr.case_insensitive)
                    std.ascii.eqlIgnoreCase(word, expected)
                else
                    std.mem.eql(u8, word, expected);

                if (same) return true;
            }
            return false;
        },
        .prefix_dash => |expected| {
            // Matches value or value- prefix (for language codes like en, en-US)
            if (attr.case_insensitive) {
                if (std.ascii.eqlIgnoreCase(value, expected)) return true;
                if (value.len > expected.len and value[expected.len] == '-') {
                    return std.ascii.eqlIgnoreCase(value[0..expected.len], expected);
                }
            } else {
                if (std.mem.eql(u8, value, expected)) return true;
                if (value.len > expected.len and value[expected.len] == '-') {
                    return std.mem.eql(u8, value[0..expected.len], expected);
                }
            }
            return false;
        },
    }
}

fn attributeContainsWord(value: []const u8, word: []const u8) bool {
    var remaining = value;
    while (remaining.len > 0) {
        const trimmed = std.mem.trimLeft(u8, remaining, &std.ascii.whitespace);
        if (trimmed.len == 0) return false;

        const end = std.mem.indexOfAny(u8, trimmed, &std.ascii.whitespace) orelse trimmed.len;
        const current_word = trimmed[0..end];

        if (std.mem.eql(u8, current_word, word)) {
            return true;
        }

        if (end >= trimmed.len) break;
        remaining = trimmed[end..];
    }
    return false;
}

fn matchesPseudoClass(el: *Node.Element, pseudo: Selector.PseudoClass, scope: *Node, page: *Page) bool {
    const node = el.asNode();
    switch (pseudo) {
        // State pseudo-classes
        .modal => return false,
        .checked => {
            const input = el.is(Node.Element.Html.Input) orelse return false;
            return input.getChecked();
        },
        .disabled => {
            return el.getAttributeSafe("disabled") != null;
        },
        .enabled => {
            return el.getAttributeSafe("disabled") == null;
        },
        .indeterminate => return false,

        // Form validation
        .valid => return false,
        .invalid => return false,
        .required => {
            return el.getAttributeSafe("required") != null;
        },
        .optional => {
            return el.getAttributeSafe("required") == null;
        },
        .in_range => return false,
        .out_of_range => return false,
        .placeholder_shown => return false,
        .read_only => {
            return el.getAttributeSafe("readonly") != null;
        },
        .read_write => {
            return el.getAttributeSafe("readonly") == null;
        },
        .default => return false,

        // User interaction
        .hover => return false,
        .active => return false,
        .focus => {
            const active = page.document._active_element orelse return false;
            return active == el;
        },
        .focus_within => {
            const active = page.document._active_element orelse return false;
            return node.contains(active.asNode());
        },
        .focus_visible => return false,

        // Link states
        .link => return false,
        .visited => return false,
        .any_link => {
            if (el.getTag() != .anchor) return false;
            return el.getAttributeSafe("href") != null;
        },
        .target => {
            const element_id = el.getAttributeSafe("id") orelse return false;
            const location = page.document._location orelse return false;
            const hash = location.getHash();
            if (hash.len <= 1) return false;
            return std.mem.eql(u8, element_id, hash[1..]);
        },

        // Tree structural
        .root => {
            const parent = node.parentNode() orelse return false;
            return parent._type == .document;
        },
        .scope => {
            // :scope matches the reference element (querySelector root)
            return node == scope;
        },
        .empty => {
            return node.firstChild() == null;
        },
        .first_child => return isFirstChild(el),
        .last_child => return isLastChild(el),
        .only_child => return isFirstChild(el) and isLastChild(el),
        .first_of_type => return isFirstOfType(el),
        .last_of_type => return isLastOfType(el),
        .only_of_type => return isFirstOfType(el) and isLastOfType(el),
        .nth_child => |pattern| return matchesNthChild(el, pattern),
        .nth_last_child => |pattern| return matchesNthLastChild(el, pattern),
        .nth_of_type => |pattern| return matchesNthOfType(el, pattern),
        .nth_last_of_type => |pattern| return matchesNthLastOfType(el, pattern),

        // Custom elements
        .defined => {
            const tag_name = el.getTagNameLower();
            if (std.mem.indexOfScalar(u8, tag_name, '-') == null) return true;
            const registry = &page.window._custom_elements;
            return registry.get(tag_name) != null;
        },

        // Functional
        .lang => return false,
        .not => |selectors| {
            for (selectors) |selector| {
                if (matches(node, selector, scope, page)) {
                    return false;
                }
            }
            return true;
        },
        .is => |selectors| {
            for (selectors) |selector| {
                if (matches(node, selector, scope, page)) {
                    return true;
                }
            }
            return false;
        },
        .where => |selectors| {
            for (selectors) |selector| {
                if (matches(node, selector, scope, page)) {
                    return true;
                }
            }
            return false;
        },
        .has => |selectors| {
            for (selectors) |selector| {
                var child = node.firstChild();
                while (child) |c| {
                    const child_el = c.is(Node.Element) orelse {
                        child = c.nextSibling();
                        continue;
                    };

                    if (matches(child_el.asNode(), selector, scope, page)) {
                        return true;
                    }

                    if (matchesHasDescendant(child_el, selector, scope, page)) {
                        return true;
                    }

                    child = c.nextSibling();
                }
            }
            return false;
        },
    }
}

fn matchesHasDescendant(el: *Node.Element, selector: Selector.Selector, scope: *Node, page: *Page) bool {
    var child = el.asNode().firstChild();
    while (child) |c| {
        const child_el = c.is(Node.Element) orelse {
            child = c.nextSibling();
            continue;
        };

        if (matches(child_el.asNode(), selector, scope, page)) {
            return true;
        }

        if (matchesHasDescendant(child_el, selector, scope, page)) {
            return true;
        }

        child = c.nextSibling();
    }
    return false;
}

fn isFirstChild(el: *Node.Element) bool {
    const node = el.asNode();
    var sibling = node.previousSibling();

    // Check if there are any element siblings before this one
    while (sibling) |s| {
        if (s.is(Node.Element)) |_| {
            return false;
        }
        sibling = s.previousSibling();
    }

    return true;
}

fn isLastChild(el: *Node.Element) bool {
    const node = el.asNode();
    var sibling = node.nextSibling();

    // Check if there are any element siblings after this one
    while (sibling) |s| {
        if (s.is(Node.Element)) |_| {
            return false;
        }
        sibling = s.nextSibling();
    }

    return true;
}

fn isFirstOfType(el: *Node.Element) bool {
    const tag = el.getTag();
    const node = el.asNode();
    var sibling = node.previousSibling();

    // Check if there are any element siblings of the same type before this one
    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            sibling = s.previousSibling();
            continue;
        };

        if (sibling_el.getTag() == tag) {
            return false;
        }

        sibling = s.previousSibling();
    }

    return true;
}

fn isLastOfType(el: *Node.Element) bool {
    const tag = el.getTag();
    const node = el.asNode();
    var sibling = node.nextSibling();

    // Check if there are any element siblings of the same type after this one
    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            sibling = s.nextSibling();
            continue;
        };

        if (sibling_el.getTag() == tag) {
            return false;
        }

        sibling = s.nextSibling();
    }

    return true;
}

fn matchesNthChild(el: *Node.Element, pattern: Selector.NthPattern) bool {
    const index = getChildIndex(el) orelse return false;
    return matchesNthPattern(index, pattern);
}

fn matchesNthLastChild(el: *Node.Element, pattern: Selector.NthPattern) bool {
    const index = getChildIndexFromEnd(el) orelse return false;
    return matchesNthPattern(index, pattern);
}

fn matchesNthOfType(el: *Node.Element, pattern: Selector.NthPattern) bool {
    const index = getTypeIndex(el) orelse return false;
    return matchesNthPattern(index, pattern);
}

fn matchesNthLastOfType(el: *Node.Element, pattern: Selector.NthPattern) bool {
    const index = getTypeIndexFromEnd(el) orelse return false;
    return matchesNthPattern(index, pattern);
}

fn getChildIndex(el: *Node.Element) ?usize {
    const node = el.asNode();
    var index: usize = 1;
    var sibling = node.previousSibling();

    while (sibling) |s| {
        if (s.is(Node.Element)) |_| {
            index += 1;
        }
        sibling = s.previousSibling();
    }

    return index;
}

fn getChildIndexFromEnd(el: *Node.Element) ?usize {
    const node = el.asNode();
    var index: usize = 1;
    var sibling = node.nextSibling();

    while (sibling) |s| {
        if (s.is(Node.Element)) |_| {
            index += 1;
        }
        sibling = s.nextSibling();
    }

    return index;
}

fn getTypeIndex(el: *Node.Element) ?usize {
    const tag = el.getTag();
    const node = el.asNode();

    var index: usize = 1;
    var sibling = node.previousSibling();

    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            sibling = s.previousSibling();
            continue;
        };

        if (sibling_el.getTag() == tag) {
            index += 1;
        }

        sibling = s.previousSibling();
    }

    return index;
}

fn getTypeIndexFromEnd(el: *Node.Element) ?usize {
    const tag = el.getTag();
    const node = el.asNode();

    var index: usize = 1;
    var sibling = node.nextSibling();

    while (sibling) |s| {
        const sibling_el = s.is(Node.Element) orelse {
            sibling = s.nextSibling();
            continue;
        };

        if (sibling_el.getTag() == tag) {
            index += 1;
        }

        sibling = s.nextSibling();
    }

    return index;
}

fn matchesNthPattern(index: usize, pattern: Selector.NthPattern) bool {
    const a = pattern.a;
    const b = pattern.b;

    // Special case: a=0 means we're matching a specific index
    if (a == 0) {
        return @as(i32, @intCast(index)) == b;
    }

    // For an+b pattern, we need to find if there's an integer n >= 0
    // such that an + b = index
    // Rearranging: n = (index - b) / a
    const index_i = @as(i32, @intCast(index));
    const diff = index_i - b;

    // Check if (index - b) is divisible by a
    if (@rem(diff, a) != 0) {
        return false;
    }

    const n = @divTrunc(diff, a);

    // n must be non-negative
    return n >= 0;
}

const Iterator = struct {
    index: u32 = 0,
    list: *List,

    const Entry = struct { u32, *Node };

    pub fn next(self: *Iterator, _: *const Page) ?Entry {
        const index = self.index;
        if (index >= self.list._nodes.len) {
            return null;
        }
        self.index = index + 1;
        return .{ index, self.list._nodes[index] };
    }
};