", .exp = 1 },
+ .{ .q = "p:only-child", .html = "", .exp = 1 },
+ .{ .q = "p:only-of-type", .html = "", .exp = 1 },
+ .{ .q = ":empty", .html = "Hello
", .exp = 3 },
+ .{ .q = "div p", .html = "", .exp = 2 },
+ .{ .q = "div table p", .html = "
", .exp = 1 },
+ .{ .q = "div > p", .html = "
", .exp = 2 },
+ .{ .q = "p ~ p", .html = "", .exp = 2 },
+ .{ .q = "p + p", .html = "
", .exp = 1 },
+ .{ .q = "li, p", .html = "
", .exp = 3 },
+ .{ .q = "p +/*This is a comment*/ p", .html = "
", .exp = 1 },
+ // .{ .q = "p:contains(\"that wraps\")", .html = "
Text block that wraps inner text and continues
", .exp = 1 },
+ // .{ .q = "p:containsOwn(\"that wraps\")", .html = "Text block that wraps inner text and continues
", .exp = 0 },
+ // .{ .q = ":containsOwn(\"inner\")", .html = "Text block that wraps inner text and continues
", .exp = 1 },
+ // .{ .q = "p:containsOwn(\"block\")", .html = "Text block that wraps inner text and continues
", .exp = 1 },
+ .{ .q = "div:has(#p1)", .html = "", .exp = 1 },
+ // .{ .q = "div:has(:containsOwn(\"2\"))", .html = "
", .exp = 1 },
+ // .{ .q = "body :has(:containsOwn(\"2\"))", .html = "
", .exp = 2 },
+ // .{ .q = "body :haschild(:containsOwn(\"2\"))", .html = "
", .exp = 1 },
+ // .{ .q = "p:matches([\\d])", .html = "
0123456789
abcdef
0123ABCD
", .exp = 2 },
+ // .{ .q = "p:matches([a-z])", .html = "
0123456789
abcdef
0123ABCD
", .exp = 1 },
+ // .{ .q = "p:matches([a-zA-Z])", .html = "
0123456789
abcdef
0123ABCD
", .exp = 2 },
+ // .{ .q = "p:matches([^\\d])", .html = "
0123456789
abcdef
0123ABCD
", .exp = 2 },
+ // .{ .q = "p:matches(^(0|a))", .html = "
0123456789
abcdef
0123ABCD
", .exp = 3 },
+ // .{ .q = "p:matches(^\\d+$)", .html = "
0123456789
abcdef
0123ABCD
", .exp = 1 },
+ // .{ .q = "p:not(:matches(^\\d+$))", .html = "
0123456789
abcdef
0123ABCD
", .exp = 2 },
+ // .{ .q = "div :matchesOwn(^\\d+$)", .html = "
0123456789
", .exp = 2 },
+ // .{ .q = "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])", .html = "
", .exp = 2 },
+ // .{ .q = "[href#=(^https:\\/\\/[^\\/]*\\/?news)]", .html = "
", .exp = 1 },
+ .{ .q = ":input", .html = "
", .exp = 5 },
+ .{ .q = ":root", .html = "", .exp = 1 },
+ .{ .q = "*:root", .html = "", .exp = 1 },
+ .{ .q = "html:nth-child(1)", .html = "", .exp = 1 },
+ .{ .q = "*:root:first-child", .html = "", .exp = 1 },
+ .{ .q = "*:root:nth-child(1)", .html = "", .exp = 1 },
+ .{ .q = "a:not(:root)", .html = "
", .exp = 1 },
+ .{ .q = "body > *:nth-child(3n+2)", .html = "
", .exp = 2 },
+ .{ .q = "input:disabled", .html = "
", .exp = 1 },
+ .{ .q = ":disabled", .html = "
", .exp = 1 },
+ .{ .q = ":enabled", .html = "
", .exp = 1 },
+ .{ .q = "div.class1, div.class2", .html = "
", .exp = 2 },
+ };
+
+ for (testcases) |tc| {
+ matcher.reset();
+
+ const doc = try parser.documentHTMLParseFromStr(tc.html);
+ defer parser.documentHTMLClose(doc) catch {};
+
+ const s = css.parse(alloc, tc.q, .{}) catch |e| {
+ std.debug.print("parse, query: {s}\n", .{tc.q});
+ return e;
+ };
+ defer s.deinit(alloc);
+
+ const node = Node{ .node = parser.documentHTMLToNode(doc) };
+
+ _ = css.matchAll(s, node, &matcher) catch |e| {
+ std.debug.print("match, query: {s}\n", .{tc.q});
+ return e;
+ };
+ std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+ std.debug.print("expectation, query: {s}\n", .{tc.q});
+ return e;
+ };
+ }
+}
diff --git a/src/css/match_test.zig b/src/css/match_test.zig
new file mode 100644
index 00000000..f30f2e5b
--- /dev/null
+++ b/src/css/match_test.zig
@@ -0,0 +1,569 @@
+const std = @import("std");
+const css = @import("css.zig");
+
+ // Minimal in-memory node used by the matcher tests in this file.
+ // Links are plain optional pointers and every accessor simply returns the
+ // stored field; the mock always reports itself as an element.
+ pub const Node = struct {
+     child: ?*const Node = null,
+     last: ?*const Node = null,
+     sibling: ?*const Node = null,
+     prev: ?*const Node = null,
+     par: ?*const Node = null,
+
+     name: []const u8 = "",
+     att: ?[]const u8 = null,
+
+     // Tree navigation: each accessor hands back the matching link field.
+     pub fn firstChild(self: *const Node) !?*const Node {
+         return self.child;
+     }
+
+     pub fn lastChild(self: *const Node) !?*const Node {
+         return self.last;
+     }
+
+     pub fn nextSibling(self: *const Node) !?*const Node {
+         return self.sibling;
+     }
+
+     pub fn prevSibling(self: *const Node) !?*const Node {
+         return self.prev;
+     }
+
+     pub fn parent(self: *const Node) !?*const Node {
+         return self.par;
+     }
+
+     // Node kind predicates: the mock is always an element and nothing else.
+     pub fn isElement(_: *const Node) bool {
+         return true;
+     }
+
+     pub fn isDocument(_: *const Node) bool {
+         return false;
+     }
+
+     pub fn isComment(_: *const Node) bool {
+         return false;
+     }
+
+     pub fn isText(_: *const Node) bool {
+         return false;
+     }
+
+     pub fn isEmptyText(_: *const Node) !bool {
+         return false;
+     }
+
+     // tag returns the node name exactly as stored.
+     pub fn tag(self: *const Node) ![]const u8 {
+         return self.name;
+     }
+
+     // attr ignores the requested attribute name: the single `att` value
+     // answers every lookup.
+     pub fn attr(self: *const Node, _: []const u8) !?[]const u8 {
+         return self.att;
+     }
+
+     // eql is pointer identity, not structural equality.
+     pub fn eql(lhs: *const Node, rhs: *const Node) bool {
+         return lhs == rhs;
+     }
+ };
+
+ // Matcher records every node it is handed through `match` so the tests
+ // can inspect how many (and which) nodes were reported.
+ const Matcher = struct {
+     const Nodes = std.ArrayList(*const Node);
+
+     nodes: Nodes,
+
+     // init builds an empty collector backed by `alloc`.
+     fn init(alloc: std.mem.Allocator) Matcher {
+         const empty = Nodes.init(alloc);
+         return Matcher{ .nodes = empty };
+     }
+
+     // deinit releases the backing list.
+     fn deinit(self: *Matcher) void {
+         self.nodes.deinit();
+     }
+
+     // reset forgets the collected nodes but keeps the allocated capacity,
+     // so one Matcher can be reused across test cases.
+     fn reset(self: *Matcher) void {
+         self.nodes.clearRetainingCapacity();
+     }
+
+     // match appends the reported node to the list.
+     pub fn match(self: *Matcher, found: *const Node) !void {
+         try self.nodes.append(found);
+     }
+ };
+
+ // Table-driven test for css.matchFirst against the Node mock.
+ // Each case parses `q`, runs matchFirst from the root node `n` and checks
+ // how many nodes the Matcher collected.
+ test "matchFirst" {
+ const alloc = std.testing.allocator;
+
+ var matcher = Matcher.init(alloc);
+ defer matcher.deinit();
+
+ // q: selector source.
+ // n: root of the mock tree handed to matchFirst.
+ // exp: expected number of collected nodes. No case in this table expects
+ // more than 1, including queries for which the "matchAll" table expects 2.
+ //
+ // Node.attr ignores the attribute name, so `.att` stands in for whichever
+ // attribute the selector inspects (id, class, foo, lang, ...).
+ const testcases = [_]struct {
+ q: []const u8,
+ n: Node,
+ exp: usize,
+ }{
+ .{
+ .q = "address",
+ .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "#foo",
+ .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } },
+ .exp = 1,
+ },
+ .{
+ .q = ".t1",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } },
+ .exp = 1,
+ },
+ .{
+ .q = ".t1",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo!=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo!=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo~=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo~=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo^=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo$=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo*=rb]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "strong, a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ // In the descendant-combinator cases the `.par` links point at separate
+ // stand-in nodes that only carry the ancestor's name.
+ .{
+ .q = "p a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, .sibling = &.{ .name = "a" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{
+ .name = "a",
+ .par = &.{ .name = "span", .par = &.{ .name = "p" } },
+ } } } },
+ .exp = 1,
+ },
+ .{
+ .q = ":not(p)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p:has(a)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p:has(strong)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "p:haschild(a)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p:haschild(strong)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "p:lang(en)",
+ .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "a:lang(en)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } },
+ .exp = 1,
+ },
+ };
+
+ for (testcases) |tc| {
+ // The matcher is shared between cases; drop what the previous one collected.
+ matcher.reset();
+
+ const s = try css.parse(alloc, tc.q, .{});
+ defer s.deinit(alloc);
+
+ // On failure, print the query and parsed selector before propagating
+ // so the failing table entry can be identified.
+ _ = css.matchFirst(s, &tc.n, &matcher) catch |e| {
+ std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+ return e;
+ };
+
+ std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+ std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+ return e;
+ };
+ }
+ }
+
+ // Table-driven test for css.matchAll against the Node mock.
+ // Each case parses `q`, runs matchAll from the root node `n` and checks
+ // the total number of nodes the Matcher collected.
+ test "matchAll" {
+ const alloc = std.testing.allocator;
+
+ var matcher = Matcher.init(alloc);
+ defer matcher.deinit();
+
+ // q: selector source.
+ // n: root of the mock tree handed to matchAll.
+ // exp: expected number of collected nodes.
+ //
+ // Node.attr ignores the attribute name, so `.att` stands in for whichever
+ // attribute the selector inspects (id, class, foo, lang, ...).
+ const testcases = [_]struct {
+ q: []const u8,
+ n: Node,
+ exp: usize,
+ }{
+ .{
+ .q = "address",
+ .n = .{ .child = &.{ .name = "body", .child = &.{ .name = "address" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "#foo",
+ .n = .{ .child = &.{ .name = "p", .att = "foo", .child = &.{ .name = "p" } } },
+ .exp = 1,
+ },
+ .{
+ .q = ".t1",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "t1" } } },
+ .exp = 1,
+ },
+ .{
+ .q = ".t1",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "foo t1" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo!=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ // Both <p> nodes are expected here: the one without the attribute and
+ // the one whose value is "bar".
+ .{
+ .q = "[foo!=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 2,
+ },
+ .{
+ .q = "[foo~=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "baz bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo~=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "[foo^=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo$=baz]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo*=rb]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "barbaz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "bar-baz" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "[foo|=bar]",
+ .n = .{ .child = &.{ .name = "p", .sibling = &.{ .name = "p", .att = "ba" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "strong, a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 2,
+ },
+ // In the descendant-combinator cases the `.par` links point at separate
+ // stand-in nodes that only carry the ancestor's name.
+ .{
+ .q = "p a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .name = "p" } }, .sibling = &.{ .name = "a" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p a",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "span", .child = &.{
+ .name = "a",
+ .par = &.{ .name = "span", .par = &.{ .name = "p" } },
+ } } } },
+ .exp = 1,
+ },
+ .{
+ .q = ":not(p)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 2,
+ },
+ .{
+ .q = "p:has(a)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p:has(strong)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "p:haschild(a)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "p:haschild(strong)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a" }, .sibling = &.{ .name = "strong" } } },
+ .exp = 0,
+ },
+ .{
+ .q = "p:lang(en)",
+ .n = .{ .child = &.{ .name = "p", .att = "en-US", .child = &.{ .name = "a" } } },
+ .exp = 1,
+ },
+ .{
+ .q = "a:lang(en)",
+ .n = .{ .child = &.{ .name = "p", .child = &.{ .name = "a", .par = &.{ .att = "en-US" } } } },
+ .exp = 1,
+ },
+ };
+
+ for (testcases) |tc| {
+ // The matcher is shared between cases; drop what the previous one collected.
+ matcher.reset();
+
+ const s = try css.parse(alloc, tc.q, .{});
+ defer s.deinit(alloc);
+
+ // On failure, print the query and parsed selector before propagating
+ // so the failing table entry can be identified.
+ css.matchAll(s, &tc.n, &matcher) catch |e| {
+ std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+ return e;
+ };
+
+ std.testing.expectEqual(tc.exp, matcher.nodes.items.len) catch |e| {
+ std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+ return e;
+ };
+ }
+ }
+
+ // Checks :only-child and :only-of-type on a fully linked mock tree:
+ // root has three children, in order p1 (<p>), p2 (<p>) and a1 (<a>).
+ test "pseudo class" {
+     const alloc = std.testing.allocator;
+
+     var matcher = Matcher.init(alloc);
+     defer matcher.deinit();
+
+     var p1: Node = .{ .name = "p" };
+     var p2: Node = .{ .name = "p" };
+     var a1: Node = .{ .name = "a" };
+
+     // Sibling chain: p1 <-> p2 <-> a1.
+     p1.sibling = &p2;
+     p2.prev = &p1;
+
+     p2.sibling = &a1;
+     a1.prev = &p2;
+
+     var root: Node = .{ .child = &p1, .last = &a1 };
+     p1.par = &root;
+     p2.par = &root;
+     a1.par = &root;
+
+     // exp is the single node expected to match, or null when nothing
+     // should match.
+     const Case = struct {
+         q: []const u8,
+         n: Node,
+         exp: ?*const Node,
+     };
+     const testcases = [_]Case{
+         .{ .q = "p:only-child", .n = root, .exp = null },
+         .{ .q = "a:only-of-type", .n = root, .exp = &a1 },
+     };
+
+     for (testcases) |tc| {
+         matcher.reset();
+
+         const s = try css.parse(alloc, tc.q, .{});
+         defer s.deinit(alloc);
+
+         css.matchAll(s, &tc.n, &matcher) catch |e| {
+             std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+             return e;
+         };
+
+         // Exactly one match when a node is expected, none otherwise.
+         const want_len: usize = if (tc.exp == null) 0 else 1;
+         std.testing.expectEqual(want_len, matcher.nodes.items.len) catch |e| {
+             std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+             return e;
+         };
+
+         if (tc.exp) |want_node| {
+             std.testing.expectEqual(want_node, matcher.nodes.items[0]) catch |e| {
+                 std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                 return e;
+             };
+         }
+     }
+ }
+
+ // Checks the :nth-* family on a fully linked mock tree:
+ // root has two <p> children, in order p1 then p2.
+ test "nth pseudo class" {
+     const alloc = std.testing.allocator;
+
+     var matcher = Matcher.init(alloc);
+     defer matcher.deinit();
+
+     var p1: Node = .{ .name = "p" };
+     var p2: Node = .{ .name = "p" };
+
+     // Sibling chain: p1 <-> p2.
+     p1.sibling = &p2;
+     p2.prev = &p1;
+
+     var root: Node = .{ .child = &p1, .last = &p2 };
+     p1.par = &root;
+     p2.par = &root;
+
+     // exp is the single node expected to match, or null when nothing
+     // should match.
+     const Case = struct {
+         q: []const u8,
+         n: Node,
+         exp: ?*const Node,
+     };
+     const testcases = [_]Case{
+         .{ .q = "a:nth-of-type(1)", .n = root, .exp = null },
+         .{ .q = "p:nth-of-type(1)", .n = root, .exp = &p1 },
+         .{ .q = "p:nth-of-type(2)", .n = root, .exp = &p2 },
+         .{ .q = "p:nth-of-type(0)", .n = root, .exp = null },
+         .{ .q = "p:nth-of-type(2n)", .n = root, .exp = &p2 },
+         .{ .q = "p:nth-last-child(1)", .n = root, .exp = &p2 },
+         .{ .q = "p:nth-last-child(2)", .n = root, .exp = &p1 },
+         .{ .q = "p:nth-child(1)", .n = root, .exp = &p1 },
+         .{ .q = "p:nth-child(2)", .n = root, .exp = &p2 },
+         .{ .q = "p:nth-child(odd)", .n = root, .exp = &p1 },
+         .{ .q = "p:nth-child(even)", .n = root, .exp = &p2 },
+         .{ .q = "p:nth-child(n+2)", .n = root, .exp = &p2 },
+     };
+
+     for (testcases) |tc| {
+         matcher.reset();
+
+         const s = try css.parse(alloc, tc.q, .{});
+         defer s.deinit(alloc);
+
+         css.matchAll(s, &tc.n, &matcher) catch |e| {
+             std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+             return e;
+         };
+
+         // Exactly one match when a node is expected, none otherwise.
+         const want_len: usize = if (tc.exp == null) 0 else 1;
+         std.testing.expectEqual(want_len, matcher.nodes.items.len) catch |e| {
+             std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+             return e;
+         };
+
+         if (tc.exp) |want_node| {
+             std.testing.expectEqual(want_node, matcher.nodes.items[0]) catch |e| {
+                 std.debug.print("query: {s}, parsed selector: {any}\n", .{ tc.q, s });
+                 return e;
+             };
+         }
+     }
+ }
diff --git a/src/css/parser.zig b/src/css/parser.zig
new file mode 100644
index 00000000..b23991c1
--- /dev/null
+++ b/src/css/parser.zig
@@ -0,0 +1,899 @@
+// CSS Selector parser
+// This file is a rewrite in Zig of Cascadia CSS Selector parser.
+// see https://github.com/andybalholm/cascadia
+// see https://github.com/andybalholm/cascadia/blob/master/parser.go
+const std = @import("std");
+const ascii = std.ascii;
+
+const selector = @import("selector.zig");
+const Selector = selector.Selector;
+const PseudoClass = selector.PseudoClass;
+const AttributeOP = selector.AttributeOP;
+const Combinator = selector.Combinator;
+
+ // ParseError is the error set returned by the selector parser: the
+ // parser-specific errors listed here merged with PseudoClass.Error,
+ // Combinator.Error and allocator failures.
+ pub const ParseError = error{
+ ExpectedSelector,
+ ExpectedIdentifier,
+ ExpectedName,
+ ExpectedIDSelector,
+ ExpectedClassSelector,
+ ExpectedAttributeSelector,
+ ExpectedString,
+ ExpectedRegexp,
+ ExpectedPseudoClassSelector,
+ ExpectedParenthesis,
+ ExpectedParenthesisClose,
+ ExpectedNthExpression,
+ ExpectedInteger,
+ InvalidEscape,
+ EscapeLineEndingOutsideString,
+ InvalidUnicode,
+ UnicodeIsNotHandled,
+ // Returned when writing parsed bytes to the caller-supplied writer fails.
+ WriteError,
+ PseudoElementNotAtSelectorEnd,
+ PseudoElementNotUnique,
+ PseudoElementDisabled,
+ InvalidAttributeOperator,
+ InvalidAttributeSelector,
+ InvalidString,
+ InvalidRegexp,
+ InvalidPseudoClassSelector,
+ EmptyPseudoClassSelector,
+ InvalidPseudoClass,
+ InvalidPseudoElement,
+ UnmatchParenthesis,
+ NotHandled,
+ UnknownPseudoSelector,
+ InvalidNthExpression,
+ } || PseudoClass.Error || Combinator.Error || std.mem.Allocator.Error;
+
+ // ParseOptions tunes what the parser accepts.
+ pub const ParseOptions = struct {
+ // When false, a pseudo-element found in a simple selector sequence is
+ // rejected with ParseError.PseudoElementDisabled. Defaults to true.
+ accept_pseudo_elts: bool = true,
+ };
+
+pub const Parser = struct {
+ s: []const u8, // string to parse
+ i: usize = 0, // current position
+
+ opts: ParseOptions,
+
+ // parse is the parser entry point: it parses the whole input as a
+ // selector group, allocating the result with `alloc`.
+ pub fn parse(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     const group = try p.parseSelectorGroup(alloc);
+     return group;
+ }
+
+ // skipWhitespace consumes whitespace characters and comments.
+ // It returns true if there was actually anything to skip.
+ //
+ // A comment is "/*" followed, later, by "*/". The terminator is searched
+ // after the opening marker so that the '*' of "/*" can never double as
+ // the '*' of "*/" (i.e. "/*/" alone is not a complete comment).
+ // An unterminated comment is not consumed: scanning stops in front of it.
+ fn skipWhitespace(p: *Parser) bool {
+     const comment_open = "/*";
+     const comment_close = "*/";
+
+     var i = p.i;
+     while (i < p.s.len) {
+         // Whitespaces.
+         if (ascii.isWhitespace(p.s[i])) {
+             i += 1;
+             continue;
+         }
+
+         // Comments.
+         if (std.mem.startsWith(u8, p.s[i..], comment_open)) {
+             const body_start = i + comment_open.len;
+             if (std.mem.indexOf(u8, p.s[body_start..], comment_close)) |end| {
+                 i = body_start + end + comment_close.len;
+                 continue;
+             }
+         }
+         break;
+     }
+
+     // Only commit the new position when something was actually skipped.
+     if (i > p.i) {
+         p.i = i;
+         return true;
+     }
+
+     return false;
+ }
+
+ // parseSimpleSelectorSequence parses a selector sequence that applies to
+ // a single element.
+ //
+ // The sequence is an optional type/universal selector followed by any
+ // number of id, class, attribute and pseudo selectors. A single parsed
+ // selector is returned as is; otherwise the selectors are wrapped in a
+ // compound selector together with the optional pseudo-element.
+ //
+ // On error, every selector parsed so far is released with `alloc`.
+ fn parseSimpleSelectorSequence(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     if (p.i >= p.s.len) {
+         return ParseError.ExpectedSelector;
+     }
+
+     var buf = std.ArrayList(Selector).init(alloc);
+     defer buf.deinit();
+     // buf.deinit only frees the list storage; the selectors it holds own
+     // their own allocations and must be released when parsing fails.
+     errdefer {
+         for (buf.items) |*sel| sel.deinit(alloc);
+     }
+
+     switch (p.s[p.i]) {
+         '*' => {
+             // It's the universal selector. Just skip over it, since it
+             // doesn't affect the meaning.
+             p.i += 1;
+
+             // other version of universal selector ("*|*"), also accepted
+             // when it ends the input.
+             if (std.mem.startsWith(u8, p.s[p.i..], "|*")) {
+                 p.i += 2;
+             }
+         },
+         '#', '.', '[', ':' => {
+             // There's no type selector. Wait to process the other till the
+             // main loop.
+         },
+         else => {
+             var ts = try p.parseTypeSelector(alloc);
+             // Not yet owned by buf if the append itself fails.
+             errdefer ts.deinit(alloc);
+             try buf.append(ts);
+         },
+     }
+
+     var pseudo_elt: ?PseudoClass = null;
+
+     loop: while (p.i < p.s.len) {
+         var ns: Selector = switch (p.s[p.i]) {
+             '#' => try p.parseIDSelector(alloc),
+             '.' => try p.parseClassSelector(alloc),
+             '[' => try p.parseAttributeSelector(alloc),
+             ':' => try p.parsePseudoclassSelector(alloc),
+             else => break :loop,
+         };
+         // ns is released here until it has been handed over to buf.
+         errdefer ns.deinit(alloc);
+
+         // From https://drafts.csswg.org/selectors-3/#pseudo-elements :
+         // "Only one pseudo-element may appear per selector, and if present
+         // it must appear after the sequence of simple selectors that
+         // represents the subjects of the selector.""
+         switch (ns) {
+             .pseudo_element => |e| {
+                 // We found a pseudo-element.
+                 // Only one pseudo-element is accepted per selector.
+                 if (pseudo_elt != null) return ParseError.PseudoElementNotUnique;
+                 if (!p.opts.accept_pseudo_elts) return ParseError.PseudoElementDisabled;
+
+                 pseudo_elt = e;
+                 ns.deinit(alloc);
+             },
+             else => {
+                 if (pseudo_elt != null) return ParseError.PseudoElementNotAtSelectorEnd;
+                 try buf.append(ns);
+             },
+         }
+     }
+
+     // no need wrap the selectors in compoundSelector
+     if (buf.items.len == 1 and pseudo_elt == null) return buf.items[0];
+
+     return .{ .compound = .{ .selectors = try buf.toOwnedSlice(), .pseudo_elt = pseudo_elt } };
+ }
+
+ // parseTypeSelector reads an identifier at the current position and
+ // returns it as a tag selector. The tag string is allocated with `alloc`
+ // and owned by the returned selector.
+ fn parseTypeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     var name = std.ArrayList(u8).init(alloc);
+     defer name.deinit();
+
+     try p.parseIdentifier(name.writer());
+     const tag = try name.toOwnedSlice();
+
+     return .{ .tag = tag };
+ }
+
+ // parseIdentifier parses an identifier and writes it to `w`.
+ //
+ // An identifier is any number of leading '-' followed by a name whose
+ // first character is either a name-start character or the beginning of a
+ // backslash escape. The leading dashes are written to `w` before the name.
+ //
+ // Returns ExpectedSelector when the input ends after the dashes or the
+ // first character cannot start an identifier, WriteError when `w` fails,
+ // and whatever parseName returns otherwise.
+ fn parseIdentifier(p: *Parser, w: anytype) ParseError!void {
+     const prefix = '-';
+     var numPrefix: usize = 0;
+
+     while (p.s.len > p.i and p.s[p.i] == prefix) {
+         p.i += 1;
+         numPrefix += 1;
+     }
+
+     if (p.s.len <= p.i) {
+         return ParseError.ExpectedSelector;
+     }
+
+     // A backslash is a valid first character: it introduces an escape
+     // that parseName decodes. Only reject characters that are neither.
+     const c = p.s[p.i];
+     if (!nameStart(c) and c != '\\') {
+         return ParseError.ExpectedSelector;
+     }
+
+     var ii: usize = 0;
+     while (ii < numPrefix) {
+         w.writeByte(prefix) catch return ParseError.WriteError;
+         ii += 1;
+     }
+     try parseName(p, w);
+ }
+
+ // parseName reads a name at the current position and writes it to `w`.
+ // A name is a run of name characters and backslash escapes; unlike an
+ // identifier, its first character is not restricted further.
+ // Returns ExpectedName when nothing could be consumed.
+ fn parseName(p: *Parser, w: anytype) ParseError!void {
+     var pos = p.i;
+     var consumed = false;
+
+     while (pos < p.s.len) {
+         const c = p.s[pos];
+
+         if (nameChar(c)) {
+             // Copy the whole run of plain name characters in one write.
+             var run_end = pos;
+             while (run_end < p.s.len and nameChar(p.s[run_end])) run_end += 1;
+             w.writeAll(p.s[pos..run_end]) catch return ParseError.WriteError;
+             pos = run_end;
+             consumed = true;
+             continue;
+         }
+
+         // Anything that is neither a name character nor an escape ends the name.
+         if (c != '\\') break;
+
+         // parseEscape works from p.i, so sync the position around the call.
+         p.i = pos;
+         try p.parseEscape(w);
+         pos = p.i;
+         consumed = true;
+     }
+
+     if (!consumed) return ParseError.ExpectedName;
+     p.i = pos;
+ }
+
+ // parseEscape parses a backslash escape starting at the current position
+ // and writes the decoded bytes to `w`.
+ //
+ // Two forms are handled:
+ // - a backslash followed by 1 to 6 hex digits: the code point is written
+ //   UTF-8 encoded, and one trailing whitespace (or "\r\n") is swallowed;
+ // - a backslash followed by any other non-whitespace byte: that byte is
+ //   written literally.
+ //
+ // Returns InvalidEscape when there is no backslash or nothing after it,
+ // EscapeLineEndingOutsideString when the backslash is followed by
+ // whitespace, InvalidUnicode when the hex value is not an encodable code
+ // point, and WriteError when `w` fails.
+ fn parseEscape(p: *Parser, w: anytype) ParseError!void {
+     if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
+         return ParseError.InvalidEscape;
+     }
+
+     const start = p.i + 1;
+     const c = p.s[start];
+     if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;
+
+     // unicode escape (hex)
+     if (ascii.isHex(c)) {
+         var i: usize = start;
+         while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
+             i += 1;
+         }
+         const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;
+
+         // A single whitespace terminating the escape belongs to it.
+         if (p.s.len > i) {
+             switch (p.s[i]) {
+                 '\r' => {
+                     i += 1;
+                     if (p.s.len > i and p.s[i] == '\n') i += 1;
+                 },
+                 ' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
+                 else => {},
+             }
+         }
+
+         // The escape is complete even when the hex digits run up to the
+         // end of the input, so the code point is always emitted here.
+         p.i = i;
+         var buf: [4]u8 = undefined;
+         const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
+         w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
+         return;
+     }
+
+     // Return the literal character after the backslash.
+     p.i += 2;
+     w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
+ }
+
+ // parseIDSelector reads "#name" at the current position and returns an
+ // id selector. The id string is allocated with `alloc`.
+ fn parseIDSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     const at_hash = p.i < p.s.len and p.s[p.i] == '#';
+     if (!at_hash) return ParseError.ExpectedIDSelector;
+
+     // Step over the '#'.
+     p.i += 1;
+
+     var name = std.ArrayList(u8).init(alloc);
+     defer name.deinit();
+
+     try p.parseName(name.writer());
+     const id = try name.toOwnedSlice();
+     return .{ .id = id };
+ }
+
+ // parseClassSelector reads ".identifier" at the current position and
+ // returns a class selector. The class string is allocated with `alloc`.
+ fn parseClassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     const at_dot = p.i < p.s.len and p.s[p.i] == '.';
+     if (!at_dot) return ParseError.ExpectedClassSelector;
+
+     // Step over the '.'.
+     p.i += 1;
+
+     var name = std.ArrayList(u8).init(alloc);
+     defer name.deinit();
+
+     try p.parseIdentifier(name.writer());
+     const class = try name.toOwnedSlice();
+     return .{ .class = class };
+ }
+
+ // parseAttributeSelector reads a bracketed attribute selector at the
+ // current position: "[key]", or "[key op value]" optionally followed by
+ // the case-insensitivity flag 'i'/'I' before the closing bracket.
+ // The key is lowercased. For the regexp operator the right-hand side is
+ // stored in `regexp`, for every other operator in `val`.
+ fn parseAttributeSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     if (p.i >= p.s.len or p.s[p.i] != '[') return ParseError.ExpectedAttributeSelector;
+
+     p.i += 1;
+     _ = p.skipWhitespace();
+
+     // One scratch buffer serves the key first and the value afterwards.
+     var scratch = std.ArrayList(u8).init(alloc);
+     defer scratch.deinit();
+
+     try p.parseIdentifier(scratch.writer());
+     const key = try scratch.toOwnedSlice();
+     errdefer alloc.free(key);
+
+     lowerstr(key);
+
+     _ = p.skipWhitespace();
+     if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+     // Presence-only form: "[key]".
+     if (p.s[p.i] == ']') {
+         p.i += 1;
+         return .{ .attribute = .{ .key = key } };
+     }
+
+     if (p.i + 2 >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+     // The operator is recognized from the next two bytes; only its own
+     // length is consumed.
+     const op = try parseAttributeOP(p.s[p.i .. p.i + 2]);
+     p.i += op.len();
+
+     _ = p.skipWhitespace();
+     if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+     scratch.clearRetainingCapacity();
+     const is_val = op != .regexp;
+     if (is_val) {
+         switch (p.s[p.i]) {
+             '\'', '"' => try p.parseString(scratch.writer()),
+             else => try p.parseIdentifier(scratch.writer()),
+         }
+     } else {
+         try p.parseRegex(scratch.writer());
+     }
+
+     _ = p.skipWhitespace();
+     if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+     // check if the attribute contains an ignore case flag
+     const ci = p.s[p.i] == 'i' or p.s[p.i] == 'I';
+     if (ci) p.i += 1;
+
+     _ = p.skipWhitespace();
+     if (p.i >= p.s.len) return ParseError.ExpectedAttributeSelector;
+
+     if (p.s[p.i] != ']') return ParseError.InvalidAttributeSelector;
+     p.i += 1;
+
+     const payload = try scratch.toOwnedSlice();
+     return .{ .attribute = .{
+         .key = key,
+         .val = if (is_val) payload else null,
+         .regexp = if (is_val) null else payload,
+         .op = op,
+         .ci = ci,
+     } };
+ }
+
+ // parseString parses a single- or double-quoted string and writes its
+ // decoded content (without the quotes) to `writer`.
+ //
+ // The byte at the current position is taken as the quote character.
+ // Inside the string:
+ // - a backslash followed by a line break ("\r\n", "\r", "\n" or form
+ //   feed) is a line continuation and produces nothing;
+ // - any other backslash sequence is decoded by parseEscape;
+ // - a raw line break is an error.
+ //
+ // Returns ExpectedString when fewer than two bytes remain, InvalidString
+ // on a raw line break or a missing closing quote, and WriteError when
+ // `writer` fails. The position is only advanced past the closing quote on
+ // success (parseEscape may have moved it on failure).
+ fn parseString(p: *Parser, writer: anytype) ParseError!void {
+     var i = p.i;
+     if (p.s.len < i + 2) return ParseError.ExpectedString;
+
+     const quote = p.s[i];
+     i += 1;
+
+     loop: while (i < p.s.len) {
+         switch (p.s[i]) {
+             '\\' => {
+                 if (p.s.len > i + 1) {
+                     const c = p.s[i + 1];
+                     switch (c) {
+                         '\r' => {
+                             if (p.s.len > i + 2 and p.s[i + 2] == '\n') {
+                                 i += 3;
+                                 continue :loop;
+                             }
+                             i += 2;
+                             continue :loop;
+                         },
+                         '\n', std.ascii.control_code.ff => {
+                             i += 2;
+                             continue :loop;
+                         },
+                         else => {},
+                     }
+                 }
+                 p.i = i;
+                 try p.parseEscape(writer);
+                 i = p.i;
+             },
+             '\r', '\n', std.ascii.control_code.ff => return ParseError.InvalidString,
+             else => |c| {
+                 if (c == quote) break :loop;
+
+                 // Copy the run of ordinary bytes in one write. The run
+                 // stops at the closing quote, an escape or a line break,
+                 // each of which is handled by the outer switch.
+                 const start = i;
+                 while (i < p.s.len) {
+                     const cc = p.s[i];
+                     if (cc == quote or cc == '\\' or cc == '\r' or cc == '\n' or cc == std.ascii.control_code.ff) break;
+                     i += 1;
+                 }
+                 writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
+             },
+         }
+     }
+
+     // Reaching the end of input means the closing quote is missing.
+     if (i >= p.s.len) return ParseError.InvalidString;
+
+     // Consume the final quote.
+     i += 1;
+     p.i = i;
+ }
+
+ // parseRegex copies a regular expression to `writer`. The expression
+ // ends at the first ')' or ']' that has no matching opener inside it;
+ // that closing character is left unconsumed for the caller.
+ // Returns ExpectedRegexp when fewer than two bytes remain and
+ // InvalidRegexp when the input ends before such a closer is found.
+ fn parseRegex(p: *Parser, writer: anytype) ParseError!void {
+     const begin = p.i;
+     if (p.s.len < begin + 2) return ParseError.ExpectedRegexp;
+
+     // Number of parens/brackets currently open inside the expression.
+     var depth: usize = 0;
+     var end = begin;
+
+     while (end < p.s.len) : (end += 1) {
+         const c = p.s[end];
+         if (c == '(' or c == '[') {
+             depth += 1;
+         } else if (c == ')' or c == ']') {
+             // A closer with nothing open terminates the expression.
+             if (depth == 0) break;
+             depth -= 1;
+         }
+     }
+
+     if (end >= p.s.len) return ParseError.InvalidRegexp;
+     writer.writeAll(p.s[begin..end]) catch return ParseError.WriteError;
+     p.i = end;
+ }
+
+ // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
+ // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
+ // https://drafts.csswg.org/selectors-3/#pseudo-elements
+ //
+ // The name after the colon(s) is resolved with PseudoClass.parse and the
+ // argument, if any, is parsed according to the pseudo-class:
+ // - :not/:has/:haschild take a nested selector group;
+ // - :contains/:containsOwn take a quoted string or a bare identifier,
+ //   stored lowercased;
+ // - :matches/:matchesOwn take a regular expression;
+ // - the :nth-* family takes an nth expression;
+ // - :lang takes an identifier, stored lowercased.
+ // A "::" prefix is only accepted for names that are pseudo-elements.
+ // Everything allocated here is released again when an error is returned.
+ fn parsePseudoclassSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     if (p.i >= p.s.len) return ParseError.ExpectedPseudoClassSelector;
+     if (p.s[p.i] != ':') return ParseError.ExpectedPseudoClassSelector;
+
+     p.i += 1;
+
+     var must_pseudo_elt: bool = false;
+     if (p.i >= p.s.len) return ParseError.EmptyPseudoClassSelector;
+     if (p.s[p.i] == ':') { // we found a pseudo-element
+         must_pseudo_elt = true;
+         p.i += 1;
+     }
+
+     var buf = std.ArrayList(u8).init(alloc);
+     defer buf.deinit();
+
+     try p.parseIdentifier(buf.writer());
+
+     const pseudo_class = try PseudoClass.parse(buf.items);
+
+     // reset the buffer to reuse it.
+     buf.clearRetainingCapacity();
+
+     if (must_pseudo_elt and !pseudo_class.isPseudoElement()) return ParseError.InvalidPseudoElement;
+
+     switch (pseudo_class) {
+         .not, .has, .haschild => {
+             if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+
+             var sel = try p.parseSelectorGroup(alloc);
+             // The nested group owns allocations: release it when the
+             // closing parenthesis is missing or the box cannot be created.
+             errdefer sel.deinit(alloc);
+             if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+             const s = try alloc.create(Selector);
+             errdefer alloc.destroy(s);
+             s.* = sel;
+
+             return .{ .pseudo_class_relative = .{ .pseudo_class = pseudo_class, .match = s } };
+         },
+         .contains, .containsown => {
+             if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+             if (p.i == p.s.len) return ParseError.UnmatchParenthesis;
+
+             // A quoted argument is a string; anything else must be a bare
+             // identifier (parseString would treat its first byte as a quote).
+             switch (p.s[p.i]) {
+                 '\'', '"' => try p.parseString(buf.writer()),
+                 else => try p.parseIdentifier(buf.writer()),
+             }
+
+             _ = p.skipWhitespace();
+             if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
+             if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+             const val = try buf.toOwnedSlice();
+             errdefer alloc.free(val);
+
+             lowerstr(val);
+
+             return .{ .pseudo_class_contains = .{ .own = pseudo_class == .containsown, .val = val } };
+         },
+         .matches, .matchesown => {
+             if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+
+             try p.parseRegex(buf.writer());
+             if (p.i >= p.s.len) return ParseError.InvalidPseudoClassSelector;
+             if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+             return .{ .pseudo_class_regexp = .{ .own = pseudo_class == .matchesown, .regexp = try buf.toOwnedSlice() } };
+         },
+         .nth_child, .nth_last_child, .nth_of_type, .nth_last_of_type => {
+             if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+             const nth = try p.parseNth(alloc);
+             if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+             const last = pseudo_class == .nth_last_child or pseudo_class == .nth_last_of_type;
+             const of_type = pseudo_class == .nth_of_type or pseudo_class == .nth_last_of_type;
+             return .{ .pseudo_class_nth = .{ .a = nth[0], .b = nth[1], .of_type = of_type, .last = last } };
+         },
+         // first/last child and first/last of type are nth selectors with
+         // a = 0 and b = 1.
+         .first_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = false } },
+         .last_child => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = false, .last = true } },
+         .first_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = false } },
+         .last_of_type => return .{ .pseudo_class_nth = .{ .a = 0, .b = 1, .of_type = true, .last = true } },
+         // The boolean payload is true for the of-type variant.
+         .only_child => return .{ .pseudo_class_only_child = false },
+         .only_of_type => return .{ .pseudo_class_only_child = true },
+         .input, .empty, .root, .link => return .{ .pseudo_class = pseudo_class },
+         .enabled, .disabled, .checked => return .{ .pseudo_class = pseudo_class },
+         .lang => {
+             if (!p.consumeParenthesis()) return ParseError.ExpectedParenthesis;
+             if (p.i == p.s.len) return ParseError.UnmatchParenthesis;
+
+             try p.parseIdentifier(buf.writer());
+
+             _ = p.skipWhitespace();
+             if (p.i >= p.s.len) return ParseError.InvalidPseudoClass;
+             if (!p.consumeClosingParenthesis()) return ParseError.ExpectedParenthesisClose;
+
+             const val = try buf.toOwnedSlice();
+             errdefer alloc.free(val);
+             lowerstr(val);
+
+             return .{ .pseudo_class_lang = val };
+         },
+         .visited, .hover, .active, .focus, .target => {
+             // Not applicable in a static context: never match.
+             return .{ .never_match = pseudo_class };
+         },
+         .after, .backdrop, .before, .cue, .first_letter => return .{ .pseudo_element = pseudo_class },
+         .first_line, .grammar_error, .marker, .placeholder => return .{ .pseudo_element = pseudo_class },
+         .selection, .spelling_error => return .{ .pseudo_element = pseudo_class },
+     }
+ }
+
+ // consumeParenthesis skips an opening '(' together with the whitespace that
+ // follows it. It returns false, leaving the cursor where it was, when the
+ // next byte is not '('.
+ fn consumeParenthesis(p: *Parser) bool {
+     if (p.i >= p.s.len) return false;
+     if (p.s[p.i] != '(') return false;
+
+     p.i += 1;
+     _ = p.skipWhitespace();
+     return true;
+ }
+
+ // parseSelectorGroup parses a group of selectors, separated by commas.
+ //
+ // A lone selector is returned as is; two or more are wrapped in a `.group`
+ // that owns the slice. If anything fails part-way, every selector parsed so
+ // far is released with `alloc` before the error is returned.
+ fn parseSelectorGroup(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     var buf = std.ArrayList(Selector).init(alloc);
+     defer buf.deinit();
+     // buf.deinit() only frees the backing array: the selectors stored in it
+     // must be released explicitly on the error path or they leak.
+     errdefer {
+         for (buf.items) |sel| sel.deinit(alloc);
+     }
+
+     {
+         const s = try p.parseSelector(alloc);
+         // Not yet stored in buf, so the errdefer above does not cover it.
+         errdefer s.deinit(alloc);
+         try buf.append(s);
+     }
+
+     while (p.i < p.s.len) {
+         if (p.s[p.i] != ',') break;
+         p.i += 1;
+         const ss = try p.parseSelector(alloc);
+         errdefer ss.deinit(alloc);
+         try buf.append(ss);
+     }
+
+     // The selector is copied out by value; only the list storage is freed.
+     if (buf.items.len == 1) return buf.items[0];
+
+     return .{ .group = try buf.toOwnedSlice() };
+ }
+
+ // parseSelector parses a selector that may include combinators.
+ //
+ // Simple selector sequences are folded left to right into nested `.combined`
+ // selectors. Parsing stops at end of input, at ',' or ')', or when no
+ // combinator separates the current position from what follows.
+ // On error, everything parsed so far is released with `alloc`.
+ fn parseSelector(p: *Parser, alloc: std.mem.Allocator) ParseError!Selector {
+     _ = p.skipWhitespace();
+     var s = try p.parseSimpleSelectorSequence(alloc);
+     // `s` always owns the whole tree built so far (it is reassigned at the
+     // end of each iteration), so releasing it covers every earlier step.
+     errdefer s.deinit(alloc);
+
+     while (true) {
+         var combinator: Combinator = .empty;
+         // Whitespace alone is the descendant combinator, unless an explicit
+         // combinator follows it.
+         if (p.skipWhitespace()) {
+             combinator = .descendant;
+         }
+         if (p.i >= p.s.len) {
+             return s;
+         }
+
+         switch (p.s[p.i]) {
+             '+', '>', '~' => {
+                 combinator = try Combinator.parse(p.s[p.i]);
+                 p.i += 1;
+                 _ = p.skipWhitespace();
+             },
+             // These characters can't begin a selector, but they can legally occur after one.
+             ',', ')' => {
+                 return s;
+             },
+             else => {},
+         }
+
+         if (combinator == .empty) {
+             return s;
+         }
+
+         const c = try p.parseSimpleSelectorSequence(alloc);
+         // Until it is moved into `second`, `c` is owned by this iteration.
+         errdefer c.deinit(alloc);
+
+         const first = try alloc.create(Selector);
+         errdefer alloc.destroy(first);
+         first.* = s;
+
+         const second = try alloc.create(Selector);
+         errdefer alloc.destroy(second);
+         second.* = c;
+
+         s = Selector{ .combined = .{ .first = first, .second = second, .combinator = combinator } };
+     }
+
+     return s;
+ }
+
+ // consumeClosingParenthesis skips optional whitespace followed by ')'.
+ // It returns true when the parenthesis was found; otherwise the cursor is
+ // restored to where it was before the whitespace and false is returned.
+ fn consumeClosingParenthesis(p: *Parser) bool {
+     const saved = p.i;
+     _ = p.skipWhitespace();
+
+     const found = p.i < p.s.len and p.s[p.i] == ')';
+     if (found) {
+         p.i += 1;
+     } else {
+         p.i = saved;
+     }
+     return found;
+ }
+
+ // parseInteger reads a run of decimal digits at the cursor and returns its
+ // value. ExpectedInteger is returned when there is no digit at the cursor or
+ // when the digits do not fit in an isize.
+ fn parseInteger(p: *Parser) ParseError!isize {
+     const start = p.i;
+     var end = start;
+     while (end < p.s.len and std.ascii.isDigit(p.s[end])) : (end += 1) {}
+
+     if (end == start) return ParseError.ExpectedInteger;
+     // The cursor moves past the digits even if the conversion below fails.
+     p.i = end;
+
+     return std.fmt.parseUnsigned(isize, p.s[start..end], 10) catch ParseError.ExpectedInteger;
+ }
+
+ // parseNthReadN parses what follows the `n` of an `an+b` expression: an
+ // optional sign and integer giving `b`. Returns `.{ a, b }`, with `b == 0`
+ // when no sign follows.
+ fn parseNthReadN(p: *Parser, a: isize) ParseError![2]isize {
+     _ = p.skipWhitespace();
+     if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+     const sign: isize = switch (p.s[p.i]) {
+         '+' => 1,
+         '-' => -1,
+         else => return .{ a, 0 },
+     };
+
+     p.i += 1;
+     _ = p.skipWhitespace();
+     const magnitude = try p.parseInteger();
+     return .{ a, sign * magnitude };
+ }
+
+ // parseNthReadA is called once an integer has been read. If an `n` follows,
+ // the integer was the `a` coefficient and the rest is parsed by
+ // parseNthReadN; otherwise the integer was `b` and the result is `.{ 0, a }`.
+ fn parseNthReadA(p: *Parser, a: isize) ParseError![2]isize {
+     if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+     const c = p.s[p.i];
+     if (c != 'n' and c != 'N') return .{ 0, a };
+
+     p.i += 1;
+     return p.parseNthReadN(a);
+ }
+
+ // parseNthNegativeA parses the part of an nth expression that follows a
+ // leading '-': either digits (negated) or a bare `n`, which stands for -1n.
+ fn parseNthNegativeA(p: *Parser) ParseError![2]isize {
+     if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+     switch (p.s[p.i]) {
+         '0'...'9' => {
+             const magnitude = try p.parseInteger();
+             return p.parseNthReadA(-magnitude);
+         },
+         'n', 'N' => {
+             p.i += 1;
+             return p.parseNthReadN(-1);
+         },
+         else => return ParseError.InvalidNthExpression,
+     }
+ }
+
+ // parseNthPositiveA parses the part of an nth expression that follows an
+ // optional leading '+': either digits or a bare `n`, which stands for 1n.
+ fn parseNthPositiveA(p: *Parser) ParseError![2]isize {
+     if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+     switch (p.s[p.i]) {
+         '0'...'9' => {
+             const value = try p.parseInteger();
+             return p.parseNthReadA(value);
+         },
+         'n', 'N' => {
+             p.i += 1;
+             return p.parseNthReadN(1);
+         },
+         else => return ParseError.InvalidNthExpression,
+     }
+ }
+
+ // parseNth parses the argument of an :nth-* pseudo-class (normally of the
+ // form an+b, or the keywords `odd` / `even`) and returns `.{ a, b }`.
+ // `alloc` is only used for a scratch buffer while reading a keyword.
+ fn parseNth(p: *Parser, alloc: std.mem.Allocator) ParseError![2]isize {
+     if (p.i >= p.s.len) return ParseError.ExpectedNthExpression;
+
+     // Dispatch on the first byte of the expression.
+     switch (p.s[p.i]) {
+         '-' => {
+             p.i += 1;
+             return p.parseNthNegativeA();
+         },
+         '+' => {
+             p.i += 1;
+             return p.parseNthPositiveA();
+         },
+         '0'...'9' => return p.parseNthPositiveA(),
+         'n', 'N' => {
+             p.i += 1;
+             return p.parseNthReadN(1);
+         },
+         'o', 'O', 'e', 'E' => {
+             var keyword = std.ArrayList(u8).init(alloc);
+             defer keyword.deinit();
+
+             try p.parseName(keyword.writer());
+
+             if (std.ascii.eqlIgnoreCase("odd", keyword.items)) return .{ 2, 1 };
+             if (std.ascii.eqlIgnoreCase("even", keyword.items)) return .{ 2, 0 };
+
+             return ParseError.InvalidNthExpression;
+         },
+         else => return ParseError.InvalidNthExpression,
+     }
+ }
+};
+
+// nameStart reports whether c may be the first character of an identifier
+// (an initial hyphen and escape sequences are handled by the caller).
+fn nameStart(c: u8) bool {
+    return switch (c) {
+        'a'...'z', 'A'...'Z', '_' => true,
+        // Any non-ASCII byte is accepted.
+        else => c > 127,
+    };
+}
+
+// nameChar reports whether c may appear inside an identifier
+// (escape sequences are handled by the caller).
+fn nameChar(c: u8) bool {
+    return switch (c) {
+        'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => true,
+        // Any non-ASCII byte is accepted.
+        else => c > 127,
+    };
+}
+
+// lowerstr lowercases the ASCII letters of str in place; other bytes are
+// left untouched.
+fn lowerstr(str: []u8) void {
+    for (str) |*byte| {
+        byte.* = std.ascii.toLower(byte.*);
+    }
+}
+
+// parseAttributeOP converts the textual form of an attribute operator (1 or
+// 2 bytes) into an AttributeOP. Anything else yields
+// InvalidAttributeOperator.
+fn parseAttributeOP(s: []const u8) ParseError!AttributeOP {
+    switch (s.len) {
+        1, 2 => {},
+        else => return ParseError.InvalidAttributeOperator,
+    }
+
+    // A leading '=' is always the equality operator, whatever follows it.
+    if (s[0] == '=') return .eql;
+
+    // Every other operator is exactly two bytes long and ends with '='.
+    if (s.len != 2 or s[1] != '=') return ParseError.InvalidAttributeOperator;
+
+    return switch (s[0]) {
+        '!' => .not_eql,
+        '~' => .one_of,
+        '|' => .prefix_hyphen,
+        '^' => .prefix,
+        '$' => .suffix,
+        '*' => .contains,
+        '#' => .regexp,
+        else => ParseError.InvalidAttributeOperator,
+    };
+}
+
+// Checks both the return value of skipWhitespace and where it leaves the
+// cursor, for plain whitespace, comments and an unterminated comment.
+test "parser.skipWhitespace" {
+    const Case = struct {
+        s: []const u8, // input
+        i: usize, // expected cursor position afterwards
+        r: bool, // expected return value
+    };
+    const cases = [_]Case{
+        .{ .s = "", .i = 0, .r = false },
+        .{ .s = "foo", .i = 0, .r = false },
+        .{ .s = " ", .i = 1, .r = true },
+        .{ .s = " foo", .i = 1, .r = true },
+        .{ .s = "/* foo */ bar", .i = 10, .r = true },
+        .{ .s = "/* foo", .i = 0, .r = false },
+    };
+
+    for (cases) |case| {
+        var px = Parser{ .s = case.s, .opts = .{} };
+        const skipped = px.skipWhitespace();
+        try std.testing.expectEqual(case.r, skipped);
+        try std.testing.expectEqual(case.i, px.i);
+    }
+}
+
+// Table-driven test for Parser.parseIdentifier. Cases flagged with `err`
+// must fail; all others must produce exactly `exp`.
+test "parser.parseIdentifier" {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_]struct {
+        s: []const u8, // given value
+        exp: []const u8, // expected value
+        err: bool = false,
+    }{
+        .{ .s = "x", .exp = "x" },
+        .{ .s = "96", .exp = "", .err = true },
+        .{ .s = "-x", .exp = "-x" },
+        .{ .s = "r\\e9 sumé", .exp = "résumé" },
+        .{ .s = "r\\0000e9 sumé", .exp = "résumé" },
+        .{ .s = "r\\0000e9sumé", .exp = "résumé" },
+        .{ .s = "a\\\"b", .exp = "a\"b" },
+    };
+
+    var buf = std.ArrayList(u8).init(alloc);
+    defer buf.deinit();
+
+    for (testcases) |tc| {
+        buf.clearRetainingCapacity();
+
+        var p = Parser{ .s = tc.s, .opts = .{} };
+        p.parseIdentifier(buf.writer()) catch |e| {
+            // if error was expected, continue.
+            if (tc.err) continue;
+
+            std.debug.print("test case {s}\n", .{tc.s});
+            return e;
+        };
+
+        // Parsing succeeded: that is itself a failure when an error was
+        // expected, whatever ended up in the buffer.
+        if (tc.err) {
+            std.debug.print("test case {s} : expected an error, got {s}\n", .{ tc.s, buf.items });
+            return error.TestExpectedError;
+        }
+
+        std.testing.expectEqualStrings(tc.exp, buf.items) catch |e| {
+            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
+            return e;
+        };
+    }
+}
+
+// Table-driven test for Parser.parseString. Cases flagged with `err` must
+// fail; all others must produce exactly `exp`.
+test "parser.parseString" {
+    const alloc = std.testing.allocator;
+
+    const testcases = [_]struct {
+        s: []const u8, // given value
+        exp: []const u8, // expected value
+        err: bool = false,
+    }{
+        .{ .s = "\"x\"", .exp = "x" },
+        .{ .s = "'x'", .exp = "x" },
+        .{ .s = "'x", .exp = "", .err = true },
+        .{ .s = "'x\\\r\nx'", .exp = "xx" },
+        .{ .s = "\"r\\e9 sumé\"", .exp = "résumé" },
+        .{ .s = "\"r\\0000e9 sumé\"", .exp = "résumé" },
+        .{ .s = "\"r\\0000e9sumé\"", .exp = "résumé" },
+        .{ .s = "\"a\\\"b\"", .exp = "a\"b" },
+        .{ .s = "\"\\\n\"", .exp = "" },
+        .{ .s = "\"hello world\"", .exp = "hello world" },
+    };
+
+    var buf = std.ArrayList(u8).init(alloc);
+    defer buf.deinit();
+
+    for (testcases) |tc| {
+        buf.clearRetainingCapacity();
+
+        var p = Parser{ .s = tc.s, .opts = .{} };
+        p.parseString(buf.writer()) catch |e| {
+            // if error was expected, continue.
+            if (tc.err) continue;
+
+            std.debug.print("test case {s}\n", .{tc.s});
+            return e;
+        };
+
+        // Parsing succeeded: that is itself a failure when an error was
+        // expected, whatever ended up in the buffer.
+        if (tc.err) {
+            std.debug.print("test case {s} : expected an error, got {s}\n", .{ tc.s, buf.items });
+            return error.TestExpectedError;
+        }
+
+        std.testing.expectEqualStrings(tc.exp, buf.items) catch |e| {
+            std.debug.print("test case {s} : {s}\n", .{ tc.s, buf.items });
+            return e;
+        };
+    }
+}
diff --git a/src/css/selector.zig b/src/css/selector.zig
new file mode 100644
index 00000000..b5779899
--- /dev/null
+++ b/src/css/selector.zig
@@ -0,0 +1,749 @@
+const std = @import("std");
+
+// AttributeOP lists the comparison operators accepted inside an attribute
+// selector such as `[key op value]`.
+pub const AttributeOP = enum {
+    eql, // =
+    not_eql, // !=
+    one_of, // ~=
+    prefix_hyphen, // |=
+    prefix, // ^=
+    suffix, // $=
+    contains, // *=
+    regexp, // #=
+
+    // len returns the number of bytes of the operator's textual form.
+    pub fn len(op: AttributeOP) u2 {
+        return switch (op) {
+            .eql => 1,
+            else => 2,
+        };
+    }
+};
+
+// Combinator describes how the two halves of a combined selector relate.
+pub const Combinator = enum {
+    empty,
+    descendant, // space
+    child, // >
+    next_sibling, // +
+    subsequent_sibling, // ~
+
+    pub const Error = error{
+        InvalidCombinator,
+    };
+
+    // parse maps a combinator character to its Combinator value.
+    // Any other byte yields InvalidCombinator.
+    pub fn parse(c: u8) Error!Combinator {
+        if (c == ' ') return .descendant;
+        if (c == '>') return .child;
+        if (c == '+') return .next_sibling;
+        if (c == '~') return .subsequent_sibling;
+        return Error.InvalidCombinator;
+    }
+};
+
+// PseudoClass enumerates every pseudo-class and pseudo-element keyword the
+// selector parser recognises.
+pub const PseudoClass = enum {
+    not,
+    has,
+    haschild,
+    contains,
+    containsown,
+    matches,
+    matchesown,
+    nth_child,
+    nth_last_child,
+    nth_of_type,
+    nth_last_of_type,
+    first_child,
+    last_child,
+    first_of_type,
+    last_of_type,
+    only_child,
+    only_of_type,
+    input,
+    empty,
+    root,
+    link,
+    lang,
+    enabled,
+    disabled,
+    checked,
+    visited,
+    hover,
+    active,
+    focus,
+    target,
+    after,
+    backdrop,
+    before,
+    cue,
+    first_letter,
+    first_line,
+    grammar_error,
+    marker,
+    placeholder,
+    selection,
+    spelling_error,
+
+    pub const Error = error{
+        InvalidPseudoClass,
+    };
+
+    // isPseudoElement reports whether pc names a pseudo-element rather than
+    // a pseudo-class.
+    pub fn isPseudoElement(pc: PseudoClass) bool {
+        switch (pc) {
+            .after,
+            .backdrop,
+            .before,
+            .cue,
+            .first_letter,
+            .first_line,
+            .grammar_error,
+            .marker,
+            .placeholder,
+            .selection,
+            .spelling_error,
+            => return true,
+            else => return false,
+        }
+    }
+
+    // parse converts a keyword (compared ASCII case-insensitively) into its
+    // PseudoClass. Unknown keywords yield InvalidPseudoClass.
+    pub fn parse(s: []const u8) Error!PseudoClass {
+        const Entry = struct { name: []const u8, pc: PseudoClass };
+        const table = [_]Entry{
+            .{ .name = "not", .pc = .not },
+            .{ .name = "has", .pc = .has },
+            .{ .name = "haschild", .pc = .haschild },
+            .{ .name = "contains", .pc = .contains },
+            .{ .name = "containsown", .pc = .containsown },
+            .{ .name = "matches", .pc = .matches },
+            .{ .name = "matchesown", .pc = .matchesown },
+            .{ .name = "nth-child", .pc = .nth_child },
+            .{ .name = "nth-last-child", .pc = .nth_last_child },
+            .{ .name = "nth-of-type", .pc = .nth_of_type },
+            .{ .name = "nth-last-of-type", .pc = .nth_last_of_type },
+            .{ .name = "first-child", .pc = .first_child },
+            .{ .name = "last-child", .pc = .last_child },
+            .{ .name = "first-of-type", .pc = .first_of_type },
+            .{ .name = "last-of-type", .pc = .last_of_type },
+            .{ .name = "only-child", .pc = .only_child },
+            .{ .name = "only-of-type", .pc = .only_of_type },
+            .{ .name = "input", .pc = .input },
+            .{ .name = "empty", .pc = .empty },
+            .{ .name = "root", .pc = .root },
+            .{ .name = "link", .pc = .link },
+            .{ .name = "lang", .pc = .lang },
+            .{ .name = "enabled", .pc = .enabled },
+            .{ .name = "disabled", .pc = .disabled },
+            .{ .name = "checked", .pc = .checked },
+            .{ .name = "visited", .pc = .visited },
+            .{ .name = "hover", .pc = .hover },
+            .{ .name = "active", .pc = .active },
+            .{ .name = "focus", .pc = .focus },
+            .{ .name = "target", .pc = .target },
+            .{ .name = "after", .pc = .after },
+            .{ .name = "backdrop", .pc = .backdrop },
+            .{ .name = "before", .pc = .before },
+            .{ .name = "cue", .pc = .cue },
+            .{ .name = "first-letter", .pc = .first_letter },
+            .{ .name = "first-line", .pc = .first_line },
+            .{ .name = "grammar-error", .pc = .grammar_error },
+            .{ .name = "marker", .pc = .marker },
+            .{ .name = "placeholder", .pc = .placeholder },
+            .{ .name = "selection", .pc = .selection },
+            .{ .name = "spelling-error", .pc = .spelling_error },
+        };
+
+        for (table) |entry| {
+            if (std.ascii.eqlIgnoreCase(s, entry.name)) return entry.pc;
+        }
+        return Error.InvalidPseudoClass;
+    }
+};
+
+pub const Selector = union(enum) {
+ // Errors returned by `match` for selectors that were parsed successfully
+ // but cannot be evaluated.
+ pub const Error = error{
+ // NOTE(review): not returned by any code visible in this section.
+ UnknownCombinedCombinator,
+ // A `.pseudo_class_relative` carrying something other than not/has/haschild.
+ UnsupportedRelativePseudoClass,
+ // :contains / :containsOwn are parsed but not implemented yet.
+ UnsupportedContainsPseudoClass,
+ // A `.pseudo_class` value that `match` has no dedicated branch for.
+ UnsupportedPseudoClass,
+ // Pseudo-elements cannot be matched against a node.
+ UnsupportedPseudoElement,
+ // :matches / :matchesOwn are parsed but not implemented yet.
+ UnsupportedRegexpPseudoClass,
+ // The `#=` attribute operator is parsed but not implemented yet.
+ UnsupportedAttrRegexpOperator,
+ };
+
+ compound: struct {
+ selectors: []Selector,
+ pseudo_elt: ?PseudoClass,
+ },
+ group: []Selector,
+ tag: []const u8,
+ id: []const u8,
+ class: []const u8,
+ attribute: struct {
+ key: []const u8,
+ val: ?[]const u8 = null,
+ op: ?AttributeOP = null,
+ regexp: ?[]const u8 = null,
+ ci: bool = false,
+ },
+ combined: struct {
+ first: *Selector,
+ second: *Selector,
+ combinator: Combinator,
+ },
+
+ never_match: PseudoClass,
+
+ pseudo_class: PseudoClass,
+ pseudo_class_only_child: bool,
+ pseudo_class_lang: []const u8,
+ pseudo_class_relative: struct {
+ pseudo_class: PseudoClass,
+ match: *Selector,
+ },
+ pseudo_class_contains: struct {
+ own: bool,
+ val: []const u8,
+ },
+ pseudo_class_regexp: struct {
+ own: bool,
+ regexp: []const u8,
+ },
+ pseudo_class_nth: struct {
+ a: isize,
+ b: isize,
+ of_type: bool,
+ last: bool,
+ },
+ pseudo_element: PseudoClass,
+
+ // word reports whether `haystack`, read as a whitespace-separated list,
+ // contains `needle` as one of its entries. `ci` selects ASCII
+ // case-insensitive comparison. An empty haystack never matches.
+ fn word(haystack: []const u8, needle: []const u8, ci: bool) bool {
+     if (haystack.len == 0) return false;
+     // Separators: space, tab, CR, LF and form feed (\x0c; Zig has no \f escape).
+     var it = std.mem.splitAny(u8, haystack, " \t\r\n\x0c");
+     while (it.next()) |part| {
+         if (eql(part, needle, ci)) return true;
+     }
+     return false;
+ }
+
+ // eql compares a and b for equality, ignoring ASCII case when ci is set.
+ fn eql(a: []const u8, b: []const u8, ci: bool) bool {
+     return if (ci) std.ascii.eqlIgnoreCase(a, b) else std.mem.eql(u8, a, b);
+ }
+
+ // starts reports whether haystack begins with needle, ignoring ASCII case
+ // when ci is set.
+ fn starts(haystack: []const u8, needle: []const u8, ci: bool) bool {
+     return if (ci)
+         std.ascii.startsWithIgnoreCase(haystack, needle)
+     else
+         std.mem.startsWith(u8, haystack, needle);
+ }
+
+ // ends reports whether haystack finishes with needle, ignoring ASCII case
+ // when ci is set.
+ fn ends(haystack: []const u8, needle: []const u8, ci: bool) bool {
+     return if (ci)
+         std.ascii.endsWithIgnoreCase(haystack, needle)
+     else
+         std.mem.endsWith(u8, haystack, needle);
+ }
+
+ // contains reports whether needle occurs anywhere in haystack, ignoring
+ // ASCII case when ci is set.
+ fn contains(haystack: []const u8, needle: []const u8, ci: bool) bool {
+     const found = if (ci)
+         std.ascii.indexOfIgnoreCase(haystack, needle)
+     else
+         std.mem.indexOf(u8, haystack, needle);
+     return found != null;
+ }
+
+ // match returns true if the node matches the selector query.
+ //
+ // `n` may be any node type exposing the accessors used below (isElement,
+ // isText, isComment, isDocument, isEmptyText, tag, attr, parent,
+ // firstChild, nextSibling, prevSibling, ...).
+ //
+ // Selectors that are parsed but not implemented (:contains, :matches, the
+ // `#=` attribute operator, pseudo-elements) return an `Error` value
+ // instead of a boolean.
+ pub fn match(s: Selector, n: anytype) !bool {
+     return switch (s) {
+         .tag => |v| n.isElement() and std.ascii.eqlIgnoreCase(v, try n.tag()),
+         .id => |v| return n.isElement() and std.mem.eql(u8, v, try n.attr("id") orelse return false),
+         .class => |v| return n.isElement() and word(try n.attr("class") orelse return false, v, false),
+         .group => |v| {
+             // A group matches as soon as one of its members matches.
+             for (v) |sel| {
+                 if (try sel.match(n)) return true;
+             }
+             return false;
+         },
+         .compound => |v| {
+             if (v.selectors.len == 0) return n.isElement();
+
+             // A compound matches only if every member matches.
+             for (v.selectors) |sel| {
+                 if (!try sel.match(n)) return false;
+             }
+             return true;
+         },
+         .combined => |v| {
+             return switch (v.combinator) {
+                 .empty => try v.first.match(n),
+                 .descendant => {
+                     if (!try v.second.match(n)) return false;
+
+                     // The first must match an ancestor.
+                     var p = try n.parent();
+                     while (p != null) {
+                         if (try v.first.match(p.?)) {
+                             return true;
+                         }
+                         p = try p.?.parent();
+                     }
+
+                     return false;
+                 },
+                 .child => {
+                     const p = try n.parent();
+                     if (p == null) return false;
+
+                     return try v.second.match(n) and try v.first.match(p.?);
+                 },
+                 .next_sibling => {
+                     if (!try v.second.match(n)) return false;
+                     // Only the closest previous sibling that is neither text
+                     // nor a comment is considered.
+                     var c = try n.prevSibling();
+                     while (c != null) {
+                         if (c.?.isText() or c.?.isComment()) {
+                             c = try c.?.prevSibling();
+                             continue;
+                         }
+                         return try v.first.match(c.?);
+                     }
+                     return false;
+                 },
+                 .subsequent_sibling => {
+                     if (!try v.second.match(n)) return false;
+
+                     // Any previous sibling may match the first selector.
+                     var c = try n.prevSibling();
+                     while (c != null) {
+                         if (try v.first.match(c.?)) return true;
+                         c = try c.?.prevSibling();
+                     }
+                     return false;
+                 },
+             };
+         },
+         .attribute => |v| {
+             var attr = try n.attr(v.key);
+
+             // `[key]`: presence test only.
+             if (v.op == null) return attr != null;
+             // NOTE(review): an empty value never matches, for every operator,
+             // including `=` and `!=` -- confirm this is intended.
+             if (v.val == null or v.val.?.len == 0) return false;
+
+             const val = v.val.?;
+
+             return switch (v.op.?) {
+                 .eql => attr != null and eql(attr.?, val, v.ci),
+                 .not_eql => attr == null or !eql(attr.?, val, v.ci),
+                 .one_of => attr != null and word(attr.?, val, v.ci),
+                 .prefix => {
+                     if (attr == null) return false;
+                     attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                     if (attr.?.len == 0) return false;
+
+                     return starts(attr.?, val, v.ci);
+                 },
+                 .suffix => {
+                     if (attr == null) return false;
+                     attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                     if (attr.?.len == 0) return false;
+
+                     return ends(attr.?, val, v.ci);
+                 },
+                 .contains => {
+                     if (attr == null) return false;
+                     attr.? = std.mem.trim(u8, attr.?, &std.ascii.whitespace);
+
+                     if (attr.?.len == 0) return false;
+
+                     return contains(attr.?, val, v.ci);
+                 },
+                 .prefix_hyphen => {
+                     // Matches `val` exactly, or `val` followed by '-'.
+                     if (attr == null) return false;
+                     if (eql(attr.?, val, v.ci)) return true;
+
+                     if (attr.?.len <= val.len) return false;
+
+                     if (!starts(attr.?, val, v.ci)) return false;
+
+                     return attr.?[val.len] == '-';
+                 },
+                 .regexp => return Error.UnsupportedAttrRegexpOperator, // TODO handle regexp attribute operator.
+             };
+         },
+         .never_match => return false,
+         .pseudo_class_relative => |v| {
+             if (!n.isElement()) return false;
+
+             return switch (v.pseudo_class) {
+                 .not => !try v.match.match(n),
+                 .has => try hasDescendantMatch(v.match, n),
+                 .haschild => try hasChildMatch(v.match, n),
+                 else => Error.UnsupportedRelativePseudoClass,
+             };
+         },
+         .pseudo_class_contains => return Error.UnsupportedContainsPseudoClass, // TODO, need mem allocation.
+         .pseudo_class_regexp => return Error.UnsupportedRegexpPseudoClass, // TODO need mem allocation.
+         .pseudo_class_nth => |v| {
+             // a == 0 means a fixed position: use the cheaper walk.
+             if (v.a == 0) {
+                 if (v.last) {
+                     return simpleNthLastChildMatch(v.b, v.of_type, n);
+                 }
+                 return simpleNthChildMatch(v.b, v.of_type, n);
+             }
+             return nthChildMatch(v.a, v.b, v.last, v.of_type, n);
+         },
+         .pseudo_class => |v| {
+             return switch (v) {
+                 .input => {
+                     if (!n.isElement()) return false;
+                     const ntag = try n.tag();
+
+                     return std.ascii.eqlIgnoreCase("input", ntag) or
+                         std.ascii.eqlIgnoreCase("select", ntag) or
+                         std.ascii.eqlIgnoreCase("button", ntag) or
+                         std.ascii.eqlIgnoreCase("textarea", ntag);
+                 },
+                 .empty => {
+                     if (!n.isElement()) return false;
+
+                     var c = try n.firstChild();
+                     while (c != null) {
+                         if (c.?.isElement()) return false;
+
+                         // Text only counts when it is not empty. The cursor
+                         // must still advance past empty text, otherwise the
+                         // loop never terminates.
+                         if (c.?.isText() and !try c.?.isEmptyText()) return false;
+
+                         c = try c.?.nextSibling();
+                     }
+
+                     return true;
+                 },
+                 .root => {
+                     if (!n.isElement()) return false;
+
+                     const p = try n.parent();
+                     return (p != null and p.?.isDocument());
+                 },
+                 .link => {
+                     // Same guard as the other tag-based pseudo-classes.
+                     if (!n.isElement()) return false;
+
+                     const ntag = try n.tag();
+
+                     return std.ascii.eqlIgnoreCase("a", ntag) or
+                         std.ascii.eqlIgnoreCase("area", ntag) or
+                         std.ascii.eqlIgnoreCase("link", ntag);
+                 },
+                 .enabled => {
+                     if (!n.isElement()) return false;
+
+                     const ntag = try n.tag();
+
+                     if (std.ascii.eqlIgnoreCase("a", ntag) or
+                         std.ascii.eqlIgnoreCase("area", ntag) or
+                         std.ascii.eqlIgnoreCase("link", ntag))
+                     {
+                         return try n.attr("href") != null;
+                     }
+
+                     if (std.ascii.eqlIgnoreCase("optgroup", ntag) or
+                         std.ascii.eqlIgnoreCase("menuitem", ntag) or
+                         std.ascii.eqlIgnoreCase("fieldset", ntag))
+                     {
+                         return try n.attr("disabled") == null;
+                     }
+
+                     if (std.ascii.eqlIgnoreCase("input", ntag) or
+                         std.ascii.eqlIgnoreCase("button", ntag) or
+                         std.ascii.eqlIgnoreCase("select", ntag) or
+                         std.ascii.eqlIgnoreCase("textarea", ntag) or
+                         std.ascii.eqlIgnoreCase("option", ntag))
+                     {
+                         return try n.attr("disabled") == null and
+                             !try inDisabledFieldset(n);
+                     }
+
+                     return false;
+                 },
+                 .disabled => {
+                     if (!n.isElement()) return false;
+
+                     const ntag = try n.tag();
+
+                     if (std.ascii.eqlIgnoreCase("optgroup", ntag) or
+                         std.ascii.eqlIgnoreCase("menuitem", ntag) or
+                         std.ascii.eqlIgnoreCase("fieldset", ntag))
+                     {
+                         return try n.attr("disabled") != null;
+                     }
+
+                     if (std.ascii.eqlIgnoreCase("input", ntag) or
+                         std.ascii.eqlIgnoreCase("button", ntag) or
+                         std.ascii.eqlIgnoreCase("select", ntag) or
+                         std.ascii.eqlIgnoreCase("textarea", ntag) or
+                         std.ascii.eqlIgnoreCase("option", ntag))
+                     {
+                         return try n.attr("disabled") != null or
+                             try inDisabledFieldset(n);
+                     }
+
+                     return false;
+                 },
+                 .checked => {
+                     if (!n.isElement()) return false;
+
+                     const ntag = try n.tag();
+
+                     if (std.ascii.eqlIgnoreCase("input", ntag)) {
+                         const ntype = try n.attr("type");
+                         if (ntype == null) return false;
+
+                         if (std.mem.eql(u8, ntype.?, "checkbox") or
+                             std.mem.eql(u8, ntype.?, "radio"))
+                         {
+                             return try n.attr("checked") != null;
+                         }
+
+                         return false;
+                     }
+                     if (std.ascii.eqlIgnoreCase("option", ntag)) {
+                         return try n.attr("selected") != null;
+                     }
+
+                     return false;
+                 },
+                 .visited => return false,
+                 .hover => return false,
+                 .active => return false,
+                 .focus => return false,
+                 // TODO implement using the url fragment.
+                 // see https://developer.mozilla.org/en-US/docs/Web/CSS/:target
+                 .target => return false,
+
+                 // all other pseudo classes are handled by specialized
+                 // pseudo_class_X selectors.
+                 else => return Error.UnsupportedPseudoClass,
+             };
+         },
+         .pseudo_class_only_child => |v| onlyChildMatch(v, n),
+         .pseudo_class_lang => |v| langMatch(v, n),
+
+         // pseudo elements don't make sense in the matching process.
+         // > A CSS pseudo-element is a keyword added to a selector that
+         // > lets you style a specific part of the selected element(s).
+         // https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-elements
+         .pseudo_element => return Error.UnsupportedPseudoElement,
+     };
+ }
+
+ // hasLegendInPreviousSiblings reports whether any sibling located before n
+ // has the tag name "legend" (compared ASCII case-insensitively).
+ fn hasLegendInPreviousSiblings(n: anytype) anyerror!bool {
+     var cur = try n.prevSibling();
+     while (cur) |sib| {
+         if (std.ascii.eqlIgnoreCase("legend", try sib.tag())) return true;
+         cur = try sib.prevSibling();
+     }
+     return false;
+ }
+
+ // inDisabledFieldset reports whether n has an ancestor <fieldset> carrying
+ // a `disabled` attribute.
+ //
+ // For a direct parent fieldset there is one exception, ported from
+ // cascadia: a <legend> child is not considered disabled unless another
+ // <legend> precedes it among its siblings.
+ // https://github.com/andybalholm/cascadia/blob/master/pseudo_classes.go#L434
+ //
+ // TODO the legend rule has no test case in cascadia and no reference for it
+ // was found in the specs; revisit whether it should be kept.
+ fn inDisabledFieldset(n: anytype) anyerror!bool {
+     const parent = (try n.parent()) orelse return false;
+
+     const ntag = try n.tag();
+     const ptag = try parent.tag();
+
+     if (std.ascii.eqlIgnoreCase("fieldset", ptag)) {
+         if (try parent.attr("disabled") != null) {
+             if (!std.ascii.eqlIgnoreCase("legend", ntag)) return true;
+             if (try hasLegendInPreviousSiblings(n)) return true;
+         }
+     }
+
+     // Not decided at this level: look further up the tree.
+     return try inDisabledFieldset(parent);
+ }
+
+ // langMatch implements :lang(lang).
+ //
+ // A node matches when its own `lang` attribute, or the one of any
+ // ancestor, equals `lang` or starts with `lang` followed by '-'
+ // (e.g. "en" matches "en" and "en-US").
+ //
+ // The comparison ignores ASCII case: the parser lowercases the :lang()
+ // argument, so a case-sensitive comparison would never match an attribute
+ // written as lang="EN".
+ fn langMatch(lang: []const u8, n: anytype) anyerror!bool {
+     if (try n.attr("lang")) |own| {
+         if (std.ascii.eqlIgnoreCase(own, lang)) return true;
+
+         // check if the lang attr starts with lang+'-'
+         if (own.len > lang.len and own[lang.len] == '-' and
+             std.ascii.startsWithIgnoreCase(own, lang))
+         {
+             return true;
+         }
+     }
+
+     // if the attribute doesn't match, try the parent.
+     const p = try n.parent();
+     if (p == null) return false;
+
+     return langMatch(lang, p.?);
+ }
+
+ // onlyChildMatch implements :only-child.
+ // If `of_type` is true, it implements :only-of-type instead: only siblings
+ // sharing n's tag name are taken into account.
+ fn onlyChildMatch(of_type: bool, n: anytype) anyerror!bool {
+     if (!n.isElement()) return false;
+
+     const parent = (try n.parent()) orelse return false;
+     const ntag = try n.tag();
+
+     var seen_one = false;
+     var cur = try parent.firstChild();
+     // walk over all the children of n's parent.
+     while (cur) |sib| {
+         // non-elements never count; other tags don't count with of_type.
+         const counts = sib.isElement() and
+             (!of_type or std.mem.eql(u8, ntag, try sib.tag()));
+         if (counts) {
+             // a second candidate means n is not the only one.
+             if (seen_one) return false;
+             seen_one = true;
+         }
+         cur = try sib.nextSibling();
+     }
+
+     return seen_one;
+ }
+
+ // simpleNthLastChildMatch implements :nth-last-child(b).
+ // If `of_type` is true, it implements :nth-last-of-type instead.
+ fn simpleNthLastChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool {
+     if (!n.isElement()) return false;
+
+     const parent = (try n.parent()) orelse return false;
+     const ntag = try n.tag();
+
+     var seen: isize = 0;
+     var cur = try parent.lastChild();
+     // walk the children of n's parent, starting from the end.
+     while (cur) |sib| {
+         // non-elements never count; other tags don't count with of_type.
+         const counts = sib.isElement() and
+             (!of_type or std.mem.eql(u8, ntag, try sib.tag()));
+         if (counts) {
+             seen += 1;
+
+             if (n.eql(sib)) return seen == b;
+             // position b reached without meeting n: no need to go further.
+             if (seen >= b) return false;
+         }
+         cur = try sib.prevSibling();
+     }
+
+     return false;
+ }
+
+ // simpleNthChildMatch implements :nth-child(b).
+ // If `of_type` is true, it implements :nth-of-type instead.
+ fn simpleNthChildMatch(b: isize, of_type: bool, n: anytype) anyerror!bool {
+     if (!n.isElement()) return false;
+
+     const parent = (try n.parent()) orelse return false;
+     const ntag = try n.tag();
+
+     var seen: isize = 0;
+     var cur = try parent.firstChild();
+     // walk the children of n's parent, starting from the beginning.
+     while (cur) |sib| {
+         // non-elements never count; other tags don't count with of_type.
+         const counts = sib.isElement() and
+             (!of_type or std.mem.eql(u8, ntag, try sib.tag()));
+         if (counts) {
+             seen += 1;
+
+             if (n.eql(sib)) return seen == b;
+             // position b reached without meeting n: no need to go further.
+             if (seen >= b) return false;
+         }
+         cur = try sib.nextSibling();
+     }
+
+     return false;
+ }
+
+ // nthChildMatch implements :nth-child(an+b).
+ // If `last` is true, implements :nth-last-child instead.
+ // If `of_type` is true, implements :nth-of-type instead.
+ //
+ // n matches when its 1-based position `i` among the counted siblings can be
+ // written as `a*k + b` for some integer k >= 0. `a` may be negative
+ // (e.g. `-n+3`).
+ fn nthChildMatch(a: isize, b: isize, last: bool, of_type: bool, n: anytype) anyerror!bool {
+     if (!n.isElement()) return false;
+
+     const p = try n.parent();
+     if (p == null) return false;
+
+     const ntag = try n.tag();
+
+     var i: isize = -1;
+     var count: isize = 0;
+     var c = try p.?.firstChild();
+     // loop over all n siblings.
+     while (c != null) {
+         // ignore non elements or others tags if of-type is true.
+         if (!c.?.isElement() or (of_type and !std.mem.eql(u8, ntag, try c.?.tag()))) {
+             c = try c.?.nextSibling();
+             continue;
+         }
+         count += 1;
+
+         if (n.eql(c.?)) {
+             i = count;
+             // counting from the end needs the total, so keep walking.
+             if (!last) break;
+         }
+
+         c = try c.?.nextSibling();
+     }
+
+     if (i == -1) return false;
+
+     if (last) i = count - i + 1;
+
+     i -= b;
+     if (a == 0) return i == 0;
+
+     // @mod requires a strictly positive denominator, so divisibility is
+     // tested against |a|.
+     const step: isize = if (a < 0) -a else a;
+     if (@mod(i, step) != 0) return false;
+
+     // i is a multiple of a: the quotient is >= 0 when i is zero or has the
+     // same sign as a.
+     return i == 0 or (i > 0) == (a > 0);
+ }
+
+ // hasDescendantMatch reports whether s matches any node located below n,
+ // at any depth. Children are tested before their own subtree is explored,
+ // and only element children are descended into.
+ fn hasDescendantMatch(s: *const Selector, n: anytype) anyerror!bool {
+     var cur = try n.firstChild();
+     while (cur) |child| {
+         if (try s.match(child)) return true;
+         if (child.isElement()) {
+             if (try hasDescendantMatch(s, child)) return true;
+         }
+         cur = try child.nextSibling();
+     }
+
+     return false;
+ }
+
+ // hasChildMatch reports whether s matches one of n's direct children.
+ fn hasChildMatch(s: *const Selector, n: anytype) anyerror!bool {
+     var cur = try n.firstChild();
+     while (cur) |child| {
+         if (try s.match(child)) return true;
+         cur = try child.nextSibling();
+     }
+
+     return false;
+ }
+
+ // deinit releases, with `alloc`, every allocation owned by the selector,
+ // recursing into nested selectors. `alloc` must be the allocator the
+ // selector was built with.
+ pub fn deinit(sel: Selector, alloc: std.mem.Allocator) void {
+     switch (sel) {
+         // Variants that own no memory.
+         .pseudo_class, .pseudo_element, .never_match => {},
+         .pseudo_class_nth, .pseudo_class_only_child => {},
+
+         // Variants owning a single string.
+         .tag, .id, .class, .pseudo_class_lang => |str| alloc.free(str),
+         .pseudo_class_contains => |pc| alloc.free(pc.val),
+         .pseudo_class_regexp => |pc| alloc.free(pc.regexp),
+
+         .attribute => |a| {
+             alloc.free(a.key);
+             if (a.val) |val| alloc.free(val);
+             if (a.regexp) |re| alloc.free(re);
+         },
+
+         // Variants owning a slice of selectors.
+         .group => |members| {
+             for (members) |member| member.deinit(alloc);
+             alloc.free(members);
+         },
+         .compound => |cmp| {
+             for (cmp.selectors) |member| member.deinit(alloc);
+             alloc.free(cmp.selectors);
+         },
+
+         // Variants owning heap-allocated child selectors.
+         .combined => |pair| {
+             pair.first.deinit(alloc);
+             alloc.destroy(pair.first);
+             pair.second.deinit(alloc);
+             alloc.destroy(pair.second);
+         },
+         .pseudo_class_relative => |rel| {
+             rel.match.deinit(alloc);
+             alloc.destroy(rel.match);
+         },
+     }
+ }
+};
diff --git a/src/dom/css.zig b/src/dom/css.zig
new file mode 100644
index 00000000..4e293c92
--- /dev/null
+++ b/src/dom/css.zig
@@ -0,0 +1,61 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+const css = @import("../css/css.zig");
+const Node = @import("../css/libdom.zig").Node;
+const NodeList = @import("nodelist.zig").NodeList;
+
+// MatchFirst is the matcher passed to css.matchFirst: it records the node
+// handed to `match`.
+const MatchFirst = struct {
+    n: ?*parser.Node = null,
+
+    // match stores the underlying parser node of `n`.
+    pub fn match(self: *MatchFirst, n: Node) !void {
+        self.n = n.node;
+    }
+};
+
+// querySelector parses `selector` and runs css.matchFirst from `n`.
+// Returns the node recorded by the matcher, or null when nothing matched.
+// The parsed selector is released before returning.
+pub fn querySelector(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !?*parser.Node {
+    const parsed = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
+    defer parsed.deinit(alloc);
+
+    var first = MatchFirst{};
+    _ = try css.matchFirst(parsed, Node{ .node = n }, &first);
+
+    return first.n;
+}
+
+// MatchAll is the matcher passed to css.matchAll: it accumulates every
+// node handed to `match` into a NodeList.
+const MatchAll = struct {
+    alloc: std.mem.Allocator,
+    nl: NodeList,
+
+    fn init(alloc: std.mem.Allocator) MatchAll {
+        return MatchAll{
+            .nl = NodeList.init(),
+            .alloc = alloc,
+        };
+    }
+
+    fn deinit(self: *MatchAll) void {
+        self.nl.deinit(self.alloc);
+    }
+
+    // match appends the underlying parser node of `n` to the list.
+    pub fn match(self: *MatchAll, n: Node) !void {
+        try self.nl.append(self.alloc, n.node);
+    }
+
+    // toOwnedList hands the accumulated list over to the caller and leaves
+    // a fresh NodeList in its place.
+    fn toOwnedList(self: *MatchAll) NodeList {
+        const owned = self.nl;
+        self.nl = NodeList.init();
+        return owned;
+    }
+};
+
+// querySelectorAll parses `selector`, runs css.matchAll from `n` and
+// returns the collected NodeList. The parsed selector is released before
+// returning; on error the partially built list is released as well.
+pub fn querySelectorAll(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !NodeList {
+    const parsed = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
+    defer parsed.deinit(alloc);
+
+    var collector = MatchAll.init(alloc);
+    // After toOwnedList the collector holds a fresh list, so this is
+    // harmless on the success path.
+    defer collector.deinit();
+
+    try css.matchAll(parsed, Node{ .node = n }, &collector);
+
+    return collector.toOwnedList();
+}
diff --git a/src/dom/document.zig b/src/dom/document.zig
index 7f3af5cf..c99cc5fc 100644
--- a/src/dom/document.zig
+++ b/src/dom/document.zig
@@ -13,6 +13,7 @@ const NodeUnion = @import("node.zig").Union;
const Walker = @import("walker.zig").WalkerDepthFirst;
const collection = @import("html_collection.zig");
+const css = @import("css.zig");
const Element = @import("element.zig").Element;
const ElementUnion = @import("element.zig").Union;
@@ -188,54 +189,18 @@ pub const Document = struct {
return 1;
}
- // TODO netsurf doesn't handle query selectors. We have to implement a
- // solution by ourselves.
- // For now we handle only * and single id selector like `#foo`.
- pub fn _querySelector(self: *parser.Document, selectors: []const u8) !?ElementUnion {
- if (selectors.len == 0) return null;
+ // Returns the interface of the node css.querySelector finds from the document node; null for an empty selector or no match.
+ pub fn _querySelector(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !?ElementUnion {
+ if (selector.len == 0) return null;
- // catch-all, return the firstElementChild
- if (selectors[0] == '*') return try get_firstElementChild(self);
+ const found = try css.querySelector(alloc, parser.documentToNode(self), selector) orelse return null;
- // support only simple id selector.
- if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
- return try _getElementById(self, selectors[1..]);
+ return try Element.toInterface(parser.nodeToElement(found));
}
- // TODO netsurf doesn't handle query selectors. We have to implement a
- // solution by ourselves.
- // We handle only * and single id selector like `#foo`.
- pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
- var list = try NodeList.init();
- errdefer list.deinit(alloc);
-
- if (selectors.len == 0) return list;
-
- // catch-all, return all elements
- if (selectors[0] == '*') {
- // walk over the node tree fo find the node by id.
- const root = parser.documentToNode(self);
- const walker = Walker{};
- var next: ?*parser.Node = null;
- while (true) {
- next = try walker.get_next(root, next) orelse return list;
- // ignore non-element nodes.
- if (try parser.nodeType(next.?) != .element) {
- continue;
- }
- try list.append(alloc, next.?);
- }
- }
-
- // support only simple id selector.
- if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
-
- // walk over the node tree fo find the node by id.
- const e = try parser.documentGetElementById(self, selectors[1..]) orelse return list;
- try list.append(alloc, parser.elementToNode(e));
-
- return list;
+ // Returns the NodeList css.querySelectorAll builds for `selector`, searching from the document node.
+ // An empty selector yields an empty list, as the previous implementation did and in line with the
+ // empty-selector guard in _querySelector, instead of being handed to the CSS parser.
+ pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
+ if (selector.len == 0) return NodeList.init();
+ return css.querySelectorAll(alloc, parser.documentToNode(self), selector);
}
// TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -426,6 +391,12 @@ pub fn testExecFn(
.{ .src = "document.querySelector('*').nodeName", .ex = "HTML" },
.{ .src = "document.querySelector('#content').id", .ex = "content" },
.{ .src = "document.querySelector('#para').id", .ex = "para" },
+ .{ .src = "document.querySelector('.ok').id", .ex = "link" },
+ .{ .src = "document.querySelector('a ~ p').id", .ex = "para-empty" },
+ .{ .src = "document.querySelector(':root').nodeName", .ex = "HTML" },
+
+ .{ .src = "document.querySelectorAll('p').length", .ex = "2" },
+ .{ .src = "document.querySelectorAll('.ok').item(0).id", .ex = "link" },
};
try checkCases(js_env, &querySelector);
diff --git a/src/dom/element.zig b/src/dom/element.zig
index 7f58ea10..80d31992 100644
--- a/src/dom/element.zig
+++ b/src/dom/element.zig
@@ -9,6 +9,7 @@ const Variadic = jsruntime.Variadic;
const collection = @import("html_collection.zig");
const writeNode = @import("../browser/dump.zig").writeNode;
+const css = @import("css.zig");
const Node = @import("node.zig").Node;
const Walker = @import("walker.zig").WalkerDepthFirst;
@@ -263,56 +264,18 @@ pub const Element = struct {
}
}
- // TODO netsurf doesn't handle query selectors. We have to implement a
- // solution by ourselves.
- // We handle only * and single id selector like `#foo`.
- pub fn _querySelector(self: *parser.Element, selectors: []const u8) !?Union {
- if (selectors.len == 0) return null;
+ // Returns the interface of the node css.querySelector finds from this element; null for an empty selector or no match.
+ pub fn _querySelector(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !?Union {
+ if (selector.len == 0) return null;
- // catch-all, return the firstElementChild
- if (selectors[0] == '*') return try get_firstElementChild(self);
+ const found = try css.querySelector(alloc, parser.elementToNode(self), selector) orelse return null;
- // support only simple id selector.
- if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
- // walk over the node tree fo find the node by id.
- const n = try getElementById(self, selectors[1..]) orelse return null;
- return try toInterface(parser.nodeToElement(n));
+ return try toInterface(parser.nodeToElement(found));
}
- // TODO netsurf doesn't handle query selectors. We have to implement a
- // solution by ourselves.
- // We handle only * and single id selector like `#foo`.
- pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
- var list = try NodeList.init();
- errdefer list.deinit(alloc);
-
- if (selectors.len == 0) return list;
-
- // catch-all, return all elements
- if (selectors[0] == '*') {
- // walk over the node tree fo find the node by id.
- const root = parser.elementToNode(self);
- const walker = Walker{};
- var next: ?*parser.Node = null;
- while (true) {
- next = try walker.get_next(root, next) orelse return list;
- // ignore non-element nodes.
- if (try parser.nodeType(next.?) != .element) {
- continue;
- }
- try list.append(alloc, next.?);
- }
- }
-
- // support only simple id selector.
- if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
-
- // walk over the node tree fo find the node by id.
- const n = try getElementById(self, selectors[1..]) orelse return list;
- try list.append(alloc, n);
-
- return list;
+ // Returns the NodeList css.querySelectorAll builds for `selector`, searching from this element.
+ // An empty selector yields an empty list, as the previous implementation did and in line with the
+ // empty-selector guard in _querySelector, instead of being handed to the CSS parser.
+ pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
+ if (selector.len == 0) return NodeList.init();
+ return css.querySelectorAll(alloc, parser.elementToNode(self), selector);
}
// TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -433,6 +396,12 @@ pub fn testExecFn(
.{ .src = "e.querySelector('#link').id", .ex = "link" },
.{ .src = "e.querySelector('#para').id", .ex = "para" },
.{ .src = "e.querySelector('*').id", .ex = "link" },
+ .{ .src = "e.querySelector('')", .ex = "null" },
+ .{ .src = "e.querySelector('*').id", .ex = "link" },
+ .{ .src = "e.querySelector('#content')", .ex = "null" },
+ .{ .src = "e.querySelector('#para').id", .ex = "para" },
+ .{ .src = "e.querySelector('.ok').id", .ex = "link" },
+ .{ .src = "e.querySelector('a ~ p').id", .ex = "para-empty" },
.{ .src = "e.querySelectorAll('foo').length", .ex = "0" },
.{ .src = "e.querySelectorAll('#foo').length", .ex = "0" },
@@ -441,6 +410,8 @@ pub fn testExecFn(
.{ .src = "e.querySelectorAll('#para').length", .ex = "1" },
.{ .src = "e.querySelectorAll('#para').item(0).id", .ex = "para" },
.{ .src = "e.querySelectorAll('*').length", .ex = "4" },
+ .{ .src = "e.querySelectorAll('p').length", .ex = "2" },
+ .{ .src = "e.querySelectorAll('.ok').item(0).id", .ex = "link" },
};
try checkCases(js_env, &querySelector);
diff --git a/src/dom/node.zig b/src/dom/node.zig
index 7e6aa383..920768ec 100644
--- a/src/dom/node.zig
+++ b/src/dom/node.zig
@@ -199,7 +199,7 @@ pub const Node = struct {
}
pub fn get_childNodes(self: *parser.Node, alloc: std.mem.Allocator) !NodeList {
- var list = try NodeList.init();
+ var list = NodeList.init();
errdefer list.deinit(alloc);
var n = try parser.nodeFirstChild(self) orelse return list;
diff --git a/src/dom/nodelist.zig b/src/dom/nodelist.zig
index c685f3b5..19ca644a 100644
--- a/src/dom/nodelist.zig
+++ b/src/dom/nodelist.zig
@@ -26,7 +26,7 @@ pub const NodeList = struct {
nodes: NodesArrayList,
- pub fn init() !NodeList {
+ pub fn init() NodeList {
return NodeList{
.nodes = NodesArrayList{},
};
diff --git a/src/html/document.zig b/src/html/document.zig
index d463ab29..34943ac2 100644
--- a/src/html/document.zig
+++ b/src/html/document.zig
@@ -80,7 +80,7 @@ pub const HTMLDocument = struct {
}
pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList {
- var list = try NodeList.init();
+ var list = NodeList.init();
errdefer list.deinit(alloc);
if (name.len == 0) return list;
diff --git a/src/run_tests.zig b/src/run_tests.zig
index 2f5c8d6b..84ca45fd 100644
--- a/src/run_tests.zig
+++ b/src/run_tests.zig
@@ -98,11 +98,23 @@ pub fn main() !void {
}
test {
- const AsyncTest = @import("async/test.zig");
- std.testing.refAllDecls(AsyncTest);
+ // Pass each test-carrying module to std.testing.refAllDecls.
+ std.testing.refAllDecls(@import("async/test.zig"));
- const DumpTest = @import("browser/dump.zig");
- std.testing.refAllDecls(DumpTest);
+ std.testing.refAllDecls(@import("browser/dump.zig"));
+ std.testing.refAllDecls(@import("css/css.zig"));
+ std.testing.refAllDecls(@import("css/parser.zig"));
+ std.testing.refAllDecls(@import("css/match_test.zig"));
+ std.testing.refAllDecls(@import("css/libdom_test.zig"));
}
fn testJSRuntime() !void {