diff --git a/src/css/README.md b/src/css/README.md new file mode 100644 index 00000000..fc2a7352 --- /dev/null +++ b/src/css/README.md @@ -0,0 +1,218 @@ +# css + +Lightpanda css implements CSS selector parsing and matching in Zig. +This package is a port of the Go lib [andybalholm/cascadia](https://github.com/andybalholm/cascadia). + +## Usage + +### Query parser + +```zig +const css = @import("css.zig"); + +const selector = try css.parse(alloc, "h1", .{}); +defer selector.deinit(alloc); +``` + +### DOM tree match + +The lib expects a `Node` interface implementation to match your DOM tree. + +```zig +pub const Node = struct { + pub fn firstChild(_: Node) !?Node { + return error.TODO; + } + + pub fn lastChild(_: Node) !?Node { + return error.TODO; + } + + pub fn nextSibling(_: Node) !?Node { + return error.TODO; + } + + pub fn prevSibling(_: Node) !?Node { + return error.TODO; + } + + pub fn parent(_: Node) !?Node { + return error.TODO; + } + + pub fn isElement(_: Node) bool { + return false; + } + + pub fn isDocument(_: Node) bool { + return false; + } + + pub fn isComment(_: Node) bool { + return false; + } + + pub fn isText(_: Node) bool { + return false; + } + + pub fn isEmptyText(_: Node) !bool { + return error.TODO; + } + + pub fn tag(_: Node) ![]const u8 { + return error.TODO; + } + + pub fn attr(_: Node, _: []const u8) !?[]const u8 { + return error.TODO; + } + + pub fn eql(_: Node, _: Node) bool { + return false; + } +}; +``` + +You also need to define a `Matcher` implementing a `match` function to +accumulate the results. + +```zig +const Matcher = struct { + const Nodes = std.ArrayList(Node); + + nodes: Nodes, + + fn init(alloc: std.mem.Allocator) Matcher { + return .{ .nodes = Nodes.init(alloc) }; + } + + fn deinit(m: *Matcher) void { + m.nodes.deinit(); + } + + pub fn match(m: *Matcher, n: Node) !void { + try m.nodes.append(n); + } +}; +``` + +Then you can use the lib itself. 
+ +```zig +var matcher = Matcher.init(alloc); +defer matcher.deinit(); + +try css.matchAll(selector, node, &matcher); +_ = try css.matchFirst(selector, node, &matcher); // returns true if a node matched. +``` + +## Features + +* [x] parse query selector +* [x] `matchAll` +* [x] `matchFirst` +* [ ] specificity + +### Selectors implemented + +#### Selectors + +* [x] Class selectors +* [x] Id selectors +* [x] Type selectors +* [x] Universal selectors +* [ ] Nesting selectors + +#### Combinators + +* [x] Child combinator +* [ ] Column combinator +* [x] Descendant combinator +* [ ] Namespace combinator +* [x] Next-sibling combinator +* [x] Selector list combinator +* [x] Subsequent-sibling combinator + +#### Attribute + +* [x] `[attr]` +* [x] `[attr=value]` +* [x] `[attr|=value]` +* [x] `[attr^=value]` +* [x] `[attr$=value]` +* [ ] `[attr*=value]` +* [x] `[attr operator value i]` +* [ ] `[attr operator value s]` + +#### Pseudo classes + +* [ ] `:active` +* [ ] `:any-link` +* [ ] `:autofill` +* [ ] `:blank Experimental` +* [x] `:checked` +* [ ] `:current Experimental` +* [ ] `:default` +* [ ] `:defined` +* [ ] `:dir() Experimental` +* [x] `:disabled` +* [x] `:empty` +* [x] `:enabled` +* [ ] `:first` +* [x] `:first-child` +* [x] `:first-of-type` +* [ ] `:focus` +* [ ] `:focus-visible` +* [ ] `:focus-within` +* [ ] `:fullscreen` +* [ ] `:future Experimental` +* [x] `:has() Experimental` +* [ ] `:host` +* [ ] `:host()` +* [ ] `:host-context() Experimental` +* [ ] `:hover` +* [ ] `:indeterminate` +* [ ] `:in-range` +* [ ] `:invalid` +* [ ] `:is()` +* [x] `:lang()` +* [x] `:last-child` +* [x] `:last-of-type` +* [ ] `:left` +* [x] `:link` +* [ ] `:local-link Experimental` +* [ ] `:modal` +* [x] `:not()` +* [x] `:nth-child()` +* [x] `:nth-last-child()` +* [x] `:nth-last-of-type()` +* [x] `:nth-of-type()` +* [x] `:only-child` +* [x] `:only-of-type` +* [ ] `:optional` +* [ ] `:out-of-range` +* [ ] `:past Experimental` +* [ ] `:paused` +* [ ] `:picture-in-picture` +* [ ] 
`:placeholder-shown` +* [ ] `:playing` +* [ ] `:read-only` +* [ ] `:read-write` +* [ ] `:required` +* [ ] `:right` +* [x] `:root` +* [ ] `:scope` +* [ ] `:state() Experimental` +* [ ] `:target` +* [ ] `:target-within Experimental` +* [ ] `:user-invalid Experimental` +* [ ] `:valid` +* [ ] `:visited` +* [ ] `:where()` +* [ ] `:contains()` +* [ ] `:containsown()` +* [ ] `:matched()` +* [ ] `:matchesown()` +* [x] `:root` +
diff --git a/src/css/css.zig b/src/css/css.zig new file mode 100644 index 00000000..fc5e8995 --- /dev/null +++ b/src/css/css.zig @@ -0,0 +1,158 @@
+// CSS selector parser and query helpers.
+// This package is a rewrite in Zig of the Cascadia CSS selector parser.
+// see https://github.com/andybalholm/cascadia
+const std = @import("std");
+const Selector = @import("selector.zig").Selector;
+const parser = @import("parser.zig");
+
+// parse parses the selector string s using the options opts and returns the
+// resulting Selector, or a parser.ParseError when parsing fails.
+// alloc is handed to the parser as-is; the "parse" test in this file releases
+// the returned selector with `deinit(alloc)`.
+pub fn parse(alloc: std.mem.Allocator, s: []const u8, opts: parser.ParseOptions) parser.ParseError!Selector {
+    var state: parser.Parser = .{ .s = s, .i = 0, .opts = opts };
+    return state.parse(alloc);
+}
+
+// matchFirst calls m.match with the first descendant of node that matches the
+// selector s and returns true. If no descendant matches, it returns false.
+//
+// The walk is depth-first: each child is tested before its own descendants,
+// and siblings are visited through nextSibling(). node itself is never tested.
+// node must provide firstChild() and nextSibling(); m must provide match().
+pub fn matchFirst(s: Selector, node: anytype, m: anytype) !bool {
+    var cursor = try node.firstChild();
+    while (cursor) |child| {
+        if (try s.match(child)) {
+            try m.match(child);
+            return true;
+        }
+
+        // No direct hit: look inside this child's subtree before moving on.
+        if (try matchFirst(s, child, m)) return true;
+        cursor = try child.nextSibling();
+    }
+    return false;
+}
+
+// matchAll calls m.match with every node that matches the selector s among the
+// descendants of node.
+pub fn matchAll(s: Selector, node: anytype, m: anytype) !void {
+    var cursor = try node.firstChild();
+    while (cursor) |child| {
+        // Report the child itself first, then everything below it, so that
+        // matches are delivered parent-before-descendants.
+        if (try s.match(child)) try m.match(child);
+        try matchAll(s, child, m);
+        cursor = try child.nextSibling();
+    }
+}
+
+test "parse" {
+    const alloc = std.testing.allocator;
+
+    // Every query below is expected to parse without error; the parsed
+    // selector is only released, its structure is not inspected.
+    const testcases = [_][]const u8{
+        "address",
+        "*",
+        "#foo",
+        "li#t1",
+        "*#t4",
+        ".t1",
+        "p.t1",
+        "div.teST",
+        ".t1.fail",
+        "p.t1.t2",
+        "p.--t1",
+        "p.--t1.--t2",
+        "p[title]",
+        "div[class=\"red\" i]",
+        "address[title=\"foo\"]",
+        "address[title=\"FoOIgnoRECaSe\" i]",
+        "address[title!=\"foo\"]",
+        "address[title!=\"foo\" i]",
+        "p[title!=\"FooBarUFoo\" i]",
+        "[ \t title ~= foo ]",
+        "p[title~=\"FOO\" i]",
+        "p[title~=toofoo i]",
+        "[title~=\"hello world\"]",
+        "[title~=\"hello\" i]",
+        "[title~=\"hello\" I]",
+        "[lang|=\"en\"]",
+        "[lang|=\"EN\" i]",
+        "[lang|=\"EN\" i]",
+        "[title^=\"foo\"]",
+        "[title^=\"foo\" i]",
+        "[title$=\"bar\"]",
+        "[title$=\"BAR\" i]",
+        "[title*=\"bar\"]",
+        "[title*=\"BaRu\" i]",
+        "[title*=\"BaRu\" I]",
+        "p[class$=\" \"]",
+        "p[class$=\"\"]",
+        "p[class^=\" \"]",
+        "p[class^=\"\"]",
+        "p[class*=\" \"]",
+        "p[class*=\"\"]",
+        "input[name=Sex][value=F]",
+        "table[border=\"0\"][cellpadding=\"0\"][cellspacing=\"0\"]",
+        ".t1:not(.t2)",
+        "div:not(.t1)",
+        "div:not([class=\"t2\"])",
+        "li:nth-child(odd)",
+        "li:nth-child(even)",
+        "li:nth-child(-n+2)",
+        "li:nth-child(3n+1)",
+        "li:nth-last-child(odd)",
+        "li:nth-last-child(even)",
+        "li:nth-last-child(-n+2)",
+        "li:nth-last-child(3n+1)",
+        "span:first-child",
+        "span:last-child",
+        "p:nth-of-type(2)",
+        "p:nth-last-of-type(2)",
+        "p:last-of-type",
+        "p:first-of-type",
+        "p:only-child",
+        "p:only-of-type",
+        ":empty",
+        "div p",
+        "div table p",
+        "div > p",
+        "p ~ p",
+        "p + p",
+        "li, p",
+        "p +/*This is a comment*/ p",
+        "p:contains(\"that wraps\")",
+        "p:containsOwn(\"that wraps\")",
+        ":containsOwn(\"inner\")",
+        "p:containsOwn(\"block\")",
+        "div:has(#p1)",
+        "div:has(:containsOwn(\"2\"))",
+        "body :has(:containsOwn(\"2\"))",
+        "body :haschild(:containsOwn(\"2\"))",
+        "p:matches([\\d])",
+        "p:matches([a-z])",
+        "p:matches([a-zA-Z])",
+        "p:matches([^\\d])",
+        "p:matches(^(0|a))",
+        "p:matches(^\\d+$)",
+        "p:not(:matches(^\\d+$))",
+        "div :matchesOwn(^\\d+$)",
+        "[href#=(fina)]:not([href#=(\\/\\/[^\\/]+untrusted)])",
+        "[href#=(^https:\\/\\/[^\\/]*\\/?news)]",
+        ":input",
+        ":root",
+        "*:root",
+        "html:nth-child(1)",
+        "*:root:first-child",
+        "*:root:nth-child(1)",
+        "a:not(:root)",
+        "body > *:nth-child(3n+2)",
+        "input:disabled",
+        ":disabled",
+        ":enabled",
+        "div.class1, div.class2",
+    };
+
+    for (testcases) |query| {
+        const selector = parse(alloc, query, .{}) catch |err| {
+            // Name the offending query before propagating the parse error.
+            std.debug.print("query {s}", .{query});
+            return err;
+        };
+        defer selector.deinit(alloc);
+    }
+}
diff --git a/src/css/libdom.zig b/src/css/libdom.zig new file mode 100644 index 00000000..e4e416ea --- /dev/null +++ b/src/css/libdom.zig @@ -0,0 +1,84 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+// Node implementation with Netsurf Libdom C lib.
+pub const Node = struct {
+    // Pointer to the wrapped node, as exposed by ../netsurf.zig.
+    node: *parser.Node,
+
+    // firstChild returns the first child of n wrapped in a Node, or null when
+    // parser.nodeFirstChild yields none.
+    pub fn firstChild(n: Node) !?Node {
+        const raw = (try parser.nodeFirstChild(n.node)) orelse return null;
+        return .{ .node = raw };
+    }
+
+    // lastChild returns the last child of n wrapped in a Node, or null when
+    // parser.nodeLastChild yields none.
+    pub fn lastChild(n: Node) !?Node {
+        const raw = (try parser.nodeLastChild(n.node)) orelse return null;
+        return .{ .node = raw };
+    }
+
+    // nextSibling returns the sibling following n, or null when
+    // parser.nodeNextSibling yields none.
+    pub fn nextSibling(n: Node) !?Node {
+        const raw = (try parser.nodeNextSibling(n.node)) orelse return null;
+        return .{ .node = raw };
+    }
+
+    // prevSibling returns the sibling preceding n, or null when
+    // parser.nodePreviousSibling yields none.
+    pub fn prevSibling(n: Node) !?Node {
+        const raw = (try parser.nodePreviousSibling(n.node)) orelse return null;
+        return .{ .node = raw };
+    }
+
+    // parent returns the parent of n, or null when parser.nodeParentNode
+    // yields none.
+    pub fn parent(n: Node) !?Node {
+        const raw = (try parser.nodeParentNode(n.node)) orelse return null;
+        return .{ .node = raw };
+    }
+
+    // isElement reports whether n has the .element node type. A failure to
+    // read the node type is reported as false rather than as an error.
+    pub fn isElement(n: Node) bool {
+        return (parser.nodeType(n.node) catch return false) == .element;
+    }
+
+    // isDocument reports whether n has the .document node type; a node type
+    // lookup failure yields false.
+    pub fn isDocument(n: Node) bool {
+        return (parser.nodeType(n.node) catch return false) == .document;
+    }
+
+    // isComment reports whether n has the .comment node type; a node type
+    // lookup failure yields false.
+    pub fn isComment(n: Node) bool {
+        return (parser.nodeType(n.node) catch return false) == .comment;
+    }
+
+    // isText reports whether n has the .text node type; a node type lookup
+    // failure yields false.
+    pub fn isText(n: Node) bool {
+        return (parser.nodeType(n.node) catch return false) == .text;
+    }
+
+    // isEmptyText returns true when the text content of n is null, empty, or
+    // made only of ASCII whitespace.
+    pub fn isEmptyText(n: Node) !bool {
+        const text = (try parser.nodeTextContent(n.node)) orelse return true;
+
+        // Trimming an empty slice yields an empty slice, so the empty case
+        // needs no separate test.
+        return std.mem.trim(u8, text, &std.ascii.whitespace).len == 0;
+    }
+
+    // tag returns the name of n as given by parser.nodeName.
+    pub fn tag(n: Node) ![]const u8 {
+        return try parser.nodeName(n.node);
+    }
+
+    // attr returns the value of attribute key on n. Nodes that are not
+    // elements have no attributes and yield null.
+    pub fn attr(n: Node, key: []const u8) !?[]const u8 {
+        if (n.isElement()) {
+            return try parser.elementGetAttribute(parser.nodeToElement(n.node), key);
+        }
+        return null;
+    }
+
+    // eql reports whether a and b wrap the same underlying node pointer.
+    pub fn eql(a: Node, b: Node) bool {
+        return b.node == a.node;
+    }
+};
diff --git a/src/css/libdom_test.zig b/src/css/libdom_test.zig new file mode 100644 index 00000000..ee651f2f --- /dev/null +++ b/src/css/libdom_test.zig @@ -0,0 +1,307 @@
+const
std = @import("std");
+const css = @import("css.zig");
+const Node = @import("libdom.zig").Node;
+const parser = @import("../netsurf.zig");
+
+// Matcher accumulates, in call order, every node handed to match().
+const Matcher = struct {
+    const Nodes = std.ArrayList(Node);
+
+    // Nodes collected so far.
+    nodes: Nodes,
+
+    // init returns a Matcher with an empty list backed by alloc.
+    fn init(alloc: std.mem.Allocator) Matcher {
+        return Matcher{ .nodes = Nodes.init(alloc) };
+    }
+
+    // deinit releases the list storage.
+    fn deinit(self: *Matcher) void {
+        self.nodes.deinit();
+    }
+
+    // reset drops the collected nodes but keeps the allocated capacity, so
+    // the same Matcher can be reused.
+    fn reset(self: *Matcher) void {
+        self.nodes.clearRetainingCapacity();
+    }
+
+    // match appends node to the list; it fails only if the append fails.
+    pub fn match(self: *Matcher, node: Node) !void {
+        try self.nodes.append(node);
+    }
+};
+
+test "matchFirst" { + const alloc = std.testing.allocator; + + var matcher = Matcher.init(alloc); + defer matcher.deinit(); + + const testcases = [_]struct { + q: []const u8, + html: []const u8, + exp: usize, + }{ + .{ .q = "address", .html = "
This address...
", .exp = 1 }, + .{ .q = "*", .html = "text", .exp = 1 }, + .{ .q = "*", .html = "", .exp = 1 }, + .{ .q = "#foo", .html = "

", .exp = 1 }, + .{ .q = "li#t1", .html = "