dom: use the css matcher for DOM

This commit is contained in:
Pierre Tachoire
2024-03-26 10:02:15 +01:00
parent 0fa49b99bf
commit 4d5f6d42fa
6 changed files with 94 additions and 91 deletions

61
src/dom/css.zig Normal file
View File

@@ -0,0 +1,61 @@
const std = @import("std");
const parser = @import("../netsurf.zig");
const css = @import("../css/css.zig");
const Node = @import("../css/libdom.zig").Node;
const NodeList = @import("nodelist.zig").NodeList;
/// Matcher state that remembers a single node.
///
/// `n` starts out null and is overwritten with the underlying `parser.Node`
/// of whatever `Node` is passed to `match`.
const MatchFirst = struct {
    n: ?*parser.Node = null,

    /// Store the underlying parser node of `found` in `n`, replacing any
    /// previously stored value.
    pub fn match(self: *MatchFirst, found: Node) !void {
        self.n = found.node;
    }
};
/// Parse `selector` and run it through `css.matchFirst`, starting at `n`.
///
/// Returns the node recorded by the matcher, or null when no node was
/// recorded. The parsed selector is released with `alloc` before returning.
/// Errors from `css.parse` and `css.matchFirst` are propagated.
pub fn querySelector(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !?*parser.Node {
    const parsed = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
    defer parsed.deinit(alloc);

    var first: MatchFirst = .{};
    const root: Node = .{ .node = n };
    // The value returned by matchFirst is intentionally discarded; the result
    // is read back from the matcher instead.
    _ = try css.matchFirst(parsed, root, &first);

    return first.n;
}
/// Matcher state that accumulates every node passed to `match` into a
/// `NodeList`, using the allocator given at construction time.
const MatchAll = struct {
    alloc: std.mem.Allocator,
    nl: NodeList,

    /// Build a matcher holding `alloc` and a freshly initialised list.
    fn init(alloc: std.mem.Allocator) MatchAll {
        return MatchAll{ .alloc = alloc, .nl = NodeList.init() };
    }

    /// Deinitialise the list currently held by the matcher.
    fn deinit(self: *MatchAll) void {
        self.nl.deinit(self.alloc);
    }

    /// Append the underlying parser node of `found` to the list.
    /// Propagates any error returned by the append.
    pub fn match(self: *MatchAll, found: Node) !void {
        return self.nl.append(self.alloc, found.node);
    }

    /// Hand the accumulated list to the caller and leave the matcher holding
    /// a fresh `NodeList.init()`, so a later `deinit` acts on the fresh list
    /// rather than on the returned one.
    fn toOwnedList(self: *MatchAll) NodeList {
        const owned = self.nl;
        self.nl = NodeList.init();
        return owned;
    }
};
/// Parse `selector` and run it through `css.matchAll`, starting at `n`,
/// collecting every node handed to the matcher.
///
/// Returns the collected `NodeList`. The parsed selector is released with
/// `alloc` before returning. Errors from `css.parse`, `css.matchAll` and the
/// list appends are propagated; on those paths the partially built list is
/// deinitialised by the deferred `deinit`.
pub fn querySelectorAll(alloc: std.mem.Allocator, n: *parser.Node, selector: []const u8) !NodeList {
    const parsed = try css.parse(alloc, selector, .{ .accept_pseudo_elts = true });
    defer parsed.deinit(alloc);

    var collector = MatchAll.init(alloc);
    // Runs on every exit path. On success `toOwnedList` has already swapped in
    // a fresh list, so this does not touch the list being returned.
    defer collector.deinit();

    const root: Node = .{ .node = n };
    try css.matchAll(parsed, root, &collector);

    return collector.toOwnedList();
}

View File

@@ -13,6 +13,7 @@ const NodeUnion = @import("node.zig").Union;
const Walker = @import("walker.zig").WalkerDepthFirst; const Walker = @import("walker.zig").WalkerDepthFirst;
const collection = @import("html_collection.zig"); const collection = @import("html_collection.zig");
const css = @import("css.zig");
const Element = @import("element.zig").Element; const Element = @import("element.zig").Element;
const ElementUnion = @import("element.zig").Union; const ElementUnion = @import("element.zig").Union;
@@ -188,54 +189,18 @@ pub const Document = struct {
return 1; return 1;
} }
// TODO netsurf doesn't handle query selectors. We have to implement a pub fn _querySelector(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !?ElementUnion {
// solution by ourselves. if (selector.len == 0) return null;
// For now we handle only * and single id selector like `#foo`.
pub fn _querySelector(self: *parser.Document, selectors: []const u8) !?ElementUnion {
if (selectors.len == 0) return null;
// catch-all, return the firstElementChild const n = try css.querySelector(alloc, parser.documentToNode(self), selector);
if (selectors[0] == '*') return try get_firstElementChild(self);
// support only simple id selector. if (n == null) return null;
if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
return try _getElementById(self, selectors[1..]); return try Element.toInterface(parser.nodeToElement(n.?));
} }
// TODO netsurf doesn't handle query selectors. We have to implement a pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
// solution by ourselves. return css.querySelectorAll(alloc, parser.documentToNode(self), selector);
// We handle only * and single id selector like `#foo`.
pub fn _querySelectorAll(self: *parser.Document, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
var list = try NodeList.init();
errdefer list.deinit(alloc);
if (selectors.len == 0) return list;
// catch-all, return all elements
if (selectors[0] == '*') {
// walk over the node tree fo find the node by id.
const root = parser.documentToNode(self);
const walker = Walker{};
var next: ?*parser.Node = null;
while (true) {
next = try walker.get_next(root, next) orelse return list;
// ignore non-element nodes.
if (try parser.nodeType(next.?) != .element) {
continue;
}
try list.append(alloc, next.?);
}
}
// support only simple id selector.
if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
// walk over the node tree fo find the node by id.
const e = try parser.documentGetElementById(self, selectors[1..]) orelse return list;
try list.append(alloc, parser.elementToNode(e));
return list;
} }
// TODO according with https://dom.spec.whatwg.org/#parentnode, the // TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -426,6 +391,12 @@ pub fn testExecFn(
.{ .src = "document.querySelector('*').nodeName", .ex = "HTML" }, .{ .src = "document.querySelector('*').nodeName", .ex = "HTML" },
.{ .src = "document.querySelector('#content').id", .ex = "content" }, .{ .src = "document.querySelector('#content').id", .ex = "content" },
.{ .src = "document.querySelector('#para').id", .ex = "para" }, .{ .src = "document.querySelector('#para').id", .ex = "para" },
.{ .src = "document.querySelector('.ok').id", .ex = "link" },
.{ .src = "document.querySelector('a ~ p').id", .ex = "para-empty" },
.{ .src = "document.querySelector(':root').nodeName", .ex = "HTML" },
.{ .src = "document.querySelectorAll('p').length", .ex = "2" },
.{ .src = "document.querySelectorAll('.ok').item(0).id", .ex = "link" },
}; };
try checkCases(js_env, &querySelector); try checkCases(js_env, &querySelector);

View File

@@ -9,6 +9,7 @@ const Variadic = jsruntime.Variadic;
const collection = @import("html_collection.zig"); const collection = @import("html_collection.zig");
const writeNode = @import("../browser/dump.zig").writeNode; const writeNode = @import("../browser/dump.zig").writeNode;
const css = @import("css.zig");
const Node = @import("node.zig").Node; const Node = @import("node.zig").Node;
const Walker = @import("walker.zig").WalkerDepthFirst; const Walker = @import("walker.zig").WalkerDepthFirst;
@@ -263,56 +264,18 @@ pub const Element = struct {
} }
} }
// TODO netsurf doesn't handle query selectors. We have to implement a pub fn _querySelector(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !?Union {
// solution by ourselves. if (selector.len == 0) return null;
// We handle only * and single id selector like `#foo`.
pub fn _querySelector(self: *parser.Element, selectors: []const u8) !?Union {
if (selectors.len == 0) return null;
// catch-all, return the firstElementChild const n = try css.querySelector(alloc, parser.elementToNode(self), selector);
if (selectors[0] == '*') return try get_firstElementChild(self);
// support only simple id selector. if (n == null) return null;
if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return null;
// walk over the node tree fo find the node by id. return try toInterface(parser.nodeToElement(n.?));
const n = try getElementById(self, selectors[1..]) orelse return null;
return try toInterface(parser.nodeToElement(n));
} }
// TODO netsurf doesn't handle query selectors. We have to implement a pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selector: []const u8) !NodeList {
// solution by ourselves. return css.querySelectorAll(alloc, parser.elementToNode(self), selector);
// We handle only * and single id selector like `#foo`.
pub fn _querySelectorAll(self: *parser.Element, alloc: std.mem.Allocator, selectors: []const u8) !NodeList {
var list = try NodeList.init();
errdefer list.deinit(alloc);
if (selectors.len == 0) return list;
// catch-all, return all elements
if (selectors[0] == '*') {
// walk over the node tree fo find the node by id.
const root = parser.elementToNode(self);
const walker = Walker{};
var next: ?*parser.Node = null;
while (true) {
next = try walker.get_next(root, next) orelse return list;
// ignore non-element nodes.
if (try parser.nodeType(next.?) != .element) {
continue;
}
try list.append(alloc, next.?);
}
}
// support only simple id selector.
if (selectors[0] != '#' or std.mem.indexOf(u8, selectors, " ") != null) return list;
// walk over the node tree fo find the node by id.
const n = try getElementById(self, selectors[1..]) orelse return list;
try list.append(alloc, n);
return list;
} }
// TODO according with https://dom.spec.whatwg.org/#parentnode, the // TODO according with https://dom.spec.whatwg.org/#parentnode, the
@@ -433,6 +396,12 @@ pub fn testExecFn(
.{ .src = "e.querySelector('#link').id", .ex = "link" }, .{ .src = "e.querySelector('#link').id", .ex = "link" },
.{ .src = "e.querySelector('#para').id", .ex = "para" }, .{ .src = "e.querySelector('#para').id", .ex = "para" },
.{ .src = "e.querySelector('*').id", .ex = "link" }, .{ .src = "e.querySelector('*').id", .ex = "link" },
.{ .src = "e.querySelector('')", .ex = "null" },
.{ .src = "e.querySelector('*').id", .ex = "link" },
.{ .src = "e.querySelector('#content')", .ex = "null" },
.{ .src = "e.querySelector('#para').id", .ex = "para" },
.{ .src = "e.querySelector('.ok').id", .ex = "link" },
.{ .src = "e.querySelector('a ~ p').id", .ex = "para-empty" },
.{ .src = "e.querySelectorAll('foo').length", .ex = "0" }, .{ .src = "e.querySelectorAll('foo').length", .ex = "0" },
.{ .src = "e.querySelectorAll('#foo').length", .ex = "0" }, .{ .src = "e.querySelectorAll('#foo').length", .ex = "0" },
@@ -441,6 +410,8 @@ pub fn testExecFn(
.{ .src = "e.querySelectorAll('#para').length", .ex = "1" }, .{ .src = "e.querySelectorAll('#para').length", .ex = "1" },
.{ .src = "e.querySelectorAll('#para').item(0).id", .ex = "para" }, .{ .src = "e.querySelectorAll('#para').item(0).id", .ex = "para" },
.{ .src = "e.querySelectorAll('*').length", .ex = "4" }, .{ .src = "e.querySelectorAll('*').length", .ex = "4" },
.{ .src = "e.querySelectorAll('p').length", .ex = "2" },
.{ .src = "e.querySelectorAll('.ok').item(0).id", .ex = "link" },
}; };
try checkCases(js_env, &querySelector); try checkCases(js_env, &querySelector);

View File

@@ -199,7 +199,7 @@ pub const Node = struct {
} }
pub fn get_childNodes(self: *parser.Node, alloc: std.mem.Allocator) !NodeList { pub fn get_childNodes(self: *parser.Node, alloc: std.mem.Allocator) !NodeList {
var list = try NodeList.init(); var list = NodeList.init();
errdefer list.deinit(alloc); errdefer list.deinit(alloc);
var n = try parser.nodeFirstChild(self) orelse return list; var n = try parser.nodeFirstChild(self) orelse return list;

View File

@@ -26,7 +26,7 @@ pub const NodeList = struct {
nodes: NodesArrayList, nodes: NodesArrayList,
pub fn init() !NodeList { pub fn init() NodeList {
return NodeList{ return NodeList{
.nodes = NodesArrayList{}, .nodes = NodesArrayList{},
}; };

View File

@@ -80,7 +80,7 @@ pub const HTMLDocument = struct {
} }
pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList { pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList {
var list = try NodeList.init(); var list = NodeList.init();
errdefer list.deinit(alloc); errdefer list.deinit(alloc);
if (name.len == 0) return list; if (name.len == 0) return list;