Files
browser/src/dom/html_collection.zig
Francis Bouvier 5734b903ec Adapt HTMLCollectionIterator to HTMLCollection._item changes
And update jsruntime to fix the build

Signed-off-by: Francis Bouvier <francis@lightpanda.io>
2024-01-13 15:14:43 +01:00

553 lines
18 KiB
Zig

const std = @import("std");
const parser = @import("../netsurf.zig");
const jsruntime = @import("jsruntime");
const Case = jsruntime.test_utils.Case;
const checkCases = jsruntime.test_utils.checkCases;
const generate = @import("../generate.zig");
const utils = @import("utils.z");
const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union;
const Matcher = union(enum) {
matchByName: MatchByName,
matchByTagName: MatchByTagName,
matchByClassName: MatchByClassName,
matchByLinks: MatchByLinks,
matchByAnchors: MatchByAnchors,
matchTrue: struct {},
matchFalse: struct {},
pub fn match(self: Matcher, node: *parser.Node) !bool {
switch (self) {
inline .matchTrue => return true,
inline .matchFalse => return false,
inline .matchByTagName => |case| return case.match(node),
inline .matchByClassName => |case| return case.match(node),
inline .matchByName => |case| return case.match(node),
inline .matchByLinks => return MatchByLinks.match(node),
inline .matchByAnchors => return MatchByAnchors.match(node),
}
}
pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
switch (self) {
inline .matchTrue => return,
inline .matchFalse => return,
inline .matchByTagName => |case| return case.deinit(alloc),
inline .matchByClassName => |case| return case.deinit(alloc),
inline .matchByName => |case| return case.deinit(alloc),
inline .matchByLinks => return,
inline .matchByAnchors => return,
}
}
};
pub const MatchByTagName = struct {
// tag is used to select node against their name.
// tag comparison is case insensitive.
tag: []const u8,
is_wildcard: bool,
fn init(alloc: std.mem.Allocator, tag_name: []const u8) !MatchByTagName {
const tag_name_alloc = try alloc.alloc(u8, tag_name.len);
@memcpy(tag_name_alloc, tag_name);
return MatchByTagName{
.tag = tag_name_alloc,
.is_wildcard = std.mem.eql(u8, tag_name, "*"),
};
}
pub fn match(self: MatchByTagName, node: *parser.Node) !bool {
return self.is_wildcard or std.ascii.eqlIgnoreCase(self.tag, try parser.nodeName(node));
}
fn deinit(self: MatchByTagName, alloc: std.mem.Allocator) void {
alloc.free(self.tag);
}
};
pub fn HTMLCollectionByTagName(
alloc: std.mem.Allocator,
root: ?*parser.Node,
tag_name: []const u8,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{
.matchByTagName = try MatchByTagName.init(alloc, tag_name),
},
.include_root = include_root,
};
}
pub const MatchByClassName = struct {
classNames: []const u8,
fn init(alloc: std.mem.Allocator, classNames: []const u8) !MatchByClassName {
const class_names_alloc = try alloc.alloc(u8, classNames.len);
@memcpy(class_names_alloc, classNames);
return MatchByClassName{
.classNames = class_names_alloc,
};
}
pub fn match(self: MatchByClassName, node: *parser.Node) !bool {
var it = std.mem.splitAny(u8, self.classNames, " ");
const e = parser.nodeToElement(node);
while (it.next()) |c| {
if (!try parser.elementHasClass(e, c)) {
return false;
}
}
return true;
}
fn deinit(self: MatchByClassName, alloc: std.mem.Allocator) void {
alloc.free(self.classNames);
}
};
pub fn HTMLCollectionByClassName(
alloc: std.mem.Allocator,
root: ?*parser.Node,
classNames: []const u8,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{
.matchByClassName = try MatchByClassName.init(alloc, classNames),
},
.include_root = include_root,
};
}
pub const MatchByName = struct {
name: []const u8,
fn init(alloc: std.mem.Allocator, name: []const u8) !MatchByName {
const names_alloc = try alloc.alloc(u8, name.len);
@memcpy(names_alloc, name);
return MatchByName{
.name = names_alloc,
};
}
pub fn match(self: MatchByName, node: *parser.Node) !bool {
const e = parser.nodeToElement(node);
const nname = try parser.elementGetAttribute(e, "name") orelse return false;
return std.mem.eql(u8, self.name, nname);
}
fn deinit(self: MatchByName, alloc: std.mem.Allocator) void {
alloc.free(self.name);
}
};
pub fn HTMLCollectionByName(
alloc: std.mem.Allocator,
root: ?*parser.Node,
name: []const u8,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{
.matchByName = try MatchByName.init(alloc, name),
},
.include_root = include_root,
};
}
pub fn HTMLCollectionAll(
root: ?*parser.Node,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{ .matchTrue = .{} },
.include_root = include_root,
};
}
pub fn HTMLCollectionChildren(
root: ?*parser.Node,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerChildren = .{} },
.matcher = Matcher{ .matchTrue = .{} },
.include_root = include_root,
};
}
pub fn HTMLCollectionEmpty() !HTMLCollection {
return HTMLCollection{
.root = null,
.walker = Walker{ .walkerNone = .{} },
.matcher = Matcher{ .matchFalse = .{} },
.include_root = false,
};
}
// MatchByLinks matches the a and area elements in the Document that have href
// attributes.
// https://html.spec.whatwg.org/#dom-document-links
pub const MatchByLinks = struct {
pub fn match(node: *parser.Node) !bool {
const tag = try parser.nodeName(node);
if (!std.ascii.eqlIgnoreCase(tag, "a") and !std.ascii.eqlIgnoreCase(tag, "area")) {
return false;
}
const elem = @as(*parser.Element, @ptrCast(node));
return parser.elementHasAttribute(elem, "href");
}
};
pub fn HTMLCollectionByLinks(
root: ?*parser.Node,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{
.matchByLinks = MatchByLinks{},
},
.include_root = include_root,
};
}
// MatchByAnchors matches the a elements in the Document that have name
// attributes.
// https://html.spec.whatwg.org/#dom-document-anchors
pub const MatchByAnchors = struct {
pub fn match(node: *parser.Node) !bool {
const tag = try parser.nodeName(node);
if (!std.ascii.eqlIgnoreCase(tag, "a")) return false;
const elem = @as(*parser.Element, @ptrCast(node));
return parser.elementHasAttribute(elem, "name");
}
};
pub fn HTMLCollectionByAnchors(
root: ?*parser.Node,
include_root: bool,
) !HTMLCollection {
return HTMLCollection{
.root = root,
.walker = Walker{ .walkerDepthFirst = .{} },
.matcher = Matcher{
.matchByAnchors = MatchByAnchors{},
},
.include_root = include_root,
};
}
const Walker = union(enum) {
walkerDepthFirst: WalkerDepthFirst,
walkerChildren: WalkerChildren,
walkerNone: WalkerNone,
pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
switch (self) {
inline else => |case| return case.get_next(root, cur),
}
}
};
// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
var n = cur orelse root;
// TODO deinit next
if (try parser.nodeFirstChild(n)) |next| {
return next;
}
// TODO deinit next
if (try parser.nodeNextSibling(n)) |next| {
return next;
}
// TODO deinit parent
// Back to the parent of cur.
// If cur has no parent, then the iteration is over.
var parent = try parser.nodeParentNode(n) orelse return null;
// TODO deinit lastchild
var lastchild = try parser.nodeLastChild(parent);
while (n != root and n == lastchild) {
n = parent;
// TODO deinit parent
// Back to the prev's parent.
// If prev has no parent, then the loop must stop.
parent = try parser.nodeParentNode(n) orelse break;
// TODO deinit lastchild
lastchild = try parser.nodeLastChild(parent);
}
if (n == root) {
return null;
}
return try parser.nodeNextSibling(n);
}
};
// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
// On walk start, we return the first root's child.
if (cur == null) return try parser.nodeFirstChild(root);
// If cur is root, then return null.
// This is a special case, if the root is included in the walk, we
// don't want to go further to find children.
if (root == cur.?) return null;
return try parser.nodeNextSibling(cur.?);
}
};
pub const WalkerNone = struct {
pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
return null;
}
};
pub const HTMLCollectionIterator = struct {
pub const mem_guarantied = true;
coll: *HTMLCollection,
index: u32 = 0,
pub const Return = struct {
value: ?Union,
done: bool,
};
pub fn _next(self: *HTMLCollectionIterator) !Return {
const e = try self.coll._item(self.index);
if (e == null) {
return Return{
.value = null,
.done = true,
};
}
self.index += 1;
return Return{
.value = e,
.done = false,
};
}
};
// WEB IDL https://dom.spec.whatwg.org/#htmlcollection
// HTMLCollection is re implemented in zig here because libdom
// dom_html_collection expects a comparison function callback as arguement.
// But we wanted a dynamically comparison here, according to the match tagname.
pub const HTMLCollection = struct {
pub const mem_guarantied = true;
matcher: Matcher,
walker: Walker,
root: ?*parser.Node,
// By default the HTMLCollection walk on the root's descendant only.
// But on somes cases, like for dom document, we want to walk over the root
// itself.
include_root: bool = false,
// save a state for the collection to improve the _item speed.
cur_idx: ?u32 = undefined,
cur_node: ?*parser.Node = undefined,
// start returns the first node to walk on.
fn start(self: HTMLCollection) !?*parser.Node {
if (self.root == null) return null;
if (self.include_root) {
return self.root.?;
}
return try self.walker.get_next(self.root.?, null);
}
pub fn _symbol_iterator(self: *HTMLCollection) HTMLCollectionIterator {
return HTMLCollectionIterator{
.coll = self,
};
}
/// get_length computes the collection's length dynamically according to
/// the current root structure.
// TODO: nodes retrieved must be de-referenced.
pub fn get_length(self: *HTMLCollection) !u32 {
if (self.root == null) return 0;
var len: u32 = 0;
var node = try self.start() orelse return 0;
while (true) {
if (try parser.nodeType(node) == .element) {
if (try self.matcher.match(node)) {
len += 1;
}
}
node = try self.walker.get_next(self.root.?, node) orelse break;
}
return len;
}
pub fn item(self: *HTMLCollection, index: u32) !?*parser.Node {
if (self.root == null) return null;
var i: u32 = 0;
var node: *parser.Node = undefined;
// Use the current state to improve speed if possible.
if (self.cur_idx != null and index >= self.cur_idx.?) {
i = self.cur_idx.?;
node = self.cur_node.?;
} else {
node = try self.start() orelse return null;
}
while (true) {
if (try parser.nodeType(node) == .element) {
if (try self.matcher.match(node)) {
// check if we found the searched element.
if (i == index) {
// save the current state
self.cur_node = node;
self.cur_idx = i;
return node;
}
i += 1;
}
}
node = try self.walker.get_next(self.root.?, node) orelse break;
}
return null;
}
pub fn _item(self: *HTMLCollection, index: u32) !?Union {
const node = try self.item(index) orelse return null;
const e = @as(*parser.Element, @ptrCast(node));
return try Element.toInterface(e);
}
pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union {
if (self.root == null) return null;
if (name.len == 0) return null;
var node = try self.start() orelse return null;
while (true) {
if (try parser.nodeType(node) == .element) {
if (try self.matcher.match(node)) {
const elem = @as(*parser.Element, @ptrCast(node));
var attr = try parser.elementGetAttribute(elem, "id");
// check if the node id corresponds to the name argument.
if (attr != null and std.mem.eql(u8, name, attr.?)) {
return try Element.toInterface(elem);
}
attr = try parser.elementGetAttribute(elem, "name");
// check if the node id corresponds to the name argument.
if (attr != null and std.mem.eql(u8, name, attr.?)) {
return try Element.toInterface(elem);
}
}
}
node = try self.walker.get_next(self.root.?, node) orelse break;
}
return null;
}
pub fn deinit(self: *HTMLCollection, alloc: std.mem.Allocator) void {
self.matcher.deinit(alloc);
}
};
// Tests
// -----
pub fn testExecFn(
_: std.mem.Allocator,
js_env: *jsruntime.Env,
) anyerror!void {
var getElementsByTagName = [_]Case{
.{ .src = "let getElementsByTagName = document.getElementsByTagName('p')", .ex = "undefined" },
.{ .src = "getElementsByTagName.length", .ex = "2" },
.{ .src = "let getElementsByTagNameCI = document.getElementsByTagName('P')", .ex = "undefined" },
.{ .src = "getElementsByTagNameCI.length", .ex = "2" },
.{ .src = "getElementsByTagName.item(0).localName", .ex = "p" },
.{ .src = "getElementsByTagName.item(1).localName", .ex = "p" },
.{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" },
.{ .src = "getElementsByTagNameAll.length", .ex = "8" },
.{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
.{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
.{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" },
.{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
.{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" },
.{ .src = "getElementsByTagNameAll.item(3).localName", .ex = "div" },
.{ .src = "getElementsByTagNameAll.item(7).localName", .ex = "p" },
.{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" },
.{ .src = "document.getElementById('content').getElementsByTagName('*').length", .ex = "4" },
.{ .src = "document.getElementById('content').getElementsByTagName('p').length", .ex = "2" },
.{ .src = "document.getElementById('content').getElementsByTagName('div').length", .ex = "0" },
.{ .src = "document.children.length", .ex = "1" },
.{ .src = "document.getElementById('content').children.length", .ex = "3" },
// check liveness
.{ .src = "let content = document.getElementById('content')", .ex = "undefined" },
.{ .src = "let pe = document.getElementById('para-empty')", .ex = "undefined" },
.{ .src = "let p = document.createElement('p')", .ex = "undefined" },
.{ .src = "p.textContent = 'OK live'", .ex = "OK live" },
.{ .src = "getElementsByTagName.item(1).textContent", .ex = " And" },
.{ .src = "content.appendChild(p) != undefined", .ex = "true" },
.{ .src = "getElementsByTagName.length", .ex = "3" },
.{ .src = "getElementsByTagName.item(2).textContent", .ex = "OK live" },
.{ .src = "content.insertBefore(p, pe) != undefined", .ex = "true" },
.{ .src = "getElementsByTagName.item(0).textContent", .ex = "OK live" },
};
try checkCases(js_env, &getElementsByTagName);
}