const std = @import("std");

const parser = @import("../netsurf.zig");

const jsruntime = @import("jsruntime");
const Case = jsruntime.test_utils.Case;
const checkCases = jsruntime.test_utils.checkCases;
const generate = @import("../generate.zig");

// NOTE(review): "utils.z" looks like a typo for "utils.zig". The constant is
// not referenced by the code in this file, so the path is left untouched;
// confirm the intended file before changing it.
const utils = @import("utils.z");
const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union;

/// Matcher decides whether a node belongs to a collection.
/// Each variant wraps one matching strategy. `matchTrue` and `matchFalse`
/// carry no state and respectively accept and reject every node.
const Matcher = union(enum) {
    matchByName: MatchByName,
    matchByTagName: MatchByTagName,
    matchByClassName: MatchByClassName,
    matchByLinks: MatchByLinks,
    matchByAnchors: MatchByAnchors,
    matchTrue: struct {},
    matchFalse: struct {},

    /// Dispatches to the active strategy and returns its verdict for `node`.
    /// Errors raised by the underlying parser calls are propagated.
    pub fn match(self: Matcher, node: *parser.Node) !bool {
        return switch (self) {
            .matchTrue => true,
            .matchFalse => false,
            // These three strategies carry state, so the payload is needed.
            inline .matchByTagName, .matchByClassName, .matchByName => |case| try case.match(node),
            // Stateless strategies: match is a plain namespaced function.
            .matchByLinks => try MatchByLinks.match(node),
            .matchByAnchors => try MatchByAnchors.match(node),
        };
    }

    /// Releases the memory owned by the active strategy, if any.
    /// `alloc` must be the allocator that was given to the strategy's init.
    pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
        switch (self) {
            inline .matchByTagName, .matchByClassName, .matchByName => |case| case.deinit(alloc),
            // Nothing is allocated for these variants.
            .matchTrue, .matchFalse, .matchByLinks, .matchByAnchors => {},
        }
    }
};

/// MatchByTagName selects nodes by node name.
pub const MatchByTagName = struct {
    /// tag is used to select nodes against their name.
    /// The comparison is ASCII case insensitive.
    tag: []const u8,
    /// True when the requested tag is "*", in which case every node matches.
    is_wildcard: bool,

    /// Copies `tag_name` with `alloc`; the copy is released by deinit.
    fn init(alloc: std.mem.Allocator, tag_name: []const u8) !MatchByTagName {
        const owned_tag = try alloc.dupe(u8, tag_name);
        return MatchByTagName{
            .tag = owned_tag,
            .is_wildcard = std.mem.eql(u8, tag_name, "*"),
        };
    }

    /// Returns true for the wildcard, otherwise compares the node name with
    /// the stored tag, ignoring ASCII case.
    pub fn match(self: MatchByTagName, node: *parser.Node) !bool {
        // The wildcard never needs to look at the node.
        if (self.is_wildcard) return true;
        const node_name = try parser.nodeName(node);
        return std.ascii.eqlIgnoreCase(self.tag, node_name);
    }

    /// Frees the tag copy made by init.
    fn deinit(self: MatchByTagName, alloc: std.mem.Allocator) void {
        alloc.free(self.tag);
    }
};

/// Builds a collection that walks `root` depth first and keeps the elements
/// whose name matches `tag_name` ("*" matches all).
/// `tag_name` is copied with `alloc`; release it with the collection's deinit.
pub fn HTMLCollectionByTagName(
    alloc: std.mem.Allocator,
    root: ?*parser.Node,
    tag_name: []const u8,
    include_root: bool,
) !HTMLCollection {
    const by_tag = try MatchByTagName.init(alloc, tag_name);
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = Matcher{ .matchByTagName = by_tag },
        .include_root = include_root,
    };
}

/// MatchByClassName selects elements that carry every requested class.
pub const MatchByClassName = struct {
    /// Whitespace separated list of class names, as given by the caller.
    classNames: []const u8,

    /// ASCII whitespace as defined by the DOM ordered set parser:
    /// tab, line feed, form feed, carriage return and space.
    const ascii_whitespace = " \t\n\x0c\r";

    /// Copies `classNames` with `alloc`; the copy is released by deinit.
    fn init(alloc: std.mem.Allocator, classNames: []const u8) !MatchByClassName {
        const owned_names = try alloc.dupe(u8, classNames);
        return MatchByClassName{
            .classNames = owned_names,
        };
    }

    /// Returns true when the element has all the classes of the list.
    /// The list is tokenized on ASCII whitespace, so repeated, leading or
    /// trailing separators never yield an empty class name. A list without
    /// any class name matches nothing.
    pub fn match(self: MatchByClassName, node: *parser.Node) !bool {
        const e = parser.nodeToElement(node);

        var has_class_name = false;
        var it = std.mem.tokenizeAny(u8, self.classNames, ascii_whitespace);
        while (it.next()) |class_name| {
            has_class_name = true;
            if (!try parser.elementHasClass(e, class_name)) return false;
        }

        // An empty set of classes must produce an empty collection.
        return has_class_name;
    }

    /// Frees the class list copy made by init.
    fn deinit(self: MatchByClassName, alloc: std.mem.Allocator) void {
        alloc.free(self.classNames);
    }
};

/// Builds a collection that walks `root` depth first and keeps the elements
/// having all the classes listed in `classNames`.
/// `classNames` is copied with `alloc`; release it with the collection's
/// deinit.
pub fn HTMLCollectionByClassName(
    alloc: std.mem.Allocator,
    root: ?*parser.Node,
    classNames: []const u8,
    include_root: bool,
) !HTMLCollection {
    const by_class = try MatchByClassName.init(alloc, classNames);
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = Matcher{ .matchByClassName = by_class },
        .include_root = include_root,
    };
}

/// MatchByName selects elements whose "name" attribute equals a given value.
pub const MatchByName = struct {
    /// Expected value of the "name" attribute (exact, case sensitive).
    name: []const u8,

    /// Copies `name` with `alloc`; the copy is released by deinit.
    fn init(alloc: std.mem.Allocator, name: []const u8) !MatchByName {
        const owned_name = try alloc.dupe(u8, name);
        return MatchByName{
            .name = owned_name,
        };
    }

    /// Returns true when the element has a "name" attribute equal to the
    /// stored name. An element without the attribute never matches.
    pub fn match(self: MatchByName, node: *parser.Node) !bool {
        const e = parser.nodeToElement(node);
        const nname = try parser.elementGetAttribute(e, "name") orelse return false;
        return std.mem.eql(u8, self.name, nname);
    }

    /// Frees the name copy made by init.
    fn deinit(self: MatchByName, alloc: std.mem.Allocator) void {
        alloc.free(self.name);
    }
};

/// Builds a collection that walks `root` depth first and keeps the elements
/// whose "name" attribute equals `name`.
/// `name` is copied with `alloc`; release it with the collection's deinit.
pub fn HTMLCollectionByName(
    alloc: std.mem.Allocator,
    root: ?*parser.Node,
    name: []const u8,
    include_root: bool,
) !HTMLCollection {
    const by_name = try MatchByName.init(alloc, name);
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = Matcher{ .matchByName = by_name },
        .include_root = include_root,
    };
}

/// Builds a collection of every element found by a depth first walk of
/// `root`. Nothing is allocated.
pub fn HTMLCollectionAll(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = Matcher{ .matchTrue = .{} },
        .include_root = include_root,
    };
}

/// Builds a collection of the element children of `root` only.
/// Nothing is allocated.
pub fn HTMLCollectionChildren(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerChildren = .{} },
        .matcher = Matcher{ .matchTrue = .{} },
        .include_root = include_root,
    };
}

/// Builds a collection that is always empty: no root, a walker that yields
/// nothing and a matcher that rejects everything.
pub fn HTMLCollectionEmpty() !HTMLCollection {
    return HTMLCollection{
        .root = null,
        .walker = Walker{ .walkerNone = .{} },
        .matcher = Matcher{ .matchFalse = .{} },
        .include_root = false,
    };
}

// MatchByLinks matches the a and area elements in the Document that have href
// attributes.
// https://html.spec.whatwg.org/#dom-document-links
pub const MatchByLinks = struct {
    /// Returns true when the node is named "a" or "area" (ASCII case
    /// insensitive) and has an "href" attribute.
    pub fn match(node: *parser.Node) !bool {
        const node_name = try parser.nodeName(node);

        const is_anchor = std.ascii.eqlIgnoreCase(node_name, "a");
        const is_link_tag = is_anchor or std.ascii.eqlIgnoreCase(node_name, "area");
        if (!is_link_tag) return false;

        const element: *parser.Element = @ptrCast(node);
        return parser.elementHasAttribute(element, "href");
    }
};

/// Builds a collection that walks `root` depth first and keeps the nodes
/// accepted by MatchByLinks. Nothing is allocated.
pub fn HTMLCollectionByLinks(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const links_matcher = Matcher{ .matchByLinks = MatchByLinks{} };
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = links_matcher,
        .include_root = include_root,
    };
}

// MatchByAnchors matches the a elements in the Document that have name
// attributes.
// https://html.spec.whatwg.org/#dom-document-anchors
pub const MatchByAnchors = struct {
    /// Returns true when the node is named "a" (ASCII case insensitive) and
    /// has a "name" attribute.
    pub fn match(node: *parser.Node) !bool {
        const node_name = try parser.nodeName(node);
        if (!std.ascii.eqlIgnoreCase(node_name, "a")) {
            return false;
        }

        const element: *parser.Element = @ptrCast(node);
        return parser.elementHasAttribute(element, "name");
    }
};

/// Builds a collection that walks `root` depth first and keeps the nodes
/// accepted by MatchByAnchors. Nothing is allocated.
pub fn HTMLCollectionByAnchors(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const anchors_matcher = Matcher{ .matchByAnchors = MatchByAnchors{} };
    return HTMLCollection{
        .root = root,
        .walker = Walker{ .walkerDepthFirst = .{} },
        .matcher = anchors_matcher,
        .include_root = include_root,
    };
}

/// Walker chooses how the tree is traversed: depth first over the whole
/// subtree, over root's children only, or not at all.
const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,
    walkerChildren: WalkerChildren,
    walkerNone: WalkerNone,

    /// Forwards to the active walker and returns the node following `cur`,
    /// or null when that walker reports the end of the walk.
    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        return switch (self) {
            inline else => |impl| try impl.get_next(root, cur),
        };
    }
};

// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
    /// Returns the node following `cur` in a depth first walk of root's
    /// subtree, or null when the walk is over. A null `cur` starts the walk
    /// from `root`.
    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        var n = cur orelse root;

        // TODO deinit next
        if (try parser.nodeFirstChild(n)) |next| {
            return next;
        }

        // The walk must stay inside root's subtree. When root itself has no
        // child there is nothing left to visit: root's siblings are not its
        // descendants and must not be returned.
        if (n == root) {
            return null;
        }

        // TODO deinit next
        if (try parser.nodeNextSibling(n)) |next| {
            return next;
        }

        // TODO deinit parent
        // Back to the parent of cur.
        // If cur has no parent, then the iteration is over.
        var parent = try parser.nodeParentNode(n) orelse return null;

        // TODO deinit lastchild
        var lastchild = try parser.nodeLastChild(parent);
        // Climb while the current node closes its parent's children list,
        // without ever going above root.
        while (n != root and n == lastchild) {
            n = parent;

            // TODO deinit parent
            // Back to the prev's parent.
            // If prev has no parent, then the loop must stop.
            parent = try parser.nodeParentNode(n) orelse break;

            // TODO deinit lastchild
            lastchild = try parser.nodeLastChild(parent);
        }

        if (n == root) {
            return null;
        }

        return try parser.nodeNextSibling(n);
    }
};

// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
    /// Returns root's first child when `cur` is null, otherwise the next
    /// sibling of `cur`. Returns null when `cur` is root itself.
    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        // On walk start, we return the first root's child.
        const current = cur orelse return try parser.nodeFirstChild(root);

        // If cur is root, then return null.
        // This is a special case, if the root is included in the walk, we
        // don't want to go further to find children.
        if (current == root) return null;

        return try parser.nodeNextSibling(current);
    }
};

/// WalkerNone never yields any node.
pub const WalkerNone = struct {
    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
        return null;
    }
};

/// Iterator over an HTMLCollection; it asks the collection for the item at
/// an increasing index until the collection returns null.
pub const HTMLCollectionIterator = struct {
    pub const mem_guarantied = true;

    coll: *HTMLCollection,
    index: u32 = 0,

    pub const Return = struct {
        value: ?Union,
        done: bool,
    };

    /// Returns the item at the current index and advances, or a `done`
    /// result with a null value when the collection has no item there.
    pub fn _next(self: *HTMLCollectionIterator) !Return {
        const e = try self.coll._item(self.index) orelse {
            // The index is left untouched once the end is reached.
            return Return{
                .value = null,
                .done = true,
            };
        };

        self.index += 1;
        return Return{
            .value = e,
            .done = false,
        };
    }
};

// WEB IDL https://dom.spec.whatwg.org/#htmlcollection
// HTMLCollection is re-implemented in zig here because libdom
// dom_html_collection expects a comparison function callback as argument.
// But we wanted a dynamic comparison here, according to the matched tag name.
pub const HTMLCollection = struct {
    pub const mem_guarantied = true;

    matcher: Matcher,
    walker: Walker,

    root: ?*parser.Node,

    // By default the HTMLCollection walks on the root's descendants only.
    // But in some cases, like for the dom document, we want to walk over the
    // root itself.
    include_root: bool = false,

    // Saved state of the last successful item() lookup, used to speed up the
    // next one. Both fields start as null, meaning "nothing cached": item()
    // reads cur_idx before anything is stored, so they must not be left
    // undefined.
    // NOTE(review): nothing in this file resets the cache when the tree
    // changes, so a lookup at an index >= cur_idx after a DOM mutation may
    // resume from a stale position; confirm whether that is acceptable.
    cur_idx: ?u32 = null,
    cur_node: ?*parser.Node = null,

    // start returns the first node to walk on.
    fn start(self: HTMLCollection) !?*parser.Node {
        const root = self.root orelse return null;

        if (self.include_root) {
            return root;
        }

        return try self.walker.get_next(root, null);
    }

    /// Returns an iterator positioned on the first item of the collection.
    pub fn _symbol_iterator(self: *HTMLCollection) HTMLCollectionIterator {
        return HTMLCollectionIterator{
            .coll = self,
        };
    }

    // TODO: nodes retrieved must be de-referenced.
    /// get_length computes the collection's length dynamically according to
    /// the current root structure.
    pub fn get_length(self: *HTMLCollection) !u32 {
        const root = self.root orelse return 0;

        var len: u32 = 0;
        var node = try self.start() orelse return 0;

        while (true) {
            // Only element nodes are given to the matcher.
            if (try parser.nodeType(node) == .element) {
                if (try self.matcher.match(node)) {
                    len += 1;
                }
            }

            node = try self.walker.get_next(root, node) orelse break;
        }

        return len;
    }

    /// Returns the matching element node at position `index` in walk order,
    /// or null when the collection has no root or fewer than `index + 1`
    /// matching elements. A successful lookup is cached in cur_idx/cur_node.
    pub fn item(self: *HTMLCollection, index: u32) !?*parser.Node {
        const root = self.root orelse return null;

        var i: u32 = 0;
        var node: *parser.Node = first: {
            // Use the current state to improve speed if possible: the cached
            // node is the match number cur_idx, so any index at or after it
            // can resume the walk from there.
            if (self.cur_idx) |cached_idx| {
                if (index >= cached_idx) {
                    if (self.cur_node) |cached_node| {
                        i = cached_idx;
                        break :first cached_node;
                    }
                }
            }
            break :first try self.start() orelse return null;
        };

        while (true) {
            if (try parser.nodeType(node) == .element) {
                if (try self.matcher.match(node)) {
                    // check if we found the searched element.
                    if (i == index) {
                        // save the current state
                        self.cur_node = node;
                        self.cur_idx = i;

                        return node;
                    }

                    i += 1;
                }
            }

            node = try self.walker.get_next(root, node) orelse break;
        }

        return null;
    }

    /// Same as item(), with the node converted to its element interface.
    pub fn _item(self: *HTMLCollection, index: u32) !?Union {
        const node = try self.item(index) orelse return null;
        const e = @as(*parser.Element, @ptrCast(node));
        return try Element.toInterface(e);
    }

    /// Returns the first matching element, in walk order, whose "id" or
    /// "name" attribute equals `name`. Returns null for an empty `name`,
    /// when the collection has no root, or when nothing corresponds.
    pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union {
        const root = self.root orelse return null;
        if (name.len == 0) return null;

        var node = try self.start() orelse return null;

        while (true) {
            if (try parser.nodeType(node) == .element) {
                if (try self.matcher.match(node)) {
                    const elem = @as(*parser.Element, @ptrCast(node));

                    // check if the node id corresponds to the name argument.
                    if (try parser.elementGetAttribute(elem, "id")) |id| {
                        if (std.mem.eql(u8, name, id)) {
                            return try Element.toInterface(elem);
                        }
                    }

                    // check if the node name attribute corresponds to the
                    // name argument.
                    if (try parser.elementGetAttribute(elem, "name")) |attr_name| {
                        if (std.mem.eql(u8, name, attr_name)) {
                            return try Element.toInterface(elem);
                        }
                    }
                }
            }

            node = try self.walker.get_next(root, node) orelse break;
        }

        return null;
    }

    /// Releases the memory owned by the matcher. `alloc` must be the
    /// allocator given to the function that built the collection.
    pub fn deinit(self: *HTMLCollection, alloc: std.mem.Allocator) void {
        self.matcher.deinit(alloc);
    }
};

// Tests
// -----

/// Runs the HTMLCollection JavaScript cases against `js_env`.
pub fn testExecFn(
    _: std.mem.Allocator,
    js_env: *jsruntime.Env,
) anyerror!void {
    var getElementsByTagName = [_]Case{
        .{ .src = "let getElementsByTagName = document.getElementsByTagName('p')", .ex = "undefined" },
        .{ .src = "getElementsByTagName.length", .ex = "2" },
        .{ .src = "let getElementsByTagNameCI = document.getElementsByTagName('P')", .ex = "undefined" },
        .{ .src = "getElementsByTagNameCI.length", .ex = "2" },
        .{ .src = "getElementsByTagName.item(0).localName", .ex = "p" },
        .{ .src = "getElementsByTagName.item(1).localName", .ex = "p" },
        .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" },
        .{ .src = "getElementsByTagNameAll.length", .ex = "8" },
        .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
        .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
        .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" },
        .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" },
        .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" },
        .{ .src = "getElementsByTagNameAll.item(3).localName", .ex = "div" },
        .{ .src = "getElementsByTagNameAll.item(7).localName", .ex = "p" },
        .{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" },

        .{ .src = "document.getElementById('content').getElementsByTagName('*').length", .ex = "4" },
        .{ .src = "document.getElementById('content').getElementsByTagName('p').length", .ex = "2" },
        .{ .src = "document.getElementById('content').getElementsByTagName('div').length", .ex = "0" },

        .{ .src = "document.children.length", .ex = "1" },
        .{ .src = "document.getElementById('content').children.length", .ex = "3" },

        // check liveness
        .{ .src = "let content = document.getElementById('content')", .ex = "undefined" },
        .{ .src = "let pe = document.getElementById('para-empty')", .ex = "undefined" },
        .{ .src = "let p = document.createElement('p')", .ex = "undefined" },
        .{ .src = "p.textContent = 'OK live'", .ex = "OK live" },
        .{ .src = "getElementsByTagName.item(1).textContent", .ex = " And" },
        .{ .src = "content.appendChild(p) != undefined", .ex = "true" },
        .{ .src = "getElementsByTagName.length", .ex = "3" },
        .{ .src = "getElementsByTagName.item(2).textContent", .ex = "OK live" },
        .{ .src = "content.insertBefore(p, pe) != undefined", .ex = "true" },
        .{ .src = "getElementsByTagName.item(0).textContent", .ex = "OK live" },
    };
    try checkCases(js_env, &getElementsByTagName);
}