From 2e40837f0d80b6226402d022d88f7cca8cc59fb7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 24 Oct 2023 17:12:59 +0200 Subject: [PATCH 01/22] dom: add HTMLCollection --- src/dom/html_collection.zig | 28 ++++++++++++++++++++++++++++ src/dom/node.zig | 2 ++ src/netsurf.zig | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 src/dom/html_collection.zig diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig new file mode 100644 index 00000000..a48f5cbc --- /dev/null +++ b/src/dom/html_collection.zig @@ -0,0 +1,28 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +const jsruntime = @import("jsruntime"); + +const Element = @import("element.zig").Element; + +// WEB IDL https://dom.spec.whatwg.org/#htmlcollection +pub const HTMLCollection = struct { + pub const Self = parser.HTMLCollection; + pub const mem_guarantied = true; + + // JS funcs + // -------- + + pub fn _get_length(self: *parser.HTMLCollection) u32 { + return parser.HTMLCollectionLength(self); + } + + pub fn _item(self: *parser.HTMLCollection, index: u32) ?*parser.Element { + return parser.HTMLCollectionItem(self, index); + } + + pub fn _namedItem(self: *parser.HTMLCollection, name: []const u8) ?*parser.Element { + return parser.HTMLCollectionNamedItem(self, name); + } +}; diff --git a/src/dom/node.zig b/src/dom/node.zig index 3b9005b3..a57a94a9 100644 --- a/src/dom/node.zig +++ b/src/dom/node.zig @@ -14,6 +14,7 @@ const EventTarget = @import("event_target.zig").EventTarget; const CData = @import("character_data.zig"); const Element = @import("element.zig").Element; const Document = @import("document.zig").Document; +const HTMLCollection = @import("html_collection.zig").HTMLCollection; // HTML const HTML = @import("../html/html.zig"); @@ -25,6 +26,7 @@ pub const Interfaces = generate.Tuple(.{ CData.Interfaces, Element, Document, + HTMLCollection, HTML.Interfaces, }); diff --git a/src/netsurf.zig b/src/netsurf.zig 
index 5f246d98..1f21563a 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -618,6 +618,40 @@ pub fn elementGetAttribute(elem: *Element, name: []const u8) ?[]const u8 { return stringToData(s.?); } +// HTMLCollection + +pub const HTMLCollection = c.dom_html_collection; + +pub fn HTMLCollectionLength(collection: *HTMLCollection) u32 { + var ln: u32 = undefined; + _ = c.dom_html_collection_get_length(collection, &ln); + return ln; +} + +pub fn HTMLCollectionItem(collection: *HTMLCollection, index: u32) ?*Element { + var n: [*c]c.dom_node = undefined; + _ = c.dom_html_collection_item(collection, index, &n); + + if (n == null) { + return null; + } + + // cast [*c]c.dom_node into *Element + return @as(*Element, @ptrCast(n)); +} + +pub fn HTMLCollectionNamedItem(collection: *HTMLCollection, name: []const u8) ?*Element { + var n: [*c]c.dom_node = undefined; + _ = c.dom_html_collection_named_item(collection, stringFromData(name), &n); + + if (n == null) { + return null; + } + + // cast [*c]c.dom_node into *Element + return @as(*Element, @ptrCast(n)); +} + // ElementHTML pub const ElementHTML = c.dom_html_element; From 062a1a4010bd9cd0905e1f7410919d2b1c9ef502 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 25 Oct 2023 10:36:30 +0200 Subject: [PATCH 02/22] dom: implement HTMLCollection We can't simply use the libdom dom_document_get_elements_by_tag_name because it follows an old version of the specifications and returns a NodeList. Since https://github.com/whatwg/dom/commit/190700b7c12ecfd3b5ebdb359ab1d6ea9cbf7749 the spec changed in favor of returning an HTMLCollection. So I'm trying to re-implement the HTMLCollection in zig. 
--- src/dom/document.zig | 31 ++++++++++ src/dom/html_collection.zig | 118 +++++++++++++++++++++++++++++++++--- src/netsurf.zig | 12 ++++ 3 files changed, 152 insertions(+), 9 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index 7e2ac989..a2dfe3c8 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -7,6 +7,7 @@ const Case = jsruntime.test_utils.Case; const checkCases = jsruntime.test_utils.checkCases; const Node = @import("node.zig").Node; +const HTMLCollection = @import("html_collection.zig").HTMLCollection; const Element = @import("element.zig").Element; const ElementUnion = @import("element.zig").Union; @@ -34,6 +35,23 @@ pub const Document = struct { const e = parser.documentCreateElement(self, tag_name); return Element.toInterface(e); } + + // We can't simply use libdom dom_document_get_elements_by_tag_name here. + // Indeed, netsurf implemented a previous dom spec when + // getElementsByTagName returned a NodeList. + // But since + // https://github.com/whatwg/dom/commit/190700b7c12ecfd3b5ebdb359ab1d6ea9cbf7749 + // the spec changed to return an HTMLCollection instead. + // That's why we reimplemented getElementsByTagName by using an + // HTMLCollection in zig here. + pub fn _getElementsByTagName(self: *parser.Document, tag_name: []const u8) HTMLCollection { + const root = parser.documentGetDocumentNode(self); + return HTMLCollection{ + .root = root, + // TODO handle case insensitive comparison. 
+ .match = tag_name, + }; + } }; // Tests @@ -58,6 +76,19 @@ pub fn testExecFn( }; try checkCases(js_env, &getElementById); + var getElementsByTagName = [_]Case{ + .{ .src = "let getElementsByTagName = document.getElementsByTagName('P')", .ex = "undefined" }, + .{ .src = "getElementsByTagName.length", .ex = "2" }, + .{ .src = "getElementsByTagName.item(0).localName", .ex = "p" }, + .{ .src = "getElementsByTagName.item(1).localName", .ex = "p" }, + .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" }, + .{ .src = "getElementsByTagNameAll.length", .ex = "8" }, + .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, + .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" }, + .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" }, + }; + try checkCases(js_env, &getElementsByTagName); + const tags = comptime parser.Tag.all(); comptime var createElements: [(tags.len) * 2]Case = undefined; inline for (tags, 0..) |tag, i| { diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index a48f5cbc..9dfb3b94 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -7,22 +7,122 @@ const jsruntime = @import("jsruntime"); const Element = @import("element.zig").Element; // WEB IDL https://dom.spec.whatwg.org/#htmlcollection +// HTMLCollection is re implemented in zig here because libdom +// dom_html_collection expects a comparison function callback as arguement. +// But we wanted a dynamically comparison here, according to the match tagname. pub const HTMLCollection = struct { - pub const Self = parser.HTMLCollection; pub const mem_guarantied = true; - // JS funcs - // -------- + root: *parser.Node, + // match is used to select node against their name. + // match comparison is case sensitive. 
+ match: []const u8, - pub fn _get_length(self: *parser.HTMLCollection) u32 { - return parser.HTMLCollectionLength(self); + /// _get_length computes the collection's length dynamically according to + /// the current root structure. + // TODO: nodes retrieved must be de-referenced. + pub fn get_length(self: *HTMLCollection) u32 { + var len: u32 = 0; + var node: ?*parser.Node = self.root; + var ntype: parser.NodeType = undefined; + + var is_wildcard = std.mem.eql(u8, self.match, "*"); + + while (node != null) { + ntype = parser.nodeType(node.?); + if (ntype == .element) { + if (is_wildcard or std.mem.eql(u8, self.match, parser.nodeName(node.?))) { + len += 1; + } + } + + // Iterate hover the DOM tree. + var next = parser.nodeFirstChild(node.?); + if (next != null) { + node = next; + continue; + } + + next = parser.nodeNextSibling(node.?); + if (next != null) { + node = next; + continue; + } + + var parent = parser.nodeParentNode(node.?); + var lastchild = parser.nodeLastChild(parent.?); + while (node.? != self.root and node.? == lastchild) { + node = parent; + parent = parser.nodeParentNode(node.?); + lastchild = parser.nodeLastChild(parent.?); + } + + if (node.? == self.root) { + node = null; + continue; + } + + node = parser.nodeNextSibling(node.?); + } + + return len; } - pub fn _item(self: *parser.HTMLCollection, index: u32) ?*parser.Element { - return parser.HTMLCollectionItem(self, index); + pub fn _item(self: *HTMLCollection, index: u32) ?*parser.Element { + var len: u32 = 0; + var node: ?*parser.Node = self.root; + var ntype: parser.NodeType = undefined; + + var is_wildcard = std.mem.eql(u8, self.match, "*"); + + while (node != null) { + ntype = parser.nodeType(node.?); + if (ntype == .element) { + if (is_wildcard or std.mem.eql(u8, self.match, parser.nodeName(node.?))) { + len += 1; + + // check if we found the searched element. + if (len == index + 1) { + return @as(*parser.Element, @ptrCast(node)); + } + } + } + + // Iterate hover the DOM tree. 
+ var next = parser.nodeFirstChild(node.?); + if (next != null) { + node = next; + continue; + } + + next = parser.nodeNextSibling(node.?); + if (next != null) { + node = next; + continue; + } + + var parent = parser.nodeParentNode(node.?); + var lastchild = parser.nodeLastChild(parent.?); + while (node.? != self.root and node.? == lastchild) { + node = parent; + parent = parser.nodeParentNode(node.?); + lastchild = parser.nodeLastChild(parent.?); + } + + if (node.? == self.root) { + node = null; + continue; + } + + node = parser.nodeNextSibling(node.?); + } + + return null; } - pub fn _namedItem(self: *parser.HTMLCollection, name: []const u8) ?*parser.Element { - return parser.HTMLCollectionNamedItem(self, name); + pub fn _namedItem(self: *HTMLCollection, name: []const u8) ?*parser.Element { + _ = name; + _ = self; + return null; } }; diff --git a/src/netsurf.zig b/src/netsurf.zig index 1f21563a..159db9c1 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -765,6 +765,18 @@ pub inline fn documentGetElementsByTagName(doc: *Document, tagname: []const u8) return nlist.?; } +// documentGetDocumentElement returns the root document element. 
+pub inline fn documentGetDocumentElement(doc: *Document) *Element { + var elem: ?*Element = undefined; + _ = documentVtable(doc).dom_document_get_document_element.?(doc, &elem); + return elem.?; +} + +pub inline fn documentGetDocumentNode(doc: *Document) *Node { + const res = documentGetDocumentElement(doc); + return @as(*Node, @ptrCast(res)); +} + pub inline fn documentCreateElement(doc: *Document, tag_name: []const u8) *Element { var elem: ?*Element = undefined; _ = documentVtable(doc).dom_document_create_element.?(doc, stringFromData(tag_name), &elem); From 3a188c4568216da2e0765b4a2be8bad9573b01f4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 25 Oct 2023 15:54:56 +0200 Subject: [PATCH 03/22] dom: compare tagname in case insensitive way --- src/dom/document.zig | 3 +-- src/dom/html_collection.zig | 13 ++++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index a2dfe3c8..5a4e496d 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -48,7 +48,6 @@ pub const Document = struct { const root = parser.documentGetDocumentNode(self); return HTMLCollection{ .root = root, - // TODO handle case insensitive comparison. 
.match = tag_name, }; } @@ -77,7 +76,7 @@ pub fn testExecFn( try checkCases(js_env, &getElementById); var getElementsByTagName = [_]Case{ - .{ .src = "let getElementsByTagName = document.getElementsByTagName('P')", .ex = "undefined" }, + .{ .src = "let getElementsByTagName = document.getElementsByTagName('p')", .ex = "undefined" }, .{ .src = "getElementsByTagName.length", .ex = "2" }, .{ .src = "getElementsByTagName.item(0).localName", .ex = "p" }, .{ .src = "getElementsByTagName.item(1).localName", .ex = "p" }, diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 9dfb3b94..b96d7bec 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -4,6 +4,7 @@ const parser = @import("../netsurf.zig"); const jsruntime = @import("jsruntime"); +const utils = @import("utils.z"); const Element = @import("element.zig").Element; // WEB IDL https://dom.spec.whatwg.org/#htmlcollection @@ -15,7 +16,7 @@ pub const HTMLCollection = struct { root: *parser.Node, // match is used to select node against their name. - // match comparison is case sensitive. + // match comparison is case insensitive. 
match: []const u8, /// _get_length computes the collection's length dynamically according to @@ -26,12 +27,15 @@ pub const HTMLCollection = struct { var node: ?*parser.Node = self.root; var ntype: parser.NodeType = undefined; + var buffer: [128]u8 = undefined; + const imatch = std.ascii.upperString(&buffer, self.match); + var is_wildcard = std.mem.eql(u8, self.match, "*"); while (node != null) { ntype = parser.nodeType(node.?); if (ntype == .element) { - if (is_wildcard or std.mem.eql(u8, self.match, parser.nodeName(node.?))) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { len += 1; } } @@ -75,10 +79,13 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); + var buffer: [128]u8 = undefined; + const imatch = std.ascii.upperString(&buffer, self.match); + while (node != null) { ntype = parser.nodeType(node.?); if (ntype == .element) { - if (is_wildcard or std.mem.eql(u8, self.match, parser.nodeName(node.?))) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { len += 1; // check if we found the searched element. 
From 09d032f6f8e1f775f8e434a299a143562857109e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 25 Oct 2023 17:30:55 +0200 Subject: [PATCH 04/22] dom: implement HTMLCollection.namedItem() --- src/dom/document.zig | 1 + src/dom/html_collection.zig | 63 +++++++++++++++++++++++++++++++++++-- src/netsurf.zig | 3 ++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index 5a4e496d..9320ddf6 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -85,6 +85,7 @@ pub fn testExecFn( .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" }, .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" }, + .{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" }, }; try checkCases(js_env, &getElementsByTagName); diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index b96d7bec..35068092 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -128,8 +128,67 @@ pub const HTMLCollection = struct { } pub fn _namedItem(self: *HTMLCollection, name: []const u8) ?*parser.Element { - _ = name; - _ = self; + if (name.len == 0) { + return null; + } + + var node: ?*parser.Node = self.root; + var ntype: parser.NodeType = undefined; + + var is_wildcard = std.mem.eql(u8, self.match, "*"); + + var buffer: [128]u8 = undefined; + const imatch = std.ascii.upperString(&buffer, self.match); + + while (node != null) { + ntype = parser.nodeType(node.?); + if (ntype == .element) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { + const elem = @as(*parser.Element, @ptrCast(node)); + + var attr = parser.elementGetAttribute(elem, "id"); + // check if the node id corresponds to the name argument. 
+ if (attr != null and std.mem.eql(u8, name, attr.?)) { + return elem; + } + + attr = parser.elementGetAttribute(elem, "name"); + // check if the node id corresponds to the name argument. + if (attr != null and std.mem.eql(u8, name, attr.?)) { + return elem; + } + } + } + + // Iterate hover the DOM tree. + var next = parser.nodeFirstChild(node.?); + if (next != null) { + node = next; + continue; + } + + next = parser.nodeNextSibling(node.?); + if (next != null) { + node = next; + continue; + } + + var parent = parser.nodeParentNode(node.?); + var lastchild = parser.nodeLastChild(parent.?); + while (node.? != self.root and node.? == lastchild) { + node = parent; + parent = parser.nodeParentNode(node.?); + lastchild = parser.nodeLastChild(parent.?); + } + + if (node.? == self.root) { + node = null; + continue; + } + + node = parser.nodeNextSibling(node.?); + } + return null; } }; diff --git a/src/netsurf.zig b/src/netsurf.zig index 159db9c1..aa2ed825 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -597,6 +597,9 @@ pub fn textSplitText(text: *Text, offset: u32) *Text { // Comment pub const Comment = c.dom_comment; +// Attribute +pub const Attribute = c.dom_attr; + // Element pub const Element = c.dom_element; From 1fe49adba298aeb4d3c913b838ea4bf61b489fbf Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 26 Oct 2023 09:00:11 +0200 Subject: [PATCH 05/22] dom: add comment about buffer with fixed length The buffer is used for upper case string transformation. --- src/dom/html_collection.zig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 35068092..d1d09a85 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -27,6 +27,11 @@ pub const HTMLCollection = struct { var node: ?*parser.Node = self.root; var ntype: parser.NodeType = undefined; + // FIXME using a fixed length buffer here avoid the need of an allocator + // to get an upper case match value. 
But if the match value (a tag + // name) is greater than 128 chars, the code will panic. + // ascii.upperString asserts the buffer size is greater or equals than + // the given string. var buffer: [128]u8 = undefined; const imatch = std.ascii.upperString(&buffer, self.match); @@ -79,6 +84,11 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); + // FIXME using a fixed length buffer here avoid the need of an allocator + // to get an upper case match value. But if the match value (a tag + // name) is greater than 128 chars, the code will panic. + // ascii.upperString asserts the buffer size is greater or equals than + // the given string. var buffer: [128]u8 = undefined; const imatch = std.ascii.upperString(&buffer, self.match); @@ -137,6 +147,11 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); + // FIXME using a fixed length buffer here avoid the need of an allocator + // to get an upper case match value. But if the match value (a tag + // name) is greater than 128 chars, the code will panic. + // ascii.upperString asserts the buffer size is greater or equals than + // the given string. var buffer: [128]u8 = undefined; const imatch = std.ascii.upperString(&buffer, self.match); From 8c1d1e491c0e6941a496511d219108220c7905d2 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 27 Oct 2023 16:51:43 +0200 Subject: [PATCH 06/22] dom: refacto HTMLCollection next --- src/dom/html_collection.zig | 119 ++++++++++++------------------------ 1 file changed, 38 insertions(+), 81 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index d1d09a85..0799e8f6 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -19,6 +19,41 @@ pub const HTMLCollection = struct { // match comparison is case insensitive. match: []const u8, + // next iterates hover the DOM tree to return the next following node or + // null at the end. 
+ fn _next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { + // TODO deinit next + var next = parser.nodeFirstChild(cur); + if (next != null) { + return next; + } + + // TODO deinit next + next = parser.nodeNextSibling(cur); + if (next != null) { + return next; + } + + // TODO deinit parent + var parent = parser.nodeParentNode(cur) orelse unreachable; + // TODO deinit lastchild + var lastchild = parser.nodeLastChild(parent); + var prev = cur; + while (prev != root and prev == lastchild) { + prev = parent; + // TODO deinit parent + parent = parser.nodeParentNode(cur) orelse unreachable; + // TODO deinit lastchild + lastchild = parser.nodeLastChild(parent); + } + + if (prev == root) { + return null; + } + + return parser.nodeNextSibling(prev); + } + /// _get_length computes the collection's length dynamically according to /// the current root structure. // TODO: nodes retrieved must be de-referenced. @@ -45,33 +80,7 @@ pub const HTMLCollection = struct { } } - // Iterate hover the DOM tree. - var next = parser.nodeFirstChild(node.?); - if (next != null) { - node = next; - continue; - } - - next = parser.nodeNextSibling(node.?); - if (next != null) { - node = next; - continue; - } - - var parent = parser.nodeParentNode(node.?); - var lastchild = parser.nodeLastChild(parent.?); - while (node.? != self.root and node.? == lastchild) { - node = parent; - parent = parser.nodeParentNode(node.?); - lastchild = parser.nodeLastChild(parent.?); - } - - if (node.? == self.root) { - node = null; - continue; - } - - node = parser.nodeNextSibling(node.?); + node = _next(self.root, node.?); } return len; @@ -105,33 +114,7 @@ pub const HTMLCollection = struct { } } - // Iterate hover the DOM tree. 
- var next = parser.nodeFirstChild(node.?); - if (next != null) { - node = next; - continue; - } - - next = parser.nodeNextSibling(node.?); - if (next != null) { - node = next; - continue; - } - - var parent = parser.nodeParentNode(node.?); - var lastchild = parser.nodeLastChild(parent.?); - while (node.? != self.root and node.? == lastchild) { - node = parent; - parent = parser.nodeParentNode(node.?); - lastchild = parser.nodeLastChild(parent.?); - } - - if (node.? == self.root) { - node = null; - continue; - } - - node = parser.nodeNextSibling(node.?); + node = _next(self.root, node.?); } return null; @@ -175,33 +158,7 @@ pub const HTMLCollection = struct { } } - // Iterate hover the DOM tree. - var next = parser.nodeFirstChild(node.?); - if (next != null) { - node = next; - continue; - } - - next = parser.nodeNextSibling(node.?); - if (next != null) { - node = next; - continue; - } - - var parent = parser.nodeParentNode(node.?); - var lastchild = parser.nodeLastChild(parent.?); - while (node.? != self.root and node.? == lastchild) { - node = parent; - parent = parser.nodeParentNode(node.?); - lastchild = parser.nodeLastChild(parent.?); - } - - if (node.? == self.root) { - node = null; - continue; - } - - node = parser.nodeNextSibling(node.?); + node = _next(self.root, node.?); } return null; From 5f05cffff9b708359d031990b70ab244631b9c47 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 27 Oct 2023 17:00:17 +0200 Subject: [PATCH 07/22] dom: remove optional node type --- src/dom/html_collection.zig | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 0799e8f6..843d5ec5 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -59,7 +59,7 @@ pub const HTMLCollection = struct { // TODO: nodes retrieved must be de-referenced. 
pub fn get_length(self: *HTMLCollection) u32 { var len: u32 = 0; - var node: ?*parser.Node = self.root; + var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; // FIXME using a fixed length buffer here avoid the need of an allocator @@ -72,15 +72,15 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); - while (node != null) { - ntype = parser.nodeType(node.?); + while (true) { + ntype = parser.nodeType(node); if (ntype == .element) { - if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node))) { len += 1; } } - node = _next(self.root, node.?); + node = _next(self.root, node) orelse break; } return len; @@ -88,7 +88,7 @@ pub const HTMLCollection = struct { pub fn _item(self: *HTMLCollection, index: u32) ?*parser.Element { var len: u32 = 0; - var node: ?*parser.Node = self.root; + var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; var is_wildcard = std.mem.eql(u8, self.match, "*"); @@ -101,10 +101,10 @@ pub const HTMLCollection = struct { var buffer: [128]u8 = undefined; const imatch = std.ascii.upperString(&buffer, self.match); - while (node != null) { - ntype = parser.nodeType(node.?); + while (true) { + ntype = parser.nodeType(node); if (ntype == .element) { - if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node))) { len += 1; // check if we found the searched element. 
@@ -114,7 +114,7 @@ pub const HTMLCollection = struct { } } - node = _next(self.root, node.?); + node = _next(self.root, node) orelse break; } return null; @@ -125,7 +125,7 @@ pub const HTMLCollection = struct { return null; } - var node: ?*parser.Node = self.root; + var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; var is_wildcard = std.mem.eql(u8, self.match, "*"); @@ -138,10 +138,10 @@ pub const HTMLCollection = struct { var buffer: [128]u8 = undefined; const imatch = std.ascii.upperString(&buffer, self.match); - while (node != null) { - ntype = parser.nodeType(node.?); + while (true) { + ntype = parser.nodeType(node); if (ntype == .element) { - if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node.?))) { + if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node))) { const elem = @as(*parser.Element, @ptrCast(node)); var attr = parser.elementGetAttribute(elem, "id"); @@ -158,7 +158,7 @@ pub const HTMLCollection = struct { } } - node = _next(self.root, node.?); + node = _next(self.root, node) orelse break; } return null; From 055bde96e4c2ace3f5fa177fab7189710d95b3b6 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 27 Oct 2023 17:55:53 +0200 Subject: [PATCH 08/22] dom: speedup HTMLCollection.item() w/ an internal state --- src/dom/document.zig | 4 ++++ src/dom/html_collection.zig | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index 9320ddf6..e55469d5 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -83,8 +83,12 @@ pub fn testExecFn( .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" }, .{ .src = "getElementsByTagNameAll.length", .ex = "8" }, .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, + .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" }, + .{ .src = 
"getElementsByTagNameAll.item(0).localName", .ex = "html" }, .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" }, + .{ .src = "getElementsByTagNameAll.item(3).localName", .ex = "div" }, + .{ .src = "getElementsByTagNameAll.item(7).localName", .ex = "p" }, .{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" }, }; try checkCases(js_env, &getElementsByTagName); diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 843d5ec5..039ea62e 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -19,6 +19,10 @@ pub const HTMLCollection = struct { // match comparison is case insensitive. match: []const u8, + // save a state for the collection to improve the _item speed. + cur_idx: u32 = undefined, + cur_node: *parser.Node = undefined, + // next iterates hover the DOM tree to return the next following node or // null at the end. fn _next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { @@ -87,12 +91,18 @@ pub const HTMLCollection = struct { } pub fn _item(self: *HTMLCollection, index: u32) ?*parser.Element { - var len: u32 = 0; + var i: u32 = 0; var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; var is_wildcard = std.mem.eql(u8, self.match, "*"); + // Use the current state to improve speed if possible. + if (self.cur_idx != undefined and index >= self.cur_idx) { + i = self.cur_idx; + node = self.cur_node; + } + // FIXME using a fixed length buffer here avoid the need of an allocator // to get an upper case match value. But if the match value (a tag // name) is greater than 128 chars, the code will panic. @@ -105,12 +115,16 @@ pub const HTMLCollection = struct { ntype = parser.nodeType(node); if (ntype == .element) { if (is_wildcard or std.mem.eql(u8, imatch, parser.nodeName(node))) { - len += 1; - // check if we found the searched element. 
- if (len == index + 1) { + if (i == index) { + // save the current state + self.cur_node = node; + self.cur_idx = i; + return @as(*parser.Element, @ptrCast(node)); } + + i += 1; } } From 84934cca1321daecabac3a19d2f820c0f36de621 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 27 Oct 2023 17:59:27 +0200 Subject: [PATCH 09/22] dom: add HTMLCollection unit tests --- src/dom/document.zig | 5 ----- src/dom/html_collection.zig | 29 +++++++++++++++++++++++++++++ src/run_tests.zig | 2 ++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index e55469d5..b5e57a10 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -83,11 +83,6 @@ pub fn testExecFn( .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" }, .{ .src = "getElementsByTagNameAll.length", .ex = "8" }, .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, - .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, - .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" }, - .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, - .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" }, - .{ .src = "getElementsByTagNameAll.item(3).localName", .ex = "div" }, .{ .src = "getElementsByTagNameAll.item(7).localName", .ex = "p" }, .{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" }, }; diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 039ea62e..04e81fa9 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -3,6 +3,8 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); const jsruntime = @import("jsruntime"); +const Case = jsruntime.test_utils.Case; +const checkCases = jsruntime.test_utils.checkCases; const utils = @import("utils.z"); const Element = @import("element.zig").Element; @@ -178,3 +180,30 @@ pub const HTMLCollection 
= struct { return null; } }; + +// Tests +// ----- + +pub fn testExecFn( + _: std.mem.Allocator, + js_env: *jsruntime.Env, + comptime _: []jsruntime.API, +) !void { + var getElementsByTagName = [_]Case{ + .{ .src = "let getElementsByTagName = document.getElementsByTagName('p')", .ex = "undefined" }, + .{ .src = "getElementsByTagName.length", .ex = "2" }, + .{ .src = "getElementsByTagName.item(0).localName", .ex = "p" }, + .{ .src = "getElementsByTagName.item(1).localName", .ex = "p" }, + .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" }, + .{ .src = "getElementsByTagNameAll.length", .ex = "8" }, + .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, + .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, + .{ .src = "getElementsByTagNameAll.item(1).localName", .ex = "head" }, + .{ .src = "getElementsByTagNameAll.item(0).localName", .ex = "html" }, + .{ .src = "getElementsByTagNameAll.item(2).localName", .ex = "body" }, + .{ .src = "getElementsByTagNameAll.item(3).localName", .ex = "div" }, + .{ .src = "getElementsByTagNameAll.item(7).localName", .ex = "p" }, + .{ .src = "getElementsByTagNameAll.namedItem('para-empty-child').localName", .ex = "span" }, + }; + try checkCases(js_env, &getElementsByTagName); +} diff --git a/src/run_tests.zig b/src/run_tests.zig index 8860bc67..b5ad7159 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -11,6 +11,7 @@ const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn; const nodeTestExecFn = @import("dom/node.zig").testExecFn; const characterDataTestExecFn = @import("dom/character_data.zig").testExecFn; const textTestExecFn = @import("dom/text.zig").testExecFn; +const HTMLCollectionTestExecFn = @import("dom/html_collection.zig").testExecFn; var doc: *parser.DocumentHTML = undefined; @@ -51,6 +52,7 @@ fn testsAllExecFn( nodeTestExecFn, characterDataTestExecFn, textTestExecFn, + HTMLCollectionTestExecFn, }; inline for (testFns) 
|testFn| { From 9bb200a46f5b43898220b97f457f415abe1addc2 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 27 Oct 2023 18:02:44 +0200 Subject: [PATCH 10/22] dom: rename HTMLCollection._next() into get_next() We don't want to expose the function to the JS API. --- src/dom/html_collection.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 04e81fa9..cb54c032 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -27,7 +27,7 @@ pub const HTMLCollection = struct { // next iterates hover the DOM tree to return the next following node or // null at the end. - fn _next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { + fn get_next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { // TODO deinit next var next = parser.nodeFirstChild(cur); if (next != null) { @@ -86,7 +86,7 @@ pub const HTMLCollection = struct { } } - node = _next(self.root, node) orelse break; + node = get_next(self.root, node) orelse break; } return len; @@ -130,7 +130,7 @@ pub const HTMLCollection = struct { } } - node = _next(self.root, node) orelse break; + node = get_next(self.root, node) orelse break; } return null; @@ -174,7 +174,7 @@ pub const HTMLCollection = struct { } } - node = _next(self.root, node) orelse break; + node = get_next(self.root, node) orelse break; } return null; From f02769b2dd30e217171c901839bd1d4e2071b473 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 15 Nov 2023 15:03:55 +0100 Subject: [PATCH 11/22] dom: use allocator with HTMLCollection getters --- src/dom/html_collection.zig | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index cb54c032..7b3be82c 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -63,18 +63,13 @@ pub const HTMLCollection = struct { /// _get_length computes the collection's length 
dynamically according to /// the current root structure. // TODO: nodes retrieved must be de-referenced. - pub fn get_length(self: *HTMLCollection) u32 { + pub fn get_length(self: *HTMLCollection, allocator: std.mem.Allocator) !u32 { var len: u32 = 0; var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; - // FIXME using a fixed length buffer here avoid the need of an allocator - // to get an upper case match value. But if the match value (a tag - // name) is greater than 128 chars, the code will panic. - // ascii.upperString asserts the buffer size is greater or equals than - // the given string. - var buffer: [128]u8 = undefined; - const imatch = std.ascii.upperString(&buffer, self.match); + const imatch = try std.ascii.allocUpperString(allocator, self.match); + defer allocator.free(imatch); var is_wildcard = std.mem.eql(u8, self.match, "*"); @@ -92,7 +87,7 @@ pub const HTMLCollection = struct { return len; } - pub fn _item(self: *HTMLCollection, index: u32) ?*parser.Element { + pub fn _item(self: *HTMLCollection, allocator: std.mem.Allocator, index: u32) !?*parser.Element { var i: u32 = 0; var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; @@ -105,13 +100,8 @@ pub const HTMLCollection = struct { node = self.cur_node; } - // FIXME using a fixed length buffer here avoid the need of an allocator - // to get an upper case match value. But if the match value (a tag - // name) is greater than 128 chars, the code will panic. - // ascii.upperString asserts the buffer size is greater or equals than - // the given string. 
- var buffer: [128]u8 = undefined; - const imatch = std.ascii.upperString(&buffer, self.match); + const imatch = try std.ascii.allocUpperString(allocator, self.match); + defer allocator.free(imatch); while (true) { ntype = parser.nodeType(node); @@ -136,7 +126,7 @@ pub const HTMLCollection = struct { return null; } - pub fn _namedItem(self: *HTMLCollection, name: []const u8) ?*parser.Element { + pub fn _namedItem(self: *HTMLCollection, allocator: std.mem.Allocator, name: []const u8) !?*parser.Element { if (name.len == 0) { return null; } @@ -146,13 +136,8 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); - // FIXME using a fixed length buffer here avoid the need of an allocator - // to get an upper case match value. But if the match value (a tag - // name) is greater than 128 chars, the code will panic. - // ascii.upperString asserts the buffer size is greater or equals than - // the given string. - var buffer: [128]u8 = undefined; - const imatch = std.ascii.upperString(&buffer, self.match); + const imatch = try std.ascii.allocUpperString(allocator, self.match); + defer allocator.free(imatch); while (true) { ntype = parser.nodeType(node); @@ -192,6 +177,8 @@ pub fn testExecFn( var getElementsByTagName = [_]Case{ .{ .src = "let getElementsByTagName = document.getElementsByTagName('p')", .ex = "undefined" }, .{ .src = "getElementsByTagName.length", .ex = "2" }, + .{ .src = "let getElementsByTagNameCI = document.getElementsByTagName('P')", .ex = "undefined" }, + .{ .src = "getElementsByTagNameCI.length", .ex = "2" }, .{ .src = "getElementsByTagName.item(0).localName", .ex = "p" }, .{ .src = "getElementsByTagName.item(1).localName", .ex = "p" }, .{ .src = "let getElementsByTagNameAll = document.getElementsByTagName('*')", .ex = "undefined" }, From 6f2e59d663aba54bf2d6b1d88d053625f1791d9c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 15 Nov 2023 15:05:43 +0100 Subject: [PATCH 12/22] wpt: add getElementsByTagName tests 
--- .../Document-Element-getElementsByTagName.js | 208 ++++++++++++++++++ .../nodes/Document-getElementsByTagName.html | 11 + 2 files changed, 219 insertions(+) create mode 100644 tests/wpt/dom/nodes/Document-Element-getElementsByTagName.js create mode 100644 tests/wpt/dom/nodes/Document-getElementsByTagName.html diff --git a/tests/wpt/dom/nodes/Document-Element-getElementsByTagName.js b/tests/wpt/dom/nodes/Document-Element-getElementsByTagName.js new file mode 100644 index 00000000..dbbe667f --- /dev/null +++ b/tests/wpt/dom/nodes/Document-Element-getElementsByTagName.js @@ -0,0 +1,208 @@ +function test_getElementsByTagName(context, element) { + // TODO: getElementsByTagName("*") + test(function() { + assert_false(context.getElementsByTagName("html") instanceof NodeList, + "Should not return a NodeList") + assert_true(context.getElementsByTagName("html") instanceof HTMLCollection, + "Should return an HTMLCollection") + }, "Interfaces") + + test(function() { + var firstCollection = context.getElementsByTagName("html"), + secondCollection = context.getElementsByTagName("html") + assert_true(firstCollection !== secondCollection || + firstCollection === secondCollection) + }, "Caching is allowed") + + test(function() { + var l = context.getElementsByTagName("nosuchtag") + l[5] = "foopy" + assert_equals(l[5], undefined) + assert_equals(l.item(5), null) + }, "Shouldn't be able to set unsigned properties on a HTMLCollection (non-strict mode)") + + test(function() { + var l = context.getElementsByTagName("nosuchtag") + assert_throws_js(TypeError, function() { + "use strict"; + l[5] = "foopy" + }) + assert_equals(l[5], undefined) + assert_equals(l.item(5), null) + }, "Shouldn't be able to set unsigned properties on a HTMLCollection (strict mode)") + + test(function() { + var l = context.getElementsByTagName("nosuchtag") + var fn = l.item; + assert_equals(fn, HTMLCollection.prototype.item); + l.item = "pass" + assert_equals(l.item, "pass") + 
assert_equals(HTMLCollection.prototype.item, fn); + }, "Should be able to set expando shadowing a proto prop (item)") + + test(function() { + var l = context.getElementsByTagName("nosuchtag") + var fn = l.namedItem; + assert_equals(fn, HTMLCollection.prototype.namedItem); + l.namedItem = "pass" + assert_equals(l.namedItem, "pass") + assert_equals(HTMLCollection.prototype.namedItem, fn); + }, "Should be able to set expando shadowing a proto prop (namedItem)") + + test(function() { + var t1 = element.appendChild(document.createElement("pre")); + t1.id = "x"; + var t2 = element.appendChild(document.createElement("pre")); + t2.setAttribute("name", "y"); + var t3 = element.appendChild(document.createElementNS("", "pre")); + t3.setAttribute("id", "z"); + var t4 = element.appendChild(document.createElementNS("", "pre")); + t4.setAttribute("name", "w"); + this.add_cleanup(function() { + element.removeChild(t1) + element.removeChild(t2) + element.removeChild(t3) + element.removeChild(t4) + }); + + var list = context.getElementsByTagName('pre'); + var pre = list[0]; + assert_equals(pre.id, "x"); + + var exposedNames = { 'x': 0, 'y': 1, 'z': 2 }; + for (var exposedName in exposedNames) { + assert_equals(list[exposedName], list[exposedNames[exposedName]]); + assert_equals(list[exposedName], list.namedItem(exposedName)); + assert_true(exposedName in list, "'" + exposedName + "' in list"); + assert_true(list.hasOwnProperty(exposedName), + "list.hasOwnProperty('" + exposedName + "')"); + } + + var unexposedNames = ["w"]; + for (var unexposedName of unexposedNames) { + assert_false(unexposedName in list); + assert_false(list.hasOwnProperty(unexposedName)); + assert_equals(list[unexposedName], undefined); + assert_equals(list.namedItem(unexposedName), null); + } + + assert_array_equals(Object.getOwnPropertyNames(list).sort(), + ["0", "1", "2", "3", "x", "y", "z"]); + + var desc = Object.getOwnPropertyDescriptor(list, '0'); + assert_equals(typeof desc, "object", "descriptor should 
be an object"); + assert_true(desc.enumerable, "desc.enumerable"); + assert_true(desc.configurable, "desc.configurable"); + + desc = Object.getOwnPropertyDescriptor(list, 'x'); + assert_equals(typeof desc, "object", "descriptor should be an object"); + assert_false(desc.enumerable, "desc.enumerable"); + assert_true(desc.configurable, "desc.configurable"); + }, "hasOwnProperty, getOwnPropertyDescriptor, getOwnPropertyNames") + + test(function() { + assert_equals(document.createElementNS("http://www.w3.org/1999/xhtml", "i").localName, "i") // Sanity + var t = element.appendChild(document.createElementNS("http://www.w3.org/1999/xhtml", "I")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_equals(t.localName, "I") + assert_equals(t.tagName, "I") + assert_equals(context.getElementsByTagName("I").length, 0) + assert_equals(context.getElementsByTagName("i").length, 0) + }, "HTML element with uppercase tagName never matches in HTML Documents") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "st")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("st"), [t]) + assert_array_equals(context.getElementsByTagName("ST"), []) + }, "Element in non-HTML namespace, no prefix, lowercase name") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "ST")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("ST"), [t]) + assert_array_equals(context.getElementsByTagName("st"), []) + }, "Element in non-HTML namespace, no prefix, uppercase name") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "te:st")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("st"), []) + assert_array_equals(context.getElementsByTagName("ST"), []) + assert_array_equals(context.getElementsByTagName("te:st"), [t]) + 
assert_array_equals(context.getElementsByTagName("te:ST"), []) + }, "Element in non-HTML namespace, prefix, lowercase name") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "te:ST")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("st"), []) + assert_array_equals(context.getElementsByTagName("ST"), []) + assert_array_equals(context.getElementsByTagName("te:st"), []) + assert_array_equals(context.getElementsByTagName("te:ST"), [t]) + }, "Element in non-HTML namespace, prefix, uppercase name") + + test(function() { + var t = element.appendChild(document.createElement("aÇ")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_equals(t.localName, "aÇ") + assert_array_equals(context.getElementsByTagName("AÇ"), [t], "All uppercase input") + assert_array_equals(context.getElementsByTagName("aÇ"), [t], "Ascii lowercase input") + assert_array_equals(context.getElementsByTagName("aç"), [], "All lowercase input") + }, "Element in HTML namespace, no prefix, non-ascii characters in name") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "AÇ")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("AÇ"), [t]) + assert_array_equals(context.getElementsByTagName("aÇ"), []) + assert_array_equals(context.getElementsByTagName("aç"), []) + }, "Element in non-HTML namespace, non-ascii characters in name") + + test(function() { + var t = element.appendChild(document.createElementNS("http://www.w3.org/1999/xhtml", "test:aÇ")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("TEST:AÇ"), [t], "All uppercase input") + assert_array_equals(context.getElementsByTagName("test:aÇ"), [t], "Ascii lowercase input") + assert_array_equals(context.getElementsByTagName("test:aç"), [], "All lowercase input") + }, "Element in HTML namespace, prefix, 
non-ascii characters in name") + + test(function() { + var t = element.appendChild(document.createElementNS("test", "TEST:AÇ")) + this.add_cleanup(function() {element.removeChild(t)}) + assert_array_equals(context.getElementsByTagName("TEST:AÇ"), [t], "All uppercase input") + assert_array_equals(context.getElementsByTagName("test:aÇ"), [], "Ascii lowercase input") + assert_array_equals(context.getElementsByTagName("test:aç"), [], "All lowercase input") + }, "Element in non-HTML namespace, prefix, non-ascii characters in name") + + test(function() { + var actual = context.getElementsByTagName("*"); + var expected = []; + var get_elements = function(node) { + for (var i = 0; i < node.childNodes.length; i++) { + var child = node.childNodes[i]; + if (child.nodeType === child.ELEMENT_NODE) { + expected.push(child); + get_elements(child); + } + } + } + get_elements(context); + assert_array_equals(actual, expected); + }, "getElementsByTagName('*')") + + test(function() { + var t1 = element.appendChild(document.createElement("abc")); + this.add_cleanup(function() {element.removeChild(t1)}); + + var l = context.getElementsByTagName("abc"); + assert_true(l instanceof HTMLCollection); + assert_equals(l.length, 1); + + var t2 = element.appendChild(document.createElement("abc")); + assert_equals(l.length, 2); + + element.removeChild(t2); + assert_equals(l.length, 1); + }, "getElementsByTagName() should be a live collection"); +} diff --git a/tests/wpt/dom/nodes/Document-getElementsByTagName.html b/tests/wpt/dom/nodes/Document-getElementsByTagName.html new file mode 100644 index 00000000..00e3435c --- /dev/null +++ b/tests/wpt/dom/nodes/Document-getElementsByTagName.html @@ -0,0 +1,11 @@ + + +Document.getElementsByTagName + + + + +
+ From 24d38541382e561dc8a8a5076e737bf9a8a14596 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 15 Nov 2023 17:06:42 +0100 Subject: [PATCH 13/22] dom: saner returns for html collection --- src/dom/html_collection.zig | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 7b3be82c..b965f3c2 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -8,6 +8,7 @@ const checkCases = jsruntime.test_utils.checkCases; const utils = @import("utils.z"); const Element = @import("element.zig").Element; +const Union = @import("element.zig").Union; // WEB IDL https://dom.spec.whatwg.org/#htmlcollection // HTMLCollection is re implemented in zig here because libdom @@ -87,7 +88,7 @@ pub const HTMLCollection = struct { return len; } - pub fn _item(self: *HTMLCollection, allocator: std.mem.Allocator, index: u32) !?*parser.Element { + pub fn _item(self: *HTMLCollection, allocator: std.mem.Allocator, index: u32) !?Union { var i: u32 = 0; var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; @@ -113,7 +114,8 @@ pub const HTMLCollection = struct { self.cur_node = node; self.cur_idx = i; - return @as(*parser.Element, @ptrCast(node)); + const e = @as(*parser.Element, @ptrCast(node)); + return Element.toInterface(e); } i += 1; @@ -126,7 +128,7 @@ pub const HTMLCollection = struct { return null; } - pub fn _namedItem(self: *HTMLCollection, allocator: std.mem.Allocator, name: []const u8) !?*parser.Element { + pub fn _namedItem(self: *HTMLCollection, allocator: std.mem.Allocator, name: []const u8) !?Union { if (name.len == 0) { return null; } @@ -148,13 +150,13 @@ pub const HTMLCollection = struct { var attr = parser.elementGetAttribute(elem, "id"); // check if the node id corresponds to the name argument. 
if (attr != null and std.mem.eql(u8, name, attr.?)) { - return elem; + return Element.toInterface(elem); } attr = parser.elementGetAttribute(elem, "name"); // check if the node id corresponds to the name argument. if (attr != null and std.mem.eql(u8, name, attr.?)) { - return elem; + return Element.toInterface(elem); } } } From 62fe1c292d0f38c66e508b4ddf769a3e36367f22 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 10:09:00 +0100 Subject: [PATCH 14/22] wpt: add a console.log debug message --- src/main_wpt.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/main_wpt.zig b/src/main_wpt.zig index 7f567ea2..96c8ad42 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -205,6 +205,11 @@ fn runWPT(arena: *std.heap.ArenaAllocator, comptime apis: []jsruntime.API, f: [] \\ return true; \\}; \\window.removeEventListener = function () {}; + \\ + \\console = []; + \\console.log = function () { + \\ console.push(...arguments); + \\}; ; res = try evalJS(js_env, alloc, init, "init"); if (!res.success) { @@ -247,6 +252,12 @@ fn runWPT(arena: *std.heap.ArenaAllocator, comptime apis: []jsruntime.API, f: [] return res; } + // display console logs + res = try evalJS(js_env, alloc, "console.join(', ');", "console"); + if (res.result.len > 0) { + std.debug.print("-- CONSOLE LOG\n{s}\n--\n", .{res.result}); + } + // Check the final test status. 
res = try evalJS(js_env, alloc, "report.status;", "teststatus"); if (!res.success) { From ba32e1baff4cc39c638b2e0af805e2c99e370ae0 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 12:28:45 +0100 Subject: [PATCH 15/22] dom: collection: use nullable value instead of undefined --- src/dom/html_collection.zig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index b965f3c2..b6877e8c 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -23,8 +23,8 @@ pub const HTMLCollection = struct { match: []const u8, // save a state for the collection to improve the _item speed. - cur_idx: u32 = undefined, - cur_node: *parser.Node = undefined, + cur_idx: ?u32 = undefined, + cur_node: ?*parser.Node = undefined, // next iterates hover the DOM tree to return the next following node or // null at the end. @@ -96,9 +96,9 @@ pub const HTMLCollection = struct { var is_wildcard = std.mem.eql(u8, self.match, "*"); // Use the current state to improve speed if possible. - if (self.cur_idx != undefined and index >= self.cur_idx) { - i = self.cur_idx; - node = self.cur_node; + if (self.cur_idx != null and index >= self.cur_idx.?) 
{ + i = self.cur_idx.?; + node = self.cur_node.?; } const imatch = try std.ascii.allocUpperString(allocator, self.match); From d8b1989e8ee13d4f609475ccdde235b7db5c31ce Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 16:11:25 +0100 Subject: [PATCH 16/22] netsurf: remove useless HTMLCollection wrapper --- src/netsurf.zig | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index aa2ed825..992b8e46 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -621,40 +621,6 @@ pub fn elementGetAttribute(elem: *Element, name: []const u8) ?[]const u8 { return stringToData(s.?); } -// HTMLCollection - -pub const HTMLCollection = c.dom_html_collection; - -pub fn HTMLCollectionLength(collection: *HTMLCollection) u32 { - var ln: u32 = undefined; - _ = c.dom_html_collection_get_length(collection, &ln); - return ln; -} - -pub fn HTMLCollectionItem(collection: *HTMLCollection, index: u32) ?*Element { - var n: [*c]c.dom_node = undefined; - _ = c.dom_html_collection_item(collection, index, &n); - - if (n == null) { - return null; - } - - // cast [*c]c.dom_node into *Element - return @as(*Element, @ptrCast(n)); -} - -pub fn HTMLCollectionNamedItem(collection: *HTMLCollection, name: []const u8) ?*Element { - var n: [*c]c.dom_node = undefined; - _ = c.dom_html_collection_named_item(collection, stringFromData(name), &n); - - if (n == null) { - return null; - } - - // cast [*c]c.dom_node into *Element - return @as(*Element, @ptrCast(n)); -} - // ElementHTML pub const ElementHTML = c.dom_html_element; From 2129361fa08b44e0e20bd7ca338d6076ea037c0e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 16:12:30 +0100 Subject: [PATCH 17/22] dom: is_wildcard is never changed --- src/dom/html_collection.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index b6877e8c..2d1eb268 100644 --- 
a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -72,7 +72,7 @@ pub const HTMLCollection = struct { const imatch = try std.ascii.allocUpperString(allocator, self.match); defer allocator.free(imatch); - var is_wildcard = std.mem.eql(u8, self.match, "*"); + const is_wildcard = std.mem.eql(u8, self.match, "*"); while (true) { ntype = parser.nodeType(node); @@ -93,7 +93,7 @@ pub const HTMLCollection = struct { var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; - var is_wildcard = std.mem.eql(u8, self.match, "*"); + const is_wildcard = std.mem.eql(u8, self.match, "*"); // Use the current state to improve speed if possible. if (self.cur_idx != null and index >= self.cur_idx.?) { @@ -136,7 +136,7 @@ pub const HTMLCollection = struct { var node: *parser.Node = self.root; var ntype: parser.NodeType = undefined; - var is_wildcard = std.mem.eql(u8, self.match, "*"); + const is_wildcard = std.mem.eql(u8, self.match, "*"); const imatch = try std.ascii.allocUpperString(allocator, self.match); defer allocator.free(imatch); From a136e812ae71210120c7602fb5d1405b11b9445a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 16:25:39 +0100 Subject: [PATCH 18/22] dom: add collection get_next comment --- src/dom/html_collection.zig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 2d1eb268..407d62f3 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -26,8 +26,15 @@ pub const HTMLCollection = struct { cur_idx: ?u32 = undefined, cur_node: ?*parser.Node = undefined, - // next iterates hover the DOM tree to return the next following node or + // get_next iterates over the DOM tree to return the next following node or // null at the end. + // + // This implementation is a zig version of Netsurf code. 
+ // http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 + // + // The iteration is a depth first as required by the specification. + // https://dom.spec.whatwg.org/#htmlcollection + // https://dom.spec.whatwg.org/#concept-tree-order fn get_next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { // TODO deinit next var next = parser.nodeFirstChild(cur); @@ -61,7 +68,7 @@ pub const HTMLCollection = struct { return parser.nodeNextSibling(prev); } - /// _get_length computes the collection's length dynamically according to + /// get_length computes the collection's length dynamically according to /// the current root structure. // TODO: nodes retrieved must be de-referenced. pub fn get_length(self: *HTMLCollection, allocator: std.mem.Allocator) !u32 { From 3909fc01a7f18d7f128022c7a24ccf71b910ba48 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 16:56:48 +0100 Subject: [PATCH 19/22] dom: improve code style --- src/dom/html_collection.zig | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 407d62f3..b388032b 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -37,14 +37,12 @@ pub const HTMLCollection = struct { // https://dom.spec.whatwg.org/#concept-tree-order fn get_next(root: *parser.Node, cur: *parser.Node) ?*parser.Node { // TODO deinit next - var next = parser.nodeFirstChild(cur); - if (next != null) { + if (parser.nodeFirstChild(cur)) |next| { return next; } // TODO deinit next - next = parser.nodeNextSibling(cur); - if (next != null) { + if (parser.nodeNextSibling(cur)) |next| { return next; } From 9b923c550bd8170409c7d76580d95e565d89988c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 17:20:41 +0100 Subject: [PATCH 20/22] dom: remove unreachable from html collection --- src/dom/html_collection.zig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git 
a/src/dom/html_collection.zig b/src/dom/html_collection.zig index b388032b..10a4210c 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -47,14 +47,21 @@ pub const HTMLCollection = struct { } // TODO deinit parent - var parent = parser.nodeParentNode(cur) orelse unreachable; + // Back to the parent of cur. + // If cur has no parent, then the iteration is over. + var parent = parser.nodeParentNode(cur) orelse return null; + // TODO deinit lastchild var lastchild = parser.nodeLastChild(parent); var prev = cur; while (prev != root and prev == lastchild) { prev = parent; + // TODO deinit parent - parent = parser.nodeParentNode(cur) orelse unreachable; + // Back to the prev's parent. + // If prev has no parent, then the loop must stop. + parent = parser.nodeParentNode(cur) orelse break; + // TODO deinit lastchild lastchild = parser.nodeLastChild(parent); } From 3ca6a4a74e6ee4ae2d1d6825843953ffd8d5e6b0 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 16 Nov 2023 17:21:15 +0100 Subject: [PATCH 21/22] dom: fix bug in collection get_next --- src/dom/html_collection.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index 10a4210c..10cfb78f 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -60,7 +60,7 @@ pub const HTMLCollection = struct { // TODO deinit parent // Back to the prev's parent. // If prev has no parent, then the loop must stop. 
- parent = parser.nodeParentNode(cur) orelse break; + parent = parser.nodeParentNode(prev) orelse break; // TODO deinit lastchild lastchild = parser.nodeLastChild(parent); From 18bfaf8b553abda592c1233eb1ea79ead4f80900 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 17 Nov 2023 15:11:00 +0100 Subject: [PATCH 22/22] dom: create elementToNode helper --- src/dom/document.zig | 4 ++-- src/netsurf.zig | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dom/document.zig b/src/dom/document.zig index b5e57a10..23609600 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -45,9 +45,9 @@ pub const Document = struct { // That's why we reimplemented getElementsByTagName by using an // HTMLCollection in zig here. pub fn _getElementsByTagName(self: *parser.Document, tag_name: []const u8) HTMLCollection { - const root = parser.documentGetDocumentNode(self); + const root = parser.documentGetDocumentElement(self); return HTMLCollection{ - .root = root, + .root = parser.elementToNode(root), .match = tag_name, }; } diff --git a/src/netsurf.zig b/src/netsurf.zig index 992b8e46..24bb1771 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -621,6 +621,11 @@ pub fn elementGetAttribute(elem: *Element, name: []const u8) ?[]const u8 { return stringToData(s.?); } +// elementToNode is an helper to convert an element to a node. +pub inline fn elementToNode(e: *Element) *Node { + return @as(*Node, @ptrCast(e)); +} + // ElementHTML pub const ElementHTML = c.dom_html_element; @@ -741,11 +746,6 @@ pub inline fn documentGetDocumentElement(doc: *Document) *Element { return elem.?; } -pub inline fn documentGetDocumentNode(doc: *Document) *Node { - const res = documentGetDocumentElement(doc); - return @as(*Node, @ptrCast(res)); -} - pub inline fn documentCreateElement(doc: *Document, tag_name: []const u8) *Element { var elem: ?*Element = undefined; _ = documentVtable(doc).dom_document_create_element.?(doc, stringFromData(tag_name), &elem);