Merge pull request #144 from lightpanda-io/html-doc

dom: DocumentHTML getters
This commit is contained in:
Pierre Tachoire
2024-01-10 17:33:43 +01:00
committed by GitHub
6 changed files with 390 additions and 22 deletions

View File

@@ -108,12 +108,7 @@ pub const Document = struct {
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
tag_name: []const u8, tag_name: []const u8,
) !collection.HTMLCollection { ) !collection.HTMLCollection {
var elt: ?*parser.Node = null; return try collection.HTMLCollectionByTagName(alloc, parser.documentToNode(self), tag_name, true);
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionByTagName(alloc, elt, tag_name, true);
} }
pub fn _getElementsByClassName( pub fn _getElementsByClassName(
@@ -121,12 +116,7 @@ pub const Document = struct {
alloc: std.mem.Allocator, alloc: std.mem.Allocator,
classNames: []const u8, classNames: []const u8,
) !collection.HTMLCollection { ) !collection.HTMLCollection {
var elt: ?*parser.Node = null; return try collection.HTMLCollectionByClassName(alloc, parser.documentToNode(self), classNames, true);
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionByClassName(alloc, elt, classNames, true);
} }
pub fn _createDocumentFragment(self: *parser.Document) !*parser.DocumentFragment { pub fn _createDocumentFragment(self: *parser.Document) !*parser.DocumentFragment {
@@ -170,11 +160,7 @@ pub const Document = struct {
// ParentNode // ParentNode
// https://dom.spec.whatwg.org/#parentnode // https://dom.spec.whatwg.org/#parentnode
pub fn get_children(self: *parser.Document) !collection.HTMLCollection { pub fn get_children(self: *parser.Document) !collection.HTMLCollection {
var elt: ?*parser.Node = null; return try collection.HTMLCollectionChildren(parser.documentToNode(self), false);
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionChildren(elt, true);
} }
pub fn get_firstElementChild(self: *parser.Document) !?ElementUnion { pub fn get_firstElementChild(self: *parser.Document) !?ElementUnion {
@@ -219,7 +205,7 @@ pub const Document = struct {
// catch-all, return all elements // catch-all, return all elements
if (selectors[0] == '*') { if (selectors[0] == '*') {
// walk over the node tree fo find the node by id. // walk over the node tree fo find the node by id.
const root = parser.elementToNode(try parser.documentGetDocumentElement(self) orelse return list); const root = parser.documentToNode(self);
const walker = Walker{}; const walker = Walker{};
var next: ?*parser.Node = null; var next: ?*parser.Node = null;
while (true) { while (true) {

View File

@@ -12,23 +12,35 @@ const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union; const Union = @import("element.zig").Union;
const Matcher = union(enum) { const Matcher = union(enum) {
matchByName: MatchByName,
matchByTagName: MatchByTagName, matchByTagName: MatchByTagName,
matchByClassName: MatchByClassName, matchByClassName: MatchByClassName,
matchByLinks: MatchByLinks,
matchByAnchors: MatchByAnchors,
matchTrue: struct {}, matchTrue: struct {},
matchFalse: struct {},
pub fn match(self: Matcher, node: *parser.Node) !bool { pub fn match(self: Matcher, node: *parser.Node) !bool {
switch (self) { switch (self) {
inline .matchTrue => return true, inline .matchTrue => return true,
inline .matchFalse => return false,
inline .matchByTagName => |case| return case.match(node), inline .matchByTagName => |case| return case.match(node),
inline .matchByClassName => |case| return case.match(node), inline .matchByClassName => |case| return case.match(node),
inline .matchByName => |case| return case.match(node),
inline .matchByLinks => return MatchByLinks.match(node),
inline .matchByAnchors => return MatchByAnchors.match(node),
} }
} }
pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void { pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
switch (self) { switch (self) {
inline .matchTrue => return, inline .matchTrue => return,
inline .matchFalse => return,
inline .matchByTagName => |case| return case.deinit(alloc), inline .matchByTagName => |case| return case.deinit(alloc),
inline .matchByClassName => |case| return case.deinit(alloc), inline .matchByClassName => |case| return case.deinit(alloc),
inline .matchByName => |case| return case.deinit(alloc),
inline .matchByLinks => return,
inline .matchByAnchors => return,
} }
} }
}; };
@@ -117,6 +129,56 @@ pub fn HTMLCollectionByClassName(
}; };
} }
/// MatchByName matches nodes whose `name` attribute is byte-for-byte equal to
/// the name given at construction time.
pub const MatchByName = struct {
    // Private copy of the name to compare against; created by `init` and
    // released by `deinit`.
    name: []const u8,

    /// Builds a matcher holding its own copy of `name`, allocated with `alloc`.
    /// The copy must be released with `deinit` using the same allocator.
    fn init(alloc: std.mem.Allocator, name: []const u8) !MatchByName {
        return MatchByName{ .name = try alloc.dupe(u8, name) };
    }

    /// Returns true when `node` carries a `name` attribute equal to the stored
    /// name, false when the attribute is absent or different.
    pub fn match(self: MatchByName, node: *parser.Node) !bool {
        const elem = parser.nodeToElement(node);
        if (try parser.elementGetAttribute(elem, "name")) |value| {
            return std.mem.eql(u8, self.name, value);
        }
        return false;
    }

    /// Frees the copy of the name made by `init`.
    fn deinit(self: MatchByName, alloc: std.mem.Allocator) void {
        alloc.free(self.name);
    }
};
/// Builds an HTMLCollection that walks `root` depth-first and keeps the nodes
/// accepted by a MatchByName matcher for `name`.
/// The matcher copies `name` using `alloc`; `include_root` is stored as-is on
/// the returned collection.
pub fn HTMLCollectionByName(
    alloc: std.mem.Allocator,
    root: ?*parser.Node,
    name: []const u8,
    include_root: bool,
) !HTMLCollection {
    // Only the matcher construction can fail, so do it up front.
    const by_name = try MatchByName.init(alloc, name);
    return HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = .{ .matchByName = by_name },
        .include_root = include_root,
    };
}
/// Builds an HTMLCollection that walks `root` depth-first with the always-true
/// matcher, so no visited node is filtered out by the matcher.
pub fn HTMLCollectionAll(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const all = HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = .{ .matchTrue = .{} },
        .include_root = include_root,
    };
    return all;
}
pub fn HTMLCollectionChildren( pub fn HTMLCollectionChildren(
root: ?*parser.Node, root: ?*parser.Node,
include_root: bool, include_root: bool,
@@ -129,9 +191,74 @@ pub fn HTMLCollectionChildren(
}; };
} }
/// Builds an HTMLCollection with no root, a walker that never yields a node
/// and a matcher that rejects everything.
pub fn HTMLCollectionEmpty() !HTMLCollection {
    const empty = HTMLCollection{
        .root = null,
        .walker = .{ .walkerNone = .{} },
        .matcher = .{ .matchFalse = .{} },
        .include_root = false,
    };
    return empty;
}
// MatchByLinks accepts the `a` and `area` elements that carry an `href`
// attribute.
// https://html.spec.whatwg.org/#dom-document-links
pub const MatchByLinks = struct {
    /// Returns whether `node` is named `a` or `area` (ASCII case-insensitive)
    /// and has an `href` attribute.
    pub fn match(node: *parser.Node) !bool {
        const tag = try parser.nodeName(node);
        if (!(std.ascii.eqlIgnoreCase(tag, "a") or std.ascii.eqlIgnoreCase(tag, "area"))) {
            return false;
        }

        // The tag name check passed, so the node is handled as an element.
        const elem: *parser.Element = @ptrCast(node);
        return parser.elementHasAttribute(elem, "href");
    }
};
/// Builds an HTMLCollection that walks `root` depth-first and keeps the nodes
/// accepted by MatchByLinks.
pub fn HTMLCollectionByLinks(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const links = HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = .{ .matchByLinks = .{} },
        .include_root = include_root,
    };
    return links;
}
// MatchByAnchors accepts the `a` elements that carry a `name` attribute.
// https://html.spec.whatwg.org/#dom-document-anchors
pub const MatchByAnchors = struct {
    /// Returns whether `node` is named `a` (ASCII case-insensitive) and has a
    /// `name` attribute.
    pub fn match(node: *parser.Node) !bool {
        const tag = try parser.nodeName(node);
        const is_anchor = std.ascii.eqlIgnoreCase(tag, "a");
        if (!is_anchor) {
            return false;
        }

        // The tag name check passed, so the node is handled as an element.
        const elem: *parser.Element = @ptrCast(node);
        return parser.elementHasAttribute(elem, "name");
    }
};
/// Builds an HTMLCollection that walks `root` depth-first and keeps the nodes
/// accepted by MatchByAnchors.
pub fn HTMLCollectionByAnchors(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const anchors = HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = .{ .matchByAnchors = .{} },
        .include_root = include_root,
    };
    return anchors;
}
const Walker = union(enum) { const Walker = union(enum) {
walkerDepthFirst: WalkerDepthFirst, walkerDepthFirst: WalkerDepthFirst,
walkerChildren: WalkerChildren, walkerChildren: WalkerChildren,
walkerNone: WalkerNone,
pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
switch (self) { switch (self) {
@@ -205,6 +332,12 @@ pub const WalkerChildren = struct {
} }
}; };
/// WalkerNone is a walker that never yields a node; it is the walker used by
/// HTMLCollectionEmpty.
pub const WalkerNone = struct {
/// Ignores the root and the current position and always reports the end of
/// the walk by returning null.
pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
return null;
}
};
// WEB IDL https://dom.spec.whatwg.org/#htmlcollection // WEB IDL https://dom.spec.whatwg.org/#htmlcollection
// HTMLCollection is re implemented in zig here because libdom // HTMLCollection is re implemented in zig here because libdom
// dom_html_collection expects a comparison function callback as arguement. // dom_html_collection expects a comparison function callback as arguement.
@@ -259,7 +392,7 @@ pub const HTMLCollection = struct {
return len; return len;
} }
pub fn _item(self: *HTMLCollection, index: u32) !?Union { pub fn item(self: *HTMLCollection, index: u32) !?*parser.Node {
if (self.root == null) return null; if (self.root == null) return null;
var i: u32 = 0; var i: u32 = 0;
@@ -282,8 +415,7 @@ pub const HTMLCollection = struct {
self.cur_node = node; self.cur_node = node;
self.cur_idx = i; self.cur_idx = i;
const e = @as(*parser.Element, @ptrCast(node)); return node;
return try Element.toInterface(e);
} }
i += 1; i += 1;
@@ -296,6 +428,12 @@ pub const HTMLCollection = struct {
return null; return null;
} }
/// Returns the element at `index` converted with `Element.toInterface`, or
/// null when `item` finds no node at that index.
pub fn _item(self: *HTMLCollection, index: u32) !?Union {
    if (try self.item(index)) |node| {
        // The node returned by `item` is handled as an element here.
        const elem: *parser.Element = @ptrCast(node);
        return try Element.toInterface(elem);
    }
    return null;
}
pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union { pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union {
if (self.root == null) return null; if (self.root == null) return null;
if (name.len == 0) return null; if (name.len == 0) return null;

View File

@@ -14,6 +14,10 @@ const DOMException = @import("exceptions.zig").DOMException;
// Nodelist is implemented in pure Zig b/c libdom's NodeList doesn't allow to // Nodelist is implemented in pure Zig b/c libdom's NodeList doesn't allow to
// append nodes. // append nodes.
// WEB IDL https://dom.spec.whatwg.org/#nodelist // WEB IDL https://dom.spec.whatwg.org/#nodelist
//
// TODO: a NodeList can be either static or live, but the current
// implementation supports only static NodeLists.
// See https://dom.spec.whatwg.org/#old-style-collections
pub const NodeList = struct { pub const NodeList = struct {
pub const mem_guarantied = true; pub const mem_guarantied = true;
pub const Exception = DOMException; pub const Exception = DOMException;

View File

@@ -6,9 +6,14 @@ const jsruntime = @import("jsruntime");
const Case = jsruntime.test_utils.Case; const Case = jsruntime.test_utils.Case;
const checkCases = jsruntime.test_utils.checkCases; const checkCases = jsruntime.test_utils.checkCases;
const Node = @import("../dom/node.zig").Node;
const Document = @import("../dom/document.zig").Document; const Document = @import("../dom/document.zig").Document;
const NodeList = @import("../dom/nodelist.zig").NodeList;
const HTMLElem = @import("elements.zig"); const HTMLElem = @import("elements.zig");
const collection = @import("../dom/html_collection.zig");
const Walker = collection.WalkerDepthFirst;
// WEB IDL https://html.spec.whatwg.org/#the-document-object // WEB IDL https://html.spec.whatwg.org/#the-document-object
pub const HTMLDocument = struct { pub const HTMLDocument = struct {
pub const Self = parser.DocumentHTML; pub const Self = parser.DocumentHTML;
@@ -18,9 +23,168 @@ pub const HTMLDocument = struct {
// JS funcs // JS funcs
// -------- // --------
/// Getter for `document.domain`; forwards to `parser.documentHTMLGetDomain`.
pub fn get_domain(self: *parser.DocumentHTML) ![]const u8 {
    const domain = try parser.documentHTMLGetDomain(self);
    return domain;
}
/// Setter for `document.domain`. Not supported: both arguments are ignored
/// and the call always fails with `error.NotImplemented`.
pub fn set_domain(_: *parser.DocumentHTML, _: []const u8) ![]const u8 {
return error.NotImplemented;
}
/// Getter for `document.referrer`; forwards to `parser.documentHTMLGetReferrer`.
pub fn get_referrer(self: *parser.DocumentHTML) ![]const u8 {
    const referrer = try parser.documentHTMLGetReferrer(self);
    return referrer;
}
/// Setter for `document.referrer`. Not supported: both arguments are ignored
/// and the call always fails with `error.NotImplemented`.
pub fn set_referrer(_: *parser.DocumentHTML, _: []const u8) ![]const u8 {
return error.NotImplemented;
}
pub fn get_body(self: *parser.DocumentHTML) !?*parser.Body { pub fn get_body(self: *parser.DocumentHTML) !?*parser.Body {
return try parser.documentHTMLBody(self); return try parser.documentHTMLBody(self);
} }
/// Setter for `document.body`: installs `elt` through
/// `parser.documentHTMLSetBody`, then reads the body back from the document
/// and returns it.
pub fn set_body(self: *parser.DocumentHTML, elt: ?*parser.ElementHTML) !?*parser.Body {
    try parser.documentHTMLSetBody(self, elt);
    const body = try get_body(self);
    return body;
}
/// Getter for `document.head`: walks the document depth-first and returns the
/// first node whose name is `head` (ASCII case-insensitive), or null when the
/// walk ends without finding one.
pub fn get_head(self: *parser.DocumentHTML) !?*parser.Head {
    const walker = Walker{};
    const root = parser.documentHTMLToNode(self);

    var cur: ?*parser.Node = null;
    while (try walker.get_next(root, cur)) |node| {
        const tag = try parser.nodeName(node);
        if (std.ascii.eqlIgnoreCase("head", tag)) {
            return @as(*parser.Head, @ptrCast(node));
        }
        // Resume the walk from the node that was just inspected.
        cur = node;
    }
    return null;
}
// TODO: not implemented by libdom
/// Getter for `document.cookie`. Always fails with `error.NotImplemented`.
pub fn get_cookie(_: *parser.DocumentHTML) ![]const u8 {
return error.NotImplemented;
}
// TODO: not implemented by libdom
/// Setter for `document.cookie`. The value is ignored and the call always
/// fails with `error.NotImplemented`.
pub fn set_cookie(_: *parser.DocumentHTML, _: []const u8) ![]const u8 {
return error.NotImplemented;
}
/// Getter for `document.title`; forwards to `parser.documentHTMLGetTitle`.
pub fn get_title(self: *parser.DocumentHTML) ![]const u8 {
    const title = try parser.documentHTMLGetTitle(self);
    return title;
}
/// Setter for `document.title`: stores `v` through
/// `parser.documentHTMLSetTitle` and returns the same slice that was passed in.
pub fn set_title(self: *parser.DocumentHTML, v: []const u8) ![]const u8 {
try parser.documentHTMLSetTitle(self, v);
return v;
}
/// Implements `document.getElementsByName`: returns a NodeList holding every
/// node of the document accepted by the by-name matcher for `name`.
/// An empty `name` yields an empty list. The list is a snapshot filled once
/// from a temporary HTMLCollection; on error the partially built list is
/// released before the error is propagated.
pub fn _getElementsByName(self: *parser.DocumentHTML, alloc: std.mem.Allocator, name: []const u8) !NodeList {
    var list = try NodeList.init();
    errdefer list.deinit(alloc);

    if (name.len == 0) return list;

    const root = parser.documentHTMLToNode(self);
    var c = try collection.HTMLCollectionByName(alloc, root, name, false);
    // The collection's matcher owns a copy of `name` allocated with `alloc`.
    // The collection is only a local helper here, so release that copy on
    // every exit path instead of leaking it.
    defer c.matcher.deinit(alloc);

    const ln = try c.get_length();
    var i: u32 = 0;
    while (i < ln) : (i += 1) {
        // Stop early if the collection runs out of items before `ln`.
        const n = try c.item(i) orelse break;
        try list.append(alloc, n);
    }

    return list;
}
/// Getter for `document.images`: a by-tag-name collection for `img` rooted at
/// the document node, root excluded.
pub fn get_images(self: *parser.DocumentHTML, alloc: std.mem.Allocator) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const images = try collection.HTMLCollectionByTagName(alloc, root, "img", false);
    return images;
}
/// Getter for `document.embeds`: a by-tag-name collection for `embed` rooted
/// at the document node, root excluded.
pub fn get_embeds(self: *parser.DocumentHTML, alloc: std.mem.Allocator) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const embeds = try collection.HTMLCollectionByTagName(alloc, root, "embed", false);
    return embeds;
}
/// Getter for `document.plugins`: returns the same collection as `get_embeds`.
pub fn get_plugins(self: *parser.DocumentHTML, alloc: std.mem.Allocator) !collection.HTMLCollection {
    const embeds = try get_embeds(self, alloc);
    return embeds;
}
/// Getter for `document.forms`: a by-tag-name collection for `form` rooted at
/// the document node, root excluded.
pub fn get_forms(self: *parser.DocumentHTML, alloc: std.mem.Allocator) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const forms = try collection.HTMLCollectionByTagName(alloc, root, "form", false);
    return forms;
}
/// Getter for `document.scripts`: a by-tag-name collection for `script`
/// rooted at the document node, root excluded.
pub fn get_scripts(self: *parser.DocumentHTML, alloc: std.mem.Allocator) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const scripts = try collection.HTMLCollectionByTagName(alloc, root, "script", false);
    return scripts;
}
/// Getter for `document.applets`: always the empty collection.
pub fn get_applets(_: *parser.DocumentHTML) !collection.HTMLCollection {
    const none = try collection.HTMLCollectionEmpty();
    return none;
}
/// Getter for `document.links`: the by-links collection rooted at the
/// document node, root excluded.
pub fn get_links(self: *parser.DocumentHTML) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const links = try collection.HTMLCollectionByLinks(root, false);
    return links;
}
/// Getter for `document.anchors`: the by-anchors collection rooted at the
/// document node, root excluded.
pub fn get_anchors(self: *parser.DocumentHTML) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const anchors = try collection.HTMLCollectionByAnchors(root, false);
    return anchors;
}
/// Getter for `document.all`: the match-everything collection rooted at the
/// document node, created with `include_root` set to true.
pub fn get_all(self: *parser.DocumentHTML) !collection.HTMLCollection {
    const root = parser.documentHTMLToNode(self);
    const all = try collection.HTMLCollectionAll(root, true);
    return all;
}
/// Getter for `document.currentScript`. Always returns null in this
/// implementation.
pub fn get_currentScript(_: *parser.DocumentHTML) !?*parser.Element {
return null;
}
/// Getter for `document.designMode`. Always returns "off".
pub fn get_designMode(_: *parser.DocumentHTML) []const u8 {
return "off";
}
/// Setter for `document.designMode`. The given value is ignored and "off" is
/// returned.
pub fn set_designMode(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "off";
}
// noop legacy functions
// https://html.spec.whatwg.org/#Document-partial
/// Legacy `document.clear()`: does nothing.
pub fn _clear(_: *parser.DocumentHTML) void {}
/// Legacy `document.captureEvents()`: does nothing.
pub fn _captureEvents(_: *parser.DocumentHTML) void {}
/// Legacy `document.releaseEvents()`: does nothing.
pub fn _releaseEvents(_: *parser.DocumentHTML) void {}
/// Legacy getter for `document.fgColor`. Always returns the empty string.
pub fn get_fgColor(_: *parser.DocumentHTML) []const u8 {
return "";
}
/// Legacy setter for `document.fgColor`. The value is ignored and the empty
/// string is returned.
pub fn set_fgColor(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "";
}
/// Legacy getter for `document.linkColor`. Always returns the empty string.
pub fn get_linkColor(_: *parser.DocumentHTML) []const u8 {
return "";
}
/// Legacy setter for `document.linkColor`. The value is ignored and the empty
/// string is returned.
pub fn set_linkColor(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "";
}
/// Legacy getter for `document.vlinkColor`. Always returns the empty string.
pub fn get_vlinkColor(_: *parser.DocumentHTML) []const u8 {
return "";
}
/// Legacy setter for `document.vlinkColor`. The value is ignored and the empty
/// string is returned.
pub fn set_vlinkColor(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "";
}
/// Legacy getter for `document.alinkColor`. Always returns the empty string.
pub fn get_alinkColor(_: *parser.DocumentHTML) []const u8 {
return "";
}
/// Legacy setter for `document.alinkColor`. The value is ignored and the empty
/// string is returned.
pub fn set_alinkColor(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "";
}
/// Legacy getter for `document.bgColor`. Always returns the empty string.
pub fn get_bgColor(_: *parser.DocumentHTML) []const u8 {
return "";
}
/// Legacy setter for `document.bgColor`. The value is ignored and the empty
/// string is returned.
pub fn set_bgColor(_: *parser.DocumentHTML, _: []const u8) []const u8 {
return "";
}
/// Releases nothing: both the document and the allocator are ignored.
pub fn deinit(_: *parser.DocumentHTML, _: std.mem.Allocator) void {}
}; };
// Tests // Tests
@@ -36,4 +200,37 @@ pub fn testExecFn(
.{ .src = "document.body.localName == 'body'", .ex = "true" }, .{ .src = "document.body.localName == 'body'", .ex = "true" },
}; };
try checkCases(js_env, &constructor); try checkCases(js_env, &constructor);
var getters = [_]Case{
.{ .src = "document.domain", .ex = "" },
.{ .src = "document.referrer", .ex = "" },
.{ .src = "document.title", .ex = "" },
.{ .src = "document.body.localName", .ex = "body" },
.{ .src = "document.head.localName", .ex = "head" },
.{ .src = "document.images.length", .ex = "0" },
.{ .src = "document.embeds.length", .ex = "0" },
.{ .src = "document.plugins.length", .ex = "0" },
.{ .src = "document.scripts.length", .ex = "0" },
.{ .src = "document.forms.length", .ex = "0" },
.{ .src = "document.links.length", .ex = "1" },
.{ .src = "document.applets.length", .ex = "0" },
.{ .src = "document.anchors.length", .ex = "0" },
.{ .src = "document.all.length", .ex = "8" },
.{ .src = "document.currentScript", .ex = "null" },
};
try checkCases(js_env, &getters);
var titles = [_]Case{
.{ .src = "document.title = 'foo'", .ex = "foo" },
.{ .src = "document.title", .ex = "foo" },
.{ .src = "document.title = ''", .ex = "" },
};
try checkCases(js_env, &titles);
var getElementsByName = [_]Case{
.{ .src = "document.getElementById('link').setAttribute('name', 'foo')", .ex = "undefined" },
.{ .src = "let list = document.getElementsByName('foo')", .ex = "undefined" },
.{ .src = "list.length", .ex = "1" },
};
try checkCases(js_env, &getElementsByName);
} }

View File

@@ -1216,6 +1216,10 @@ fn documentVtable(doc: *Document) c.dom_document_vtable {
return getVtable(c.dom_document_vtable, Document, doc); return getVtable(c.dom_document_vtable, Document, doc);
} }
/// documentToNode is a helper that reinterprets a Document pointer as a Node
/// pointer.
pub inline fn documentToNode(doc: *Document) *Node {
    const node: *Node = @ptrCast(doc);
    return node;
}
pub inline fn documentGetElementById(doc: *Document, id: []const u8) !?*Element { pub inline fn documentGetElementById(doc: *Document, id: []const u8) !?*Element {
var elem: ?*Element = undefined; var elem: ?*Element = undefined;
const err = documentVtable(doc).dom_document_get_element_by_id.?(doc, try strFromData(id), &elem); const err = documentVtable(doc).dom_document_get_element_by_id.?(doc, try strFromData(id), &elem);
@@ -1357,6 +1361,11 @@ pub inline fn documentCreateAttributeNS(doc: *Document, ns: []const u8, qname: [
// DocumentHTML // DocumentHTML
pub const DocumentHTML = c.dom_html_document; pub const DocumentHTML = c.dom_html_document;
// documentHTMLToNode is a helper that reinterprets a DocumentHTML pointer as
// a Node pointer.
pub inline fn documentHTMLToNode(doc: *DocumentHTML) *Node {
    const node: *Node = @ptrCast(doc);
    return node;
}
fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable {
return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html);
} }
@@ -1450,3 +1459,37 @@ pub inline fn documentHTMLBody(doc_html: *DocumentHTML) !?*Body {
if (body == null) return null; if (body == null) return null;
return @as(*Body, @ptrCast(body.?)); return @as(*Body, @ptrCast(body.?));
} }
/// Calls the document's `set_body` vtable entry with `elt` and converts the
/// returned DOM status into a Zig error through `DOMErr`.
pub inline fn documentHTMLSetBody(doc_html: *DocumentHTML, elt: ?*ElementHTML) !void {
    try DOMErr(documentHTMLVtable(doc_html).set_body.?(doc_html, elt));
}
/// Reads the document domain through the `get_domain` vtable entry.
/// Returns "" when the call succeeds but yields no string.
pub inline fn documentHTMLGetDomain(doc: *DocumentHTML) ![]const u8 {
    // Out-parameter written by the vtable call.
    var domain: ?*String = undefined;
    try DOMErr(documentHTMLVtable(doc).get_domain.?(doc, &domain));

    if (domain) |str| return strToData(str);
    return "";
}
/// Reads the document referrer through the `get_referrer` vtable entry.
/// Returns "" when the call succeeds but yields no string.
pub inline fn documentHTMLGetReferrer(doc: *DocumentHTML) ![]const u8 {
    // Out-parameter written by the vtable call.
    var referrer: ?*String = undefined;
    try DOMErr(documentHTMLVtable(doc).get_referrer.?(doc, &referrer));

    if (referrer) |str| return strToData(str);
    return "";
}
/// Reads the document title through the `get_title` vtable entry.
/// Returns "" when the call succeeds but yields no string.
pub inline fn documentHTMLGetTitle(doc: *DocumentHTML) ![]const u8 {
    // Out-parameter written by the vtable call.
    var title: ?*String = undefined;
    try DOMErr(documentHTMLVtable(doc).get_title.?(doc, &title));

    if (title) |str| return strToData(str);
    return "";
}
/// Converts `v` with `strFromData`, passes it to the document's `set_title`
/// vtable entry and converts the returned DOM status into a Zig error through
/// `DOMErr`.
pub inline fn documentHTMLSetTitle(doc: *DocumentHTML, v: []const u8) !void {
    try DOMErr(documentHTMLVtable(doc).set_title.?(doc, try strFromData(v)));
}