Merge pull request #144 from lightpanda-io/html-doc

dom: DocumentHTML getters
This commit is contained in:
Pierre Tachoire
2024-01-10 17:33:43 +01:00
committed by GitHub
6 changed files with 390 additions and 22 deletions

View File

@@ -108,12 +108,7 @@ pub const Document = struct {
alloc: std.mem.Allocator,
tag_name: []const u8,
) !collection.HTMLCollection {
var elt: ?*parser.Node = null;
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionByTagName(alloc, elt, tag_name, true);
return try collection.HTMLCollectionByTagName(alloc, parser.documentToNode(self), tag_name, true);
}
pub fn _getElementsByClassName(
@@ -121,12 +116,7 @@ pub const Document = struct {
alloc: std.mem.Allocator,
classNames: []const u8,
) !collection.HTMLCollection {
var elt: ?*parser.Node = null;
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionByClassName(alloc, elt, classNames, true);
return try collection.HTMLCollectionByClassName(alloc, parser.documentToNode(self), classNames, true);
}
pub fn _createDocumentFragment(self: *parser.Document) !*parser.DocumentFragment {
@@ -170,11 +160,7 @@ pub const Document = struct {
// ParentNode
// https://dom.spec.whatwg.org/#parentnode
pub fn get_children(self: *parser.Document) !collection.HTMLCollection {
var elt: ?*parser.Node = null;
if (try parser.documentGetDocumentElement(self)) |root| {
elt = parser.elementToNode(root);
}
return try collection.HTMLCollectionChildren(elt, true);
return try collection.HTMLCollectionChildren(parser.documentToNode(self), false);
}
pub fn get_firstElementChild(self: *parser.Document) !?ElementUnion {
@@ -219,7 +205,7 @@ pub const Document = struct {
// catch-all, return all elements
if (selectors[0] == '*') {
// walk over the node tree to find the node by id.
const root = parser.elementToNode(try parser.documentGetDocumentElement(self) orelse return list);
const root = parser.documentToNode(self);
const walker = Walker{};
var next: ?*parser.Node = null;
while (true) {

View File

@@ -12,23 +12,35 @@ const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union;
// Matcher is the node filter stored in an HTMLCollection's `matcher` field.
// Each variant wraps one matching strategy; matchTrue and matchFalse are
// constant filters carrying no state.
const Matcher = union(enum) {
    matchByName: MatchByName,
    matchByTagName: MatchByTagName,
    matchByClassName: MatchByClassName,
    matchByLinks: MatchByLinks,
    matchByAnchors: MatchByAnchors,
    matchTrue: struct {},
    matchFalse: struct {},

    // match dispatches to the active variant and reports whether it accepts
    // node.
    pub fn match(self: Matcher, node: *parser.Node) !bool {
        switch (self) {
            // Constant filters.
            .matchTrue => return true,
            .matchFalse => return false,
            // Stateless matchers: match is called on the type itself.
            .matchByLinks => return MatchByLinks.match(node),
            .matchByAnchors => return MatchByAnchors.match(node),
            // Matchers carrying a payload; `inline` is needed because the
            // payload types differ between the prongs.
            inline .matchByTagName, .matchByClassName, .matchByName => |m| return m.match(node),
        }
    }

    // deinit forwards to the payload's deinit for the variants that have one;
    // the other variants do nothing.
    pub fn deinit(self: Matcher, alloc: std.mem.Allocator) void {
        switch (self) {
            .matchTrue, .matchFalse, .matchByLinks, .matchByAnchors => return,
            inline .matchByTagName, .matchByClassName, .matchByName => |m| return m.deinit(alloc),
        }
    }
};
@@ -117,6 +129,56 @@ pub fn HTMLCollectionByClassName(
};
}
// MatchByName matches the nodes whose `name` attribute is byte-for-byte equal
// to a given name.
pub const MatchByName = struct {
    // Private copy of the name to match. It is allocated by init and must be
    // released with deinit using the same allocator.
    name: []const u8,

    // init stores a copy of name, duplicated with alloc, so the matcher does
    // not depend on the lifetime of the caller's slice.
    // Returns the allocator's error if the copy cannot be allocated.
    fn init(alloc: std.mem.Allocator, name: []const u8) !MatchByName {
        return MatchByName{
            .name = try alloc.dupe(u8, name),
        };
    }

    // match returns true when node has a `name` attribute equal to self.name,
    // and false when the attribute is absent or different.
    // NOTE(review): node is converted to an element without a node-type check
    // here — presumably callers only pass element nodes; confirm.
    pub fn match(self: MatchByName, node: *parser.Node) !bool {
        const e = parser.nodeToElement(node);
        const nname = try parser.elementGetAttribute(e, "name") orelse return false;
        return std.mem.eql(u8, self.name, nname);
    }

    // deinit frees the copy of the name made by init. alloc must be the
    // allocator that was given to init.
    fn deinit(self: MatchByName, alloc: std.mem.Allocator) void {
        alloc.free(self.name);
    }
};
// HTMLCollectionByName builds a collection using a depth-first walker from
// root and a MatchByName matcher for name.
// The matcher keeps its own copy of name, allocated with alloc; the call fails
// if that allocation fails.
pub fn HTMLCollectionByName(
    alloc: std.mem.Allocator,
    root: ?*parser.Node,
    name: []const u8,
    include_root: bool,
) !HTMLCollection {
    const by_name = try MatchByName.init(alloc, name);
    return HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = .{ .matchByName = by_name },
        .include_root = include_root,
    };
}
// HTMLCollectionAll builds a collection using a depth-first walker from root
// and the matchTrue matcher, which accepts every node.
pub fn HTMLCollectionAll(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const accept_all: Matcher = .{ .matchTrue = .{} };
    const depth_first: Walker = .{ .walkerDepthFirst = .{} };
    return HTMLCollection{
        .root = root,
        .walker = depth_first,
        .matcher = accept_all,
        .include_root = include_root,
    };
}
pub fn HTMLCollectionChildren(
root: ?*parser.Node,
include_root: bool,
@@ -129,9 +191,74 @@ pub fn HTMLCollectionChildren(
};
}
// HTMLCollectionEmpty builds a collection with no root, a walker that never
// yields a node and a matcher that rejects everything.
pub fn HTMLCollectionEmpty() !HTMLCollection {
    const no_walk: Walker = .{ .walkerNone = .{} };
    const reject_all: Matcher = .{ .matchFalse = .{} };
    return HTMLCollection{
        .root = null,
        .walker = no_walk,
        .matcher = reject_all,
        .include_root = false,
    };
}
// MatchByLinks matches the `a` and `area` elements in the Document that have
// an `href` attribute.
// https://html.spec.whatwg.org/#dom-document-links
pub const MatchByLinks = struct {
    // match returns true when node's name is "a" or "area" (compared ASCII
    // case-insensitively) and the element has an `href` attribute.
    pub fn match(node: *parser.Node) !bool {
        const name = try parser.nodeName(node);
        const is_link_tag = std.ascii.eqlIgnoreCase(name, "a") or
            std.ascii.eqlIgnoreCase(name, "area");
        if (!is_link_tag) return false;

        // The node name has been checked, so the node is handled as an element.
        const element: *parser.Element = @ptrCast(node);
        return parser.elementHasAttribute(element, "href");
    }
};
// HTMLCollectionByLinks builds a collection using a depth-first walker from
// root and the MatchByLinks matcher.
pub fn HTMLCollectionByLinks(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const links_only: Matcher = .{ .matchByLinks = .{} };
    return HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = links_only,
        .include_root = include_root,
    };
}
// MatchByAnchors matches the `a` elements in the Document that have a `name`
// attribute.
// https://html.spec.whatwg.org/#dom-document-anchors
pub const MatchByAnchors = struct {
    // match returns true when node's name is "a" (compared ASCII
    // case-insensitively) and the element has a `name` attribute.
    pub fn match(node: *parser.Node) !bool {
        const name = try parser.nodeName(node);
        if (std.ascii.eqlIgnoreCase(name, "a")) {
            // The node name has been checked, so the node is handled as an
            // element.
            const element: *parser.Element = @ptrCast(node);
            return parser.elementHasAttribute(element, "name");
        }
        return false;
    }
};
// HTMLCollectionByAnchors builds a collection using a depth-first walker from
// root and the MatchByAnchors matcher.
pub fn HTMLCollectionByAnchors(
    root: ?*parser.Node,
    include_root: bool,
) !HTMLCollection {
    const anchors_only: Matcher = .{ .matchByAnchors = .{} };
    return HTMLCollection{
        .root = root,
        .walker = .{ .walkerDepthFirst = .{} },
        .matcher = anchors_only,
        .include_root = include_root,
    };
}
const Walker = union(enum) {
walkerDepthFirst: WalkerDepthFirst,
walkerChildren: WalkerChildren,
walkerNone: WalkerNone,
pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
switch (self) {
@@ -205,6 +332,12 @@ pub const WalkerChildren = struct {
}
};
// WalkerNone is a walker that never yields a node.
pub const WalkerNone = struct {
    // get_next ignores all of its arguments and always returns null.
    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
        const no_node: ?*parser.Node = null;
        return no_node;
    }
};
// WEB IDL https://dom.spec.whatwg.org/#htmlcollection
// HTMLCollection is reimplemented in Zig here because libdom's
// dom_html_collection expects a comparison function callback as an argument.
@@ -259,7 +392,7 @@ pub const HTMLCollection = struct {
return len;
}
pub fn _item(self: *HTMLCollection, index: u32) !?Union {
pub fn item(self: *HTMLCollection, index: u32) !?*parser.Node {
if (self.root == null) return null;
var i: u32 = 0;
@@ -282,8 +415,7 @@ pub const HTMLCollection = struct {
self.cur_node = node;
self.cur_idx = i;
const e = @as(*parser.Element, @ptrCast(node));
return try Element.toInterface(e);
return node;
}
i += 1;
@@ -296,6 +428,12 @@ pub const HTMLCollection = struct {
return null;
}
// _item returns the node found by item(index) converted to its element
// interface, or null when item returns no node.
pub fn _item(self: *HTMLCollection, index: u32) !?Union {
    if (try self.item(index)) |found| {
        // The node returned by item is handled as an element.
        const element: *parser.Element = @ptrCast(found);
        return try Element.toInterface(element);
    }
    return null;
}
pub fn _namedItem(self: *HTMLCollection, name: []const u8) !?Union {
if (self.root == null) return null;
if (name.len == 0) return null;

View File

@@ -14,6 +14,10 @@ const DOMException = @import("exceptions.zig").DOMException;
// Nodelist is implemented in pure Zig b/c libdom's NodeList doesn't allow to
// append nodes.
// WEB IDL https://dom.spec.whatwg.org/#nodelist
//
// TODO: a NodeList can be either static or live, but the current
// implementation supports only static NodeLists.
// see https://dom.spec.whatwg.org/#old-style-collections
pub const NodeList = struct {
pub const mem_guarantied = true;
pub const Exception = DOMException;