Improve support for non-HTML namespace

This does a better job of tracking the implicit namespace based on the context.
For example, when using DOMParser.parseFromString with an XML namespace, all
subsequent elements will be in the XML namespace.

Adds support for null namespace.

Rather than defaulting to HTML, unknown namespaces now map to a special unknown
type. We don't currently preserve the original namespace, but we're at least
able to properly handle the casing in this case.
This commit is contained in:
Karl Seguin
2026-01-08 20:38:23 +08:00
parent 0db1ceaea7
commit 9d498fa069
11 changed files with 736 additions and 703 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -104,7 +104,7 @@ pub fn parseXML(self: *Parser, xml: []const u8) void {
xml.len,
&self.container,
self,
createElementCallback,
createXMLElementCallback,
getDataCallback,
appendCallback,
parseErrorCallback,
@@ -225,17 +225,26 @@ fn _popCallback(self: *Parser, node: *Node) !void {
}
/// C calling-convention callback for creating an element.
///
/// Forwards every argument unchanged to
/// `_createElementCallbackWithDefaultnamespace`, passing `.unknown` as the
/// default namespace. `_createElementCallback` falls back to that default
/// when `qname.ns` is an empty string.
///
/// Returns whatever the shared helper returns; that helper records the error
/// on the parser and yields null when element creation fails.
fn createElementCallback(ctx: *anyopaque, data: *anyopaque, qname: h5e.QualName, attributes: h5e.AttributeIterator) callconv(.c) ?*anyopaque {
return _createElementCallbackWithDefaultnamespace(ctx, data, qname, attributes, .unknown);
}
/// C calling-convention callback for creating an element, with `.xml` as the
/// default namespace.
///
/// Forwards every argument unchanged to
/// `_createElementCallbackWithDefaultnamespace`. `_createElementCallback`
/// falls back to the `.xml` default when `qname.ns` is an empty string.
/// NOTE(review): this appears to be the callback `parseXML` registers — confirm.
///
/// Returns whatever the shared helper returns; that helper records the error
/// on the parser and yields null when element creation fails.
fn createXMLElementCallback(ctx: *anyopaque, data: *anyopaque, qname: h5e.QualName, attributes: h5e.AttributeIterator) callconv(.c) ?*anyopaque {
return _createElementCallbackWithDefaultnamespace(ctx, data, qname, attributes, .xml);
}
fn _createElementCallbackWithDefaultnamespace(ctx: *anyopaque, data: *anyopaque, qname: h5e.QualName, attributes: h5e.AttributeIterator, default_namespace: Element.Namespace) ?*anyopaque {
const self: *Parser = @ptrCast(@alignCast(ctx));
return self._createElementCallback(data, qname, attributes) catch |err| {
return self._createElementCallback(data, qname, attributes, default_namespace) catch |err| {
self.err = .{ .err = err, .source = .create_element };
return null;
};
}
fn _createElementCallback(self: *Parser, data: *anyopaque, qname: h5e.QualName, attributes: h5e.AttributeIterator) !*anyopaque {
fn _createElementCallback(self: *Parser, data: *anyopaque, qname: h5e.QualName, attributes: h5e.AttributeIterator, default_namespace: Element.Namespace) !*anyopaque {
const page = self.page;
const name = qname.local.slice();
const namespace = qname.ns.slice();
const node = try page.createElement(namespace, name, attributes);
const namespace_string = qname.ns.slice();
const namespace = if (namespace_string.len == 0) default_namespace else Element.Namespace.parse(namespace_string);
const node = try page.createElementNS(namespace, name, attributes);
const pn = try self.arena.create(ParsedNode);
pn.* = .{

View File

@@ -19,12 +19,13 @@
testing.expectEqual('http://www.w3.org/XML/1998/namespace', xmlElement.namespaceURI);
const nullNsElement = document.createElementNS(null, 'span');
testing.expectEqual('SPAN', nullNsElement.tagName);
testing.expectEqual('http://www.w3.org/1999/xhtml', nullNsElement.namespaceURI);
testing.expectEqual('span', nullNsElement.tagName);
testing.expectEqual(null, nullNsElement.namespaceURI);
const unknownNsElement = document.createElementNS('http://example.com/unknown', 'custom');
testing.expectEqual('CUSTOM', unknownNsElement.tagName);
testing.expectEqual('http://www.w3.org/1999/xhtml', unknownNsElement.namespaceURI);
testing.expectEqual('custom', unknownNsElement.tagName);
// Should be http://example.com/unknown
testing.expectEqual('http://lightpanda.io/unsupported/namespace', unknownNsElement.namespaceURI);
const regularDiv = document.createElement('div');
testing.expectEqual('DIV', regularDiv.tagName);
@@ -36,5 +37,5 @@
testing.expectEqual('te:ST', custom.tagName);
testing.expectEqual('te', custom.prefix);
testing.expectEqual('ST', custom.localName);
testing.expectEqual('http://www.w3.org/1999/xhtml', custom.namespaceURI); // Should be test
testing.expectEqual('http://lightpanda.io/unsupported/namespace', custom.namespaceURI); // Should be test
</script>

View File

@@ -168,7 +168,7 @@
const root = doc.documentElement;
testing.expectEqual(true, root !== null);
// XML documents preserve the case of element names (no uppercasing)
testing.expectEqual('ROOT', root.tagName);
testing.expectEqual('root', root.tagName);
}
</script>
@@ -206,10 +206,9 @@
const doc = impl.createDocument('http://example.com', 'prefix:localName', null);
const root = doc.documentElement;
// XML documents preserve the case of element names (no uppercasing)
testing.expectEqual('prefix:LOCALNAME', root.tagName);
// TODO: Custom namespaces are being overridden to XHTML namespace
testing.expectEqual('http://www.w3.org/1999/xhtml', root.namespaceURI);
testing.expectEqual('prefix:localName', root.tagName);
// TODO: Custom namespaces are not preserved; they are replaced with a placeholder "unsupported" namespace URI
testing.expectEqual('http://lightpanda.io/unsupported/namespace', root.namespaceURI);
}
</script>
@@ -224,8 +223,7 @@
doc.documentElement.appendChild(child);
testing.expectEqual(1, doc.documentElement.childNodes.length);
// XML documents preserve the case of element names (no uppercasing)
testing.expectEqual('CHILD', doc.documentElement.firstChild.tagName);
testing.expectEqual('child', doc.documentElement.firstChild.tagName);
testing.expectEqual('Test', doc.documentElement.firstChild.textContent);
}
</script>

View File

@@ -364,14 +364,14 @@
];
for (const mime of mimes) {
const doc = parser.parseFromString(sampleXML, "text/xml");
const doc = parser.parseFromString(sampleXML, mime);
const { firstChild: { childNodes, children: collection, tagName }, children } = doc;
// doc.
testing.expectEqual(true, doc instanceof XMLDocument);
testing.expectEqual(1, children.length);
// firstChild.
// Matches modern browsers: the tag name keeps its original lowercase form.
testing.expectEqual("CATALOG", tagName);
testing.expectEqual("catalog", tagName);
testing.expectEqual(25, childNodes.length);
testing.expectEqual(12, collection.length);
// Check children of first child.
@@ -379,12 +379,12 @@
const {children: elements, id} = collection.item(i);
testing.expectEqual("bk" + (100 + i + 1), id);
// Matches modern browsers: tag names keep their original lowercase form.
testing.expectEqual("AUTHOR", elements.item(0).tagName);
testing.expectEqual("TITLE", elements.item(1).tagName);
testing.expectEqual("GENRE", elements.item(2).tagName);
testing.expectEqual("PRICE", elements.item(3).tagName);
testing.expectEqual("PUBLISH_DATE", elements.item(4).tagName);
testing.expectEqual("DESCRIPTION", elements.item(5).tagName);
testing.expectEqual("author", elements.item(0).tagName);
testing.expectEqual("title", elements.item(1).tagName);
testing.expectEqual("genre", elements.item(2).tagName);
testing.expectEqual("price", elements.item(3).tagName);
testing.expectEqual("publish_date", elements.item(4).tagName);
testing.expectEqual("description", elements.item(5).tagName);
}
}
}

View File

@@ -57,26 +57,26 @@ pub fn createHTMLDocument(_: *const DOMImplementation, title: ?[]const u8, page:
_ = try document.asNode().appendChild(doctype.asNode(), page);
}
const html_node = try page.createElement(null, "html", null);
const html_node = try page.createElementNS(.html, "html", null);
_ = try document.asNode().appendChild(html_node, page);
const head_node = try page.createElement(null, "head", null);
const head_node = try page.createElementNS(.html, "head", null);
_ = try html_node.appendChild(head_node, page);
if (title) |t| {
const title_node = try page.createElement(null, "title", null);
const title_node = try page.createElementNS(.html, "title", null);
_ = try head_node.appendChild(title_node, page);
const text_node = try page.createTextNode(t);
_ = try title_node.appendChild(text_node, page);
}
const body_node = try page.createElement(null, "body", null);
const body_node = try page.createElementNS(.html, "body", null);
_ = try html_node.appendChild(body_node, page);
return document;
}
pub fn createDocument(_: *const DOMImplementation, namespace: ?[]const u8, qualified_name: ?[]const u8, doctype: ?*DocumentType, page: *Page) !*Document {
pub fn createDocument(_: *const DOMImplementation, namespace_: ?[]const u8, qualified_name: ?[]const u8, doctype: ?*DocumentType, page: *Page) !*Document {
// Create XML Document
const document = (try page._factory.document(Node.Document.XMLDocument{ ._proto = undefined })).asDocument();
@@ -88,7 +88,8 @@ pub fn createDocument(_: *const DOMImplementation, namespace: ?[]const u8, quali
// Create and append root element if qualified_name provided
if (qualified_name) |qname| {
if (qname.len > 0) {
const root = try page.createElement(namespace, qname, null);
const namespace = if (namespace_) |ns| Node.Element.Namespace.parse(ns) else .xml;
const root = try page.createElementNS(namespace, qname, null);
_ = try document.asNode().appendChild(root, page);
}
}

View File

@@ -124,7 +124,14 @@ const CreateElementOptions = struct {
};
pub fn createElement(self: *Document, name: []const u8, options_: ?CreateElementOptions, page: *Page) !*Element {
const node = try page.createElement(null, name, null);
const namespace: Element.Namespace = blk: {
if (self._type == .xml) {
@branchHint(.unlikely);
break :blk .xml;
}
break :blk .html;
};
const node = try page.createElementNS(namespace, name, null);
const element = node.as(Element);
// Track owner document if it's not the main document
@@ -142,7 +149,7 @@ pub fn createElement(self: *Document, name: []const u8, options_: ?CreateElement
}
pub fn createElementNS(self: *Document, namespace: ?[]const u8, name: []const u8, page: *Page) !*Element {
const node = try page.createElement(namespace, name, null);
const node = try page.createElementNS(Element.Namespace.parse(namespace), name, null);
// Track owner document if it's not the main document
if (self != page.document) {

View File

@@ -53,15 +53,41 @@ pub const Namespace = enum(u8) {
svg,
mathml,
xml,
// We should keep the original value, but don't. If this becomes important
// consider storing it in a page lookup, like `_element_class_lists`, rather
// than adding a slice directly here (directly in every element).
unknown,
null,
pub fn toUri(self: Namespace) []const u8 {
pub fn toUri(self: Namespace) ?[]const u8 {
return switch (self) {
.html => "http://www.w3.org/1999/xhtml",
.svg => "http://www.w3.org/2000/svg",
.mathml => "http://www.w3.org/1998/Math/MathML",
.xml => "http://www.w3.org/XML/1998/namespace",
.unknown => "http://lightpanda.io/unsupported/namespace",
.null => null,
};
}
/// Maps a namespace URI to its `Namespace` tag.
///
/// Returns `.null` when `namespace_` is null, the matching tag for the XHTML,
/// XML, SVG and MathML namespace URIs, and `.unknown` for every other string
/// (including the empty string). The comparison is exact and case-sensitive.
pub fn parse(namespace_: ?[]const u8) Namespace {
    const namespace = namespace_ orelse return .null;

    // Compare the whole URI, not just its length: other URIs have the same
    // length as the XHTML one (e.g. "http://www.w3.org/1999/xlink") and must
    // not be classified as HTML. `std.mem.eql` rejects slices of different
    // lengths before comparing any bytes, so non-matching lengths stay cheap.
    if (std.mem.eql(u8, namespace, "http://www.w3.org/1999/xhtml")) {
        // Common case.
        @branchHint(.likely);
        return .html;
    }

    if (std.mem.eql(u8, namespace, "http://www.w3.org/XML/1998/namespace")) {
        return .xml;
    }

    if (std.mem.eql(u8, namespace, "http://www.w3.org/2000/svg")) {
        return .svg;
    }

    if (std.mem.eql(u8, namespace, "http://www.w3.org/1998/Math/MathML")) {
        return .mathml;
    }

    return .unknown;
}
};
_type: Type,
@@ -211,60 +237,54 @@ pub fn getTagNameLower(self: *const Element) []const u8 {
}
pub fn getTagNameSpec(self: *const Element, buf: []u8) []const u8 {
switch (self._type) {
return switch (self._type) {
.html => |he| switch (he._type) {
.custom => |e| {
@branchHint(.unlikely);
return upperTagName(&e._tag_name, buf);
.anchor => "A",
.body => "BODY",
.br => "BR",
.button => "BUTTON",
.canvas => "CANVAS",
.custom => |e| upperTagName(&e._tag_name, buf),
.data => "DATA",
.dialog => "DIALOG",
.div => "DIV",
.embed => "EMBED",
.form => "FORM",
.generic => |e| upperTagName(&e._tag_name, buf),
.heading => |e| upperTagName(&e._tag_name, buf),
.head => "HEAD",
.html => "HTML",
.hr => "HR",
.iframe => "IFRAME",
.img => "IMG",
.input => "INPUT",
.li => "LI",
.link => "LINK",
.meta => "META",
.media => |m| switch (m._type) {
.audio => "AUDIO",
.video => "VIDEO",
.generic => "MEDIA",
},
else => return switch (he._type) {
.anchor => "A",
.body => "BODY",
.br => "BR",
.button => "BUTTON",
.canvas => "CANVAS",
.custom => |e| upperTagName(&e._tag_name, buf),
.data => "DATA",
.dialog => "DIALOG",
.div => "DIV",
.embed => "EMBED",
.form => "FORM",
.generic => |e| upperTagName(&e._tag_name, buf),
.heading => |e| upperTagName(&e._tag_name, buf),
.head => "HEAD",
.html => "HTML",
.hr => "HR",
.iframe => "IFRAME",
.img => "IMG",
.input => "INPUT",
.li => "LI",
.link => "LINK",
.meta => "META",
.media => |m| switch (m._type) {
.audio => "AUDIO",
.video => "VIDEO",
.generic => "MEDIA",
},
.ol => "OL",
.option => "OPTION",
.p => "P",
.script => "SCRIPT",
.select => "SELECT",
.slot => "SLOT",
.span => "SPAN",
.style => "STYLE",
.template => "TEMPLATE",
.textarea => "TEXTAREA",
.title => "TITLE",
.ul => "UL",
.unknown => |e| switch (self._namespace) {
.html => upperTagName(&e._tag_name, buf),
.svg, .xml, .mathml => return e._tag_name.str(),
},
.ol => "OL",
.option => "OPTION",
.p => "P",
.script => "SCRIPT",
.select => "SELECT",
.slot => "SLOT",
.span => "SPAN",
.style => "STYLE",
.template => "TEMPLATE",
.textarea => "TEXTAREA",
.title => "TITLE",
.ul => "UL",
.unknown => |e| switch (self._namespace) {
.html => upperTagName(&e._tag_name, buf),
.svg, .xml, .mathml, .unknown, .null => e._tag_name.str(),
},
},
.svg => |svg| return svg._tag_name.str(),
}
.svg => |svg| svg._tag_name.str(),
};
}
pub fn getTagNameDump(self: *const Element) []const u8 {
@@ -274,7 +294,7 @@ pub fn getTagNameDump(self: *const Element) []const u8 {
}
}
pub fn getNamespaceURI(self: *const Element) []const u8 {
pub fn getNamespaceURI(self: *const Element) ?[]const u8 {
return self._namespace.toUri();
}
@@ -996,9 +1016,7 @@ pub fn getElementsByClassName(self: *Element, class_name: []const u8, page: *Pag
pub fn cloneElement(self: *Element, deep: bool, page: *Page) !*Node {
const tag_name = self.getTagNameDump();
const namespace_uri = self.getNamespaceURI();
const node = try page.createElement(namespace_uri, tag_name, self._attributes);
const node = try page.createElementNS(self._namespace, tag_name, self._attributes);
// Allow element-specific types to copy their runtime state
_ = Element.Build.call(node.as(Element), "cloned", .{ self, node.as(Element), page }) catch |err| {

View File

@@ -132,7 +132,7 @@ pub fn setTitle(self: *HTMLDocument, title: []const u8, page: *Page) !void {
}
// No title element found, create one
const title_node = try page.createElement(null, "title", null);
const title_node = try page.createElementNS(.html, "title", null);
const title_element = title_node.as(Element);
// Only add text if non-empty

View File

@@ -443,9 +443,9 @@ pub fn createContextualFragment(self: *const Range, html: []const u8, page: *Pag
// Create a temporary element of the same type as the context for parsing
// This preserves the parsing context without modifying the original node
const temp_node = if (context_node.is(Node.Element)) |el|
try page.createElement(el._namespace.toUri(), el.getTagNameLower(), null)
try page.createElementNS(el._namespace, el.getTagNameLower(), null)
else
try page.createElement(null, "div", null);
try page.createElementNS(.html, "div", null);
try page.parseHtmlAsChildren(temp_node, html);

View File

@@ -10,7 +10,7 @@ const Image = @This();
_proto: *HtmlElement,
pub fn constructor(w_: ?u32, h_: ?u32, page: *Page) !*Image {
const node = try page.createElement(null, "img", null);
const node = try page.createElementNS(.html, "img", null);
const el = node.as(Element);
if (w_) |w| blk: {