Setting innerHTML now captures head elements

I couldn't find where the behavior is described. Also, browsers seem to behave
differently depending on the state of the page (blank document vs. actual page).

Still, some sites use innerHTML to load <script> tags, and, in libdom at least,
these are created in the implicit head. We cannot just copy the body nodes. To
keep it simple, I now copy all head and body elements.
This commit is contained in:
Karl Seguin
2025-07-09 12:48:44 +08:00
parent 72083c8614
commit e82240a60e
2 changed files with 38 additions and 21 deletions

View File

@@ -127,19 +127,43 @@ pub const Element = struct {
// remove existing children
try Node.removeChildren(node);
// get fragment body children
const children = try parser.documentFragmentBodyChildren(fragment) orelse return;
// append children to the node
// I'm not sure what the exact behavior is supposed to be. Initially,
// we were only copying the body of the document fragment. But it seems
// like head elements should be copied too. Specifically, some sites
// create script tags via innerHTML, which we need to capture.
// If you play with this in a browser, you should notice that the
// behavior is different depending on whether you're in a blank page
// or an actual document. In a blank page, something like:
// x.innerHTML = '<script></script>';
// does _not_ create an empty script, but in a real page, it does. Weird.
const fragment_node = parser.documentFragmentToNode(fragment);
const html = try parser.nodeFirstChild(fragment_node) orelse return;
const head = try parser.nodeFirstChild(html) orelse return;
{
// First, copy every child of the head element
const children = try parser.nodeGetChildNodes(head);
const ln = try parser.nodeListLength(children);
for (0..ln) |_| {
// always index 0, because ndoeAppendChild moves the node out of
// always index 0, because nodeAppendChild moves the node out of
// the nodeList and into the new tree
const child = try parser.nodeListItem(children, 0) orelse continue;
_ = try parser.nodeAppendChild(node, child);
}
}
{
const body = try parser.nodeNextSibling(head) orelse return;
const children = try parser.nodeGetChildNodes(body);
const ln = try parser.nodeListLength(children);
for (0..ln) |_| {
// always index 0, because nodeAppendChild moves the node out of
// the nodeList and into the new tree
const child = try parser.nodeListItem(children, 0) orelse continue;
_ = try parser.nodeAppendChild(node, child);
}
}
}
// The closest() method of the Element interface traverses the element and its parents (heading toward the document root) until it finds a node that matches the specified CSS selector.
// Returns the closest ancestor Element (or the element itself) that matches the selectors, or null if there is no such element.
pub fn _closest(self: *parser.Element, selector: []const u8, page: *Page) !?*parser.Element {
@@ -688,5 +712,10 @@ test "Browser.DOM.Element" {
try runner.testCases(&.{
.{ "document.createElement('a').hasAttributes()", "false" },
.{ "var fc; (fc = document.createElement('div')).innerHTML = '<script><\\/script>'", null },
.{ "fc.outerHTML", "<div><script></script></div>" },
.{ "fc; (fc = document.createElement('div')).innerHTML = '<script><\\/script><p>hello</p>'", null },
.{ "fc.outerHTML", "<div><script></script><p>hello</p></div>" },
}, .{});
}

View File

@@ -1925,18 +1925,6 @@ pub inline fn documentFragmentToNode(doc: *DocumentFragment) *Node {
return @as(*Node, @alignCast(@ptrCast(doc)));
}
/// Returns the child node list of the node reached by walking
/// fragment -> first child (treated as `html`) -> its first child (treated as
/// `head`) -> that node's next sibling (treated as `body`).
///
/// Returns null as soon as any step of that walk yields no node. Errors from
/// the underlying node accessors are propagated to the caller.
pub fn documentFragmentBodyChildren(doc: *DocumentFragment) !?*NodeList {
    const fragment_root = documentFragmentToNode(doc);

    // TODO unref: none of the intermediate nodes fetched below (html, head,
    // body) are unref'd yet.
    const html_node = (try nodeFirstChild(fragment_root)) orelse return null;
    const head_node = (try nodeFirstChild(html_node)) orelse return null;
    const body_node = (try nodeNextSibling(head_node)) orelse return null;

    return try nodeGetChildNodes(body_node);
}
// Document Position
pub const DocumentPosition = enum(u32) {