document.write, document.close, document.open

Add support for both modes - parsing and post-parsing. In post-parsing mode,
document.write implicitly calls document.open, and document.open wipes the
document. This mode is probably rarely, if ever, used.

However, while parsing, document.write does not call document.open and does not
remove all existing nodes. It just writes the HTML into the document where the
parser is. That isn't something we can properly do... but we can hack it. We
create a new DocumentFragment, parse the HTML into the document fragment, then
transfer the children into the document where we currently are.

Our hack probably doesn't work for some advanced usages of document.write (e.g.
nested calls), but it should work for the more common cases, e.g. injecting a script
tag.
This commit is contained in:
Karl Seguin
2025-12-19 21:29:28 +08:00
parent f475aa09e8
commit 3d6af216dc
4 changed files with 305 additions and 4 deletions

View File

@@ -74,7 +74,7 @@ _session: *Session,
_event_manager: EventManager,
_parse_mode: enum { document, fragment },
_parse_mode: enum { document, fragment, document_write },
// See Attribute.List for what this is. TL;DR: proper DOM Attribute Nodes are
// fat yet rarely needed. We only create them on-demand, but still need proper

View File

@@ -727,6 +727,11 @@ pub const Script = struct {
page.document._current_script = script_element;
defer page.document._current_script = previous_script;
// Clear the document.write insertion point for this script
const previous_write_insertion_point = page.document._write_insertion_point;
page.document._write_insertion_point = null;
defer page.document._write_insertion_point = previous_write_insertion_point;
// inline scripts aren't cached. remote ones are.
const cacheable = self.source == .remote;

View File

@@ -0,0 +1,161 @@
<!DOCTYPE html>
<head>
<title>document.write Tests</title>
<script src="../testing.js"></script>
</head>
<body>
<!-- Phase 1 Tests: Basic HTML (no scripts) -->
<!-- Each test is split in two scripts: one that writes, and a following -->
<!-- one that verifies the written nodes are reachable in the document. -->
<script id=basic_write_and_verify>
  document.write('<h1 id="written">Hello</h1>');
  // Add a simple assertion so the test framework doesn't complain
  testing.expectEqual(true, true);
</script>
<script id=verify_basic>
  const written = document.getElementById('written');
  testing.expectEqual('Hello', written.textContent);
  testing.expectEqual('H1', written.tagName);
</script>
<script id=multiple_writes>
  // Two writes from the same script: the second must land after the first
  document.write('<div id="a">A</div>');
  document.write('<div id="b">B</div>');
  testing.expectEqual(true, true);
</script>
<script id=verify_multiple>
  const a = document.getElementById('a');
  const b = document.getElementById('b');
  testing.expectEqual('A', a.textContent);
  testing.expectEqual('B', b.textContent);
  // Verify they're siblings in the correct order
  testing.expectEqual(b, a.nextElementSibling);
</script>
<script id=write_with_attributes>
  document.write('<div id="styled" class="foo bar" data-value="123">Content</div>');
  testing.expectEqual(true, true);
</script>
<script id=verify_attributes>
  const el = document.getElementById('styled');
  testing.expectEqual('foo bar', el.className);
  testing.expectEqual('123', el.getAttribute('data-value'));
  testing.expectEqual('Content', el.textContent);
</script>
<script id=write_multiple_elements>
  // A single write containing two sibling elements
  document.write('<p id="p1">First</p><p id="p2">Second</p>');
  testing.expectEqual(true, true);
</script>
<script id=verify_multiple_elements>
  const p1 = document.getElementById('p1');
  const p2 = document.getElementById('p2');
  testing.expectEqual('First', p1.textContent);
  testing.expectEqual('Second', p2.textContent);
  testing.expectEqual(p2, p1.nextElementSibling);
</script>
<script id=write_nested_elements>
  document.write('<div id="outer"><span id="inner">Nested</span></div>');
  testing.expectEqual(true, true);
</script>
<script id=verify_nested>
  const outer = document.getElementById('outer');
  const inner = document.getElementById('inner');
  testing.expectEqual(outer, inner.parentElement);
  testing.expectEqual('Nested', inner.textContent);
</script>
<!-- Phase 2 Tests: Script execution -->
<script id=write_script>
  // The written script must execute, not just be inserted
  document.write('<script id="written_script">window.executed = true; testing.expectEqual(true, true);<\/script>');
  testing.expectEqual(true, true);
</script>
<script id=verify_script_executed>
  testing.expectEqual(true, window.executed);
  testing.expectEqual('SCRIPT', document.getElementById('written_script').tagName);
</script>
<script id=written_script_can_write>
  // A written script that itself calls document.write
  document.write('<script id="nested_writer">document.write("<div id=\\"nested\\">OK</div>"); testing.expectEqual(true, true);<\/script>');
  testing.expectEqual(true, true);
</script>
<script id=verify_nested_write>
  const nested = document.getElementById('nested');
  testing.expectEqual('OK', nested.textContent);
</script>
<div id="before_script">Before</div>
<script id=written_script_sees_dom>
  // A written script must be able to reach nodes parsed before it
  document.write('<script id="dom_accessor">document.getElementById("before_script").setAttribute("data-modified", "yes"); testing.expectEqual(true, true);<\/script>');
  testing.expectEqual(true, true);
</script>
<script id=verify_dom_modification>
  const beforeScript = document.getElementById('before_script');
  testing.expectEqual('yes', beforeScript.getAttribute('data-modified'));
</script>
<!-- Phase 3 Tests: document.open/close/write -->
<!-- First, just check that the methods exist. The behavioural open/close -->
<!-- test needs setTimeout (to run post-parsing) and follows further below. -->
<script id=final_assertion>
  // Just verify the methods exist
  testing.expectEqual('function', typeof document.open);
  testing.expectEqual('function', typeof document.close);
  testing.expectEqual('function', typeof document.write);
</script>
<!-- Phase 3 Tests: document.open/close (post-parsing with setTimeout) -->
<div id="will_be_removed">This will be removed by document.open()</div>
<script id=test_open_close_async>
  // Mark that we saw the element before
  const sawBefore = document.getElementById('will_be_removed') !== null;
  testing.expectEqual(true, sawBefore);
  // Use setTimeout to ensure we're after parsing completes
  setTimeout(() => {
    document.open();
  }, 5);
  testing.eventually(() => {
    // The element should be gone now
    const afterOpen = document.getElementById('will_be_removed');
    testing.expectEqual(null, afterOpen);
    // Write new content
    document.write('<html><body>');
    document.write('<h1 id="new_content">Replaced</h1>');
    document.write('</body></html>');
    // Close the document
    document.close();
    // Verify new content exists
    const newContent = document.getElementById('new_content');
    testing.expectEqual('Replaced', newContent.textContent);
  })
</script>
<script>
  // doing this after test_open_close_async used to crash, so we keep it
  // to make sure it doesn't
  setTimeout(() => {
    document.open();
    document.close();
  }, 20);
</script>
</body>

View File

@@ -26,6 +26,7 @@ const URL = @import("../URL.zig");
const Node = @import("Node.zig");
const Element = @import("Element.zig");
const Location = @import("Location.zig");
const Parser = @import("../parser/Parser.zig");
const collections = @import("collections.zig");
const Selector = @import("selector/Selector.zig");
const NodeFilter = @import("NodeFilter.zig");
@@ -34,8 +35,8 @@ const DOMNodeIterator = @import("DOMNodeIterator.zig");
const DOMImplementation = @import("DOMImplementation.zig");
const StyleSheetList = @import("css/StyleSheetList.zig");
pub const HTMLDocument = @import("HTMLDocument.zig");
pub const XMLDocument = @import("XMLDocument.zig");
pub const HTMLDocument = @import("HTMLDocument.zig");
const Document = @This();
@@ -47,6 +48,8 @@ _current_script: ?*Element.Html.Script = null,
_elements_by_id: std.StringHashMapUnmanaged(*Element) = .empty,
_active_element: ?*Element = null,
_style_sheets: ?*StyleSheetList = null,
_write_insertion_point: ?*Node = null,
_script_created_parser: ?Parser.Streaming = null,
pub const Type = union(enum) {
generic,
@@ -233,8 +236,8 @@ pub fn getImplementation(_: *const Document) DOMImplementation {
return .{};
}
pub fn createDocumentFragment(_: *const Document, page: *Page) !*@import("DocumentFragment.zig") {
return @import("DocumentFragment.zig").init(page);
pub fn createDocumentFragment(_: *const Document, page: *Page) !*Node.DocumentFragment {
return Node.DocumentFragment.init(page);
}
pub fn createComment(_: *const Document, data: []const u8, page: *Page) !*Node {
@@ -401,6 +404,135 @@ pub fn elementsFromPoint(self: *Document, x: f64, y: f64, page: *Page) ![]const
return result.items;
}
/// Implements `document.write`: concatenates `text` and feeds the resulting
/// HTML into the document.
///
/// Two modes are handled:
/// - Post-parsing (there is no current script, or the page is not in the
///   `.parsing` load state): `open` is called when no script-created parser
///   exists yet, then the HTML is streamed into that parser.
/// - Inline script while parsing: the HTML is parsed into a temporary
///   DocumentFragment and the resulting nodes are moved into the script's
///   parent, after the script (or after the last node inserted by a previous
///   write, tracked in `_write_insertion_point`).
///
/// Returns `error.InvalidStateError` for XML documents. Allocation failures
/// from `page.call_arena` and errors from `open` / `insertNodeRelative` are
/// propagated.
pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
    if (self._type == .xml) {
        return error.InvalidStateError;
    }

    // Join every argument into one buffer allocated from page.call_arena.
    const html = blk: {
        var joined: std.ArrayList(u8) = .empty;
        for (text) |str| {
            try joined.appendSlice(page.call_arena, str);
        }
        break :blk joined.items;
    };

    if (self._current_script == null or page._load_state != .parsing) {
        // Post-parsing (destructive behavior)
        if (self._script_created_parser == null) {
            _ = try self.open(page);
        }

        // open() returns without creating a parser while the page is still
        // parsing, which is reachable here when there is no current script.
        // In that case there is nothing to stream into, so the write is
        // dropped instead of force-unwrapping a null optional.
        if (self._script_created_parser) |*streaming| {
            if (html.len > 0) {
                streaming.read(html);
            }
        }
        return;
    }

    // Inline script during parsing
    const script = self._current_script.?; // non-null: checked by the branch above
    const parent = script.asNode().parentNode() orelse return;

    // Our implementation is hacky. We'll write to a DocumentFragment, then
    // append its children.
    const fragment = try Node.DocumentFragment.init(page);
    const fragment_node = fragment.asNode();

    // Parse in .document_write mode, restoring the previous mode on every
    // exit path.
    const previous_parse_mode = page._parse_mode;
    page._parse_mode = .document_write;
    defer page._parse_mode = previous_parse_mode;

    var parser = Parser.init(page.call_arena, fragment_node, page);
    parser.parseFragment(html);

    // Extract children from wrapper HTML element (html5ever wraps fragments)
    // https://github.com/servo/html5ever/issues/583
    const children = fragment_node._children orelse return;
    const first = children.first();

    // Collect all children to insert (to avoid iterator invalidation)
    var children_to_insert: std.ArrayList(*Node) = .empty;
    var it = if (first.is(Element.Html.Html) == null) fragment_node.childrenIterator() else first.childrenIterator();
    while (it.next()) |child| {
        try children_to_insert.append(page.call_arena, child);
    }

    if (children_to_insert.items.len == 0) {
        return;
    }

    // Determine insertion point:
    // - If _write_insertion_point is set, continue from there (subsequent write)
    // - Otherwise, start after the script (first write)
    var insert_after: *Node = self._write_insertion_point orelse script.asNode();
    for (children_to_insert.items) |child| {
        // Clear parent pointer (child is currently parented to fragment/HTML wrapper)
        child._parent = null;
        try page.insertNodeRelative(parent, child, .{ .after = insert_after }, .{});
        insert_after = child;
    }

    page.domChanged();

    // Remember where the next write from the same script should continue.
    self._write_insertion_point = children_to_insert.getLast();
}
/// Implements `document.open`.
///
/// Returns the document untouched while the page is in the `.parsing` load
/// state or when a script-created parser already exists. Otherwise every
/// child of the document is removed, the id map, active element, style
/// sheets and ready state are reset, and a new streaming parser targeting the
/// document node is created and started.
///
/// Returns `error.InvalidStateError` for XML documents; an error from
/// starting the parser is propagated.
pub fn open(self: *Document, page: *Page) !*Document {
    if (self._type == .xml) {
        return error.InvalidStateError;
    }

    // Nothing to do while parsing, or when a script-created parser is
    // already in place.
    const already_open = self._script_created_parser != null;
    if (page._load_state == .parsing or already_open) {
        return self;
    }

    // If we aren't parsing, then open clears the document: detach every
    // child of the document node.
    const root = self.asNode();
    var child_it = root.childrenIterator();
    while (child_it.next()) |node| {
        page.removeNode(root, node, .{ .will_be_reconnected = false });
    }

    // Reset the per-document state.
    self._ready_state = .loading;
    self._style_sheets = null;
    self._active_element = null;
    self._elements_by_id.clearAndFree(page.arena);

    // Store the parser in the document first, then start it in place.
    self._script_created_parser = Parser.Streaming.init(page.arena, root, page);
    try self._script_created_parser.?.start();

    page._parse_mode = .document;
    return self;
}
/// Implements `document.close`.
///
/// Does nothing when no script-created parser exists. Otherwise finishes the
/// parser, drops it from the document and notifies the page that the
/// document is complete.
///
/// Returns `error.InvalidStateError` for XML documents.
pub fn close(self: *Document, page: *Page) !void {
    if (self._type == .xml) {
        return error.InvalidStateError;
    }

    if (self._script_created_parser) |*streaming| {
        // done() calls html5ever_streaming_parser_finish which frees the
        // parser, so deinit() must NOT be called afterwards: that would be a
        // double-free.
        streaming.done();

        // The handle was already freed by done(); only null it out.
        streaming.handle = null;
    } else {
        return;
    }

    self._script_created_parser = null;
    page.documentIsComplete();
}
const ReadyState = enum {
loading,
interactive,
@@ -463,6 +595,9 @@ pub const JsApi = struct {
pub const prepend = bridge.function(Document.prepend, .{});
pub const elementFromPoint = bridge.function(Document.elementFromPoint, .{});
pub const elementsFromPoint = bridge.function(Document.elementsFromPoint, .{});
pub const write = bridge.function(Document.write, .{ .dom_exception = true });
pub const open = bridge.function(Document.open, .{ .dom_exception = true });
pub const close = bridge.function(Document.close, .{ .dom_exception = true });
pub const defaultView = bridge.accessor(struct {
fn defaultView(_: *const Document, page: *Page) *@import("Window.zig") {