Try to protect against invalid use of document.write

Specifically, try to block multiple document.write calls which, when combined,
contain multiple HTML documents.
This commit is contained in:
Karl Seguin
2025-12-30 10:07:56 +08:00
parent 169582c992
commit 7c7240d5ab
4 changed files with 55 additions and 19 deletions

View File

@@ -161,12 +161,22 @@ pub const Streaming = struct {
) orelse return error.ParserCreationFailed; ) orelse return error.ParserCreationFailed;
} }
pub fn read(self: *Streaming, data: []const u8) void { pub fn read(self: *Streaming, data: []const u8) !void {
h5e.html5ever_streaming_parser_feed( const result = h5e.html5ever_streaming_parser_feed(
self.handle.?, self.handle.?,
data.ptr, data.ptr,
data.len, data.len,
); );
if (result != 0) {
// Parser panicked - clean up and return error
// Note: deinit will destroy the handle if it exists
if (self.handle) |handle| {
h5e.html5ever_streaming_parser_destroy(handle);
self.handle = null;
}
return error.ParserPanic;
}
} }
pub fn done(self: *Streaming) void { pub fn done(self: *Streaming) void {

View File

@@ -94,7 +94,7 @@ pub extern "c" fn html5ever_streaming_parser_feed(
parser: *anyopaque, parser: *anyopaque,
html: [*c]const u8, html: [*c]const u8,
len: usize, len: usize,
) void; ) c_int;
pub extern "c" fn html5ever_streaming_parser_finish( pub extern "c" fn html5ever_streaming_parser_finish(
parser: *anyopaque, parser: *anyopaque,

View File

@@ -424,6 +424,21 @@ pub fn getDocType(_: *const Document) ?*DocumentType {
return null; return null;
} }
// document.write is complicated and works differently based on the state of
// parsing. But, generally, it's supposed to be additive/streaming. Multiple
// document.writes are parsed as a single unit. Well, that causes issues with
// html5ever if we're trying to parse 1 document which is really many. So we
// try to detect "new" documents. (This is particularly problematic because we
// don't have proper frame support, so document.write into a frame can get
// sent to the main document (instead of the frame document)...and it's completely
// reasonable for 2 frames to document.write("<html>...</html>") into their own
// frame.)
/// Reports whether `html`, once leading ASCII whitespace is skipped, begins
/// with a doctype declaration or an opening `<html` tag. The comparison is
/// case-insensitive.
fn looksLikeNewDocument(html: []const u8) bool {
    // Leading whitespace does not affect the check.
    const body = std.mem.trimLeft(u8, html, &std.ascii.whitespace);

    // Prefixes that mark the start of a standalone document.
    const markers = [_][]const u8{ "<!DOCTYPE", "<html" };
    for (markers) |marker| {
        if (std.ascii.startsWithIgnoreCase(body, marker)) return true;
    }
    return false;
}
pub fn write(self: *Document, text: []const []const u8, page: *Page) !void { pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
if (self._type == .xml) { if (self._type == .xml) {
return error.InvalidStateError; return error.InvalidStateError;
@@ -438,12 +453,16 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
}; };
if (self._current_script == null or page._load_state != .parsing) { if (self._current_script == null or page._load_state != .parsing) {
// Post-parsing (destructive behavior) if (self._script_created_parser == null or looksLikeNewDocument(html)) {
if (self._script_created_parser == null) {
_ = try self.open(page); _ = try self.open(page);
} }
if (html.len > 0) { if (html.len > 0) {
self._script_created_parser.?.read(html); self._script_created_parser.?.read(html) catch |err| {
log.warn(.dom, "document.write parser error", .{ .err = err });
// was already closed
self._script_created_parser = null;
};
} }
return; return;
} }

View File

@@ -266,25 +266,32 @@ pub extern "C" fn html5ever_streaming_parser_feed(
parser_ptr: *mut c_void, parser_ptr: *mut c_void,
html: *const c_uchar, html: *const c_uchar,
len: usize, len: usize,
) { ) -> i32 {
if parser_ptr.is_null() || html.is_null() || len == 0 { if parser_ptr.is_null() || html.is_null() || len == 0 {
return; return 0;
} }
let streaming_parser = unsafe { &mut *(parser_ptr as *mut StreamingParser) }; let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
let bytes = unsafe { std::slice::from_raw_parts(html, len) }; let streaming_parser = unsafe { &mut *(parser_ptr as *mut StreamingParser) };
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
// Convert bytes to UTF-8 string // Convert bytes to UTF-8 string
if let Ok(s) = std::str::from_utf8(bytes) { if let Ok(s) = std::str::from_utf8(bytes) {
let tendril = StrTendril::from(s); let tendril = StrTendril::from(s);
// Feed the chunk to the parser // Feed the chunk to the parser
// The Parser implements TendrilSink, so we can call process() on it // The Parser implements TendrilSink, so we can call process() on it
let parser = streaming_parser.parser let parser = streaming_parser.parser
.downcast_mut::<Parser<sink::Sink>>() .downcast_mut::<Parser<sink::Sink>>()
.expect("Invalid parser type"); .expect("Invalid parser type");
parser.process(tendril); parser.process(tendril);
}
}));
match result {
Ok(_) => 0, // Success
Err(_) => -1, // Panic occurred
} }
} }