From 7c7240d5ab35390458557e938dd998a75f24737f Mon Sep 17 00:00:00 2001
From: Karl Seguin
Date: Tue, 30 Dec 2025 10:07:56 +0800
Subject: [PATCH] Try to protect against invalid use of document.write

Specifically, try to block multiple document.write calls which, when
combined, contain multiple html documents.
---
 src/browser/parser/Parser.zig    | 14 ++++++++++++--
 src/browser/parser/html5ever.zig |  2 +-
 src/browser/webapi/Document.zig  | 25 +++++++++++++++++++++---
 src/html5ever/lib.rs             | 33 +++++++++++++++++++-------------
 4 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig
index 651a1d45..3b2bebcd 100644
--- a/src/browser/parser/Parser.zig
+++ b/src/browser/parser/Parser.zig
@@ -161,12 +161,22 @@ pub const Streaming = struct {
         ) orelse return error.ParserCreationFailed;
     }
 
-    pub fn read(self: *Streaming, data: []const u8) void {
-        h5e.html5ever_streaming_parser_feed(
+    pub fn read(self: *Streaming, data: []const u8) !void {
+        const result = h5e.html5ever_streaming_parser_feed(
             self.handle.?,
             data.ptr,
             data.len,
         );
+
+        if (result != 0) {
+            // Parser panicked - clean up and return error
+            // Note: deinit will destroy the handle if it exists
+            if (self.handle) |handle| {
+                h5e.html5ever_streaming_parser_destroy(handle);
+                self.handle = null;
+            }
+            return error.ParserPanic;
+        }
     }
 
     pub fn done(self: *Streaming) void {
diff --git a/src/browser/parser/html5ever.zig b/src/browser/parser/html5ever.zig
index b200a461..afa22494 100644
--- a/src/browser/parser/html5ever.zig
+++ b/src/browser/parser/html5ever.zig
@@ -94,7 +94,7 @@ pub extern "c" fn html5ever_streaming_parser_feed(
     parser: *anyopaque,
     html: [*c]const u8,
     len: usize,
-) void;
+) c_int;
 
 pub extern "c" fn html5ever_streaming_parser_finish(
     parser: *anyopaque,
diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig
index f79ccc73..6124e23b 100644
--- a/src/browser/webapi/Document.zig
+++ b/src/browser/webapi/Document.zig
@@ -424,6 +424,21 @@ pub fn getDocType(_: *const Document) ?*DocumentType {
     return null;
 }
 
+// document.write is complicated and works differently based on the state of
+// parsing. But, generally, it's supposed to be additive/streaming: multiple
+// document.writes are parsed as a single unit. Well, that causes issues with
+// html5ever if we're trying to parse 1 document which is really many. So we
+// try to detect "new" documents. (This is particularly problematic because we
+// don't have proper frame support, so a document.write into a frame can get
+// sent to the main document (instead of the frame document)...and it's
+// completely reasonable for 2 frames to document.write("<html>...</html>")
+// into their own frame.)
+fn looksLikeNewDocument(html: []const u8) bool {
+    const trimmed = std.mem.trimLeft(u8, html, &std.ascii.whitespace);
+    return std.ascii.startsWithIgnoreCase(trimmed, "<!doctype") or
+        std.ascii.startsWithIgnoreCase(trimmed, "<html");
+}
+
@@ ... @@ pub fn write
-        self._script_created_parser.?.read(html);
+        if (html.len > 0) {
+            self._script_created_parser.?.read(html) catch |err| {
+                log.warn(.dom, "document.write parser error", .{ .err = err });
+                // was already closed
+                self._script_created_parser = null;
+            };
+        }
         return;
     }
diff --git a/src/html5ever/lib.rs b/src/html5ever/lib.rs
index e03a7ee1..308001de 100644
--- a/src/html5ever/lib.rs
+++ b/src/html5ever/lib.rs
@@ -266,25 +266,32 @@ pub extern "C" fn html5ever_streaming_parser_feed(
     parser_ptr: *mut c_void,
     html: *const c_uchar,
     len: usize,
-) {
+) -> i32 {
     if parser_ptr.is_null() || html.is_null() || len == 0 {
-        return;
+        return 0;
     }
 
-    let streaming_parser = unsafe { &mut *(parser_ptr as *mut StreamingParser) };
-    let bytes = unsafe { std::slice::from_raw_parts(html, len) };
+    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        let streaming_parser = unsafe { &mut *(parser_ptr as *mut StreamingParser) };
+        let bytes = unsafe { std::slice::from_raw_parts(html, len) };
 
-    // Convert bytes to UTF-8 string
-    if let Ok(s) = std::str::from_utf8(bytes) {
-        let tendril = StrTendril::from(s);
+        // Convert bytes to UTF-8 string
+        if let Ok(s) = std::str::from_utf8(bytes) {
+            let tendril = StrTendril::from(s);
 
-        // Feed the chunk to the parser
-        // The Parser implements TendrilSink, so we can call process() on it
-        let parser = streaming_parser.parser
-            .downcast_mut::<Parser<Sink>>()
-            .expect("Invalid parser type");
+            // Feed the chunk to the parser
+            // The Parser implements TendrilSink, so we can call process() on it
+            let parser = streaming_parser.parser
+                .downcast_mut::<Parser<Sink>>()
+                .expect("Invalid parser type");
 
-        parser.process(tendril);
+            parser.process(tendril);
+        }
+    }));
+
+    match result {
+        Ok(_) => 0,  // Success
+        Err(_) => -1, // Panic occurred
     }
 }
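
For reference, the "new document" heuristic above boils down to a case-insensitive
prefix check on the whitespace-trimmed chunk. Below is a minimal standalone sketch
of that idea, assuming the "<!doctype"/"<html" prefixes and using only the Zig
standard library; the names and test cases are illustrative, not part of the patch:

    const std = @import("std");

    // A chunk that starts (after leading whitespace) with a doctype or an
    // <html> tag is treated as the start of a new document rather than a
    // continuation of the document currently being parsed.
    fn looksLikeNewDocument(html: []const u8) bool {
        const trimmed = std.mem.trimLeft(u8, html, &std.ascii.whitespace);
        return std.ascii.startsWithIgnoreCase(trimmed, "<!doctype") or
            std.ascii.startsWithIgnoreCase(trimmed, "<html");
    }

    test "looksLikeNewDocument heuristic" {
        try std.testing.expect(looksLikeNewDocument("  <!DOCTYPE html><html></html>"));
        try std.testing.expect(looksLikeNewDocument("<HTML lang=\"en\"><body></body></HTML>"));
        try std.testing.expect(!looksLikeNewDocument("<p>appended content</p>"));
        try std.testing.expect(!looksLikeNewDocument("plain text"));
    }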