From ec71f8e2d906647ccfd31dc18bac7e310d8f349f Mon Sep 17 00:00:00 2001
From: Pierre Tachoire
Date: Wed, 20 Aug 2025 15:27:15 +0200
Subject: [PATCH] handle text content type with HTML

For text content type (and application/json) we create a pseudo HTML tree
with the text value in a <pre> tag.

It allows CDP clients to interact with text content easily.
---
 src/browser/page.zig | 45 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/browser/page.zig b/src/browser/page.zig
index 0189ae82..758d4a55 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -109,6 +109,7 @@ pub const Page = struct {
         err: anyerror,
         parsed: void,
         html: parser.Parser,
+        text: parser.Parser,
         raw: std.ArrayListUnmanaged(u8),
         raw_done: []const u8,
     };
@@ -207,6 +208,14 @@ pub const Page = struct {
                 return out.writeAll(buf.items);
             },
             .raw_done => |data| return out.writeAll(data),
+            .text => {
+                // processed below, along with .html
+                // return the <pre> element from the HTML
+                const doc = parser.documentHTMLToDocument(self.window.document);
+                const list = try parser.documentGetElementsByTagName(doc, "pre");
+                const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML;
+                return Dump.writeChildren(pre, .{}, out);
+            },
             .html => {
                 // maybe page.wait timed-out, print what we have
                 log.warn(.http, "incomplete load", .{ .mode = "html" });
@@ -284,7 +293,7 @@ pub const Page = struct {
 
         while (true) {
             SW: switch (self.mode) {
-                .pre, .raw => {
+                .pre, .raw, .text => {
                     if (self.request_intercepted) {
                         // the page request was intercepted.
 
@@ -627,18 +636,27 @@ pub const Page = struct {
                 break :blk Mime.sniff(data);
             } orelse .unknown;
 
-            const is_html = mime.isHTML();
-            log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });
+            log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
 
-            if (is_html) {
-                self.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
-            } else {
-                self.mode = .{ .raw = .{} };
-            }
+            self.mode = switch (mime.content_type) {
+                .text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
+
+                .application_json,
+                .text_javascript,
+                .text_css,
+                .text_plain,
+                => blk: {
+                    var p = try parser.Parser.init(mime.charset orelse "UTF-8");
+                    try p.process("<pre>");
+                    break :blk .{ .text = p };
+                },
+
+                else => .{ .raw = .{} },
+            };
         }
 
         switch (self.mode) {
-            .html => |*p| try p.process(data),
+            .html, .text => |*p| try p.process(data),
             .raw => |*buf| try buf.appendSlice(self.arena, data),
             .pre => unreachable,
             .parsed => unreachable,
@@ -658,6 +676,13 @@ pub const Page = struct {
                 self.mode = .{ .raw_done = buf.items };
                 self.documentIsComplete();
             },
+            .text => |*p| {
+                try p.process("</pre>");
+                const html_doc = p.html_doc;
+                p.deinit(); // don't need the parser anymore
+                try self.setDocument(html_doc);
+                self.documentIsComplete();
+            },
             .html => |*p| {
                 const html_doc = p.html_doc;
                 p.deinit(); // don't need the parser anymore
@@ -719,7 +744,7 @@ pub const Page = struct {
         self.clearTransferArena();
 
         switch (self.mode) {
-            .html => |*p| p.deinit(), // don't need the parser anymore
+            .html, .text => |*p| p.deinit(), // don't need the parser anymore
             else => {},
         }
         self.mode = .{ .err = err };