From ec71f8e2d906647ccfd31dc18bac7e310d8f349f Mon Sep 17 00:00:00 2001
From: Pierre Tachoire
Date: Wed, 20 Aug 2025 15:27:15 +0200
Subject: [PATCH 1/3] handle text content type with HTML

For text content type (and application/json) we create a pseudo HTML tree
with the text value in a <pre> tag.

It allows CDP clients to interact with text content easily.
---
 src/browser/page.zig | 45 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/browser/page.zig b/src/browser/page.zig
index 0189ae82..758d4a55 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -109,6 +109,7 @@ pub const Page = struct {
         err: anyerror,
         parsed: void,
         html: parser.Parser,
+        text: parser.Parser,
         raw: std.ArrayListUnmanaged(u8),
         raw_done: []const u8,
     };
@@ -207,6 +208,14 @@ pub const Page = struct {
                 return out.writeAll(buf.items);
             },
             .raw_done => |data| return out.writeAll(data),
+            .text => {
+                // processed below, along with .html
+                // return the <pre> element from the HTML
+                const doc = parser.documentHTMLToDocument(self.window.document);
+                const list = try parser.documentGetElementsByTagName(doc, "pre");
+                const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML;
+                return Dump.writeChildren(pre, .{}, out);
+            },
             .html => {
                 // maybe page.wait timed-out, print what we have
                 log.warn(.http, "incomplete load", .{ .mode = "html" });
@@ -284,7 +293,7 @@ pub const Page = struct {
 
         while (true) {
             SW: switch (self.mode) {
-                .pre, .raw => {
+                .pre, .raw, .text => {
                     if (self.request_intercepted) {
                         // the page request was intercepted.
 
@@ -627,18 +636,27 @@ pub const Page = struct {
                 break :blk Mime.sniff(data);
             } orelse .unknown;
 
-            const is_html = mime.isHTML();
-            log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });
+            log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
 
-            if (is_html) {
-                self.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
-            } else {
-                self.mode = .{ .raw = .{} };
-            }
+            self.mode = switch (mime.content_type) {
+                .text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
+
+                .application_json,
+                .text_javascript,
+                .text_css,
+                .text_plain,
+                => blk: {
+                    var p = try parser.Parser.init(mime.charset orelse "UTF-8");
+                    try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>");
+                    break :blk .{ .text = p };
+                },
+
+                else => .{ .raw = .{} },
+            };
         }
 
         switch (self.mode) {
-            .html => |*p| try p.process(data),
+            .html, .text => |*p| try p.process(data),
             .raw => |*buf| try buf.appendSlice(self.arena, data),
             .pre => unreachable,
             .parsed => unreachable,
@@ -658,6 +676,13 @@ pub const Page = struct {
                 self.mode = .{ .raw_done = buf.items };
                 self.documentIsComplete();
             },
+            .text => |*p| {
+                try p.process("</pre></body></html>");
+                const html_doc = p.html_doc;
+                p.deinit(); // don't need the parser anymore
+                try self.setDocument(html_doc);
+                self.documentIsComplete();
+            },
             .html => |*p| {
                 const html_doc = p.html_doc;
                 p.deinit(); // don't need the parser anymore
@@ -719,7 +744,7 @@ pub const Page = struct {
         self.clearTransferArena();
 
         switch (self.mode) {
-            .html => |*p| p.deinit(), // don't need the parser anymore
+            .html, .text => |*p| p.deinit(), // don't need the parser anymore
             else => {},
         }
         self.mode = .{ .err = err };

From 7335b1d0a4f4747e2eb6dda5fa41f5c8b2c20494 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire
Date: Wed, 20 Aug 2025 16:02:27 +0200
Subject: [PATCH 2/3] escape incoming plain text

---
 src/browser/page.zig | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/browser/page.zig b/src/browser/page.zig
index 758d4a55..0633f44c 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -209,12 +209,18 @@ pub const Page = struct {
             },
             .raw_done => |data| return out.writeAll(data),
             .text => {
-                // processed below, along with .html
-                // return the <pre> element from the HTML
+                // returns the <pre> element from the HTML
                 const doc = parser.documentHTMLToDocument(self.window.document);
                 const list = try parser.documentGetElementsByTagName(doc, "pre");
                 const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML;
-                return Dump.writeChildren(pre, .{}, out);
+                const walker = Walker{};
+                var next: ?*parser.Node = null;
+                while (true) {
+                    next = try walker.get_next(pre, next) orelse break;
+                    const v = try parser.nodeTextContent(next.?) orelse return;
+                    try out.writeAll(v);
+                }
+                return;
             },
             .html => {
                 // maybe page.wait timed-out, print what we have
@@ -656,7 +662,24 @@ pub const Page = struct {
         }
 
         switch (self.mode) {
-            .html, .text => |*p| try p.process(data),
+            .html => |*p| try p.process(data),
+            .text => |*p| {
+                // we have to escape the data...
+                var v = data;
+                while (v.len > 0) {
+                    const index = std.mem.indexOfAnyPos(u8, v, 0, &.{ '<', '>' }) orelse {
+                        try p.process(v);
+                        return;
+                    };
+                    try p.process(v[0..index]);
+                    switch (v[index]) {
+                        '<' => try p.process("&lt;"),
+                        '>' => try p.process("&gt;"),
+                        else => unreachable,
+                    }
+                    v = v[index + 1 ..];
+                }
+            },
             .raw => |*buf| try buf.appendSlice(self.arena, data),
             .pre => unreachable,
             .parsed => unreachable,

From 3182a478582a0b96fac05b2ccd9cacc0d946efc7 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire 
Date: Wed, 20 Aug 2025 16:28:26 +0200
Subject: [PATCH 3/3] typo fix

---
 src/browser/page.zig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/browser/page.zig b/src/browser/page.zig
index 0633f44c..bad1ccb2 100644
--- a/src/browser/page.zig
+++ b/src/browser/page.zig
@@ -1111,6 +1111,6 @@ pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) c
     }
 
     self.script_manager.addFromElement(element.?) catch |err| {
-        log.warn(.browser, "dynamcic script", .{ .err = err });
+        log.warn(.browser, "dynamic script", .{ .err = err });
     };
 }