Merge pull request #553 from lightpanda-io/mime_sniffing

Try to sniff the mime type based on the body content
2025-10-29 07:03:29 +00:00 · 2025-04-22 17:25:29 +02:00
parent faf93441f6 b9f61466ba
commit 5a08c92d02
5 changed files with 343 additions and 160 deletions
--- a/src/browser/browser.zig
+++ b/src/browser/browser.zig
@@ -435,24 +435,19 @@ pub const Page = struct {

        log.info("GET {any} {d}", .{ url, header.status });

-        const ct = blk: {
-            break :blk header.get("content-type") orelse {
-                // no content type in HTTP headers.
-                // TODO try to sniff mime type from the body.
-                log.info("no content-type HTTP header", .{});
+        const content_type = header.get("content-type");

-                // Assume it's HTML for now.
-                break :blk "text/html; charset=utf-8";
-            };
-        };
-
-        log.debug("header content-type: {s}", .{ct});
-        var mime = try Mime.parse(arena, ct);
+        const mime: Mime = blk: {
+            if (content_type) |ct| {
+                break :blk try Mime.parse(arena, ct);
+            }
+            break :blk Mime.sniff(try response.peek());
+        } orelse .unknown;

        if (mime.isHTML()) {
            try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
        } else {
-            log.info("non-HTML document: {s}", .{ct});
+            log.info("non-HTML document: {s}", .{content_type orelse "null"});
            var arr: std.ArrayListUnmanaged(u8) = .{};
            while (try response.next()) |data| {
                try arr.appendSlice(arena, try arena.dupe(u8, data));
--- a/src/browser/mime.zig
+++ b/src/browser/mime.zig
@@ -24,10 +24,17 @@ pub const Mime = struct {
    params: []const u8 = "",
    charset: ?[]const u8 = null,

+    /// Mime value whose content type is tagged `.unknown`, with empty params.
+    /// NOTE(review): `charset` is set to "" here, whereas the field's default
+    /// is `null`; callers using `charset orelse ...` will see "" rather than
+    /// their fallback. Confirm this is intended.
+    pub const unknown = Mime{
+        .params = "",
+        .charset = "",
+        .content_type = .{ .unknown = {} },
+    };
+
    pub const ContentTypeEnum = enum {
        text_xml,
        text_html,
        text_plain,
+        unknown,
        other,
    };

@@ -35,21 +42,26 @@ pub const Mime = struct {
        text_xml: void,
        text_html: void,
        text_plain: void,
+        unknown: void,
        other: struct { type: []const u8, sub_type: []const u8 },
    };

-    pub fn parse(arena: Allocator, input: []const u8) !Mime {
+    pub fn parse(arena: Allocator, input: []u8) !Mime {
        if (input.len > 255) {
            return error.TooBig;
        }
-        var trimmed = trim(input);

-        const content_type, const type_len = try parseContentType(trimmed);
-        if (type_len >= trimmed.len) {
+        // Zig's trim API is broken. The return type is always `[]const u8`,
+        // even if the input type is `[]u8`. @constCast is safe here.
+        var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
+        _ = std.ascii.lowerString(normalized, normalized);
+
+        const content_type, const type_len = try parseContentType(normalized);
+        if (type_len >= normalized.len) {
            return .{ .content_type = content_type };
        }

-        const params = trimLeft(trimmed[type_len..]);
+        const params = trimLeft(normalized[type_len..]);

        var charset: ?[]const u8 = null;

@@ -63,11 +75,12 @@ pub const Mime = struct {
                return error.Invalid;
            }

-            switch (name.len) {
-                7 => if (isCaseEqual("charset", name)) {
-                    charset = try parseValue(arena, value);
-                },
-                else => {},
+            const attribute_name = std.meta.stringToEnum(enum {
+                charset,
+            }, name) orelse continue;
+
+            switch (attribute_name) {
+                .charset => charset = try parseAttributeValue(arena, value),
            }
        }

@@ -78,66 +91,113 @@ pub const Mime = struct {
        };
    }

+    /// Guesses a MIME type from the leading bytes of `body`.
+    ///
+    /// Leading whitespace (space, tab, LF, CR, form feed) is skipped, then:
+    /// - a UTF-8 or UTF-16 byte-order mark yields `text_plain`;
+    /// - a '<' followed, case-insensitively, by one of the known prefixes and
+    ///   then a ' ' or '>' yields `text_html` (or `text_xml` for "<?xml").
+    ///
+    /// Returns null when `body` is empty or all whitespace, or when no known
+    /// signature matches. The returned Mime has default params and charset.
+    /// Does not allocate.
+    pub fn sniff(body: []const u8) ?Mime {
+        // 0x0C is form feed
+        const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C });
+        if (content.len == 0) {
+            return null;
+        }
+
+        if (content[0] != '<') {
+            if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
+                // UTF-8 BOM
+                return .{ .content_type = .{ .text_plain = {} } };
+            }
+            if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
+                // UTF-16 big-endian BOM
+                return .{ .content_type = .{ .text_plain = {} } };
+            }
+            if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
+                // UTF-16 little-endian BOM
+                return .{ .content_type = .{ .text_plain = {} } };
+            }
+            return null;
+        }
+
+        // The longest signature we look for is "<!doctype html" followed by
+        // its terminating byte: 15 bytes. We already know content[0] == '<',
+        // so we skip it (-1) and only need 14 bytes of lookahead.
+        var buf: [14]u8 = undefined;
+
+        const stripped = content[1..];
+        const prefix_len = @min(stripped.len, buf.len);
+        // Lowercase a copy so the comparisons below are case-insensitive.
+        const prefix = std.ascii.lowerString(&buf, stripped[0..prefix_len]);
+
+        // Signatures are listed without their leading '<', which has already
+        // been consumed above.
+        const known_prefixes = [_]struct { []const u8, ContentType }{
+            .{ "!doctype html", .{ .text_html = {} } },
+            .{ "html", .{ .text_html = {} } },
+            .{ "script", .{ .text_html = {} } },
+            .{ "iframe", .{ .text_html = {} } },
+            .{ "h1", .{ .text_html = {} } },
+            .{ "div", .{ .text_html = {} } },
+            .{ "font", .{ .text_html = {} } },
+            .{ "table", .{ .text_html = {} } },
+            .{ "a", .{ .text_html = {} } },
+            .{ "style", .{ .text_html = {} } },
+            .{ "title", .{ .text_html = {} } },
+            .{ "b", .{ .text_html = {} } },
+            .{ "body", .{ .text_html = {} } },
+            .{ "br", .{ .text_html = {} } },
+            .{ "p", .{ .text_html = {} } },
+            .{ "!--", .{ .text_html = {} } },
+            // An XML declaration starts with "<?xml", not "<xml".
+            .{ "?xml", .{ .text_xml = {} } },
+        };
+        inline for (known_prefixes) |kp| {
+            const known_prefix = kp.@"0";
+            // Require one more byte after the signature so that "<a" does not
+            // match "<abbr" and a truncated "<html" is not accepted.
+            if (std.mem.startsWith(u8, prefix, known_prefix) and prefix.len > known_prefix.len) {
+                const next = prefix[known_prefix.len];
+                // a "tag-terminating-byte"
+                if (next == ' ' or next == '>') {
+                    return .{ .content_type = kp.@"1" };
+                }
+            }
+        }
+
+        return null;
+    }
+
    pub fn isHTML(self: *const Mime) bool {
        return self.content_type == .text_html;
    }

+    // we expect value to be lowercase
    fn parseContentType(value: []const u8) !struct { ContentType, usize } {
-        const separator = std.mem.indexOfScalarPos(u8, value, 0, '/') orelse {
-            return error.Invalid;
-        };
-        const end = std.mem.indexOfScalarPos(u8, value, separator, ';') orelse blk: {
-            break :blk value.len;
-        };
+        const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
+        const type_name = trimRight(value[0..end]);
+        const attribute_start = end + 1;
+
+        if (std.meta.stringToEnum(enum {
+            @"text/xml",
+            @"text/html",
+            @"text/plain",
+        }, type_name)) |known_type| {
+            const ct: ContentType = switch (known_type) {
+                .@"text/xml" => .{ .text_xml = {} },
+                .@"text/html" => .{ .text_html = {} },
+                .@"text/plain" => .{ .text_plain = {} },
+            };
+            return .{ ct, attribute_start };
+        }
+
+        const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid;

        const main_type = value[0..separator];
        const sub_type = trimRight(value[separator + 1 .. end]);

-        if (parseCommonContentType(main_type, sub_type)) |content_type| {
-            return .{ content_type, end + 1 };
-        }
-
-        if (main_type.len == 0) {
+        if (main_type.len == 0 or validType(main_type) == false) {
            return error.Invalid;
        }
-        if (validType(main_type) == false) {
+        if (sub_type.len == 0 or validType(sub_type) == false) {
            return error.Invalid;
        }

-        if (sub_type.len == 0) {
-            return error.Invalid;
-        }
-        if (validType(sub_type) == false) {
-            return error.Invalid;
-        }
-
-        const content_type = ContentType{ .other = .{
+        return .{ .{ .other = .{
            .type = main_type,
            .sub_type = sub_type,
-        } };
-
-        return .{ content_type, end + 1 };
-    }
-
-    fn parseCommonContentType(main_type: []const u8, sub_type: []const u8) ?ContentType {
-        switch (main_type.len) {
-            4 => if (isCaseEqual("text", main_type)) {
-                switch (sub_type.len) {
-                    3 => if (isCaseEqual("xml", sub_type)) {
-                        return .{ .text_xml = {} };
-                    },
-                    4 => if (isCaseEqual("html", sub_type)) {
-                        return .{ .text_html = {} };
-                    },
-                    5 => if (isCaseEqual("plain", sub_type)) {
-                        return .{ .text_plain = {} };
-                    },
-                    else => {},
-                }
-            },
-            else => {},
-        }
-        return null;
+        } }, attribute_start };
    }

    const T_SPECIAL = blk: {
@@ -148,7 +208,7 @@ pub const Mime = struct {
        break :blk v;
    };

-    fn parseValue(arena: Allocator, value: []const u8) ![]const u8 {
+    fn parseAttributeValue(arena: Allocator, value: []const u8) ![]const u8 {
        if (value[0] != '"') {
            return value;
        }
@@ -218,10 +278,6 @@ pub const Mime = struct {
        return true;
    }

-    fn trim(s: []const u8) []const u8 {
-        return std.mem.trim(u8, s, &std.ascii.whitespace);
-    }
-
    fn trimLeft(s: []const u8) []const u8 {
        return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
    }
@@ -229,28 +285,12 @@ pub const Mime = struct {
    fn trimRight(s: []const u8) []const u8 {
        return std.mem.trimRight(u8, s, &std.ascii.whitespace);
    }
-
-    fn isCaseEqual(comptime target: anytype, value: []const u8) bool {
-        // - 8 beause we don't care about the sentinel
-        const bit_len = @bitSizeOf(@TypeOf(target.*)) - 8;
-        const byte_len = bit_len / 8;
-
-        const T = @Type(.{ .int = .{
-            .bits = bit_len,
-            .signedness = .unsigned,
-        } });
-
-        const bit_target: T = @bitCast(@as(*const [byte_len]u8, target).*);
-
-        if (@as(T, @bitCast(value[0..byte_len].*)) == bit_target) {
-            return true;
-        }
-        return std.ascii.eqlIgnoreCase(value, target);
-    }
 };

-const testing = std.testing;
+const testing = @import("../testing.zig");
 test "Mime: invalid " {
+    defer testing.reset();
+
    const invalids = [_][]const u8{
        "",
        "text",
@@ -270,11 +310,14 @@ test "Mime: invalid " {
    };

    for (invalids) |invalid| {
-        try testing.expectError(error.Invalid, Mime.parse(undefined, invalid));
+        const mutable_input = try testing.arena_allocator.dupe(u8, invalid);
+        try testing.expectError(error.Invalid, Mime.parse(undefined, mutable_input));
    }
 }

 test "Mime: parse common" {
+    defer testing.reset();
+
    try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
    try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
    try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
@@ -297,6 +340,8 @@ test "Mime: parse common" {
 }

 test "Mime: parse uncommon" {
+    defer testing.reset();
+
    const text_javascript = Expectation{
        .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
    };
@@ -306,12 +351,14 @@ test "Mime: parse uncommon" {
    try expect(text_javascript, "  text/javascript\t  ;");

    try expect(
-        .{ .content_type = .{ .other = .{ .type = "Text", .sub_type = "Javascript" } } },
+        .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
        "Text/Javascript",
    );
 }

 test "Mime: parse charset" {
+    defer testing.reset();
+
    try expect(.{
        .content_type = .{ .text_xml = {} },
        .charset = "utf-8",
@@ -332,11 +379,12 @@ test "Mime: parse charset" {
 }

 test "Mime: isHTML" {
+    defer testing.reset();
+
    const isHTML = struct {
        fn isHTML(expected: bool, input: []const u8) !void {
-            var arena = std.heap.ArenaAllocator.init(testing.allocator);
-            defer arena.deinit();
-            var mime = try Mime.parse(arena.allocator(), input);
+            const mutable_input = try testing.arena_allocator.dupe(u8, input);
+            var mime = try Mime.parse(testing.arena_allocator, mutable_input);
            try testing.expectEqual(expected, mime.isHTML());
        }
    }.isHTML;
@@ -348,6 +396,71 @@ test "Mime: isHTML" {
    try isHTML(false, "over/9000");
 }

+test "Mime: sniff" {
+    // Inputs that must not be recognised: empty input, truncated or
+    // unterminated tags, and tags followed by a byte other than ' ' or '>'.
+    const unrecognized = [_][]const u8{
+        "",
+        "<htm",
+        "<html!",
+        "<a_",
+        "<!doctype html",
+        "<!doctype  html>",
+        "\n  <!doctype  html>",
+        "\n \t <font/>",
+    };
+    for (unrecognized) |input| {
+        try testing.expectEqual(null, Mime.sniff(input));
+    }
+
+    // Inputs that must sniff as HTML, with varying case and leading
+    // whitespace.
+    const html_inputs = [_][]const u8{
+        "<!doctype html ",
+        "\n  \t    <!DOCTYPE HTML ",
+        "<html ",
+        "\n  \t    <HtmL> even more stufff",
+        "<script>",
+        "\n  \t    <SCRIpt >alert(document.cookies)</script>",
+        "<iframe>",
+        " \t    <ifRAME >",
+        "<h1>",
+        "  <H1>",
+        "<div>",
+        "\n\r\r  <DiV>",
+        "<font>",
+        "  <fonT>",
+        "<table>",
+        "\t\t<TAblE>",
+        "<a>",
+        "\n\n<A>",
+        "<style>",
+        "    \n\t <STyLE>",
+        "<title>",
+        "    \n\t <TITLE>",
+        "<b>",
+        "    \n\t <B>",
+        "<body>",
+        "    \n\t <BODY>",
+        "<br>",
+        "    \n\t <BR>",
+        "<p>",
+        "    \n\t <P>",
+        "<!-->",
+        "    \n\t <!-->",
+    };
+    for (html_inputs) |input| {
+        const sniffed = Mime.sniff(input).?;
+        try testing.expectEqual(.text_html, std.meta.activeTag(sniffed.content_type));
+    }
+}
+
 const Expectation = struct {
    content_type: Mime.ContentType,
    params: []const u8 = "",
@@ -355,11 +468,9 @@ const Expectation = struct {
 };

 fn expect(expected: Expectation, input: []const u8) !void {
-    var arena = std.heap.ArenaAllocator.init(testing.allocator);
-    defer arena.deinit();
-
-    const actual = try Mime.parse(arena.allocator(), input);
+    const mutable_input = try testing.arena_allocator.dupe(u8, input);

+    const actual = try Mime.parse(testing.arena_allocator, mutable_input);
    try testing.expectEqual(
        std.meta.activeTag(expected.content_type),
        std.meta.activeTag(actual.content_type),
@@ -368,16 +479,16 @@ fn expect(expected: Expectation, input: []const u8) !void {
    switch (expected.content_type) {
        .other => |e| {
            const a = actual.content_type.other;
-            try testing.expectEqualStrings(e.type, a.type);
-            try testing.expectEqualStrings(e.sub_type, a.sub_type);
+            try testing.expectEqual(e.type, a.type);
+            try testing.expectEqual(e.sub_type, a.sub_type);
        },
        else => {}, // already asserted above
    }

-    try testing.expectEqualStrings(expected.params, actual.params);
+    try testing.expectEqual(expected.params, actual.params);

    if (expected.charset) |ec| {
-        try testing.expectEqualStrings(ec, actual.charset.?);
+        try testing.expectEqual(ec, actual.charset.?);
    } else {
        try testing.expectEqual(null, actual.charset);
    }
--- a/src/browser/xhr/xhr.zig
+++ b/src/browser/xhr/xhr.zig
@@ -254,7 +254,7 @@ pub const XMLHttpRequest = struct {
    };
    const ResponseObj = union(ResponseObjTag) {
        Document: *parser.Document,
-        Failure: bool,
+        Failure: void,
        JSON: std.json.Parsed(JSONValue),

        fn deinit(self: ResponseObj) void {
@@ -511,12 +511,8 @@ pub const XMLHttpRequest = struct {
            }

            // extract a mime type from headers.
-            {
-                var raw: []const u8 = "text/xml";
-                if (header.get("content-type")) |ct| {
-                    raw = try self.arena.dupe(u8, ct);
-                }
-                self.response_mime = Mime.parse(self.arena, raw) catch |e| {
+            if (header.get("content-type")) |ct| {
+                self.response_mime = Mime.parse(self.arena, ct) catch |e| {
                    return self.onErr(e);
                };
            }
@@ -724,26 +720,24 @@ pub const XMLHttpRequest = struct {
    // TODO parse XML.
    // https://xhr.spec.whatwg.org/#response-object
    fn setResponseObjDocument(self: *XMLHttpRequest) void {
-        const response_mime = &self.response_mime.?;
-        const isHTML = response_mime.isHTML();
-
-        // TODO If finalMIME is not an HTML MIME type or an XML MIME type, then
-        // return.
-        if (!isHTML) {
+        const mime = self.response_mime orelse return;
+        if (mime.isHTML() == false) {
            return;
        }

        var ccharset: [:0]const u8 = "utf-8";
-        if (response_mime.charset) |rc| {
-            ccharset = self.arena.dupeZ(u8, rc) catch {
-                self.response_obj = .{ .Failure = true };
-                return;
-            };
+        if (mime.charset) |rc| {
+            if (std.mem.eql(u8, rc, "utf-8") == false) {
+                ccharset = self.arena.dupeZ(u8, rc) catch {
+                    self.response_obj = .{ .Failure = {} };
+                    return;
+                };
+            }
        }

        var fbs = std.io.fixedBufferStream(self.response_bytes.items);
        const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
-            self.response_obj = .{ .Failure = true };
+            self.response_obj = .{ .Failure = {} };
            return;
        };

@@ -766,7 +760,7 @@ pub const XMLHttpRequest = struct {
            .{},
        ) catch |e| {
            log.err("parse JSON: {}", .{e});
-            self.response_obj = .{ .Failure = true };
+            self.response_obj = .{ .Failure = {} };
            return;
        };

--- a/src/http/client.zig
+++ b/src/http/client.zig
@@ -32,9 +32,13 @@ const Loop = @import("../runtime/loop.zig").Loop;

 const log = std.log.scoped(.http_client);

+// We might need to peek at the body to try and sniff the content-type.
+// While we only need a few bytes, in most cases we need to ignore leading
+// whitespace, so we want to get a reasonable-sized chunk.
+const PEEK_BUF_LEN = 1024;
+
 const BUFFER_LEN = 32 * 1024;

-// The longest individual header line that we support
 const MAX_HEADER_LINE_LEN = 4096;

 // Thread-safe. Holds our root certificate, connection pool and state pool
@@ -900,6 +904,7 @@ const SyncHandler = struct {
            // object which can be iterated to get the body.
            std.debug.assert(result.done or reader.body_reader != null);
            std.debug.assert(result.data == null);
+
            return .{
                ._buf = buf,
                ._request = request,
@@ -907,6 +912,8 @@ const SyncHandler = struct {
                ._done = result.done,
                ._connection = connection,
                ._data = result.unprocessed,
+                ._peek_len = 0,
+                ._peek_buf = state.peek_buf,
                .header = reader.response,
            };
        }
@@ -1046,7 +1053,7 @@ const Reader = struct {

        // Still parsing the header

-        // what data do we have leftover in `data`.
+        // What data do we have leftover in `data`?
        // When header_done == true, then this is part (or all) of the body
        // When header_done == false, then this is a header line that we didn't
        // have enough data for.
@@ -1504,23 +1511,49 @@ pub const Progress = struct {
    header: ResponseHeader,
 };

-// The value that we return from a synchronous requst.
+// The value that we return from a synchronous request.
 pub const Response = struct {
    _reader: Reader,
    _request: *Request,
-
-    _buf: []u8,
    _connection: SyncHandler.Connection,

+    // the buffer to read the peeked data into
+    _peek_buf: []u8,
+
+    // the length of data we've peeked. The peeked_data is _peek_buf[0.._peek_len].
+    // It's possible for peek_len > 0 and _done == true, in which case, the
+    // _peeked data should be emitted once and subsequent calls to `next` should
+    // return null.
+    _peek_len: usize,
+
+    // What we'll read from the socket into. This is the State's read_buf
+    _buf: []u8,
+
+    // Whether or not we're done reading the response. When true, next will
+    // return null.
    _done: bool,

-    // Any data we over-read while parsing the header. This will be returned on
-    // the first call to next();
+    // Data that we've read. This can be set when the Response is first created
+    // from extra data received while parsing the header. Or, it can be set
+    // when `next` is called and we read more data from the socket.
    _data: ?[]u8 = null,
    header: ResponseHeader,

    pub fn next(self: *Response) !?[]u8 {
-        var buf = self._buf;
+        // It's possible for peek_len > 0 and done == true. This would happen
+        // when, while peeking, we reached the end of the data. In that case,
+        // we return the peeked data once, and on subsequent calls, we'll return
+        // null normally, because done == true.
+        const pl = self._peek_len;
+        if (pl > 0) {
+            // Hand out the peeked bytes exactly once, then fall back to
+            // reading into the regular buffer.
+            self._peek_len = 0;
+            return self._peek_buf[0..pl];
+        }
+
+        return self._nextIgnorePeek(self._buf);
+    }
+
+    fn _nextIgnorePeek(self: *Response, buf: []u8) !?[]u8 {
        while (true) {
            if (try self.processData()) |data| {
                return data;
@@ -1541,14 +1574,38 @@ pub const Response = struct {
        self._data = result.unprocessed; // for the next call
        return result.data;
    }
+
+    /// Reads ahead into the body without consuming it, e.g. so the MIME type
+    /// can be sniffed. Keeps reading until more than 100 bytes have been
+    /// accumulated or the body ends, and returns everything peeked so far.
+    /// The peeked bytes stay in `_peek_buf` and are returned again by the
+    /// following call to `next`.
+    pub fn peek(self: *Response) ![]u8 {
+        const peek_buf = self._peek_buf;
+        while (true) {
+            const peek_len = self._peek_len;
+
+            const data = (try self._nextIgnorePeek(peek_buf[peek_len..])) orelse {
+                // End of the body: what we have accumulated is all there is.
+                return peek_buf[0..peek_len];
+            };
+
+            // NOTE(review): this assumes `data` fits in the remainder of
+            // peek_buf. Presumably data left over from the header read could
+            // be larger than that; confirm against _nextIgnorePeek.
+            const peek_end = peek_len + data.len;
+
+            // `data` may already live in the region we read into, so use a
+            // copy that tolerates the source starting at or after the
+            // destination (@memcpy does not allow aliasing arguments).
+            std.mem.copyForwards(u8, peek_buf[peek_len..peek_end], data);
+            self._peek_len = peek_end;
+
+            if (peek_end > 100) {
+                // Return the whole peeked prefix, not just the last chunk.
+                return peek_buf[0..peek_end];
+            }
+        }
+    }
 };

 // Pooled and re-used when creating a request
 const State = struct {
-    // used for reading chunks of payload data.
+    // We might be asked to peek at the response, i.e. to sniff the mime type.
+    // This will require storing any peeked data so that, later, if we stream
+    // the body, we can present a cohesive body.
+    peek_buf: []u8,
+
+    // Used for reading chunks of payload data.
    read_buf: []u8,

-    // use for writing data. If you're wondering why BOTH a read_buf and a
+    // Used for writing data. If you're wondering why BOTH a read_buf and a
    // write_buf, even though HTTP is req -> resp, it's for TLS, which has
    // bidirectional data.
    write_buf: []u8,
@@ -1561,7 +1618,10 @@ const State = struct {
    // response headers.
    arena: ArenaAllocator,

-    fn init(allocator: Allocator, header_size: usize, buf_size: usize) !State {
+    fn init(allocator: Allocator, header_size: usize, peek_size: usize, buf_size: usize) !State {
+        const peek_buf = try allocator.alloc(u8, peek_size);
+        errdefer allocator.free(peek_buf);
+
        const read_buf = try allocator.alloc(u8, buf_size);
        errdefer allocator.free(read_buf);

@@ -1572,6 +1632,7 @@ const State = struct {
        errdefer allocator.free(header_buf);

        return .{
+            .peek_buf = peek_buf,
            .read_buf = read_buf,
            .write_buf = write_buf,
            .header_buf = header_buf,
@@ -1585,6 +1646,7 @@ const State = struct {

    fn deinit(self: *State) void {
        const allocator = self.arena.child_allocator;
+        allocator.free(self.peek_buf);
        allocator.free(self.read_buf);
        allocator.free(self.write_buf);
        allocator.free(self.header_buf);
@@ -1611,7 +1673,7 @@ const StatePool = struct {
        for (0..count) |i| {
            const state = try allocator.create(State);
            errdefer allocator.destroy(state);
-            state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, BUFFER_LEN);
+            state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, PEEK_BUF_LEN, BUFFER_LEN);
            states[i] = state;
            started += 1;
        }
@@ -1662,7 +1724,7 @@ const StatePool = struct {

 const testing = @import("../testing.zig");
 test "HttpClient Reader: fuzz" {
-    var state = try State.init(testing.allocator, 1024, 1024);
+    var state = try State.init(testing.allocator, 1024, 1024, 100);
    defer state.deinit();

    var res = TestResponse.init();
@@ -1773,18 +1835,23 @@ test "HttpClient: sync connect error" {
 }

 test "HttpClient: sync no body" {
-    var client = try testClient();
-    defer client.deinit();
+    for (0..2) |i| {
+        var client = try testClient();
+        defer client.deinit();

-    const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple");
-    var req = try client.request(.GET, &uri);
-    var res = try req.sendSync(.{});
+        const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple");
+        var req = try client.request(.GET, &uri);
+        var res = try req.sendSync(.{});

-    try testing.expectEqual(null, try res.next());
-    try testing.expectEqual(200, res.header.status);
-    try testing.expectEqual(2, res.header.count());
-    try testing.expectEqual("close", res.header.get("connection"));
-    try testing.expectEqual("0", res.header.get("content-length"));
+        if (i == 0) {
+            try testing.expectEqual("", try res.peek());
+        }
+        try testing.expectEqual(null, try res.next());
+        try testing.expectEqual(200, res.header.status);
+        try testing.expectEqual(2, res.header.count());
+        try testing.expectEqual("close", res.header.get("connection"));
+        try testing.expectEqual("0", res.header.get("content-length"));
+    }
 }

 test "HttpClient: sync tls no body" {
@@ -1804,21 +1871,26 @@ test "HttpClient: sync tls no body" {
 }

 test "HttpClient: sync with body" {
-    var client = try testClient();
-    defer client.deinit();
+    for (0..2) |i| {
+        var client = try testClient();
+        defer client.deinit();

-    const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo");
-    var req = try client.request(.GET, &uri);
-    var res = try req.sendSync(.{});
+        const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo");
+        var req = try client.request(.GET, &uri);
+        var res = try req.sendSync(.{});

-    try testing.expectEqual("over 9000!", try res.next());
-    try testing.expectEqual(201, res.header.status);
-    try testing.expectEqual(5, res.header.count());
-    try testing.expectEqual("close", res.header.get("connection"));
-    try testing.expectEqual("10", res.header.get("content-length"));
-    try testing.expectEqual("127.0.0.1", res.header.get("_host"));
-    try testing.expectEqual("Close", res.header.get("_connection"));
-    try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
+        if (i == 0) {
+            try testing.expectEqual("over 9000!", try res.peek());
+        }
+        try testing.expectEqual("over 9000!", try res.next());
+        try testing.expectEqual(201, res.header.status);
+        try testing.expectEqual(5, res.header.count());
+        try testing.expectEqual("close", res.header.get("connection"));
+        try testing.expectEqual("10", res.header.get("content-length"));
+        try testing.expectEqual("127.0.0.1", res.header.get("_host"));
+        try testing.expectEqual("Close", res.header.get("_connection"));
+        try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
+    }
 }

 test "HttpClient: sync tls with body" {
--- a/src/testing.zig
+++ b/src/testing.zig
@@ -24,6 +24,17 @@ pub const expectError = std.testing.expectError;
 pub const expectString = std.testing.expectEqualStrings;
 pub const expectEqualSlices = std.testing.expectEqualSlices;

+// sometimes it's super useful to have an arena you don't really care about
+// in a test. Like, you need a mutable string, so you just want to dupe a
+// string literal. It has nothing to do with the code under test, it's just
+// infrastructure for the test itself.
+pub var arena_instance = std.heap.ArenaAllocator.init(std.heap.c_allocator);
+pub const arena_allocator = arena_instance.allocator();
+
+/// Resets the shared test arena (`arena_instance`) in `retain_capacity`
+/// mode. The result of the reset call is deliberately ignored.
+pub fn reset() void {
+    _ = arena_instance.reset(.retain_capacity);
+}
+
 const App = @import("app.zig").App;
 const parser = @import("browser/netsurf.zig");