From 087e42a641c9712f00c9da676bd8ec48253bcc91 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 25 Aug 2025 09:20:18 +0800 Subject: [PATCH] Normalize CDP response headers chromedp doesn't support duplicate header names. Although servers _will_ send this (e.g. Cache-Control: public\r\nCache-Control: max-age=60\r\n), Chrome seems to join them with a "\n". So we do the same. A note on curl_easy_nextheader, which this code ultimately uses to iterate and collect the headers. The documentation says: Applications must copy the data if they want it to survive subsequent API calls or the life-time of the easy handle. As-is, I'd understand this to mean that a given header name/value is only valid until any API call, including another call to curl_easy_nextheader. So, from this comment, we _should_ be duping the name/value. But we don't. Why? Because, despite the note in the documentation, this doesn't appear to be how it actually works, nor does it really make sense. If it's just a linked list, there's no reason curl_easy_nextheader should invalidate previous results. I'm guessing this is just a general lack of guarantee libcurl is willing to make re lifetimes. 
https://github.com/lightpanda-io/browser/issues/966 --- src/cdp/domains/network.zig | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 627ec6d6..dd983137 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -267,7 +267,7 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: anytype, msg: *const Notific .requestId = try std.fmt.allocPrint(arena, "REQ-{d}", .{msg.transfer.id}), .loaderId = bc.loader_id, .frameId = target_id, - .response = TransferAsResponseWriter.init(msg.transfer), + .response = TransferAsResponseWriter.init(arena, msg.transfer), }, .{ .session_id = session_id }); } @@ -352,10 +352,12 @@ pub const TransferAsRequestWriter = struct { }; const TransferAsResponseWriter = struct { + arena: Allocator, transfer: *Transfer, - fn init(transfer: *Transfer) TransferAsResponseWriter { + fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { return .{ + .arena = arena, .transfer = transfer, }; } @@ -392,14 +394,24 @@ const TransferAsResponseWriter = struct { } { - try writer.objectField("headers"); - try writer.beginObject(); + // chromedp doesn't like having duplicate header names. It's pretty + // common to get these from a server (e.g. for Cache-Control), but + // Chrome joins these. So we have to too. 
+ const arena = self.arena; var it = transfer.responseHeaderIterator(); + var map: std.StringArrayHashMapUnmanaged([]const u8) = .empty; while (it.next()) |hdr| { - try writer.objectField(hdr.name); - try writer.write(hdr.value); + const gop = try map.getOrPut(arena, hdr.name); + if (gop.found_existing) { + // yes, chrome joins multi-value headers with a \n + gop.value_ptr.* = try std.mem.join(arena, "\n", &.{ gop.value_ptr.*, hdr.value }); + } else { + gop.value_ptr.* = hdr.value; + } } - try writer.endObject(); + + try writer.objectField("headers"); + try writer.write(std.json.ArrayHashMap([]const u8){ .map = map }); } try writer.endObject(); }