From 7e575c501a614276bf15f1dcc84ed5396cca869b Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 3 Feb 2026 15:48:27 +0800 Subject: [PATCH] Add a dedicated browser_context and page_arena to CDP. The BrowserContext currently uses 3 arenas: 1 - Command-specific, which is like the call_arena, but for the processing of a single CDP command 2 - Notification-specific, which is similar, but for the processing of a single internal notification event 3 - Arena, which is just the session arena and lives for the duration of the BrowseContext/Session This is pretty coarse and can results in significant memory accumulation if a browser context is re-used for multiple navigations. This commit introduces 3 changes: 1 - Rather than referencing Session.arena, the BrowerContext.arena is now its own arena. This doesn't really change anything, but it does help keep things a bit better separated. 2 - Introduces a page_arena (not to be confused with Page.arena). This arena exists for the duration of a 1 page, i.e. it's cleared when the BrowserContext receives the page_created internal notification. The `captured_responses` now uses this arena, which means captures only exist for the duration of the current page. This appears to be consistent with how chrome behaves (In fact, Chrome seems even more aggressive and doesn't appear to make any guarantees around captured responses). CDP refers to this lifetime as a "renderer" and has an experimental message, which we don't support, `Network.configureDurableMessages` to control this. 3 - Isolated Worlds are now more self contained with an arena from the ArenaPool. There are currently 2 places where the BrowserContext.arena is still used: 1 - the isolated_world list 2 - the custom headers Although this could be long lived, I believe the above is ok. We should just really think twice whenever we want to use it for anything else. --- src/cdp/cdp.zig | 53 ++++++++++++++++++++++++++------------ src/cdp/domains/dom.zig | 2 +- src/cdp/domains/page.zig | 14 +++++++--- src/cdp/domains/target.zig | 2 +- 4 files changed, 48 insertions(+), 23 deletions(-) diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 812200f1..66786973 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -76,6 +76,10 @@ pub fn CDPT(comptime TypeProvider: type) type { // Used for processing notifications within a browser context. notification_arena: std.heap.ArenaAllocator, + page_arena: std.heap.ArenaAllocator, + + browser_context_arena: std.heap.ArenaAllocator, + const Self = @This(); pub fn init(app: *App, client: TypeProvider.Client) !Self { @@ -90,8 +94,10 @@ pub fn CDPT(comptime TypeProvider: type) type { .browser = browser, .allocator = allocator, .browser_context = null, + .page_arena = std.heap.ArenaAllocator.init(allocator), .message_arena = std.heap.ArenaAllocator.init(allocator), .notification_arena = std.heap.ArenaAllocator.init(allocator), + .browser_context_arena = std.heap.ArenaAllocator.init(allocator), }; } @@ -100,8 +106,10 @@ pub fn CDPT(comptime TypeProvider: type) type { bc.deinit(); } self.browser.deinit(); + self.page_arena.deinit(); self.message_arena.deinit(); self.notification_arena.deinit(); + self.browser_context_arena.deinit(); } pub fn handleMessage(self: *Self, msg: []const u8) bool { @@ -323,9 +331,12 @@ pub fn BrowserContext(comptime CDP_T: type) type { // RELATION TO SESSION_ID session: *Session, - // Points to the session arena + // Tied to the lifetime of the BrowserContext arena: Allocator, + // Tied to the lifetime of 1 page rendered in the BrowserContext. + page_arena: Allocator, + // From the parent's notification_arena.allocator(). Most of the CDP // code paths deal with a cmd which has its own arena (from the // message_arena). But notifications happen outside of the typical CDP @@ -355,12 +366,12 @@ pub fn BrowserContext(comptime CDP_T: type) type { node_search_list: Node.Search.List, inspector_session: *js.Inspector.Session, - isolated_worlds: std.ArrayListUnmanaged(IsolatedWorld), + isolated_worlds: std.ArrayList(*IsolatedWorld), http_proxy_changed: bool = false, // Extra headers to add to all requests. - extra_headers: std.ArrayListUnmanaged([*c]const u8) = .empty, + extra_headers: std.ArrayList([*c]const u8) = .empty, intercept_state: InterceptState, @@ -373,7 +384,7 @@ pub fn BrowserContext(comptime CDP_T: type) type { // ever streamed. So if CDP is the only thing that needs bodies in // memory for an arbitrary amount of time, then that's where we're going // to store the, - captured_responses: std.AutoHashMapUnmanaged(usize, std.ArrayListUnmanaged(u8)), + captured_responses: std.AutoHashMapUnmanaged(usize, std.ArrayList(u8)), const Self = @This(); @@ -381,7 +392,6 @@ pub fn BrowserContext(comptime CDP_T: type) type { const allocator = cdp.allocator; const session = try cdp.browser.newSession(); - const arena = session.arena; const browser = &cdp.browser; const inspector_session = browser.env.inspector.?.startSession(self); @@ -393,7 +403,6 @@ pub fn BrowserContext(comptime CDP_T: type) type { self.* = .{ .id = id, .cdp = cdp, - .arena = arena, .target_id = null, .session_id = null, .session = session, @@ -405,6 +414,8 @@ pub fn BrowserContext(comptime CDP_T: type) type { .node_search_list = undefined, .isolated_worlds = .empty, .inspector_session = inspector_session, + .page_arena = cdp.page_arena.allocator(), + .arena = cdp.browser_context_arena.allocator(), .notification_arena = cdp.notification_arena.allocator(), .intercept_state = try InterceptState.init(allocator), .captured_responses = .empty, @@ -442,7 +453,7 @@ pub fn BrowserContext(comptime CDP_T: type) type { transfer.abort(error.ClientDisconnect); } - for (self.isolated_worlds.items) |*world| { + for (self.isolated_worlds.items) |world| { world.deinit(); } self.isolated_worlds.clearRetainingCapacity(); @@ -472,16 +483,21 @@ pub fn BrowserContext(comptime CDP_T: type) type { } pub fn createIsolatedWorld(self: *Self, world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld { - const owned_name = try self.arena.dupe(u8, world_name); - const world = try self.isolated_worlds.addOne(self.arena); + const browser = &self.cdp.browser; + const arena = try browser.arena_pool.acquire(); + errdefer browser.arena_pool.release(arena); + const world = try arena.create(IsolatedWorld); world.* = .{ + .arena = arena, .context = null, - .name = owned_name, - .env = &self.cdp.browser.env, + .browser = browser, + .name = try arena.dupe(u8, world_name), .grant_universal_access = grant_universal_access, }; + try self.isolated_worlds.append(self.arena, world); + return world; } @@ -634,7 +650,7 @@ pub fn BrowserContext(comptime CDP_T: type) type { pub fn onHttpResponseData(ctx: *anyopaque, msg: *const Notification.ResponseData) !void { const self: *Self = @ptrCast(@alignCast(ctx)); - const arena = self.arena; + const arena = self.page_arena; const id = msg.transfer.id; const gop = try self.captured_responses.getOrPut(arena, id); @@ -700,7 +716,7 @@ pub fn BrowserContext(comptime CDP_T: type) type { // + 10 for the max websocket header const message_len = msg.len + session_id.len + 1 + field.len + 10; - var buf: std.ArrayListUnmanaged(u8) = .{}; + var buf: std.ArrayList(u8) = .{}; buf.ensureTotalCapacity(allocator, message_len) catch |err| { log.err(.cdp, "inspector buffer", .{ .err = err }); return; @@ -734,20 +750,23 @@ pub fn BrowserContext(comptime CDP_T: type) type { /// Generally the client needs to resolve a node into the isolated world to be able to work with it. /// An object id is unique across all contexts, different object ids can refer to the same Node in different contexts. const IsolatedWorld = struct { + arena: Allocator, + browser: *Browser, name: []const u8, - env: *js.Env, context: ?*js.Context = null, grant_universal_access: bool, pub fn deinit(self: *IsolatedWorld) void { if (self.context) |ctx| { - self.env.destroyContext(ctx); + self.browser.env.destroyContext(ctx); self.context = null; } + self.browser.arena_pool.release(self.arena); } + pub fn removeContext(self: *IsolatedWorld) !void { const ctx = self.context orelse return error.NoIsolatedContextToRemove; - self.env.destroyContext(ctx); + self.browser.env.destroyContext(ctx); self.context = null; } @@ -758,7 +777,7 @@ const IsolatedWorld = struct { // Currently we have only 1 page/frame and thus also only 1 state in the isolate world. pub fn createContext(self: *IsolatedWorld, page: *Page) !*js.Context { if (self.context == null) { - self.context = try self.env.createContext(page, false); + self.context = try self.browser.env.createContext(page, false); } else { log.warn(.cdp, "not implemented", .{ .feature = "createContext: Not implemented second isolated context creation", diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 1e19b9d0..d6900ced 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -284,7 +284,7 @@ fn resolveNode(cmd: anytype) !void { break :blk; } // not the default scope, check the other ones - for (bc.isolated_worlds.items) |*isolated_world| { + for (bc.isolated_worlds.items) |isolated_world| { ls.?.deinit(); ls = null; diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index ce58a69d..98b4d928 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -167,7 +167,7 @@ fn close(cmd: anytype) !void { } bc.session.removePage(); - for (bc.isolated_worlds.items) |*world| { + for (bc.isolated_worlds.items) |world| { world.deinit(); } bc.isolated_worlds.clearRetainingCapacity(); @@ -275,15 +275,21 @@ pub fn pageNavigate(arena: Allocator, bc: anytype, event: *const Notification.Pa pub fn pageRemove(bc: anytype) !void { // The main page is going to be removed, we need to remove contexts from other worlds first. - for (bc.isolated_worlds.items) |*isolated_world| { + for (bc.isolated_worlds.items) |isolated_world| { try isolated_world.removeContext(); } } pub fn pageCreated(bc: anytype, page: *Page) !void { - for (bc.isolated_worlds.items) |*isolated_world| { + _ = bc.cdp.page_arena.reset(.{ .retain_with_limit = 1024 * 512 }); + + for (bc.isolated_worlds.items) |isolated_world| { _ = try isolated_world.createContext(page); } + // Only retain captured responses until a navigation event. In CDP term, + // this is called a "renderer" and the cache-duration can be controlled via + // the Network.configureDurableMessages message (which we don't support) + bc.captured_responses = .empty; } pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.PageNavigated) !void { @@ -359,7 +365,7 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P true, ); } - for (bc.isolated_worlds.items) |*isolated_world| { + for (bc.isolated_worlds.items) |isolated_world| { const aux_json = try std.fmt.allocPrint(arena, "{{\"isDefault\":false,\"type\":\"isolated\",\"frameId\":\"{s}\"}}", .{target_id}); // Calling contextCreated will assign a new Id to the context and send the contextCreated event diff --git a/src/cdp/domains/target.zig b/src/cdp/domains/target.zig index 7b623b89..32b29901 100644 --- a/src/cdp/domains/target.zig +++ b/src/cdp/domains/target.zig @@ -283,7 +283,7 @@ fn closeTarget(cmd: anytype) !void { } bc.session.removePage(); - for (bc.isolated_worlds.items) |*world| { + for (bc.isolated_worlds.items) |world| { world.deinit(); } bc.isolated_worlds.clearRetainingCapacity();