Make CDP server more authoritative with respect to IDs

The TL;DR is that this commit enforces the use of correct IDs, introduces a
BrowserContext, and adds some CDP tests.

These are the ids we need to be aware of when talking about CDP:
- id
- browserContextId
- targetId
- sessionId
- loaderId
- frameId

The `id` is the only one that _should_ originate from the driver. It's attached
to most messages and it's how we maintain a request -> response flow: when
the server responds to a specific message, it echoes back the id from the
requested message. (As opposed to out-of-band events sent from the server which
won't have an `id`). When I say "id" from this point forward, I mean every id
except for this req->res id.

Every other id is created by the browser.

Prior to this commit, we didn't really check incoming ids from the driver. If
the driver said "attachToTarget" and included a targetId, we just assumed that
this was the current targetId. This was aided by the fact that we only used
hard-coded IDs. If _we_ only "create" a frameId of "FRAME-1", then it's tempting
to think the driver will only ever send a frameId of "FRAME-1".

The issue with this approach is that _if_ the browser and driver fall out of sync
and there's only ever 1 browserContextId, 1 sessionId and 1 frameId, it's not
impossible to imagine cases where we act on the wrong thing.

Imagine this flow:
- Driver asks for a new BrowserContext
- Browser says OK, your browserContextId is 1
- Driver, for whatever reason, says close browserContextId 2
- Browser says, OK, but it doesn't check the id and just closes the only
  BrowserContext it knows about (which is 1)

By both re-using the same hard-coded ids, and not verifying that the ids sent
from the client correspond to the correct ids, any issues are going to be hard
to debug.

Currently LOADER_ID and FRAME_ID are still hard-coded. Baby steps.
This commit is contained in:
Karl Seguin
2025-02-26 09:33:50 +08:00
committed by Pierre Tachoire
parent ccacac0597
commit a3e2b5246e
17 changed files with 1128 additions and 591 deletions

View File

@@ -20,115 +20,78 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const json = std.json;
const dom = @import("dom.zig");
const Loop = @import("jsruntime").Loop;
// const Client = @import("../server.zig").Client;
const asUint = @import("../str/parser.zig").asUint;
const Incrementing = @import("../id.zig").Incrementing;
const log = std.log.scoped(.cdp);
pub const URL_BASE = "chrome://newtab/";
pub const LOADER_ID = "LOADERID24DD2FD56CF1EF33C965C79C";
pub const FRAME_ID = "FRAMEIDD8AED408A0467AC93100BCDBE";
pub const BROWSER_SESSION_ID = @tagName(SessionID.BROWSERSESSIONID597D9875C664CAC0);
pub const CONTEXT_SESSION_ID = @tagName(SessionID.CONTEXTSESSIONID0497A05C95417CF4);
pub const TimestampEvent = struct {
timestamp: f64,
};
pub const CDP = CDPT(struct {
const Client = @import("../server.zig").Client;
const Loop = *@import("jsruntime").Loop;
const Client = *@import("../server.zig").Client;
const Browser = @import("../browser/browser.zig").Browser;
const Session = @import("../browser/browser.zig").Session;
});
const SessionIdGen = Incrementing(u32, "SID");
const TargetIdGen = Incrementing(u32, "TID");
const BrowserContextIdGen = Incrementing(u32, "BID");
// Generic so that we can inject mocks into it.
pub fn CDPT(comptime TypeProvider: type) type {
return struct {
loop: TypeProvider.Loop,
// Used for sending message to the client and closing on error
client: *TypeProvider.Client,
// The active browser
browser: Browser,
// The active browser session
session: ?*Session,
client: TypeProvider.Client,
allocator: Allocator,
// The active browser
browser: ?Browser = null,
target_id_gen: TargetIdGen = .{},
session_id_gen: SessionIdGen = .{},
browser_context_id_gen: BrowserContextIdGen = .{},
browser_context: ?BrowserContext(Self),
// Re-used arena for processing a message. We're assuming that we're getting
// 1 message at a time.
message_arena: std.heap.ArenaAllocator,
// State
url: []const u8,
frame_id: []const u8,
loader_id: []const u8,
session_id: SessionID,
context_id: ?[]const u8,
execution_context_id: u32,
security_origin: []const u8,
page_life_cycle_events: bool,
secure_context_type: []const u8,
node_list: dom.NodeList,
node_search_list: dom.NodeSearchList,
const Self = @This();
pub const Browser = TypeProvider.Browser;
pub const Session = TypeProvider.Session;
pub fn init(allocator: Allocator, client: *TypeProvider.Client, loop: anytype) Self {
pub fn init(allocator: Allocator, client: TypeProvider.Client, loop: TypeProvider.Loop) Self {
return .{
.loop = loop,
.client = client,
.browser = Browser.init(allocator, loop),
.session = null,
.allocator = allocator,
.url = URL_BASE,
.execution_context_id = 0,
.context_id = null,
.frame_id = FRAME_ID,
.session_id = .CONTEXTSESSIONID0497A05C95417CF4,
.security_origin = URL_BASE,
.secure_context_type = "Secure", // TODO = enum
.loader_id = LOADER_ID,
.browser_context = null,
.message_arena = std.heap.ArenaAllocator.init(allocator),
.page_life_cycle_events = false, // TODO; Target based value
.node_list = dom.NodeList.init(allocator),
.node_search_list = dom.NodeSearchList.init(allocator),
};
}
pub fn deinit(self: *Self) void {
self.node_list.deinit();
for (self.node_search_list.items) |*s| {
s.deinit();
if (self.browser_context) |*bc| {
bc.deinit();
}
self.node_search_list.deinit();
self.browser.deinit();
self.message_arena.deinit();
}
pub fn reset(self: *Self) void {
self.node_list.reset();
// deinit all node searches.
for (self.node_search_list.items) |*s| {
s.deinit();
}
self.node_search_list.clearAndFree();
}
pub fn newSession(self: *Self) !void {
self.session = try self.browser.newSession(self);
}
pub fn handleMessage(self: *Self, msg: []const u8) bool {
self.processMessage(msg) catch |err| {
log.err("failed to process message: {}\n{s}", .{ err, msg });
return false;
};
// if there's an error, it's already been logged
self.processMessage(msg) catch return false;
return true;
}
@@ -140,83 +103,236 @@ pub fn CDPT(comptime TypeProvider: type) type {
// Called from above, in processMessage which handles client messages
// but can also be called internally. For example, Target.sendMessageToTarget
// calls back into dispatch to capture the response
// calls back into dispatch to capture the response.
pub fn dispatch(self: *Self, arena: Allocator, sender: anytype, str: []const u8) !void {
const input = json.parseFromSliceLeaky(InputMessage, arena, str, .{
.ignore_unknown_fields = true,
}) catch return error.InvalidJSON;
const domain, const action = blk: {
const method = input.method;
var command = Command(Self, @TypeOf(sender)){
.input = .{
.json = str,
.id = input.id,
.action = "",
.params = input.params,
.session_id = input.sessionId,
},
.cdp = self,
.arena = arena,
.sender = sender,
.browser_context = if (self.browser_context) |*bc| bc else null,
};
// See dispatchStartupCommand for more info on this.
var is_startup = false;
if (input.sessionId) |input_session_id| {
if (std.mem.eql(u8, input_session_id, "STARTUP")) {
is_startup = true;
} else if (self.isValidSessionId(input_session_id) == false) {
return command.sendError(-32001, "Unknown sessionId");
}
}
if (is_startup) {
dispatchStartupCommand(&command) catch |err| {
command.sendError(-31999, @errorName(err)) catch {};
return err;
};
} else {
dispatchCommand(&command, input.method) catch |err| {
command.sendError(-31998, @errorName(err)) catch {};
return err;
};
}
}
// A CDP session isn't 100% driven by the driver. There are independent
// actions that the browser is expected to take. For example, Puppeteer
// expects the browser to start up a tab and thus have existing targets.
// To this end, we create a [very] dummy BrowserContext, Target and
// Session. There isn't actually a BrowserContext, just a special id.
// When messages are received with the "STARTUP" sessionId, we do
// "special" handling - the bare minimum we need to do until the driver
// switches to a real BrowserContext.
// (I can imagine this logic will become driver-specific)
//
// For now every startup command is acknowledged with an empty result; the
// method of the incoming message is not inspected.
fn dispatchStartupCommand(command: anytype) !void {
return command.sendResult(null, .{});
}
fn dispatchCommand(command: anytype, method: []const u8) !void {
const domain = blk: {
const i = std.mem.indexOfScalarPos(u8, method, 0, '.') orelse {
return error.InvalidMethod;
};
break :blk .{ method[0..i], method[i + 1 ..] };
};
var command = Command(Self, @TypeOf(sender)){
.json = str,
.cdp = self,
.id = input.id,
.arena = arena,
.action = action,
._params = input.params,
.session_id = input.sessionId,
.sender = sender,
.session = self.session orelse blk: {
try self.newSession();
break :blk self.session.?;
},
command.input.action = method[i + 1 ..];
break :blk method[0..i];
};
switch (domain.len) {
3 => switch (@as(u24, @bitCast(domain[0..3].*))) {
asUint("DOM") => return @import("dom.zig").processMessage(&command),
asUint("Log") => return @import("log.zig").processMessage(&command),
asUint("CSS") => return @import("css.zig").processMessage(&command),
asUint("DOM") => return @import("dom.zig").processMessage(command),
asUint("Log") => return @import("log.zig").processMessage(command),
asUint("CSS") => return @import("css.zig").processMessage(command),
else => {},
},
4 => switch (@as(u32, @bitCast(domain[0..4].*))) {
asUint("Page") => return @import("page.zig").processMessage(&command),
asUint("Page") => return @import("page.zig").processMessage(command),
else => {},
},
5 => switch (@as(u40, @bitCast(domain[0..5].*))) {
asUint("Fetch") => return @import("fetch.zig").processMessage(&command),
asUint("Fetch") => return @import("fetch.zig").processMessage(command),
else => {},
},
6 => switch (@as(u48, @bitCast(domain[0..6].*))) {
asUint("Target") => return @import("target.zig").processMessage(&command),
asUint("Target") => return @import("target.zig").processMessage(command),
else => {},
},
7 => switch (@as(u56, @bitCast(domain[0..7].*))) {
asUint("Browser") => return @import("browser.zig").processMessage(&command),
asUint("Runtime") => return @import("runtime.zig").processMessage(&command),
asUint("Network") => return @import("network.zig").processMessage(&command),
asUint("Browser") => return @import("browser.zig").processMessage(command),
asUint("Runtime") => return @import("runtime.zig").processMessage(command),
asUint("Network") => return @import("network.zig").processMessage(command),
else => {},
},
8 => switch (@as(u64, @bitCast(domain[0..8].*))) {
asUint("Security") => return @import("security.zig").processMessage(&command),
asUint("Security") => return @import("security.zig").processMessage(command),
else => {},
},
9 => switch (@as(u72, @bitCast(domain[0..9].*))) {
asUint("Emulation") => return @import("emulation.zig").processMessage(&command),
asUint("Inspector") => return @import("inspector.zig").processMessage(&command),
asUint("Emulation") => return @import("emulation.zig").processMessage(command),
asUint("Inspector") => return @import("inspector.zig").processMessage(command),
else => {},
},
11 => switch (@as(u88, @bitCast(domain[0..11].*))) {
asUint("Performance") => return @import("performance.zig").processMessage(&command),
asUint("Performance") => return @import("performance.zig").processMessage(command),
else => {},
},
else => {},
}
return error.UnknownDomain;
}
// Reports whether `input_session_id` is the session id of the active
// BrowserContext. False when there is no BrowserContext, or when the
// BrowserContext has no session id yet.
fn isValidSessionId(self: *const Self, input_session_id: []const u8) bool {
    if (self.browser_context) |*bc| {
        if (bc.session_id) |current_session_id| {
            return std.mem.eql(u8, current_session_id, input_session_id);
        }
    }
    return false;
}
// Creates the BrowserContext for this CDP instance and returns its newly
// generated id. Only one BrowserContext may exist at a time.
//
// Errors:
// - error.AlreadyExists when a BrowserContext is already active.
// - any error returned by BrowserContext.init; in that case
//   `browser_context` is reset to null before returning.
pub fn createBrowserContext(self: *Self) ![]const u8 {
    if (self.browser_context != null) {
        return error.AlreadyExists;
    }
    const browser_context_id = self.browser_context_id_gen.next();

    // BrowserContext.init fills the value in place through a pointer (it
    // hands that pointer to the browser session), so the optional must be
    // non-null before we can take the payload's address. Assigning a bare
    // `undefined` would leave the optional's null-state undefined as well,
    // making the `.?` below a read of undefined memory. Coercing an undefined
    // *payload* yields a non-null optional whose contents init then fills.
    self.browser_context = @as(BrowserContext(Self), undefined);
    errdefer self.browser_context = null;

    try BrowserContext(Self).init(&self.browser_context.?, browser_context_id, self);
    return browser_context_id;
}
// Tears down the active BrowserContext if, and only if, its id equals
// `browser_context_id`. Returns true when the context was disposed, false
// when there is no active context or the id does not match.
pub fn disposeBrowserContext(self: *Self, browser_context_id: []const u8) bool {
    if (self.browser_context) |*active| {
        if (std.mem.eql(u8, active.id, browser_context_id)) {
            active.deinit();
            self.browser_context = null;
            return true;
        }
    }
    return false;
}
// Forwards `message` to the client's sendJSON. The options passed along
// disable emitting optional fields whose value is null.
fn sendJSON(self: *Self, message: anytype) !void {
return self.client.sendJSON(message, .{
.emit_null_optional_fields = false,
});
}
};
}
pub fn BrowserContext(comptime CDP_T: type) type {
const dom = @import("dom.zig");
return struct {
id: []const u8,
cdp: *CDP_T,
browser: CDP_T.Browser,
// Represents the browser session. There is no equivalent in CDP. For
// all intents and purpose, from CDP's point of view our Browser and
// our Session more or less maps to a BrowserContext. THIS HAS ZERO
// RELATION TO SESSION_ID
session: *CDP_T.Session,
// Maps to our Page. (There are other types of targets, but we only
// deal with "pages" for now). Since we only allow 1 open page at a
// time, we only have 1 target_id.
target_id: ?[]const u8,
// The CDP session_id. After the target/page is created, the client
// "attaches" to it (either explicitly or automatically). We return a
// "sessionId" which identifies this link. `sessionId` is how the CDP
// client informs us what it's trying to manipulate. Because we only
// support 1 BrowserContext at a time, and 1 page at a time, this is all
// pretty straightforward, but it still needs to be enforced, i.e. if we
// get a request with a sessionId that doesn't match the current one, we
// should reject it.
session_id: ?[]const u8,
// State
url: []const u8,
frame_id: []const u8,
loader_id: []const u8,
security_origin: []const u8,
page_life_cycle_events: bool,
secure_context_type: []const u8,
node_list: dom.NodeList,
node_search_list: dom.NodeSearchList,
const Self = @This();
// Initializes a BrowserContext in place.
//
// `self` must point at the storage the BrowserContext will live in: the
// pointer itself is handed to browser.newSession below.
// `id` becomes the context's id and `cdp` is the owning CDP instance, whose
// allocator and loop are used for the browser and the DOM node lists.
//
// Returns whatever error browser.newSession returns; the browser created
// here is deinit'd in that case.
fn init(self: *Self, id: []const u8, cdp: *CDP_T) !void {
self.* = .{
.id = id,
.cdp = cdp,
// browser and session are filled in right after this literal
.browser = undefined,
.session = undefined,
// no target and no CDP session until they are assigned later
.target_id = null,
.session_id = null,
.url = URL_BASE,
.frame_id = FRAME_ID,
.security_origin = URL_BASE,
.secure_context_type = "Secure", // TODO = enum
.loader_id = LOADER_ID,
.page_life_cycle_events = false, // TODO; Target based value
.node_list = dom.NodeList.init(cdp.allocator),
.node_search_list = dom.NodeSearchList.init(cdp.allocator),
};
// The browser is assigned into its final location before newSession is
// called on it.
self.browser = CDP_T.Browser.init(cdp.allocator, cdp.loop);
errdefer self.browser.deinit();
self.session = try self.browser.newSession(self);
}
// Releases everything this BrowserContext owns: the node list, every
// pending node search plus the list holding them, and finally the browser.
pub fn deinit(self: *Self) void {
    self.node_list.deinit();

    const searches = &self.node_search_list;
    for (searches.items) |*search| {
        search.deinit();
    }
    searches.deinit();

    self.browser.deinit();
}
// Clears the DOM bookkeeping while keeping the BrowserContext alive: the
// node list is reset, and every node search is deinit'd before the search
// list itself is emptied and its memory freed.
pub fn reset(self: *Self) void {
    self.node_list.reset();

    const searches = &self.node_search_list;
    for (searches.items) |*search| {
        search.deinit();
    }
    searches.clearAndFree();
}
pub fn onInspectorResponse(ctx: *anyopaque, _: u32, msg: []const u8) void {
if (std.log.defaultLogEnabled(.debug)) {
@@ -252,19 +368,24 @@ pub fn CDPT(comptime TypeProvider: type) type {
};
}
// This is hacky * 2. First, we have the JSON payload by gluing our
// This is hacky x 2. First, we create the JSON payload by gluing our
// session_id onto it. Second, we're much more client/websocket aware than
// we should be.
fn sendInspectorMessage(self: *Self, msg: []const u8) !void {
var arena = std.heap.ArenaAllocator.init(self.allocator);
const session_id = self.session_id orelse {
// We no longer have an active session. What should we do
// in this case?
return;
};
const cdp = self.cdp;
var arena = std.heap.ArenaAllocator.init(cdp.allocator);
errdefer arena.deinit();
const field = ",\"sessionId\":\"";
const session_id = @tagName(self.session_id);
// + 1 for the closing quote after the session id
// + 10 for the max websocket header
const message_len = msg.len + session_id.len + 1 + field.len + 10;
var buf: std.ArrayListUnmanaged(u8) = .{};
@@ -283,7 +404,7 @@ pub fn CDPT(comptime TypeProvider: type) type {
buf.appendSliceAssumeCapacity("\"}");
std.debug.assert(buf.items.len == message_len);
try self.client.sendJSONRaw(arena, buf);
try cdp.client.sendJSONRaw(arena, buf);
}
};
}
@@ -294,38 +415,29 @@ pub fn CDPT(comptime TypeProvider: type) type {
// generic.
pub fn Command(comptime CDP_T: type, comptime Sender: type) type {
return struct {
// reference to our CDP instance
cdp: *CDP_T,
// Comes directly from the input.id field
id: ?i64,
// A misc arena that can be used for any allocation for processing
// the message
arena: Allocator,
// the browser session
session: *CDP_T.Session,
// reference to our CDP instance
cdp: *CDP_T,
// The "action" of the message.Given a method of "LOG.enable", the
// action is "enable"
action: []const u8,
// The browser context this command targets
browser_context: ?*BrowserContext(CDP_T),
// Comes directly from the input.sessionId field
session_id: ?[]const u8,
// Unparsed / untyped input.params.
_params: ?InputParams,
// The full raw json input
json: []const u8,
// The command input (the id, optional session_id, params, ...)
input: Input,
// In most cases, Sender is going to be cdp itself. We'll call
// sender.sendJSON() and CDP will send it to the client. But some
// commands are dispatched internally, in which case the Sender will
// be code to capture the data that we were "sending".
sender: Sender,
const Self = @This();
pub fn params(self: *const Self, comptime T: type) !?T {
if (self._params) |p| {
if (self.input.params) |p| {
return try json.parseFromSliceLeaky(
T,
self.arena,
@@ -336,20 +448,26 @@ pub fn Command(comptime CDP_T: type, comptime Sender: type) type {
return null;
}
// Asks the CDP instance to create its BrowserContext, remembers a pointer to
// it on this command and returns that pointer. Errors from
// cdp.createBrowserContext are propagated unchanged.
pub fn createBrowserContext(self: *Self) !*BrowserContext(CDP_T) {
    _ = try self.cdp.createBrowserContext();

    const created = &self.cdp.browser_context.?;
    self.browser_context = created;
    return created;
}
const SendResultOpts = struct {
include_session_id: bool = true,
};
pub fn sendResult(self: *Self, result: anytype, opts: SendResultOpts) !void {
return self.sender.sendJSON(.{
.id = self.id,
.id = self.input.id,
.result = if (comptime @typeInfo(@TypeOf(result)) == .Null) struct {}{} else result,
.sessionId = if (opts.include_session_id) self.session_id else null,
.sessionId = if (opts.include_session_id) self.input.session_id else null,
});
}
const SendEventOpts = struct {
session_id: ?[]const u8 = null,
};
pub fn sendEvent(self: *Self, method: []const u8, p: anytype, opts: SendEventOpts) !void {
// Events ALWAYS go to the client. self.sender should not be used
return self.cdp.sendJSON(.{
@@ -358,6 +476,32 @@ pub fn Command(comptime CDP_T: type, comptime Sender: type) type {
.sessionId = opts.session_id,
});
}
// Replies to the current command with an error. The payload carries the id
// of the incoming message together with `code` and `message`, and goes
// through `sender` (not directly to the client).
pub fn sendError(self: *Self, code: i32, message: []const u8) !void {
    const request_id = self.input.id;
    return self.sender.sendJSON(.{
        .id = request_id,
        .code = code,
        .message = message,
    });
}
// The parts of an incoming message that a command handler works with.
const Input = struct {
// When we reply to a message, we echo back the message id
id: ?i64,

// The "action" of the message. Given a method of "Log.enable", the
// action is "enable"
action: []const u8,

// See notes in BrowserContext about session_id
session_id: ?[]const u8,

// Unparsed / untyped input.params.
params: ?InputParams,

// The full raw json input
json: []const u8,
};
};
}
@@ -395,24 +539,7 @@ const InputParams = struct {
}
};
// Common
// ------
// TODO: hard coded IDs
pub const SessionID = enum {
BROWSERSESSIONID597D9875C664CAC0,
CONTEXTSESSIONID0497A05C95417CF4,
pub fn parse(str: []const u8) !SessionID {
return std.meta.stringToEnum(SessionID, str) orelse {
log.err("parse sessionID: {s}", .{str});
return error.InvalidSessionID;
};
}
};
const testing = @import("testing.zig");
test "cdp: invalid json" {
var ctx = testing.context();
defer ctx.deinit();
@@ -425,6 +552,7 @@ test "cdp: invalid json" {
try testing.expectError(error.InvalidMethod, ctx.processMessage(.{
.method = "Target",
}));
try ctx.expectSentError(-31998, "InvalidMethod", .{});
try testing.expectError(error.UnknownDomain, ctx.processMessage(.{
.method = "Unknown.domain",
@@ -434,3 +562,53 @@ test "cdp: invalid json" {
.method = "Target.over9000",
}));
}
// A message whose sessionId does not match the active BrowserContext's
// session id is answered with error -32001 "Unknown sessionId".
test "cdp: invalid sessionId" {
var ctx = testing.context();
defer ctx.deinit();
{
// we have no browser context
try ctx.processMessage(.{ .method = "Hi", .sessionId = "nope" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
{
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .method = "Hi", .sessionId = "BC-Has-No-SessionId" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
{
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .method = "Hi", .sessionId = "SESS-1" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
}
// The special "STARTUP" sessionId is accepted whatever the BrowserContext
// state is; each message gets a result that echoes its id and the "STARTUP"
// sessionId.
test "cdp: STARTUP sessionId" {
var ctx = testing.context();
defer ctx.deinit();
{
// we have no browser context
try ctx.processMessage(.{ .id = 2, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 2, .index = 0, .session_id = "STARTUP" });
}
{
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .id = 3, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 3, .index = 0, .session_id = "STARTUP" });
}
{
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .id = 4, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 4, .index = 0, .session_id = "STARTUP" });
}
}