Merge branch 'main' into osc/feat-mcp-detect-forms

This commit is contained in:
Adrià Arrufat
2026-03-24 09:25:47 +09:00
54 changed files with 3006 additions and 731 deletions

View File

@@ -33,6 +33,7 @@ const Page = @import("../browser/Page.zig");
const Incrementing = @import("id.zig").Incrementing;
const Notification = @import("../Notification.zig");
const InterceptState = @import("domains/fetch.zig").InterceptState;
const Mime = @import("../browser/Mime.zig");
pub const URL_BASE = "chrome://newtab/";
@@ -129,9 +130,10 @@ pub fn CDPT(comptime TypeProvider: type) type {
// A bit hacky right now. The main server loop doesn't unblock for
// scheduled tasks, so we run this directly in order to process any
// timeouts (or http events) which are ready to be processed.
pub fn pageWait(self: *Self, ms: u32) Session.WaitResult {
const session = &(self.browser.session orelse return .no_page);
return session.wait(.{ .timeout_ms = ms });
pub fn pageWait(self: *Self, ms: u32) !Session.Runner.CDPWaitResult {
const session = &(self.browser.session orelse return error.NoPage);
var runner = try session.runner(.{});
return runner.waitCDP(.{ .ms = ms });
}
// Called from above, in processMessage which handles client messages
@@ -189,19 +191,10 @@ pub fn CDPT(comptime TypeProvider: type) type {
// (I can imagine this logic will become driver-specific)
fn dispatchStartupCommand(command: anytype, method: []const u8) !void {
// Stagehand parses the response and error if we don't return a
// correct one for this call.
// correct one for Page.getFrameTree on startup call.
if (std.mem.eql(u8, method, "Page.getFrameTree")) {
return command.sendResult(.{
.frameTree = .{
.frame = .{
.id = "TID-STARTUP",
.loaderId = "LOADERID24DD2FD56CF1EF33C965C79C",
.securityOrigin = URL_BASE,
.url = "about:blank",
.secureContextType = "Secure",
},
},
}, .{});
// The Page.getFrameTree handles startup response gracefully.
return dispatchCommand(command, method);
}
return command.sendResult(null, .{});
@@ -324,6 +317,11 @@ pub fn BrowserContext(comptime CDP_T: type) type {
const Node = @import("Node.zig");
const AXNode = @import("AXNode.zig");
/// A response body kept in memory for CDP, along with the flag that decides
/// how it is handed back to a client.
const CapturedResponse = struct {
    /// Body bytes accumulated for the response so far.
    data: std.ArrayList(u8),
    /// When true, the body is base64-encoded before being returned;
    /// when false, it is returned as-is.
    must_encode: bool,
};
return struct {
id: []const u8,
cdp: *CDP_T,
@@ -384,7 +382,7 @@ pub fn BrowserContext(comptime CDP_T: type) type {
// ever streamed. So if CDP is the only thing that needs bodies in
// memory for an arbitrary amount of time, then that's where we're going
// to store them.
captured_responses: std.AutoHashMapUnmanaged(usize, std.ArrayList(u8)),
captured_responses: std.AutoHashMapUnmanaged(usize, CapturedResponse),
notification: *Notification,
@@ -637,6 +635,35 @@ pub fn BrowserContext(comptime CDP_T: type) type {
/// Notification handler called once the headers of an HTTP response are
/// complete.
///
/// Makes sure an entry exists in `captured_responses` for the transfer, with
/// an empty body buffer and a `must_encode` flag derived from the response's
/// content type, then forwards the message to the network domain.
///
/// `ctx` must point to the `Self` instance that registered this handler.
/// Errors from the allocator, from `Mime.parse`, or from the network domain
/// handler are propagated to the caller.
pub fn onHttpResponseHeadersDone(ctx: *anyopaque, msg: *const Notification.ResponseHeaderDone) !void {
    const self: *Self = @ptrCast(@alignCast(ctx));
    defer self.resetNotificationArena();

    const arena = self.page_arena;

    // Prepare the captured response value.
    const id = msg.transfer.id;
    const gop = try self.captured_responses.getOrPut(arena, id);
    if (!gop.found_existing) {
        // getOrPut has already inserted the key, but its value is still
        // undefined at this point. If computing the value fails, remove the
        // entry again so later lookups never observe uninitialized memory.
        errdefer _ = self.captured_responses.remove(id);

        gop.value_ptr.* = .{
            .data = .empty,
            // Encode the data in base64 by default, but don't encode
            // for well known content-type.
            .must_encode = blk: {
                const transfer = msg.transfer;
                if (transfer.response_header.?.contentType()) |ct| {
                    const mime = try Mime.parse(ct);
                    if (!mime.isText()) {
                        break :blk true;
                    }
                    // NOTE(review): this comparison is case-sensitive; confirm
                    // that charsetString() yields a normalized value.
                    if (std.mem.eql(u8, "UTF-8", mime.charsetString())) {
                        break :blk false;
                    }
                }
                break :blk true;
            },
        };
    }

    return @import("domains/network.zig").httpResponseHeaderDone(self.notification_arena, self, msg);
}
@@ -650,11 +677,9 @@ pub fn BrowserContext(comptime CDP_T: type) type {
const arena = self.page_arena;
const id = msg.transfer.id;
const gop = try self.captured_responses.getOrPut(arena, id);
if (!gop.found_existing) {
gop.value_ptr.* = .{};
}
try gop.value_ptr.appendSlice(arena, try arena.dupe(u8, msg.data));
const resp = self.captured_responses.getPtr(id) orelse lp.assert(false, "onHttpResponseData missinf captured response", .{});
return resp.data.appendSlice(arena, msg.data);
}
pub fn onHttpRequestAuthRequired(ctx: *anyopaque, data: *const Notification.RequestAuthRequired) !void {

View File

@@ -36,6 +36,7 @@ pub fn processMessage(cmd: anytype) !void {
clickNode,
fillNode,
scrollNode,
waitForSelector,
}, cmd.input.action) orelse return error.UnknownMethod;
switch (action) {
@@ -47,6 +48,7 @@ pub fn processMessage(cmd: anytype) !void {
.clickNode => return clickNode(cmd),
.fillNode => return fillNode(cmd),
.scrollNode => return scrollNode(cmd),
.waitForSelector => return waitForSelector(cmd),
}
}
@@ -257,6 +259,32 @@ fn scrollNode(cmd: anytype) !void {
return cmd.sendResult(.{}, .{});
}
/// Handles the `waitForSelector` action: delegates to
/// `lp.actions.waitForSelector` and replies with the `backendNodeId` of the
/// node it returns.
///
/// Params:
///   - `selector`: required selector string.
///   - `timeout`: optional timeout; 5000 is used when omitted.
///
/// Errors:
///   - `InvalidParam` when params are missing or the selector is rejected
///     with `InvalidSelector`.
///   - `NoBrowserContext` / `PageNotLoaded` when there is no browser context
///     or no current page.
///   - `InternalError` for every other wait failure, including a timeout.
fn waitForSelector(cmd: anytype) !void {
    const Params = struct {
        selector: []const u8,
        timeout: ?u32 = null,
    };

    const maybe_args = try cmd.params(Params);
    const args = maybe_args orelse return error.InvalidParam;

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    if (bc.session.currentPage() == null) return error.PageNotLoaded;

    const wait_ms = args.timeout orelse 5000;

    // The wait helper is handed a null-terminated copy of the selector.
    const selector = try cmd.arena.dupeZ(u8, args.selector);

    const found = lp.actions.waitForSelector(selector, wait_ms, bc.session) catch |err| {
        // Only an invalid selector is reported as a parameter problem;
        // everything else (timeouts included) surfaces as an internal error.
        return if (err == error.InvalidSelector) error.InvalidParam else error.InternalError;
    };

    const entry = try bc.node_registry.register(found);
    return cmd.sendResult(.{ .backendNodeId = entry.id }, .{});
}
const testing = @import("../testing.zig");
test "cdp.lp: getMarkdown" {
var ctx = testing.context();
@@ -315,7 +343,8 @@ test "cdp.lp: action tools" {
const page = try bc.session.createPage();
const url = "http://localhost:9582/src/browser/tests/mcp_actions.html";
try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } });
_ = bc.session.wait(.{});
var runner = try bc.session.runner(.{});
try runner.wait(.{ .ms = 2000 });
// Test Click
const btn = page.document.getElementById("btn", page).?.asNode();
@@ -366,3 +395,44 @@ test "cdp.lp: action tools" {
try testing.expect(result.isTrue());
}
// Exercises LP.waitForSelector through the CDP message path: a selector that
// is expected to match right away, one that is expected to match later, and
// one that is expected never to match.
test "cdp.lp: waitForSelector" {
var ctx = testing.context();
defer ctx.deinit();
const bc = try ctx.loadBrowserContext(.{});
const page = try bc.session.createPage();
const url = "http://localhost:9582/src/browser/tests/mcp_wait_for_selector.html";
try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } });
// Give the navigation up to 2000 ms before sending any command.
var runner = try bc.session.runner(.{});
try runner.wait(.{ .ms = 2000 });
// 1. Existing element: the reply must carry a backendNodeId.
try ctx.processMessage(.{
.id = 1,
.method = "LP.waitForSelector",
.params = .{ .selector = "#existing", .timeout = 2000 },
});
var result = ctx.client.?.sent.items[0].object.get("result").?.object;
try testing.expect(result.get("backendNodeId") != null);
// Reset the sent buffer so the next reply is again at index 0.
ctx.client.?.sent.clearRetainingCapacity();
// 2. Delayed element: presumably added by the fixture page after load
// (verify against mcp_wait_for_selector.html); still expects a backendNodeId.
try ctx.processMessage(.{
.id = 2,
.method = "LP.waitForSelector",
.params = .{ .selector = "#delayed", .timeout = 5000 },
});
result = ctx.client.?.sent.items[0].object.get("result").?.object;
try testing.expect(result.get("backendNodeId") != null);
ctx.client.?.sent.clearRetainingCapacity();
// 3. Timeout error: with a 100 ms timeout and a selector expected not to
// match, the reply must be an error object that has a code.
try ctx.processMessage(.{
.id = 3,
.method = "LP.waitForSelector",
.params = .{ .selector = "#nonexistent", .timeout = 100 },
});
const err_obj = ctx.client.?.sent.items[0].object.get("error").?.object;
try testing.expect(err_obj.get("code") != null);
}

View File

@@ -208,11 +208,22 @@ fn getResponseBody(cmd: anytype) !void {
const request_id = try idFromRequestId(params.requestId);
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
const buf = bc.captured_responses.getPtr(request_id) orelse return error.RequestNotFound;
const resp = bc.captured_responses.getPtr(request_id) orelse return error.RequestNotFound;
try cmd.sendResult(.{
.body = buf.items,
.base64Encoded = false,
if (!resp.must_encode) {
return cmd.sendResult(.{
.body = resp.data.items,
.base64Encoded = false,
}, .{});
}
const encoded_len = std.base64.standard.Encoder.calcSize(resp.data.items.len);
const encoded = try cmd.arena.alloc(u8, encoded_len);
_ = std.base64.standard.Encoder.encode(encoded, resp.data.items);
return cmd.sendResult(.{
.body = encoded,
.base64Encoded = true,
}, .{});
}

View File

@@ -75,8 +75,21 @@ const Frame = struct {
};
fn getFrameTree(cmd: anytype) !void {
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
const target_id = bc.target_id orelse return error.TargetNotLoaded;
// Stagehand parses the response and errors out if we don't return a
// correct one for this call when the browser context or target id is missing.
const startup = .{
.frameTree = .{
.frame = .{
.id = "TID-STARTUP",
.loaderId = "LID-STARTUP",
.securityOrigin = @import("../cdp.zig").URL_BASE,
.url = "about:blank",
.secureContextType = "Secure",
},
},
};
const bc = cmd.browser_context orelse return cmd.sendResult(startup, .{});
const target_id = bc.target_id orelse return cmd.sendResult(startup, .{});
return cmd.sendResult(.{
.frameTree = .{
@@ -633,8 +646,18 @@ test "cdp.page: getFrameTree" {
defer ctx.deinit();
{
try ctx.processMessage(.{ .id = 10, .method = "Page.getFrameTree", .params = .{ .targetId = "X" } });
try ctx.expectSentError(-31998, "BrowserContextNotLoaded", .{ .id = 10 });
// no browser context - should return TID-STARTUP
try ctx.processMessage(.{ .id = 1, .method = "Page.getFrameTree", .sessionId = "STARTUP" });
try ctx.expectSentResult(.{
.frameTree = .{
.frame = .{
.id = "TID-STARTUP",
.loaderId = "LID-STARTUP",
.url = "about:blank",
.secureContextType = "Secure",
},
},
}, .{ .id = 1, .session_id = "STARTUP" });
}
const bc = try ctx.loadBrowserContext(.{ .id = "BID-9", .url = "hi.html", .target_id = "FID-000000000X".* });
@@ -659,6 +682,29 @@ test "cdp.page: getFrameTree" {
},
}, .{ .id = 11 });
}
{
// The STARTUP session is handled normally when a browser context and a target id exist.
try ctx.processMessage(.{ .id = 12, .method = "Page.getFrameTree", .session_id = "STARTUP" });
try ctx.expectSentResult(.{
.frameTree = .{
.frame = .{
.id = "FID-000000000X",
.loaderId = "LID-0000000001",
.url = "http://127.0.0.1:9582/src/browser/tests/hi.html",
.domainAndRegistry = "",
.securityOrigin = bc.security_origin,
.mimeType = "text/html",
.adFrameStatus = .{
.adFrameType = "none",
},
.secureContextType = bc.secure_context_type,
.crossOriginIsolatedContextType = "NotIsolated",
.gatedAPIFeatures = [_][]const u8{},
},
},
}, .{ .id = 12 });
}
}
test "cdp.page: captureScreenshot" {

View File

@@ -136,7 +136,8 @@ const TestContext = struct {
0,
);
try page.navigate(full_url, .{});
_ = bc.session.wait(.{});
var runner = try bc.session.runner(.{});
try runner.wait(.{ .ms = 2000 });
}
return bc;
}