Compare commits

...

18 Commits

Author SHA1 Message Date
Adrià Arrufat
1f75ce1778 agent: add unit tests for Command, CommandExecutor, and Recorder 2026-04-04 08:28:56 +02:00
Adrià Arrufat
7aabda9392 agent: add recorder, self-healing, env substitution, and security fixes
- Add Recorder for recording REPL sessions to .panda files, with
  --no-record flag and positional file arg support. Skips read-only
  commands (WAIT, TREE, MARKDOWN) per spec.
- Record resolved LLM tool calls as Pandascript commands so the
  generated artifact is deterministic.
- Add self-healing in --run mode: on command failure, prompt the LLM
  with the # INTENT context to resolve an alternative.
- Add LOGIN and ACCEPT_COOKIES high-level commands (LLM-resolved).
- Add multi-line EVAL """...""" support via ScriptIterator.
- Add $VAR_NAME environment variable substitution in command arguments.
- Escape JS strings in execType/execExtract to prevent injection.
- Sanitize output file paths in EXTRACT to prevent path traversal.
2026-04-04 08:14:48 +02:00
Adrià Arrufat
e29f33642c agent: add --run command for deterministic script replay 2026-04-04 07:56:10 +02:00
Adrià Arrufat
d94effb237 agent: improve tool call detection and logging 2026-04-04 07:56:10 +02:00
Adrià Arrufat
3b1ef66b51 agent: add markdown command 2026-04-04 07:56:10 +02:00
Adrià Arrufat
15c0a7be83 agent: add manual command support to REPL
Adds a parser and executor for manual commands like GOTO and CLICK.
Unrecognized input continues to be processed by the AI.
2026-04-04 07:56:10 +02:00
Adrià Arrufat
a5d3d686b8 agent: use arena allocators for messages and tools 2026-04-04 07:56:10 +02:00
Adrià Arrufat
20c31a3f71 agent: remove bold formatting from prompt 2026-04-04 07:56:10 +02:00
Adrià Arrufat
a81a24229b Add interactive agent mode with LLM-powered web browsing
Introduces `lightpanda agent` command that provides a REPL where users
can chat with an AI that uses the browser's tools (goto, markdown, click,
fill, etc.) to browse the web. Uses zenai for multi-provider LLM support
(Anthropic, OpenAI, Gemini) and linenoise v2 for terminal line editing.
2026-04-04 07:56:10 +02:00
Karl Seguin
5826caf6dc Merge pull request #2070 from lightpanda-io/mcp-new-action-tools
Some checks are pending
e2e-test / zig build release (push) Waiting to run
e2e-test / demo-scripts (push) Blocked by required conditions
e2e-test / wba-demo-scripts (push) Blocked by required conditions
e2e-test / wba-test (push) Blocked by required conditions
e2e-test / cdp-and-hyperfine-bench (push) Blocked by required conditions
e2e-test / perf-fmt (push) Blocked by required conditions
e2e-test / browser fetch (push) Blocked by required conditions
zig-test / zig fmt (push) Waiting to run
zig-test / zig test using v8 in debug mode (push) Waiting to run
zig-test / zig test (push) Waiting to run
zig-test / perf-fmt (push) Blocked by required conditions
mcp: Add hover, press, selectOption, setChecked
2026-04-04 10:20:54 +08:00
Karl Seguin
b0c6c2d591 Merge pull request #2083 from tmchow/fix/2080-keyboard-event-propagation
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / wba-demo-scripts (push) Has been cancelled
e2e-test / wba-test (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / zig fmt (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
fix: propagate keyUp and char keyboard events to JS listeners
2026-04-04 08:19:53 +08:00
Trevin Chow
b33bb54442 fix: propagate keyUp and char keyboard events to JS listeners
dispatchKeyEvent only handled keyDown, returning early for keyUp,
rawKeyDown, and char types. This meant JS keyup and keypress
listeners never fired via CDP.

Now keyUp dispatches as "keyup" and char dispatches as "keypress".
rawKeyDown remains a no-op (Chrome-internal, not used for JS dispatch).

Fixes #2080
Ref #2043
2026-04-03 17:08:09 -07:00
Pierre Tachoire
06fe6c5e7d Merge pull request #1934 from lightpanda-io/http-cache
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / wba-demo-scripts (push) Has been cancelled
e2e-test / wba-test (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / zig fmt (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
HTTP Caching
2026-04-03 17:11:06 +02:00
Adrià Arrufat
72229f715a Merge branch 'main' into mcp-new-action-tools 2026-04-03 07:06:10 +02:00
Adrià Arrufat
6c9a5ddab8 Extract shared helpers to reduce duplication
- Extract dispatchInputAndChangeEvents() in actions.zig, used by fill,
  selectOption, and setChecked
- Extract resolveNodeAndPage() in tools.zig, used by click, fill, hover,
  selectOption, setChecked, and nodeDetails handlers
2026-04-02 11:20:28 +02:00
Adrià Arrufat
46a63e0b4b Add focus before fill and findElement MCP tool
- fill action now calls focus() on the element before setting its value,
  ensuring focus/focusin events fire for JS listeners
- Add findElement MCP tool for locating interactive elements by ARIA role
  and/or accessible name (case-insensitive substring match)
- Add tests for findElement (by role, by name, no matches, missing params)
2026-04-02 11:03:49 +02:00
Adrià Arrufat
58143ee3d1 Fix event order and add tests
- Fix setChecked event order: click fires before input/change to match
  browser behavior
- Add tests for hover, press, selectOption, setChecked MCP tools
- Merge all action tests into a single test case sharing one page load
- Add test elements to mcp_actions.html (hover target, key input,
  second select, checkbox, radio)
2026-04-02 10:46:28 +02:00
Adrià Arrufat
5e79af42f4 mcp: Add hover, press, selectOption, setChecked
New browser actions and MCP tools for AI agent interaction:
- hover: dispatches mouseover/mouseenter events on an element
- press: dispatches keydown/keyup keyboard events (Enter, Tab, etc.)
- selectOption: selects a dropdown option by value with input/change events
- setChecked: checks/unchecks checkbox or radio with input/change/click events
2026-04-02 09:47:22 +02:00
16 changed files with 2612 additions and 66 deletions

View File

@@ -85,6 +85,8 @@ pub fn build(b: *Build) !void {
try linkV8(b, mod, enable_asan, enable_tsan, prebuilt_v8_path);
try linkCurl(b, mod, enable_tsan);
try linkHtml5Ever(b, mod);
linkZenai(b, mod);
linkLinenoise(b, mod);
break :blk mod;
};
@@ -750,6 +752,19 @@ fn buildCurl(
return lib;
}
/// Makes the `zenai` package dependency importable from `mod` under the
/// name "zenai".
fn linkZenai(b: *Build, mod: *Build.Module) void {
    const zenai_module = b.dependency("zenai", .{}).module("zenai");
    mod.addImport("zenai", zenai_module);
}
/// Adds the `linenoise` dependency to `mod`: its root directory becomes an
/// include path and `linenoise.c` is compiled as a C source file.
fn linkLinenoise(b: *Build, mod: *Build.Module) void {
    const linenoise = b.dependency("linenoise", .{});
    const root_dir = linenoise.path("");
    const c_source = linenoise.path("linenoise.c");

    mod.addIncludePath(root_dir);
    mod.addCSourceFile(.{ .file = c_source });
}
/// Resolves the semantic version of the build.
///
/// The base version is read from `build.zig.zon`. This can be overridden

View File

@@ -30,6 +30,13 @@
.url = "https://github.com/curl/curl/releases/download/curl-8_18_0/curl-8.18.0.tar.gz",
.hash = "N-V-__8AALp9QAGn6CCHZ6fK_FfMyGtG824LSHYHHasM3w-y",
},
.zenai = .{
.path = "../zenai",
},
.linenoise = .{
.url = "https://github.com/antirez/linenoise/archive/refs/tags/2.0.tar.gz",
.hash = "N-V-__8AAJ4HAgCX79UDBfNwhqAqUVoGC44ib6UYa18q6oa_",
},
},
.paths = .{""},
}

View File

@@ -32,6 +32,7 @@ pub const RunMode = enum {
serve,
version,
mcp,
agent,
};
pub const CDP_MAX_HTTP_REQUEST_SIZE = 4096;
@@ -63,56 +64,56 @@ pub fn deinit(self: *const Config, allocator: Allocator) void {
/// Returns the `tls_verify_host` common option of the active mode.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn tlsVerifyHost(self: *const Config) bool {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.tls_verify_host,
        else => unreachable,
    };
}
/// Returns the `obey_robots` common option of the active mode.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn obeyRobots(self: *const Config) bool {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.obey_robots,
        else => unreachable,
    };
}
/// Returns the `http_proxy` common option of the active mode (may be null).
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpProxy(self: *const Config) ?[:0]const u8 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_proxy,
        else => unreachable,
    };
}
/// Returns the `proxy_bearer_token` common option of the active mode.
/// The help and version modes have no common options and yield null.
pub fn proxyBearerToken(self: *const Config) ?[:0]const u8 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.proxy_bearer_token,
        .help, .version => null,
    };
}
/// Returns the `http_max_concurrent` common option of the active mode,
/// falling back to 10 when it is unset.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpMaxConcurrent(self: *const Config) u8 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_concurrent orelse 10,
        else => unreachable,
    };
}
/// Returns the `http_max_host_open` common option of the active mode,
/// falling back to 4 when it is unset.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpMaxHostOpen(self: *const Config) u8 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_host_open orelse 4,
        else => unreachable,
    };
}
/// Returns the `http_connect_timeout` common option of the active mode,
/// falling back to 0 when it is unset.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpConnectTimeout(self: *const Config) u31 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_connect_timeout orelse 0,
        else => unreachable,
    };
}
/// Returns the `http_timeout` common option of the active mode, falling back
/// to 5000 when it is unset.
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpTimeout(self: *const Config) u31 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_timeout orelse 5000,
        else => unreachable,
    };
}
@@ -123,35 +124,35 @@ pub fn httpMaxRedirects(_: *const Config) u8 {
/// Returns the `http_max_response_size` common option of the active mode
/// (null when unset).
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn httpMaxResponseSize(self: *const Config) ?usize {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.http_max_response_size,
        else => unreachable,
    };
}
/// Returns the `log_level` common option of the active mode (null when
/// unset).
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn logLevel(self: *const Config) ?log.Level {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.log_level,
        else => unreachable,
    };
}
/// Returns the `log_format` common option of the active mode (null when
/// unset).
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn logFormat(self: *const Config) ?log.Format {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.log_format,
        else => unreachable,
    };
}
/// Returns the `log_filter_scopes` common option of the active mode (null
/// when unset).
/// Only the serve, fetch, mcp and agent modes carry common options; calling
/// this in any other mode is `unreachable`.
pub fn logFilterScopes(self: *const Config) ?[]const log.Scope {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.log_filter_scopes,
        else => unreachable,
    };
}
/// Returns the `user_agent_suffix` common option of the active mode.
/// The help and version modes have no common options and yield null.
pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
    return switch (self.mode) {
        inline .serve, .fetch, .mcp, .agent => |opts| opts.common.user_agent_suffix,
        .help, .version => null,
    };
}
@@ -189,7 +190,7 @@ pub fn advertiseHost(self: *const Config) []const u8 {
pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
return switch (self.mode) {
inline .serve, .fetch, .mcp => |opts| WebBotAuthConfig{
inline .serve, .fetch, .mcp, .agent => |opts| WebBotAuthConfig{
.key_file = opts.common.web_bot_auth_key_file orelse return null,
.keyid = opts.common.web_bot_auth_keyid orelse return null,
.domain = opts.common.web_bot_auth_domain orelse return null,
@@ -220,6 +221,7 @@ pub const Mode = union(RunMode) {
serve: Serve,
version: void,
mcp: Mcp,
agent: Agent,
};
pub const Serve = struct {
@@ -238,6 +240,24 @@ pub const Mcp = struct {
cdp_port: ?u16 = null,
};
/// LLM providers that can be selected with the agent's `--provider` option.
/// The tag names are the exact strings accepted on the command line
/// (`parseAgentArgs` maps the argument with `std.meta.stringToEnum`).
pub const AiProvider = enum {
anthropic,
openai,
gemini,
};
/// Options of the `agent` run mode, filled in by `parseAgentArgs`.
pub const Agent = struct {
// Options shared with the serve, fetch and mcp modes.
common: Common = .{},
// `--provider`; anthropic unless specified.
provider: AiProvider = .anthropic,
// `--model`; null means no model was given on the command line.
model: ?[:0]const u8 = null,
// `--api-key` / `--api_key`; null means no key was given on the command line.
api_key: ?[:0]const u8 = null,
// `--system-prompt` / `--system_prompt`; null means no override was given.
system_prompt: ?[:0]const u8 = null,
// Set to true by `--repl`; it is already true by default.
repl: bool = true,
// `--run <file>`: path of the script to replay.
script_file: ?[]const u8 = null,
// Positional argument: file the REPL session is recorded to.
// Reset to null by `parseAgentArgs` when `no_record` is set.
record_file: ?[]const u8 = null,
// `--no-record` / `--no_record`.
no_record: bool = false,
};
pub const DumpFormat = enum {
html,
markdown,
@@ -411,7 +431,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
const usage =
\\usage: {s} command [options] [URL]
\\
\\Command can be either 'fetch', 'serve', 'mcp' or 'help'
\\Command can be either 'fetch', 'serve', 'mcp', 'agent' or 'help'
\\
\\fetch command
\\Fetches the specified URL
@@ -493,6 +513,24 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\ Valid: 2024-11-05, 2025-03-26, 2025-06-18, 2025-11-25.
\\ Defaults to "2024-11-05".
\\
++ common_options ++
\\
\\agent command
\\Starts an interactive AI agent that can browse the web
\\Example: {s} agent --provider anthropic --model claude-sonnet-4-20250514
\\
\\Options:
\\--provider The AI provider: anthropic, openai, or gemini.
\\ Defaults to "anthropic".
\\
\\--model The model name to use.
\\ Defaults to a sensible default per provider.
\\
\\--api-key The API key. Can also be set via environment variable:
\\ ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.
\\
\\--system-prompt Override the default system prompt.
\\
++ common_options ++
\\
\\version command
@@ -502,7 +540,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\Displays this message
\\
;
std.debug.print(usage, .{ self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name });
std.debug.print(usage, .{ self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name, self.exec_name });
if (success) {
return std.process.cleanExit();
}
@@ -539,6 +577,8 @@ pub fn parseArgs(allocator: Allocator) !Config {
return init(allocator, exec_name, .{ .help = false }) },
.mcp => .{ .mcp = parseMcpArgs(allocator, &args) catch
return init(allocator, exec_name, .{ .help = false }) },
.agent => .{ .agent = parseAgentArgs(allocator, &args) catch
return init(allocator, exec_name, .{ .help = false }) },
.version => .{ .version = {} },
};
return init(allocator, exec_name, mode);
@@ -884,6 +924,93 @@ fn parseFetchArgs(
};
}
/// Parses the command-line options that follow the `agent` command.
///
/// Recognised options: `--provider`, `--model`, `--api-key`/`--api_key`,
/// `--repl`, `--run <file>`, `--system-prompt`/`--system_prompt`,
/// `--no-record`/`--no_record`, every option understood by `parseCommonArg`,
/// and one positional argument (anything not starting with `-`) naming the
/// file the REPL session is recorded to.
///
/// All string values are copied with `allocator`: the slices returned by
/// `ArgIterator.next` point into the iterator's own buffer and must not be
/// kept once the iterator is released.
///
/// Returns `error.InvalidArgument` when an option is missing its value or the
/// provider name is unknown, and `error.UnkownOption` (spelling as used by
/// this code) for an unrecognised option.
fn parseAgentArgs(
    allocator: Allocator,
    args: *std.process.ArgIterator,
) !Agent {
    var result: Agent = .{};

    while (args.next()) |opt| {
        if (std.mem.eql(u8, "--provider", opt)) {
            const str = args.next() orelse {
                log.fatal(.app, "missing argument value", .{ .arg = opt });
                return error.InvalidArgument;
            };
            result.provider = std.meta.stringToEnum(AiProvider, str) orelse {
                log.fatal(.app, "invalid provider", .{ .arg = opt, .val = str });
                return error.InvalidArgument;
            };
            continue;
        }

        if (std.mem.eql(u8, "--model", opt)) {
            const str = args.next() orelse {
                log.fatal(.app, "missing argument value", .{ .arg = opt });
                return error.InvalidArgument;
            };
            result.model = try allocator.dupeZ(u8, str);
            continue;
        }

        if (std.mem.eql(u8, "--api-key", opt) or std.mem.eql(u8, "--api_key", opt)) {
            const str = args.next() orelse {
                log.fatal(.app, "missing argument value", .{ .arg = opt });
                return error.InvalidArgument;
            };
            result.api_key = try allocator.dupeZ(u8, str);
            continue;
        }

        if (std.mem.eql(u8, "--repl", opt)) {
            result.repl = true;
            continue;
        }

        if (std.mem.eql(u8, "--run", opt)) {
            const str = args.next() orelse {
                log.fatal(.app, "missing argument value", .{ .arg = opt });
                return error.InvalidArgument;
            };
            // Copy instead of borrowing from the iterator, like the other
            // string options.
            result.script_file = try allocator.dupe(u8, str);
            continue;
        }

        if (std.mem.eql(u8, "--system-prompt", opt) or std.mem.eql(u8, "--system_prompt", opt)) {
            const str = args.next() orelse {
                log.fatal(.app, "missing argument value", .{ .arg = opt });
                return error.InvalidArgument;
            };
            result.system_prompt = try allocator.dupeZ(u8, str);
            continue;
        }

        if (std.mem.eql(u8, "--no-record", opt) or std.mem.eql(u8, "--no_record", opt)) {
            result.no_record = true;
            continue;
        }

        if (try parseCommonArg(allocator, opt, args, &result.common)) {
            continue;
        }

        // Positional argument: recording file for REPL mode (e.g. `agent --repl my_workflow.panda`)
        if (!std.mem.startsWith(u8, opt, "-")) {
            // The last positional argument wins; release the copy it replaces.
            if (result.record_file) |previous| allocator.free(previous);
            result.record_file = try allocator.dupe(u8, opt);
            continue;
        }

        log.fatal(.app, "unknown argument", .{ .mode = "agent", .arg = opt });
        return error.UnkownOption;
    }

    // If --no-record is set, clear the record file
    if (result.no_record) {
        if (result.record_file) |path| allocator.free(path);
        result.record_file = null;
    }

    return result;
}
fn parseCommonArg(
allocator: Allocator,
opt: []const u8,

12
src/agent.zig Normal file
View File

@@ -0,0 +1,12 @@
pub const Agent = @import("agent/Agent.zig");
pub const ToolExecutor = @import("agent/ToolExecutor.zig");
pub const Terminal = @import("agent/Terminal.zig");
pub const Command = @import("agent/Command.zig");
pub const CommandExecutor = @import("agent/CommandExecutor.zig");
pub const Recorder = @import("agent/Recorder.zig");
// Reference the submodules that carry unit tests so that their `test`
// declarations are analysed when this file is part of a test build.
// NOTE(review): Agent, ToolExecutor and Terminal are not referenced here —
// confirm that is intentional.
test {
_ = Command;
_ = CommandExecutor;
_ = Recorder;
}

523
src/agent/Agent.zig Normal file
View File

@@ -0,0 +1,523 @@
const std = @import("std");
const zenai = @import("zenai");
const lp = @import("lightpanda");
const log = lp.log;
const Config = lp.Config;
const App = @import("../App.zig");
const ToolExecutor = @import("ToolExecutor.zig");
const Terminal = @import("Terminal.zig");
const Command = @import("Command.zig");
const CommandExecutor = @import("CommandExecutor.zig");
const Recorder = @import("Recorder.zig");
const Self = @This();
// System prompt used when the agent options do not provide one
// (see `init`: `opts.system_prompt orelse default_system_prompt`).
const default_system_prompt =
\\You are a web browsing assistant powered by the Lightpanda browser.
\\You can navigate to websites, read their content, interact with forms,
\\click links, and extract information.
\\
\\When helping the user, navigate to relevant pages and extract information.
\\Use the semantic_tree or interactiveElements tools to understand page structure
\\before clicking or filling forms. Be concise in your responses.
;
// The three self_heal_* pieces are concatenated by `attemptSelfHeal` in this
// order: prefix, recorded intent, suffix, failed command, page_state.
const self_heal_prompt_prefix =
\\A Pandascript command failed during replay. The original intent was:
\\
;
const self_heal_prompt_suffix =
\\
\\The command that failed was:
\\
;
const self_heal_prompt_page_state =
\\
\\Please analyze the current page state and execute the equivalent action.
\\Use the available tools to accomplish the original intent.
;
// Prompt sent to the LLM for the high-level LOGIN command.
const login_prompt =
\\Find the login form on the current page. Fill in the credentials using
\\environment variables (look for $LP_EMAIL or $LP_USERNAME for the username
\\field, and $LP_PASSWORD for the password field). Handle any cookie banners
\\or popups first, then submit the login form.
;
// Prompt sent to the LLM for the high-level ACCEPT_COOKIES command.
const accept_cookies_prompt =
\\Find and dismiss the cookie consent banner on the current page.
\\Look for "Accept", "Accept All", "I agree", or similar buttons and click them.
;
// Allocator used for the agent itself, the AI client, the message list and
// temporary strings.
allocator: std.mem.Allocator,
// Provider client; null when no API key was available (only allowed by
// `init` when a script file is given).
ai_client: ?AiClient,
// Executes the tools requested by the LLM and provides the tool list.
tool_executor: *ToolExecutor,
// Line input and formatted output.
terminal: Terminal,
// Executes parsed manual commands; initialised last in `init` because it
// keeps a pointer to `terminal`.
cmd_executor: CommandExecutor,
// Records commands; created from the configured record file.
recorder: Recorder,
// Conversation history passed to the LLM on every request.
messages: std.ArrayListUnmanaged(zenai.provider.Message),
// Owns the strings and slices referenced by `messages`.
message_arena: std.heap.ArenaAllocator,
// Tool descriptions obtained from `tool_executor.getTools()`.
tools: []const zenai.provider.Tool,
// Model name: the configured one or `defaultModel(provider)`.
model: []const u8,
// System prompt: the configured one or `default_system_prompt`.
system_prompt: []const u8,
// Script to replay; when set, `run` replays it instead of starting the REPL.
script_file: ?[]const u8,
// Copy of the configured record file path.
record_file: ?[]const u8,
/// Pointer to the provider-specific zenai client, tagged by
/// `Config.AiProvider`.
const AiClient = union(Config.AiProvider) {
    anthropic: *zenai.anthropic.Client,
    openai: *zenai.openai.Client,
    gemini: *zenai.gemini.Client,

    /// Re-wraps the active client pointer in the `zenai.provider.Client`
    /// variant of the same name.
    fn toProvider(self: AiClient) zenai.provider.Client {
        switch (self) {
            .anthropic => |client| return .{ .anthropic = client },
            .openai => |client| return .{ .openai = client },
            .gemini => |client| return .{ .gemini = client },
        }
    }
};
/// Creates a heap-allocated agent.
///
/// The API key is taken from `opts.api_key`, then from the provider's
/// environment variable (`getEnvApiKey`). A missing key is fatal unless a
/// script file is given, in which case the agent runs without an AI client.
///
/// Returns `error.MissingApiKey`, `error.ToolInitFailed`, or any error from
/// `ToolExecutor.init` / the allocator. On error everything created so far
/// (tool executor, agent, AI client) is released again.
/// Release a successfully created agent with `deinit`.
pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Self {
    const is_script_mode = opts.script_file != null;

    // API key is only required for REPL mode and self-healing
    const api_key: ?[:0]const u8 = opts.api_key orelse getEnvApiKey(opts.provider) orelse if (!is_script_mode) {
        log.fatal(.app, "missing API key", .{
            .hint = "Set the API key via --api-key or environment variable",
        });
        return error.MissingApiKey;
    } else null;

    const tool_executor = try ToolExecutor.init(allocator, app);
    errdefer tool_executor.deinit();

    const self = try allocator.create(Self);
    errdefer allocator.destroy(self);

    const ai_client: ?AiClient = if (api_key) |key| switch (opts.provider) {
        .anthropic => blk: {
            const client = try allocator.create(zenai.anthropic.Client);
            client.* = zenai.anthropic.Client.init(allocator, key, .{});
            break :blk .{ .anthropic = client };
        },
        .openai => blk: {
            const client = try allocator.create(zenai.openai.Client);
            client.* = zenai.openai.Client.init(allocator, key, .{});
            break :blk .{ .openai = client };
        },
        .gemini => blk: {
            const client = try allocator.create(zenai.gemini.Client);
            client.* = zenai.gemini.Client.init(allocator, key, .{});
            break :blk .{ .gemini = client };
        },
    } else null;
    // Same teardown as in `deinit`, so a failure below does not leak the client.
    errdefer {
        if (ai_client) |client| {
            switch (client) {
                inline else => |c| {
                    c.deinit();
                    allocator.destroy(c);
                },
            }
        }
    }

    const tools = tool_executor.getTools() catch {
        log.fatal(.app, "failed to initialize tools", .{});
        return error.ToolInitFailed;
    };

    self.* = .{
        .allocator = allocator,
        .ai_client = ai_client,
        .tool_executor = tool_executor,
        .terminal = Terminal.init(null),
        .cmd_executor = undefined,
        .recorder = Recorder.init(opts.record_file),
        .messages = .empty,
        .message_arena = std.heap.ArenaAllocator.init(allocator),
        .tools = tools,
        .model = opts.model orelse defaultModel(opts.provider),
        .system_prompt = opts.system_prompt orelse default_system_prompt,
        .script_file = opts.script_file,
        .record_file = opts.record_file,
    };

    // Initialised after `self.*` is in place because the executor keeps a
    // pointer to `self.terminal`.
    self.cmd_executor = CommandExecutor.init(allocator, tool_executor, &self.terminal);

    return self;
}
/// Releases everything owned by the agent, then the agent itself.
/// `self` must not be used afterwards.
pub fn deinit(self: *Self) void {
    const allocator = self.allocator;

    self.recorder.deinit();
    self.message_arena.deinit();
    self.messages.deinit(allocator);
    self.tool_executor.deinit();

    if (self.ai_client) |active| {
        switch (active) {
            inline else => |client| {
                client.deinit();
                allocator.destroy(client);
            },
        }
    }

    allocator.destroy(self);
}
/// Entry point: replays the configured script when there is one, otherwise
/// starts the interactive REPL.
pub fn run(self: *Self) void {
    const script_path = self.script_file orelse return self.runRepl();
    self.runScript(script_path);
}
/// Interactive loop: reads lines from the terminal until end of input, an
/// exit command, or the word "quit".
///
/// - LOGIN / ACCEPT_COOKIES record an "# INTENT:" comment and are resolved by
///   the LLM with the matching prompt.
/// - Unrecognised input is sent to the LLM as a user message.
/// - Every other command is executed directly and the raw line is handed to
///   the recorder.
fn runRepl(self: *Self) void {
    self.terminal.printInfo("Lightpanda Agent (type 'quit' to exit)");
    log.debug(.app, "tools loaded", .{ .count = self.tools.len });

    const info = if (self.ai_client) |ai_client|
        std.fmt.allocPrint(self.allocator, "Provider: {s}, Model: {s}", .{
            @tagName(std.meta.activeTag(ai_client)),
            self.model,
        }) catch null
    else
        null;
    self.terminal.printInfo(info orelse "Ready.");
    if (info) |i| self.allocator.free(i);

    while (true) {
        const line = self.terminal.readLine("> ") orelse break;
        defer self.terminal.freeLine(line);

        if (line.len == 0) continue;

        const cmd = Command.parse(line);
        switch (cmd) {
            .exit => break,
            .comment => continue,
            .login => {
                self.recorder.recordComment("# INTENT: LOGIN");
                self.processUserMessage(login_prompt) catch |err| {
                    self.printFailure("LOGIN failed", err);
                };
            },
            .accept_cookies => {
                self.recorder.recordComment("# INTENT: ACCEPT_COOKIES");
                self.processUserMessage(accept_cookies_prompt) catch |err| {
                    self.printFailure("ACCEPT_COOKIES failed", err);
                };
            },
            .natural_language => {
                // "quit" as a convenience alias
                if (std.mem.eql(u8, line, "quit")) break;

                self.processUserMessage(line) catch |err| {
                    self.printFailure("Request failed", err);
                };
            },
            else => {
                self.cmd_executor.execute(cmd);
                self.recorder.record(line);
            },
        }
    }

    self.terminal.printInfo("Goodbye!");
}

/// Prints "<label>: <error name>" as a terminal error and frees the formatted
/// text afterwards. Falls back to the bare label when formatting fails.
fn printFailure(self: *Self, comptime label: []const u8, err: anyerror) void {
    const detailed = std.fmt.allocPrint(self.allocator, label ++ ": {s}", .{@errorName(err)}) catch null;
    defer if (detailed) |d| self.allocator.free(d);
    self.terminal.printError(detailed orelse label);
}
/// Replays the script at `path` (read fully into memory, at most 10 MiB).
///
/// Commands run in order; the first failure that cannot be recovered prints
/// an error and stops the replay.
/// - "# INTENT:" comments are remembered and passed to `attemptSelfHeal` when
///   a later command fails and an AI client is available.
/// - LOGIN / ACCEPT_COOKIES are resolved by the LLM and need an AI client.
/// - Unrecognised (natural-language) lines are rejected.
/// - EXIT stops the script.
///
/// Formatted status and error messages are freed after printing; when
/// formatting itself fails a fixed fallback text is printed instead.
fn runScript(self: *Self, path: []const u8) void {
    const file = std.fs.cwd().openFile(path, .{}) catch |err| {
        const msg = std.fmt.allocPrint(self.allocator, "Failed to open script '{s}': {s}", .{ path, @errorName(err) }) catch null;
        defer if (msg) |m| self.allocator.free(m);
        self.terminal.printError(msg orelse "Failed to open script");
        return;
    };
    defer file.close();

    const content = file.readToEndAlloc(self.allocator, 10 * 1024 * 1024) catch |err| {
        const msg = std.fmt.allocPrint(self.allocator, "Failed to read script: {s}", .{@errorName(err)}) catch null;
        defer if (msg) |m| self.allocator.free(m);
        self.terminal.printError(msg orelse "Failed to read script");
        return;
    };
    defer self.allocator.free(content);

    const script_info = std.fmt.allocPrint(self.allocator, "Running script: {s}", .{path}) catch null;
    self.terminal.printInfo(script_info orelse "Running script...");
    if (script_info) |i| self.allocator.free(i);

    var script_arena = std.heap.ArenaAllocator.init(self.allocator);
    defer script_arena.deinit();

    var iter = Command.ScriptIterator.init(content, script_arena.allocator());
    // Most recent "# INTENT:" text; it is kept until the next such comment.
    var last_intent: ?[]const u8 = null;

    while (iter.next()) |entry| {
        switch (entry.command) {
            .exit => {
                self.terminal.printInfo("EXIT — stopping script.");
                return;
            },
            .comment => {
                // Track # INTENT: comments for self-healing
                if (std.mem.startsWith(u8, entry.raw_line, "# INTENT:")) {
                    last_intent = std.mem.trim(u8, entry.raw_line["# INTENT:".len..], &std.ascii.whitespace);
                }
                continue;
            },
            .natural_language => {
                const msg = std.fmt.allocPrint(self.allocator, "line {d}: unrecognized command: {s}", .{ entry.line_num, entry.raw_line }) catch null;
                defer if (msg) |m| self.allocator.free(m);
                self.terminal.printError(msg orelse "unrecognized command");
                return;
            },
            .login, .accept_cookies => {
                // High-level commands require LLM
                if (self.ai_client == null) {
                    const msg = std.fmt.allocPrint(self.allocator, "line {d}: {s} requires an API key for LLM resolution", .{
                        entry.line_num,
                        entry.raw_line,
                    }) catch null;
                    defer if (msg) |m| self.allocator.free(m);
                    self.terminal.printError(msg orelse "LLM required");
                    return;
                }

                const prompt = if (entry.command == .login) login_prompt else accept_cookies_prompt;
                self.processUserMessage(prompt) catch |err| {
                    const msg = std.fmt.allocPrint(self.allocator, "line {d}: {s} failed: {s}", .{
                        entry.line_num,
                        entry.raw_line,
                        @errorName(err),
                    }) catch null;
                    defer if (msg) |m| self.allocator.free(m);
                    self.terminal.printError(msg orelse "command failed");
                    return;
                };
            },
            else => {
                const line_info = std.fmt.allocPrint(self.allocator, "[{d}] {s}", .{ entry.line_num, entry.raw_line }) catch null;
                self.terminal.printInfo(line_info orelse entry.raw_line);
                if (line_info) |li| self.allocator.free(li);

                // Execute with result checking for self-healing
                var cmd_arena = std.heap.ArenaAllocator.init(self.allocator);
                defer cmd_arena.deinit();

                const result = self.cmd_executor.executeWithResult(cmd_arena.allocator(), entry.command);
                self.terminal.printAssistant(result.output);
                std.debug.print("\n", .{});

                if (result.failed) {
                    // Attempt self-healing via LLM
                    if (self.ai_client != null) {
                        self.terminal.printInfo("Command failed, attempting self-healing...");
                        if (self.attemptSelfHeal(last_intent, entry.raw_line)) {
                            continue;
                        }
                    }

                    const msg = std.fmt.allocPrint(self.allocator, "line {d}: command failed: {s}", .{
                        entry.line_num,
                        entry.raw_line,
                    }) catch null;
                    defer if (msg) |m| self.allocator.free(m);
                    self.terminal.printError(msg orelse "command failed");
                    return;
                }
            },
        }
    }

    self.terminal.printInfo("Script completed.");
}
/// Asks the LLM to carry out a failed script command in another way.
///
/// The prompt is the self-heal prefix, the recorded intent (or
/// "(no recorded intent)"), the suffix, the failed command line and the
/// page-state instructions, in that order.
///
/// Returns true when `processUserMessage` finished without error and false
/// when building the prompt or the LLM exchange failed. It does not check
/// whether the page actually reached the intended state.
fn attemptSelfHeal(self: *Self, intent: ?[]const u8, failed_command: []const u8) bool {
    var scratch = std.heap.ArenaAllocator.init(self.allocator);
    defer scratch.deinit();

    const recorded_intent = intent orelse "(no recorded intent)";

    // Build the self-healing prompt
    const heal_request = std.fmt.allocPrint(
        scratch.allocator(),
        "{s}{s}{s}{s}{s}",
        .{
            self_heal_prompt_prefix,
            recorded_intent,
            self_heal_prompt_suffix,
            failed_command,
            self_heal_prompt_page_state,
        },
    ) catch return false;

    if (self.processUserMessage(heal_request)) |_| {
        return true;
    } else |_| {
        return false;
    }
}
/// Sends `user_input` to the LLM and drives the tool-call loop until the
/// model answers with text.
///
/// The system prompt is added before the first user message. Each round
/// sends the whole history; when the reply contains tool calls they are
/// executed, recorded (unless the result starts with "Error:") and their
/// results appended before the next round. A text reply is printed, stored
/// and ends the loop.
///
/// Errors: `error.NoAiClient` without an AI client, `error.ApiError` when the
/// provider call fails, `error.TooManyToolIterations` when the model is still
/// requesting tools after 20 rounds, plus allocation errors.
fn processUserMessage(self: *Self, user_input: []const u8) !void {
    const ma = self.message_arena.allocator();

    // Add system prompt as first message if this is the first user message
    if (self.messages.items.len == 0) {
        try self.messages.append(self.allocator, .{
            .role = .system,
            .content = self.system_prompt,
        });
    }

    // Add user message
    try self.messages.append(self.allocator, .{
        .role = .user,
        .content = try ma.dupe(u8, user_input),
    });

    // Loop: send to LLM, execute tool calls, repeat until we get text
    var max_iterations: u32 = 20;
    while (max_iterations > 0) : (max_iterations -= 1) {
        const provider_client = (self.ai_client orelse return error.NoAiClient).toProvider();
        var result = provider_client.generateContent(self.model, self.messages.items, .{
            .tools = self.tools,
            .max_tokens = 4096,
        }) catch |err| {
            log.err(.app, "AI API error", .{ .err = err });
            return error.ApiError;
        };
        defer result.deinit();

        log.debug(.app, "LLM response", .{
            .finish_reason = @tagName(result.finish_reason),
            .has_text = result.text != null,
            .has_tool_calls = result.tool_calls != null,
        });

        // Handle tool calls (check for tool_calls presence, not just finish_reason,
        // because some providers like Gemini return finish_reason=STOP for tool calls)
        if (result.tool_calls) |tool_calls| {
            // Add the assistant message with tool calls
            try self.messages.append(self.allocator, .{
                .role = .assistant,
                .content = if (result.text) |t| try ma.dupe(u8, t) else null,
                .tool_calls = try self.dupeToolCalls(tool_calls),
            });

            // Execute each tool call and collect results
            var tool_results: std.ArrayListUnmanaged(zenai.provider.ToolResult) = .empty;

            for (tool_calls) |tc| {
                self.terminal.printToolCall(tc.name, tc.arguments);

                // Per-call scratch memory; everything kept is copied into `ma`.
                var tool_arena = std.heap.ArenaAllocator.init(self.allocator);
                defer tool_arena.deinit();

                const tool_result = self.tool_executor.call(tool_arena.allocator(), tc.name, tc.arguments) catch "Error: tool execution failed";
                self.terminal.printToolResult(tc.name, tool_result);

                // Record resolved tool call as Pandascript command
                if (!std.mem.startsWith(u8, tool_result, "Error:")) {
                    self.recordToolCall(tool_arena.allocator(), tc.name, tc.arguments);
                }

                try tool_results.append(ma, .{
                    .id = try ma.dupe(u8, tc.id),
                    .name = try ma.dupe(u8, tc.name),
                    .content = try ma.dupe(u8, tool_result),
                });
            }

            // Add tool results as a message
            try self.messages.append(self.allocator, .{
                .role = .tool,
                .tool_results = try tool_results.toOwnedSlice(ma),
            });

            continue;
        }

        // Text response
        if (result.text) |text| {
            std.debug.print("\n", .{});
            self.terminal.printAssistant(text);
            std.debug.print("\n\n", .{});

            try self.messages.append(self.allocator, .{
                .role = .assistant,
                .content = try ma.dupe(u8, text),
            });
        } else {
            self.terminal.printInfo("(no response from model)");
        }

        break;
    } else {
        // The budget ran out while the model was still asking for tools:
        // report it instead of returning as if the request had completed.
        log.err(.app, "tool iteration limit reached", .{});
        return error.TooManyToolIterations;
    }
}
/// Translates an LLM tool call (tool name + JSON arguments) into a
/// Pandascript line and hands it to the recorder.
///
/// Mapping:
/// - goto / navigate  -> GOTO <url>
/// - click            -> CLICK "<selector>" (calls without a string selector
///                       are not recorded)
/// - fill             -> TYPE "<selector>" "<value>"
/// - evaluate / eval  -> EVAL "<script>", or the triple-quoted multi-line
///                       form when the script contains a newline
/// Every other tool, and any call whose arguments are not a JSON object with
/// the expected string fields, is ignored.
///
/// NOTE(review): selector, value and script are inserted verbatim between
/// double quotes; text that itself contains `"` may not round-trip through
/// the parser — confirm against Command.parse.
fn recordToolCall(self: *Self, arena: std.mem.Allocator, tool_name: []const u8, arguments: []const u8) void {
    const Args = struct {
        /// Returns the string stored under `key`, or null when the key is
        /// missing or holds a non-string value.
        fn string(map: std.json.ObjectMap, key: []const u8) ?[]const u8 {
            const value = map.get(key) orelse return null;
            return switch (value) {
                .string => |s| s,
                else => null,
            };
        }
    };

    const parsed = std.json.parseFromSlice(std.json.Value, arena, arguments, .{}) catch return;
    const fields = switch (parsed.value) {
        .object => |o| o,
        else => return,
    };

    var script_line: ?[]const u8 = null;

    if (std.mem.eql(u8, tool_name, "goto") or std.mem.eql(u8, tool_name, "navigate")) {
        if (Args.string(fields, "url")) |url| {
            script_line = std.fmt.allocPrint(arena, "GOTO {s}", .{url}) catch null;
        }
    } else if (std.mem.eql(u8, tool_name, "click")) {
        // A click addressed only by backendNodeId has no selector and cannot
        // be expressed as Pandascript, so nothing is recorded for it.
        if (Args.string(fields, "selector")) |selector| {
            script_line = std.fmt.allocPrint(arena, "CLICK \"{s}\"", .{selector}) catch null;
        }
    } else if (std.mem.eql(u8, tool_name, "fill")) {
        if (Args.string(fields, "selector")) |selector| {
            if (Args.string(fields, "value")) |value| {
                script_line = std.fmt.allocPrint(arena, "TYPE \"{s}\" \"{s}\"", .{ selector, value }) catch null;
            }
        }
    } else if (std.mem.eql(u8, tool_name, "evaluate") or std.mem.eql(u8, tool_name, "eval")) {
        if (Args.string(fields, "script")) |script| {
            // Use multi-line format if the script contains newlines
            const is_multiline = std.mem.indexOfScalar(u8, script, '\n') != null;
            script_line = if (is_multiline)
                std.fmt.allocPrint(arena, "EVAL \"\"\"\n{s}\n\"\"\"", .{script}) catch null
            else
                std.fmt.allocPrint(arena, "EVAL \"{s}\"", .{script}) catch null;
        }
    }
    // waitForSelector and all remaining tools are deliberately not recorded.

    if (script_line) |cmd| {
        self.recorder.record(cmd);
    }
}
/// Deep-copy `calls` using the allocator of `self.message_arena`: the slice
/// itself and each call's `id`, `name` and `arguments` are duplicated.
/// Returns an allocation error if any copy fails.
fn dupeToolCalls(self: *Self, calls: []const zenai.provider.ToolCall) ![]const zenai.provider.ToolCall {
    const ma = self.message_arena.allocator();
    const copies = try ma.alloc(zenai.provider.ToolCall, calls.len);
    for (calls, copies) |src, *dst| {
        const id = try ma.dupe(u8, src.id);
        const name = try ma.dupe(u8, src.name);
        const arguments = try ma.dupe(u8, src.arguments);
        dst.* = .{ .id = id, .name = name, .arguments = arguments };
    }
    return copies;
}
/// Look up the API key for `provider_type` in the process environment.
/// Gemini checks `GOOGLE_API_KEY` first and falls back to `GEMINI_API_KEY`.
/// Returns null when the relevant variable is not set.
fn getEnvApiKey(provider_type: Config.AiProvider) ?[:0]const u8 {
    switch (provider_type) {
        .anthropic => return std.posix.getenv("ANTHROPIC_API_KEY"),
        .openai => return std.posix.getenv("OPENAI_API_KEY"),
        .gemini => {
            if (std.posix.getenv("GOOGLE_API_KEY")) |key| return key;
            return std.posix.getenv("GEMINI_API_KEY");
        },
    }
}
/// Model identifier returned for each provider.
fn defaultModel(provider_type: Config.AiProvider) []const u8 {
    switch (provider_type) {
        .anthropic => return "claude-sonnet-4-20250514",
        .openai => return "gpt-4o",
        .gemini => return "gemini-2.5-flash",
    }
}

409
src/agent/Command.zig Normal file
View File

@@ -0,0 +1,409 @@
const std = @import("std");
/// Payload of `TYPE "<selector>" "<value>"`.
/// Both slices point into the parsed input line.
pub const TypeArgs = struct {
/// First quoted argument.
selector: []const u8,
/// Second quoted argument.
value: []const u8,
};
/// Payload of `EXTRACT "<selector>" [> file]`.
/// Both slices point into the parsed input line.
pub const ExtractArgs = struct {
/// Quoted selector, or the whole remainder of the line when it is not quoted.
selector: []const u8,
/// Text following `>` after a quoted selector; null when absent or empty.
file: ?[]const u8,
};
/// One parsed line of Pandascript / REPL input (see `parse`).
/// Slice payloads borrow from the parsed line, except an `eval_js` assembled
/// by `ScriptIterator` from an `EVAL """` block, which is allocated.
pub const Command = union(enum) {
/// `GOTO <url>` — the rest of the line, unquoted.
goto: []const u8,
/// `CLICK <target>` — quoted or unquoted target.
click: []const u8,
/// `TYPE "<selector>" "<value>"`.
type_cmd: TypeArgs,
/// `WAIT <selector>` — quoted or unquoted selector.
wait: []const u8,
/// `TREE`.
tree: void,
/// `MARKDOWN` or its alias `MD`.
markdown: void,
/// `EXTRACT <selector> [> file]`.
extract: ExtractArgs,
/// `EVAL <script>` — single-line or assembled multi-line JavaScript.
eval_js: []const u8,
/// `LOGIN`.
login: void,
/// `ACCEPT_COOKIES` or `ACCEPT-COOKIES`.
accept_cookies: void,
/// `EXIT`.
exit: void,
/// A line whose first non-blank character is `#`.
comment: void,
/// Anything not recognized as a command; carries the trimmed input.
natural_language: []const u8,
};
/// Parse a line of REPL input into a Pandascript command.
///
/// The first whitespace-delimited word is matched case-insensitively against
/// the command keywords. Unrecognized input, and recognized keywords whose
/// arguments are missing or malformed, are returned as `.natural_language`
/// carrying the trimmed line. Lines starting with `#` are `.comment`.
///
/// Argument-less commands (TREE, MARKDOWN/MD, LOGIN, ACCEPT_COOKIES, EXIT)
/// only match when nothing follows the keyword, so a sentence such as
/// "login with my work account" is treated as natural language instead of
/// being swallowed by the LOGIN command.
///
/// All returned slices borrow from `line`.
/// For multi-line EVAL blocks in scripts, use `ScriptIterator`.
pub fn parse(line: []const u8) Command {
    const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
    const fallback: Command = .{ .natural_language = trimmed };
    if (trimmed.len == 0) return fallback;

    // Skip comment lines
    if (trimmed[0] == '#') return .{ .comment = {} };

    // Split into the command word (first whitespace-delimited token) and the rest.
    const cmd_end = std.mem.indexOfAny(u8, trimmed, &std.ascii.whitespace) orelse trimmed.len;
    const cmd_word = trimmed[0..cmd_end];
    const rest = std.mem.trim(u8, trimmed[cmd_end..], &std.ascii.whitespace);

    if (eqlIgnoreCase(cmd_word, "GOTO")) {
        if (rest.len == 0) return fallback;
        return .{ .goto = rest };
    }

    if (eqlIgnoreCase(cmd_word, "CLICK")) {
        const arg = extractQuoted(rest) orelse rest;
        if (arg.len == 0) return fallback;
        return .{ .click = arg };
    }

    if (eqlIgnoreCase(cmd_word, "TYPE")) {
        // Both arguments must be quoted.
        const first = extractQuotedWithRemainder(rest) orelse return fallback;
        const second_arg = std.mem.trim(u8, first.remainder, &std.ascii.whitespace);
        const second = extractQuoted(second_arg) orelse return fallback;
        return .{ .type_cmd = .{ .selector = first.value, .value = second } };
    }

    if (eqlIgnoreCase(cmd_word, "WAIT")) {
        const arg = extractQuoted(rest) orelse rest;
        if (arg.len == 0) return fallback;
        return .{ .wait = arg };
    }

    if (eqlIgnoreCase(cmd_word, "EXTRACT")) {
        // Unquoted form: the whole remainder is the selector; `>` is not
        // interpreted because it is also a valid CSS combinator.
        const quoted = extractQuotedWithRemainder(rest) orelse {
            if (rest.len == 0) return fallback;
            return .{ .extract = .{ .selector = rest, .file = null } };
        };
        // Quoted form: look for `> filename` after the closing quote.
        const after = std.mem.trim(u8, quoted.remainder, &std.ascii.whitespace);
        var file: ?[]const u8 = null;
        if (after.len > 0 and after[0] == '>') {
            const name = std.mem.trim(u8, after[1..], &std.ascii.whitespace);
            if (name.len > 0) file = name;
        }
        return .{ .extract = .{ .selector = quoted.value, .file = file } };
    }

    if (eqlIgnoreCase(cmd_word, "EVAL")) {
        if (rest.len == 0) return fallback;
        const arg = extractQuoted(rest) orelse rest;
        return .{ .eval_js = arg };
    }

    // Argument-less commands: trailing text means this is a sentence that
    // merely starts with the keyword.
    if (rest.len == 0) {
        if (eqlIgnoreCase(cmd_word, "TREE")) return .{ .tree = {} };
        if (eqlIgnoreCase(cmd_word, "MARKDOWN") or eqlIgnoreCase(cmd_word, "MD")) return .{ .markdown = {} };
        if (eqlIgnoreCase(cmd_word, "LOGIN")) return .{ .login = {} };
        if (eqlIgnoreCase(cmd_word, "ACCEPT_COOKIES") or eqlIgnoreCase(cmd_word, "ACCEPT-COOKIES")) return .{ .accept_cookies = {} };
        if (eqlIgnoreCase(cmd_word, "EXIT")) return .{ .exit = {} };
    }

    return fallback;
}
/// Iterator for parsing a script file, handling multi-line EVAL """ ... """ blocks.
///
/// Blank lines are skipped; comment lines are yielded as `.comment` entries.
/// Ownership: an `.eval_js` payload assembled from an `EVAL """` block is
/// allocated with `allocator` and must be freed by the caller; every other
/// payload borrows from the script `content`.
pub const ScriptIterator = struct {
    lines: std.mem.SplitIterator(u8, .scalar),
    /// 1-based number of the last line consumed from `lines`.
    line_num: u32,
    /// Used only to assemble multi-line EVAL bodies.
    allocator: std.mem.Allocator,

    pub fn init(content: []const u8, allocator: std.mem.Allocator) ScriptIterator {
        return .{
            .lines = std.mem.splitScalar(u8, content, '\n'),
            .line_num = 0,
            .allocator = allocator,
        };
    }

    pub const Entry = struct {
        /// Line on which the command starts (the `EVAL """` line for blocks).
        line_num: u32,
        /// The trimmed source line that produced the command.
        raw_line: []const u8,
        command: Command,
    };

    /// Returns the next command from the script, or null at EOF.
    /// Multi-line EVAL blocks are assembled into a single eval_js command.
    /// When a block cannot be assembled (no closing `"""`, or allocation
    /// failure) the entry is `.natural_language = "unterminated EVAL block"`.
    pub fn next(self: *ScriptIterator) ?Entry {
        while (self.lines.next()) |line| {
            self.line_num += 1;
            const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
            if (trimmed.len == 0) continue;

            // Check for EVAL """ multi-line block
            if (isEvalTripleQuote(trimmed)) {
                const start_line = self.line_num;
                const command: Command = if (self.collectEvalBlock()) |js|
                    .{ .eval_js = js }
                else
                    .{ .natural_language = "unterminated EVAL block" };
                return .{
                    .line_num = start_line,
                    .raw_line = trimmed,
                    .command = command,
                };
            }

            return .{
                .line_num = self.line_num,
                .raw_line = trimmed,
                .command = parse(trimmed),
            };
        }
        return null;
    }

    /// True when `line` is the keyword EVAL followed by an argument that
    /// starts with `"""`.
    fn isEvalTripleQuote(line: []const u8) bool {
        const cmd_end = std.mem.indexOfAny(u8, line, &std.ascii.whitespace) orelse line.len;
        const cmd_word = line[0..cmd_end];
        if (!eqlIgnoreCase(cmd_word, "EVAL")) return false;
        const rest = std.mem.trim(u8, line[cmd_end..], &std.ascii.whitespace);
        return std.mem.startsWith(u8, rest, "\"\"\"");
    }

    /// Collect lines until a line consisting solely of `"""`, and return the
    /// JS content joined with '\n' (lines are kept untrimmed). Returns null
    /// when the block is unterminated or an allocation fails.
    fn collectEvalBlock(self: *ScriptIterator) ?[]const u8 {
        var parts: std.ArrayListUnmanaged(u8) = .empty;
        // Release the partial buffer on every failure path. After a successful
        // toOwnedSlice the list is empty, so this is then a no-op.
        defer parts.deinit(self.allocator);

        while (self.lines.next()) |line| {
            self.line_num += 1;
            const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
            if (std.mem.eql(u8, trimmed, "\"\"\"")) {
                return parts.toOwnedSlice(self.allocator) catch null;
            }
            if (parts.items.len > 0) {
                parts.append(self.allocator, '\n') catch return null;
            }
            parts.appendSlice(self.allocator, line) catch return null;
        }

        // Unterminated
        return null;
    }
};
/// Result of splitting off a leading double-quoted argument.
const QuotedResult = struct {
/// Text between the opening and closing quote (quotes excluded).
value: []const u8,
/// Everything after the closing quote, untrimmed.
remainder: []const u8,
};
/// Split `s` into a leading double-quoted value and what follows it.
/// Returns null unless `s` starts with `"` and has a later closing `"`.
/// There is no escape syntax: the value ends at the first `"` after the
/// opening one.
fn extractQuotedWithRemainder(s: []const u8) ?QuotedResult {
    if (s.len < 2) return null;
    if (s[0] != '"') return null;
    if (std.mem.indexOfScalarPos(u8, s, 1, '"')) |close| {
        return QuotedResult{
            .value = s[1..close],
            .remainder = s[close + 1 ..],
        };
    }
    return null;
}
/// Like `extractQuotedWithRemainder`, but returns only the quoted value.
fn extractQuoted(s: []const u8) ?[]const u8 {
    return if (extractQuotedWithRemainder(s)) |quoted| quoted.value else null;
}
/// Compare `a` against `upper` after ASCII-uppercasing each byte of `a`.
/// `upper` itself is compared verbatim, so it has to be written in upper
/// case for a match to be possible.
fn eqlIgnoreCase(a: []const u8, comptime upper: []const u8) bool {
    if (a.len != upper.len) return false;
    var i: usize = 0;
    while (i < a.len) : (i += 1) {
        if (std.ascii.toUpper(a[i]) != upper[i]) return false;
    }
    return true;
}
// --- Tests ---
// GOTO: the remainder of the line is the URL, unquoted.
test "parse GOTO" {
const cmd = parse("GOTO https://example.com");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
// Keywords match regardless of case.
test "parse GOTO case insensitive" {
const cmd = parse("goto https://example.com");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
// A keyword without its required argument falls back to natural language.
test "parse GOTO missing url" {
const cmd = parse("GOTO");
try std.testing.expect(cmd == .natural_language);
}
// CLICK strips the surrounding quotes from a quoted target.
test "parse CLICK quoted" {
const cmd = parse("CLICK \"Login\"");
try std.testing.expectEqualStrings("Login", cmd.click);
}
// CLICK also accepts an unquoted target.
test "parse CLICK unquoted" {
const cmd = parse("CLICK .submit-btn");
try std.testing.expectEqualStrings(".submit-btn", cmd.click);
}
// TYPE takes a quoted selector followed by a quoted value.
test "parse TYPE two quoted args" {
const cmd = parse("TYPE \"#email\" \"user@test.com\"");
try std.testing.expectEqualStrings("#email", cmd.type_cmd.selector);
try std.testing.expectEqualStrings("user@test.com", cmd.type_cmd.value);
}
// TYPE with only one quoted argument is not a valid command.
test "parse TYPE missing second arg" {
const cmd = parse("TYPE \"#email\"");
try std.testing.expect(cmd == .natural_language);
}
test "parse WAIT" {
const cmd = parse("WAIT \".dashboard\"");
try std.testing.expectEqualStrings(".dashboard", cmd.wait);
}
test "parse TREE" {
const cmd = parse("TREE");
try std.testing.expect(cmd == .tree);
}
// MD is an alias of MARKDOWN.
test "parse MARKDOWN alias MD" {
try std.testing.expect(parse("MARKDOWN") == .markdown);
try std.testing.expect(parse("md") == .markdown);
}
// `> file` after a quoted selector names the output file.
test "parse EXTRACT with file" {
const cmd = parse("EXTRACT \".title\" > titles.json");
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
try std.testing.expectEqualStrings("titles.json", cmd.extract.file.?);
}
test "parse EXTRACT without file" {
const cmd = parse("EXTRACT \".title\"");
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
try std.testing.expect(cmd.extract.file == null);
}
// Single-line EVAL: the quoted script is returned without its quotes.
test "parse EVAL single line" {
const cmd = parse("EVAL \"document.title\"");
try std.testing.expectEqualStrings("document.title", cmd.eval_js);
}
test "parse LOGIN" {
try std.testing.expect(parse("LOGIN") == .login);
try std.testing.expect(parse("login") == .login);
}
// Both the underscore and the hyphen spelling are accepted.
test "parse ACCEPT_COOKIES" {
try std.testing.expect(parse("ACCEPT_COOKIES") == .accept_cookies);
try std.testing.expect(parse("ACCEPT-COOKIES") == .accept_cookies);
}
test "parse EXIT" {
try std.testing.expect(parse("EXIT") == .exit);
}
// Any line starting with '#' is a comment, including "# INTENT:" markers.
test "parse comment" {
try std.testing.expect(parse("# this is a comment") == .comment);
try std.testing.expect(parse("# INTENT: LOGIN") == .comment);
}
// Input that matches no keyword is passed through as natural language.
test "parse natural language fallback" {
const cmd = parse("what is on this page?");
try std.testing.expectEqualStrings("what is on this page?", cmd.natural_language);
}
// Surrounding whitespace is ignored.
test "parse whitespace trimming" {
const cmd = parse(" GOTO https://example.com ");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
// Empty input yields a natural_language command.
test "parse empty input" {
const cmd = parse("");
try std.testing.expect(cmd == .natural_language);
}
// One entry per non-blank line; line numbers are 1-based.
test "ScriptIterator basic commands" {
const script =
\\GOTO https://example.com
\\TREE
\\CLICK "Login"
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expectEqualStrings("https://example.com", e1.command.goto);
try std.testing.expectEqual(@as(u32, 1), e1.line_num);
const e2 = iter.next().?;
try std.testing.expect(e2.command == .tree);
const e3 = iter.next().?;
try std.testing.expectEqualStrings("Login", e3.command.click);
try std.testing.expect(iter.next() == null);
}
// Despite the name, only blank lines are skipped: comment lines are yielded
// as `.comment` entries, which is what the expectations below check.
test "ScriptIterator skips blank lines and comments" {
const script =
\\# Navigate
\\GOTO https://example.com
\\
\\# Extract
\\TREE
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expect(e1.command == .comment);
const e2 = iter.next().?;
try std.testing.expect(e2.command == .goto);
const e3 = iter.next().?;
try std.testing.expect(e3.command == .comment);
const e4 = iter.next().?;
try std.testing.expect(e4.command == .tree);
try std.testing.expect(iter.next() == null);
}
// An `EVAL """` block is assembled into one eval_js command that starts on
// the EVAL line; parsing then resumes after the closing `"""`.
test "ScriptIterator multi-line EVAL" {
    const script =
        \\GOTO https://example.com
        \\EVAL """
        \\ const x = 1;
        \\ const y = 2;
        \\ return x + y;
        \\"""
        \\TREE
    ;
    var iter = ScriptIterator.init(script, std.testing.allocator);

    const e1 = iter.next().?;
    try std.testing.expect(e1.command == .goto);

    const e2 = iter.next().?;
    // The assembled script is heap-allocated. Register the free before any
    // fallible expectation so a failing assertion does not also leak.
    defer if (e2.command == .eval_js) std.testing.allocator.free(e2.command.eval_js);
    try std.testing.expect(e2.command == .eval_js);
    try std.testing.expectEqual(@as(u32, 2), e2.line_num);
    try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "const x = 1;") != null);
    try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "return x + y;") != null);

    const e3 = iter.next().?;
    try std.testing.expect(e3.command == .tree);
    try std.testing.expect(iter.next() == null);
}
// A block with no closing `"""` is reported as a natural_language entry with
// a fixed message; the testing allocator also verifies nothing is leaked.
test "ScriptIterator unterminated EVAL" {
const script =
\\EVAL """
\\ const x = 1;
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expect(e1.command == .natural_language);
try std.testing.expectEqualStrings("unterminated EVAL block", e1.command.natural_language);
}

View File

@@ -0,0 +1,298 @@
const std = @import("std");
const Command = @import("Command.zig");
const ToolExecutor = @import("ToolExecutor.zig");
const Terminal = @import("Terminal.zig");
const Self = @This();
/// Runs the underlying browser tools; commands are translated into `call`s on it.
tool_executor: *ToolExecutor,
/// Used to print command output and error messages.
terminal: *Terminal,
/// Backing allocator for the per-command arena created in `execute`.
allocator: std.mem.Allocator,
/// Build an executor from its collaborators. The pointers are stored as given;
/// nothing is allocated here.
pub fn init(allocator: std.mem.Allocator, tool_executor: *ToolExecutor, terminal: *Terminal) Self {
    const executor: Self = .{
        .tool_executor = tool_executor,
        .terminal = terminal,
        .allocator = allocator,
    };
    return executor;
}
/// Outcome of running one command.
pub const ExecResult = struct {
/// Text produced by the command (tool output or an error message).
output: []const u8,
/// True when `output` starts with "Error:".
failed: bool,
};
/// Run a directly executable command and report its output plus a failure flag.
///
/// `a` receives every allocation made while running the command, including
/// the returned output. A result counts as failed when its text begins with
/// "Error:". Only goto, click, type, wait, tree, markdown, extract and eval
/// may be passed; the remaining variants are `unreachable` here and must be
/// handled by the caller beforehand.
pub fn executeWithResult(self: *Self, a: std.mem.Allocator, cmd: Command.Command) ExecResult {
    const output: []const u8 = switch (cmd) {
        .goto => |url| self.execGoto(a, url),
        .click => |target| self.execClick(a, target),
        .type_cmd => |args| self.execType(a, args),
        .extract => |args| self.execExtract(a, args),
        .wait => |selector| blk: {
            const payload = buildJson(a, .{ .selector = selector });
            break :blk self.tool_executor.call(a, "waitForSelector", payload) catch "Error: wait failed";
        },
        .eval_js => |script| blk: {
            const payload = buildJson(a, .{ .script = script });
            break :blk self.tool_executor.call(a, "evaluate", payload) catch "Error: eval failed";
        },
        .tree => self.tool_executor.call(a, "semantic_tree", "") catch "Error: tree failed",
        .markdown => self.tool_executor.call(a, "markdown", "") catch "Error: markdown failed",
        .exit, .natural_language, .comment, .login, .accept_cookies => unreachable,
    };
    const failed = std.mem.startsWith(u8, output, "Error:");
    return .{ .output = output, .failed = failed };
}
/// Run `cmd` and print its output through the terminal, followed by a newline
/// on the debug stream. A temporary arena holds all allocations made for the
/// command and is released before returning.
pub fn execute(self: *Self, cmd: Command.Command) void {
    var scratch = std.heap.ArenaAllocator.init(self.allocator);
    defer scratch.deinit();

    const outcome = self.executeWithResult(scratch.allocator(), cmd);
    self.terminal.printAssistant(outcome.output);
    std.debug.print("\n", .{});
}
/// Navigate to `raw_url` after expanding `$VAR` references in it.
fn execGoto(self: *Self, arena: std.mem.Allocator, raw_url: []const u8) []const u8 {
    const expanded = substituteEnvVars(arena, raw_url);
    const payload = buildJson(arena, .{ .url = expanded });
    return self.tool_executor.call(arena, "goto", payload) catch "Error: goto failed";
}
/// Click the element whose entry in the interactive-elements listing contains
/// the target text (after `$VAR` expansion).
///
/// The listing is fetched with the `interactiveElements` tool, searched with
/// `findNodeIdByText`, and the resulting node id is passed to the `click`
/// tool. Returns an "Error: ..." message when any step fails.
fn execClick(self: *Self, arena: std.mem.Allocator, raw_target: []const u8) []const u8 {
    const target = substituteEnvVars(arena, raw_target);

    const listing = self.tool_executor.call(arena, "interactiveElements", "") catch
        return "Error: failed to get interactive elements";

    const node_id = findNodeIdByText(arena, listing, target) orelse
        return "Error: could not find element matching the target";

    const click_args = std.fmt.allocPrint(arena, "{{\"backendNodeId\":{d}}}", .{node_id}) catch
        return "Error: failed to build click args";

    return self.tool_executor.call(arena, "click", click_args) catch "Error: click failed";
}
/// Set the value of the element matching `args.selector` by evaluating a small
/// script in the page. The script assigns `el.value` and dispatches a bubbling
/// "input" event. Selector and value get `$VAR` expansion and are then escaped
/// for embedding in a JS double-quoted string.
fn execType(self: *Self, arena: std.mem.Allocator, args: Command.TypeArgs) []const u8 {
    const template =
        \\(function() {{
        \\ var el = document.querySelector("{s}");
        \\ if (!el) return "Error: element not found";
        \\ el.value = "{s}";
        \\ el.dispatchEvent(new Event("input", {{bubbles: true}}));
        \\ return "Typed into " + el.tagName;
        \\}})()
    ;
    const selector_js = escapeJs(arena, substituteEnvVars(arena, args.selector));
    const value_js = escapeJs(arena, substituteEnvVars(arena, args.value));

    const script = std.fmt.allocPrint(arena, template, .{ selector_js, value_js }) catch
        return "Error: failed to build type script";

    const payload = buildJson(arena, .{ .script = script });
    return self.tool_executor.call(arena, "evaluate", payload) catch "Error: type failed";
}
/// Evaluate a script that collects the trimmed text content of every element
/// matching `args.selector` into a JSON array string.
///
/// Without `args.file` the JSON text is returned. With a file, the path is
/// validated by `sanitizePath` and the JSON is written relative to the
/// current directory; on success "Extracted to <file>" is returned. If the
/// path is rejected or the write fails, an error is printed and the JSON
/// text is returned instead, so the result is not flagged as failed.
fn execExtract(self: *Self, arena: std.mem.Allocator, args: Command.ExtractArgs) []const u8 {
    const selector = escapeJs(arena, substituteEnvVars(arena, args.selector));
    const script = std.fmt.allocPrint(arena,
        \\JSON.stringify(Array.from(document.querySelectorAll("{s}")).map(el => el.textContent.trim()))
    , .{selector}) catch return "Error: failed to build extract script";

    const payload = buildJson(arena, .{ .script = script });
    const extracted = self.tool_executor.call(arena, "evaluate", payload) catch
        return "Error: extract failed";

    const raw_file = args.file orelse return extracted;

    const file = sanitizePath(raw_file) orelse {
        self.terminal.printError("Invalid output path: must be relative and not traverse above working directory");
        return extracted;
    };

    std.fs.cwd().writeFile(.{ .sub_path = file, .data = extracted }) catch {
        self.terminal.printError("Failed to write to file");
        return extracted;
    };

    return std.fmt.allocPrint(arena, "Extracted to {s}", .{file}) catch "Extracted.";
}
/// Substitute $VAR_NAME references with values from the environment.
///
/// A name is the longest run of ASCII letters, digits and underscores after
/// a `$`. References to unset variables are kept as written, and a `$` not
/// followed by a name character is copied through. When `input` contains no
/// `$`, or when an allocation fails, `input` itself is returned unchanged;
/// otherwise the result is allocated from `arena`.
fn substituteEnvVars(arena: std.mem.Allocator, input: []const u8) []const u8 {
    if (std.mem.indexOfScalar(u8, input, '$') == null) return input;

    var out: std.ArrayListUnmanaged(u8) = .empty;
    var cursor: usize = 0;
    while (std.mem.indexOfScalarPos(u8, input, cursor, '$')) |dollar| {
        // Literal text between the previous reference and this '$'.
        out.appendSlice(arena, input[cursor..dollar]) catch return input;

        const name_start = dollar + 1;
        var name_end = name_start;
        while (name_end < input.len) : (name_end += 1) {
            const ch = input[name_end];
            if (!(std.ascii.isAlphanumeric(ch) or ch == '_')) break;
        }

        if (name_end == name_start) {
            // Bare '$': keep it and continue after it.
            out.append(arena, '$') catch return input;
            cursor = name_start;
            continue;
        }

        // getenv is given a null-terminated copy of the name.
        const name_z = arena.dupeZ(u8, input[name_start..name_end]) catch return input;
        if (std.posix.getenv(name_z)) |env_val| {
            out.appendSlice(arena, env_val) catch return input;
        } else {
            // Unknown variable: keep the original "$NAME" text.
            out.appendSlice(arena, input[dollar..name_end]) catch return input;
        }
        cursor = name_end;
    }

    // Tail after the last reference.
    out.appendSlice(arena, input[cursor..]) catch return input;
    return out.toOwnedSlice(arena) catch input;
}
/// Escape a string for safe interpolation inside a JS double-quoted string literal.
///
/// Backslash, double quote, newline, carriage return and tab are replaced by
/// their backslash escapes; all other bytes are copied as-is. When nothing
/// needs escaping `input` itself is returned (no allocation).
/// Note: if an allocation fails, `input` is returned UNESCAPED.
fn escapeJs(arena: std.mem.Allocator, input: []const u8) []const u8 {
    if (std.mem.indexOfAny(u8, input, "\"\\\n\r\t") == null) return input;

    var escaped: std.ArrayListUnmanaged(u8) = .empty;
    for (input) |byte| {
        const replacement: ?[]const u8 = switch (byte) {
            '\\' => "\\\\",
            '"' => "\\\"",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            else => null,
        };
        if (replacement) |seq| {
            escaped.appendSlice(arena, seq) catch return input;
        } else {
            escaped.append(arena, byte) catch return input;
        }
    }
    return escaped.toOwnedSlice(arena) catch input;
}
/// Validate that a file path is safe: relative, no traversal above cwd.
///
/// Returns `path` unchanged when it does not start with '/' and none of its
/// '/'-separated components is "..", otherwise null. This is a purely textual
/// check; symlinks are not resolved.
fn sanitizePath(path: []const u8) ?[]const u8 {
    if (std.mem.startsWith(u8, path, "/")) return null;

    // Empty components are skipped; they can never be "..".
    var components = std.mem.tokenizeScalar(u8, path, '/');
    while (components.next()) |part| {
        if (std.mem.eql(u8, part, "..")) return null;
    }
    return path;
}
/// Heuristically find the `backendNodeId` of the element that mentions `target`.
///
/// This is a plain text scan, not a JSON parse: for each occurrence of
/// `target` in `elements_json`, the up-to-200 bytes before it are searched
/// backwards for the nearest `"backendNodeId":` key, and the number following
/// that key is returned. Occurrences with no such key nearby, or whose value
/// is not a valid u32, are skipped.
///
/// Returns null when `target` is empty or no occurrence yields an id.
/// `arena` is unused.
fn findNodeIdByText(arena: std.mem.Allocator, elements_json: []const u8, target: []const u8) ?u32 {
    _ = arena;

    // An empty needle matches at every offset and would select an arbitrary
    // element, so treat it as "not found".
    if (target.len == 0) return null;

    const key = "\"backendNodeId\":";
    const lookbehind = 200;

    var pos: usize = 0;
    while (std.mem.indexOfPos(u8, elements_json, pos, target)) |idx| {
        // Advance first so that every `continue` below makes progress.
        pos = idx + 1;

        const window_start = idx -| lookbehind;
        const window = elements_json[window_start..idx];
        const key_offset = std.mem.lastIndexOf(u8, window, key) orelse continue;

        // Allow whitespace between the colon and the number.
        var num_start = window_start + key_offset + key.len;
        while (num_start < elements_json.len and
            (elements_json[num_start] == ' ' or elements_json[num_start] == '\t'))
        {
            num_start += 1;
        }

        // The number runs to the next delimiter, or to the end of the input.
        const num_end = std.mem.indexOfAnyPos(u8, elements_json, num_start, ",}] \r\n\t") orelse elements_json.len;
        const id = std.fmt.parseInt(u32, elements_json[num_start..num_end], 10) catch continue;
        return id;
    }
    return null;
}
/// Serialize `value` to JSON text allocated from `arena`.
/// Returns the literal "{}" if serialization fails.
fn buildJson(arena: std.mem.Allocator, value: anytype) []const u8 {
    var sink: std.Io.Writer.Allocating = .init(arena);
    if (std.json.Stringify.value(value, .{}, &sink.writer)) |_| {
        return sink.written();
    } else |_| {
        return "{}";
    }
}
// --- Tests ---
// No allocation happens when nothing needs escaping, so nothing is freed here.
test "escapeJs no escaping needed" {
const result = escapeJs(std.testing.allocator, "hello world");
try std.testing.expectEqualStrings("hello world", result);
}
// When escaping occurs the result is a fresh allocation owned by the caller.
test "escapeJs quotes and backslashes" {
const result = escapeJs(std.testing.allocator, "say \"hello\\world\"");
defer std.testing.allocator.free(result);
try std.testing.expectEqualStrings("say \\\"hello\\\\world\\\"", result);
}
test "escapeJs newlines and tabs" {
const result = escapeJs(std.testing.allocator, "line1\nline2\ttab");
defer std.testing.allocator.free(result);
try std.testing.expectEqualStrings("line1\\nline2\\ttab", result);
}
// The leading quote must be escaped so it cannot close the JS string literal.
test "escapeJs injection attempt" {
const result = escapeJs(std.testing.allocator, "\"; alert(1); //");
defer std.testing.allocator.free(result);
try std.testing.expectEqualStrings("\\\"; alert(1); //", result);
}
// Accepted paths are returned unchanged.
test "sanitizePath allows relative" {
try std.testing.expectEqualStrings("output.json", sanitizePath("output.json").?);
try std.testing.expectEqualStrings("dir/file.json", sanitizePath("dir/file.json").?);
}
test "sanitizePath rejects absolute" {
try std.testing.expect(sanitizePath("/etc/passwd") == null);
}
// Any ".." component is rejected, wherever it appears in the path.
test "sanitizePath rejects traversal" {
try std.testing.expect(sanitizePath("../../../etc/passwd") == null);
try std.testing.expect(sanitizePath("foo/../../bar") == null);
}
// Input without '$' is returned as-is, without allocating.
test "substituteEnvVars no vars" {
const result = substituteEnvVars(std.testing.allocator, "hello world");
try std.testing.expectEqualStrings("hello world", result);
}
// A set variable is replaced by its value. HOME is used as a variable that is
// normally present; the test is skipped where it is not (e.g. minimal CI
// containers), because an unset variable is intentionally left as "$HOME".
test "substituteEnvVars with HOME" {
    const home = std.posix.getenv("HOME") orelse return error.SkipZigTest;

    // Use arena since substituteEnvVars makes intermediate allocations (dupeZ)
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const a = arena.allocator();

    const result = substituteEnvVars(a, "dir=$HOME/test");
    const expected = try std.fmt.allocPrint(a, "dir={s}/test", .{home});
    try std.testing.expectEqualStrings(expected, result);
}
// A reference to an unset variable is kept as written.
test "substituteEnvVars missing var kept literal" {
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena.deinit();
const result = substituteEnvVars(arena.allocator(), "$UNLIKELY_VAR_12345");
try std.testing.expectEqualStrings("$UNLIKELY_VAR_12345", result);
}
// A '$' that is not followed by a name character is copied through.
test "substituteEnvVars bare dollar" {
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena.deinit();
const result = substituteEnvVars(arena.allocator(), "price is $ 5");
try std.testing.expectEqualStrings("price is $ 5", result);
}

146
src/agent/Recorder.zig Normal file
View File

@@ -0,0 +1,146 @@
const std = @import("std");
const Command = @import("Command.zig");
const Self = @This();
/// Destination of recorded lines; null makes every recording call a no-op.
file: ?std.fs.File,
/// Open the recording file at `path` (relative to the current directory) for
/// appending, creating it if needed; existing content is kept.
///
/// With a null `path`, or when the file cannot be opened or positioned at its
/// end, a warning is printed (for the failure cases) and a recorder with no
/// file is returned, which makes every recording call a no-op.
pub fn init(path: ?[]const u8) Self {
    const p = path orelse return .{ .file = null };

    const file = std.fs.cwd().createFile(p, .{ .truncate = false }) catch |err| {
        std.debug.print("Warning: could not open recording file: {s}\n", .{@errorName(err)});
        return .{ .file = null };
    };

    // Seek to end for appending. If that fails, writing would start at offset
    // 0 and overwrite the existing recording, so disable recording instead.
    file.seekFromEnd(0) catch |err| {
        std.debug.print("Warning: could not open recording file: {s}\n", .{@errorName(err)});
        file.close();
        return .{ .file = null };
    };

    return .{ .file = file };
}
/// Close the recording file, if one is open.
pub fn deinit(self: *Self) void {
    const f = self.file orelse return;
    f.close();
}
/// Append a command line to the recording, trimmed and newline-terminated.
///
/// Nothing is written when there is no file, when the line is blank, when it
/// starts with '#', or when its first word is a non-recorded command
/// (WAIT, TREE, MARKDOWN, MD). Write errors are ignored.
pub fn record(self: *Self, line: []const u8) void {
    const out = self.file orelse return;

    const text = std.mem.trim(u8, line, &std.ascii.whitespace);
    if (text.len == 0 or text[0] == '#') return;

    // The first whitespace-delimited word decides whether the line is kept.
    const keyword = if (std.mem.indexOfAny(u8, text, &std.ascii.whitespace)) |end|
        text[0..end]
    else
        text;
    if (isNonRecordedCommand(keyword)) return;

    out.writeAll(text) catch return;
    out.writeAll("\n") catch return;
}
/// Append `comment` verbatim plus a newline (e.g. "# INTENT: ...").
/// Unlike `record`, the text is neither trimmed nor filtered.
/// Does nothing without a file; write errors are ignored.
pub fn recordComment(self: *Self, comment: []const u8) void {
    if (self.file) |out| {
        out.writeAll(comment) catch return;
        out.writeAll("\n") catch return;
    }
}
/// True for command words that are never written to the recording:
/// WAIT, TREE, MARKDOWN and MD, matched case-insensitively.
fn isNonRecordedCommand(cmd_word: []const u8) bool {
    return eqlIgnoreCase(cmd_word, "WAIT") or
        eqlIgnoreCase(cmd_word, "TREE") or
        eqlIgnoreCase(cmd_word, "MARKDOWN") or
        eqlIgnoreCase(cmd_word, "MD");
}
/// Compare `a` with `upper`, uppercasing each byte of `a` (ASCII only) first.
/// `upper` is compared verbatim, so it must itself be upper case to match.
fn eqlIgnoreCase(a: []const u8, comptime upper: []const u8) bool {
    if (a.len != upper.len) return false;
    for (upper, 0..) |expected, i| {
        if (std.ascii.toUpper(a[i]) != expected) return false;
    }
    return true;
}
// --- Tests ---
// WAIT/TREE/MARKDOWN/MD are skipped in any letter case; other commands are recorded.
test "isNonRecordedCommand" {
try std.testing.expect(isNonRecordedCommand("WAIT"));
try std.testing.expect(isNonRecordedCommand("wait"));
try std.testing.expect(isNonRecordedCommand("TREE"));
try std.testing.expect(isNonRecordedCommand("MARKDOWN"));
try std.testing.expect(isNonRecordedCommand("MD"));
try std.testing.expect(isNonRecordedCommand("md"));
try std.testing.expect(!isNonRecordedCommand("GOTO"));
try std.testing.expect(!isNonRecordedCommand("CLICK"));
try std.testing.expect(!isNonRecordedCommand("EXTRACT"));
}
// Recorded commands and comments reach the file; WAIT/TREE/MARKDOWN do not.
test "record writes state-mutating commands" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    // Report I/O problems as test errors instead of hitting `unreachable`.
    const file = try tmp.dir.createFile("test.panda", .{ .read = true });
    var recorder = Self{ .file = file };
    defer recorder.deinit();

    recorder.record("GOTO https://example.com");
    recorder.record("CLICK \"Login\"");
    recorder.record("TREE"); // should be skipped
    recorder.record("WAIT \".dashboard\""); // should be skipped
    recorder.record("MARKDOWN"); // should be skipped
    recorder.record("EXTRACT \".title\"");
    recorder.recordComment("# INTENT: LOGIN");

    // Read back and verify
    try file.seekTo(0);
    var buf: [512]u8 = undefined;
    const n = try file.readAll(&buf);
    const content = buf[0..n];

    try std.testing.expect(std.mem.indexOf(u8, content, "GOTO https://example.com\n") != null);
    try std.testing.expect(std.mem.indexOf(u8, content, "CLICK \"Login\"\n") != null);
    try std.testing.expect(std.mem.indexOf(u8, content, "EXTRACT \".title\"\n") != null);
    try std.testing.expect(std.mem.indexOf(u8, content, "# INTENT: LOGIN\n") != null);

    // Verify skipped commands are NOT present
    try std.testing.expect(std.mem.indexOf(u8, content, "TREE") == null);
    try std.testing.expect(std.mem.indexOf(u8, content, "WAIT") == null);
    try std.testing.expect(std.mem.indexOf(u8, content, "MARKDOWN") == null);
}
// `record` drops blank lines and '#' lines; only the GOTO line is written.
test "record skips empty and comment lines" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    // Report I/O problems as test errors instead of hitting `unreachable`.
    const file = try tmp.dir.createFile("test2.panda", .{ .read = true });
    var recorder = Self{ .file = file };
    defer recorder.deinit();

    recorder.record("");
    recorder.record(" ");
    recorder.record("# this is a comment");
    recorder.record("GOTO https://example.com");

    try file.seekTo(0);
    var buf: [256]u8 = undefined;
    const n = try file.readAll(&buf);
    const content = buf[0..n];
    try std.testing.expectEqualStrings("GOTO https://example.com\n", content);
}
// With no file every operation, including deinit, must be a harmless no-op.
test "recorder with null file is no-op" {
var recorder = Self{ .file = null };
recorder.record("GOTO https://example.com");
recorder.recordComment("# test");
recorder.deinit();
}

64
src/agent/Terminal.zig Normal file
View File

@@ -0,0 +1,64 @@
const std = @import("std");
const c = @cImport({
@cInclude("linenoise.h");
});
const Self = @This();
// ANSI SGR escape sequences used to style terminal output.
const ansi_reset = "\x1b[0m";
const ansi_bold = "\x1b[1m";
const ansi_dim = "\x1b[2m";
const ansi_cyan = "\x1b[36m";
const ansi_green = "\x1b[32m";
const ansi_yellow = "\x1b[33m";
const ansi_red = "\x1b[31m";
/// File the linenoise history is loaded from and saved to; null disables
/// history persistence.
history_path: ?[:0]const u8,
/// Enable linenoise multi-line editing and, when a history path is given,
/// load the saved history from it (the load result is ignored).
pub fn init(history_path: ?[:0]const u8) Self {
    c.linenoiseSetMultiLine(1);
    if (history_path) |path| _ = c.linenoiseHistoryLoad(path.ptr);
    return .{ .history_path = history_path };
}
/// Read one line with linenoise, showing `prompt`.
///
/// Returns null when linenoise returns no line. Non-empty lines are added to
/// the history, which is saved to `history_path` when one is configured.
/// The returned slice is backed by the linenoise buffer; release it with
/// `freeLine`.
pub fn readLine(self: *Self, prompt: [*:0]const u8) ?[]const u8 {
    const raw = c.linenoise(prompt) orelse return null;
    const text = std.mem.sliceTo(raw, 0);
    if (text.len == 0) return text;

    _ = c.linenoiseHistoryAdd(raw);
    if (self.history_path) |path| _ = c.linenoiseHistorySave(path.ptr);
    return text;
}
/// Release a line via `linenoiseFree`. The cast drops constness from the
/// slice pointer so it can be handed to the C API.
/// NOTE(review): presumably `line` must be exactly the slice returned by
/// `readLine` — confirm against callers.
pub fn freeLine(_: *Self, line: []const u8) void {
c.linenoiseFree(@ptrCast(@constCast(line.ptr)));
}
/// Write `text` to standard output without any styling.
///
/// A single `write` may accept only part of the buffer (e.g. on pipes), so the
/// call is repeated until everything is written. Output is best-effort: on a
/// write error, or if no progress is made, the rest is dropped silently.
pub fn printAssistant(_: *Self, text: []const u8) void {
    const fd = std.posix.STDOUT_FILENO;
    var remaining = text;
    while (remaining.len > 0) {
        const written = std.posix.write(fd, remaining) catch return;
        if (written == 0) return;
        remaining = remaining[written..];
    }
}
/// Print a dim cyan "[tool: <name>]" tag followed by the raw arguments,
/// via `std.debug.print`.
pub fn printToolCall(_: *Self, name: []const u8, args: []const u8) void {
    const fmt = "\n{s}{s}[tool: {s}]{s} {s}\n";
    std.debug.print(fmt, .{ ansi_dim, ansi_cyan, name, ansi_reset, args });
}
/// Print a dim green "[result: <name>]" tag followed by the tool output,
/// via `std.debug.print`.
///
/// Output longer than 500 bytes is cut and suffixed with "...". The cut is
/// moved back to the start of a UTF-8 sequence so a multi-byte character is
/// never split, which would garble the terminal.
pub fn printToolResult(_: *Self, name: []const u8, result: []const u8) void {
    const max_bytes = 500;

    var cut: usize = @min(result.len, max_bytes);
    if (cut < result.len) {
        // result[cut] is the first dropped byte; while it is a continuation
        // byte (0b10xxxxxx) the cut sits inside a character.
        while (cut > 0 and (result[cut] & 0xC0) == 0x80) cut -= 1;
    }

    const ellipsis: []const u8 = if (cut < result.len) "..." else "";
    std.debug.print("{s}{s}[result: {s}]{s} {s}{s}\n", .{ ansi_dim, ansi_green, name, ansi_reset, result[0..cut], ellipsis });
}
/// Print `msg` in bold red, prefixed with "Error: ", via `std.debug.print`.
pub fn printError(_: *Self, msg: []const u8) void {
    const fmt = "{s}{s}Error: {s}{s}\n";
    std.debug.print(fmt, .{ ansi_bold, ansi_red, msg, ansi_reset });
}
/// Print `msg` dimmed, via `std.debug.print`.
pub fn printInfo(_: *Self, msg: []const u8) void {
    const fmt = "{s}{s}{s}\n";
    std.debug.print(fmt, .{ ansi_dim, msg, ansi_reset });
}

446
src/agent/ToolExecutor.zig Normal file
View File

@@ -0,0 +1,446 @@
const std = @import("std");
const lp = @import("lightpanda");
const zenai = @import("zenai");
const App = @import("../App.zig");
const HttpClient = @import("../browser/HttpClient.zig");
const CDPNode = @import("../cdp/Node.zig");
const mcp_tools = @import("../mcp/tools.zig");
const protocol = @import("../mcp/protocol.zig");
const Self = @This();
/// Allocator that created this instance; also used to destroy it in `deinit`.
allocator: std.mem.Allocator,
app: *App,
/// HTTP client created in `init` and handed to the browser.
http_client: *HttpClient,
/// Notification object passed to `browser.newSession`.
notification: *lp.Notification,
browser: lp.Browser,
/// Session opened on `browser` at the end of `init`.
session: *lp.Session,
node_registry: CDPNode.Registry,
/// Holds the parsed tool schemas returned by `getTools`.
tool_schema_arena: std.heap.ArenaAllocator,
/// Allocate and initialize an executor: HTTP client, notification, browser,
/// then a session on that browser. The returned pointer is heap-allocated
/// with `allocator` and must be released with `deinit`.
/// The errdefers undo the steps already completed if a later one fails.
pub fn init(allocator: std.mem.Allocator, app: *App) !*Self {
const http_client = try HttpClient.init(allocator, &app.network);
errdefer http_client.deinit();
const notification = try lp.Notification.init(allocator);
errdefer notification.deinit();
const self = try allocator.create(Self);
errdefer allocator.destroy(self);
var browser = try lp.Browser.init(app, .{ .http_client = http_client });
// NOTE(review): this errdefer cleans up the local `browser`, while
// newSession below runs on the copy stored in `self.browser` — confirm the
// failure path of newSession is cleaned up correctly.
errdefer browser.deinit();
self.* = .{
.allocator = allocator,
.app = app,
.http_client = http_client,
.notification = notification,
.browser = browser,
// Assigned right after the struct is in place, since the session is
// created from `self.browser`.
.session = undefined,
.node_registry = CDPNode.Registry.init(allocator),
.tool_schema_arena = std.heap.ArenaAllocator.init(allocator),
};
self.session = try self.browser.newSession(self.notification);
return self;
}
/// Tear down everything created by `init`, then free the executor itself.
/// `self` must not be used after this call.
pub fn deinit(self: *Self) void {
self.tool_schema_arena.deinit();
self.node_registry.deinit();
// Browser first, then the notification and HTTP client it was created with.
self.browser.deinit();
self.notification.deinit();
self.http_client.deinit();
self.allocator.destroy(self);
}
/// Returns the list of tools in zenai provider.Tool format.
///
/// Each entry of `mcp_tools.tool_list` is converted by parsing its
/// `inputSchema` JSON; a missing description becomes "". The slice and the
/// parsed schemas are allocated from `tool_schema_arena`, and every call
/// allocates a fresh copy there. Fails if a schema is not valid JSON or on
/// allocation failure.
pub fn getTools(self: *Self) ![]const zenai.provider.Tool {
    const arena = self.tool_schema_arena.allocator();
    const tools = try arena.alloc(zenai.provider.Tool, mcp_tools.tool_list.len);
    for (mcp_tools.tool_list, tools) |src, *dst| {
        const schema = try std.json.parseFromSliceLeaky(std.json.Value, arena, src.inputSchema, .{});
        dst.* = .{
            .name = src.name,
            .description = src.description orelse "",
            .parameters = schema,
        };
    }
    return tools;
}
/// Execute a tool by name with JSON arguments, returning the result as a string.
///
/// `arguments_json` may be empty, in which case the tool receives null
/// arguments. Problems detected here are reported in-band as text: invalid
/// JSON yields "Error: invalid JSON arguments" and an unrecognized name
/// yields "Error: unknown tool". The arguments are parsed before the name is
/// looked up, so invalid JSON wins when both are wrong. Parsed arguments are
/// allocated from `arena`.
pub fn call(self: *Self, arena: std.mem.Allocator, tool_name: []const u8, arguments_json: []const u8) ![]const u8 {
    // Field names are the exact tool names accepted over the wire.
    const Action = enum {
        goto,
        navigate,
        markdown,
        links,
        nodeDetails,
        interactiveElements,
        structuredData,
        detectForms,
        evaluate,
        eval,
        semantic_tree,
        click,
        fill,
        scroll,
        waitForSelector,
    };

    var arguments: ?std.json.Value = null;
    if (arguments_json.len > 0) {
        const parsed = std.json.parseFromSlice(std.json.Value, arena, arguments_json, .{}) catch
            return "Error: invalid JSON arguments";
        arguments = parsed.value;
    }

    const action = std.meta.stringToEnum(Action, tool_name) orelse return "Error: unknown tool";

    return switch (action) {
        .goto, .navigate => self.execGoto(arena, arguments),
        .markdown => self.execMarkdown(arena, arguments),
        .links => self.execLinks(arena, arguments),
        .nodeDetails => self.execNodeDetails(arena, arguments),
        .interactiveElements => self.execInteractiveElements(arena, arguments),
        .structuredData => self.execStructuredData(arena, arguments),
        .detectForms => self.execDetectForms(arena, arguments),
        .evaluate, .eval => self.execEvaluate(arena, arguments),
        .semantic_tree => self.execSemanticTree(arena, arguments),
        .click => self.execClick(arena, arguments),
        .fill => self.execFill(arena, arguments),
        .scroll => self.execScroll(arena, arguments),
        .waitForSelector => self.execWaitForSelector(arena, arguments),
    };
}
/// `goto` / `navigate` tool: loads `url` in a fresh page.
///
/// Accepts optional `timeout` and `waitUntil` arguments. Returns a fixed
/// success message, or an "Error: …" string when the arguments cannot be
/// parsed or navigation fails.
fn execGoto(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const NavArgs = struct {
        url: [:0]const u8,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };

    if (parseArgsOrErr(NavArgs, arena, arguments)) |nav| {
        self.performGoto(nav.url, nav.timeout, nav.waitUntil) catch return "Error: navigation failed";
        return "Navigated successfully.";
    }
    return "Error: missing or invalid 'url' argument";
}
/// `markdown` tool: renders the current document as markdown.
///
/// When a `url` argument is given the page is navigated there first;
/// otherwise the already-loaded page is used. The output is allocated from
/// `arena`.
fn execMarkdown(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const PageArgs = struct {
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrDefault(PageArgs, arena, arguments);

    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    var out: std.Io.Writer.Allocating = .init(arena);
    lp.markdown.dump(page.window._document.asNode(), .{}, &out.writer, page) catch
        return "Error: failed to generate markdown";
    return out.written();
}
/// `links` tool: returns every link collected from the current document, one
/// per line.
///
/// When a `url` argument is given the page is navigated there first. Any
/// failure — including failing to assemble the output — is reported as an
/// "Error: …" string, so a partial list is never returned silently.
fn execLinks(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const UrlParams = struct {
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const args = parseArgsOrDefault(UrlParams, arena, arguments);
    const page = self.ensurePage(args.url, args.timeout, args.waitUntil) catch return "Error: page not loaded";
    const links_list = lp.links.collectLinks(arena, page.window._document.asNode(), page) catch
        return "Error: failed to collect links";

    var aw: std.Io.Writer.Allocating = .init(arena);
    for (links_list, 0..) |href, i| {
        // Newline-separated, without a trailing newline.
        if (i > 0) aw.writer.writeByte('\n') catch return "Error: failed to format links";
        aw.writer.writeAll(href) catch return "Error: failed to format links";
    }
    return aw.written();
}
/// `nodeDetails` tool: returns the details of a previously registered node as
/// a JSON string.
///
/// Requires a `backendNodeId` argument and a loaded page. Failures are
/// reported as "Error: …" strings.
fn execNodeDetails(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const Params = struct { backendNodeId: CDPNode.Id };
    const args = parseArgsOrErr(Params, arena, arguments) orelse return "Error: missing backendNodeId";

    // Resolve the page once and reuse it, rather than checking it and then
    // force-unwrapping a second lookup.
    const page = self.session.currentPage() orelse return "Error: page not loaded";
    const node = self.node_registry.lookup_by_id.get(args.backendNodeId) orelse
        return "Error: node not found";

    const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &self.node_registry, page) catch
        return "Error: failed to get node details";

    var aw: std.Io.Writer.Allocating = .init(arena);
    std.json.Stringify.value(&details, .{}, &aw.writer) catch return "Error: serialization failed";
    return aw.written();
}
/// `interactiveElements` tool: collects the interactive elements of the
/// current document, registers their nodes in the node registry, and returns
/// them serialized as JSON.
///
/// When a `url` argument is given the page is navigated there first.
fn execInteractiveElements(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const PageArgs = struct {
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrDefault(PageArgs, arena, arguments);

    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    const found = lp.interactive.collectInteractiveElements(page.window._document.asNode(), arena, page) catch
        return "Error: failed to collect interactive elements";

    // Registering is what lets later tools refer to these elements by id.
    lp.interactive.registerNodes(found, &self.node_registry) catch
        return "Error: failed to register nodes";

    var out: std.Io.Writer.Allocating = .init(arena);
    std.json.Stringify.value(found, .{}, &out.writer) catch
        return "Error: serialization failed";
    return out.written();
}
/// `structuredData` tool: collects structured data from the current document
/// and returns it serialized as JSON.
///
/// When a `url` argument is given the page is navigated there first.
fn execStructuredData(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const PageArgs = struct {
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrDefault(PageArgs, arena, arguments);

    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    const collected = lp.structured_data.collectStructuredData(page.window._document.asNode(), arena, page) catch
        return "Error: failed to collect structured data";

    var out: std.Io.Writer.Allocating = .init(arena);
    std.json.Stringify.value(collected, .{}, &out.writer) catch
        return "Error: serialization failed";
    return out.written();
}
/// `detectForms` tool: collects the forms of the current document, registers
/// their nodes in the node registry, and returns them serialized as JSON.
///
/// When a `url` argument is given the page is navigated there first.
fn execDetectForms(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const PageArgs = struct {
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrDefault(PageArgs, arena, arguments);

    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    const found_forms = lp.forms.collectForms(arena, page.window._document.asNode(), page) catch
        return "Error: failed to collect forms";

    lp.forms.registerNodes(found_forms, &self.node_registry) catch
        return "Error: failed to register form nodes";

    var out: std.Io.Writer.Allocating = .init(arena);
    std.json.Stringify.value(found_forms, .{}, &out.writer) catch
        return "Error: serialization failed";
    return out.written();
}
/// `evaluate` / `eval` tool: compiles and runs `script` in the page's JS
/// context and returns the result converted to a string.
///
/// When a `url` argument is given the page is navigated there first. If the
/// script fails, the formatted caught error is returned instead; if the
/// result cannot be stringified, "undefined" is returned.
fn execEvaluate(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const EvalArgs = struct {
        script: [:0]const u8,
        url: ?[:0]const u8 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrErr(EvalArgs, arena, arguments) orelse
        return "Error: missing 'script' argument";
    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    var scope: lp.js.Local.Scope = undefined;
    page.js.localScope(&scope);
    defer scope.deinit();

    var guard: lp.js.TryCatch = undefined;
    guard.init(&scope.local);
    defer guard.deinit();

    const value = scope.local.compileAndRun(opts.script, null) catch |err| {
        // Report whatever the try/catch captured (or the Zig error itself).
        const caught = guard.caughtOrError(arena, err);
        var report: std.Io.Writer.Allocating = .init(arena);
        caught.format(&report.writer) catch {};
        return report.written();
    };

    return value.toStringSliceWithAlloc(arena) catch "undefined";
}
/// `semantic_tree` tool: returns a pruned, text-form semantic tree.
///
/// The tree is rooted at the registered node named by `backendNodeId` when
/// that id is supplied and known; otherwise it is rooted at the document.
/// `maxDepth` limits the depth when given. When a `url` argument is given the
/// page is navigated there first.
fn execSemanticTree(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const TreeArgs = struct {
        url: ?[:0]const u8 = null,
        backendNodeId: ?u32 = null,
        maxDepth: ?u32 = null,
        timeout: ?u32 = null,
        waitUntil: ?lp.Config.WaitUntil = null,
    };
    const opts = parseArgsOrDefault(TreeArgs, arena, arguments);
    const page = self.ensurePage(opts.url, opts.timeout, opts.waitUntil) catch
        return "Error: page not loaded";

    const document_node = page.window._document.asNode();
    // An unknown id silently falls back to the document root.
    const root = if (opts.backendNodeId) |wanted|
        (if (self.node_registry.lookup_by_id.get(wanted)) |entry| entry.dom else document_node)
    else
        document_node;

    const tree = lp.SemanticTree{
        .dom_node = root,
        .registry = &self.node_registry,
        .page = page,
        .arena = arena,
        .prune = true,
        .max_depth = opts.maxDepth orelse std.math.maxInt(u32) - 1,
    };

    var out: std.Io.Writer.Allocating = .init(arena);
    tree.textStringify(&out.writer) catch
        return "Error: failed to generate semantic tree";
    return out.written();
}
/// `click` tool: clicks a previously registered node on the current page.
///
/// On success the message includes the node id plus the page URL and title
/// as they are after the click.
fn execClick(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const ClickArgs = struct { backendNodeId: CDPNode.Id };
    const parsed = parseArgsOrErr(ClickArgs, arena, arguments) orelse
        return "Error: missing backendNodeId";

    const page = self.session.currentPage() orelse
        return "Error: page not loaded";
    const entry = self.node_registry.lookup_by_id.get(parsed.backendNodeId) orelse
        return "Error: node not found";

    lp.actions.click(entry.dom, page) catch |err| {
        if (err != error.InvalidNodeType) return "Error: failed to click element";
        return "Error: node is not an HTML element";
    };

    const title = page.getTitle() catch null;
    const summary = std.fmt.allocPrint(
        arena,
        "Clicked element (backendNodeId: {d}). Page url: {s}, title: {s}",
        .{ parsed.backendNodeId, page.url, title orelse "(none)" },
    );
    // Fall back to a short fixed message if formatting itself fails.
    return summary catch "Clicked element.";
}
/// `fill` tool: writes `text` into a previously registered node on the
/// current page.
///
/// On success the message echoes the node id and text, plus the page URL and
/// title.
fn execFill(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const FillArgs = struct {
        backendNodeId: CDPNode.Id,
        text: []const u8,
    };
    const parsed = parseArgsOrErr(FillArgs, arena, arguments) orelse
        return "Error: missing backendNodeId or text";

    const page = self.session.currentPage() orelse
        return "Error: page not loaded";
    const entry = self.node_registry.lookup_by_id.get(parsed.backendNodeId) orelse
        return "Error: node not found";

    lp.actions.fill(entry.dom, parsed.text, page) catch |err| {
        if (err != error.InvalidNodeType) return "Error: failed to fill element";
        return "Error: node is not an input, textarea or select";
    };

    const title = page.getTitle() catch null;
    const summary = std.fmt.allocPrint(
        arena,
        "Filled element (backendNodeId: {d}) with \"{s}\". Page url: {s}, title: {s}",
        .{ parsed.backendNodeId, parsed.text, page.url, title orelse "(none)" },
    );
    return summary catch "Filled element.";
}
/// `scroll` tool: scrolls by/to the optional `x` / `y` values, targeting the
/// registered node named by `backendNodeId` when one is supplied.
///
/// The success message reports missing coordinates as 0.
fn execScroll(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const ScrollArgs = struct {
        backendNodeId: ?CDPNode.Id = null,
        x: ?i32 = null,
        y: ?i32 = null,
    };
    const opts = parseArgsOrDefault(ScrollArgs, arena, arguments);
    const page = self.session.currentPage() orelse
        return "Error: page not loaded";

    // No id means "no specific node"; an unknown id is an error.
    const target: ?*@import("../browser/webapi/Node.zig") = if (opts.backendNodeId) |wanted|
        (self.node_registry.lookup_by_id.get(wanted) orelse return "Error: node not found").dom
    else
        null;

    lp.actions.scroll(target, opts.x, opts.y, page) catch |err| {
        if (err != error.InvalidNodeType) return "Error: failed to scroll";
        return "Error: node is not an element";
    };

    const title = page.getTitle() catch null;
    const summary = std.fmt.allocPrint(
        arena,
        "Scrolled to x: {d}, y: {d}. Page url: {s}, title: {s}",
        .{ opts.x orelse 0, opts.y orelse 0, page.url, title orelse "(none)" },
    );
    return summary catch "Scrolled.";
}
/// `waitForSelector` tool: waits until `selector` matches an element, then
/// registers that element and reports its backend node id.
///
/// `timeout` is in milliseconds and defaults to 5000. If registering the
/// found node fails, a plain "Element found." is still returned.
fn execWaitForSelector(self: *Self, arena: std.mem.Allocator, arguments: ?std.json.Value) []const u8 {
    const WaitArgs = struct {
        selector: [:0]const u8,
        timeout: ?u32 = null,
    };
    const opts = parseArgsOrErr(WaitArgs, arena, arguments) orelse
        return "Error: missing 'selector' argument";

    // Only checked for presence; the wait itself goes through the session.
    if (self.session.currentPage() == null) return "Error: page not loaded";

    const wait_ms = opts.timeout orelse 5000;
    const found = lp.actions.waitForSelector(opts.selector, wait_ms, self.session) catch |err| {
        if (err == error.InvalidSelector) return "Error: invalid selector";
        if (err == error.Timeout) return "Error: timeout waiting for selector";
        return "Error: failed waiting for selector";
    };

    const entry = self.node_registry.register(found) catch return "Element found.";
    return std.fmt.allocPrint(arena, "Element found. backendNodeId: {d}", .{entry.id}) catch
        "Element found.";
}
/// Returns the session's current page, navigating to `url` first when one is
/// given.
///
/// Fails with `error.PageNotLoaded` when there is no current page, or with
/// whatever `performGoto` returns when navigation fails.
fn ensurePage(self: *Self, url: ?[:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !*lp.Page {
    if (url) |target| try self.performGoto(target, timeout, waitUntil);

    const current = self.session.currentPage();
    return current orelse error.PageNotLoaded;
}
/// Replaces the session's page with a fresh one, navigates it to `url`, and
/// waits for the load to reach `waitUntil` (default `.done`) or for `timeout`
/// milliseconds (default 10000) to elapse.
fn performGoto(self: *Self, url: [:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !void {
    const sess = self.session;

    // Drop any existing page before creating the new one.
    if (sess.page != null) sess.removePage();

    const fresh_page = try sess.createPage();
    _ = try fresh_page.navigate(url, .{
        .reason = .address_bar,
        .kind = .{ .push = null },
    });

    const wait_ms = timeout orelse 10000;
    const wait_until = waitUntil orelse .done;

    var runner = try sess.runner(.{});
    try runner.wait(.{ .ms = wait_ms, .until = wait_until });
}
/// Parses `arguments` into `T`, ignoring unknown fields.
///
/// Returns a default-initialized `T` when `arguments` is null or cannot be
/// parsed, so `T` must have defaults for all of its fields.
fn parseArgsOrDefault(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value) T {
    if (arguments) |raw| {
        return std.json.parseFromValueLeaky(T, arena, raw, .{ .ignore_unknown_fields = true }) catch .{};
    }
    return .{};
}
/// Parses `arguments` into `T`, ignoring unknown fields.
///
/// Returns null when `arguments` is null or cannot be parsed into `T`.
fn parseArgsOrErr(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Value) ?T {
    if (arguments) |raw| {
        return std.json.parseFromValueLeaky(T, arena, raw, .{ .ignore_unknown_fields = true }) catch null;
    }
    return null;
}

View File

@@ -22,10 +22,23 @@ const DOMNode = @import("webapi/Node.zig");
const Element = @import("webapi/Element.zig");
const Event = @import("webapi/Event.zig");
const MouseEvent = @import("webapi/event/MouseEvent.zig");
const KeyboardEvent = @import("webapi/event/KeyboardEvent.zig");
const Page = @import("Page.zig");
const Session = @import("Session.zig");
const Selector = @import("webapi/selector/Selector.zig");
/// Dispatches a bubbling trusted `input` event followed by a bubbling trusted
/// `change` event on `el`.
///
/// Failing to create either event is returned to the caller; a failing
/// dispatch is only logged.
fn dispatchInputAndChangeEvents(el: *Element, page: *Page) !void {
    // Unrolled at comptime so each event keeps its own literal name and
    // log message.
    inline for (.{ "input", "change" }) |event_name| {
        const evt: *Event = try .initTrusted(comptime .wrap(event_name), .{ .bubbles = true }, page);
        page._event_manager.dispatch(el.asEventTarget(), evt) catch |err| {
            lp.log.err(.app, "dispatch " ++ event_name ++ " event failed", .{ .err = err });
        };
    }
}
pub fn click(node: *DOMNode, page: *Page) !void {
const el = node.is(Element) orelse return error.InvalidNodeType;
@@ -43,9 +56,107 @@ pub fn click(node: *DOMNode, page: *Page) !void {
};
}
/// Simulates hovering `node` by dispatching a trusted `mouseover` event and
/// then a trusted `mouseenter` event on it.
///
/// Returns `error.InvalidNodeType` when `node` is not an element and
/// `error.ActionFailed` (after logging) when either dispatch fails.
pub fn hover(node: *DOMNode, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;

    const over_evt: *MouseEvent = try .initTrusted(comptime .wrap("mouseover"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), over_evt.asEvent()) catch |err| {
        lp.log.err(.app, "hover mouseover failed", .{ .err = err });
        return error.ActionFailed;
    };

    // Unlike mouseover, this one is created without bubbles/cancelable.
    const enter_evt: *MouseEvent = try .initTrusted(comptime .wrap("mouseenter"), .{
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), enter_evt.asEvent()) catch |err| {
        lp.log.err(.app, "hover mouseenter failed", .{ .err = err });
        return error.ActionFailed;
    };
}
/// Simulates pressing `key` by dispatching a trusted `keydown` event followed
/// by a trusted `keyup` event.
///
/// The events target `node` when given (it must be an element, otherwise
/// `error.InvalidNodeType`), or the document node when `node` is null.
/// A failing dispatch is logged and reported as `error.ActionFailed`.
pub fn press(node: ?*DOMNode, key: []const u8, page: *Page) !void {
    const target = blk: {
        const n = node orelse break :blk page.document.asNode().asEventTarget();
        const element = n.is(Element) orelse return error.InvalidNodeType;
        break :blk element.asEventTarget();
    };

    // Both phases share the same options; only the event name differs.
    inline for (.{ "keydown", "keyup" }) |phase| {
        const key_evt: *KeyboardEvent = try .initTrusted(comptime .wrap(phase), .{
            .bubbles = true,
            .cancelable = true,
            .composed = true,
            .key = key,
        }, page);
        page._event_manager.dispatch(target, key_evt.asEvent()) catch |err| {
            lp.log.err(.app, "press " ++ phase ++ " failed", .{ .err = err });
            return error.ActionFailed;
        };
    }
}
/// Sets the value of a `<select>` element to `value`, then dispatches `input`
/// and `change` events on it.
///
/// Returns `error.InvalidNodeType` when `node` is not a select element and
/// `error.ActionFailed` (after logging) when setting the value fails.
pub fn selectOption(node: *DOMNode, value: []const u8, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;
    const dropdown = element.is(Element.Html.Select) orelse return error.InvalidNodeType;

    dropdown.setValue(value, page) catch |err| {
        lp.log.err(.app, "select setValue failed", .{ .err = err });
        return error.ActionFailed;
    };

    return dispatchInputAndChangeEvents(element, page);
}
/// Sets the checked state of a checkbox or radio input, then dispatches a
/// trusted `click` event followed by `input` and `change` events.
///
/// Returns `error.InvalidNodeType` when `node` is not a checkbox/radio input
/// and `error.ActionFailed` (after logging) when the state cannot be set.
/// A failing click dispatch is only logged.
pub fn setChecked(node: *DOMNode, checked: bool, page: *Page) !void {
    const element = node.is(Element) orelse return error.InvalidNodeType;
    const input = element.is(Element.Html.Input) orelse return error.InvalidNodeType;

    const is_toggle = input._input_type == .checkbox or input._input_type == .radio;
    if (!is_toggle) return error.InvalidNodeType;

    input.setChecked(checked, page) catch |err| {
        lp.log.err(.app, "setChecked failed", .{ .err = err });
        return error.ActionFailed;
    };

    // Match browser event order: click fires first, then input and change.
    const click_evt: *MouseEvent = try .initTrusted(comptime .wrap("click"), .{
        .bubbles = true,
        .cancelable = true,
        .composed = true,
    }, page);
    page._event_manager.dispatch(element.asEventTarget(), click_evt.asEvent()) catch |err| {
        lp.log.err(.app, "dispatch click event failed", .{ .err = err });
    };

    return dispatchInputAndChangeEvents(element, page);
}
pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
const el = node.is(Element) orelse return error.InvalidNodeType;
el.focus(page) catch |err| {
lp.log.err(.app, "fill focus failed", .{ .err = err });
};
if (el.is(Element.Html.Input)) |input| {
input.setValue(text, page) catch |err| {
lp.log.err(.app, "fill input failed", .{ .err = err });
@@ -65,15 +176,7 @@ pub fn fill(node: *DOMNode, text: []const u8, page: *Page) !void {
return error.InvalidNodeType;
}
const input_evt: *Event = try .initTrusted(comptime .wrap("input"), .{ .bubbles = true }, page);
page._event_manager.dispatch(el.asEventTarget(), input_evt) catch |err| {
lp.log.err(.app, "dispatch input event failed", .{ .err = err });
};
const change_evt: *Event = try .initTrusted(comptime .wrap("change"), .{ .bubbles = true }, page);
page._event_manager.dispatch(el.asEventTarget(), change_evt) catch |err| {
lp.log.err(.app, "dispatch change event failed", .{ .err = err });
};
try dispatchInputAndChangeEvents(el, page);
}
pub fn scroll(node: ?*DOMNode, x: ?i32, y: ?i32, page: *Page) !void {

View File

@@ -10,5 +10,20 @@
<div id="scrollbox" style="width: 100px; height: 100px; overflow: scroll;" onscroll="window.scrolled = true;">
<div style="height: 500px;">Long content</div>
</div>
<div id="hoverTarget" onmouseover="window.hovered = true;">Hover Me</div>
<input id="keyTarget" onkeydown="window.keyPressed = event.key;" onkeyup="window.keyReleased = event.key;">
<select id="sel2" onchange="window.sel2Changed = this.value">
<option value="a">Alpha</option>
<option value="b">Beta</option>
<option value="c">Gamma</option>
</select>
<input id="chk" type="checkbox">
<input id="rad" type="radio" name="group1">
<script>
document.getElementById('chk').addEventListener('click', function() { window.chkClicked = true; });
document.getElementById('chk').addEventListener('change', function() { window.chkChanged = true; });
document.getElementById('rad').addEventListener('click', function() { window.radClicked = true; });
document.getElementById('rad').addEventListener('change', function() { window.radChanged = true; });
</script>
</body>
</html>

View File

@@ -52,17 +52,19 @@ fn dispatchKeyEvent(cmd: *CDP.Command) !void {
try cmd.sendResult(null, .{});
// quickly ignore types we know we don't handle
switch (params.type) {
.keyUp, .rawKeyDown, .char => return,
.keyDown => {},
}
// rawKeyDown is a Chrome-internal event type not used for JS dispatch
if (params.type == .rawKeyDown) return;
const bc = cmd.browser_context orelse return;
const page = bc.session.currentPage() orelse return;
const KeyboardEvent = @import("../../browser/webapi/event/KeyboardEvent.zig");
const keyboard_event = try KeyboardEvent.initTrusted(comptime .wrap("keydown"), .{
const keyboard_event = try KeyboardEvent.initTrusted(switch (params.type) {
.keyDown => comptime .wrap("keydown"),
.keyUp => comptime .wrap("keyup"),
.char => comptime .wrap("keypress"),
.rawKeyDown => unreachable,
}, .{
.key = params.key,
.code = params.code,
.altKey = params.modifiers & 1 == 1,

View File

@@ -40,6 +40,7 @@ pub const forms = @import("browser/forms.zig");
pub const actions = @import("browser/actions.zig");
pub const structured_data = @import("browser/structured_data.zig");
pub const mcp = @import("mcp.zig");
pub const agent = @import("agent.zig");
pub const build_config = @import("build_config");
pub const crash_handler = @import("crash_handler.zig");

View File

@@ -165,10 +165,30 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
app.network.run();
},
.agent => |opts| {
log.info(.app, "starting agent", .{});
var worker_thread = try std.Thread.spawn(.{}, agentThread, .{ allocator, app, opts });
defer worker_thread.join();
app.network.run();
},
else => unreachable,
}
}
/// Thread entry point for agent mode: creates the agent, runs it, and tears
/// it down.
///
/// `app.network.stop()` is always called on exit, including when agent
/// initialization fails (which is logged as fatal).
fn agentThread(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) void {
    defer app.network.stop();

    if (lp.agent.Agent.init(allocator, app, opts)) |created| {
        var agent_instance = created;
        defer agent_instance.deinit();
        agent_instance.run();
    } else |err| {
        log.fatal(.app, "agent init error", .{ .err = err });
    }
}
fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
defer app.network.stop();
lp.fetch(app, url, fetch_opts) catch |err| {

View File

@@ -175,6 +175,74 @@ pub const tool_list = [_]protocol.Tool{
\\}
),
},
.{
.name = "hover",
.description = "Hover over an element, triggering mouseover and mouseenter events. Useful for menus, tooltips, and hover states.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to hover over." }
\\ },
\\ "required": ["backendNodeId"]
\\}
),
},
.{
.name = "press",
.description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "key": { "type": "string", "description": "The key to press (e.g. 'Enter', 'Tab', 'a')." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document." }
\\ },
\\ "required": ["key"]
\\}
),
},
.{
.name = "selectOption",
.description = "Select an option in a <select> dropdown element by its value. Dispatches input and change events.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the <select> element." },
\\ "value": { "type": "string", "description": "The value of the option to select." }
\\ },
\\ "required": ["backendNodeId", "value"]
\\}
),
},
.{
.name = "setChecked",
.description = "Check or uncheck a checkbox or radio button. Dispatches input, change, and click events.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the checkbox or radio input element." },
\\ "checked": { "type": "boolean", "description": "Whether to check (true) or uncheck (false) the element." }
\\ },
\\ "required": ["backendNodeId", "checked"]
\\}
),
},
.{
.name = "findElement",
.description = "Find interactive elements by role and/or accessible name. Returns matching elements with their backend node IDs. Useful for locating specific elements without parsing the full semantic tree.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "role": { "type": "string", "description": "Optional ARIA role to match (e.g. 'button', 'link', 'textbox', 'checkbox')." },
\\ "name": { "type": "string", "description": "Optional accessible name substring to match (case-insensitive)." }
\\ }
\\}
),
},
};
pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -282,6 +350,11 @@ const ToolAction = enum {
fill,
scroll,
waitForSelector,
hover,
press,
selectOption,
setChecked,
findElement,
};
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
@@ -300,6 +373,11 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "fill", .fill },
.{ "scroll", .scroll },
.{ "waitForSelector", .waitForSelector },
.{ "hover", .hover },
.{ "press", .press },
.{ "selectOption", .selectOption },
.{ "setChecked", .setChecked },
.{ "findElement", .findElement },
});
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
@@ -334,6 +412,11 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.fill => try handleFill(server, arena, req.id.?, call_params.arguments),
.scroll => try handleScroll(server, arena, req.id.?, call_params.arguments),
.waitForSelector => try handleWaitForSelector(server, arena, req.id.?, call_params.arguments),
.hover => try handleHover(server, arena, req.id.?, call_params.arguments),
.press => try handlePress(server, arena, req.id.?, call_params.arguments),
.selectOption => try handleSelectOption(server, arena, req.id.?, call_params.arguments),
.setChecked => try handleSetChecked(server, arena, req.id.?, call_params.arguments),
.findElement => try handleFindElement(server, arena, req.id.?, call_params.arguments),
}
}
@@ -400,17 +483,9 @@ fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Val
backendNodeId: CDPNode.Id,
};
const args = try parseArgs(Params, arena, arguments, server, id, "nodeDetails");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
_ = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
const page = server.session.currentPage().?;
const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &server.node_registry, page) catch {
const details = lp.SemanticTree.getNodeDetails(arena, resolved.node, &server.node_registry, resolved.page) catch {
return server.sendError(id, .InternalError, "Failed to get node details");
};
@@ -510,26 +585,19 @@ fn handleClick(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
backendNodeId: CDPNode.Id,
};
const args = try parseArgs(ClickParams, arena, arguments, server, id, "click");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
const page = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
lp.actions.click(node.dom, page) catch |err| {
lp.actions.click(resolved.node, resolved.page) catch |err| {
if (err == error.InvalidNodeType) {
return server.sendError(id, .InvalidParams, "Node is not an HTML element");
}
return server.sendError(id, .InternalError, "Failed to click element");
};
const page_title = page.getTitle() catch null;
const page_title = resolved.page.getTitle() catch null;
const result_text = try std.fmt.allocPrint(arena, "Clicked element (backendNodeId: {d}). Page url: {s}, title: {s}", .{
args.backendNodeId,
page.url,
resolved.page.url,
page_title orelse "(none)",
});
const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -542,27 +610,20 @@ fn handleFill(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arg
text: []const u8,
};
const args = try parseArgs(FillParams, arena, arguments, server, id, "fill");
const resolved = try resolveNodeAndPage(server, id, args.backendNodeId);
const page = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
lp.actions.fill(node.dom, args.text, page) catch |err| {
lp.actions.fill(resolved.node, args.text, resolved.page) catch |err| {
if (err == error.InvalidNodeType) {
return server.sendError(id, .InvalidParams, "Node is not an input, textarea or select");
}
return server.sendError(id, .InternalError, "Failed to fill element");
};
const page_title = page.getTitle() catch null;
const page_title = resolved.page.getTitle() catch null;
const result_text = try std.fmt.allocPrint(arena, "Filled element (backendNodeId: {d}) with \"{s}\". Page url: {s}, title: {s}", .{
args.backendNodeId,
args.text,
page.url,
resolved.page.url,
page_title orelse "(none)",
});
const content = [_]protocol.TextContent([]const u8){.{ .text = result_text }};
@@ -636,6 +697,189 @@ fn handleWaitForSelector(server: *Server, arena: std.mem.Allocator, id: std.json
return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `hover` tool handler: hovers the node named by `backendNodeId` and
/// replies with a text result containing the node id, page URL and title.
///
/// Action failures are answered with `sendError` (InvalidParams for a
/// non-element node, InternalError otherwise).
fn handleHover(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const HoverArgs = struct {
        backendNodeId: CDPNode.Id,
    };
    const parsed = try parseArgs(HoverArgs, arena, arguments, server, id, "hover");
    const target = try resolveNodeAndPage(server, id, parsed.backendNodeId);

    lp.actions.hover(target.node, target.page) catch |err| {
        if (err != error.InvalidNodeType) {
            return server.sendError(id, .InternalError, "Failed to hover element");
        }
        return server.sendError(id, .InvalidParams, "Node is not an HTML element");
    };

    const title = target.page.getTitle() catch null;
    const message = try std.fmt.allocPrint(
        arena,
        "Hovered element (backendNodeId: {d}). Page url: {s}, title: {s}",
        .{ parsed.backendNodeId, target.page.url, title orelse "(none)" },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `press` tool handler: presses `key` on the node named by the optional
/// `backendNodeId` (no node is passed to the action when it is omitted) and
/// replies with a text result containing the key, page URL and title.
///
/// A missing page, an unknown node id, and action failures are answered with
/// `sendError`.
fn handlePress(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const PressArgs = struct {
        key: []const u8,
        backendNodeId: ?CDPNode.Id = null,
    };
    const parsed = try parseArgs(PressArgs, arena, arguments, server, id, "press");

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    // An explicit id must resolve; without one the action gets a null node.
    const target: ?*DOMNode = if (parsed.backendNodeId) |wanted|
        (server.node_registry.lookup_by_id.get(wanted) orelse {
            return server.sendError(id, .InvalidParams, "Node not found");
        }).dom
    else
        null;

    lp.actions.press(target, parsed.key, page) catch |err| {
        if (err != error.InvalidNodeType) {
            return server.sendError(id, .InternalError, "Failed to press key");
        }
        return server.sendError(id, .InvalidParams, "Node is not an HTML element");
    };

    const title = page.getTitle() catch null;
    const message = try std.fmt.allocPrint(
        arena,
        "Pressed key '{s}'. Page url: {s}, title: {s}",
        .{ parsed.key, page.url, title orelse "(none)" },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP `selectOption` tool handler: selects `value` in the `<select>` node
/// named by `backendNodeId` and replies with a text result containing the
/// value, node id, page URL and title.
///
/// Action failures are answered with `sendError` (InvalidParams for a
/// non-select node, InternalError otherwise).
fn handleSelectOption(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const SelectArgs = struct {
        backendNodeId: CDPNode.Id,
        value: []const u8,
    };
    const parsed = try parseArgs(SelectArgs, arena, arguments, server, id, "selectOption");
    const target = try resolveNodeAndPage(server, id, parsed.backendNodeId);

    lp.actions.selectOption(target.node, parsed.value, target.page) catch |err| {
        if (err != error.InvalidNodeType) {
            return server.sendError(id, .InternalError, "Failed to select option");
        }
        return server.sendError(id, .InvalidParams, "Node is not a <select> element");
    };

    const title = target.page.getTitle() catch null;
    const message = try std.fmt.allocPrint(
        arena,
        "Selected option '{s}' (backendNodeId: {d}). Page url: {s}, title: {s}",
        .{ parsed.value, parsed.backendNodeId, target.page.url, title orelse "(none)" },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = message }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP tool handler for `setChecked`: sets the checked state of the node
/// identified by `backendNodeId` to the boolean `checked`.
///
/// Node and page lookup is delegated to `resolveNodeAndPage`; its errors
/// propagate to the caller. If the action reports `error.InvalidNodeType` the
/// client receives `InvalidParams`; any other action failure is answered with
/// `InternalError`. On success the reply text names the node id, the resulting
/// state ("checked"/"unchecked") and the page url/title.
fn handleSetChecked(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        backendNodeId: CDPNode.Id,
        checked: bool,
    };
    const args = try parseArgs(Params, arena, arguments, server, id, "setChecked");
    const target = try resolveNodeAndPage(server, id, args.backendNodeId);

    lp.actions.setChecked(target.node, args.checked, target.page) catch |err| {
        if (err != error.InvalidNodeType) {
            return server.sendError(id, .InternalError, "Failed to set checked state");
        }
        return server.sendError(id, .InvalidParams, "Node is not a checkbox or radio input");
    };

    // A failing title lookup is reported the same way as a missing title.
    const title = (target.page.getTitle() catch null) orelse "(none)";
    const state_label = if (args.checked) "checked" else "unchecked";
    const summary = try std.fmt.allocPrint(
        arena,
        "Set element (backendNodeId: {d}) to {s}. Page url: {s}, title: {s}",
        .{ args.backendNodeId, state_label, target.page.url, title },
    );

    const content = [_]protocol.TextContent([]const u8){.{ .text = summary }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// MCP tool handler for `findElement`: filters the page's interactive elements
/// by `role` and/or `name` and returns the matches as JSON text.
///
/// At least one of the two filters is required, otherwise `InvalidParams` is
/// sent. `role` is compared with a case-insensitive equality check, `name`
/// with a case-insensitive substring check; an element lacking the attribute
/// being filtered on never matches. Matching elements are registered in the
/// server's node registry before being serialized.
///
/// Collection and registration failures are logged and answered with
/// `InternalError`; a missing page is answered with `PageNotLoaded`.
fn handleFindElement(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        role: ?[]const u8 = null,
        name: ?[]const u8 = null,
    };
    const args = try parseArgsOrDefault(Params, arena, arguments, server, id);

    const has_filter = args.role != null or args.name != null;
    if (!has_filter) {
        return server.sendError(id, .InvalidParams, "At least one of 'role' or 'name' must be provided");
    }

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    const candidates = lp.interactive.collectInteractiveElements(page.document.asNode(), arena, page) catch |err| {
        log.err(.mcp, "elements collection failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to collect interactive elements");
    };

    var hits: std.ArrayList(lp.interactive.InteractiveElement) = .empty;
    for (candidates) |candidate| {
        // An absent filter accepts everything; a present filter rejects
        // elements that do not carry the corresponding attribute.
        const role_ok = if (args.role) |wanted|
            (if (candidate.role) |actual| std.ascii.eqlIgnoreCase(actual, wanted) else false)
        else
            true;
        if (!role_ok) continue;

        const name_ok = if (args.name) |wanted|
            (if (candidate.name) |actual| containsIgnoreCase(actual, wanted) else false)
        else
            true;
        if (!name_ok) continue;

        try hits.append(arena, candidate);
    }

    const found = try hits.toOwnedSlice(arena);
    lp.interactive.registerNodes(found, &server.node_registry) catch |err| {
        log.err(.mcp, "node registration failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to register element nodes");
    };

    var json_out: std.Io.Writer.Allocating = .init(arena);
    try std.json.Stringify.value(found, .{}, &json_out.writer);

    const content = [_]protocol.TextContent([]const u8){.{ .text = json_out.written() }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
/// Reports whether `needle` occurs anywhere in `haystack`, comparing ASCII
/// letters case-insensitively.
///
/// An empty `needle` is contained in every haystack (including an empty one);
/// a `needle` longer than `haystack` is never contained.
///
/// Delegates to `std.ascii.indexOfIgnoreCase` instead of a hand-written scan,
/// so the standard library's search strategy is used.
fn containsIgnoreCase(haystack: []const u8, needle: []const u8) bool {
    return std.ascii.indexOfIgnoreCase(haystack, needle) != null;
}
/// Pair returned by `resolveNodeAndPage`: a DOM node taken from the server's
/// node registry together with the session's current page.
const NodeAndPage = struct {
    /// DOM node stored in the registry under the requested id.
    node: *DOMNode,
    /// Page that was current when the lookup was performed.
    page: *lp.Page,
};
/// Looks up the session's current page and the registered node for `node_id`.
///
/// On failure an error response is sent to the client for request `id` AND an
/// error is returned to the caller: `error.PageNotLoaded` when there is no
/// current page (checked first), `error.InvalidParams` when `node_id` is not
/// in the node registry. A failure to send the response propagates instead.
fn resolveNodeAndPage(server: *Server, id: std.json.Value, node_id: CDPNode.Id) !NodeAndPage {
    if (server.session.currentPage()) |page| {
        if (server.node_registry.lookup_by_id.get(node_id)) |entry| {
            return .{ .node = entry.dom, .page = page };
        }
        try server.sendError(id, .InvalidParams, "Node not found");
        return error.InvalidParams;
    }
    try server.sendError(id, .PageNotLoaded, "Page not loaded");
    return error.PageNotLoaded;
}
fn ensurePage(server: *Server, id: std.json.Value, url: ?[:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !*lp.Page {
if (url) |u| {
try performGoto(server, u, id, timeout, waitUntil);
@@ -736,7 +980,7 @@ test "MCP - evaluate error reporting" {
} }, out.written());
}
test "MCP - Actions: click, fill, scroll" {
test "MCP - Actions: click, fill, scroll, hover, press, selectOption, setChecked" {
defer testing.reset();
const aa = testing.arena_allocator;
@@ -797,7 +1041,67 @@ test "MCP - Actions: click, fill, scroll" {
out.clearRetainingCapacity();
}
// Evaluate assertions
{
// Test Hover
const el = page.document.getElementById("hoverTarget", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":5,\"method\":\"tools/call\",\"params\":{\"name\":\"hover\",\"arguments\":{\"backendNodeId\":", id_str, "}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Hovered element") != null);
out.clearRetainingCapacity();
}
{
// Test Press
const el = page.document.getElementById("keyTarget", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":6,\"method\":\"tools/call\",\"params\":{\"name\":\"press\",\"arguments\":{\"key\":\"Enter\",\"backendNodeId\":", id_str, "}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Pressed key") != null);
out.clearRetainingCapacity();
}
{
// Test SelectOption
const el = page.document.getElementById("sel2", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":7,\"method\":\"tools/call\",\"params\":{\"name\":\"selectOption\",\"arguments\":{\"backendNodeId\":", id_str, ",\"value\":\"b\"}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "Selected option") != null);
out.clearRetainingCapacity();
}
{
// Test SetChecked (checkbox)
const el = page.document.getElementById("chk", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":8,\"method\":\"tools/call\",\"params\":{\"name\":\"setChecked\",\"arguments\":{\"backendNodeId\":", id_str, ",\"checked\":true}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "checked") != null);
out.clearRetainingCapacity();
}
{
// Test SetChecked (radio)
const el = page.document.getElementById("rad", page).?.asNode();
const el_id = (try server.node_registry.register(el)).id;
var id_buf: [12]u8 = undefined;
const id_str = std.fmt.bufPrint(&id_buf, "{d}", .{el_id}) catch unreachable;
const msg = try std.mem.concat(aa, u8, &.{ "{\"jsonrpc\":\"2.0\",\"id\":9,\"method\":\"tools/call\",\"params\":{\"name\":\"setChecked\",\"arguments\":{\"backendNodeId\":", id_str, ",\"checked\":true}}}" });
try router.handleMessage(server, aa, msg);
try testing.expect(std.mem.indexOf(u8, out.written(), "checked") != null);
out.clearRetainingCapacity();
}
// Evaluate JS assertions for all actions
var ls: js.Local.Scope = undefined;
page.js.localScope(&ls);
defer ls.deinit();
@@ -809,12 +1113,66 @@ test "MCP - Actions: click, fill, scroll" {
const result = try ls.local.exec(
\\ window.clicked === true && window.inputVal === 'hello' &&
\\ window.changed === true && window.selChanged === 'opt2' &&
\\ window.scrolled === true
\\ window.scrolled === true &&
\\ window.hovered === true &&
\\ window.keyPressed === 'Enter' && window.keyReleased === 'Enter' &&
\\ window.sel2Changed === 'b' &&
\\ window.chkClicked === true && window.chkChanged === true &&
\\ window.radClicked === true && window.radChanged === true
, null);
try testing.expect(result.isTrue());
}
// Exercises the `findElement` tool against the mcp_actions.html fixture:
// each request is routed through the MCP router and the captured output is
// checked for an expected substring.
test "MCP - findElement" {
    defer testing.reset();
    const aa = testing.arena_allocator;

    var out: std.io.Writer.Allocating = .init(aa);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &out.writer);
    defer server.deinit();

    const Case = struct {
        request: []const u8,
        expected: []const u8,
    };
    const cases = [_]Case{
        // Find by role
        .{
            .request =
            \\{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"findElement","arguments":{"role":"button"}}}
            ,
            .expected = "Click Me",
        },
        // Find by name (case-insensitive substring)
        .{
            .request =
            \\{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"findElement","arguments":{"name":"click"}}}
            ,
            .expected = "Click Me",
        },
        // Find with no matches
        .{
            .request =
            \\{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"findElement","arguments":{"role":"slider"}}}
            ,
            .expected = "[]",
        },
        // Error: no params provided
        .{
            .request =
            \\{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"findElement","arguments":{}}}
            ,
            .expected = "error",
        },
    };

    for (cases) |case| {
        try router.handleMessage(server, aa, case.request);
        try testing.expect(std.mem.indexOf(u8, out.written(), case.expected) != null);
        // Reset the capture buffer so each case only sees its own response.
        out.clearRetainingCapacity();
    }
}
test "MCP - waitForSelector: existing element" {
defer testing.reset();
var out: std.io.Writer.Allocating = .init(testing.arena_allocator);