Files
browser/src/agent/Command.zig

410 lines
13 KiB
Zig

const std = @import("std");
pub const TypeArgs = struct {
selector: []const u8,
value: []const u8,
};
pub const ExtractArgs = struct {
selector: []const u8,
file: ?[]const u8,
};
pub const Command = union(enum) {
goto: []const u8,
click: []const u8,
type_cmd: TypeArgs,
wait: []const u8,
tree: void,
markdown: void,
extract: ExtractArgs,
eval_js: []const u8,
login: void,
accept_cookies: void,
exit: void,
comment: void,
natural_language: []const u8,
};
/// Parse a line of REPL input into a Pandascript command.
/// Unrecognized input is returned as `.natural_language`.
/// For multi-line EVAL blocks in scripts, use `ScriptParser`.
pub fn parse(line: []const u8) Command {
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
if (trimmed.len == 0) return .{ .natural_language = trimmed };
// Skip comment lines
if (trimmed[0] == '#') return .{ .comment = {} };
// Find the command word (first whitespace-delimited token)
const cmd_end = std.mem.indexOfAny(u8, trimmed, &std.ascii.whitespace) orelse trimmed.len;
const cmd_word = trimmed[0..cmd_end];
const rest = std.mem.trim(u8, trimmed[cmd_end..], &std.ascii.whitespace);
if (eqlIgnoreCase(cmd_word, "GOTO")) {
if (rest.len == 0) return .{ .natural_language = trimmed };
return .{ .goto = rest };
}
if (eqlIgnoreCase(cmd_word, "CLICK")) {
const arg = extractQuoted(rest) orelse rest;
if (arg.len == 0) return .{ .natural_language = trimmed };
return .{ .click = arg };
}
if (eqlIgnoreCase(cmd_word, "TYPE")) {
const first = extractQuotedWithRemainder(rest) orelse return .{ .natural_language = trimmed };
const second_arg = std.mem.trim(u8, first.remainder, &std.ascii.whitespace);
const second = extractQuoted(second_arg) orelse return .{ .natural_language = trimmed };
return .{ .type_cmd = .{ .selector = first.value, .value = second } };
}
if (eqlIgnoreCase(cmd_word, "WAIT")) {
const arg = extractQuoted(rest) orelse rest;
if (arg.len == 0) return .{ .natural_language = trimmed };
return .{ .wait = arg };
}
if (eqlIgnoreCase(cmd_word, "TREE")) {
return .{ .tree = {} };
}
if (eqlIgnoreCase(cmd_word, "MARKDOWN") or eqlIgnoreCase(cmd_word, "MD")) {
return .{ .markdown = {} };
}
if (eqlIgnoreCase(cmd_word, "EXTRACT")) {
const selector = extractQuoted(rest) orelse {
if (rest.len == 0) return .{ .natural_language = trimmed };
return .{ .extract = .{ .selector = rest, .file = null } };
};
// Look for > filename after the quoted selector
const after_quote = extractQuotedWithRemainder(rest) orelse return .{ .extract = .{ .selector = selector, .file = null } };
const after = std.mem.trim(u8, after_quote.remainder, &std.ascii.whitespace);
if (after.len > 0 and after[0] == '>') {
const file = std.mem.trim(u8, after[1..], &std.ascii.whitespace);
return .{ .extract = .{ .selector = selector, .file = if (file.len > 0) file else null } };
}
return .{ .extract = .{ .selector = selector, .file = null } };
}
if (eqlIgnoreCase(cmd_word, "EVAL")) {
if (rest.len == 0) return .{ .natural_language = trimmed };
const arg = extractQuoted(rest) orelse rest;
return .{ .eval_js = arg };
}
if (eqlIgnoreCase(cmd_word, "LOGIN")) {
return .{ .login = {} };
}
if (eqlIgnoreCase(cmd_word, "ACCEPT_COOKIES") or eqlIgnoreCase(cmd_word, "ACCEPT-COOKIES")) {
return .{ .accept_cookies = {} };
}
if (eqlIgnoreCase(cmd_word, "EXIT")) {
return .{ .exit = {} };
}
return .{ .natural_language = trimmed };
}
/// Iterator for parsing a script file, handling multi-line EVAL """ ... """ blocks.
pub const ScriptIterator = struct {
lines: std.mem.SplitIterator(u8, .scalar),
line_num: u32,
allocator: std.mem.Allocator,
pub fn init(content: []const u8, allocator: std.mem.Allocator) ScriptIterator {
return .{
.lines = std.mem.splitScalar(u8, content, '\n'),
.line_num = 0,
.allocator = allocator,
};
}
pub const Entry = struct {
line_num: u32,
raw_line: []const u8,
command: Command,
};
/// Returns the next command from the script, or null at EOF.
/// Multi-line EVAL blocks are assembled into a single eval_js command.
pub fn next(self: *ScriptIterator) ?Entry {
while (self.lines.next()) |line| {
self.line_num += 1;
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
if (trimmed.len == 0) continue;
// Check for EVAL """ multi-line block
if (isEvalTripleQuote(trimmed)) {
const start_line = self.line_num;
if (self.collectEvalBlock()) |js| {
return .{
.line_num = start_line,
.raw_line = trimmed,
.command = .{ .eval_js = js },
};
} else {
return .{
.line_num = start_line,
.raw_line = trimmed,
.command = .{ .natural_language = "unterminated EVAL block" },
};
}
}
return .{
.line_num = self.line_num,
.raw_line = trimmed,
.command = parse(trimmed),
};
}
return null;
}
fn isEvalTripleQuote(line: []const u8) bool {
const cmd_end = std.mem.indexOfAny(u8, line, &std.ascii.whitespace) orelse line.len;
const cmd_word = line[0..cmd_end];
if (!eqlIgnoreCase(cmd_word, "EVAL")) return false;
const rest = std.mem.trim(u8, line[cmd_end..], &std.ascii.whitespace);
return std.mem.startsWith(u8, rest, "\"\"\"");
}
/// Collect lines until closing """, return the JS content.
fn collectEvalBlock(self: *ScriptIterator) ?[]const u8 {
var parts: std.ArrayListUnmanaged(u8) = .empty;
while (self.lines.next()) |line| {
self.line_num += 1;
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
if (std.mem.eql(u8, trimmed, "\"\"\"")) {
return parts.toOwnedSlice(self.allocator) catch null;
}
if (parts.items.len > 0) {
parts.append(self.allocator, '\n') catch return null;
}
parts.appendSlice(self.allocator, line) catch return null;
}
// Unterminated
parts.deinit(self.allocator);
return null;
}
};
const QuotedResult = struct {
value: []const u8,
remainder: []const u8,
};
fn extractQuotedWithRemainder(s: []const u8) ?QuotedResult {
if (s.len < 2 or s[0] != '"') return null;
const end = std.mem.indexOfScalarPos(u8, s, 1, '"') orelse return null;
return .{
.value = s[1..end],
.remainder = s[end + 1 ..],
};
}
fn extractQuoted(s: []const u8) ?[]const u8 {
const result = extractQuotedWithRemainder(s) orelse return null;
return result.value;
}
fn eqlIgnoreCase(a: []const u8, comptime upper: []const u8) bool {
if (a.len != upper.len) return false;
for (a, upper) |ac, uc| {
if (std.ascii.toUpper(ac) != uc) return false;
}
return true;
}
// --- Tests ---
test "parse GOTO" {
const cmd = parse("GOTO https://example.com");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
test "parse GOTO case insensitive" {
const cmd = parse("goto https://example.com");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
test "parse GOTO missing url" {
const cmd = parse("GOTO");
try std.testing.expect(cmd == .natural_language);
}
test "parse CLICK quoted" {
const cmd = parse("CLICK \"Login\"");
try std.testing.expectEqualStrings("Login", cmd.click);
}
test "parse CLICK unquoted" {
const cmd = parse("CLICK .submit-btn");
try std.testing.expectEqualStrings(".submit-btn", cmd.click);
}
test "parse TYPE two quoted args" {
const cmd = parse("TYPE \"#email\" \"user@test.com\"");
try std.testing.expectEqualStrings("#email", cmd.type_cmd.selector);
try std.testing.expectEqualStrings("user@test.com", cmd.type_cmd.value);
}
test "parse TYPE missing second arg" {
const cmd = parse("TYPE \"#email\"");
try std.testing.expect(cmd == .natural_language);
}
test "parse WAIT" {
const cmd = parse("WAIT \".dashboard\"");
try std.testing.expectEqualStrings(".dashboard", cmd.wait);
}
test "parse TREE" {
const cmd = parse("TREE");
try std.testing.expect(cmd == .tree);
}
test "parse MARKDOWN alias MD" {
try std.testing.expect(parse("MARKDOWN") == .markdown);
try std.testing.expect(parse("md") == .markdown);
}
test "parse EXTRACT with file" {
const cmd = parse("EXTRACT \".title\" > titles.json");
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
try std.testing.expectEqualStrings("titles.json", cmd.extract.file.?);
}
test "parse EXTRACT without file" {
const cmd = parse("EXTRACT \".title\"");
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
try std.testing.expect(cmd.extract.file == null);
}
test "parse EVAL single line" {
const cmd = parse("EVAL \"document.title\"");
try std.testing.expectEqualStrings("document.title", cmd.eval_js);
}
test "parse LOGIN" {
try std.testing.expect(parse("LOGIN") == .login);
try std.testing.expect(parse("login") == .login);
}
test "parse ACCEPT_COOKIES" {
try std.testing.expect(parse("ACCEPT_COOKIES") == .accept_cookies);
try std.testing.expect(parse("ACCEPT-COOKIES") == .accept_cookies);
}
test "parse EXIT" {
try std.testing.expect(parse("EXIT") == .exit);
}
test "parse comment" {
try std.testing.expect(parse("# this is a comment") == .comment);
try std.testing.expect(parse("# INTENT: LOGIN") == .comment);
}
test "parse natural language fallback" {
const cmd = parse("what is on this page?");
try std.testing.expectEqualStrings("what is on this page?", cmd.natural_language);
}
test "parse whitespace trimming" {
const cmd = parse(" GOTO https://example.com ");
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
}
test "parse empty input" {
const cmd = parse("");
try std.testing.expect(cmd == .natural_language);
}
test "ScriptIterator basic commands" {
const script =
\\GOTO https://example.com
\\TREE
\\CLICK "Login"
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expectEqualStrings("https://example.com", e1.command.goto);
try std.testing.expectEqual(@as(u32, 1), e1.line_num);
const e2 = iter.next().?;
try std.testing.expect(e2.command == .tree);
const e3 = iter.next().?;
try std.testing.expectEqualStrings("Login", e3.command.click);
try std.testing.expect(iter.next() == null);
}
test "ScriptIterator skips blank lines and comments" {
const script =
\\# Navigate
\\GOTO https://example.com
\\
\\# Extract
\\TREE
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expect(e1.command == .comment);
const e2 = iter.next().?;
try std.testing.expect(e2.command == .goto);
const e3 = iter.next().?;
try std.testing.expect(e3.command == .comment);
const e4 = iter.next().?;
try std.testing.expect(e4.command == .tree);
try std.testing.expect(iter.next() == null);
}
test "ScriptIterator multi-line EVAL" {
const script =
\\GOTO https://example.com
\\EVAL """
\\ const x = 1;
\\ const y = 2;
\\ return x + y;
\\"""
\\TREE
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expect(e1.command == .goto);
const e2 = iter.next().?;
try std.testing.expect(e2.command == .eval_js);
try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "const x = 1;") != null);
try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "return x + y;") != null);
defer std.testing.allocator.free(e2.command.eval_js);
const e3 = iter.next().?;
try std.testing.expect(e3.command == .tree);
try std.testing.expect(iter.next() == null);
}
test "ScriptIterator unterminated EVAL" {
const script =
\\EVAL """
\\ const x = 1;
;
var iter = ScriptIterator.init(script, std.testing.allocator);
const e1 = iter.next().?;
try std.testing.expect(e1.command == .natural_language);
try std.testing.expectEqualStrings("unterminated EVAL block", e1.command.natural_language);
}