mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-04-04 08:30:31 +00:00
410 lines
13 KiB
Zig
410 lines
13 KiB
Zig
const std = @import("std");
|
|
|
|
pub const TypeArgs = struct {
|
|
selector: []const u8,
|
|
value: []const u8,
|
|
};
|
|
|
|
pub const ExtractArgs = struct {
|
|
selector: []const u8,
|
|
file: ?[]const u8,
|
|
};
|
|
|
|
pub const Command = union(enum) {
|
|
goto: []const u8,
|
|
click: []const u8,
|
|
type_cmd: TypeArgs,
|
|
wait: []const u8,
|
|
tree: void,
|
|
markdown: void,
|
|
extract: ExtractArgs,
|
|
eval_js: []const u8,
|
|
login: void,
|
|
accept_cookies: void,
|
|
exit: void,
|
|
comment: void,
|
|
natural_language: []const u8,
|
|
};
|
|
|
|
/// Parse a line of REPL input into a Pandascript command.
|
|
/// Unrecognized input is returned as `.natural_language`.
|
|
/// For multi-line EVAL blocks in scripts, use `ScriptParser`.
|
|
pub fn parse(line: []const u8) Command {
|
|
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
|
|
if (trimmed.len == 0) return .{ .natural_language = trimmed };
|
|
|
|
// Skip comment lines
|
|
if (trimmed[0] == '#') return .{ .comment = {} };
|
|
|
|
// Find the command word (first whitespace-delimited token)
|
|
const cmd_end = std.mem.indexOfAny(u8, trimmed, &std.ascii.whitespace) orelse trimmed.len;
|
|
const cmd_word = trimmed[0..cmd_end];
|
|
const rest = std.mem.trim(u8, trimmed[cmd_end..], &std.ascii.whitespace);
|
|
|
|
if (eqlIgnoreCase(cmd_word, "GOTO")) {
|
|
if (rest.len == 0) return .{ .natural_language = trimmed };
|
|
return .{ .goto = rest };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "CLICK")) {
|
|
const arg = extractQuoted(rest) orelse rest;
|
|
if (arg.len == 0) return .{ .natural_language = trimmed };
|
|
return .{ .click = arg };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "TYPE")) {
|
|
const first = extractQuotedWithRemainder(rest) orelse return .{ .natural_language = trimmed };
|
|
const second_arg = std.mem.trim(u8, first.remainder, &std.ascii.whitespace);
|
|
const second = extractQuoted(second_arg) orelse return .{ .natural_language = trimmed };
|
|
return .{ .type_cmd = .{ .selector = first.value, .value = second } };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "WAIT")) {
|
|
const arg = extractQuoted(rest) orelse rest;
|
|
if (arg.len == 0) return .{ .natural_language = trimmed };
|
|
return .{ .wait = arg };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "TREE")) {
|
|
return .{ .tree = {} };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "MARKDOWN") or eqlIgnoreCase(cmd_word, "MD")) {
|
|
return .{ .markdown = {} };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "EXTRACT")) {
|
|
const selector = extractQuoted(rest) orelse {
|
|
if (rest.len == 0) return .{ .natural_language = trimmed };
|
|
return .{ .extract = .{ .selector = rest, .file = null } };
|
|
};
|
|
// Look for > filename after the quoted selector
|
|
const after_quote = extractQuotedWithRemainder(rest) orelse return .{ .extract = .{ .selector = selector, .file = null } };
|
|
const after = std.mem.trim(u8, after_quote.remainder, &std.ascii.whitespace);
|
|
if (after.len > 0 and after[0] == '>') {
|
|
const file = std.mem.trim(u8, after[1..], &std.ascii.whitespace);
|
|
return .{ .extract = .{ .selector = selector, .file = if (file.len > 0) file else null } };
|
|
}
|
|
return .{ .extract = .{ .selector = selector, .file = null } };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "EVAL")) {
|
|
if (rest.len == 0) return .{ .natural_language = trimmed };
|
|
const arg = extractQuoted(rest) orelse rest;
|
|
return .{ .eval_js = arg };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "LOGIN")) {
|
|
return .{ .login = {} };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "ACCEPT_COOKIES") or eqlIgnoreCase(cmd_word, "ACCEPT-COOKIES")) {
|
|
return .{ .accept_cookies = {} };
|
|
}
|
|
|
|
if (eqlIgnoreCase(cmd_word, "EXIT")) {
|
|
return .{ .exit = {} };
|
|
}
|
|
|
|
return .{ .natural_language = trimmed };
|
|
}
|
|
|
|
/// Iterator for parsing a script file, handling multi-line EVAL """ ... """ blocks.
|
|
pub const ScriptIterator = struct {
|
|
lines: std.mem.SplitIterator(u8, .scalar),
|
|
line_num: u32,
|
|
allocator: std.mem.Allocator,
|
|
|
|
pub fn init(content: []const u8, allocator: std.mem.Allocator) ScriptIterator {
|
|
return .{
|
|
.lines = std.mem.splitScalar(u8, content, '\n'),
|
|
.line_num = 0,
|
|
.allocator = allocator,
|
|
};
|
|
}
|
|
|
|
pub const Entry = struct {
|
|
line_num: u32,
|
|
raw_line: []const u8,
|
|
command: Command,
|
|
};
|
|
|
|
/// Returns the next command from the script, or null at EOF.
|
|
/// Multi-line EVAL blocks are assembled into a single eval_js command.
|
|
pub fn next(self: *ScriptIterator) ?Entry {
|
|
while (self.lines.next()) |line| {
|
|
self.line_num += 1;
|
|
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
|
|
if (trimmed.len == 0) continue;
|
|
|
|
// Check for EVAL """ multi-line block
|
|
if (isEvalTripleQuote(trimmed)) {
|
|
const start_line = self.line_num;
|
|
if (self.collectEvalBlock()) |js| {
|
|
return .{
|
|
.line_num = start_line,
|
|
.raw_line = trimmed,
|
|
.command = .{ .eval_js = js },
|
|
};
|
|
} else {
|
|
return .{
|
|
.line_num = start_line,
|
|
.raw_line = trimmed,
|
|
.command = .{ .natural_language = "unterminated EVAL block" },
|
|
};
|
|
}
|
|
}
|
|
|
|
return .{
|
|
.line_num = self.line_num,
|
|
.raw_line = trimmed,
|
|
.command = parse(trimmed),
|
|
};
|
|
}
|
|
return null;
|
|
}
|
|
|
|
fn isEvalTripleQuote(line: []const u8) bool {
|
|
const cmd_end = std.mem.indexOfAny(u8, line, &std.ascii.whitespace) orelse line.len;
|
|
const cmd_word = line[0..cmd_end];
|
|
if (!eqlIgnoreCase(cmd_word, "EVAL")) return false;
|
|
const rest = std.mem.trim(u8, line[cmd_end..], &std.ascii.whitespace);
|
|
return std.mem.startsWith(u8, rest, "\"\"\"");
|
|
}
|
|
|
|
/// Collect lines until closing """, return the JS content.
|
|
fn collectEvalBlock(self: *ScriptIterator) ?[]const u8 {
|
|
var parts: std.ArrayListUnmanaged(u8) = .empty;
|
|
while (self.lines.next()) |line| {
|
|
self.line_num += 1;
|
|
const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);
|
|
if (std.mem.eql(u8, trimmed, "\"\"\"")) {
|
|
return parts.toOwnedSlice(self.allocator) catch null;
|
|
}
|
|
if (parts.items.len > 0) {
|
|
parts.append(self.allocator, '\n') catch return null;
|
|
}
|
|
parts.appendSlice(self.allocator, line) catch return null;
|
|
}
|
|
// Unterminated
|
|
parts.deinit(self.allocator);
|
|
return null;
|
|
}
|
|
};
|
|
|
|
const QuotedResult = struct {
|
|
value: []const u8,
|
|
remainder: []const u8,
|
|
};
|
|
|
|
fn extractQuotedWithRemainder(s: []const u8) ?QuotedResult {
|
|
if (s.len < 2 or s[0] != '"') return null;
|
|
const end = std.mem.indexOfScalarPos(u8, s, 1, '"') orelse return null;
|
|
return .{
|
|
.value = s[1..end],
|
|
.remainder = s[end + 1 ..],
|
|
};
|
|
}
|
|
|
|
fn extractQuoted(s: []const u8) ?[]const u8 {
|
|
const result = extractQuotedWithRemainder(s) orelse return null;
|
|
return result.value;
|
|
}
|
|
|
|
fn eqlIgnoreCase(a: []const u8, comptime upper: []const u8) bool {
|
|
if (a.len != upper.len) return false;
|
|
for (a, upper) |ac, uc| {
|
|
if (std.ascii.toUpper(ac) != uc) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// --- Tests ---
|
|
|
|
test "parse GOTO" {
|
|
const cmd = parse("GOTO https://example.com");
|
|
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
|
|
}
|
|
|
|
test "parse GOTO case insensitive" {
|
|
const cmd = parse("goto https://example.com");
|
|
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
|
|
}
|
|
|
|
test "parse GOTO missing url" {
|
|
const cmd = parse("GOTO");
|
|
try std.testing.expect(cmd == .natural_language);
|
|
}
|
|
|
|
test "parse CLICK quoted" {
|
|
const cmd = parse("CLICK \"Login\"");
|
|
try std.testing.expectEqualStrings("Login", cmd.click);
|
|
}
|
|
|
|
test "parse CLICK unquoted" {
|
|
const cmd = parse("CLICK .submit-btn");
|
|
try std.testing.expectEqualStrings(".submit-btn", cmd.click);
|
|
}
|
|
|
|
test "parse TYPE two quoted args" {
|
|
const cmd = parse("TYPE \"#email\" \"user@test.com\"");
|
|
try std.testing.expectEqualStrings("#email", cmd.type_cmd.selector);
|
|
try std.testing.expectEqualStrings("user@test.com", cmd.type_cmd.value);
|
|
}
|
|
|
|
test "parse TYPE missing second arg" {
|
|
const cmd = parse("TYPE \"#email\"");
|
|
try std.testing.expect(cmd == .natural_language);
|
|
}
|
|
|
|
test "parse WAIT" {
|
|
const cmd = parse("WAIT \".dashboard\"");
|
|
try std.testing.expectEqualStrings(".dashboard", cmd.wait);
|
|
}
|
|
|
|
test "parse TREE" {
|
|
const cmd = parse("TREE");
|
|
try std.testing.expect(cmd == .tree);
|
|
}
|
|
|
|
test "parse MARKDOWN alias MD" {
|
|
try std.testing.expect(parse("MARKDOWN") == .markdown);
|
|
try std.testing.expect(parse("md") == .markdown);
|
|
}
|
|
|
|
test "parse EXTRACT with file" {
|
|
const cmd = parse("EXTRACT \".title\" > titles.json");
|
|
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
|
|
try std.testing.expectEqualStrings("titles.json", cmd.extract.file.?);
|
|
}
|
|
|
|
test "parse EXTRACT without file" {
|
|
const cmd = parse("EXTRACT \".title\"");
|
|
try std.testing.expectEqualStrings(".title", cmd.extract.selector);
|
|
try std.testing.expect(cmd.extract.file == null);
|
|
}
|
|
|
|
test "parse EVAL single line" {
|
|
const cmd = parse("EVAL \"document.title\"");
|
|
try std.testing.expectEqualStrings("document.title", cmd.eval_js);
|
|
}
|
|
|
|
test "parse LOGIN" {
|
|
try std.testing.expect(parse("LOGIN") == .login);
|
|
try std.testing.expect(parse("login") == .login);
|
|
}
|
|
|
|
test "parse ACCEPT_COOKIES" {
|
|
try std.testing.expect(parse("ACCEPT_COOKIES") == .accept_cookies);
|
|
try std.testing.expect(parse("ACCEPT-COOKIES") == .accept_cookies);
|
|
}
|
|
|
|
test "parse EXIT" {
|
|
try std.testing.expect(parse("EXIT") == .exit);
|
|
}
|
|
|
|
test "parse comment" {
|
|
try std.testing.expect(parse("# this is a comment") == .comment);
|
|
try std.testing.expect(parse("# INTENT: LOGIN") == .comment);
|
|
}
|
|
|
|
test "parse natural language fallback" {
|
|
const cmd = parse("what is on this page?");
|
|
try std.testing.expectEqualStrings("what is on this page?", cmd.natural_language);
|
|
}
|
|
|
|
test "parse whitespace trimming" {
|
|
const cmd = parse(" GOTO https://example.com ");
|
|
try std.testing.expectEqualStrings("https://example.com", cmd.goto);
|
|
}
|
|
|
|
test "parse empty input" {
|
|
const cmd = parse("");
|
|
try std.testing.expect(cmd == .natural_language);
|
|
}
|
|
|
|
test "ScriptIterator basic commands" {
|
|
const script =
|
|
\\GOTO https://example.com
|
|
\\TREE
|
|
\\CLICK "Login"
|
|
;
|
|
var iter = ScriptIterator.init(script, std.testing.allocator);
|
|
|
|
const e1 = iter.next().?;
|
|
try std.testing.expectEqualStrings("https://example.com", e1.command.goto);
|
|
try std.testing.expectEqual(@as(u32, 1), e1.line_num);
|
|
|
|
const e2 = iter.next().?;
|
|
try std.testing.expect(e2.command == .tree);
|
|
|
|
const e3 = iter.next().?;
|
|
try std.testing.expectEqualStrings("Login", e3.command.click);
|
|
|
|
try std.testing.expect(iter.next() == null);
|
|
}
|
|
|
|
test "ScriptIterator skips blank lines and comments" {
|
|
const script =
|
|
\\# Navigate
|
|
\\GOTO https://example.com
|
|
\\
|
|
\\# Extract
|
|
\\TREE
|
|
;
|
|
var iter = ScriptIterator.init(script, std.testing.allocator);
|
|
|
|
const e1 = iter.next().?;
|
|
try std.testing.expect(e1.command == .comment);
|
|
|
|
const e2 = iter.next().?;
|
|
try std.testing.expect(e2.command == .goto);
|
|
|
|
const e3 = iter.next().?;
|
|
try std.testing.expect(e3.command == .comment);
|
|
|
|
const e4 = iter.next().?;
|
|
try std.testing.expect(e4.command == .tree);
|
|
|
|
try std.testing.expect(iter.next() == null);
|
|
}
|
|
|
|
test "ScriptIterator multi-line EVAL" {
|
|
const script =
|
|
\\GOTO https://example.com
|
|
\\EVAL """
|
|
\\ const x = 1;
|
|
\\ const y = 2;
|
|
\\ return x + y;
|
|
\\"""
|
|
\\TREE
|
|
;
|
|
var iter = ScriptIterator.init(script, std.testing.allocator);
|
|
|
|
const e1 = iter.next().?;
|
|
try std.testing.expect(e1.command == .goto);
|
|
|
|
const e2 = iter.next().?;
|
|
try std.testing.expect(e2.command == .eval_js);
|
|
try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "const x = 1;") != null);
|
|
try std.testing.expect(std.mem.indexOf(u8, e2.command.eval_js, "return x + y;") != null);
|
|
defer std.testing.allocator.free(e2.command.eval_js);
|
|
|
|
const e3 = iter.next().?;
|
|
try std.testing.expect(e3.command == .tree);
|
|
|
|
try std.testing.expect(iter.next() == null);
|
|
}
|
|
|
|
test "ScriptIterator unterminated EVAL" {
|
|
const script =
|
|
\\EVAL """
|
|
\\ const x = 1;
|
|
;
|
|
var iter = ScriptIterator.init(script, std.testing.allocator);
|
|
|
|
const e1 = iter.next().?;
|
|
try std.testing.expect(e1.command == .natural_language);
|
|
try std.testing.expectEqualStrings("unterminated EVAL block", e1.command.natural_language);
|
|
}
|