mcp: add detectForms tool for structured form discovery

Add a detectForms MCP tool and lp.detectForms CDP command that return
structured form metadata from the current page. Each form includes its
action URL, HTTP method, and fields with names, types, required status,
values, select options, and backendNodeIds for use with the fill tool.

This lets AI agents discover and fill forms in a single step instead of
calling interactiveElements, filtering for form fields, and guessing
which fields belong to which form.

New files:
- src/browser/forms.zig: FormInfo/FormField structs, collectForms()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt Van Horn
2026-03-21 08:40:50 -07:00
parent fdc79af55c
commit 78c6def2b1
4 changed files with 514 additions and 0 deletions

339
src/browser/forms.zig Normal file
View File

@@ -0,0 +1,339 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Page = @import("Page.zig");
const TreeWalker = @import("webapi/TreeWalker.zig");
const Element = @import("webapi/Element.zig");
const Node = @import("webapi/Node.zig");
const Allocator = std.mem.Allocator;
/// One entry of a `<select>` field, holding the option's value and text as
/// gathered by `collectSelectOptions`.
pub const SelectOption = struct {
    value: []const u8,
    text: []const u8,

    /// Writes the option as a JSON object with the keys "value" and "text",
    /// in that order.
    pub fn jsonStringify(self: *const SelectOption, jw: anytype) !void {
        try jw.beginObject();
        // Both members are plain strings, so they are emitted uniformly;
        // the tuple order fixes the key order in the output.
        inline for (.{ "value", "text" }) |key| {
            try jw.objectField(key);
            try jw.write(@field(self.*, key));
        }
        try jw.endObject();
    }
};
/// A single field found inside a form, together with the DOM node it was
/// read from. `node` is kept for callers and is not part of the JSON output.
pub const FormField = struct {
    node: *Node,
    tag_name: []const u8,
    name: ?[]const u8,
    input_type: ?[]const u8,
    required: bool,
    value: ?[]const u8,
    placeholder: ?[]const u8,
    options: []const SelectOption,

    /// Emits `key: text` only when `maybe_text` is non-null.
    fn writeOptionalString(jw: anytype, comptime key: []const u8, maybe_text: ?[]const u8) !void {
        const text = maybe_text orelse return;
        try jw.objectField(key);
        try jw.write(text);
    }

    /// Writes the field as a JSON object. "tagName" is always present; every
    /// other key is omitted when it carries no information (null string,
    /// `required == false`, or an empty option list).
    pub fn jsonStringify(self: *const FormField, jw: anytype) !void {
        try jw.beginObject();

        try jw.objectField("tagName");
        try jw.write(self.tag_name);

        try writeOptionalString(jw, "name", self.name);
        try writeOptionalString(jw, "inputType", self.input_type);

        // "required" is only ever written as `true`; absence means false.
        if (self.required) {
            try jw.objectField("required");
            try jw.write(true);
        }

        try writeOptionalString(jw, "value", self.value);
        try writeOptionalString(jw, "placeholder", self.placeholder);

        if (self.options.len != 0) {
            try jw.objectField("options");
            try jw.beginArray();
            for (self.options) |*option| try option.jsonStringify(jw);
            try jw.endArray();
        }

        try jw.endObject();
    }
};
/// A form and the fields collected from it. `node` is the form's DOM node;
/// it is kept for callers and is not part of the JSON output.
pub const FormInfo = struct {
    node: *Node,
    action: ?[]const u8,
    method: ?[]const u8,
    fields: []const FormField,

    /// Emits `key: text` only when `maybe_text` is non-null.
    fn writeIfPresent(jw: anytype, comptime key: []const u8, maybe_text: ?[]const u8) !void {
        const text = maybe_text orelse return;
        try jw.objectField(key);
        try jw.write(text);
    }

    /// Writes the form as a JSON object: optional "action" and "method",
    /// followed by a "fields" array that is always present.
    pub fn jsonStringify(self: *const FormInfo, jw: anytype) !void {
        try jw.beginObject();

        try writeIfPresent(jw, "action", self.action);
        try writeIfPresent(jw, "method", self.method);

        try jw.objectField("fields");
        try jw.beginArray();
        for (self.fields) |*form_field| try form_field.jsonStringify(jw);
        try jw.endArray();

        try jw.endObject();
    }
};
/// Collect all forms and their fields under `root`.
///
/// Walks the tree below `root` and produces one `FormInfo` per `<form>`
/// element for which `collectFormFields` returns at least one field; forms
/// without fields are skipped. An action that cannot be read, or that is
/// empty, is reported as `null`; an empty method is reported as `null` too.
/// All list storage is allocated from `arena`.
pub fn collectForms(
    root: *Node,
    arena: Allocator,
    page: *Page,
) ![]FormInfo {
    var found: std.ArrayList(FormInfo) = .empty;

    var walker = TreeWalker.Full.init(root, .{});
    while (walker.next()) |candidate| {
        const element = candidate.is(Element) orelse continue;
        if (element.getTag() != .form) continue;
        const form = element.is(Element.Html.Form) orelse continue;

        const form_fields = try collectFormFields(candidate, arena, page);
        if (form_fields.len == 0) continue;

        // Errors from getAction are deliberately mapped to "no action".
        const raw_action = form.getAction(page) catch null;
        const raw_method = form.getMethod();

        const action: ?[]const u8 = blk: {
            const text = raw_action orelse break :blk null;
            break :blk if (text.len > 0) text else null;
        };
        const method: ?[]const u8 = if (raw_method.len > 0) raw_method else null;

        try found.append(arena, .{
            .node = candidate,
            .action = action,
            .method = method,
            .fields = form_fields,
        });
    }

    return found.items;
}
/// Collects the user-fillable fields found while walking `form_node`.
///
/// Recognised elements are `<input>`, `<textarea>` and `<select>`. Inputs
/// whose type is `hidden`, `submit`, `reset`, `button` or `image` are
/// skipped: they are either not user-visible or are buttons rather than
/// values to fill in. Fields are returned in the order the tree walker
/// yields them, and all list storage is allocated from `arena`.
fn collectFormFields(
    form_node: *Node,
    arena: Allocator,
    page: *Page,
) ![]FormField {
    var fields: std.ArrayList(FormField) = .empty;

    var tw = TreeWalker.Full.init(form_node, .{});
    while (tw.next()) |node| {
        const el = node.is(Element) orelse continue;
        switch (el.getTag()) {
            .input => {
                const input = el.is(Element.Html.Input) orelse continue;
                switch (input._input_type) {
                    // Hidden inputs and every button-like input (including
                    // reset buttons) carry nothing for an agent to fill.
                    .hidden, .submit, .reset, .button, .image => continue,
                    else => {},
                }
                try fields.append(arena, .{
                    .node = node,
                    .tag_name = "input",
                    .name = el.getAttributeSafe(comptime .wrap("name")),
                    .input_type = input._input_type.toString(),
                    // `required` is a boolean attribute: presence means true.
                    .required = el.getAttributeSafe(comptime .wrap("required")) != null,
                    .value = input.getValue(),
                    .placeholder = el.getAttributeSafe(comptime .wrap("placeholder")),
                    .options = &.{},
                });
            },
            .textarea => {
                const textarea = el.is(Element.Html.TextArea) orelse continue;
                try fields.append(arena, .{
                    .node = node,
                    .tag_name = "textarea",
                    .name = el.getAttributeSafe(comptime .wrap("name")),
                    .input_type = null,
                    .required = el.getAttributeSafe(comptime .wrap("required")) != null,
                    .value = textarea.getValue(),
                    .placeholder = el.getAttributeSafe(comptime .wrap("placeholder")),
                    .options = &.{},
                });
            },
            .select => {
                const select = el.is(Element.Html.Select) orelse continue;
                const options = try collectSelectOptions(node, arena, page);
                try fields.append(arena, .{
                    .node = node,
                    .tag_name = "select",
                    .name = el.getAttributeSafe(comptime .wrap("name")),
                    .input_type = null,
                    .required = el.getAttributeSafe(comptime .wrap("required")) != null,
                    .value = select.getValue(page),
                    .placeholder = null,
                    .options = options,
                });
            },
            else => {},
        }
    }

    return fields.items;
}
/// Gathers the value and text of every `<option>` element found while
/// walking `select_node`. The resulting list is allocated from `arena`.
fn collectSelectOptions(
    select_node: *Node,
    arena: Allocator,
    page: *Page,
) ![]SelectOption {
    var collected: std.ArrayList(SelectOption) = .empty;

    var walker = TreeWalker.Full.init(select_node, .{});
    while (walker.next()) |visited| {
        const element = visited.is(Element) orelse continue;
        // Anything that is not an <option> element is ignored.
        if (element.is(Element.Html.Option)) |opt| {
            try collected.append(arena, .{
                .value = opt.getValue(page),
                .text = opt.getText(page),
            });
        }
    }

    return collected.items;
}
const testing = @import("../testing.zig");
/// Test helper: parses `html` into a fresh `<div>` on a new test page and
/// runs `collectForms` over that div.
///
/// NOTE(review): the result is allocated from `page.call_arena` while the
/// page is removed when this function returns — confirm the arena outlives
/// `removePage`.
fn testForms(html: []const u8) ![]FormInfo {
    const page = try testing.test_session.createPage();
    defer testing.test_session.removePage();

    const container = try page.window._document.createElement("div", null, page);
    const container_node = container.asNode();
    try page.parseHtmlAsChildren(container_node, html);

    return collectForms(container_node, page.call_arena, page);
}
// A typical login form: the submit button must not be reported as a field.
test "browser.forms: login form" {
    const forms = try testForms(
        \\<form action="/login" method="POST">
        \\ <input type="email" name="email" required placeholder="Email">
        \\ <input type="password" name="password" required>
        \\ <input type="submit" value="Log In">
        \\</form>
    );
    try testing.expectEqual(1, forms.len);

    const login = forms[0];
    try testing.expectEqual("/login", login.action.?);
    try testing.expectEqual("POST", login.method.?);
    try testing.expectEqual(2, login.fields.len);

    const email = login.fields[0];
    try testing.expectEqual("email", email.name.?);
    try testing.expectEqual("email", email.input_type.?);
    try testing.expect(email.required);

    const password = login.fields[1];
    try testing.expectEqual("password", password.name.?);
}
// A <select> is reported as one field carrying its <option> entries.
test "browser.forms: form with select" {
    const forms = try testForms(
        \\<form>
        \\ <select name="color">
        \\ <option value="red">Red</option>
        \\ <option value="blue">Blue</option>
        \\ </select>
        \\</form>
    );
    try testing.expectEqual(1, forms.len);
    try testing.expectEqual(1, forms[0].fields.len);

    const color = forms[0].fields[0];
    try testing.expectEqual("select", color.tag_name);
    try testing.expectEqual(2, color.options.len);

    const first = color.options[0];
    try testing.expectEqual("red", first.value);
    try testing.expectEqual("Red", first.text);
}
// A <textarea> is reported with its tag name and placeholder.
test "browser.forms: form with textarea" {
    const forms = try testForms(
        \\<form method="POST">
        \\ <textarea name="message" placeholder="Your message"></textarea>
        \\</form>
    );
    try testing.expectEqual(1, forms.len);
    try testing.expectEqual(1, forms[0].fields.len);

    const message = forms[0].fields[0];
    try testing.expectEqual("textarea", message.tag_name);
    try testing.expectEqual("Your message", message.placeholder.?);
}
// A form that contains no fields is left out of the result entirely.
test "browser.forms: empty form skipped" {
    const html =
        \\<form action="/empty">
        \\ <p>No fields here</p>
        \\</form>
    ;
    const forms = try testForms(html);
    try testing.expectEqual(0, forms.len);
}
// Hidden inputs are not reported; only the text input remains.
test "browser.forms: hidden inputs excluded" {
    const forms = try testForms(
        \\<form>
        \\ <input type="hidden" name="csrf" value="token123">
        \\ <input type="text" name="username">
        \\</form>
    );
    try testing.expectEqual(1, forms.len);

    const visible_fields = forms[0].fields;
    try testing.expectEqual(1, visible_fields.len);
    try testing.expectEqual("username", visible_fields[0].name.?);
}
// Two sibling forms are reported separately, each with its own fields.
test "browser.forms: multiple forms" {
    const forms = try testForms(
        \\<form action="/search" method="GET">
        \\ <input type="text" name="q" placeholder="Search">
        \\</form>
        \\<form action="/login" method="POST">
        \\ <input type="email" name="email">
        \\ <input type="password" name="pass">
        \\</form>
    );
    try testing.expectEqual(2, forms.len);

    const search = forms[0];
    const login = forms[1];
    try testing.expectEqual(1, search.fields.len);
    try testing.expectEqual(2, login.fields.len);
}

View File

@@ -32,6 +32,7 @@ pub fn processMessage(cmd: anytype) !void {
getSemanticTree, getSemanticTree,
getInteractiveElements, getInteractiveElements,
getStructuredData, getStructuredData,
detectForms,
clickNode, clickNode,
fillNode, fillNode,
scrollNode, scrollNode,
@@ -42,6 +43,7 @@ pub fn processMessage(cmd: anytype) !void {
.getSemanticTree => return getSemanticTree(cmd), .getSemanticTree => return getSemanticTree(cmd),
.getInteractiveElements => return getInteractiveElements(cmd), .getInteractiveElements => return getInteractiveElements(cmd),
.getStructuredData => return getStructuredData(cmd), .getStructuredData => return getStructuredData(cmd),
.detectForms => return detectForms(cmd),
.clickNode => return clickNode(cmd), .clickNode => return clickNode(cmd),
.fillNode => return fillNode(cmd), .fillNode => return fillNode(cmd),
.scrollNode => return scrollNode(cmd), .scrollNode => return scrollNode(cmd),
@@ -160,6 +162,32 @@ fn getStructuredData(cmd: anytype) !void {
}, .{}); }, .{});
} }
/// Handles the `lp.detectForms` command: collects every form of the current
/// page's document and replies with
///   - `forms`: the form metadata as serialized by `FormInfo.jsonStringify`,
///   - `formNodeIds`: the registered node id of each form, parallel to `forms`,
///   - `fieldNodeIds`: for each form, the registered node ids of its fields,
///     parallel to that form's `fields` array.
///
/// Fails with `error.NoBrowserContext` when the command has no browser
/// context and with `error.PageNotLoaded` when the session has no current
/// page.
fn detectForms(cmd: anytype) !void {
    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const forms_data = try lp.forms.collectForms(
        page.document.asNode(),
        cmd.arena,
        page,
    );

    // Register form and field nodes for backendNodeId references. The field
    // ids are returned as well: the serialized field metadata carries no id,
    // so without them a client could not address the fields it was told about.
    var form_ids: std.ArrayList(Node.Id) = try .initCapacity(cmd.arena, forms_data.len);
    var field_ids: std.ArrayList([]const Node.Id) = try .initCapacity(cmd.arena, forms_data.len);
    for (forms_data) |form| {
        const registered = try bc.node_registry.register(form.node);
        form_ids.appendAssumeCapacity(registered.id);

        var ids: std.ArrayList(Node.Id) = try .initCapacity(cmd.arena, form.fields.len);
        for (form.fields) |field| {
            const field_registered = try bc.node_registry.register(field.node);
            ids.appendAssumeCapacity(field_registered.id);
        }
        field_ids.appendAssumeCapacity(ids.items);
    }

    return cmd.sendResult(.{
        .forms = forms_data,
        .formNodeIds = form_ids.items,
        .fieldNodeIds = field_ids.items,
    }, .{});
}
fn clickNode(cmd: anytype) !void { fn clickNode(cmd: anytype) !void {
const Params = struct { const Params = struct {
nodeId: ?Node.Id = null, nodeId: ?Node.Id = null,

View File

@@ -35,6 +35,7 @@ pub const markdown = @import("browser/markdown.zig");
pub const SemanticTree = @import("SemanticTree.zig"); pub const SemanticTree = @import("SemanticTree.zig");
pub const CDPNode = @import("cdp/Node.zig"); pub const CDPNode = @import("cdp/Node.zig");
pub const interactive = @import("browser/interactive.zig"); pub const interactive = @import("browser/interactive.zig");
pub const forms = @import("browser/forms.zig");
pub const actions = @import("browser/actions.zig"); pub const actions = @import("browser/actions.zig");
pub const structured_data = @import("browser/structured_data.zig"); pub const structured_data = @import("browser/structured_data.zig");
pub const mcp = @import("mcp.zig"); pub const mcp = @import("mcp.zig");

View File

@@ -101,6 +101,18 @@ pub const tool_list = [_]protocol.Tool{
\\} \\}
), ),
}, },
.{
.name = "detectForms",
.description = "Detect all forms on the page and return their structure including fields, types, and required status. If a url is provided, it navigates to that url first.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before detecting forms." }
\\ }
\\}
),
},
.{ .{
.name = "click", .name = "click",
.description = "Click on an interactive element.", .description = "Click on an interactive element.",
@@ -252,6 +264,7 @@ const ToolAction = enum {
links, links,
interactiveElements, interactiveElements,
structuredData, structuredData,
detectForms,
evaluate, evaluate,
semantic_tree, semantic_tree,
click, click,
@@ -267,6 +280,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "links", .links }, .{ "links", .links },
.{ "interactiveElements", .interactiveElements }, .{ "interactiveElements", .interactiveElements },
.{ "structuredData", .structuredData }, .{ "structuredData", .structuredData },
.{ "detectForms", .detectForms },
.{ "evaluate", .evaluate }, .{ "evaluate", .evaluate },
.{ "semantic_tree", .semantic_tree }, .{ "semantic_tree", .semantic_tree },
.{ "click", .click }, .{ "click", .click },
@@ -299,6 +313,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.links => try handleLinks(server, arena, req.id.?, call_params.arguments), .links => try handleLinks(server, arena, req.id.?, call_params.arguments),
.interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments), .interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments),
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments), .structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
.detectForms => try handleDetectForms(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments), .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments), .semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
.click => try handleClick(server, arena, req.id.?, call_params.arguments), .click => try handleClick(server, arena, req.id.?, call_params.arguments),
@@ -435,6 +450,137 @@ fn handleStructuredData(server: *Server, arena: std.mem.Allocator, id: std.json.
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content }); try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
} }
/// MCP output shape for a form: the form's metadata plus the backend node id
/// under which the form node was registered.
const FormWithId = struct {
    backendNodeId: CDPNode.Id,
    action: ?[]const u8,
    method: ?[]const u8,
    fields: []const FormFieldWithId,

    /// Emits `key: text` only when `maybe_text` is non-null.
    fn writeIfPresent(jw: anytype, comptime key: []const u8, maybe_text: ?[]const u8) !void {
        const text = maybe_text orelse return;
        try jw.objectField(key);
        try jw.write(text);
    }

    /// Writes "backendNodeId", then optional "action" and "method", then the
    /// "fields" array (always present).
    pub fn jsonStringify(self: *const FormWithId, jw: anytype) !void {
        try jw.beginObject();

        try jw.objectField("backendNodeId");
        try jw.write(self.backendNodeId);

        try writeIfPresent(jw, "action", self.action);
        try writeIfPresent(jw, "method", self.method);

        try jw.objectField("fields");
        try jw.beginArray();
        for (self.fields) |*form_field| try form_field.jsonStringify(jw);
        try jw.endArray();

        try jw.endObject();
    }
};
/// MCP output shape for a form field: the field's metadata plus the backend
/// node id under which the field node was registered.
const FormFieldWithId = struct {
    backendNodeId: CDPNode.Id,
    tag_name: []const u8,
    name: ?[]const u8,
    input_type: ?[]const u8,
    required: bool,
    value: ?[]const u8,
    placeholder: ?[]const u8,
    options: []const lp.forms.SelectOption,

    /// Emits `key: text` only when `maybe_text` is non-null.
    fn writeOptionalString(jw: anytype, comptime key: []const u8, maybe_text: ?[]const u8) !void {
        const text = maybe_text orelse return;
        try jw.objectField(key);
        try jw.write(text);
    }

    /// Writes "backendNodeId" and "tagName" unconditionally; every other key
    /// is omitted when it carries no information (null string,
    /// `required == false`, or an empty option list).
    pub fn jsonStringify(self: *const FormFieldWithId, jw: anytype) !void {
        try jw.beginObject();

        try jw.objectField("backendNodeId");
        try jw.write(self.backendNodeId);
        try jw.objectField("tagName");
        try jw.write(self.tag_name);

        try writeOptionalString(jw, "name", self.name);
        try writeOptionalString(jw, "inputType", self.input_type);

        // "required" is only ever written as `true`; absence means false.
        if (self.required) {
            try jw.objectField("required");
            try jw.write(true);
        }

        try writeOptionalString(jw, "value", self.value);
        try writeOptionalString(jw, "placeholder", self.placeholder);

        if (self.options.len != 0) {
            try jw.objectField("options");
            try jw.beginArray();
            for (self.options) |*option| try option.jsonStringify(jw);
            try jw.endArray();
        }

        try jw.endObject();
    }
};
/// MCP `detectForms` tool handler.
///
/// When `arguments` contains a `url`, navigates there first. Then collects
/// the forms of the current page, registers every form and field node with
/// the server's node registry, and replies with a JSON text payload in which
/// each form and field carries its `backendNodeId`.
///
/// Replies with a `PageNotLoaded` error when there is no current page and
/// with an `InternalError` when form collection fails. Malformed arguments
/// are logged and otherwise ignored, so detection still runs on the current
/// page; allocation failures while parsing are propagated.
fn handleDetectForms(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const Params = struct {
        url: ?[:0]const u8 = null,
    };
    if (arguments) |args_raw| {
        if (std.json.parseFromValueLeaky(Params, arena, args_raw, .{ .ignore_unknown_fields = true })) |args| {
            if (args.url) |u| {
                try performGoto(server, u, id);
            }
        } else |err| {
            // Out-of-memory is never a "bad argument"; let it surface.
            if (err == error.OutOfMemory) return err;
            // Stay best-effort for malformed arguments, but leave a trace so
            // a request that silently ran against the wrong page can be
            // diagnosed.
            log.err(.mcp, "invalid detectForms arguments", .{ .err = err });
        }
    }

    const page = server.session.currentPage() orelse {
        return server.sendError(id, .PageNotLoaded, "Page not loaded");
    };

    const forms_data = lp.forms.collectForms(page.document.asNode(), arena, page) catch |err| {
        log.err(.mcp, "form collection failed", .{ .err = err });
        return server.sendError(id, .InternalError, "Failed to collect forms");
    };

    // Build output with backendNodeIds. The final sizes are known up front,
    // so each list is allocated once.
    var results: std.ArrayList(FormWithId) = try .initCapacity(arena, forms_data.len);
    for (forms_data) |form| {
        const form_registered = try server.node_registry.register(form.node);

        var fields_with_ids: std.ArrayList(FormFieldWithId) = try .initCapacity(arena, form.fields.len);
        for (form.fields) |field| {
            const field_registered = try server.node_registry.register(field.node);
            fields_with_ids.appendAssumeCapacity(.{
                .backendNodeId = field_registered.id,
                .tag_name = field.tag_name,
                .name = field.name,
                .input_type = field.input_type,
                .required = field.required,
                .value = field.value,
                .placeholder = field.placeholder,
                .options = field.options,
            });
        }

        results.appendAssumeCapacity(.{
            .backendNodeId = form_registered.id,
            .action = form.action,
            .method = form.method,
            .fields = fields_with_ids.items,
        });
    }

    var aw: std.Io.Writer.Allocating = .init(arena);
    try std.json.Stringify.value(results.items, .{}, &aw.writer);
    const content = [_]protocol.TextContent([]const u8){.{ .text = aw.written() }};
    try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void { fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArguments(EvaluateParams, arena, arguments, server, id, "evaluate"); const args = try parseArguments(EvaluateParams, arena, arguments, server, id, "evaluate");