mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-21 20:24:42 +00:00
Merge pull request #1776 from lightpanda-io/semantic-tree
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
Add native Semantic Tree extraction engine for AI agents
This commit is contained in:
@@ -200,6 +200,8 @@ pub const DumpFormat = enum {
|
||||
html,
|
||||
markdown,
|
||||
wpt,
|
||||
semantic_tree,
|
||||
semantic_tree_text,
|
||||
};
|
||||
|
||||
pub const Fetch = struct {
|
||||
@@ -346,7 +348,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
|
||||
\\
|
||||
\\Options:
|
||||
\\--dump Dumps document to stdout.
|
||||
\\ Argument must be 'html' or 'markdown'.
|
||||
\\ Argument must be 'html', 'markdown', 'semantic_tree', or 'semantic_tree_text'.
|
||||
\\ Defaults to no dump.
|
||||
\\
|
||||
\\--strip_mode Comma separated list of tag groups to remove from dump
|
||||
|
||||
450
src/SemanticTree.zig
Normal file
450
src/SemanticTree.zig
Normal file
@@ -0,0 +1,450 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. See <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const lp = @import("lightpanda");
|
||||
const log = @import("log.zig");
|
||||
const isAllWhitespace = @import("string.zig").isAllWhitespace;
|
||||
const Page = lp.Page;
|
||||
const interactive = @import("browser/interactive.zig");
|
||||
|
||||
const CData = @import("browser/webapi/CData.zig");
|
||||
const Element = @import("browser/webapi/Element.zig");
|
||||
const Node = @import("browser/webapi/Node.zig");
|
||||
const AXNode = @import("cdp/AXNode.zig");
|
||||
const CDPNode = @import("cdp/Node.zig");
|
||||
|
||||
const Self = @This();
|
||||
|
||||
dom_node: *Node,
|
||||
registry: *CDPNode.Registry,
|
||||
page: *Page,
|
||||
arena: std.mem.Allocator,
|
||||
prune: bool = false,
|
||||
|
||||
/// JSON serialization hook: writes the semantic tree rooted at `dom_node`
/// through `jw` as nested objects produced by `JsonVisitor`.
///
/// The declared error set only allows `error.WriteFailed`, so a failure while
/// building the listener map or walking the DOM is logged and then reported
/// as `error.WriteFailed`.
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) error{WriteFailed}!void {
    const targets = interactive.buildListenerTargetMap(self.page, self.arena) catch |err| {
        log.err(.app, "listener map failed", .{ .err = err });
        return error.WriteFailed;
    };

    // Scratch buffer holding the XPath of the node currently being walked.
    var path: std.ArrayList(u8) = .empty;
    var json_visitor: JsonVisitor = .{ .jw = jw, .tree = self };

    self.walk(self.dom_node, &path, null, &json_visitor, 1, targets) catch |err| {
        log.err(.app, "semantic tree json dump failed", .{ .err = err });
        return error.WriteFailed;
    };
}
|
||||
|
||||
/// Writes the semantic tree rooted at `dom_node` to `writer` as indented
/// plain text, one line per visited node (see `TextVisitor`).
///
/// A failure while building the listener map or walking the DOM is logged
/// and reported as `error.WriteFailed`.
pub fn textStringify(self: @This(), writer: *std.Io.Writer) error{WriteFailed}!void {
    const targets = interactive.buildListenerTargetMap(self.page, self.arena) catch |err| {
        log.err(.app, "listener map failed", .{ .err = err });
        return error.WriteFailed;
    };

    // Scratch buffer holding the XPath of the node currently being walked.
    var path: std.ArrayList(u8) = .empty;
    var text_visitor: TextVisitor = .{ .writer = writer, .tree = self, .depth = 0 };

    self.walk(self.dom_node, &path, null, &text_visitor, 1, targets) catch |err| {
        log.err(.app, "semantic tree text dump failed", .{ .err = err });
        return error.WriteFailed;
    };
}
|
||||
|
||||
/// One selectable choice collected by `extractSelectOptions` for a `<select>`
/// element or for the `<datalist>` referenced by an input's `list` attribute.
const OptionData = struct {
|
||||
// Result of the option element's `getValue(page)`.
value: []const u8,
|
||||
// Result of the option element's `getText()`.
text: []const u8,
|
||||
// Result of the option element's `getSelected()`.
selected: bool,
|
||||
};
|
||||
|
||||
/// Per-node facts gathered by `walk` and handed to a visitor's `visit`.
const NodeData = struct {
|
||||
// Id assigned by the CDP node registry (`registry.register(node).id`).
id: u32,
|
||||
// Accessibility view of the node, built with `AXNode.fromNode`.
axn: AXNode,
|
||||
// Role string returned by `axn.getRole()`.
role: []const u8,
|
||||
// Accessible name from `axn.getName`; `walk` clears it for structural
|
||||
// roles that carry neither an `aria-label` nor a `title` attribute.
name: ?[]const u8,
|
||||
// Current value of an input, textarea or select; null for other nodes.
value: ?[]const u8,
|
||||
// Choices of a select, or of the datalist referenced by an input.
options: ?[]OptionData = null,
|
||||
// Slice of walk's shared XPath buffer; the buffer is truncated again when
|
||||
// walk returns from this node, so do not keep this slice past `visit`.
xpath: []const u8,
|
||||
// True when `interactive.classifyInteractivity` returned non-null.
is_interactive: bool,
|
||||
// Lower-cased tag name for elements, "root" for documents and document
|
||||
// fragments, "text" otherwise.
node_name: []const u8,
|
||||
};
|
||||
|
||||
/// Depth-first traversal of `node` and its descendants that feeds every
/// retained node to `visitor`.
///
/// - `xpath_buffer`: shared scratch buffer; on entry it holds the XPath of the
///   parent, this call appends its own step and truncates it again on exit
///   (including error exits).
/// - `parent_name`: accessible name kept for the parent, used while pruning to
///   drop StaticText that merely repeats it.
/// - `visitor`: anything with `visit(*Node, *NodeData) !bool` (return value:
///   "walk my children?") and `leave() !void`, called once per `visit`.
/// - `index`: 1-based position of `node` among same-named siblings, used for
///   the XPath predicate.
/// - `listener_targets`: pre-built map consulted by
///   `interactive.classifyInteractivity`.
fn walk(self: @This(), node: *Node, xpath_buffer: *std.ArrayList(u8), parent_name: ?[]const u8, visitor: anytype, index: usize, listener_targets: interactive.ListenerTargetMap) !void {
    // 1. Skip non-content nodes
    if (node.is(Element)) |el| {
        const tag = el.getTag();
        if (tag.isMetadata() or tag == .svg) return;

        // We handle options/optgroups natively inside their parents, skip them in the general walk
        if (tag == .datalist or tag == .option or tag == .optgroup) return;

        // Check visibility using the engine's checkVisibility which handles CSS display: none
        if (!el.checkVisibility(self.page)) return;

        if (el.is(Element.Html)) |html_el| {
            if (html_el.getHidden()) return;
        }
    } else if (node.is(CData.Text)) |text_node| {
        if (isAllWhitespace(text_node.getWholeText())) return;
    } else if (node._type != .document and node._type != .document_fragment) {
        return;
    }

    // 2. Collect the per-node data handed to the visitor.
    const cdp_node = try self.registry.register(node);
    const axn = AXNode.fromNode(node);
    const role = try axn.getRole();

    var is_interactive = false;
    var value: ?[]const u8 = null;
    var options: ?[]OptionData = null;
    var node_name: []const u8 = "text";

    if (node.is(Element)) |el| {
        node_name = el.getTagNameLower();

        if (el.is(Element.Html.Input)) |input| {
            value = input.getValue();
            if (el.getAttributeSafe(comptime lp.String.wrap("list"))) |list_id| {
                options = try extractDataListOptions(list_id, self.page, self.arena);
            }
        } else if (el.is(Element.Html.TextArea)) |textarea| {
            value = textarea.getValue();
        } else if (el.is(Element.Html.Select)) |select| {
            value = select.getValue(self.page);
            options = try extractSelectOptions(el.asNode(), self.page, self.arena);
        }

        if (el.is(Element.Html)) |html_el| {
            if (interactive.classifyInteractivity(el, html_el, listener_targets) != null) {
                is_interactive = true;
            }
        }
    } else if (node._type == .document or node._type == .document_fragment) {
        node_name = "root";
    }

    // 3. Extend the shared XPath with this node's step. The defer restores the
    // parent's path on every exit, so an error cannot leave a stale step behind.
    const initial_xpath_len = xpath_buffer.items.len;
    defer xpath_buffer.shrinkRetainingCapacity(initial_xpath_len);
    try appendXPathSegment(node, xpath_buffer.writer(self.arena), index);
    const xpath = xpath_buffer.items;

    var name = try axn.getName(self.page, self.arena);

    const has_explicit_label = if (node.is(Element)) |el|
        el.getAttributeSafe(.wrap("aria-label")) != null or el.getAttributeSafe(.wrap("title")) != null
    else
        false;

    const structural = isStructuralRole(role);

    // Filter out computed concatenated names for generic containers without explicit labels.
    // This prevents token bloat and ensures their StaticText children aren't incorrectly pruned.
    // We ignore interactivity because a generic wrapper with an event listener still shouldn't hoist all text.
    if (name != null and structural and !has_explicit_label) {
        name = null;
    }

    var data = NodeData{
        .id = cdp_node.id,
        .axn = axn,
        .role = role,
        .name = name,
        .value = value,
        .options = options,
        .xpath = xpath,
        .is_interactive = is_interactive,
        .node_name = node_name,
    };

    // 4. Decide whether the node itself is reported. A skipped node is
    // "unrolled": its children are still walked, at the parent's level.
    const should_visit = blk: {
        if (!self.prune) break :blk true;

        if (structural and !is_interactive and !has_explicit_label) break :blk false;

        if (std.mem.eql(u8, role, "StaticText") and node._parent != null) {
            if (parent_name != null and name != null and std.mem.indexOf(u8, parent_name.?, name.?) != null) {
                break :blk false;
            }
        }
        break :blk true;
    };

    var should_walk_children = true;
    if (should_visit) {
        should_walk_children = try visitor.visit(node, &data);
    }

    if (should_walk_children) {
        var it = node.childrenIterator();
        var sibling_counts = std.StringArrayHashMap(usize).init(self.arena);
        while (it.next()) |child| {
            // Only elements and text nodes emit an XPath step (see
            // appendXPathSegment). Other children, such as comments, must not
            // consume a `text()` position, otherwise later text siblings
            // would be numbered too high.
            const step: ?[]const u8 = if (child.is(Element)) |el|
                el.getTagNameLower()
            else if (child.is(CData.Text) != null)
                "text()"
            else
                null;

            var position: usize = 1;
            if (step) |key| {
                const gop = try sibling_counts.getOrPut(key);
                if (!gop.found_existing) {
                    gop.value_ptr.* = 0;
                }
                gop.value_ptr.* += 1;
                position = gop.value_ptr.*;
            }

            try self.walk(child, xpath_buffer, name, visitor, position, listener_targets);
        }
    }

    // Only close what visit() opened; a skipped node opened nothing.
    if (should_visit) {
        try visitor.leave();
    }
}
|
||||
|
||||
/// Collects the `<option>` elements found directly under `node` or one level
/// down inside an `<optgroup>` child, in document order.
/// The returned slice is allocated from `arena`.
fn extractSelectOptions(node: *Node, page: *Page, arena: std.mem.Allocator) ![]OptionData {
    var collected: std.ArrayListUnmanaged(OptionData) = .empty;

    var children = node.childrenIterator();
    while (children.next()) |child| {
        const el = child.is(Element) orelse continue;
        switch (el.getTag()) {
            .option => {
                const opt = el.is(Element.Html.Option) orelse continue;
                try collected.append(arena, .{
                    .text = opt.getText(),
                    .value = opt.getValue(page),
                    .selected = opt.getSelected(),
                });
            },
            .optgroup => {
                var grouped = child.childrenIterator();
                while (grouped.next()) |group_child| {
                    const opt = group_child.is(Element.Html.Option) orelse continue;
                    try collected.append(arena, .{
                        .text = opt.getText(),
                        .value = opt.getValue(page),
                        .selected = opt.getSelected(),
                    });
                }
            },
            else => {},
        }
    }

    return collected.toOwnedSlice(arena);
}
|
||||
|
||||
/// Resolves `list_id` through the page's document and, when the element found
/// is a `<datalist>`, returns its options. Returns null when no element has
/// that id or when the element is not a datalist.
fn extractDataListOptions(list_id: []const u8, page: *Page, arena: std.mem.Allocator) !?[]OptionData {
    const target = page.document.getElementById(list_id, page) orelse return null;
    if (target.getTag() != .datalist) return null;
    return try extractSelectOptions(target.asNode(), page, arena);
}
|
||||
|
||||
/// Appends one XPath step for `node` to `writer`: `/tag[index]` for elements
/// and `/text()[index]` for text nodes. Any other node writes nothing.
fn appendXPathSegment(node: *Node, writer: anytype, index: usize) !void {
    if (node.is(Element)) |el| {
        return std.fmt.format(writer, "/{s}[{d}]", .{ el.getTagNameLower(), index });
    }
    if (node.is(CData.Text) != null) {
        return std.fmt.format(writer, "/text()[{d}]", .{index});
    }
}
|
||||
|
||||
/// Visitor used by `jsonStringify`: emits one JSON object per visited node,
/// with the node's descendants nested under its "children" array.
const JsonVisitor = struct {
    jw: *std.json.Stringify,
    tree: Self,

    /// Opens the JSON object for `node`, writes its fields and opens the
    /// "children" array; `leave` closes both.
    /// Returns false when the node carries `options`, because those choices
    /// were already written and the children must not be walked again.
    pub fn visit(self: *JsonVisitor, node: *Node, data: *NodeData) !bool {
        try self.jw.beginObject();

        // nodeId is written as a string, backendDOMNodeId as a number.
        try self.jw.objectField("nodeId");
        try self.jw.write(try std.fmt.allocPrint(self.tree.arena, "{d}", .{data.id}));

        try self.jw.objectField("backendDOMNodeId");
        try self.jw.write(data.id);

        try self.jw.objectField("nodeName");
        try self.jw.write(data.node_name);

        try self.jw.objectField("xpath");
        try self.jw.write(data.xpath);

        if (node.is(Element)) |el| {
            try self.jw.objectField("nodeType");
            try self.jw.write(1);

            try self.jw.objectField("isInteractive");
            try self.jw.write(data.is_interactive);

            try self.jw.objectField("role");
            try self.jw.write(data.role);

            if (data.name) |name| {
                if (name.len > 0) {
                    try self.jw.objectField("name");
                    try self.jw.write(name);
                }
            }

            if (data.value) |value| {
                try self.jw.objectField("value");
                try self.jw.write(value);
            }

            if (el._attributes) |attrs| {
                try self.jw.objectField("attributes");
                try self.jw.beginObject();
                var iter = attrs.iterator();
                while (iter.next()) |attr| {
                    try self.jw.objectField(attr._name.str());
                    try self.jw.write(attr._value.str());
                }
                try self.jw.endObject();
            }

            if (data.options) |options| {
                try self.jw.objectField("options");
                try self.jw.beginArray();
                for (options) |opt| {
                    try self.jw.beginObject();
                    try self.jw.objectField("value");
                    try self.jw.write(opt.value);
                    try self.jw.objectField("text");
                    try self.jw.write(opt.text);
                    try self.jw.objectField("selected");
                    try self.jw.write(opt.selected);
                    try self.jw.endObject();
                }
                try self.jw.endArray();
            }
        } else if (node.is(CData.Text)) |text_node| {
            try self.jw.objectField("nodeType");
            try self.jw.write(3);
            try self.jw.objectField("nodeValue");
            try self.jw.write(text_node.getWholeText());
        } else {
            // walk() only lets documents and document fragments reach this
            // branch. Their DOM nodeType values differ: 9 is DOCUMENT_NODE,
            // 11 is DOCUMENT_FRAGMENT_NODE.
            const node_type: u8 = if (node._type == .document_fragment) 11 else 9;
            try self.jw.objectField("nodeType");
            try self.jw.write(node_type);
        }

        try self.jw.objectField("children");
        try self.jw.beginArray();

        if (data.options != null) {
            // Signal to not walk children, as we handled them natively
            return false;
        }

        return true;
    }

    /// Closes the "children" array and the object opened by `visit`.
    pub fn leave(self: *JsonVisitor) !void {
        try self.jw.endArray();
        try self.jw.endObject();
    }
};
|
||||
|
||||
/// Reports whether `role` is one of the layout/container roles that the walk
/// treats as structural. The comparison is exact and case-sensitive.
fn isStructuralRole(role: []const u8) bool {
    const Lookup = std.StaticStringMap(void);
    const layout_only = comptime Lookup.initComptime(.{
        .{ "none", {} },
        .{ "generic", {} },
        .{ "InlineTextBox", {} },
        .{ "banner", {} },
        .{ "navigation", {} },
        .{ "main", {} },
        .{ "list", {} },
        .{ "listitem", {} },
        .{ "table", {} },
        .{ "rowgroup", {} },
        .{ "row", {} },
        .{ "cell", {} },
        .{ "region", {} },
    });
    return layout_only.get(role) != null;
}
|
||||
|
||||
/// Visitor used by `textStringify`: prints one line per visited node,
/// indented two spaces per nesting level.
const TextVisitor = struct {
    writer: *std.Io.Writer,
    tree: Self,
    depth: usize,

    /// Writes the line for `node` and moves one level deeper.
    /// Line shape: "[id] role: label (value: v) options: ['a', 'b' (selected)]".
    /// Returns false when the children should not be walked.
    pub fn visit(self: *TextVisitor, node: *Node, data: *NodeData) !bool {
        var pad = self.depth * 2;
        while (pad > 0) : (pad -= 1) {
            try self.writer.writeByte(' ');
        }
        try self.writer.print("[{d}] {s}: ", .{ data.id, data.role });

        // Use the accessible name when there is one; only a nameless text
        // node falls back to its own trimmed content.
        const label: []const u8 = if (data.name) |n|
            n
        else if (node.is(CData.Text)) |text_node|
            std.mem.trim(u8, text_node.getWholeText(), " \t\r\n")
        else
            "";
        if (label.len > 0) {
            try self.writer.writeAll(label);
        }

        if (data.value) |v| {
            if (v.len > 0) {
                try self.writer.print(" (value: {s})", .{v});
            }
        }

        if (data.options) |options| {
            try self.writer.writeAll(" options: [");
            for (options, 0..) |opt, i| {
                if (i > 0) try self.writer.writeAll(", ");
                try self.writer.print("'{s}'", .{opt.value});
                if (opt.selected) {
                    try self.writer.writeAll(" (selected)");
                }
            }
            try self.writer.writeAll("]\n");
        } else {
            try self.writer.writeByte('\n');
        }

        self.depth += 1;

        // Options were printed inline, so the children are not walked.
        if (data.options != null) return false;

        // A named link/button/heading/code node already carries its text;
        // walking its children would only repeat it.
        const has_name = if (data.name) |n| n.len > 0 else false;
        if (has_name) {
            const leaf_roles = [_][]const u8{ "link", "button", "heading", "code" };
            for (leaf_roles) |leaf| {
                if (std.mem.eql(u8, data.role, leaf)) return false;
            }
        }

        return true;
    }

    /// Moves back up one nesting level; never goes below zero.
    pub fn leave(self: *TextVisitor) !void {
        self.depth -|= 1;
    }
};
|
||||
@@ -157,7 +157,7 @@ pub fn collectInteractiveElements(
|
||||
.node = node,
|
||||
.tag_name = el.getTagNameLower(),
|
||||
.role = getRole(el),
|
||||
.name = getAccessibleName(el),
|
||||
.name = try getAccessibleName(el, arena),
|
||||
.interactivity_type = itype,
|
||||
.listener_types = listener_types,
|
||||
.disabled = isDisabled(el),
|
||||
@@ -178,12 +178,12 @@ pub fn collectInteractiveElements(
|
||||
return results.items;
|
||||
}
|
||||
|
||||
const ListenerTargetMap = std.AutoHashMapUnmanaged(usize, std.ArrayList([]const u8));
|
||||
pub const ListenerTargetMap = std.AutoHashMapUnmanaged(usize, std.ArrayList([]const u8));
|
||||
|
||||
/// Pre-build a map from event_target pointer → list of event type names.
|
||||
/// This lets both classifyInteractivity (O(1) "has any?") and
|
||||
/// getListenerTypes (O(1) "which ones?") avoid re-iterating per element.
|
||||
fn buildListenerTargetMap(page: *Page, arena: Allocator) !ListenerTargetMap {
|
||||
pub fn buildListenerTargetMap(page: *Page, arena: Allocator) !ListenerTargetMap {
|
||||
var map = ListenerTargetMap{};
|
||||
|
||||
// addEventListener registrations
|
||||
@@ -209,7 +209,7 @@ fn buildListenerTargetMap(page: *Page, arena: Allocator) !ListenerTargetMap {
|
||||
return map;
|
||||
}
|
||||
|
||||
fn classifyInteractivity(
|
||||
pub fn classifyInteractivity(
|
||||
el: *Element,
|
||||
html_el: *Element.Html,
|
||||
listener_targets: ListenerTargetMap,
|
||||
@@ -296,7 +296,7 @@ fn getRole(el: *Element) ?[]const u8 {
|
||||
};
|
||||
}
|
||||
|
||||
fn getAccessibleName(el: *Element) ?[]const u8 {
|
||||
fn getAccessibleName(el: *Element, arena: Allocator) !?[]const u8 {
|
||||
// aria-label
|
||||
if (el.getAttributeSafe(comptime .wrap("aria-label"))) |v| {
|
||||
if (v.len > 0) return v;
|
||||
@@ -325,11 +325,15 @@ fn getAccessibleName(el: *Element) ?[]const u8 {
|
||||
}
|
||||
|
||||
// Text content (first non-empty text node, trimmed)
|
||||
return getTextContent(el.asNode());
|
||||
return try getTextContent(el.asNode(), arena);
|
||||
}
|
||||
|
||||
fn getTextContent(node: *Node) ?[]const u8 {
|
||||
var tw = TreeWalker.FullExcludeSelf.init(node, .{});
|
||||
fn getTextContent(node: *Node, arena: Allocator) !?[]const u8 {
|
||||
var tw: TreeWalker.FullExcludeSelf = .init(node, .{});
|
||||
|
||||
var arr: std.ArrayList(u8) = .empty;
|
||||
var single_chunk: ?[]const u8 = null;
|
||||
|
||||
while (tw.next()) |child| {
|
||||
// Skip text inside script/style elements.
|
||||
if (child.is(Element)) |el| {
|
||||
@@ -344,13 +348,29 @@ fn getTextContent(node: *Node) ?[]const u8 {
|
||||
if (child.is(Node.CData)) |cdata| {
|
||||
if (cdata.is(Node.CData.Text)) |text| {
|
||||
const content = std.mem.trim(u8, text.getWholeText(), &std.ascii.whitespace);
|
||||
if (content.len > 0) return content;
|
||||
if (content.len > 0) {
|
||||
if (single_chunk == null and arr.items.len == 0) {
|
||||
single_chunk = content;
|
||||
} else {
|
||||
if (single_chunk) |sc| {
|
||||
try arr.appendSlice(arena, sc);
|
||||
try arr.append(arena, ' ');
|
||||
single_chunk = null;
|
||||
}
|
||||
try arr.appendSlice(arena, content);
|
||||
try arr.append(arena, ' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (single_chunk) |sc| return sc;
|
||||
if (arr.items.len == 0) return null;
|
||||
|
||||
// strip out trailing space
|
||||
return arr.items[0 .. arr.items.len - 1];
|
||||
}
|
||||
fn isDisabled(el: *Element) bool {
|
||||
if (el.getAttributeSafe(comptime .wrap("disabled")) != null) return true;
|
||||
return isDisabledByFieldset(el);
|
||||
|
||||
@@ -24,6 +24,7 @@ const TreeWalker = @import("webapi/TreeWalker.zig");
|
||||
const CData = @import("webapi/CData.zig");
|
||||
const Element = @import("webapi/Element.zig");
|
||||
const Node = @import("webapi/Node.zig");
|
||||
const isAllWhitespace = @import("../string.zig").isAllWhitespace;
|
||||
|
||||
pub const Opts = struct {
|
||||
// Options for future customization (e.g., dialect)
|
||||
@@ -46,13 +47,6 @@ const State = struct {
|
||||
last_char_was_newline: bool = true,
|
||||
};
|
||||
|
||||
/// Reports whether `tag` is rendered as a block (own line) by this renderer.
fn isBlock(tag: Element.Tag) bool {
    switch (tag) {
        // Sectioning
        .article, .aside, .footer, .header, .main, .nav, .section => return true,
        // Generic containers and flow content
        .div, .p, .blockquote, .pre, .hr, .table => return true,
        // Headings
        .h1, .h2, .h3, .h4, .h5, .h6 => return true,
        // Lists
        .ul, .ol => return true,
        else => return false,
    }
}
|
||||
|
||||
fn shouldAddSpacing(tag: Element.Tag) bool {
|
||||
return switch (tag) {
|
||||
.p, .h1, .h2, .h3, .h4, .h5, .h6, .blockquote, .pre, .table => true,
|
||||
@@ -99,26 +93,18 @@ fn isSignificantText(node: *Node) bool {
|
||||
}
|
||||
|
||||
fn isVisibleElement(el: *Element) bool {
|
||||
return switch (el.getTag()) {
|
||||
.script, .style, .noscript, .template, .head, .meta, .link, .title, .svg => false,
|
||||
else => true,
|
||||
};
|
||||
const tag = el.getTag();
|
||||
return !tag.isMetadata() and tag != .svg;
|
||||
}
|
||||
|
||||
/// Returns the element's `aria-label` attribute when present, otherwise its
/// `title` attribute, otherwise null.
fn getAnchorLabel(el: *Element) ?[]const u8 {
    if (el.getAttributeSafe(comptime .wrap("aria-label"))) |aria_label| {
        return aria_label;
    }
    return el.getAttributeSafe(comptime .wrap("title"));
}
|
||||
|
||||
/// Reports whether every byte of `text` is ASCII whitespace.
/// An empty slice counts as all-whitespace.
fn isAllWhitespace(text: []const u8) bool {
    for (text) |byte| {
        if (!std.ascii.isWhitespace(byte)) return false;
    }
    return true;
}
|
||||
|
||||
fn hasBlockDescendant(root: *Node) bool {
|
||||
var tw = TreeWalker.FullExcludeSelf.Elements.init(root, .{});
|
||||
while (tw.next()) |el| {
|
||||
if (isBlock(el.getTag())) return true;
|
||||
if (el.getTag().isBlock()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -192,7 +178,7 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
|
||||
// --- Opening Tag Logic ---
|
||||
|
||||
// Ensure block elements start on a new line (double newline for paragraphs etc)
|
||||
if (isBlock(tag) and !state.in_table) {
|
||||
if (tag.isBlock() and !state.in_table) {
|
||||
try ensureNewline(state, writer);
|
||||
if (shouldAddSpacing(tag)) {
|
||||
try writer.writeByte('\n');
|
||||
@@ -431,7 +417,7 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
|
||||
}
|
||||
|
||||
// Post-block newlines
|
||||
if (isBlock(tag) and !state.in_table) {
|
||||
if (tag.isBlock() and !state.in_table) {
|
||||
try ensureNewline(state, writer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1580,6 +1580,36 @@ pub const Tag = enum {
|
||||
else => tag,
|
||||
};
|
||||
}
|
||||
|
||||
/// Reports whether this tag belongs to the set treated as block-level.
pub fn isBlock(self: Tag) bool {
    switch (self) {
        // Semantic layout
        .article, .aside, .footer, .header, .main, .nav, .section => return true,
        // Grouping / containers
        .address, .div, .fieldset, .figure, .p => return true,
        // Headings
        .h1, .h2, .h3, .h4, .h5, .h6 => return true,
        // Lists
        .dl, .ol, .ul => return true,
        // Preformatted text and quotes
        .blockquote, .pre => return true,
        // Tables and thematic breaks
        .table, .hr => return true,
        else => return false,
    }
}
|
||||
|
||||
/// Reports whether this tag belongs to the set treated as document metadata
/// or non-rendered content rather than page content.
pub fn isMetadata(self: Tag) bool {
    switch (self) {
        .base, .head, .link, .meta, .title => return true,
        .noscript, .script, .style, .template => return true,
        else => return false,
    }
}
|
||||
};
|
||||
|
||||
pub const JsApi = struct {
|
||||
|
||||
@@ -557,13 +557,13 @@ pub const Writer = struct {
|
||||
|
||||
pub const AXRole = enum(u8) {
|
||||
// zig fmt: off
|
||||
none, article, banner, blockquote, button, caption, cell, checkbox, code,
|
||||
columnheader, combobox, complementary, contentinfo, definition, deletion,
|
||||
dialog, document, emphasis, figure, form, group, heading, image, insertion,
|
||||
link, list, listbox, listitem, main, marquee, meter, navigation, option,
|
||||
none, article, banner, blockquote, button, caption, cell, checkbox, code, color,
|
||||
columnheader, combobox, complementary, contentinfo, date, definition, deletion,
|
||||
dialog, document, emphasis, figure, file, form, group, heading, image, insertion,
|
||||
link, list, listbox, listitem, main, marquee, menuitem, meter, month, navigation, option,
|
||||
paragraph, presentation, progressbar, radio, region, row, rowgroup,
|
||||
rowheader, searchbox, separator, slider, spinbutton, status, strong,
|
||||
subscript, superscript, table, term, textbox, time, RootWebArea, LineBreak,
|
||||
subscript, superscript, @"switch", table, term, textbox, time, RootWebArea, LineBreak,
|
||||
StaticText,
|
||||
// zig fmt: on
|
||||
|
||||
@@ -620,9 +620,13 @@ pub const AXRole = enum(u8) {
|
||||
.number => .spinbutton,
|
||||
.search => .searchbox,
|
||||
.checkbox => .checkbox,
|
||||
.color => .color,
|
||||
.date => .date,
|
||||
.file => .file,
|
||||
.month => .month,
|
||||
.@"datetime-local", .week, .time => .combobox,
|
||||
// zig fmt: off
|
||||
.password, .@"datetime-local", .hidden, .month, .color,
|
||||
.week, .time, .file, .date => .none,
|
||||
.password, .hidden => .none,
|
||||
// zig fmt: on
|
||||
};
|
||||
},
|
||||
@@ -738,6 +742,44 @@ const AXSource = enum(u8) {
|
||||
value, // input value
|
||||
};
|
||||
|
||||
/// Computes the accessible name of this node as plain text by running
/// `writeName` against a capturing stand-in for the JSON writer.
///
/// Returns null when `writeName` reports no name source. Otherwise returns a
/// copy, allocated with `allocator`, of the captured text with surrounding
/// double quotes and ASCII whitespace trimmed.
pub fn getName(self: AXNode, page: *Page, allocator: std.mem.Allocator) !?[]const u8 {
|
||||
// Scratch buffer for the captured output; released when this function
|
||||
// returns, which is why the result is duplicated below.
var aw: std.Io.Writer.Allocating = .init(allocator);
|
||||
defer aw.deinit();
|
||||
|
||||
// writeName expects a std.json.Stringify instance.
|
||||
// This struct imitates the part of that interface needed here.
|
||||
// NOTE(review): `writer` is never read in this block; presumably
|
||||
// writeName/writeString access it by field name - confirm before removing.
const TextCaptureWriter = struct {
|
||||
aw: *std.Io.Writer.Allocating,
|
||||
writer: *std.Io.Writer,
|
||||
|
||||
// Appends string-like values verbatim, formats values exposing a
|
||||
// `format` method with "{s}", and drops everything else.
pub fn write(w: @This(), val: anytype) !void {
|
||||
const T = @TypeOf(val);
|
||||
if (T == []const u8 or T == [:0]const u8 or T == *const [val.len]u8) {
|
||||
try w.aw.writer.writeAll(val);
|
||||
} else if (comptime std.meta.hasMethod(T, "format")) {
|
||||
try std.fmt.format(w.aw.writer, "{s}", .{val});
|
||||
} else {
|
||||
// Ignore unexpected types (e.g. booleans) to avoid garbage output
|
||||
}
|
||||
}
|
||||
|
||||
// Mock JSON Stringifier lifecycle methods
|
||||
pub fn beginWriteRaw(_: @This()) !void {}
|
||||
pub fn endWriteRaw(_: @This()) void {}
|
||||
};
|
||||
|
||||
const w: TextCaptureWriter = .{ .aw = &aw, .writer = &aw.writer };
|
||||
|
||||
const source = try self.writeName(w, page);
|
||||
if (source != null) {
|
||||
// Remove literal quotes inserted by writeString.
|
||||
// std.mem.trim strips every leading/trailing '"', so quotes that are
|
||||
// part of the name itself are removed at the edges as well.
|
||||
// NOTE(review): any JSON escaping applied by writeString would remain
|
||||
// in the result - writeString is not visible here, confirm.
var raw_text = std.mem.trim(u8, aw.written(), "\"");
|
||||
raw_text = std.mem.trim(u8, raw_text, &std.ascii.whitespace);
|
||||
return try allocator.dupe(u8, raw_text);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource {
|
||||
const node = axnode.dom;
|
||||
|
||||
@@ -823,15 +865,17 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource {
|
||||
.object, .progress, .meter, .main, .nav, .aside, .header,
|
||||
.footer, .form, .section, .article, .ul, .ol, .dl, .menu,
|
||||
.thead, .tbody, .tfoot, .tr, .td, .div, .span, .p, .details, .li,
|
||||
.style, .script,
|
||||
.style, .script, .html, .body,
|
||||
// zig fmt: on
|
||||
=> {},
|
||||
else => {
|
||||
// write text content if exists.
|
||||
var buf = std.Io.Writer.Allocating.init(page.call_arena);
|
||||
try el.getInnerText(&buf.writer);
|
||||
try writeString(buf.written(), w);
|
||||
return .contents;
|
||||
var buf: std.Io.Writer.Allocating = .init(page.call_arena);
|
||||
try writeAccessibleNameFallback(node, &buf.writer, page);
|
||||
if (buf.written().len > 0) {
|
||||
try writeString(buf.written(), w);
|
||||
return .contents;
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
@@ -855,6 +899,48 @@ fn writeName(axnode: AXNode, w: anytype, page: *Page) !?AXSource {
|
||||
};
|
||||
}
|
||||
|
||||
/// Writes a text approximation of `node`'s contents to `writer`, visiting
/// descendants recursively in document order. Each piece is followed by a
/// single space:
/// - text nodes contribute their whitespace-trimmed content when non-empty;
/// - `<img>` contributes its `alt` attribute when present;
/// - `<svg>` contributes the contents of its direct `<title>` children;
/// - metadata elements are skipped; all other elements are recursed into.
fn writeAccessibleNameFallback(node: *DOMNode, writer: *std.Io.Writer, page: *Page) !void {
    var children = node.childrenIterator();
    while (children.next()) |child| {
        switch (child._type) {
            .cdata => |cd| switch (cd._type) {
                .text => |*text| {
                    const trimmed = std.mem.trim(u8, text.getWholeText(), &std.ascii.whitespace);
                    if (trimmed.len == 0) continue;
                    try writer.writeAll(trimmed);
                    try writer.writeByte(' ');
                },
                else => {},
            },
            .element => |el| switch (el.getTag()) {
                .img => {
                    const alt = el.getAttributeSafe(.wrap("alt")) orelse continue;
                    try writer.writeAll(alt);
                    try writer.writeByte(' ');
                },
                .svg => {
                    // Try to find a <title> inside SVG
                    var svg_children = child.childrenIterator();
                    while (svg_children.next()) |svg_child| {
                        const svg_el = svg_child.is(DOMNode.Element) orelse continue;
                        if (!std.mem.eql(u8, svg_el.getTagNameLower(), "title")) continue;
                        try writeAccessibleNameFallback(svg_child, writer, page);
                        try writer.writeByte(' ');
                    }
                },
                else => {
                    if (el.getTag().isMetadata()) continue;
                    try writeAccessibleNameFallback(child, writer, page);
                },
            },
            else => {},
        }
    }
}
|
||||
|
||||
fn isHidden(elt: *DOMNode.Element) bool {
|
||||
if (elt.getAttributeSafe(comptime .wrap("aria-hidden"))) |value| {
|
||||
if (std.mem.eql(u8, value, "true")) {
|
||||
@@ -987,7 +1073,7 @@ fn isIgnore(self: AXNode, page: *Page) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
fn getRole(self: AXNode) ![]const u8 {
|
||||
pub fn getRole(self: AXNode) ![]const u8 {
|
||||
if (self.role_attr) |role_value| {
|
||||
// TODO the role can have multiple comma separated values.
|
||||
return role_value;
|
||||
|
||||
@@ -18,25 +18,67 @@
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
const log = @import("../../log.zig");
|
||||
const markdown = lp.markdown;
|
||||
const SemanticTree = lp.SemanticTree;
|
||||
const interactive = lp.interactive;
|
||||
const structured_data = lp.structured_data;
|
||||
const Node = @import("../Node.zig");
|
||||
const DOMNode = @import("../../browser/webapi/Node.zig");
|
||||
|
||||
/// Dispatches a command of this domain to its handler, selected by
/// `cmd.input.action`. Returns `error.UnknownMethod` for any other action.
pub fn processMessage(cmd: anytype) !void {
    const Action = enum {
        getMarkdown,
        getSemanticTree,
        getInteractiveElements,
        getStructuredData,
    };
    const action = std.meta.stringToEnum(Action, cmd.input.action) orelse return error.UnknownMethod;

    return switch (action) {
        .getMarkdown => getMarkdown(cmd),
        .getSemanticTree => getSemanticTree(cmd),
        .getInteractiveElements => getInteractiveElements(cmd),
        .getStructuredData => getStructuredData(cmd),
    };
}
|
||||
|
||||
/// Handles the `getSemanticTree` action.
///
/// Builds a `SemanticTree` rooted at the current page's document and sends it
/// back under the `semanticTree` key of the result.
///
/// Optional params:
///   - `format`: when set to `text`, the tree is rendered with
///     `textStringify` and sent as a string instead of as the tree value.
///   - `prune`: explicit pruning switch. When omitted, pruning defaults to
///     `true` for the text format and `false` otherwise.
///
/// Errors: `error.NoBrowserContext` when the command has no browser context,
/// `error.PageNotLoaded` when the session has no current page, plus anything
/// returned by param parsing, rendering, or sending the result.
fn getSemanticTree(cmd: anytype) !void {
    const Params = struct {
        format: ?enum { text } = null,
        prune: ?bool = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};

    const bc = cmd.browser_context orelse return error.NoBrowserContext;
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const want_text = if (params.format) |requested| requested == .text else false;

    const st: SemanticTree = .{
        .dom_node = page.document.asNode(),
        .registry = &bc.node_registry,
        .page = page,
        .arena = cmd.arena,
        // An explicit `prune` always wins; otherwise only the text rendering
        // prunes by default.
        .prune = params.prune orelse want_text,
    };

    if (!want_text) {
        return cmd.sendResult(.{ .semanticTree = st }, .{});
    }

    // Text rendering is buffered in the command arena and sent as one string.
    var aw: std.Io.Writer.Allocating = .init(cmd.arena);
    defer aw.deinit();
    try st.textStringify(&aw.writer);

    return cmd.sendResult(.{ .semanticTree = aw.written() }, .{});
}
|
||||
|
||||
fn getMarkdown(cmd: anytype) !void {
|
||||
const Params = struct {
|
||||
nodeId: ?Node.Id = null,
|
||||
@@ -51,7 +93,7 @@ fn getMarkdown(cmd: anytype) !void {
|
||||
else
|
||||
page.document.asNode();
|
||||
|
||||
var aw = std.Io.Writer.Allocating.init(cmd.arena);
|
||||
var aw: std.Io.Writer.Allocating = .init(cmd.arena);
|
||||
defer aw.deinit();
|
||||
try markdown.dump(dom_node, .{}, &aw.writer, page);
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ pub const Network = @import("network/Runtime.zig");
|
||||
pub const Server = @import("Server.zig");
|
||||
pub const Config = @import("Config.zig");
|
||||
pub const URL = @import("browser/URL.zig");
|
||||
pub const String = @import("string.zig").String;
|
||||
pub const Page = @import("browser/Page.zig");
|
||||
pub const Browser = @import("browser/Browser.zig");
|
||||
pub const Session = @import("browser/Session.zig");
|
||||
@@ -31,6 +32,8 @@ pub const log = @import("log.zig");
|
||||
pub const js = @import("browser/js/js.zig");
|
||||
pub const dump = @import("browser/dump.zig");
|
||||
pub const markdown = @import("browser/markdown.zig");
|
||||
pub const SemanticTree = @import("SemanticTree.zig");
|
||||
pub const CDPNode = @import("cdp/Node.zig");
|
||||
pub const interactive = @import("browser/interactive.zig");
|
||||
pub const structured_data = @import("browser/structured_data.zig");
|
||||
pub const mcp = @import("mcp.zig");
|
||||
@@ -110,6 +113,24 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void {
|
||||
switch (mode) {
|
||||
.html => try dump.root(page.window._document, opts.dump, writer, page),
|
||||
.markdown => try markdown.dump(page.window._document.asNode(), .{}, writer, page),
|
||||
.semantic_tree, .semantic_tree_text => {
|
||||
var registry = CDPNode.Registry.init(app.allocator);
|
||||
defer registry.deinit();
|
||||
|
||||
const st: SemanticTree = .{
|
||||
.dom_node = page.window._document.asNode(),
|
||||
.registry = &registry,
|
||||
.page = page,
|
||||
.arena = page.call_arena,
|
||||
.prune = (mode == .semantic_tree_text),
|
||||
};
|
||||
|
||||
if (mode == .semantic_tree) {
|
||||
try std.json.Stringify.value(st, .{}, writer);
|
||||
} else {
|
||||
try st.textStringify(writer);
|
||||
}
|
||||
},
|
||||
.wpt => try dumpWPT(page, writer),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ const HttpClient = @import("../browser/HttpClient.zig");
|
||||
const testing = @import("../testing.zig");
|
||||
const protocol = @import("protocol.zig");
|
||||
const router = @import("router.zig");
|
||||
const CDPNode = @import("../cdp/Node.zig");
|
||||
|
||||
const Self = @This();
|
||||
|
||||
@@ -17,6 +18,7 @@ http_client: *HttpClient,
|
||||
notification: *lp.Notification,
|
||||
browser: lp.Browser,
|
||||
session: *lp.Session,
|
||||
node_registry: CDPNode.Registry,
|
||||
|
||||
writer: *std.io.Writer,
|
||||
mutex: std.Thread.Mutex = .{},
|
||||
@@ -44,12 +46,15 @@ pub fn init(allocator: std.mem.Allocator, app: *App, writer: *std.io.Writer) !*S
|
||||
.http_client = http_client,
|
||||
.notification = notification,
|
||||
.session = undefined,
|
||||
.node_registry = CDPNode.Registry.init(allocator),
|
||||
};
|
||||
|
||||
self.session = try self.browser.newSession(self.notification);
|
||||
return self;
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
self.node_registry.deinit();
|
||||
self.aw.deinit();
|
||||
self.browser.deinit();
|
||||
self.notification.deinit();
|
||||
@@ -82,7 +87,7 @@ pub fn sendResult(self: *Self, id: std.json.Value, result: anytype) !void {
|
||||
}
|
||||
|
||||
pub fn sendError(self: *Self, id: std.json.Value, code: protocol.ErrorCode, message: []const u8) !void {
|
||||
try self.sendResponse(protocol.Response{
|
||||
try self.sendResponse(.{
|
||||
.id = id,
|
||||
.@"error" = protocol.Error{
|
||||
.code = @intFromEnum(code),
|
||||
|
||||
@@ -114,6 +114,7 @@ pub const Tool = struct {
|
||||
};
|
||||
|
||||
pub fn minify(comptime json: []const u8) []const u8 {
|
||||
@setEvalBranchQuota(100000);
|
||||
return comptime blk: {
|
||||
var res: []const u8 = "";
|
||||
var in_string = false;
|
||||
|
||||
@@ -8,6 +8,7 @@ const Element = @import("../browser/webapi/Element.zig");
|
||||
const Selector = @import("../browser/webapi/selector/Selector.zig");
|
||||
const protocol = @import("protocol.zig");
|
||||
const Server = @import("Server.zig");
|
||||
const CDPNode = @import("../cdp/Node.zig");
|
||||
|
||||
pub const tool_list = [_]protocol.Tool{
|
||||
.{
|
||||
@@ -61,6 +62,18 @@ pub const tool_list = [_]protocol.Tool{
|
||||
\\}
|
||||
),
|
||||
},
|
||||
.{
|
||||
.name = "semantic_tree",
|
||||
.description = "Get the page content as a simplified semantic DOM tree for AI reasoning. If a url is provided, it navigates to that url first.",
|
||||
.inputSchema = protocol.minify(
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
\\ "properties": {
|
||||
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." }
|
||||
\\ }
|
||||
\\}
|
||||
),
|
||||
},
|
||||
.{
|
||||
.name = "interactiveElements",
|
||||
.description = "Extract interactive elements from the opened page. If a url is provided, it navigates to that url first.",
|
||||
@@ -103,13 +116,16 @@ const EvaluateParams = struct {
|
||||
|
||||
const ToolStreamingText = struct {
|
||||
page: *lp.Page,
|
||||
action: enum { markdown, links },
|
||||
action: enum { markdown, links, semantic_tree },
|
||||
registry: ?*CDPNode.Registry = null,
|
||||
arena: ?std.mem.Allocator = null,
|
||||
|
||||
pub fn jsonStringify(self: @This(), jw: *std.json.Stringify) !void {
|
||||
try jw.beginWriteRaw();
|
||||
try jw.writer.writeByte('"');
|
||||
var escaped = protocol.JsonEscapingWriter.init(jw.writer);
|
||||
var escaped: protocol.JsonEscapingWriter = .init(jw.writer);
|
||||
const w = &escaped.writer;
|
||||
|
||||
switch (self.action) {
|
||||
.markdown => lp.markdown.dump(self.page.document.asNode(), .{}, w, self.page) catch |err| {
|
||||
log.err(.mcp, "markdown dump failed", .{ .err = err });
|
||||
@@ -137,7 +153,21 @@ const ToolStreamingText = struct {
|
||||
log.err(.mcp, "query links failed", .{ .err = err });
|
||||
}
|
||||
},
|
||||
.semantic_tree => {
|
||||
const st = lp.SemanticTree{
|
||||
.dom_node = self.page.document.asNode(),
|
||||
.registry = self.registry.?,
|
||||
.page = self.page,
|
||||
.arena = self.arena.?,
|
||||
.prune = true,
|
||||
};
|
||||
|
||||
st.textStringify(w) catch |err| {
|
||||
log.err(.mcp, "semantic tree dump failed", .{ .err = err });
|
||||
};
|
||||
},
|
||||
}
|
||||
|
||||
try jw.writer.writeByte('"');
|
||||
jw.endWriteRaw();
|
||||
}
|
||||
@@ -151,6 +181,7 @@ const ToolAction = enum {
|
||||
interactiveElements,
|
||||
structuredData,
|
||||
evaluate,
|
||||
semantic_tree,
|
||||
};
|
||||
|
||||
const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
|
||||
@@ -161,6 +192,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
|
||||
.{ "interactiveElements", .interactiveElements },
|
||||
.{ "structuredData", .structuredData },
|
||||
.{ "evaluate", .evaluate },
|
||||
.{ "semantic_tree", .semantic_tree },
|
||||
});
|
||||
|
||||
pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Request) !void {
|
||||
@@ -188,6 +220,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
|
||||
.interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments),
|
||||
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
|
||||
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
|
||||
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,6 +274,27 @@ fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, ar
|
||||
try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
|
||||
}
|
||||
|
||||
/// Handles the `semantic_tree` tool call.
///
/// If `arguments` carries a `url`, navigates there first via `performGoto`.
/// Then replies with a single text content item whose text is a
/// `ToolStreamingText` configured for the `semantic_tree` action, using the
/// server's node registry and the given `arena`.
///
/// Arguments that fail to parse are ignored (no navigation happens). When the
/// session has no current page, a `PageNotLoaded` error response is sent
/// instead of a result.
fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
    const TreeParams = struct {
        url: ?[:0]const u8 = null,
    };

    if (arguments) |raw_args| {
        // A parse failure is deliberately treated the same as "no arguments".
        const parsed: ?TreeParams = std.json.parseFromValueLeaky(
            TreeParams,
            arena,
            raw_args,
            .{ .ignore_unknown_fields = true },
        ) catch null;

        if (parsed) |tree_params| {
            if (tree_params.url) |target| {
                try performGoto(server, target, id);
            }
        }
    }

    const page = server.session.currentPage() orelse
        return server.sendError(id, .PageNotLoaded, "Page not loaded");

    const streaming: ToolStreamingText = .{
        .page = page,
        .action = .semantic_tree,
        .registry = &server.node_registry,
        .arena = arena,
    };
    const content = [_]protocol.TextContent(ToolStreamingText){.{ .text = streaming }};

    try server.sendResult(id, protocol.CallToolResult(ToolStreamingText){ .content = &content });
}
|
||||
|
||||
fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
|
||||
const Params = struct {
|
||||
url: ?[:0]const u8 = null,
|
||||
|
||||
@@ -305,6 +305,12 @@ pub const String = packed struct {
|
||||
}
|
||||
};
|
||||
|
||||
/// Reports whether `text` consists solely of ASCII whitespace bytes.
///
/// An empty slice yields `true`, since it contains no non-whitespace byte.
/// Any other byte, including bytes of multi-byte UTF-8 sequences, yields
/// `false`.
pub fn isAllWhitespace(text: []const u8) bool {
    // `std.ascii.whitespace` is the byte set `std.ascii.isWhitespace` accepts,
    // so "no byte outside that set" is the same predicate.
    return std.mem.indexOfNone(u8, text, &std.ascii.whitespace) == null;
}
|
||||
|
||||
// Discriminatory type that signals the bridge to use arena instead of call_arena
|
||||
// Use this for strings that need to persist beyond the current call
|
||||
// The caller can unwrap and store just the underlying .str field
|
||||
|
||||
Reference in New Issue
Block a user