cdp: add LP domain and getMarkdown method

This PR introduces a custom CDP domain 'LP' (Lightpanda) to expose browser-specific tools. The first method, 'LP.getMarkdown', allows retrieving a Markdown representation of the DOM or a specific node by its 'nodeId'. This is optimized for AI agents and LLM-based scraping tasks.
This commit is contained in:
Adrià Arrufat
2026-03-03 16:11:31 +09:00
parent cce533ebb6
commit cc93180d57
2 changed files with 79 additions and 0 deletions

59
src/cdp/domains/lp.zig Normal file
View File

@@ -0,0 +1,59 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const lp = @import("lightpanda");
const markdown = lp.markdown;
const Node = @import("../Node.zig");
/// Entry point for the `LP` CDP domain: routes a command to the handler
/// matching `cmd.input.action`.
///
/// Returns `error.UnknownMethod` when the action name is not one of the
/// methods listed in `Method`; otherwise returns whatever the selected
/// handler returns.
pub fn processMessage(cmd: anytype) !void {
    // Every method exposed by this domain. The switch below has no `else`,
    // so adding a method here without a handler is a compile error.
    const Method = enum {
        getMarkdown,
    };

    const method = std.meta.stringToEnum(Method, cmd.input.action) orelse {
        return error.UnknownMethod;
    };

    return switch (method) {
        .getMarkdown => getMarkdown(cmd),
    };
}
/// Handles `LP.getMarkdown`: renders a DOM node as Markdown and sends it back
/// in the `markdown` field of the command result.
///
/// Params (the whole params object may be omitted):
///   - `nodeId`: optional id looked up in the browser context's node
///     registry. When absent, the current page's document node is rendered.
///
/// Errors:
///   - `error.NoBrowserContext` when the command has no browser context.
///   - `error.InvalidNodeId` when `nodeId` is not present in the registry.
///   - `error.PageNotLoaded` when the session has no current page.
///   - anything returned by `cmd.params`, `markdown.dump` or `cmd.sendResult`.
fn getMarkdown(cmd: anytype) !void {
    const Params = struct {
        nodeId: ?Node.Id = null,
    };
    const params = (try cmd.params(Params)) orelse Params{};
    const bc = cmd.browser_context orelse return error.NoBrowserContext;

    // Resolve an explicit nodeId before touching the page, so an unknown id is
    // reported as InvalidNodeId even when no page is loaded.
    const registered = if (params.nodeId) |node_id|
        (bc.node_registry.lookup_by_id.get(node_id) orelse return error.InvalidNodeId)
    else
        null;

    // Single page lookup: the page is needed both as the default root (its
    // document) and as an argument to the Markdown dump.
    const page = bc.session.currentPage() orelse return error.PageNotLoaded;

    const dom_node = if (registered) |node| node.dom else page.window._document.asNode();

    var aw = std.Io.Writer.Allocating.init(cmd.arena);
    defer aw.deinit();
    try markdown.dump(dom_node, .{}, &aw.writer, page);

    // The return expression is evaluated before the deferred `deinit` runs,
    // so `aw.written()` is still valid while the result is being sent.
    return cmd.sendResult(.{
        .markdown = aw.written(),
    }, .{});
}