From cc93180d57db5690f0aefbd29675bddf2d4ec1e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?=
Date: Tue, 3 Mar 2026 16:11:31 +0900
Subject: [PATCH] cdp: add LP domain and getMarkdown method

This PR introduces a custom CDP domain 'LP' (Lightpanda) to expose
browser-specific tools. The first method, 'LP.getMarkdown', allows
retrieving a Markdown representation of the DOM or a specific node by
its 'nodeId'. This is optimized for AI agents and LLM-based scraping
tasks.
---
 src/cdp/cdp.zig        | 20 ++++++++++++++
 src/cdp/domains/lp.zig | 59 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 src/cdp/domains/lp.zig

diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig
index 6d7c31e5..9a11f567 100644
--- a/src/cdp/cdp.zig
+++ b/src/cdp/cdp.zig
@@ -219,6 +219,10 @@ pub fn CDPT(comptime TypeProvider: type) type {
             };
 
             switch (domain.len) {
+                2 => switch (@as(u16, @bitCast(domain[0..2].*))) {
+                    asUint(u16, "LP") => return @import("domains/lp.zig").processMessage(command), // "LP" (Lightpanda) is a custom, non-standard CDP domain
+                    else => {},
+                },
                 3 => switch (@as(u24, @bitCast(domain[0..3].*))) {
                     asUint(u24, "DOM") => return @import("domains/dom.zig").processMessage(command),
                     asUint(u24, "Log") => return @import("domains/log.zig").processMessage(command),
@@ -978,3 +982,19 @@ test "cdp: STARTUP sessionId" {
         try ctx.expectSentResult(null, .{ .id = 4, .index = 0, .session_id = "STARTUP" });
     }
 }
+
+test "cdp: LP.getMarkdown" {
+    var ctx = testing.context();
+    defer ctx.deinit();
+
+    const bc = try ctx.loadBrowserContext(.{});
+    _ = try bc.session.createPage(); // the handler reads bc.session.currentPage(), so a page is created first
+
+    try ctx.processMessage(.{
+        .id = 1,
+        .method = "LP.getMarkdown", // no params: the handler dumps the current page's document node
+    });
+
+    const result = ctx.client.?.sent.items[0].object.get("result").?.object; // "result" object of the first message sent to the client
+    try testing.expect(result.get("markdown") != null); // checks only that the field is present, not its contents
+}
diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig
new file mode 100644
index 00000000..d2c1e6ad
--- /dev/null
+++ b/src/cdp/domains/lp.zig
@@ -0,0 +1,59 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+const markdown = lp.markdown;
+const Node = @import("../Node.zig");
+
+pub fn processMessage(cmd: anytype) !void { // routes an "LP.<action>" command to its handler
+    const action = std.meta.stringToEnum(enum {
+        getMarkdown,
+    }, cmd.input.action) orelse return error.UnknownMethod; // any action name not listed in the enum is rejected
+
+    switch (action) {
+        .getMarkdown => return getMarkdown(cmd),
+    }
+}
+
+fn getMarkdown(cmd: anytype) !void { // LP.getMarkdown: replies with `.markdown`, the markdown.dump output for one node
+    const Params = struct {
+        nodeId: ?Node.Id = null, // optional; when absent the current page's document node is used
+    };
+    const params = (try cmd.params(Params)) orelse Params{}; // a command without params is treated as "no nodeId"
+
+    const bc = cmd.browser_context orelse return error.NoBrowserContext;
+
+    const dom_node = if (params.nodeId) |nodeId| blk: {
+        const node = bc.node_registry.lookup_by_id.get(nodeId) orelse return error.InvalidNodeId; // id not present in the registry
+        break :blk node.dom;
+    } else blk: {
+        const page = bc.session.currentPage() orelse return error.PageNotLoaded;
+        break :blk page.window._document.asNode();
+    };
+
+    const page = bc.session.currentPage() orelse return error.PageNotLoaded; // markdown.dump takes the page even when a nodeId was given
+
+    var aw = std.Io.Writer.Allocating.init(cmd.arena); // output buffer backed by cmd.arena
+    defer aw.deinit();
+    try markdown.dump(dom_node, .{}, &aw.writer, page);
+
+    return cmd.sendResult(.{
+        .markdown = aw.written(),
+    }, .{});
+}