Lazily load nodes

Node registry now only tracks the node id (which we need to be consistent) and
the underlying parser.Node. All other data is loaded on-demand (i.e. when we
serialize the node). This lets us serialize a node's values as they are at
serialization time, rather than as they were when the node was registered.
This commit is contained in:
Karl Seguin
2025-04-04 11:24:34 +08:00
parent 68d1be3b94
commit 4d075818f6
3 changed files with 154 additions and 132 deletions

View File

@@ -22,32 +22,13 @@ const Allocator = std.mem.Allocator;
pub const Id = u32; pub const Id = u32;
const log = std.log.scoped(.cdp_node);
const Node = @This(); const Node = @This();
id: Id, id: Id,
parent_id: ?Id = null,
node_type: u32,
backend_node_id: Id,
node_name: []const u8,
local_name: []const u8,
node_value: []const u8,
child_node_count: u32,
children: []const *Node,
document_url: ?[]const u8,
base_url: ?[]const u8,
xml_version: []const u8,
compatibility_mode: CompatibilityMode,
is_scrollable: bool,
_node: *parser.Node, _node: *parser.Node,
const CompatibilityMode = enum {
NoQuirksMode,
};
pub fn writer(self: *const Node, opts: Writer.Opts) Writer {
return .{ .node = self, .opts = opts };
}
// Whenever we send a node to the client, we register it here for future lookup. // Whenever we send a node to the client, we register it here for future lookup.
// We maintain a node -> id and id -> node lookup. // We maintain a node -> id and id -> node lookup.
pub const Registry = struct { pub const Registry = struct {
@@ -94,66 +75,21 @@ pub const Registry = struct {
// but, just in case, let's try to keep things tidy. // but, just in case, let's try to keep things tidy.
errdefer _ = self.lookup_by_node.remove(n); errdefer _ = self.lookup_by_node.remove(n);
const id = self.node_id;
self.node_id = id + 1;
const child_nodes = try self.registerChildNodes(n);
const node = try self.node_pool.create(); const node = try self.node_pool.create();
errdefer self.node_pool.destroy(node); errdefer self.node_pool.destroy(node);
const id = self.node_id;
self.node_id = id + 1;
node.* = .{ node.* = .{
._node = n, ._node = n,
.id = id, .id = id,
.parent_id = null, // TODO
.backend_node_id = id, // ??
.node_name = parser.nodeName(n) catch return error.NodeNameError,
.local_name = parser.nodeLocalName(n) catch return error.NodeLocalNameError,
.node_value = (parser.nodeValue(n) catch return error.NameValueError) orelse "",
.node_type = @intFromEnum(parser.nodeType(n) catch return error.NodeTypeError),
.child_node_count = @intCast(child_nodes.len),
.children = child_nodes,
.document_url = null,
.base_url = null,
.xml_version = "",
.compatibility_mode = .NoQuirksMode,
.is_scrollable = false,
}; };
// if (try parser.nodeParentNode(n)) |pn| {
// _ = pn;
// // TODO
// }
node_lookup_gop.value_ptr.* = node; node_lookup_gop.value_ptr.* = node;
try self.lookup_by_id.putNoClobber(self.allocator, id, node); try self.lookup_by_id.putNoClobber(self.allocator, id, node);
return node; return node;
} }
pub fn registerChildNodes(self: *Registry, n: *parser.Node) RegisterError![]*Node {
const node_list = parser.nodeGetChildNodes(n) catch return error.GetChildNodeError;
const count = parser.nodeListLength(node_list) catch return error.NodeListLengthError;
var arr = try self.arena.allocator().alloc(*Node, count);
var i: usize = 0;
for (0..count) |_| {
const child = (parser.nodeListItem(node_list, @intCast(i)) catch return error.NodeListItemError) orelse continue;
arr[i] = try self.register(child);
i += 1;
}
return arr[0..i];
}
};
const RegisterError = error{
OutOfMemory,
GetChildNodeError,
NodeListLengthError,
NodeListItemError,
NodeNameError,
NodeLocalNameError,
NameValueError,
NodeTypeError,
}; };
const NodeContext = struct { const NodeContext = struct {
@@ -261,67 +197,98 @@ pub const Search = struct {
// Sometimes we want to serialize the node without children, sometimes with just // Sometimes we want to serialize the node without children, sometimes with just
// its direct children, and sometimes the entire tree. // its direct children, and sometimes the entire tree.
// (For now, we only support direct children) // (For now, we only support direct children)
pub const Writer = struct { pub const Writer = struct {
opts: Opts, opts: Opts,
node: *const Node, node: *const Node,
registry: *Registry,
pub const Opts = struct {}; pub const Opts = struct {};
pub fn jsonStringify(self: *const Writer, w: anytype) !void { pub fn jsonStringify(self: *const Writer, w: anytype) !void {
self.toJSON(w) catch |err| {
// The only error our jsonStringify method can return is
// @TypeOf(w).Error. In other words, our code can't return its own
// error, we can only return a writer error. Kinda sucks.
log.err("json stringify: {}", .{err});
return error.OutOfMemory;
};
}
fn toJSON(self: *const Writer, w: anytype) !void {
try w.beginObject(); try w.beginObject();
try writeCommon(self.node, w); try writeCommon(self.node, false, w);
try w.objectField("children");
try w.beginArray(); {
for (self.node.children) |node| { var registry = self.registry;
try w.beginObject(); const child_nodes = try parser.nodeGetChildNodes(self.node._node);
try writeCommon(node, w); const child_count = try parser.nodeListLength(child_nodes);
try w.endObject();
var i: usize = 0;
try w.objectField("children");
try w.beginArray();
for (0..child_count) |_| {
const child = (try parser.nodeListItem(child_nodes, @intCast(i))) orelse continue;
const child_node = try registry.register(child);
try w.beginObject();
try writeCommon(child_node, true, w);
try w.endObject();
i += 1;
}
try w.endArray();
try w.objectField("childNodeCount");
try w.write(i);
} }
try w.endArray();
try w.endObject(); try w.endObject();
} }
fn writeCommon(node: *const Node, w: anytype) !void { fn writeCommon(node: *const Node, include_child_count: bool, w: anytype) !void {
try w.objectField("nodeId"); try w.objectField("nodeId");
try w.write(node.id); try w.write(node.id);
if (node.parent_id) |pid| {
try w.objectField("parentId");
try w.write(pid);
}
try w.objectField("backendNodeId"); try w.objectField("backendNodeId");
try w.write(node.backend_node_id); try w.write(node.id);
const n = node._node;
// TODO:
// try w.objectField("parentId");
// try w.write(pid);
try w.objectField("nodeType"); try w.objectField("nodeType");
try w.write(node.node_type); try w.write(@intFromEnum(try parser.nodeType(n)));
try w.objectField("nodeName"); try w.objectField("nodeName");
try w.write(node.node_name); try w.write(try parser.nodeName(n));
try w.objectField("localName"); try w.objectField("localName");
try w.write(node.local_name); try w.write(try parser.nodeLocalName(n));
try w.objectField("nodeValue"); try w.objectField("nodeValue");
try w.write(node.node_value); try w.write((try parser.nodeValue(n)) orelse "");
try w.objectField("childNodeCount"); if (include_child_count) {
try w.write(node.child_node_count); try w.objectField("childNodeCount");
const child_nodes = try parser.nodeGetChildNodes(n);
try w.write(try parser.nodeListLength(child_nodes));
}
try w.objectField("documentURL"); try w.objectField("documentURL");
try w.write(node.document_url); try w.write(null);
try w.objectField("baseURL"); try w.objectField("baseURL");
try w.write(node.base_url); try w.write(null);
try w.objectField("xmlVersion"); try w.objectField("xmlVersion");
try w.write(node.xml_version); try w.write("");
try w.objectField("compatibilityMode"); try w.objectField("compatibilityMode");
try w.write(node.compatibility_mode); try w.write("NoQuirksMode");
try w.objectField("isScrollable"); try w.objectField("isScrollable");
try w.write(node.is_scrollable); try w.write(false);
} }
}; };
@@ -345,46 +312,18 @@ test "cdp Node: Registry register" {
try testing.expectEqual(node, n1c); try testing.expectEqual(node, n1c);
try testing.expectEqual(0, node.id); try testing.expectEqual(0, node.id);
try testing.expectEqual(null, node.parent_id);
try testing.expectEqual(1, node.node_type);
try testing.expectEqual(0, node.backend_node_id);
try testing.expectEqual("A", node.node_name);
try testing.expectEqual("a", node.local_name);
try testing.expectEqual("", node.node_value);
try testing.expectEqual(1, node.child_node_count);
try testing.expectEqual(1, node.children.len);
try testing.expectEqual(1, node.children[0].id);
try testing.expectEqual(null, node.document_url);
try testing.expectEqual(null, node.base_url);
try testing.expectEqual("", node.xml_version);
try testing.expectEqual(.NoQuirksMode, node.compatibility_mode);
try testing.expectEqual(false, node.is_scrollable);
try testing.expectEqual(n, node._node); try testing.expectEqual(n, node._node);
} }
{ {
const n = (try doc.querySelector("p")).?; const n = (try doc.querySelector("p")).?;
const node = try registry.register(n); const node = try registry.register(n);
const n1b = registry.lookup_by_id.get(2).?; const n1b = registry.lookup_by_id.get(1).?;
const n1c = registry.lookup_by_node.get(node._node).?; const n1c = registry.lookup_by_node.get(node._node).?;
try testing.expectEqual(node, n1b); try testing.expectEqual(node, n1b);
try testing.expectEqual(node, n1c); try testing.expectEqual(node, n1c);
try testing.expectEqual(2, node.id); try testing.expectEqual(1, node.id);
try testing.expectEqual(null, node.parent_id);
try testing.expectEqual(1, node.node_type);
try testing.expectEqual(2, node.backend_node_id);
try testing.expectEqual("P", node.node_name);
try testing.expectEqual("p", node.local_name);
try testing.expectEqual("", node.node_value);
try testing.expectEqual(1, node.child_node_count);
try testing.expectEqual(1, node.children.len);
try testing.expectEqual(3, node.children[0].id);
try testing.expectEqual(null, node.document_url);
try testing.expectEqual(null, node.base_url);
try testing.expectEqual("", node.xml_version);
try testing.expectEqual(.NoQuirksMode, node.compatibility_mode);
try testing.expectEqual(false, node.is_scrollable);
try testing.expectEqual(n, node._node); try testing.expectEqual(n, node._node);
} }
} }
@@ -449,17 +388,92 @@ test "cdp Node: Writer" {
{ {
const node = try registry.register(doc.asNode()); const node = try registry.register(doc.asNode());
const json = try std.json.stringifyAlloc(testing.allocator, node.writer(.{}), .{}); const json = try std.json.stringifyAlloc(testing.allocator, Writer{
.node = node,
.opts = .{},
.registry = &registry,
}, .{});
defer testing.allocator.free(json); defer testing.allocator.free(json);
try testing.expectJson(.{ .nodeId = 0, .backendNodeId = 0, .nodeType = 9, .nodeName = "#document", .localName = "", .nodeValue = "", .documentURL = null, .baseURL = null, .xmlVersion = "", .isScrollable = false, .compatibilityMode = "NoQuirksMode", .childNodeCount = 1, .children = &.{.{ .nodeId = 1, .backendNodeId = 1, .nodeType = 1, .nodeName = "HTML", .localName = "html", .nodeValue = "", .childNodeCount = 2, .documentURL = null, .baseURL = null, .xmlVersion = "", .compatibilityMode = "NoQuirksMode", .isScrollable = false }} }, json); try testing.expectJson(.{
.nodeId = 0,
.backendNodeId = 0,
.nodeType = 9,
.nodeName = "#document",
.localName = "",
.nodeValue = "",
.documentURL = null,
.baseURL = null,
.xmlVersion = "",
.isScrollable = false,
.compatibilityMode = "NoQuirksMode",
.childNodeCount = 1,
.children = &.{.{
.nodeId = 1,
.backendNodeId = 1,
.nodeType = 1,
.nodeName = "HTML",
.localName = "html",
.nodeValue = "",
.childNodeCount = 2,
.documentURL = null,
.baseURL = null,
.xmlVersion = "",
.compatibilityMode = "NoQuirksMode",
.isScrollable = false,
}},
}, json);
} }
{ {
const node = registry.lookup_by_id.get(1).?; const node = registry.lookup_by_id.get(1).?;
const json = try std.json.stringifyAlloc(testing.allocator, node.writer(.{}), .{}); const json = try std.json.stringifyAlloc(testing.allocator, Writer{
.node = node,
.opts = .{},
.registry = &registry,
}, .{});
defer testing.allocator.free(json); defer testing.allocator.free(json);
try testing.expectJson(.{ .nodeId = 1, .backendNodeId = 1, .nodeType = 1, .nodeName = "HTML", .localName = "html", .nodeValue = "", .childNodeCount = 2, .documentURL = null, .baseURL = null, .xmlVersion = "", .compatibilityMode = "NoQuirksMode", .isScrollable = false, .children = &.{ .{ .nodeId = 2, .backendNodeId = 2, .nodeType = 1, .nodeName = "HEAD", .localName = "head", .nodeValue = "", .childNodeCount = 0, .documentURL = null, .baseURL = null, .xmlVersion = "", .compatibilityMode = "NoQuirksMode", .isScrollable = false }, .{ .nodeId = 3, .backendNodeId = 3, .nodeType = 1, .nodeName = "BODY", .localName = "body", .nodeValue = "", .childNodeCount = 2, .documentURL = null, .baseURL = null, .xmlVersion = "", .compatibilityMode = "NoQuirksMode", .isScrollable = false } } }, json); try testing.expectJson(.{
.nodeId = 1,
.backendNodeId = 1,
.nodeType = 1,
.nodeName = "HTML",
.localName = "html",
.nodeValue = "",
.childNodeCount = 2,
.documentURL = null,
.baseURL = null,
.xmlVersion = "",
.compatibilityMode = "NoQuirksMode",
.isScrollable = false,
.children = &.{ .{
.nodeId = 2,
.backendNodeId = 2,
.nodeType = 1,
.nodeName = "HEAD",
.localName = "head",
.nodeValue = "",
.childNodeCount = 0,
.documentURL = null,
.baseURL = null,
.xmlVersion = "",
.compatibilityMode = "NoQuirksMode",
.isScrollable = false,
}, .{
.nodeId = 3,
.backendNodeId = 3,
.nodeType = 1,
.nodeName = "BODY",
.localName = "body",
.nodeValue = "",
.childNodeCount = 2,
.documentURL = null,
.baseURL = null,
.xmlVersion = "",
.compatibilityMode = "NoQuirksMode",
.isScrollable = false,
} },
}, json);
} }
} }

View File

@@ -325,6 +325,14 @@ pub fn BrowserContext(comptime CDP_T: type) type {
self.node_search_list.reset(); self.node_search_list.reset();
} }
pub fn nodeWriter(self: *Self, node: *const Node, opts: Node.Writer.Opts) Node.Writer {
return .{
.node = node,
.opts = opts,
.registry = &self.node_registry,
};
}
pub fn onInspectorResponse(ctx: *anyopaque, _: u32, msg: []const u8) void { pub fn onInspectorResponse(ctx: *anyopaque, _: u32, msg: []const u8) void {
if (std.log.defaultLogEnabled(.debug)) { if (std.log.defaultLogEnabled(.debug)) {
// msg should be {"id":<id>,... // msg should be {"id":<id>,...

View File

@@ -51,7 +51,7 @@ fn getDocument(cmd: anytype) !void {
const doc = page.doc orelse return error.DocumentNotLoaded; const doc = page.doc orelse return error.DocumentNotLoaded;
const node = try bc.node_registry.register(parser.documentToNode(doc)); const node = try bc.node_registry.register(parser.documentToNode(doc));
return cmd.sendResult(.{ .root = node.writer(.{}) }, .{}); return cmd.sendResult(bc.nodeWriter(node, .{}), .{});
} }
// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch
@@ -148,7 +148,7 @@ test "cdp.dom: search flow" {
.method = "DOM.getSearchResults", .method = "DOM.getSearchResults",
.params = .{ .searchId = "0", .fromIndex = 0, .toIndex = 2 }, .params = .{ .searchId = "0", .fromIndex = 0, .toIndex = 2 },
}); });
try ctx.expectSentResult(.{ .nodeIds = &.{ 0, 2 } }, .{ .id = 13 }); try ctx.expectSentResult(.{ .nodeIds = &.{ 0, 1 } }, .{ .id = 13 });
// different fromIndex // different fromIndex
try ctx.processMessage(.{ try ctx.processMessage(.{
@@ -156,7 +156,7 @@ test "cdp.dom: search flow" {
.method = "DOM.getSearchResults", .method = "DOM.getSearchResults",
.params = .{ .searchId = "0", .fromIndex = 1, .toIndex = 2 }, .params = .{ .searchId = "0", .fromIndex = 1, .toIndex = 2 },
}); });
try ctx.expectSentResult(.{ .nodeIds = &.{2} }, .{ .id = 14 }); try ctx.expectSentResult(.{ .nodeIds = &.{1} }, .{ .id = 14 });
// different toIndex // different toIndex
try ctx.processMessage(.{ try ctx.processMessage(.{