From 26be25c3d53981d1aa9cd1c97412fe479144339e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 3 Feb 2025 15:49:48 +0100 Subject: [PATCH 1/9] cdp: dispatch a DOM.documentUpdated event --- src/cdp/page.zig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cdp/page.zig b/src/cdp/page.zig index 2079f106..04d0159a 100644 --- a/src/cdp/page.zig +++ b/src/cdp/page.zig @@ -361,6 +361,16 @@ fn navigate( ); } + // DOM.documentUpdated + try sendEvent( + alloc, + ctx, + "DOM.documentUpdated", + struct {}, + .{}, + input.sessionId, + ); + // frameNavigated event const FrameNavigated = struct { frame: Frame, From 8f8a1fda8504c6cfab4602908c571bf6c0f30d1e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 4 Feb 2025 15:48:27 +0100 Subject: [PATCH 2/9] cdp: implement DOM.getDocument --- src/cdp/dom.zig | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/cdp/dom.zig b/src/cdp/dom.zig index 0e8c1d66..1f8d356a 100644 --- a/src/cdp/dom.zig +++ b/src/cdp/dom.zig @@ -25,10 +25,13 @@ const result = cdp.result; const IncomingMessage = @import("msg.zig").IncomingMessage; const Input = @import("msg.zig").Input; +const parser = @import("netsurf"); + const log = std.log.scoped(.cdp); const Methods = enum { enable, + getDocument, }; pub fn dom( @@ -42,6 +45,7 @@ pub fn dom( return switch (method) { .enable => enable(alloc, msg, ctx), + .getDocument => getDocument(alloc, msg, ctx), }; } @@ -57,3 +61,79 @@ fn enable( return result(alloc, input.id, null, null, input.sessionId); } + +const NodeId = u32; + +const Node = struct { + nodeId: NodeId, + parentId: ?NodeId = null, + backendNodeId: NodeId, + nodeType: u32, + nodeName: []const u8 = "", + localName: []const u8 = "", + nodeValue: []const u8 = "", + childNodeCount: u32, + children: ?[]const Node = null, + documentURL: ?[]const u8 = null, + baseURL: ?[]const u8 = null, + xmlVersion: []const u8 = "", + compatibilityMode: []const u8 = "NoQuirksMode", + 
isScrollable: bool = false, + + fn init(n: *parser.Node) !Node { + const children = try parser.nodeGetChildNodes(n); + const ln = try parser.nodeListLength(children); + + return .{ + .nodeId = 1, + .backendNodeId = 1, + .nodeType = @intFromEnum(try parser.nodeType(n)), + .nodeName = try parser.nodeName(n), + .localName = try parser.nodeLocalName(n), + .nodeValue = try parser.nodeValue(n) orelse "", + .childNodeCount = ln, + }; + } +}; + +// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-getDocument +fn getDocument( + alloc: std.mem.Allocator, + msg: *IncomingMessage, + ctx: *Ctx, +) ![]const u8 { + // input + const Params = struct { + depth: ?u32 = null, + pierce: ?bool = null, + }; + const input = try Input(Params).get(alloc, msg); + defer input.deinit(); + std.debug.assert(input.sessionId != null); + log.debug("Req > id {d}, method {s}", .{ input.id, "DOM.getDocument" }); + + if (ctx.browser.session.page == null) { + return error.NoPage; + } + + // retrieve the root node + const page = ctx.browser.session.page.?; + + if (page.doc == null) { + return error.NoDocument; + } + + const root = try parser.documentGetDocumentElement(page.doc.?) 
orelse { + return error.NoRoot; + }; + + // output + const Resp = struct { + root: Node, + }; + const resp: Resp = .{ + .root = try Node.init(parser.elementToNode(root)), + }; + + return result(alloc, input.id, Resp, resp, input.sessionId); +} From e7dcb8a6052a90271532ce5bde29e934392c3e13 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 5 Feb 2025 17:48:18 +0100 Subject: [PATCH 3/9] cdp: introduce current page avoid page struct copy --- src/browser/browser.zig | 6 ++++++ src/cdp/dom.zig | 10 ++-------- src/cdp/page.zig | 2 +- src/cdp/target.zig | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index e7e2445c..ebdc3dbb 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -82,6 +82,12 @@ pub const Browser = struct { self.session.deinit(); try Session.init(&self.session, alloc, loop, uri); } + + pub fn currentPage(self: *Browser) ?*Page { + if (self.session.page == null) return null; + + return &self.session.page.?; + } }; // Session is like a browser's tab. diff --git a/src/cdp/dom.zig b/src/cdp/dom.zig index 1f8d356a..eee737d5 100644 --- a/src/cdp/dom.zig +++ b/src/cdp/dom.zig @@ -112,16 +112,10 @@ fn getDocument( std.debug.assert(input.sessionId != null); log.debug("Req > id {d}, method {s}", .{ input.id, "DOM.getDocument" }); - if (ctx.browser.session.page == null) { - return error.NoPage; - } - // retrieve the root node - const page = ctx.browser.session.page.?; + const page = ctx.browser.currentPage() orelse return error.NoPage; - if (page.doc == null) { - return error.NoDocument; - } + if (page.doc == null) return error.NoDocument; const root = try parser.documentGetDocumentElement(page.doc.?) orelse { return error.NoRoot; diff --git a/src/cdp/page.zig b/src/cdp/page.zig index 04d0159a..a34f04db 100644 --- a/src/cdp/page.zig +++ b/src/cdp/page.zig @@ -333,7 +333,7 @@ fn navigate( // Launch navigate, the page must have been created by a // target.createTarget. 
- var p = ctx.browser.session.page orelse return error.NoPage; + var p = ctx.browser.currentPage() orelse return error.NoPage; ctx.state.executionContextId += 1; const auxData = try std.fmt.allocPrint( alloc, diff --git a/src/cdp/target.zig b/src/cdp/target.zig index 90384b64..ca9ae90f 100644 --- a/src/cdp/target.zig +++ b/src/cdp/target.zig @@ -353,7 +353,7 @@ fn createTarget( } // TODO stop the previous page instead? - if (ctx.browser.session.page != null) return error.pageAlreadyExists; + if (ctx.browser.currentPage() != null) return error.pageAlreadyExists; // create the page const p = try ctx.browser.session.createPage(); @@ -464,7 +464,7 @@ fn closeTarget( null, ); - if (ctx.browser.session.page != null) ctx.browser.session.page.?.end(); + if (ctx.browser.currentPage()) |page| page.end(); return ""; } From 402f72cfa8eaeb424ab1874bbdd0e01cf712be58 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 5 Feb 2025 17:48:58 +0100 Subject: [PATCH 4/9] cdp: adjust page deinit --- src/browser/browser.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index ebdc3dbb..a7b684b1 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -153,7 +153,7 @@ pub const Session = struct { } fn deinit(self: *Session) void { - if (self.page) |*p| p.end(); + if (self.page) |*p| p.deinit(); if (self.inspector) |inspector| { inspector.deinit(self.alloc); @@ -265,6 +265,7 @@ pub const Page = struct { self.session.window.replaceLocation(&self.location) catch |e| { log.err("reset window location: {any}", .{e}); }; + self.doc = null; // clear netsurf memory arena. 
parser.deinit(); @@ -273,6 +274,7 @@ pub const Page = struct { } pub fn deinit(self: *Page) void { + self.end(); self.arena.deinit(); self.session.page = null; } From 8b22313ca12632d85a18982cff7b3d1e739bef5c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 6 Feb 2025 17:15:21 +0100 Subject: [PATCH 5/9] netsurf: return empty string on null for node name --- src/netsurf/netsurf.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netsurf/netsurf.zig b/src/netsurf/netsurf.zig index 97716683..171e61be 100644 --- a/src/netsurf/netsurf.zig +++ b/src/netsurf/netsurf.zig @@ -1008,6 +1008,7 @@ pub fn nodeLocalName(node: *Node) ![]const u8 { var s: ?*String = undefined; const err = nodeVtable(node).dom_node_get_local_name.?(node, &s); try DOMErr(err); + if (s == null) return ""; var s_lower: ?*String = undefined; const errStr = c.dom_string_tolower(s, true, &s_lower); try DOMErr(errStr); @@ -1098,6 +1099,7 @@ pub fn nodeName(node: *Node) ![]const u8 { var s: ?*String = undefined; const err = nodeVtable(node).dom_node_get_node_name.?(node, &s); try DOMErr(err); + if (s == null) return ""; return strToData(s.?); } From 39b37867762c575bc0e22674db6ed543436b403a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 6 Feb 2025 17:17:44 +0100 Subject: [PATCH 6/9] cdp: ctx state has init and deinit now --- src/cdp/cdp.zig | 28 ++++++++++++++++++++++++++++ src/cdp/page.zig | 1 + src/server.zig | 9 ++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 65e2cc85..0ba40531 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -34,6 +34,7 @@ const IncomingMessage = @import("msg.zig").IncomingMessage; const Input = @import("msg.zig").Input; const inspector = @import("inspector.zig").inspector; const dom = @import("dom.zig").dom; +const cdpdom = @import("dom.zig"); const css = @import("css.zig").css; const security = @import("security.zig").security; @@ -129,6 +130,33 @@ pub const State = struct { loaderID: 
[]const u8 = LoaderID, page_life_cycle_events: bool = false, // TODO; Target based value + + // DOM + nodelist: cdpdom.NodeList, + nodesearchlist: cdpdom.NodeSearchList, + + pub fn init(alloc: std.mem.Allocator) State { + return .{ + .nodelist = cdpdom.NodeList.init(alloc), + .nodesearchlist = cdpdom.NodeSearchList.init(alloc), + }; + } + + pub fn deinit(self: *State) void { + self.nodelist.deinit(); + + // deinit all node searches. + for (self.nodesearchlist.items) |*s| s.deinit(); + self.nodesearchlist.deinit(); + } + + pub fn reset(self: *State) void { + self.nodelist.reset(); + + // deinit all node searches. + for (self.nodesearchlist.items) |*s| s.deinit(); + self.nodesearchlist.clearAndFree(); + } }; // Utils diff --git a/src/cdp/page.zig b/src/cdp/page.zig index a34f04db..8b1470b9 100644 --- a/src/cdp/page.zig +++ b/src/cdp/page.zig @@ -259,6 +259,7 @@ fn navigate( log.debug("Req > id {d}, method {s}", .{ input.id, "page.navigate" }); // change state + ctx.state.reset(); ctx.state.url = input.params.url; // TODO: hard coded ID ctx.state.loaderID = "AF8667A203C5392DBE9AC290044AA4C2"; diff --git a/src/server.zig b/src/server.zig index cbe16e12..bc58c64b 100644 --- a/src/server.zig +++ b/src/server.zig @@ -69,13 +69,17 @@ pub const Ctx = struct { last_active: ?std.time.Instant = null, // CDP - state: cdp.State = .{}, + state: cdp.State = undefined, // JS fields browser: *Browser, // TODO: is pointer mandatory here? 
sessionNew: bool, // try_catch: jsruntime.TryCatch, // TODO + pub fn deinit(self: *Ctx) void { + self.state.deinit(); + } + // callbacks // --------- @@ -458,7 +462,10 @@ pub fn handle( .accept_completion = &accept_completion, .conn_completion = &conn_completion, .timeout_completion = &timeout_completion, + .state = cdp.State.init(browser.session.alloc), }; + defer ctx.deinit(); + try browser.session.initInspector( &ctx, Ctx.onInspectorResp, From 4e4a8f1babca9db0caf6c8dae32f9a5daa8163ea Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 6 Feb 2025 17:18:23 +0100 Subject: [PATCH 7/9] cdp: implement DOM.performSearch --- src/cdp/dom.zig | 121 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 7 deletions(-) diff --git a/src/cdp/dom.zig b/src/cdp/dom.zig index eee737d5..3a4ceb66 100644 --- a/src/cdp/dom.zig +++ b/src/cdp/dom.zig @@ -24,6 +24,7 @@ const cdp = @import("cdp.zig"); const result = cdp.result; const IncomingMessage = @import("msg.zig").IncomingMessage; const Input = @import("msg.zig").Input; +const css = @import("../dom/css.zig"); const parser = @import("netsurf"); @@ -32,6 +33,7 @@ const log = std.log.scoped(.cdp); const Methods = enum { enable, getDocument, + performSearch, }; pub fn dom( @@ -46,6 +48,7 @@ pub fn dom( return switch (method) { .enable => enable(alloc, msg, ctx), .getDocument => getDocument(alloc, msg, ctx), + .performSearch => performSearch(alloc, msg, ctx), }; } @@ -62,6 +65,36 @@ fn enable( return result(alloc, input.id, null, null, input.sessionId); } +// NodeList references tree nodes with an array id. 
+pub const NodeList = struct { + coll: List, + + const List = std.ArrayList(*parser.Node); + + pub fn init(alloc: std.mem.Allocator) NodeList { + return .{ + .coll = List.init(alloc), + }; + } + + pub fn deinit(self: *NodeList) void { + self.coll.deinit(); + } + + pub fn reset(self: *NodeList) void { + self.coll.clearAndFree(); + } + + pub fn set(self: *NodeList, node: *parser.Node) !NodeId { + for (self.coll.items, 0..) |n, i| { + if (n == node) return @intCast(i + 1); + } + + try self.coll.append(node); + return @intCast(self.coll.items.len); + } +}; + const NodeId = u32; const Node = struct { @@ -80,13 +113,13 @@ const Node = struct { compatibilityMode: []const u8 = "NoQuirksMode", isScrollable: bool = false, - fn init(n: *parser.Node) !Node { + fn init(n: *parser.Node, id: NodeId) !Node { const children = try parser.nodeGetChildNodes(n); const ln = try parser.nodeListLength(children); return .{ - .nodeId = 1, - .backendNodeId = 1, + .nodeId = id, + .backendNodeId = id, .nodeType = @intFromEnum(try parser.nodeType(n)), .nodeName = try parser.nodeName(n), .localName = try parser.nodeLocalName(n), @@ -117,16 +150,90 @@ fn getDocument( if (page.doc == null) return error.NoDocument; - const root = try parser.documentGetDocumentElement(page.doc.?) 
orelse { - return error.NoRoot; - }; + const node = parser.documentToNode(page.doc.?); + const id = try ctx.state.nodelist.set(node); // output const Resp = struct { root: Node, }; const resp: Resp = .{ - .root = try Node.init(parser.elementToNode(root)), + .root = try Node.init(node, id), + }; + + return result(alloc, input.id, Resp, resp, input.sessionId); +} + +pub const NodeSearch = struct { + coll: List, + name: []u8, + alloc: std.mem.Allocator, + + var count: u8 = 0; + + const List = std.ArrayListUnmanaged(NodeId); + + pub fn initCapacity(alloc: std.mem.Allocator, ln: usize) !NodeSearch { + count += 1; + + return .{ + .alloc = alloc, + .coll = try List.initCapacity(alloc, ln), + .name = try std.fmt.allocPrint(alloc, "{d}", .{count}), + }; + } + + pub fn deinit(self: *NodeSearch) void { + self.coll.deinit(self.alloc); + self.alloc.free(self.name); + } + + pub fn append(self: *NodeSearch, id: NodeId) !void { + try self.coll.append(self.alloc, id); + } +}; +pub const NodeSearchList = std.ArrayList(NodeSearch); + +// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch +fn performSearch( + alloc: std.mem.Allocator, + msg: *IncomingMessage, + ctx: *Ctx, +) ![]const u8 { + // input + const Params = struct { + query: []const u8, + includeUserAgentShadowDOM: ?bool = null, + }; + const input = try Input(Params).get(alloc, msg); + defer input.deinit(); + std.debug.assert(input.sessionId != null); + log.debug("Req > id {d}, method {s}", .{ input.id, "DOM.performSearch" }); + + // retrieve the root node + const page = ctx.browser.currentPage() orelse return error.NoPage; + + if (page.doc == null) return error.NoDocument; + + const list = try css.querySelectorAll(alloc, parser.documentToNode(page.doc.?), input.params.query); + const ln = list.nodes.items.len; + var ns = try NodeSearch.initCapacity(alloc, ln); + + for (list.nodes.items) |n| { + const id = try ctx.state.nodelist.set(n); + try ns.append(id); + } + + try 
ctx.state.nodesearchlist.append(ns); + + // output + const Resp = struct { + searchId: []const u8, + resultCount: u32, + }; + const resp: Resp = .{ + .searchId = ns.name, + .resultCount = @intCast(ln), }; return result(alloc, input.id, Resp, resp, input.sessionId); From fb3b38aec7d8628791d3b4060b7d0a6991d30652 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 7 Feb 2025 15:01:57 +0100 Subject: [PATCH 8/9] cdp: implement getSearchResults and discardSearchResults --- src/cdp/dom.zig | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/src/cdp/dom.zig b/src/cdp/dom.zig index 3a4ceb66..cd12ef12 100644 --- a/src/cdp/dom.zig +++ b/src/cdp/dom.zig @@ -34,6 +34,8 @@ const Methods = enum { enable, getDocument, performSearch, + getSearchResults, + discardSearchResults, }; pub fn dom( @@ -49,6 +51,8 @@ pub fn dom( .enable => enable(alloc, msg, ctx), .getDocument => getDocument(alloc, msg, ctx), .performSearch => performSearch(alloc, msg, ctx), + .getSearchResults => getSearchResults(alloc, msg, ctx), + .discardSearchResults => discardSearchResults(alloc, msg, ctx), }; } @@ -238,3 +242,75 @@ fn performSearch( return result(alloc, input.id, Resp, resp, input.sessionId); } + +// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-discardSearchResults +fn discardSearchResults( + alloc: std.mem.Allocator, + msg: *IncomingMessage, + ctx: *Ctx, +) ![]const u8 { + // input + const Params = struct { + searchId: []const u8, + }; + const input = try Input(Params).get(alloc, msg); + defer input.deinit(); + std.debug.assert(input.sessionId != null); + log.debug("Req > id {d}, method {s}", .{ input.id, "DOM.discardSearchResults" }); + + // retrieve the search from context + for (ctx.state.nodesearchlist.items, 0..) 
|*s, i| { + if (!std.mem.eql(u8, s.name, input.params.searchId)) continue; + + s.deinit(); + _ = ctx.state.nodesearchlist.swapRemove(i); + break; + } + + return result(alloc, input.id, null, null, input.sessionId); +} + +// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-getSearchResults +fn getSearchResults( + alloc: std.mem.Allocator, + msg: *IncomingMessage, + ctx: *Ctx, +) ![]const u8 { + // input + const Params = struct { + searchId: []const u8, + fromIndex: u32, + toIndex: u32, + }; + const input = try Input(Params).get(alloc, msg); + defer input.deinit(); + std.debug.assert(input.sessionId != null); + log.debug("Req > id {d}, method {s}", .{ input.id, "DOM.getSearchResults" }); + + if (input.params.fromIndex >= input.params.toIndex) return error.BadIndices; + + // retrieve the search from context + var ns: ?*const NodeSearch = null; + for (ctx.state.nodesearchlist.items) |*s| { + if (!std.mem.eql(u8, s.name, input.params.searchId)) continue; + + ns = s; + break; + } + + if (ns == null) return error.searchResultNotFound; + const items = ns.?.coll.items; + + if (input.params.fromIndex >= items.len) return error.BadFromIndex; + if (input.params.toIndex > items.len) return error.BadToIndex; + + // output + const Resp = struct { + nodeIds: []NodeId, + }; + const resp: Resp = .{ + .nodeIds = ns.?.coll.items[input.params.fromIndex..input.params.toIndex], + }; + + return result(alloc, input.id, Resp, resp, input.sessionId); +} From 055530c8c654fd8291c73536819cf77f7cf75bb4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 10 Feb 2025 12:19:35 +0100 Subject: [PATCH 9/9] cdp: send dom node children --- src/cdp/dom.zig | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/cdp/dom.zig b/src/cdp/dom.zig index cd12ef12..d5c661f6 100644 --- a/src/cdp/dom.zig +++ b/src/cdp/dom.zig @@ -109,7 +109,7 @@ const Node = struct { nodeName: []const u8 = "", localName: []const u8 = "", 
nodeValue: []const u8 = "", - childNodeCount: u32, + childNodeCount: ?u32 = null, children: ?[]const Node = null, documentURL: ?[]const u8 = null, baseURL: ?[]const u8 = null, @@ -117,10 +117,8 @@ const Node = struct { compatibilityMode: []const u8 = "NoQuirksMode", isScrollable: bool = false, - fn init(n: *parser.Node, id: NodeId) !Node { - const children = try parser.nodeGetChildNodes(n); - const ln = try parser.nodeListLength(children); - + fn init(n: *parser.Node, nlist: *NodeList) !Node { + const id = try nlist.set(n); return .{ .nodeId = id, .backendNodeId = id, @@ -128,9 +126,32 @@ const Node = struct { .nodeName = try parser.nodeName(n), .localName = try parser.nodeLocalName(n), .nodeValue = try parser.nodeValue(n) orelse "", - .childNodeCount = ln, }; } + + fn initChildren( + self: *Node, + alloc: std.mem.Allocator, + n: *parser.Node, + nlist: *NodeList, + ) !std.ArrayList(Node) { + const children = try parser.nodeGetChildNodes(n); + const ln = try parser.nodeListLength(children); + self.childNodeCount = ln; + + var list = try std.ArrayList(Node).initCapacity(alloc, ln); + + var i: u32 = 0; + while (i < ln) { + defer i += 1; + const child = try parser.nodeListItem(children, i) orelse continue; + try list.append(try Node.init(child, nlist)); + } + + self.children = list.items; + + return list; + } }; // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-getDocument @@ -155,17 +176,22 @@ fn getDocument( if (page.doc == null) return error.NoDocument; const node = parser.documentToNode(page.doc.?); - const id = try ctx.state.nodelist.set(node); + var n = try Node.init(node, &ctx.state.nodelist); + var list = try n.initChildren(alloc, node, &ctx.state.nodelist); + defer list.deinit(); // output const Resp = struct { root: Node, }; const resp: Resp = .{ - .root = try Node.init(node, id), + .root = n, }; - return result(alloc, input.id, Resp, resp, input.sessionId); + const res = try result(alloc, input.id, Resp, resp, input.sessionId); + try 
ctx.send(res); + + return ""; } pub const NodeSearch = struct {