Compare commits

..

1 Commits

Author SHA1 Message Date
Pierre Tachoire
6bb8bc8391 ci: use proxy for integration tests 2026-04-03 09:24:17 +02:00
19 changed files with 197 additions and 331 deletions

View File

@@ -62,7 +62,7 @@ jobs:
- name: run end to end integration tests
continue-on-error: true
run: |
./lightpanda serve --log-level error & echo $! > LPD.pid
./lightpanda serve --http-proxy ${{ secrets.MASSIVE_PROXY_RESIDENTIAL_US }} --log-level error & echo $! > LPD.pid
go run integration/main.go |tee result.log
kill `cat LPD.pid`

View File

@@ -159,7 +159,6 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
pub fn cdpTimeout(self: *const Config) usize {
return switch (self.mode) {
.serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000,
.mcp => 10000, // Default timeout for MCP-CDP
else => unreachable,
};
}
@@ -167,7 +166,6 @@ pub fn cdpTimeout(self: *const Config) usize {
pub fn port(self: *const Config) u16 {
return switch (self.mode) {
.serve => |opts| opts.port,
.mcp => |opts| opts.cdp_port orelse 0,
else => unreachable,
};
}
@@ -175,7 +173,6 @@ pub fn port(self: *const Config) u16 {
pub fn advertiseHost(self: *const Config) []const u8 {
return switch (self.mode) {
.serve => |opts| opts.advertise_host orelse opts.host,
.mcp => "127.0.0.1",
else => unreachable,
};
}
@@ -194,7 +191,6 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
pub fn maxConnections(self: *const Config) u16 {
return switch (self.mode) {
.serve => |opts| opts.cdp_max_connections,
.mcp => 16,
else => unreachable,
};
}
@@ -202,7 +198,6 @@ pub fn maxConnections(self: *const Config) u16 {
pub fn maxPendingConnections(self: *const Config) u31 {
return switch (self.mode) {
.serve => |opts| opts.cdp_max_pending_connections,
.mcp => 128,
else => unreachable,
};
}
@@ -228,7 +223,6 @@ pub const Serve = struct {
pub const Mcp = struct {
common: Common = .{},
version: mcp.Version = .default,
cdp_port: ?u16 = null,
};
pub const DumpFormat = enum {
@@ -682,19 +676,6 @@ fn parseMcpArgs(
continue;
}
if (std.mem.eql(u8, "--cdp-port", opt) or std.mem.eql(u8, "--cdp_port", opt)) {
const str = args.next() orelse {
log.fatal(.mcp, "missing argument value", .{ .arg = opt });
return error.InvalidArgument;
};
result.cdp_port = std.fmt.parseInt(u16, str, 10) catch |err| {
log.fatal(.mcp, "invalid argument value", .{ .arg = opt, .err = err });
return error.InvalidArgument;
};
continue;
}
if (try parseCommonArg(allocator, opt, args, &result.common)) {
continue;
}

View File

@@ -297,12 +297,13 @@ pub const Client = struct {
}
var cdp = &self.mode.cdp;
const timeout_ms = self.ws.timeout_ms;
var last_message = milliTimestamp(.monotonic);
var ms_remaining = self.ws.timeout_ms;
while (true) {
const result = cdp.pageWait(timeout_ms) catch |wait_err| switch (wait_err) {
const result = cdp.pageWait(ms_remaining) catch |wait_err| switch (wait_err) {
error.NoPage => {
const status = http.tick(timeout_ms) catch |err| {
const status = http.tick(ms_remaining) catch |err| {
log.err(.app, "http tick", .{ .err = err });
return;
};
@@ -313,6 +314,8 @@ pub const Client = struct {
if (self.readSocket() == false) {
return;
}
last_message = milliTimestamp(.monotonic);
ms_remaining = self.ws.timeout_ms;
continue;
},
else => return wait_err,
@@ -323,10 +326,18 @@ pub const Client = struct {
if (self.readSocket() == false) {
return;
}
last_message = milliTimestamp(.monotonic);
ms_remaining = self.ws.timeout_ms;
},
.done => {
log.info(.app, "CDP timeout", .{});
return;
const now = milliTimestamp(.monotonic);
const elapsed = now - last_message;
if (elapsed >= ms_remaining) {
log.info(.app, "CDP timeout", .{});
return;
}
ms_remaining -= @intCast(elapsed);
last_message = now;
},
}
}

View File

@@ -235,6 +235,10 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void {
}
}
if (comptime IS_DEBUG and abort_all) {
std.debug.assert(self.active == 0);
}
{
var q = &self.queue;
var n = q.first;
@@ -255,16 +259,12 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void {
}
if (comptime IS_DEBUG and abort_all) {
// Even after an abort_all, we could still have transfers, but, at the
// very least, they should all be flagged as aborted.
var it = self.in_use.first;
var leftover: usize = 0;
while (it) |node| : (it = node.next) {
const conn: *http.Connection = @fieldParentPtr("node", node);
std.debug.assert((Transfer.fromConnection(conn) catch unreachable).aborted);
leftover += 1;
}
std.debug.assert(self.active == leftover);
std.debug.assert(self.in_use.first == null);
const running = self.handles.perform() catch |err| {
lp.assert(false, "multi perform in abort", .{ .err = err });
};
std.debug.assert(running == 0);
}
}

View File

@@ -351,30 +351,6 @@ pub fn deinit(self: *Page, abort_http: bool) void {
session.releaseArena(qn.arena);
}
{
// Release all objects we're referencing
{
var it = self._blob_urls.valueIterator();
while (it.next()) |blob| {
blob.*.releaseRef(session);
}
}
{
var it: ?*std.DoublyLinkedList.Node = self._mutation_observers.first;
while (it) |node| : (it = node.next) {
const observer: *MutationObserver = @fieldParentPtr("node", node);
observer.releaseRef(session);
}
}
for (self._intersection_observers.items) |observer| {
observer.releaseRef(session);
}
self.window._document._selection.releaseRef(session);
}
session.browser.env.destroyContext(self.js);
self._script_manager.shutdown = true;
@@ -438,15 +414,7 @@ pub fn releaseArena(self: *Page, allocator: Allocator) void {
pub fn isSameOrigin(self: *const Page, url: [:0]const u8) !bool {
const current_origin = self.origin orelse return false;
// fastpath
if (!std.mem.startsWith(u8, url, current_origin)) {
return false;
}
// Starting here, at least protocols are equals.
// Compare hosts (domain:port) strictly
return std.mem.eql(u8, URL.getHost(url), URL.getHost(current_origin));
return std.mem.startsWith(u8, url, current_origin);
}
/// Look up a blob URL in this page's registry.
@@ -1370,24 +1338,20 @@ pub fn schedulePerformanceObserverDelivery(self: *Page) !void {
}
pub fn registerMutationObserver(self: *Page, observer: *MutationObserver) !void {
observer.acquireRef();
self._mutation_observers.append(&observer.node);
}
pub fn unregisterMutationObserver(self: *Page, observer: *MutationObserver) void {
observer.releaseRef(self._session);
self._mutation_observers.remove(&observer.node);
}
pub fn registerIntersectionObserver(self: *Page, observer: *IntersectionObserver) !void {
observer.acquireRef();
try self._intersection_observers.append(self.arena, observer);
}
pub fn unregisterIntersectionObserver(self: *Page, observer: *IntersectionObserver) void {
for (self._intersection_observers.items, 0..) |obs, i| {
if (obs == observer) {
observer.releaseRef(self._session);
_ = self._intersection_observers.swapRemove(i);
return;
}
@@ -3624,41 +3588,3 @@ test "WebApi: Frames" {
test "WebApi: Integration" {
try testing.htmlRunner("integration", .{});
}
test "Page: isSameOrigin" {
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const allocator = arena.allocator();
var page: Page = undefined;
page.origin = null;
try testing.expectEqual(false, page.isSameOrigin("https://origin.com/"));
page.origin = try URL.getOrigin(allocator, "https://origin.com/foo/bar") orelse unreachable;
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/foo/bar")); // exact same
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/bar/bar")); // path differ
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/")); // path differ
try testing.expectEqual(true, page.isSameOrigin("https://origin.com")); // no path
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/foo?q=1"));
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/foo#hash"));
try testing.expectEqual(true, page.isSameOrigin("https://origin.com/foo?q=1#hash"));
// FIXME try testing.expectEqual(true, page.isSameOrigin("https://foo:bar@origin.com"));
// FIXME try testing.expectEqual(true, page.isSameOrigin("https://origin.com:443/foo"));
try testing.expectEqual(false, page.isSameOrigin("http://origin.com/")); // another proto
try testing.expectEqual(false, page.isSameOrigin("https://origin.com:123/")); // another port
try testing.expectEqual(false, page.isSameOrigin("https://sub.origin.com/")); // another subdomain
try testing.expectEqual(false, page.isSameOrigin("https://target.com/")); // different domain
try testing.expectEqual(false, page.isSameOrigin("https://origin.com.target.com/")); // different domain
try testing.expectEqual(false, page.isSameOrigin("https://target.com/@origin.com"));
page.origin = try URL.getOrigin(allocator, "https://origin.com:8443/foo") orelse unreachable;
try testing.expectEqual(true, page.isSameOrigin("https://origin.com:8443/bar"));
try testing.expectEqual(false, page.isSameOrigin("https://origin.com/bar")); // missing port
try testing.expectEqual(false, page.isSameOrigin("https://origin.com:9999/bar")); // wrong port
try testing.expectEqual(false, page.isSameOrigin(""));
try testing.expectEqual(false, page.isSameOrigin("not-a-url"));
try testing.expectEqual(false, page.isSameOrigin("//origin.com/foo"));
}

View File

@@ -68,6 +68,7 @@ pub fn waitCDP(self: *Runner, opts: WaitOpts) !CDPWaitResult {
fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
var timer = try std.time.Timer.start();
var ms_remaining = opts.ms;
const tick_opts = TickOpts{
.ms = 200,
@@ -91,10 +92,11 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
.cdp_socket => if (comptime is_cdp) return .cdp_socket else unreachable,
};
const ms_elapsed: u32 = @intCast(timer.read() / std.time.ns_per_ms);
if (ms_elapsed >= opts.ms) {
const ms_elapsed = timer.lap() / 1_000_000;
if (ms_elapsed >= ms_remaining) {
return .done;
}
ms_remaining -= @intCast(ms_elapsed);
if (next_ms > 0) {
std.Thread.sleep(std.time.ns_per_ms * next_ms);
}
@@ -235,16 +237,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
page._parse_state = .{ .raw_done = @errorName(err) };
return err;
},
.raw_done => {
if (comptime is_cdp) {
const http_result = try http_client.tick(@intCast(opts.ms));
if (http_result == .cdp_socket) {
return .cdp_socket;
}
return .{ .ok = 0 };
}
return .done;
},
.raw_done => return .done,
}
}

View File

@@ -501,11 +501,7 @@ pub const FinalizerCallback = struct {
session: *Session,
resolved_ptr_id: usize,
finalizer_ptr_id: usize,
release_ref: *const fn (ptr_id: usize, session: *Session) void,
// Track how many identities (JS worlds) reference this FC.
// Only cleanup when all identities have finalized.
identity_count: u8 = 0,
_deinit: *const fn (ptr_id: usize, session: *Session) void,
// For every FinalizerCallback we'll have 1+ FinalizerCallback.Identity: one
// for every identity that gets the instance. In most cases, that'l be 1.
@@ -514,9 +510,8 @@ pub const FinalizerCallback = struct {
fc: *Session.FinalizerCallback,
};
// Called during page reset to force cleanup regardless of identity_count.
fn deinit(self: *FinalizerCallback, session: *Session) void {
self.release_ref(self.finalizer_ptr_id, session);
self._deinit(self.finalizer_ptr_id, session);
session.releaseArena(self.arena);
}
};

View File

@@ -509,7 +509,7 @@ fn getUserInfo(raw: [:0]const u8) ?[]const u8 {
return raw[authority_start .. auth.host_start - 1];
}
pub fn getHost(raw: []const u8) []const u8 {
pub fn getHost(raw: [:0]const u8) []const u8 {
const auth = parseAuthority(raw) orelse return "";
return auth.getHost(raw);
}

View File

@@ -296,7 +296,7 @@ pub fn createContext(self: *Env, page: *Page, params: ContextParams) !*Context {
// it gets setup automatically as objects are created, but the Window
// object already exists in v8 (it's the global) so we manually create
// the mapping here.
const tao = try params.identity_arena.create(@import("TaggedOpaque.zig"));
const tao = try context_arena.create(@import("TaggedOpaque.zig"));
tao.* = .{
.value = @ptrCast(page.window),
.prototype_chain = (&Window.JsApi.Meta.prototype_chain).ptr,

View File

@@ -244,10 +244,7 @@ pub fn mapZigInstanceToJs(self: *const Local, js_obj_handle: ?*const v8.Object,
// The TAO contains the pointer to our Zig instance as
// well as any meta data we'll need to use it later.
// See the TaggedOpaque struct for more details.
// Use identity_arena so TAOs survive context destruction. V8 objects
// are stored in identity_map (session-level) and may be referenced
// after their creating context is destroyed (e.g., via microtasks).
const tao = try ctx.identity_arena.create(TaggedOpaque);
const tao = try context_arena.create(TaggedOpaque);
tao.* = .{
.value = resolved.ptr,
.prototype_chain = resolved.prototype_chain.ptr,
@@ -269,6 +266,7 @@ pub fn mapZigInstanceToJs(self: *const Local, js_obj_handle: ?*const v8.Object,
v8.v8__Global__New(isolate.handle, js_obj.handle, gop.value_ptr);
if (resolved.finalizer) |finalizer| {
const finalizer_ptr_id = finalizer.ptr_id;
finalizer.acquireRef(finalizer_ptr_id);
const session = ctx.session;
const finalizer_gop = try session.finalizer_callbacks.getOrPut(session.page_arena, finalizer_ptr_id);
@@ -277,8 +275,7 @@ pub fn mapZigInstanceToJs(self: *const Local, js_obj_handle: ?*const v8.Object,
// see this Zig instance. We need to create the FinalizerCallback
// so that we can cleanup on page reset if v8 doesn't finalize.
errdefer _ = session.finalizer_callbacks.remove(finalizer_ptr_id);
finalizer.acquire_ref(finalizer_ptr_id);
finalizer_gop.value_ptr.* = try self.createFinalizerCallback(resolved_ptr_id, finalizer_ptr_id, finalizer.release_ref_from_zig);
finalizer_gop.value_ptr.* = try self.createFinalizerCallback(resolved_ptr_id, finalizer_ptr_id, finalizer.deinit);
}
const fc = finalizer_gop.value_ptr.*;
const identity_finalizer = try fc.arena.create(Session.FinalizerCallback.Identity);
@@ -286,9 +283,8 @@ pub fn mapZigInstanceToJs(self: *const Local, js_obj_handle: ?*const v8.Object,
.fc = fc,
.identity = ctx.identity,
};
fc.identity_count += 1;
v8.v8__Global__SetWeakFinalizer(gop.value_ptr, identity_finalizer, finalizer.release_ref, v8.kParameter);
v8.v8__Global__SetWeakFinalizer(gop.value_ptr, identity_finalizer, finalizer.release, v8.kParameter);
}
return js_obj;
},
@@ -1132,9 +1128,9 @@ const Resolved = struct {
// Resolved.ptr is the most specific value in a chain (e.g. IFrame, not EventTarget, Node, ...)
// Finalizer.ptr_id is the most specific value in a chain that defines an acquireRef
ptr_id: usize,
acquire_ref: *const fn (ptr_id: usize) void,
release_ref: *const fn (handle: ?*const v8.WeakCallbackInfo) callconv(.c) void,
release_ref_from_zig: *const fn (ptr_id: usize, session: *Session) void,
deinit: *const fn (ptr_id: usize, session: *Session) void,
acquireRef: *const fn (ptr_id: usize) void,
release: *const fn (handle: ?*const v8.WeakCallbackInfo) callconv(.c) void,
};
};
pub fn resolveValue(value: anytype) Resolved {
@@ -1174,49 +1170,32 @@ fn resolveT(comptime T: type, value: *T) Resolved {
const finalizer_ptr = getFinalizerPtr(value);
const Wrap = struct {
fn deinit(ptr_id: usize, session: *Session) void {
FT.deinit(@ptrFromInt(ptr_id), session);
}
fn acquireRef(ptr_id: usize) void {
FT.acquireRef(@ptrFromInt(ptr_id));
}
fn releaseRef(handle: ?*const v8.WeakCallbackInfo) callconv(.c) void {
fn release(handle: ?*const v8.WeakCallbackInfo) callconv(.c) void {
const ptr = v8.v8__WeakCallbackInfo__GetParameter(handle.?).?;
const identity_finalizer: *Session.FinalizerCallback.Identity = @ptrCast(@alignCast(ptr));
const fc = identity_finalizer.fc;
const session = fc.session;
const finalizer_ptr_id = fc.finalizer_ptr_id;
// Remove from this identity's map
if (identity_finalizer.identity.identity_map.fetchRemove(fc.resolved_ptr_id)) |kv| {
var global = kv.value;
v8.v8__Global__Reset(&global);
}
const identity_count = fc.identity_count;
if (identity_count == 1) {
// All IsolatedWorlds that reference this object have
// released it. Release the instance ref, remove the
// FinalizerCallback and free it.
FT.releaseRef(@ptrFromInt(finalizer_ptr_id), session);
const removed = session.finalizer_callbacks.remove(finalizer_ptr_id);
if (comptime IS_DEBUG) {
std.debug.assert(removed);
}
session.releaseArena(fc.arena);
} else {
fc.identity_count = identity_count - 1;
}
}
fn releaseRefFromZig(ptr_id: usize, session: *Session) void {
FT.releaseRef(@ptrFromInt(ptr_id), session);
FT.releaseRef(@ptrFromInt(fc.finalizer_ptr_id), fc.session);
}
};
break :blk .{
.ptr_id = @intFromPtr(finalizer_ptr),
.acquire_ref = Wrap.acquireRef,
.release_ref = Wrap.releaseRef,
.release_ref_from_zig = Wrap.releaseRefFromZig,
.deinit = Wrap.deinit,
.acquireRef = Wrap.acquireRef,
.release = Wrap.release,
};
},
};
@@ -1475,7 +1454,7 @@ fn createFinalizerCallback(
// The most specific value where finalizers are defined
// What actually gets acquired / released / deinit
finalizer_ptr_id: usize,
release_ref: *const fn (ptr_id: usize, session: *Session) void,
deinit: *const fn (ptr_id: usize, session: *Session) void,
) !*Session.FinalizerCallback {
const session = self.ctx.session;
@@ -1486,7 +1465,7 @@ fn createFinalizerCallback(
fc.* = .{
.arena = arena,
.session = session,
.release_ref = release_ref,
._deinit = deinit,
.resolved_ptr_id = resolved_ptr_id,
.finalizer_ptr_id = finalizer_ptr_id,
};

View File

@@ -4,7 +4,7 @@
<div id=empty></div>
<div id=one><p id=p10></p></div>
<!--<script id=childNodes>
<script id=childNodes>
const div = $('#d1');
const children = div.childNodes;
testing.expectEqual(true, children instanceof NodeList);
@@ -65,24 +65,24 @@
testing.expectEqual([], Array.from(empty.values()));
testing.expectEqual([], Array.from(empty.entries()));
testing.expectEqual([], Array.from(empty));
</script> -->
</script>
<script id=one>
const one = $('#one').childNodes;
// const p10 = $('#p10');
// testing.expectEqual(1, one.length);
// testing.expectEqual(p10, one[0]);
// testing.expectEqual([0], Array.from(one.keys()));
// testing.expectEqual([p10], Array.from(one.values()));
// testing.expectEqual([[0, p10]], Array.from(one.entries()));
const p10 = $('#p10');
testing.expectEqual(1, one.length);
testing.expectEqual(p10, one[0]);
testing.expectEqual([0], Array.from(one.keys()));
testing.expectEqual([p10], Array.from(one.values()));
testing.expectEqual([[0, p10]], Array.from(one.entries()));
// testing.expectEqual([p10], Array.from(one));
testing.expectEqual([p10], Array.from(one));
let foreach = [];
one.forEach((p) => foreach.push(p));
testing.expectEqual([p10], foreach);
</script>
<!-- <script id=contains>
<script id=contains>
testing.expectEqual(true, document.contains(document));
testing.expectEqual(true, $('#d1').contains($('#d1')));
testing.expectEqual(true, document.contains($('#d1')));
@@ -94,4 +94,3 @@
testing.expectEqual(false, $('#d1').contains($('#empty')));
testing.expectEqual(false, $('#d1').contains($('#p10')));
</script>
-->

View File

@@ -114,9 +114,7 @@ pub fn init(callback: js.Function.Temp, options: ?ObserverInit, page: *Page) !*I
pub fn deinit(self: *IntersectionObserver, session: *Session) void {
self._callback.release();
for (self._pending_entries.items) |entry| {
// These were never handed to v8, they do not have a corresponding
// FinalizerCallback. We 100% own them.
entry.deinit(session);
entry.deinitIfUnused(session);
}
session.releaseArena(self._arena);
}
@@ -137,11 +135,14 @@ pub fn observe(self: *IntersectionObserver, target: *Element, page: *Page) !void
}
}
try self._observing.append(self._arena, target);
if (self._observing.items.len == 1) {
// Register with page if this is our first observation
if (self._observing.items.len == 0) {
self._rc._refs += 1;
try page.registerIntersectionObserver(self);
}
try self._observing.append(self._arena, target);
// Don't initialize previous state yet - let checkIntersection do it
// This ensures we get an entry on first observation
@@ -165,7 +166,7 @@ pub fn unobserve(self: *IntersectionObserver, target: *Element, page: *Page) voi
while (j < self._pending_entries.items.len) {
if (self._pending_entries.items[j]._target == target) {
const entry = self._pending_entries.swapRemove(j);
entry.deinit(page._session);
entry.deinitIfUnused(page._session);
} else {
j += 1;
}
@@ -175,21 +176,25 @@ pub fn unobserve(self: *IntersectionObserver, target: *Element, page: *Page) voi
}
if (original_length > 0 and self._observing.items.len == 0) {
page.unregisterIntersectionObserver(self);
self._rc._refs -= 1;
}
}
pub fn disconnect(self: *IntersectionObserver, page: *Page) void {
for (self._pending_entries.items) |entry| {
entry.deinit(page._session);
entry.deinitIfUnused(page._session);
}
self._pending_entries.clearRetainingCapacity();
self._previous_states.clearRetainingCapacity();
if (self._observing.items.len > 0) {
page.unregisterIntersectionObserver(self);
}
const observing_count = self._observing.items.len;
self._observing.clearRetainingCapacity();
page.unregisterIntersectionObserver(self);
if (observing_count > 0) {
_ = self.releaseRef(page._session);
}
}
pub fn takeRecords(self: *IntersectionObserver, page: *Page) ![]*IntersectionObserverEntry {
@@ -335,6 +340,13 @@ pub const IntersectionObserverEntry = struct {
session.releaseArena(self._arena);
}
fn deinitIfUnused(self: *IntersectionObserverEntry, session: *Session) void {
if (self._rc._refs == 0) {
// hasn't been handed to JS yet.
self.deinit(session);
}
}
pub fn releaseRef(self: *IntersectionObserverEntry, session: *Session) void {
self._rc.release(self, session);
}

View File

@@ -87,12 +87,8 @@ pub fn init(callback: js.Function.Temp, page: *Page) !*MutationObserver {
return self;
}
/// Force cleanup on Session shutdown.
pub fn deinit(self: *MutationObserver, session: *Session) void {
for (self._pending_records.items) |record| {
// These were never handed to v8, they do not have a corresponding
// FinalizerCallback. We 100% own them.
record.deinit(session);
}
self._callback.release();
session.releaseArena(self._arena);
}
@@ -167,14 +163,16 @@ pub fn observe(self: *MutationObserver, target: *Node, options: ObserveOptions,
}
}
// Register with page if this is our first observation
if (self._observing.items.len == 0) {
self._rc._refs += 1;
try page.registerMutationObserver(self);
}
try self._observing.append(arena, .{
.target = target,
.options = store_options,
});
if (self._observing.items.len == 1) {
try page.registerMutationObserver(self);
}
}
pub fn disconnect(self: *MutationObserver, page: *Page) void {
@@ -182,11 +180,13 @@ pub fn disconnect(self: *MutationObserver, page: *Page) void {
_ = record.releaseRef(page._session);
}
self._pending_records.clearRetainingCapacity();
if (self._observing.items.len > 0) {
page.unregisterMutationObserver(self);
}
const observing_count = self._observing.items.len;
self._observing.clearRetainingCapacity();
if (observing_count > 0) {
_ = self.releaseRef(page._session);
}
page.unregisterMutationObserver(self);
}
pub fn takeRecords(self: *MutationObserver, page: *Page) ![]*MutationRecord {

View File

@@ -42,8 +42,8 @@ _rc: lp.RC(u32) = .{},
pub fn deinit(self: *NodeList, session: *Session) void {
switch (self._data) {
.child_nodes => |cn| cn.deinit(session),
.selector_list => |list| list.deinit(session),
.child_nodes => |cn| cn.deinit(session),
else => {},
}
}
@@ -92,12 +92,7 @@ pub fn entries(self: *NodeList, page: *Page) !*EntryIterator {
pub fn forEach(self: *NodeList, cb: js.Function, page: *Page) !void {
var i: i32 = 0;
var it = try self.values(page);
// the iterator takes a reference against our list
defer self.releaseRef(page._session);
while (true) : (i += 1) {
const next = try it.next(page);
if (next.done) {

View File

@@ -26,8 +26,7 @@ pub fn Entry(comptime Inner: type, comptime field: ?[]const u8) type {
const R = reflect(Inner, field);
return struct {
_inner: Inner,
_rc: lp.RC(u8) = .{},
inner: Inner,
const Self = @This();
@@ -39,31 +38,29 @@ pub fn Entry(comptime Inner: type, comptime field: ?[]const u8) type {
};
pub fn init(inner: Inner, page: *Page) !*Self {
const self = try page._factory.create(Self{ ._inner = inner });
if (@hasDecl(Inner, "acquireRef")) {
self._inner.acquireRef();
}
return self;
return page._factory.create(Self{ .inner = inner });
}
pub fn deinit(self: *Self, session: *Session) void {
if (@hasDecl(Inner, "releaseRef")) {
self._inner.releaseRef(session);
}
session.factory.destroy(self);
_ = self;
_ = session;
}
pub fn releaseRef(self: *Self, session: *Session) void {
self._rc.release(self, session);
// Release the reference to the inner type that we acquired
if (@hasDecl(Inner, "releaseRef")) {
self.inner.releaseRef(session);
}
}
pub fn acquireRef(self: *Self) void {
self._rc.acquire();
if (@hasDecl(Inner, "acquireRef")) {
self.inner.acquireRef();
}
}
pub fn next(self: *Self, page: *Page) if (R.has_error_return) anyerror!Result else Result {
const entry = (if (comptime R.has_error_return) try self._inner.next(page) else self._inner.next(page)) orelse {
const entry = (if (comptime R.has_error_return) try self.inner.next(page) else self.inner.next(page)) orelse {
return .{ .done = true, .value = null };
};

View File

@@ -90,13 +90,13 @@ const ResponseType = enum {
pub fn init(page: *Page) !*XMLHttpRequest {
const arena = try page.getArena(.{ .debug = "XMLHttpRequest" });
errdefer page.releaseArena(arena);
const self = try page._factory.xhrEventTarget(arena, XMLHttpRequest{
const xhr = try page._factory.xhrEventTarget(arena, XMLHttpRequest{
._page = page,
._arena = arena,
._proto = undefined,
._request_headers = try Headers.init(null, page),
});
return self;
return xhr;
}
pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
@@ -243,10 +243,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
try page.headersForRequest(&headers);
}
self.acquireRef();
self._active_request = true;
http_client.request(.{
try http_client.request(.{
.ctx = self,
.url = self._url,
.method = self._method,
@@ -263,10 +260,9 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
.done_callback = httpDoneCallback,
.error_callback = httpErrorCallback,
.shutdown_callback = httpShutdownCallback,
}) catch |err| {
self.releaseSelfRef();
return err;
};
});
self.acquireRef();
self._active_request = true;
}
fn handleBlobUrl(self: *XMLHttpRequest, page: *Page) !void {
@@ -522,7 +518,6 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void {
fn httpShutdownCallback(ctx: *anyopaque) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
self._transfer = null;
self.releaseSelfRef();
}
pub fn abort(self: *XMLHttpRequest) void {

View File

@@ -259,6 +259,9 @@ pub fn RC(comptime T: type) type {
return;
}
value.deinit(session);
if (session.finalizer_callbacks.fetchRemove(@intFromPtr(value))) |kv| {
session.releaseArena(kv.value.arena);
}
}
pub fn format(self: @This(), writer: *std.Io.Writer) !void {

View File

@@ -144,22 +144,11 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
app.network.run();
},
.mcp => |opts| {
.mcp => {
log.info(.mcp, "starting server", .{});
log.opts.format = .logfmt;
var cdp_server: ?*lp.Server = null;
if (opts.cdp_port) |port| {
const address = std.net.Address.parseIp("127.0.0.1", port) catch |err| {
log.fatal(.mcp, "invalid cdp address", .{ .err = err, .port = port });
return;
};
cdp_server = try lp.Server.init(app, address);
try sighandler.on(lp.Server.shutdown, .{cdp_server.?});
}
defer if (cdp_server) |s| s.deinit();
var worker_thread = try std.Thread.spawn(.{}, mcpThread, .{ allocator, app });
defer worker_thread.join();

View File

@@ -9,72 +9,57 @@ const protocol = @import("protocol.zig");
const Server = @import("Server.zig");
const CDPNode = @import("../cdp/Node.zig");
const goto_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "The URL to navigate to, must be a valid URL." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ },
\\ "required": ["url"]
\\}
);
const url_params_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before processing." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ }
\\}
);
const evaluate_schema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "script": { "type": "string" },
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ },
\\ "required": ["script"]
\\}
);
pub const tool_list = [_]protocol.Tool{
.{
.name = "goto",
.description = "Navigate to a specified URL and load the page in memory so it can be reused later for info extraction.",
.inputSchema = goto_schema,
},
.{
.name = "navigate",
.description = "Alias for goto. Navigate to a specified URL and load the page in memory.",
.inputSchema = goto_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "The URL to navigate to, must be a valid URL." }
\\ },
\\ "required": ["url"]
\\}
),
},
.{
.name = "markdown",
.description = "Get the page content in markdown format. If a url is provided, it navigates to that url first.",
.inputSchema = url_params_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching markdown." }
\\ }
\\}
),
},
.{
.name = "links",
.description = "Extract all links in the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = url_params_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting links." }
\\ }
\\}
),
},
.{
.name = "evaluate",
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first.",
.inputSchema = evaluate_schema,
},
.{
.name = "eval",
.description = "Alias for evaluate. Evaluate JavaScript in the current page context.",
.inputSchema = evaluate_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "script": { "type": "string" },
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." }
\\ },
\\ "required": ["script"]
\\}
),
},
.{
.name = "semantic_tree",
@@ -84,8 +69,6 @@ pub const tool_list = [_]protocol.Tool{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before fetching the semantic tree." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID to get the tree for a specific element instead of the document root." },
\\ "maxDepth": { "type": "integer", "description": "Optional maximum depth of the tree to return. Useful for exploring high-level structure first." }
\\ }
@@ -108,17 +91,38 @@ pub const tool_list = [_]protocol.Tool{
.{
.name = "interactiveElements",
.description = "Extract interactive elements from the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = url_params_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting interactive elements." }
\\ }
\\}
),
},
.{
.name = "structuredData",
.description = "Extract structured data (like JSON-LD, OpenGraph, etc) from the opened page. If a url is provided, it navigates to that url first.",
.inputSchema = url_params_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before extracting structured data." }
\\ }
\\}
),
},
.{
.name = "detectForms",
.description = "Detect all forms on the page and return their structure including fields, types, and required status. If a url is provided, it navigates to that url first.",
.inputSchema = url_params_schema,
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "url": { "type": "string", "description": "Optional URL to navigate to before detecting forms." }
\\ }
\\}
),
},
.{
.name = "click",
@@ -185,21 +189,15 @@ pub fn handleList(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
const GotoParams = struct {
url: [:0]const u8,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
};
const UrlParams = struct {
url: ?[:0]const u8 = null,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
};
const EvaluateParams = struct {
script: [:0]const u8,
url: ?[:0]const u8 = null,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
};
const ToolStreamingText = struct {
@@ -276,7 +274,6 @@ const ToolAction = enum {
structuredData,
detectForms,
evaluate,
eval,
semantic_tree,
click,
fill,
@@ -294,7 +291,6 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "structuredData", .structuredData },
.{ "detectForms", .detectForms },
.{ "evaluate", .evaluate },
.{ "eval", .eval },
.{ "semantic_tree", .semantic_tree },
.{ "click", .click },
.{ "fill", .fill },
@@ -328,7 +324,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments),
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
.detectForms => try handleDetectForms(server, arena, req.id.?, call_params.arguments),
.eval, .evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.evaluate => try handleEvaluate(server, arena, req.id.?, call_params.arguments),
.semantic_tree => try handleSemanticTree(server, arena, req.id.?, call_params.arguments),
.click => try handleClick(server, arena, req.id.?, call_params.arguments),
.fill => try handleFill(server, arena, req.id.?, call_params.arguments),
@@ -339,7 +335,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
fn handleGoto(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgs(GotoParams, arena, arguments, server, id, "goto");
try performGoto(server, args.url, id, args.timeout, args.waitUntil);
try performGoto(server, args.url, id);
const content = [_]protocol.TextContent([]const u8){.{ .text = "Navigated successfully." }};
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
@@ -347,7 +343,7 @@ fn handleGoto(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arg
fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .page = page, .action = .markdown },
@@ -359,7 +355,7 @@ fn handleMarkdown(server: *Server, arena: std.mem.Allocator, id: std.json.Value,
fn handleLinks(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{ .page = page, .action = .links },
@@ -374,11 +370,9 @@ fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Va
url: ?[:0]const u8 = null,
backendNodeId: ?u32 = null,
maxDepth: ?u32 = null,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
};
const args = try parseArgsOrDefault(TreeParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const content = [_]protocol.TextContent(ToolStreamingText){.{
.text = .{
@@ -423,7 +417,7 @@ fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Val
fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const elements = lp.interactive.collectInteractiveElements(page.document.asNode(), arena, page) catch |err| {
log.err(.mcp, "elements collection failed", .{ .err = err });
@@ -444,7 +438,7 @@ fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.
fn handleStructuredData(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const data = lp.structured_data.collectStructuredData(page.document.asNode(), arena, page) catch |err| {
log.err(.mcp, "struct data collection failed", .{ .err = err });
@@ -459,7 +453,7 @@ fn handleStructuredData(server: *Server, arena: std.mem.Allocator, id: std.json.
fn handleDetectForms(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
const forms_data = lp.forms.collectForms(arena, page.document.asNode(), page) catch |err| {
log.err(.mcp, "form collection failed", .{ .err = err });
@@ -480,7 +474,7 @@ fn handleDetectForms(server: *Server, arena: std.mem.Allocator, id: std.json.Val
fn handleEvaluate(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgs(EvaluateParams, arena, arguments, server, id, "evaluate");
const page = try ensurePage(server, id, args.url, args.timeout, args.waitUntil);
const page = try ensurePage(server, id, args.url);
var ls: js.Local.Scope = undefined;
page.js.localScope(&ls);
@@ -636,9 +630,9 @@ fn handleWaitForSelector(server: *Server, arena: std.mem.Allocator, id: std.json
return server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
fn ensurePage(server: *Server, id: std.json.Value, url: ?[:0]const u8, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !*lp.Page {
fn ensurePage(server: *Server, id: std.json.Value, url: ?[:0]const u8) !*lp.Page {
if (url) |u| {
try performGoto(server, u, id, timeout, waitUntil);
try performGoto(server, u, id);
}
return server.session.currentPage() orelse {
try server.sendError(id, .PageNotLoaded, "Page not loaded");
@@ -674,7 +668,7 @@ fn parseArgs(comptime T: type, arena: std.mem.Allocator, arguments: ?std.json.Va
};
}
fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value, timeout: ?u32, waitUntil: ?lp.Config.WaitUntil) !void {
fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value) !void {
const session = server.session;
if (session.page != null) {
session.removePage();
@@ -695,11 +689,8 @@ fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value, timeout:
try server.sendError(id, .InternalError, "Failed to start page runner");
return error.NavigationFailed;
};
runner.wait(.{
.ms = timeout orelse 10000,
.until = waitUntil orelse .done,
}) catch {
try server.sendError(id, .InternalError, "Error waiting for page load");
runner.wait(.{ .ms = 2000 }) catch {
try server.sendError(id, .InternalError, "Timeout waiting for page load");
return error.NavigationFailed;
};
}