diff --git a/src/Config.zig b/src/Config.zig index 629df32b..f65ecd5d 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -217,6 +217,15 @@ pub const DumpFormat = enum { semantic_tree_text, }; +pub const WaitUntil = enum { + load, + domcontentloaded, + networkidle, + fixed, + + pub const js_enum_from_string = true; +}; + pub const Fetch = struct { url: [:0]const u8, dump_mode: ?DumpFormat = null, @@ -224,6 +233,8 @@ pub const Fetch = struct { with_base: bool = false, with_frames: bool = false, strip: dump.Opts.Strip = .{}, + wait_ms: u32 = 5000, + wait_until: WaitUntil = .load, }; pub const Common = struct { @@ -619,8 +630,34 @@ fn parseFetchArgs( var url: ?[:0]const u8 = null; var common: Common = .{}; var strip: dump.Opts.Strip = .{}; + var wait_ms: u32 = 5000; + var wait_until: WaitUntil = .load; while (args.next()) |opt| { + if (std.mem.eql(u8, "--wait_ms", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--wait_ms" }); + return error.InvalidArgument; + }; + wait_ms = std.fmt.parseInt(u32, str, 10) catch |err| { + log.fatal(.app, "invalid argument value", .{ .arg = "--wait_ms", .err = err }); + return error.InvalidArgument; + }; + continue; + } + + if (std.mem.eql(u8, "--wait_until", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--wait_until" }); + return error.InvalidArgument; + }; + wait_until = std.meta.stringToEnum(WaitUntil, str) orelse { + log.fatal(.app, "invalid argument value", .{ .arg = "--wait_until", .val = str }); + return error.InvalidArgument; + }; + continue; + } + if (std.mem.eql(u8, "--dump", opt)) { var peek_args = args.*; if (peek_args.next()) |next_arg| { @@ -709,6 +746,8 @@ fn parseFetchArgs( .common = common, .with_base = with_base, .with_frames = with_frames, + .wait_ms = wait_ms, + .wait_until = wait_until, }; } diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 73b6b26e..cc965757 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -319,15 +319,15 @@ fn findPageBy(page: *Page, comptime field: []const u8, id: u32) ?*Page { return null; } -pub fn wait(self: *Session, wait_ms: u32) WaitResult { +pub fn wait(self: *Session, wait_ms: u32, wait_until: lp.Config.WaitUntil) WaitResult { var page = &(self.page orelse return .no_page); while (true) { - const wait_result = self._wait(page, wait_ms) catch |err| { + const wait_result = self._wait(&page, wait_ms, wait_until) catch |err| { switch (err) { error.JsError => {}, // already logged (with hopefully more context) else => log.err(.browser, "session wait", .{ .err = err, - .url = page.url, + .url = page.*.url, }), } return .done; @@ -346,7 +346,7 @@ pub fn wait(self: *Session, wait_ms: u32) WaitResult { } } -fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { +fn _wait(self: *Session, page: **Page, wait_ms: u32, wait_until: lp.Config.WaitUntil) !WaitResult { var timer = try std.time.Timer.start(); var ms_remaining = wait_ms; @@ -366,13 +366,15 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { const exit_when_done = http_client.cdp_client == null; while (true) { - switch (page._parse_state) { + switch (page.*._parse_state) { .pre, .raw, .text, .image => { // The main page hasn't started/finished navigating. // There's no JS to run, and no reason to run the scheduler. if (http_client.active == 0 and exit_when_done) { // haven't started navigating, I guess. - return .done; + if (wait_until != .fixed) { + return .done; + } } // Either we have active http connections, or we're in CDP // mode with an extra socket. Either way, we're waiting @@ -404,15 +406,15 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { try browser.runMacrotasks(); // Each call to this runs scheduled load events. - try page.dispatchLoad(); + try page.*.dispatchLoad(); const http_active = http_client.active; const total_network_activity = http_active + http_client.intercepted; - if (page._notified_network_almost_idle.check(total_network_activity <= 2)) { - page.notifyNetworkAlmostIdle(); + if (page.*._notified_network_almost_idle.check(total_network_activity <= 2)) { + page.*.notifyNetworkAlmostIdle(); } - if (page._notified_network_idle.check(total_network_activity == 0)) { - page.notifyNetworkIdle(); + if (page.*._notified_network_idle.check(total_network_activity == 0)) { + page.*.notifyNetworkIdle(); } if (http_active == 0 and exit_when_done) { @@ -423,17 +425,14 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { std.debug.assert(http_client.intercepted == 0); } - var ms = blk: { - // if (wait_ms - ms_remaining < 100) { - // if (comptime builtin.is_test) { - // return .done; - // } - // // Look, we want to exit ASAP, but we don't want - // // to exit so fast that we've run none of the - // // background jobs. - // break :blk 50; - // } + const is_event_done = switch (wait_until) { + .fixed => false, + .domcontentloaded => (page.*._load_state == .load or page.*._load_state == .complete), + .load => (page.*._load_state == .complete), + .networkidle => (page.*._load_state == .complete and http_active == 0), + }; + var ms = blk: { if (browser.hasBackgroundTasks()) { // _we_ have nothing to run, but v8 is working on // background tasks. We'll wait for them. @@ -441,19 +440,27 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { break :blk 20; } - break :blk browser.msToNextMacrotask() orelse return .done; + const next_task = browser.msToNextMacrotask(); + if (next_task == null and is_event_done) { + return .done; + } + break :blk next_task orelse 20; }; if (ms > ms_remaining) { + if (is_event_done) { + return .done; + } // Same as above, except we have a scheduled task, // it just happens to be too far into the future // compared to how long we were told to wait. if (!browser.hasBackgroundTasks()) { - return .done; + if (is_event_done) return .done; + } else { + // _we_ have nothing to run, but v8 is working on + // background tasks. We'll wait for them. + browser.waitForBackgroundTasks(); } - // _we_ have nothing to run, but v8 is working on - // background tasks. We'll wait for them. - browser.waitForBackgroundTasks(); ms = 20; } @@ -484,7 +491,7 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { } }, .err => |err| { - page._parse_state = .{ .raw_done = @errorName(err) }; + page.*._parse_state = .{ .raw_done = @errorName(err) }; return err; }, .raw_done => { diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 58ed11b9..e3456226 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -131,7 +131,7 @@ pub fn CDPT(comptime TypeProvider: type) type { // timeouts (or http events) which are ready to be processed. pub fn pageWait(self: *Self, ms: u32) Session.WaitResult { const session = &(self.browser.session orelse return .no_page); - return session.wait(ms); + return session.wait(ms, .load); } // Called from above, in processMessage which handles client messages diff --git a/src/cdp/domains/lp.zig b/src/cdp/domains/lp.zig index 19fc8cac..6c90571e 100644 --- a/src/cdp/domains/lp.zig +++ b/src/cdp/domains/lp.zig @@ -280,7 +280,7 @@ test "cdp.lp: action tools" { const page = try bc.session.createPage(); const url = "http://localhost:9582/src/browser/tests/mcp_actions.html"; try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } }); - _ = bc.session.wait(5000); + _ = bc.session.wait(5000, .load); // Test Click const btn = page.document.getElementById("btn", page).?.asNode(); diff --git a/src/cdp/testing.zig b/src/cdp/testing.zig index e07cab44..9e739a31 100644 --- a/src/cdp/testing.zig +++ b/src/cdp/testing.zig @@ -136,7 +136,7 @@ const TestContext = struct { 0, ); try page.navigate(full_url, .{}); - _ = bc.session.wait(2000); + _ = bc.session.wait(2000, .load); } return bc; } diff --git a/src/lightpanda.zig b/src/lightpanda.zig index a9c7a1f0..6096f2a8 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -46,6 +46,7 @@ const IS_DEBUG = @import("builtin").mode == .Debug; pub const FetchOpts = struct { wait_ms: u32 = 5000, + wait_until: Config.WaitUntil = .load, dump: dump.Opts, dump_mode: ?Config.DumpFormat = null, writer: ?*std.Io.Writer = null, @@ -107,7 +108,7 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { .reason = .address_bar, .kind = .{ .push = null }, }); - _ = session.wait(opts.wait_ms); + _ = session.wait(opts.wait_ms, opts.wait_until); const writer = opts.writer orelse return; if (opts.dump_mode) |mode| { diff --git a/src/main.zig b/src/main.zig index 1b5ffdb2..93404acc 100644 --- a/src/main.zig +++ b/src/main.zig @@ -120,7 +120,8 @@ fn run(allocator: Allocator, main_arena: Allocator) !void { log.debug(.app, "startup", .{ .mode = "fetch", .dump_mode = opts.dump_mode, .url = url, .snapshot = app.snapshot.fromEmbedded() }); var fetch_opts = lp.FetchOpts{ - .wait_ms = 5000, + .wait_ms = opts.wait_ms, + .wait_until = opts.wait_until, .dump_mode = opts.dump_mode, .dump = .{ .strip = opts.strip, diff --git a/src/main_legacy_test.zig b/src/main_legacy_test.zig index a6d1593f..839fd484 100644 --- a/src/main_legacy_test.zig +++ b/src/main_legacy_test.zig @@ -106,7 +106,7 @@ pub fn run(allocator: Allocator, file: []const u8, session: *lp.Session) !void { defer try_catch.deinit(); try page.navigate(url, .{}); - _ = session.wait(2000); + _ = session.wait(2000, .load); ls.local.eval("testing.assertOk()", "testing.assertOk()") catch |err| { const caught = try_catch.caughtOrError(allocator, err); diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index d8fd4ead..f1b6cf7c 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -538,7 +538,7 @@ fn performGoto(server: *Server, url: [:0]const u8, id: std.json.Value) !void { return error.NavigationFailed; }; - _ = server.session.wait(5000); + _ = server.session.wait(5000, .load); } const testing = @import("../testing.zig"); @@ -603,7 +603,7 @@ test "MCP - Actions: click, fill, scroll" { const page = try server.session.createPage(); const url = "http://localhost:9582/src/browser/tests/mcp_actions.html"; try page.navigate(url, .{ .reason = .address_bar, .kind = .{ .push = null } }); - _ = server.session.wait(5000); + _ = server.session.wait(5000, .load); // Test Click const btn = page.document.getElementById("btn", page).?.asNode(); diff --git a/src/testing.zig b/src/testing.zig index adebdc32..0f9747d0 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -415,7 +415,7 @@ fn runWebApiTest(test_file: [:0]const u8) !void { defer try_catch.deinit(); try page.navigate(url, .{}); - _ = test_session.wait(2000); + _ = test_session.wait(2000, .load); test_browser.runMicrotasks(); @@ -439,7 +439,7 @@ pub fn pageTest(comptime test_file: []const u8) !*Page { ); try page.navigate(url, .{}); - _ = test_session.wait(2000); + _ = test_session.wait(2000, .load); return page; }