From c7484c69c025519ce1f1c0116d7b1669249a4b39 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 5 Aug 2025 16:20:00 +0800 Subject: [PATCH] Increase max concurrent request to 10 Improve wait analysis dump. De-prioritize secondary schedules. Don't log warning for application/json scripts Change pretty log timer to display time from start. --- src/app.zig | 2 +- src/browser/Scheduler.zig | 46 ++++++++++++++--------------------- src/browser/ScriptManager.zig | 45 +++++++++++++++++++++++++++++++--- src/browser/page.zig | 33 +++++++++++++++++++------ src/browser/xhr/xhr.zig | 4 ++- src/http/Client.zig | 3 ++- src/log.zig | 16 +++++------- 7 files changed, 98 insertions(+), 51 deletions(-) diff --git a/src/app.zig b/src/app.zig index 6572f538..078b23ce 100644 --- a/src/app.zig +++ b/src/app.zig @@ -51,7 +51,7 @@ pub const App = struct { errdefer notification.deinit(); var http = try Http.init(allocator, .{ - .max_concurrent_transfers = 3, + .max_concurrent_transfers = 10, .http_proxy = config.http_proxy, .tls_verify_host = config.tls_verify_host, .proxy_bearer_token = config.proxy_bearer_token, diff --git a/src/browser/Scheduler.zig b/src/browser/Scheduler.zig index 1326fb77..fbdb8105 100644 --- a/src/browser/Scheduler.zig +++ b/src/browser/Scheduler.zig @@ -59,32 +59,22 @@ pub fn add(self: *Scheduler, ctx: *anyopaque, func: Task.Func, ms: u32, opts: Ad }); } -// fn debug(self: *Scheduler) void { -// var it = self.primary.iterator(); -// while (it.next()) |task| { -// std.debug.print("- {s}\n", .{task.name}); -// } -// } +pub fn runHighPriority(self: *Scheduler) !?u32 { + return self.runQueue(&self.primary); +} -pub fn run(self: *Scheduler, force_secondary: bool) !?u32 { - if (self.primary.count() == 0 and force_secondary == false) { +pub fn runLowPriority(self: *Scheduler) !?u32 { + return self.runQueue(&self.secondary); +} + +fn runQueue(self: *Scheduler, queue: *Queue) !?u32 { + // this is O(1) + if (queue.count() == 0) { return null; } const now = std.time.milliTimestamp(); - const time_to_next_primary = try self.runQueue(&self.primary, now); - const time_to_next_secondary = try self.runQueue(&self.secondary, now); - if (time_to_next_primary == null) { - return time_to_next_secondary; - } - if (time_to_next_secondary == null) { - return time_to_next_primary; - } - return @min(time_to_next_primary.?, time_to_next_secondary.?); -} - -fn runQueue(self: *Scheduler, queue: *Queue, now: i64) !?u32 { var next = queue.peek(); while (next) |task| { const time_to_next = task.ms - now; @@ -131,32 +121,32 @@ test "Scheduler" { var task = TestTask{ .allocator = testing.arena_allocator }; var s = Scheduler.init(testing.arena_allocator); - try testing.expectEqual(null, s.run(false)); + try testing.expectEqual(null, s.runHighPriority()); try testing.expectEqual(0, task.calls.items.len); try s.add(&task, TestTask.run1, 3, .{}); - try testing.expectDelta(3, try s.run(false), 1); + try testing.expectDelta(3, try s.runHighPriority(), 1); try testing.expectEqual(0, task.calls.items.len); std.time.sleep(std.time.ns_per_ms * 5); - try testing.expectEqual(null, s.run(false)); + try testing.expectEqual(null, s.runHighPriority()); try testing.expectEqualSlices(u32, &.{1}, task.calls.items); try s.add(&task, TestTask.run2, 3, .{}); try s.add(&task, TestTask.run1, 2, .{}); std.time.sleep(std.time.ns_per_ms * 5); - try testing.expectDelta(2, try s.run(false), 1); + try testing.expectDelta(null, try s.runHighPriority(), 1); try testing.expectEqualSlices(u32, &.{ 1, 1, 2 }, task.calls.items); std.time.sleep(std.time.ns_per_ms * 5); - // only secondary won't be run unless forced - try testing.expectEqual(null, try s.run(false)); + // wont' run secondary + try testing.expectEqual(null, try s.runHighPriority()); try testing.expectEqualSlices(u32, &.{ 1, 1, 2 }, task.calls.items); - // only secondary will be run when forced - try testing.expectDelta(2, try s.run(true), 1); + //runs secondary + try testing.expectDelta(2, try s.runLowPriority(), 1); try testing.expectEqualSlices(u32, &.{ 1, 1, 2, 2 }, task.calls.items); } diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 3b061956..bb69ef1f 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -132,6 +132,9 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { if (std.ascii.eqlIgnoreCase(script_type, "module")) { break :blk .module; } + if (std.ascii.eqlIgnoreCase(script_type, "application/json")) { + return; + } log.warn(.user_script, "unknown script type", .{ .type = script_type }); return; }; @@ -206,6 +209,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { pending_script.complete = true; self.scripts.append(&pending_script.node); return; + } else { + log.debug(.http, "script queue", .{ .url = remote_url.? }); } pending_script.node = .{ .data = pending_script }; @@ -217,7 +222,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { .url = remote_url.?, .ctx = pending_script, .method = .GET, - .start_callback = startCallback, + .start_callback = if (log.enabled(.http, .debug)) startCallback else null, .header_done_callback = headerCallback, .data_callback = dataCallback, .done_callback = doneCallback, @@ -225,6 +230,39 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void { }); } +// @TODO: Improving this would have the simplest biggest performance improvement +// for most sites. +// +// For JS imports (both static and dynamic), we currently block to get the +// result (the content of the file). +// +// For static imports, this is necessary, since v8 is expecting the compiled module +// as part of the function return. (we should try to pre-load the JavaScript +// source via module.GetModuleRequests(), but that's for a later time). +// +// For dynamic dynamic imports, this is not strictly necessary since the v8 +// call returns a Promise; we could make this a normal get call, associated with +// the promise, and when done, resolve the promise. +// +// In both cases, for now at least, we just issue a "blocking" request. We block +// by ticking the http client until the script is complete. +// +// This uses the client.blockingRequest call which has a dedicated handle for +// these blocking requests. Because they are blocking, we're guaranteed to have +// only 1 at a time, thus the 1 reserved handle. +// +// You almost don't need the http client's blocking handle. In most cases, you +// should always have 1 free handle whenever you get here, because we always +// release the handle before executing the doneCallback. So, if a module does: +// import * as x from 'blah' +// And we need to load 'blah', there should always be 1 free handle - the handle +// of the http GET we just completed before executing the module. +// The exception to this, and the reason we need a special blocking handle, is +// for inline modules within the HTML page itself: +// +// Unlike external modules which can only ever be executed after releasing an +// http handle, these are executed without there necessarily being a free handle. +// Thus, Http/Client.zig maintains a dedicated handle for these calls. pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { var blocking = Blocking{ .allocator = self.allocator, @@ -232,10 +270,11 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { }; var client = self.client; - try client.request(.{ + try client.blockingRequest(.{ .url = url, .method = .GET, .ctx = &blocking, + .start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null, .header_done_callback = Blocking.headerCallback, .data_callback = Blocking.dataCallback, .done_callback = Blocking.doneCallback, @@ -244,7 +283,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { // rely on http's timeout settings to avoid an endless/long loop. while (true) { - try client.tick(1000); + try client.tick(200); switch (blocking.state) { .running => {}, .done => |result| return result, diff --git a/src/browser/page.zig b/src/browser/page.zig index 51129e88..3811c1c9 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -292,13 +292,10 @@ pub const Page = struct { // The HTML page was parsed. We now either have JS scripts to // download, or timeouts to execute, or both. - // If we have active http transfers, we might as well run - // any "secondary" task, since we won't be exiting this loop - // anyways. // scheduler.run could trigger new http transfers, so do not // store http_client.active BEFORE this call and then use // it AFTER. - const ms_to_next_task = try scheduler.run(http_client.active > 0); + const ms_to_next_task = try scheduler.runHighPriority(); if (try_catch.hasCaught()) { const msg = (try try_catch.err(self.arena)) orelse "unknown"; @@ -316,7 +313,7 @@ pub const Page = struct { // we'd wait to long, might as well exit early. return; } - + _ = try scheduler.runLowPriority(); std.time.sleep(std.time.ns_per_ms * ms); break :SW; } @@ -326,6 +323,8 @@ pub const Page = struct { return; } + _ = try scheduler.runLowPriority(); + // We'll block here, waiting for network IO. We know // when the next timeout is scheduled, and we know how long // the caller wants to wait for, so we can pick a good wait @@ -344,7 +343,7 @@ pub const Page = struct { } const ms_elapsed = timer.lap() / 1_000_000; - if (ms_elapsed > ms_remaining) { + if (ms_elapsed >= ms_remaining) { return; } ms_remaining -= ms_elapsed; @@ -354,7 +353,27 @@ pub const Page = struct { fn printWaitAnalysis(self: *Page) void { std.debug.print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))}); std.debug.print("load: {s}\n", .{@tagName(self.load_state)}); - std.debug.print("active requests: {d}\n", .{self.http_client.active}); + { + std.debug.print("\nactive requests: {d}\n", .{self.http_client.active}); + var n_ = self.http_client.handles.in_use.first; + while (n_) |n| { + const transfer = HttpClient.Transfer.fromEasy(n.data.conn.easy) catch |err| { + std.debug.print(" - failed to load transfer: {any}\n", .{err}); + break; + }; + std.debug.print(" - {s}\n", .{transfer}); + n_ = n.next; + } + } + + { + std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len}); + var n_ = self.http_client.queue.first; + while (n_) |n| { + std.debug.print(" - {s}\n", .{n.data.url}); + n_ = n.next; + } + } { std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len}); diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig index bd237c6d..f59f9cee 100644 --- a/src/browser/xhr/xhr.zig +++ b/src/browser/xhr/xhr.zig @@ -363,7 +363,7 @@ pub const XMLHttpRequest = struct { if (self.state != .opened) return DOMError.InvalidState; if (self.send_flag) return DOMError.InvalidState; - log.debug(.http, "request", .{ .method = self.method, .url = self.url, .source = "xhr" }); + log.debug(.http, "request queued", .{ .method = self.method, .url = self.url, .source = "xhr" }); self.send_flag = true; if (body) |b| { @@ -394,6 +394,8 @@ pub const XMLHttpRequest = struct { try transfer.addHeader(hdr); } + log.debug(.http, "request start", .{ .method = self.method, .url = self.url, .source = "xhr" }); + // @newhttp // { // var arr: std.ArrayListUnmanaged(u8) = .{}; diff --git a/src/http/Client.zig b/src/http/Client.zig index 1da6d857..a34a0ff9 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -531,7 +531,8 @@ pub const Transfer = struct { return chunk_len; } - fn fromEasy(easy: *c.CURL) !*Transfer { + // pub because Page.printWaitAnalysis uses it + pub fn fromEasy(easy: *c.CURL) !*Transfer { var private: *anyopaque = undefined; try errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_PRIVATE, &private)); return @alignCast(@ptrCast(private)); diff --git a/src/log.zig b/src/log.zig index 751d67c3..9ac06d46 100644 --- a/src/log.zig +++ b/src/log.zig @@ -320,22 +320,18 @@ fn timestamp() i64 { return std.time.milliTimestamp(); } -var last_log: i64 = 0; +var first_log: i64 = 0; fn elapsed() i64 { const now = timestamp(); last_log_lock.lock(); - const previous = last_log; - last_log = now; - last_log_lock.unlock(); + defer last_log_lock.unlock(); - if (previous == 0) { - return 0; + if (first_log == 0) { + first_log = now; } - if (previous > now) { - return 0; - } - return now - previous; + + return now - first_log; } const testing = @import("testing.zig");