Increase max concurrent requests to 10

Improve wait analysis dump.

De-prioritize secondary schedules.

Don't log warning for application/json scripts

Change pretty log timer to display time from start.
This commit is contained in:
Karl Seguin
2025-08-05 16:20:00 +08:00
parent 9876d79680
commit c7484c69c0
7 changed files with 98 additions and 51 deletions

View File

@@ -51,7 +51,7 @@ pub const App = struct {
errdefer notification.deinit();
var http = try Http.init(allocator, .{
.max_concurrent_transfers = 3,
.max_concurrent_transfers = 10,
.http_proxy = config.http_proxy,
.tls_verify_host = config.tls_verify_host,
.proxy_bearer_token = config.proxy_bearer_token,

View File

@@ -59,32 +59,22 @@ pub fn add(self: *Scheduler, ctx: *anyopaque, func: Task.Func, ms: u32, opts: Ad
});
}
// fn debug(self: *Scheduler) void {
// var it = self.primary.iterator();
// while (it.next()) |task| {
// std.debug.print("- {s}\n", .{task.name});
// }
// }
// Runs any due tasks on the primary (high-priority) queue.
// Delegates to runQueue, which returns null when the queue is empty;
// otherwise returns the ms until the next primary task (see runQueue).
pub fn runHighPriority(self: *Scheduler) !?u32 {
return self.runQueue(&self.primary);
}
pub fn run(self: *Scheduler, force_secondary: bool) !?u32 {
if (self.primary.count() == 0 and force_secondary == false) {
// Runs any due tasks on the secondary (low-priority) queue.
// Unlike runHighPriority, this is only invoked opportunistically by the
// caller (e.g. while blocked on network IO — see Page); same return
// contract as runQueue: null when the queue is empty.
pub fn runLowPriority(self: *Scheduler) !?u32 {
return self.runQueue(&self.secondary);
}
fn runQueue(self: *Scheduler, queue: *Queue) !?u32 {
// this is O(1)
if (queue.count() == 0) {
return null;
}
const now = std.time.milliTimestamp();
const time_to_next_primary = try self.runQueue(&self.primary, now);
const time_to_next_secondary = try self.runQueue(&self.secondary, now);
if (time_to_next_primary == null) {
return time_to_next_secondary;
}
if (time_to_next_secondary == null) {
return time_to_next_primary;
}
return @min(time_to_next_primary.?, time_to_next_secondary.?);
}
fn runQueue(self: *Scheduler, queue: *Queue, now: i64) !?u32 {
var next = queue.peek();
while (next) |task| {
const time_to_next = task.ms - now;
@@ -131,32 +121,32 @@ test "Scheduler" {
var task = TestTask{ .allocator = testing.arena_allocator };
var s = Scheduler.init(testing.arena_allocator);
try testing.expectEqual(null, s.run(false));
try testing.expectEqual(null, s.runHighPriority());
try testing.expectEqual(0, task.calls.items.len);
try s.add(&task, TestTask.run1, 3, .{});
try testing.expectDelta(3, try s.run(false), 1);
try testing.expectDelta(3, try s.runHighPriority(), 1);
try testing.expectEqual(0, task.calls.items.len);
std.time.sleep(std.time.ns_per_ms * 5);
try testing.expectEqual(null, s.run(false));
try testing.expectEqual(null, s.runHighPriority());
try testing.expectEqualSlices(u32, &.{1}, task.calls.items);
try s.add(&task, TestTask.run2, 3, .{});
try s.add(&task, TestTask.run1, 2, .{});
std.time.sleep(std.time.ns_per_ms * 5);
try testing.expectDelta(2, try s.run(false), 1);
try testing.expectDelta(null, try s.runHighPriority(), 1);
try testing.expectEqualSlices(u32, &.{ 1, 1, 2 }, task.calls.items);
std.time.sleep(std.time.ns_per_ms * 5);
// only secondary won't be run unless forced
try testing.expectEqual(null, try s.run(false));
// won't run secondary
try testing.expectEqual(null, try s.runHighPriority());
try testing.expectEqualSlices(u32, &.{ 1, 1, 2 }, task.calls.items);
// only secondary will be run when forced
try testing.expectDelta(2, try s.run(true), 1);
//runs secondary
try testing.expectDelta(2, try s.runLowPriority(), 1);
try testing.expectEqualSlices(u32, &.{ 1, 1, 2, 2 }, task.calls.items);
}

View File

@@ -132,6 +132,9 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
if (std.ascii.eqlIgnoreCase(script_type, "module")) {
break :blk .module;
}
if (std.ascii.eqlIgnoreCase(script_type, "application/json")) {
return;
}
log.warn(.user_script, "unknown script type", .{ .type = script_type });
return;
};
@@ -206,6 +209,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
pending_script.complete = true;
self.scripts.append(&pending_script.node);
return;
} else {
log.debug(.http, "script queue", .{ .url = remote_url.? });
}
pending_script.node = .{ .data = pending_script };
@@ -217,7 +222,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
.url = remote_url.?,
.ctx = pending_script,
.method = .GET,
.start_callback = startCallback,
.start_callback = if (log.enabled(.http, .debug)) startCallback else null,
.header_done_callback = headerCallback,
.data_callback = dataCallback,
.done_callback = doneCallback,
@@ -225,6 +230,39 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
});
}
// @TODO: Improving this would be the simplest change with the biggest
// performance improvement for most sites.
//
// For JS imports (both static and dynamic), we currently block to get the
// result (the content of the file).
//
// For static imports, this is necessary, since v8 is expecting the compiled module
// as part of the function return. (we should try to pre-load the JavaScript
// source via module.GetModuleRequests(), but that's for a later time).
//
// For dynamic imports, this is not strictly necessary since the v8
// call returns a Promise; we could make this a normal get call, associated with
// the promise, and when done, resolve the promise.
//
// In both cases, for now at least, we just issue a "blocking" request. We block
// by ticking the http client until the script is complete.
//
// This uses the client.blockingRequest call which has a dedicated handle for
// these blocking requests. Because they are blocking, we're guaranteed to have
// only 1 at a time, thus the 1 reserved handle.
//
// You almost don't need the http client's blocking handle. In most cases, you
// should always have 1 free handle whenever you get here, because we always
// release the handle before executing the doneCallback. So, if a module does:
// import * as x from 'blah'
// And we need to load 'blah', there should always be 1 free handle - the handle
// of the http GET we just completed before executing the module.
// The exception to this, and the reason we need a special blocking handle, is
// for inline modules within the HTML page itself:
// <script type=module>import ....</script>
// Unlike external modules which can only ever be executed after releasing an
// http handle, these are executed without there necessarily being a free handle.
// Thus, Http/Client.zig maintains a dedicated handle for these calls.
pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
var blocking = Blocking{
.allocator = self.allocator,
@@ -232,10 +270,11 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
};
var client = self.client;
try client.request(.{
try client.blockingRequest(.{
.url = url,
.method = .GET,
.ctx = &blocking,
.start_callback = if (log.enabled(.http, .debug)) Blocking.startCallback else null,
.header_done_callback = Blocking.headerCallback,
.data_callback = Blocking.dataCallback,
.done_callback = Blocking.doneCallback,
@@ -244,7 +283,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
// rely on http's timeout settings to avoid an endless/long loop.
while (true) {
try client.tick(1000);
try client.tick(200);
switch (blocking.state) {
.running => {},
.done => |result| return result,

View File

@@ -292,13 +292,10 @@ pub const Page = struct {
// The HTML page was parsed. We now either have JS scripts to
// download, or timeouts to execute, or both.
// If we have active http transfers, we might as well run
// any "secondary" task, since we won't be exiting this loop
// anyways.
// scheduler.run could trigger new http transfers, so do not
// store http_client.active BEFORE this call and then use
// it AFTER.
const ms_to_next_task = try scheduler.run(http_client.active > 0);
const ms_to_next_task = try scheduler.runHighPriority();
if (try_catch.hasCaught()) {
const msg = (try try_catch.err(self.arena)) orelse "unknown";
@@ -316,7 +313,7 @@ pub const Page = struct {
// we'd wait too long, might as well exit early.
return;
}
_ = try scheduler.runLowPriority();
std.time.sleep(std.time.ns_per_ms * ms);
break :SW;
}
@@ -326,6 +323,8 @@ pub const Page = struct {
return;
}
_ = try scheduler.runLowPriority();
// We'll block here, waiting for network IO. We know
// when the next timeout is scheduled, and we know how long
// the caller wants to wait for, so we can pick a good wait
@@ -344,7 +343,7 @@ pub const Page = struct {
}
const ms_elapsed = timer.lap() / 1_000_000;
if (ms_elapsed > ms_remaining) {
if (ms_elapsed >= ms_remaining) {
return;
}
ms_remaining -= ms_elapsed;
@@ -354,7 +353,27 @@ pub const Page = struct {
fn printWaitAnalysis(self: *Page) void {
std.debug.print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))});
std.debug.print("load: {s}\n", .{@tagName(self.load_state)});
std.debug.print("active requests: {d}\n", .{self.http_client.active});
{
std.debug.print("\nactive requests: {d}\n", .{self.http_client.active});
var n_ = self.http_client.handles.in_use.first;
while (n_) |n| {
const transfer = HttpClient.Transfer.fromEasy(n.data.conn.easy) catch |err| {
std.debug.print(" - failed to load transfer: {any}\n", .{err});
break;
};
std.debug.print(" - {s}\n", .{transfer});
n_ = n.next;
}
}
{
std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len});
var n_ = self.http_client.queue.first;
while (n_) |n| {
std.debug.print(" - {s}\n", .{n.data.url});
n_ = n.next;
}
}
{
std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len});

View File

@@ -363,7 +363,7 @@ pub const XMLHttpRequest = struct {
if (self.state != .opened) return DOMError.InvalidState;
if (self.send_flag) return DOMError.InvalidState;
log.debug(.http, "request", .{ .method = self.method, .url = self.url, .source = "xhr" });
log.debug(.http, "request queued", .{ .method = self.method, .url = self.url, .source = "xhr" });
self.send_flag = true;
if (body) |b| {
@@ -394,6 +394,8 @@ pub const XMLHttpRequest = struct {
try transfer.addHeader(hdr);
}
log.debug(.http, "request start", .{ .method = self.method, .url = self.url, .source = "xhr" });
// @newhttp
// {
// var arr: std.ArrayListUnmanaged(u8) = .{};

View File

@@ -531,7 +531,8 @@ pub const Transfer = struct {
return chunk_len;
}
fn fromEasy(easy: *c.CURL) !*Transfer {
// pub because Page.printWaitAnalysis uses it
pub fn fromEasy(easy: *c.CURL) !*Transfer {
var private: *anyopaque = undefined;
try errorCheck(c.curl_easy_getinfo(easy, c.CURLINFO_PRIVATE, &private));
return @alignCast(@ptrCast(private));

View File

@@ -320,22 +320,18 @@ fn timestamp() i64 {
return std.time.milliTimestamp();
}
var last_log: i64 = 0;
var first_log: i64 = 0;
fn elapsed() i64 {
const now = timestamp();
last_log_lock.lock();
const previous = last_log;
last_log = now;
last_log_lock.unlock();
defer last_log_lock.unlock();
if (previous == 0) {
return 0;
if (first_log == 0) {
first_log = now;
}
if (previous > now) {
return 0;
}
return now - previous;
return now - first_log;
}
const testing = @import("testing.zig");