fix ScriptManager wrong order execution

This commit is contained in:
Karl Seguin
2025-08-04 10:52:03 +08:00
parent f45726d61f
commit 74b40b97ec
3 changed files with 107 additions and 61 deletions

View File

@@ -200,6 +200,18 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
return;
}
if (self.getList(&pending_script.script)) |list| {
pending_script.node = .{.data = pending_script};
list.append(&pending_script.node);
} else {
// async scripts don't get added to a list, because we can execute
// them in any order
std.debug.assert(script.is_async);
self.async_count += 1;
}
errdefer pending_script.deinit();
try self.client.request(.{
.url = remote_url.?,
.ctx = pending_script,
@@ -332,14 +344,18 @@ fn startCallback(transfer: *HttpClient.Transfer) !void {
fn headerCallback(transfer: *HttpClient.Transfer) !void {
const script: *PendingScript = @alignCast(@ptrCast(transfer.ctx));
script.headerCallback(transfer) catch |err| {
log.err(.http, "SM.headerCallback", .{ .err = err, .transfer = transfer });
log.err(.http, "SM.headerCallback", .{
.err = err,
.transfer = transfer,
.status = transfer.response_header.?.status,
});
return err;
};
}
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const script: *PendingScript = @alignCast(@ptrCast(transfer.ctx));
script.dataCallback(data) catch |err| {
script.dataCallback(transfer, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len });
return err;
};
@@ -367,27 +383,22 @@ const PendingScript = struct {
fn deinit(self: *PendingScript) void {
var manager = self.manager;
if (self.script.source == .remote) {
manager.buffer_pool.release(self.script.source.remote);
const script = &self.script;
if (script.source == .remote) {
manager.buffer_pool.release(script.source.remote);
}
if (manager.getList(&self.script)) |list| {
if (manager.getList(script)) |list| {
list.remove(&self.node);
} else {
std.debug.assert(script.is_async);
manager.asyncDone();
}
}
fn startCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
if (self.manager.getList(&self.script)) |list| {
self.node.data = self;
list.append(&self.node);
} else {
// async scripts don't get added to a list, because we can execute
// them in any order
std.debug.assert(self.script.is_async);
self.manager.async_count += 1;
}
// if the script is async, it isn't tracked in a list, because we can
// execute it as soon as it's done loading.
_ = self;
log.debug(.http, "script fetch start", .{ .req = transfer });
}
@@ -408,9 +419,17 @@ const PendingScript = struct {
});
}
fn dataCallback(self: *PendingScript, data: []const u8) !void {
// @newhttp TODO: max-length enforcement
fn dataCallback(self: *PendingScript, transfer: *HttpClient.Transfer, data: []const u8) !void {
_ = transfer;
// too verbose
// log.debug(.http, "script data chunk", .{
// .req = transfer,
// .len = data.len,
// });
// @newhttp TODO: max-length enforcement ??
try self.script.source.remote.appendSlice(self.manager.allocator, data);
}
fn doneCallback(self: *PendingScript) void {
@@ -421,16 +440,18 @@ const PendingScript = struct {
// async script can be evaluated immediately
defer self.deinit();
self.script.eval(self.manager.page);
manager.asyncDone();
} else {
self.complete = true;
self.manager.evaluate();
manager.evaluate();
}
}
fn errorCallback(self: *PendingScript, err: anyerror) void {
log.warn(.http, "script fetch error", .{ .req = self.script.url, .err = err });
self.deinit();
defer self.deinit();
// this script might have been blocking others;
self.manager.evaluate();
}
};
@@ -473,7 +494,7 @@ const Script = struct {
const url = self.url;
log.debug(.browser, "executing script", .{
log.info(.browser, "executing script", .{
.src = url,
.kind = self.kind,
.cacheable = cacheable,
@@ -662,6 +683,12 @@ const Blocking = struct {
}
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
// too verbose
// log.debug(.http, "script data chunk", .{
// .req = transfer,
// .blocking = true,
// });
var self: *Blocking = @alignCast(@ptrCast(transfer.ctx));
self.buffer.appendSlice(self.allocator, data) catch |err| {
log.err(.http, "SM.dataCallback", .{

View File

@@ -90,9 +90,10 @@ pub const Page = struct {
scheduler: Scheduler,
http_client: *HttpClient,
script_manager: ScriptManager,
mode: Mode,
document_state: DocumentState = .parsing,
load_state: LoadState = .parsing,
const Mode = union(enum) {
pre: void,
@@ -103,9 +104,16 @@ pub const Page = struct {
raw_done: []const u8,
};
const DocumentState = enum {
const LoadState = enum {
// the main HTML is being parsed (or downloaded)
parsing,
// the main HTML has been parsed and the JavaScript (including deferred
// scripts) has been loaded. Corresponds to the DOMContentLoaded event
load,
// the page has been loaded and all async scripts (if any) are done
// Corresponds to the load event
complete,
};
@@ -154,7 +162,7 @@ pub const Page = struct {
self.http_client.abort();
self.script_manager.reset();
self.document_state = .parsing;
self.load_state = .parsing;
self.mode = .{ .pre = {} };
_ = self.session.browser.page_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 });
}
@@ -391,23 +399,38 @@ pub const Page = struct {
}
pub fn documentIsLoaded(self: *Page) void {
std.debug.assert(self.document_state == .parsing);
self.document_state = .load;
if (self.load_state != .parsing) {
// Ideally, documentIsLoaded would only be called once, but if a
// script is dynamically added from an async script after
// documentIsLoaded is already called, then ScriptManager will call
// it again.
return;
}
self.load_state = .load;
HTMLDocument.documentIsLoaded(self.window.document, self) catch |err| {
log.err(.browser, "document is loaded", .{ .err = err });
};
}
pub fn documentIsComplete(self: *Page) void {
std.debug.assert(self.document_state != .complete);
if (self.load_state == .complete) {
// Ideally, documentIsComplete would only be called once, but with
// dynamic scripts, it can be hard to keep track of that. An async
// script could be evaluated AFTER Loaded and Complete and load its
// own non-async script - which, upon completion, needs to check
// whether Loaded/Complete have already been called, which is what
// this guard is.
return;
}
// documentIsComplete could be called directly, without first calling
// documentIsLoaded, if there were _only_ async scrypts
if (self.document_state == .parsing) {
// documentIsLoaded, if there were _only_ async scripts
if (self.load_state == .parsing) {
self.documentIsLoaded();
}
self.document_state = .complete;
self.load_state = .complete;
self._documentIsComplete() catch |err| {
log.err(.browser, "document is complete", .{ .err = err });
};

View File

@@ -134,7 +134,7 @@ pub fn abort(self: *Client) void {
pub fn tick(self: *Client, timeout_ms: usize) !void {
var handles = &self.handles;
while (true) {
if (handles.isEmpty()) {
if (handles.hasAvailable() == false) {
break;
}
const queue_node = self.queue.popFirst() orelse break;
@@ -234,35 +234,31 @@ fn perform(self: *Client, timeout_ms: c_int) !void {
try errorMCheck(c.curl_multi_poll(multi, null, 0, timeout_ms, null));
}
while (true) {
var remaining: c_int = undefined;
const msg: *c.CURLMsg = c.curl_multi_info_read(multi, &remaining) orelse break;
if (msg.msg == c.CURLMSG_DONE) {
const easy = msg.easy_handle.?;
var messages_count: c_int = 0;
while (c.curl_multi_info_read(multi, &messages_count)) |msg_| {
const msg: *c.CURLMsg = @ptrCast(msg_);
// This is the only possible message type from CURL for now.
std.debug.assert(msg.msg == c.CURLMSG_DONE);
const transfer = try Transfer.fromEasy(easy);
const easy = msg.easy_handle.?;
const ctx = transfer.ctx;
const done_callback = transfer.req.done_callback;
const error_callback = transfer.req.error_callback;
const transfer = try Transfer.fromEasy(easy);
const ctx = transfer.ctx;
const done_callback = transfer.req.done_callback;
const error_callback = transfer.req.error_callback;
// release it ASAP so that it's available; some done_callbacks
// will load more resources.
self.endTransfer(transfer);
// release it ASAP so that it's available; some done_callbacks
// will load more resources.
self.endTransfer(transfer);
if (errorCheck(msg.data.result)) {
done_callback(ctx) catch |err| {
// transfer isn't valid at this point, don't use it.
log.err(.http, "done_callback", .{ .err = err });
error_callback(ctx, err);
};
} else |err| {
if (errorCheck(msg.data.result)) {
done_callback(ctx) catch |err| {
// transfer isn't valid at this point, don't use it.
log.err(.http, "done_callback", .{ .err = err });
error_callback(ctx, err);
}
}
if (remaining == 0) {
break;
};
} else |err| {
error_callback(ctx, err);
}
}
}
@@ -316,8 +312,8 @@ const Handles = struct {
allocator.free(self.handles);
}
fn isEmpty(self: *const Handles) bool {
return self.available.first == null;
fn hasAvailable(self: *const Handles) bool {
return self.available.first != null;
}
fn getFreeHandle(self: *Handles) ?*Handle {
@@ -365,7 +361,7 @@ const Handle = struct {
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HEADERDATA, easy));
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_HEADERFUNCTION, Transfer.headerCallback));
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEDATA, easy));
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEFUNCTION, Transfer.bodyCallback));
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_WRITEFUNCTION, Transfer.dataCallback));
// tls
if (opts.tls_verify_host) {
@@ -534,7 +530,7 @@ pub const Transfer = struct {
return buf_len;
}
fn bodyCallback(buffer: [*]const u8, chunk_count: usize, chunk_len: usize, data: *anyopaque) callconv(.c) usize {
fn dataCallback(buffer: [*]const u8, chunk_count: usize, chunk_len: usize, data: *anyopaque) callconv(.c) usize {
// libcurl should only ever emit 1 chunk at a time
std.debug.assert(chunk_count == 1);