Improve page.wait

Allow page.wait to transition page mode.

Optimize initial page load. There is no point running the scheduler until the
initial page is loaded.

Support the ISO-8859-1 charset.
This commit is contained in:
Karl Seguin
2025-08-02 13:13:58 +08:00
parent 77475ca5e4
commit 4244b572d1
4 changed files with 42 additions and 27 deletions

View File

@@ -105,6 +105,8 @@ pub const Mime = struct {
if (std.ascii.eqlIgnoreCase(attribute_value, "utf-8")) {
charset = "UTF-8";
} else if (std.ascii.eqlIgnoreCase(attribute_value, "iso-8859-1")) {
charset = "ISO-8859-1";
} else {
// we only care about null (which we default to UTF-8)
// or UTF-8. If this is actually set (i.e. not null)

View File

@@ -253,22 +253,35 @@ pub const Page = struct {
}
fn _wait(self: *Page, wait_sec: usize) !void {
switch (self.mode) {
.pre, .html, .raw, .parsed => {
// The HTML page was parsed. We now either have JS scripts to
// download, or timeouts to execute, or both.
var ms_remaining = wait_sec * 1000;
var timer = try std.time.Timer.start();
var try_catch: Env.TryCatch = undefined;
try_catch.init(self.main_context);
defer try_catch.deinit();
var try_catch: Env.TryCatch = undefined;
try_catch.init(self.main_context);
defer try_catch.deinit();
var scheduler = &self.scheduler;
var http_client = self.http_client;
var scheduler = &self.scheduler;
var http_client = self.http_client;
var ms_remaining = wait_sec * 1000;
var timer = try std.time.Timer.start();
while (true) {
SW: switch (self.mode) {
.pre, .raw => {
// The main page hasn't started/finished navigating.
// There's no JS to run, and no reason to run the scheduler.
if (http_client.active == 0) {
// haven't started navigating, I guess.
return;
}
// There should only be 1 active http transfer, the main page
std.debug.assert(http_client.active == 1);
try http_client.tick(ms_remaining);
},
.html, .parsed => {
// The HTML page was parsed. We now either have JS scripts to
// download, or timeouts to execute, or both.
while (true) {
// If we have active http transfers, we might as well run
// any "secondary" task, since we won't be exiting this loop
// anyways.
@@ -295,8 +308,7 @@ pub const Page = struct {
}
std.time.sleep(std.time.ns_per_ms * ms);
ms_remaining -= ms;
continue;
break :SW;
}
// We have no active http transfer and no pending
@@ -316,16 +328,16 @@ pub const Page = struct {
log.warn(.user_script, "page wait", .{ .err = msg, .src = "data" });
return error.JsError;
}
},
.err => |err| return err,
.raw_done => return,
}
const ms_elapsed = timer.lap() / 1_000_000;
if (ms_elapsed > ms_remaining) {
return;
}
ms_remaining -= ms_elapsed;
}
},
.err => |err| return err,
.raw_done => return,
const ms_elapsed = timer.lap() / 1_000_000;
if (ms_elapsed > ms_remaining) {
return;
}
ms_remaining -= ms_elapsed;
}
}

View File

@@ -222,7 +222,7 @@ fn perform(self: *Client, timeout_ms: c_int) !void {
if (errorCheck(msg.data.result)) {
done_callback(ctx) catch |err| {
// transfer isn't valid at this point, don't use it.
log.err(.http, "done_callback", .{.err = err});
log.err(.http, "done_callback", .{ .err = err });
error_callback(ctx, err);
};
} else |err| {
@@ -495,14 +495,14 @@ pub const Transfer = struct {
if (buf_len == 2) {
transfer.req.header_done_callback(transfer) catch |err| {
log.err(.http, "header_done_callback", .{.err = err, .req = transfer});
log.err(.http, "header_done_callback", .{ .err = err, .req = transfer });
// returning < buf_len terminates the request
return 0;
};
} else {
if (transfer.req.header_callback) |cb| {
cb(transfer, header) catch |err| {
log.err(.http, "header_callback", .{.err = err, .req = transfer});
log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return 0;
};
}
@@ -525,7 +525,7 @@ pub const Transfer = struct {
}
transfer.req.data_callback(transfer, buffer[0..chunk_len]) catch |err| {
log.err(.http, "data_callback", .{.err = err, .req = transfer});
log.err(.http, "data_callback", .{ .err = err, .req = transfer });
return c.CURL_WRITEFUNC_ERROR;
};
return chunk_len;

View File

@@ -408,6 +408,7 @@ pub const JsRunner = struct {
const html_doc = try parser.documentHTMLParseFromStr(opts.html);
try page.setDocument(html_doc);
page.mode = .{ .parsed = {} };
return .{
.app = app,