mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-28 22:53:28 +00:00
Add Accept-Encoding
This is necessary because of CloudFront, which will send gzip content even if we don't ask for it. Properly handle scripts that are both async and defer. Add a helper that prints the state of a page wait; this can help identify what's causing the page to hang in page.wait.
This commit is contained in:
@@ -34,6 +34,9 @@ const ScriptManager = @This();
|
||||
|
||||
page: *Page,
|
||||
|
||||
// used to prevent recursive evaluation
|
||||
is_evaluating: bool,
|
||||
|
||||
// Only once this is true can deferred scripts be run
|
||||
static_scripts_done: bool,
|
||||
|
||||
@@ -48,6 +51,8 @@ scripts: OrderList,
|
||||
// dom_loaded == true,
|
||||
deferreds: OrderList,
|
||||
|
||||
shutdown: bool = false,
|
||||
|
||||
client: *HttpClient,
|
||||
allocator: Allocator,
|
||||
buffer_pool: BufferPool,
|
||||
@@ -63,6 +68,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
|
||||
.asyncs = .{},
|
||||
.scripts = .{},
|
||||
.deferreds = .{},
|
||||
.is_evaluating = false,
|
||||
.allocator = allocator,
|
||||
.client = browser.http_client,
|
||||
.static_scripts_done = false,
|
||||
@@ -72,6 +78,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
|
||||
}
|
||||
|
||||
// Release all resources owned by the ScriptManager.
// NOTE(review): assumes reset() drops any still-pending/async/deferred
// scripts before the backing pools are freed — confirm against reset().
pub fn deinit(self: *ScriptManager) void {
    self.reset();
    self.buffer_pool.deinit();
    self.script_pool.deinit();
}
|
||||
@@ -193,7 +200,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
|
||||
};
|
||||
|
||||
if (source == .@"inline") {
|
||||
// if we're here, it means that we have pending scripts (i.e. self.ordered
|
||||
// if we're here, it means that we have pending scripts (i.e. self.scripts
|
||||
// is not empty). Because the script is inline, it's complete/ready, but
|
||||
// we need to process them in order
|
||||
pending_script.complete = true;
|
||||
@@ -201,9 +208,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
|
||||
return;
|
||||
}
|
||||
|
||||
const list = self.getList(&pending_script.script);
|
||||
pending_script.node = .{ .data = pending_script };
|
||||
list.append(&pending_script.node);
|
||||
self.getList(&pending_script.script).append(&pending_script.node);
|
||||
|
||||
errdefer pending_script.deinit();
|
||||
|
||||
@@ -255,7 +261,17 @@ pub fn staticScriptsDone(self: *ScriptManager) void {
|
||||
// try to evaluate completed scripts (in order). This is called whenever a script
|
||||
// is completed.
|
||||
fn evaluate(self: *ScriptManager) void {
|
||||
if (self.is_evaluating) {
|
||||
// It's possible for a script.eval to cause evaluate to be called again.
|
||||
// This is particularly true with blockingGet, but even without this,
|
||||
// it's theoretically possible (but unlikely). We could make this work
|
||||
// but there's little reason to support the complexity.
|
||||
return;
|
||||
}
|
||||
|
||||
const page = self.page;
|
||||
self.is_evaluating = true;
|
||||
defer self.is_evaluating = false;
|
||||
|
||||
while (self.scripts.first) |n| {
|
||||
var pending_script = n.data;
|
||||
@@ -269,8 +285,8 @@ fn evaluate(self: *ScriptManager) void {
|
||||
if (self.static_scripts_done == false) {
|
||||
// We can only execute deferred scripts if
|
||||
// 1 - all the normal scripts are done
|
||||
// 2 - and we've loaded all the normal scripts
|
||||
// The last one isn't obvious, but it's possible for self.scripts to/
|
||||
// 2 - we've finished parsing the HTML and at least queued all the scripts
|
||||
// The last one isn't obvious, but it's possible for self.scripts to
|
||||
// be empty not because we're done executing all the normal scripts
|
||||
// but because we're done executing some (or maybe none), but we're still
|
||||
// parsing the HTML.
|
||||
@@ -315,14 +331,17 @@ fn asyncDone(self: *ScriptManager) void {
|
||||
}
|
||||
|
||||
// Picks which ordered list a script belongs to based on its flags.
//
// When a script has both the async and defer flag set, it should be
// treated as async. Async is newer, so some websites use both so that
// if async isn't known, it'll fallback to defer. For that reason the
// async check MUST come before the defer check — a leading defer check
// would make the async-wins rule dead code for dual-flag scripts.
fn getList(self: *ScriptManager, script: *const Script) *OrderList {
    if (script.is_async) {
        return &self.asyncs;
    }

    if (script.is_defer) {
        return &self.deferreds;
    }

    return &self.scripts;
}
|
||||
|
||||
@@ -375,16 +394,22 @@ const PendingScript = struct {
|
||||
manager: *ScriptManager,
|
||||
|
||||
fn deinit(self: *PendingScript) void {
|
||||
var manager = self.manager;
|
||||
const script = &self.script;
|
||||
const manager = self.manager;
|
||||
|
||||
if (script.source == .remote) {
|
||||
manager.buffer_pool.release(script.source.remote);
|
||||
}
|
||||
|
||||
manager.getList(script).remove(&self.node);
|
||||
}
|
||||
|
||||
// Unlink this pending script from whichever list currently holds it.
// Safe to call when the script was never linked, or was already
// removed: in that case node is null and this is a no-op.
fn remove(self: *PendingScript) void {
    if (self.node == null) {
        return;
    }
    self.manager.getList(&self.script).remove(&self.node.?);
    self.node = null;
}
|
||||
|
||||
fn startCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
|
||||
_ = self;
|
||||
log.debug(.http, "script fetch start", .{ .req = transfer });
|
||||
@@ -392,19 +417,25 @@ const PendingScript = struct {
|
||||
|
||||
fn headerCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
|
||||
const header = &transfer.response_header.?;
|
||||
if (header.status != 200) {
|
||||
return error.InvalidStatusCode;
|
||||
}
|
||||
|
||||
// @newhttp TODO: pre size based on content-length
|
||||
// @newhttp TODO: max-length enforcement
|
||||
self.script.source = .{ .remote = self.manager.buffer_pool.get() };
|
||||
|
||||
log.debug(.http, "script header", .{
|
||||
.req = transfer,
|
||||
.status = header.status,
|
||||
.content_type = header.contentType(),
|
||||
});
|
||||
|
||||
if (header.status != 200) {
|
||||
return error.InvalidStatusCode;
|
||||
}
|
||||
|
||||
// If this isn't true, then we'll likely leak memory. If you don't
|
||||
// set `CURLOPT_SUPPRESS_CONNECT_HEADERS` and CONNECT to a proxy, this
|
||||
// will fail. This assertion exists to catch incorrect assumptions about
|
||||
// how libcurl works, or about how we've configured it.
|
||||
std.debug.assert(self.script.source.remote.capacity == 0);
|
||||
|
||||
// @newhttp TODO: pre size based on content-length
|
||||
// @newhttp TODO: max-length enforcement
|
||||
self.script.source = .{ .remote = self.manager.buffer_pool.get() };
|
||||
}
|
||||
|
||||
fn dataCallback(self: *PendingScript, transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
@@ -436,9 +467,15 @@ const PendingScript = struct {
|
||||
|
||||
// Invoked when the fetch for this script fails.
fn errorCallback(self: *PendingScript, err: anyerror) void {
    log.warn(.http, "script fetch error", .{ .req = self.script.url, .err = err });

    // Capture the manager before deinit, which unlinks this pending
    // script and may invalidate `self`.
    const manager = self.manager;
    self.deinit();

    // Unless we're shutting down, see whether any completed scripts
    // can now be evaluated.
    if (!manager.shutdown) {
        manager.evaluate();
    }
}
|
||||
};
|
||||
|
||||
@@ -153,6 +153,8 @@ pub const Page = struct {
|
||||
}
|
||||
|
||||
// Tear down the page. The shutdown flag is set *before* aborting
// in-flight requests: abort() fires error callbacks, and the flag stops
// those callbacks from trying to evaluate further scripts mid-teardown.
pub fn deinit(self: *Page) void {
    self.script_manager.shutdown = true;

    self.http_client.abort();
    self.script_manager.deinit();
}
|
||||
@@ -268,6 +270,9 @@ pub const Page = struct {
|
||||
var scheduler = &self.scheduler;
|
||||
var http_client = self.http_client;
|
||||
|
||||
// for debugging
|
||||
// defer self.printWaitAnalysis();
|
||||
|
||||
while (true) {
|
||||
SW: switch (self.mode) {
|
||||
.pre, .raw => {
|
||||
@@ -346,6 +351,56 @@ pub const Page = struct {
|
||||
}
|
||||
}
|
||||
|
||||
// Debug helper: dumps the state page.wait() depends on (mode, load
// state, in-flight requests, queued scripts, scheduled tasks) to help
// identify what's causing a page to hang in page.wait.
fn printWaitAnalysis(self: *Page) void {
    std.debug.print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))});
    std.debug.print("load: {s}\n", .{@tagName(self.load_state)});
    std.debug.print("active requests: {d}\n", .{self.http_client.active});

    {
        std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len});
        var n_ = self.script_manager.scripts.first;
        while (n_) |n| {
            std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
            n_ = n.next;
        }
    }

    {
        std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len});
        var n_ = self.script_manager.deferreds.first;
        while (n_) |n| {
            std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
            n_ = n.next;
        }
    }

    const now = std.time.milliTimestamp();

    {
        std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len});
        var n_ = self.script_manager.asyncs.first;
        while (n_) |n| {
            std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
            n_ = n.next;
        }
    }

    {
        std.debug.print("\nprimary schedule: {d}\n", .{self.scheduler.primary.count()});
        var it = self.scheduler.primary.iterator();
        while (it.next()) |task| {
            // Fixed label: this was copy-pasted from the script loops
            // above as "complete: {any}", but the value is a time
            // offset (ms until the task is due), not a flag.
            std.debug.print(" - {s} due in: {d}ms\n", .{ task.name, task.ms - now });
        }
    }

    {
        std.debug.print("\nsecondary schedule: {d}\n", .{self.scheduler.secondary.count()});
        var it = self.scheduler.secondary.iterator();
        while (it.next()) |task| {
            std.debug.print(" - {s} due in: {d}ms\n", .{ task.name, task.ms - now });
        }
    }
}
|
||||
|
||||
pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
|
||||
var arr: std.ArrayListUnmanaged(u8) = .{};
|
||||
try self.url.origin(arr.writer(arena));
|
||||
|
||||
@@ -118,18 +118,13 @@ pub const Session = struct {
|
||||
|
||||
std.debug.assert(self.page != null);
|
||||
|
||||
// Cleanup is a bit sensitive. We could still have inflight I/O. For
|
||||
// example, we could have an XHR request which is still in the connect
|
||||
// phase. It's important that we clean these up, as they're holding onto
|
||||
// limited resources (like our fixed-sized http state pool).
|
||||
//
|
||||
self.page.?.deinit();
|
||||
self.page = null;
|
||||
|
||||
// RemoveJsContext() will execute the destructor of any type that
|
||||
// registered a destructor (e.g. XMLHttpRequest).
|
||||
self.executor.removeJsContext();
|
||||
|
||||
self.page.?.deinit();
|
||||
self.page = null;
|
||||
|
||||
// clear netsurf memory arena.
|
||||
parser.deinit();
|
||||
|
||||
|
||||
@@ -141,6 +141,11 @@ pub const Connection = struct {
|
||||
}
|
||||
}
|
||||
|
||||
// compression, don't remove this. CloudFront will send gzip content
|
||||
// even if we don't support it, and then it won't be decompressed.
|
||||
// empty string means: use whatever's available
|
||||
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_ACCEPT_ENCODING, ""));
|
||||
|
||||
// debug
|
||||
if (comptime Http.ENABLE_DEBUG) {
|
||||
try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_VERBOSE, @as(c_long, 1)));
|
||||
|
||||
Reference in New Issue
Block a user