Mirror of https://github.com/lightpanda-io/browser.git (synced 2025-10-29 15:13:28 +00:00)
Add Accept-Encoding
This is necessary because CloudFront will send gzip content even if we don't ask for it. Also properly handle scripts that are both async and defer, and add a helper to print the state of the page wait, which can be helpful in identifying what's causing a page to hang in page.wait.
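The Accept-Encoding fix relies on a libcurl behavior: setting CURLOPT_ACCEPT_ENCODING to an empty string makes libcurl advertise every encoding it was built with and transparently decompress the response before the write callback sees it. A minimal standalone sketch of the two curl options this commit touches or references (illustrative only, assuming libcurl is linked and imported through @cImport; error handling omitted):

```zig
const c = @cImport(@cInclude("curl/curl.h"));

pub fn main() !void {
    const easy = c.curl_easy_init() orelse return error.CurlInitFailed;
    defer c.curl_easy_cleanup(easy);

    // Empty string: advertise every encoding libcurl supports (gzip, deflate,
    // br, ...) and decompress the body transparently. Without it, a server
    // such as CloudFront may still answer with gzip and the caller would see
    // compressed bytes.
    _ = c.curl_easy_setopt(easy, c.CURLOPT_ACCEPT_ENCODING, "");

    // Related option referenced further down in the header-callback assertion:
    // when tunneling through a proxy with CONNECT, keep the proxy's response
    // headers out of the normal header callback.
    _ = c.curl_easy_setopt(easy, c.CURLOPT_SUPPRESS_CONNECT_HEADERS, @as(c_long, 1));

    _ = c.curl_easy_setopt(easy, c.CURLOPT_URL, "https://example.com/");
    _ = c.curl_easy_perform(easy);
}
```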
@@ -34,6 +34,9 @@ const ScriptManager = @This();
 
 page: *Page,
 
+// used to prevent recursive evalution
+is_evaluating: bool,
+
 // Only once this is true can deferred scripts be run
 static_scripts_done: bool,
 
@@ -48,6 +51,8 @@ scripts: OrderList,
 // dom_loaded == true,
 deferreds: OrderList,
 
+shutdown: bool = false,
+
 client: *HttpClient,
 allocator: Allocator,
 buffer_pool: BufferPool,
@@ -63,6 +68,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
         .asyncs = .{},
         .scripts = .{},
         .deferreds = .{},
+        .is_evaluating = false,
         .allocator = allocator,
         .client = browser.http_client,
         .static_scripts_done = false,
@@ -72,6 +78,7 @@ pub fn init(browser: *Browser, page: *Page) ScriptManager {
 }
 
 pub fn deinit(self: *ScriptManager) void {
+    self.reset();
     self.buffer_pool.deinit();
     self.script_pool.deinit();
 }
@@ -193,7 +200,7 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
     };
 
     if (source == .@"inline") {
-        // if we're here, it means that we have pending scripts (i.e. self.ordered
+        // if we're here, it means that we have pending scripts (i.e. self.scripts
         // is not empty). Because the script is inline, it's complete/ready, but
         // we need to process them in order
         pending_script.complete = true;
@@ -201,9 +208,8 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element) !void {
         return;
     }
 
-    const list = self.getList(&pending_script.script);
     pending_script.node = .{ .data = pending_script };
-    list.append(&pending_script.node);
+    self.getList(&pending_script.script).append(&pending_script.node);
 
     errdefer pending_script.deinit();
 
@@ -255,7 +261,17 @@ pub fn staticScriptsDone(self: *ScriptManager) void {
 // try to evaluate completed scripts (in order). This is called whenever a script
 // is completed.
 fn evaluate(self: *ScriptManager) void {
+    if (self.is_evaluating) {
+        // It's possible for a script.eval to cause evaluate to be called again.
+        // This is particularly true with blockingGet, but even without this,
+        // it's theoretically possible (but unlikely). We could make this work
+        // but there's little reason to support the complexity.
+        return;
+    }
+
     const page = self.page;
+    self.is_evaluating = true;
+    defer self.is_evaluating = false;
 
     while (self.scripts.first) |n| {
         var pending_script = n.data;
@@ -269,8 +285,8 @@ fn evaluate(self: *ScriptManager) void {
     if (self.static_scripts_done == false) {
         // We can only execute deferred scripts if
         // 1 - all the normal scripts are done
-        // 2 - and we've loaded all the normal scripts
-        // The last one isn't obvious, but it's possible for self.scripts to/
+        // 2 - we've finished parsing the HTML and at least queued all the scripts
+        // The last one isn't obvious, but it's possible for self.scripts to
         // be empty not because we're done executing all the normal scripts
         // but because we're done executing some (or maybe none), but we're still
        // parsing the HTML.
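The is_evaluating guard added above is a plain re-entrancy flag: evaluating one script can synchronously call evaluate() again (blockingGet being the obvious path), and the nested call should simply bail out while the outer invocation keeps draining the queue. A stripped-down sketch of the pattern, using a hypothetical Runner type rather than the real ScriptManager:

```zig
const std = @import("std");

const Runner = struct {
    is_evaluating: bool = false,
    outer_calls: usize = 0,

    fn evaluate(self: *Runner) void {
        if (self.is_evaluating) {
            // A nested call, triggered while a script is already executing,
            // is a no-op; the outer invocation keeps processing the queue.
            return;
        }
        self.is_evaluating = true;
        defer self.is_evaluating = false;

        self.outer_calls += 1;
        // Simulate a script whose execution re-enters evaluate().
        self.evaluate();
    }
};

test "nested evaluate() calls are ignored while one is in flight" {
    var r = Runner{};
    r.evaluate();
    try std.testing.expectEqual(@as(usize, 1), r.outer_calls);
}
```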
@@ -315,14 +331,17 @@ fn asyncDone(self: *ScriptManager) void {
 }
 
 fn getList(self: *ScriptManager, script: *const Script) *OrderList {
-    if (script.is_defer) {
-        return &self.deferreds;
-    }
-
+    // When a script has both the async and defer flag set, it should be
+    // treated as async. Async is newer, so some websites use both so that
+    // if async isn't known, it'll fallback to defer.
     if (script.is_async) {
         return &self.asyncs;
     }
 
+    if (script.is_defer) {
+        return &self.deferreds;
+    }
+
     return &self.scripts;
 }
 
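The reordered getList() encodes the precedence described in the new comment: async wins when both attributes are present, defer applies only when async is absent, and everything else stays in the ordered list of regular scripts. A self-contained illustration of that rule; the Kind enum and classify() helper are invented for this sketch and are not part of ScriptManager:

```zig
const std = @import("std");

const Kind = enum { ordered, async_list, defer_list };

// Mirrors the branch order of getList(): async first, then defer, then ordered.
fn classify(is_async: bool, is_defer: bool) Kind {
    if (is_async) return .async_list;
    if (is_defer) return .defer_list;
    return .ordered;
}

test "a script with both async and defer is treated as async" {
    try std.testing.expectEqual(Kind.async_list, classify(true, true));
    try std.testing.expectEqual(Kind.async_list, classify(true, false));
    try std.testing.expectEqual(Kind.defer_list, classify(false, true));
    try std.testing.expectEqual(Kind.ordered, classify(false, false));
}
```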
@@ -375,16 +394,22 @@ const PendingScript = struct {
     manager: *ScriptManager,
 
     fn deinit(self: *PendingScript) void {
-        var manager = self.manager;
         const script = &self.script;
+        const manager = self.manager;
 
         if (script.source == .remote) {
             manager.buffer_pool.release(script.source.remote);
         }
-
         manager.getList(script).remove(&self.node);
     }
 
+    fn remove(self: *PendingScript) void {
+        if (self.node) |*node| {
+            self.manager.getList(&self.script).remove(node);
+            self.node = null;
+        }
+    }
+
     fn startCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
         _ = self;
         log.debug(.http, "script fetch start", .{ .req = transfer });
@@ -392,19 +417,25 @@ const PendingScript = struct {
 
     fn headerCallback(self: *PendingScript, transfer: *HttpClient.Transfer) !void {
         const header = &transfer.response_header.?;
-        if (header.status != 200) {
-            return error.InvalidStatusCode;
-        }
-
-        // @newhttp TODO: pre size based on content-length
-        // @newhttp TODO: max-length enfocement
-        self.script.source = .{ .remote = self.manager.buffer_pool.get() };
-
         log.debug(.http, "script header", .{
             .req = transfer,
             .status = header.status,
             .content_type = header.contentType(),
         });
+
+        if (header.status != 200) {
+            return error.InvalidStatusCode;
+        }
+
+        // If this isn't true, then we'll likely leak memory. If you don't
+        // set `CURLOPT_SUPPRESS_CONNECT_HEADERS` and CONNECT to a proxy, this
+        // will fail. This assertion exists to catch incorrect assumptions about
+        // how libcurl works, or about how we've configured it.
+        std.debug.assert(self.script.source.remote.capacity == 0);
+
+        // @newhttp TODO: pre size based on content-length
+        // @newhttp TODO: max-length enfocement
+        self.script.source = .{ .remote = self.manager.buffer_pool.get() };
     }
 
     fn dataCallback(self: *PendingScript, transfer: *HttpClient.Transfer, data: []const u8) !void {
@@ -436,9 +467,15 @@ const PendingScript = struct {
 
     fn errorCallback(self: *PendingScript, err: anyerror) void {
         log.warn(.http, "script fetch error", .{ .req = self.script.url, .err = err });
 
         const manager = self.manager;
+
         self.deinit();
+
+        if (manager.shutdown) {
+            return;
+        }
+
         manager.evaluate();
     }
 };
@@ -153,6 +153,8 @@ pub const Page = struct {
     }
 
     pub fn deinit(self: *Page) void {
+        self.script_manager.shutdown = true;
+
         self.http_client.abort();
         self.script_manager.deinit();
     }
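Page.deinit() now flips the manager's shutdown flag before calling http_client.abort(). The abort still fires each pending script's errorCallback, but with the flag set those callbacks stop short of manager.evaluate(), so no script runs on a page that is being torn down. A toy sketch of that ordering; FakeManager is invented for illustration and is not the repository's type:

```zig
const std = @import("std");

const FakeManager = struct {
    shutdown: bool = false,
    evaluated: bool = false,

    // Stands in for PendingScript.errorCallback above: cleanup would happen
    // here, then evaluation is skipped once shutdown is set.
    fn errorCallback(self: *FakeManager) void {
        if (self.shutdown) return;
        self.evaluated = true;
    }
};

test "callbacks fired by abort() don't evaluate on a dying page" {
    var manager = FakeManager{};

    // Page.deinit(): set the flag first, then abort, which invokes callbacks.
    manager.shutdown = true;
    manager.errorCallback();

    try std.testing.expect(!manager.evaluated);
}
```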
@@ -268,6 +270,9 @@ pub const Page = struct {
         var scheduler = &self.scheduler;
         var http_client = self.http_client;
 
+        // for debugging
+        // defer self.printWaitAnalysis();
+
         while (true) {
             SW: switch (self.mode) {
                 .pre, .raw => {
@@ -346,6 +351,56 @@ pub const Page = struct {
         }
     }
 
+    fn printWaitAnalysis(self: *Page) void {
+        std.debug.print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))});
+        std.debug.print("load: {s}\n", .{@tagName(self.load_state)});
+        std.debug.print("active requests: {d}\n", .{self.http_client.active});
+
+        {
+            std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len});
+            var n_ = self.script_manager.scripts.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        {
+            std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len});
+            var n_ = self.script_manager.deferreds.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        const now = std.time.milliTimestamp();
+        {
+            std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len});
+            var n_ = self.script_manager.asyncs.first;
+            while (n_) |n| {
+                std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
+                n_ = n.next;
+            }
+        }
+
+        {
+            std.debug.print("\nprimary schedule: {d}\n", .{self.scheduler.primary.count()});
+            var it = self.scheduler.primary.iterator();
+            while (it.next()) |task| {
+                std.debug.print(" - {s} complete: {any}\n", .{ task.name, task.ms - now });
+            }
+        }
+
+        {
+            std.debug.print("\nsecondary schedule: {d}\n", .{self.scheduler.secondary.count()});
+            var it = self.scheduler.secondary.iterator();
+            while (it.next()) |task| {
+                std.debug.print(" - {s} complete: {any}\n", .{ task.name, task.ms - now });
+            }
+        }
+    }
+
     pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
         var arr: std.ArrayListUnmanaged(u8) = .{};
         try self.url.origin(arr.writer(arena));
@@ -118,18 +118,13 @@ pub const Session = struct {
 
         std.debug.assert(self.page != null);
 
-        // Cleanup is a bit sensitive. We could still have inflight I/O. For
-        // example, we could have an XHR request which is still in the connect
-        // phase. It's important that we clean these up, as they're holding onto
-        // limited resources (like our fixed-sized http state pool).
-        //
+        self.page.?.deinit();
+        self.page = null;
+
         // RemoveJsContext() will execute the destructor of any type that
         // registered a destructor (e.g. XMLHttpRequest).
         self.executor.removeJsContext();
 
-        self.page.?.deinit();
-        self.page = null;
-
         // clear netsurf memory arena.
         parser.deinit();
 
@@ -141,6 +141,11 @@ pub const Connection = struct {
             }
         }
 
+        // compression, don't remove this. CloudFront will send gzip content
+        // even if we don't support it, and then it won't be decompressed.
+        // empty string means: use whatever's available
+        try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_ACCEPT_ENCODING, ""));
+
         // debug
         if (comptime Http.ENABLE_DEBUG) {
             try errorCheck(c.curl_easy_setopt(easy, c.CURLOPT_VERBOSE, @as(c_long, 1)));