Merge pull request #776 from lightpanda-io/fix_internal_navigation_deadlocks
Rework internal navigation to prevent deadlocking
@@ -192,7 +192,12 @@ pub const Page = struct {
        const session = self.session;
        const notification = session.browser.notification;

-       log.debug(.http, "navigate", .{ .url = request_url, .reason = opts.reason });
+       log.debug(.http, "navigate", .{
+           .url = request_url,
+           .method = opts.method,
+           .reason = opts.reason,
+           .body = opts.body != null,
+       });

        // if the url is about:blank, nothing to do.
        if (std.mem.eql(u8, "about:blank", request_url.raw)) {
@@ -685,6 +690,8 @@ pub const Page = struct {
            }
        }

+       // We cannot navigate immediately as navigating will delete the DOM tree,
+       // which holds this event's node.
        // As such we schedule the function to be called as soon as possible.
        // The page.arena is safe to use here, but the transfer_arena exists
        // specifically for this type of lifetime.
@@ -699,7 +706,7 @@ pub const Page = struct {
        navi.* = .{
            .opts = opts,
            .session = self.session,
-           .url = try arena.dupe(u8, url),
+           .url = try self.url.resolve(arena, url),
        };
        _ = try self.loop.timeout(0, &navi.navigate_node);
    }
@@ -787,15 +794,77 @@ pub const Page = struct {
};

const DelayedNavigation = struct {
-   url: []const u8,
+   url: URL,
    session: *Session,
    opts: NavigateOpts,
+   initial: bool = true,
    navigate_node: Loop.CallbackNode = .{ .func = delayNavigate },

+   // Navigation is blocking, which is a problem because it can seize up
+   // the loop and deadlock. We can only safely try to navigate to a
+   // new page when we're sure there's at least 1 free slot in the
+   // http client. We handle this in two phases:
+   //
+   // In the first phase, when self.initial == true, we'll shut down the page
+   // and create a new one. The shutdown is important, because it resets the
+   // loop ctx_id and closes the scope. Closing the scope calls our XHR
+   // destructors, which abort requests. This is necessary to make sure our
+   // [blocking] navigate won't block.
+   //
+   // In the 2nd phase, we wait until there's a free http slot so that our
+   // navigate definitely won't block (which could deadlock the system if there
+   // are still pending async requests, which we've seen happen, even after
+   // an abort).
    fn delayNavigate(node: *Loop.CallbackNode, repeat_delay: *?u63) void {
-       _ = repeat_delay;
        const self: *DelayedNavigation = @fieldParentPtr("navigate_node", node);
-       self.session.pageNavigate(self.url, self.opts) catch |err| {

+       const session = self.session;
+       const initial = self.initial;
+
+       if (initial) {
+           session.removePage();
+           _ = session.createPage() catch |err| {
+               log.err(.browser, "delayed navigation page error", .{
+                   .err = err,
+                   .url = self.url,
+               });
+               return;
+           };
+           self.initial = false;
+       }
+
+       if (session.browser.http_client.freeSlotCount() == 0) {
+           log.debug(.browser, "delayed navigate waiting", .{});
+           const delay = 0 * std.time.ns_per_ms;
+
+           // If this isn't the initial check, we can safely re-use the timer
+           // to check again.
+           if (initial == false) {
+               repeat_delay.* = delay;
+               return;
+           }
+
+           // However, if this _is_ the initial check, we called
+           // session.removePage above, and that reset the loop ctx_id.
+           // We can't re-use this timer, because it has the previous ctx_id.
+           // We can create a new timeout though, and that'll get the new ctx_id.
+           //
+           // Page has to be non-null here because we called createPage above.
+           _ = session.page.?.loop.timeout(delay, &self.navigate_node) catch |err| {
+               log.err(.browser, "delayed navigation loop err", .{ .err = err });
+           };
+           return;
+       }
+
+       const page = session.currentPage() orelse return;
+       defer if (!page.delayed_navigation) {
+           // If, while loading the page, we intend to navigate to another
+           // page, then we need to keep the transfer_arena around, as this
+           // sub-navigation is probably using it.
+           _ = session.browser.transfer_arena.reset(.{ .retain_with_limit = 64 * 1024 });
+       };
+
+       return page.navigate(self.url, self.opts) catch |err| {
+           log.err(.browser, "delayed navigation error", .{ .err = err, .url = self.url });
+       };
    }
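For readers skimming the hunk above, the two-phase hand-off in delayNavigate is the core of the fix: first tear the old page down so in-flight requests get aborted, then keep re-arming the timer until the http client has a free slot, and only then run the blocking navigate. Below is a minimal, self-contained Zig sketch of that shape only; FakeSlotPool, DelayedNav and tick() are hypothetical stand-ins, not the project's Loop/Session/StatePool API.

const std = @import("std");

// Hypothetical stand-in for the http client's state pool.
const FakeSlotPool = struct {
    free: usize,

    fn freeSlotCount(self: *const FakeSlotPool) usize {
        return self.free;
    }
};

const DelayedNav = struct {
    initial: bool = true,
    pool: *FakeSlotPool,

    // Returns true once navigation can safely run, false when the caller
    // should re-arm its timer and call tick() again later.
    fn tick(self: *DelayedNav) bool {
        if (self.initial) {
            // Phase 1: tear down the current page so in-flight requests are
            // aborted (the real code calls session.removePage/createPage here).
            self.initial = false;
        }

        // Phase 2: only navigate once a free slot guarantees the blocking
        // navigate cannot wedge the event loop.
        if (self.pool.freeSlotCount() == 0) {
            return false;
        }

        // the real code would call page.navigate(...) here
        return true;
    }
};

pub fn main() void {
    var pool = FakeSlotPool{ .free = 0 };
    var nav = DelayedNav{ .pool = &pool };

    std.debug.print("navigated on first tick: {}\n", .{nav.tick()});
    pool.free = 1; // an outstanding request completed and released its slot
    std.debug.print("navigated on second tick: {}\n", .{nav.tick()});
}

The design point is that the retry happens via the loop's own timer rather than by blocking inside a callback, so the loop keeps draining the requests whose completion frees the slot.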
@@ -22,6 +22,7 @@ const Allocator = std.mem.Allocator;

const Env = @import("env.zig").Env;
const Page = @import("page.zig").Page;
+const URL = @import("../url.zig").URL;
const Browser = @import("browser.zig").Browser;
const NavigateOpts = @import("page.zig").NavigateOpts;

@@ -72,7 +73,7 @@ pub const Session = struct {

    pub fn deinit(self: *Session) void {
        if (self.page != null) {
-           self.removePage() catch {};
+           self.removePage();
        }
        self.cookie_jar.deinit();
        self.storage_shed.deinit();
@@ -104,7 +105,7 @@ pub const Session = struct {
        return page;
    }

-   pub fn removePage(self: *Session) !void {
+   pub fn removePage(self: *Session) void {
        // Inform CDP the page is going to be removed, allowing other worlds to remove themselves before the main one
        self.browser.notification.dispatch(.page_remove, .{});

@@ -127,12 +128,6 @@ pub const Session = struct {
        // window.setTimeout and running microtasks should be ignored
        self.browser.app.loop.reset();

-       // Finally, we run the loop. Because of the reset just above, this will
-       // ignore any timeouts. And, because of the endScope above this, it
-       // should ensure that the http requests detect the shutdown socket and
-       // release their resources.
-       try self.browser.app.loop.run();
-
        self.page = null;

        // clear netsurf memory arena.
@@ -144,28 +139,4 @@ pub const Session = struct {
    pub fn currentPage(self: *Session) ?*Page {
        return &(self.page orelse return null);
    }
-
-   pub fn pageNavigate(self: *Session, url_string: []const u8, opts: NavigateOpts) !void {
-       // currently, this is only called from the page, so let's hope
-       // it isn't null!
-       std.debug.assert(self.page != null);
-
-       defer if (self.page) |*p| {
-           if (!p.delayed_navigation) {
-               // If, while loading the page, we intend to navigate to another
-               // page, then we need to keep the transfer_arena around, as this
-               // sub-navigation is probably using it.
-               _ = self.browser.transfer_arena.reset(.{ .retain_with_limit = 64 * 1024 });
-           }
-       };
-
-       // it's safe to use the transfer arena here, because the page will
-       // eventually clone the URL using its own page_arena (after it gets
-       // the final URL, possibly following redirects)
-       const url = try self.page.?.url.resolve(self.transfer_arena, url_string);
-
-       try self.removePage();
-       var page = try self.createPage();
-       return page.navigate(url, opts);
-   }
};
@@ -341,10 +341,17 @@ pub const XMLHttpRequest = struct {
        log.debug(.script_event, "dispatch event", .{
            .type = typ,
            .source = "xhr",
            .method = self.method,
            .url = self.url,
        });
        self._dispatchEvt(typ) catch |err| {
-           log.err(.app, "dispatch event error", .{ .err = err, .type = typ, .source = "xhr" });
+           log.err(.app, "dispatch event error", .{
+               .err = err,
+               .type = typ,
+               .source = "xhr",
+               .method = self.method,
+               .url = self.url,
+           });
        };
    }
@@ -365,10 +372,17 @@ pub const XMLHttpRequest = struct {
        log.debug(.script_event, "dispatch progress event", .{
            .type = typ,
            .source = "xhr",
            .method = self.method,
            .url = self.url,
        });
        self._dispatchProgressEvent(typ, opts) catch |err| {
-           log.err(.app, "dispatch progress event error", .{ .err = err, .type = typ, .source = "xhr" });
+           log.err(.app, "dispatch progress event error", .{
+               .err = err,
+               .type = typ,
+               .source = "xhr",
+               .method = self.method,
+               .url = self.url,
+           });
        };
    }
@@ -220,7 +220,7 @@ fn closeTarget(cmd: anytype) !void {
        bc.session_id = null;
    }

-   try bc.session.removePage();
+   bc.session.removePage();
    if (bc.isolated_world) |*world| {
        world.deinit();
        bc.isolated_world = null;
@@ -113,11 +113,19 @@ pub const Client = struct {
        loop: *Loop,
        opts: RequestOpts,
    ) !void {
+
+       // See the page's DelayedNavigation for why we're doing this. TL;DR -
+       // we need to keep 1 slot available for the blocking page navigation flow.
+       // (Almost worth keeping a dedicated State just for that flow, but I keep
+       // thinking we need a more permanent solution, i.e. making everything
+       // non-blocking.)
+       if (self.freeSlotCount() > 1) {
            if (self.state_pool.acquireOrNull()) |state| {
                // if we have state ready, we can skip the loop and immediately
                // kick this request off.
                return self.asyncRequestReady(method, uri, ctx, callback, state, opts);
            }
+       }

        // This cannot be a client-owned MemoryPool. The page can end before
        // this is ever completed (and the check callback will never be called).
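The `> 1` guard above encodes a reservation rule: asynchronous requests may only use the fast path while more than one slot is free, so the last slot always stays available for the blocking navigate. A simplified sketch of that rule, using a plain integer in place of the real StatePool (Slots, tryAcquireAsync and acquireBlocking are hypothetical names):

const std = @import("std");

const Slots = struct {
    free: usize,

    // async callers may only take a slot while more than one remains,
    // so the last slot stays reserved for the blocking navigation path.
    fn tryAcquireAsync(self: *Slots) bool {
        if (self.free > 1) {
            self.free -= 1;
            return true;
        }
        return false;
    }

    // the blocking navigate is the only caller allowed to take the last slot.
    fn acquireBlocking(self: *Slots) bool {
        if (self.free > 0) {
            self.free -= 1;
            return true;
        }
        return false;
    }
};

pub fn main() void {
    var slots = Slots{ .free = 2 };
    std.debug.print("async got a slot: {}\n", .{slots.tryAcquireAsync()}); // true, one slot left
    std.debug.print("async got a slot: {}\n", .{slots.tryAcquireAsync()}); // false, last slot is reserved
    std.debug.print("navigate got a slot: {}\n", .{slots.acquireBlocking()}); // true
}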
@@ -174,6 +182,10 @@ pub const Client = struct {
            .client = self,
        };
    }
+
+   pub fn freeSlotCount(self: *Client) usize {
+       return self.state_pool.freeSlotCount();
+   }
};

const RequestOpts = struct {
@@ -2531,6 +2543,12 @@ const StatePool = struct {
        allocator.free(self.states);
    }

+   pub fn freeSlotCount(self: *StatePool) usize {
+       self.mutex.lock();
+       defer self.mutex.unlock();
+       return self.available;
+   }
+
    pub fn acquireWait(self: *StatePool) *State {
        const states = self.states;
@@ -3022,8 +3040,14 @@ test "HttpClient: async connect error" {
        .{},
    );

-   try loop.io.run_for_ns(std.time.ns_per_ms);
-   try reset.timedWait(std.time.ns_per_s);
+   for (0..10) |_| {
+       try loop.io.run_for_ns(std.time.ns_per_ms * 10);
+       if (reset.isSet()) {
+           break;
+       }
+   } else {
+       return error.Timeout;
+   }
}

test "HttpClient: async no body" {
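The test change above swaps a single blocking timedWait for a bounded pump-and-check loop: the completion callback only runs when the io loop is pumped, so waiting without pumping can hang. A minimal sketch of the same wait pattern, where FakeLoop is a hypothetical stand-in for the real loop.io and a bool flag stands in for the Thread.ResetEvent used by the actual test:

const std = @import("std");

// FakeLoop models an event loop whose completion is only observable
// when the loop itself is pumped.
const FakeLoop = struct {
    ticks_until_done: usize,
    done: bool = false,

    fn run_for_ns(self: *FakeLoop, _: u64) !void {
        if (self.ticks_until_done == 0) {
            self.done = true;
            return;
        }
        self.ticks_until_done -= 1;
    }
};

pub fn main() !void {
    var loop = FakeLoop{ .ticks_until_done = 3 };

    // Same shape as the updated test: a bounded number of small pumps,
    // bailing out with error.Timeout if completion never shows up.
    for (0..10) |_| {
        try loop.run_for_ns(std.time.ns_per_ms * 10);
        if (loop.done) break;
    } else return error.Timeout;

    std.debug.print("completion observed after pumping the loop\n", .{});
}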
@@ -81,12 +81,13 @@ pub const Loop = struct {

        // run tail events. We do run the tail events to ensure all the
        // contexts are correctly freed.
-       while (self.hasPendinEvents()) {
-           self.io.run_for_ns(10 * std.time.ns_per_ms) catch |err| {
+       while (self.pending_network_count != 0 or self.pending_timeout_count != 0) {
+           self.io.run_for_ns(std.time.ns_per_ms * 10) catch |err| {
                log.err(.loop, "deinit", .{ .err = err });
+               break;
            };
        }

        if (comptime CANCEL_SUPPORTED) {
            self.io.cancel_all();
        }
@@ -96,21 +97,6 @@ pub const Loop = struct {
        self.cancelled.deinit(self.alloc);
    }

-   // We can shut down once all the pending network IO is complete.
-   // In debug mode we also wait until all the pending timeouts are complete,
-   // but we only do this so that the `timeoutCallback` can free all allocated
-   // memory and we won't report a leak.
-   fn hasPendinEvents(self: *const Self) bool {
-       if (self.pending_network_count > 0) {
-           return true;
-       }
-
-       if (builtin.mode != .Debug) {
-           return false;
-       }
-       return self.pending_timeout_count > 0;
-   }
-
    // Retrieve all registered I/O events completed by the OS kernel,
    // and execute their callbacks sequentially.
    // Stops when there are no more I/O events registered on the loop.
@@ -121,9 +107,11 @@ pub const Loop = struct {
        self.stopping = true;
        defer self.stopping = false;

-       while (self.pending_network_count > 0) {
-           try self.io.run_for_ns(10 * std.time.ns_per_ms);
+       // at each iteration we might have new events registered by previous callbacks
+       while (self.pending_network_count != 0 or self.pending_timeout_count != 0) {
+           self.io.run_for_ns(std.time.ns_per_ms * 10) catch |err| {
+               log.err(.loop, "deinit", .{ .err = err });
+               break;
+           };
        }
    }