Compare commits

..

4 Commits

Author SHA1 Message Date
Karl Seguin
ca76575c2a Add handling for resolving special URLs
Takes inspiration from https://github.com/lightpanda-io/browser/pull/2030 and
fixes https://github.com/lightpanda-io/browser/issues/1994

A url like http:/test gets special treatment. If the scheme, `http:` matches
the base scheme, then it's treated as relative to the base. If it doesn't match
the base scheme, then it's normalized to http://test, e.g. the path becomes
the host.
2026-03-30 17:07:10 +08:00
dinisimys2018
49cef740b2 fix(browser-url): added errdefer for joined result then get resolve error 2026-03-30 10:05:21 +03:00
dinisimys2018
4d812f1e74 fix(browser-url): added errdefer for path 2026-03-30 10:00:08 +03:00
dinisimys2018
3ee1da0ac1 fix resolve path with scheme 2026-03-30 00:34:34 +03:00
6 changed files with 145 additions and 91 deletions

View File

@@ -74,8 +74,6 @@ const EventListeners = struct {
page_network_idle: List = .{},
page_network_almost_idle: List = .{},
page_frame_created: List = .{},
page_dom_content_loaded: List = .{},
page_loaded: List = .{},
http_request_fail: List = .{},
http_request_start: List = .{},
http_request_intercept: List = .{},
@@ -93,8 +91,6 @@ const Events = union(enum) {
page_network_idle: *const PageNetworkIdle,
page_network_almost_idle: *const PageNetworkAlmostIdle,
page_frame_created: *const PageFrameCreated,
page_dom_content_loaded: *const PageDOMContentLoaded,
page_loaded: *const PageLoaded,
http_request_fail: *const RequestFail,
http_request_start: *const RequestStart,
http_request_intercept: *const RequestIntercept,
@@ -141,18 +137,6 @@ pub const PageFrameCreated = struct {
timestamp: u64,
};
pub const PageDOMContentLoaded = struct {
req_id: u32,
frame_id: u32,
timestamp: u64,
};
pub const PageLoaded = struct {
req_id: u32,
frame_id: u32,
timestamp: u64,
};
pub const RequestStart = struct {
transfer: *Transfer,
};

View File

@@ -1325,15 +1325,15 @@ pub const Transfer = struct {
}
}
transfer.req.notification.dispatch(.http_response_header_done, &.{
.transfer = transfer,
});
const proceed = transfer.req.header_callback(transfer) catch |err| {
log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return err;
};
transfer.req.notification.dispatch(.http_response_header_done, &.{
.transfer = transfer,
});
return proceed and transfer.aborted == false;
}

View File

@@ -487,6 +487,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
return error.InjectBlankFailed;
};
}
self.documentIsComplete();
session.notification.dispatch(.page_navigate, &.{
.frame_id = self._frame_id,
@@ -518,8 +519,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
// force next request id manually b/c we won't create a real req.
_ = session.browser.http_client.incrReqId();
self.documentIsComplete();
return;
}
@@ -739,12 +738,6 @@ pub fn _documentIsLoaded(self: *Page) !void {
self.document.asEventTarget(),
event,
);
self._session.notification.dispatch(.page_dom_content_loaded, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.timestamp = timestamp(.monotonic),
});
}
pub fn scriptsCompletedLoading(self: *Page) void {
@@ -803,6 +796,19 @@ pub fn documentIsComplete(self: *Page) void {
self._documentIsComplete() catch |err| {
log.err(.page, "document is complete", .{ .err = err, .type = self._type, .url = self.url });
};
if (self._navigated_options) |no| {
// _navigated_options will be null in special short-circuit cases, like
// "navigating" to about:blank, in which case this notification has
// already been sent
self._session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.opts = no,
.url = self.url,
.timestamp = timestamp(.monotonic),
});
}
}
fn _documentIsComplete(self: *Page) !void {
@@ -821,12 +827,6 @@ fn _documentIsComplete(self: *Page) !void {
try self._event_manager.dispatchDirect(window_target, event, self.window._on_load, .{ .inject_target = false, .context = "page load" });
}
self._session.notification.dispatch(.page_loaded, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.timestamp = timestamp(.monotonic),
});
if (self._event_manager.hasDirectListeners(window_target, "pageshow", self.window._on_pageshow)) {
const pageshow_event = (try PageTransitionEvent.initTrusted(comptime .wrap("pageshow"), .{}, self)).asEvent();
try self._event_manager.dispatchDirect(window_target, pageshow_event, self.window._on_pageshow, .{ .context = "page show" });
@@ -879,19 +879,6 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
});
}
if (self._navigated_options) |no| {
// _navigated_options will be null in special short-circuit cases, like
// "navigating" to about:blank, in which case this notification has
// already been sent
self._session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.opts = no,
.url = self.url,
.timestamp = timestamp(.monotonic),
});
}
return true;
}

View File

@@ -77,6 +77,41 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime
return processResolved(allocator, result, opts);
}
if (path.len >= 4) { // Minimum: "ws:x"
if (std.mem.indexOfScalar(u8, path[0..@min(path.len, 6)], ':')) |pos| {
// we know this isn't a complete URL, else the very first check in
// this function would have handled it.
const possible_special_protocol = path[0..pos];
const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };
for (special_schemes) |special_scheme| {
if (std.ascii.eqlIgnoreCase(possible_special_protocol, special_scheme)) {
const rest = path[pos + 1 ..];
// Check if base has the same scheme
const base_scheme_end = std.mem.indexOf(u8, base, "://") orelse 0;
if (base_scheme_end > 0 and std.ascii.eqlIgnoreCase(base[0..base_scheme_end], special_scheme)) {
// Same scheme - strip it and resolve rest as relative
return resolve(allocator, base, rest, opts);
}
// Different scheme - construct absolute URL
// Skip any leading slashes in rest
var rest_start: usize = 0;
while (rest_start < rest.len and (rest[rest_start] == '/' or rest[rest_start] == '\\')) {
rest_start += 1;
}
const rest_trimmed = rest[rest_start..];
// file: scheme needs empty host (triple slash)
const separator = if (std.mem.eql(u8, special_scheme, "file")) ":///" else "://";
const normalized = try std.mem.joinZ(allocator, "", &.{ special_scheme, separator, rest_trimmed });
return resolve(allocator, "", normalized, opts);
}
}
// Don't know what this is, just try to resolve it through our normal logic
}
}
const scheme_end = std.mem.indexOf(u8, base, "://");
const authority_start = if (scheme_end) |end| end + 3 else 0;
const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len;
@@ -1570,3 +1605,84 @@ test "URL: getOrigin" {
}
}
}
test "URL: resolve path scheme" {
const Case = struct {
base: [:0]const u8,
path: [:0]const u8,
expected: [:0]const u8,
};
const cases = [_]Case{
.{
.base = "https://www.example.com/example",
.path = "https:/about",
.expected = "https://www.example.com/about",
},
.{
.base = "https://www.example.com/example",
.path = "https:about",
.expected = "https://www.example.com/about",
},
.{
.base = "https://www.example.com/example",
.path = "https://about",
.expected = "https://about",
},
.{
.base = "https://www.example.com/example",
.path = "http:about",
.expected = "http://about",
},
.{
.base = "https://www.example.com/example",
.path = "http:/about",
.expected = "http://about",
},
.{
.base = "https://www.example.com/example",
.path = "http://about",
.expected = "http://about",
},
.{
.base = "https://site/",
.path = "https://path",
.expected = "https://path",
},
.{
.base = "http://localhost/",
.path = "data:test",
.expected = "data:test",
},
.{
.base = "https://www.example.com/example",
.path = "ws://about",
.expected = "ws://about",
},
.{
.base = "https://www.example.com/example",
.path = "wss://about",
.expected = "wss://about",
},
.{
.base = "https://www.example.com/example",
.path = "ftp://about",
.expected = "ftp://about",
},
.{
.base = "https://www.example.com/example",
.path = "file://path/to/file",
.expected = "file://path/to/file",
},
.{
.base = "https://www.example.com/example",
.path = "file:/path/to/file",
.expected = "file:///path/to/file",
},
};
for (cases) |case| {
const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
try testing.expectString(case.expected, result);
}
}

View File

@@ -427,8 +427,6 @@ pub fn BrowserContext(comptime CDP_T: type) type {
try notification.register(.page_navigate, self, onPageNavigate);
try notification.register(.page_navigated, self, onPageNavigated);
try notification.register(.page_frame_created, self, onPageFrameCreated);
try notification.register(.page_dom_content_loaded, self, onPageDOMContentLoaded);
try notification.register(.page_loaded, self, onPageLoaded);
}
pub fn deinit(self: *Self) void {
@@ -604,16 +602,6 @@ pub fn BrowserContext(comptime CDP_T: type) type {
return @import("domains/page.zig").pageFrameCreated(self, msg);
}
pub fn onPageDOMContentLoaded(ctx: *anyopaque, msg: *const Notification.PageDOMContentLoaded) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageDOMContentLoaded(self, msg);
}
pub fn onPageLoaded(ctx: *anyopaque, msg: *const Notification.PageLoaded) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageLoaded(self, msg);
}
pub fn onPageNetworkIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkIdle(self, msg);

View File

@@ -385,6 +385,7 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
// things, but no session.
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
@@ -436,9 +437,9 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
// When we actually recreated the context we should have the inspector send
// this event, see: resetContextGroup. Sending this event will tell the
// client that the context ids they had are invalid and the context should
// be dropped. The client will expect us to send new contextCreated events,
// this event, see: resetContextGroup Sending this event will tell the
// client that the context ids they had are invalid and the context shouls
// be dropped The client will expect us to send new contextCreated events,
// such that the client has new id's for the active contexts.
// Only send executionContextsCleared for main frame navigations. For child
// frames (iframes), clearing all contexts would destroy the main frame's
@@ -448,18 +449,6 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
try cdp.sendEvent("Runtime.executionContextsCleared", null, .{ .session_id = session_id });
}
// frameNavigated event
try cdp.sendEvent("Page.frameNavigated", .{
.type = "Navigation",
.frame = Frame{
.id = frame_id,
.url = event.url,
.loaderId = loader_id,
.securityOrigin = bc.security_origin,
.secureContextType = bc.secure_context_type,
},
}, .{ .session_id = session_id });
{
const aux_data = try std.fmt.allocPrint(arena, "{{\"isDefault\":true,\"type\":\"default\",\"frameId\":\"{s}\",\"loaderId\":\"{s}\"}}", .{ frame_id, loader_id });
@@ -509,22 +498,18 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
// chromedp client expects to receive the events is this order.
// see https://github.com/chromedp/chromedp/issues/1558
try cdp.sendEvent("DOM.documentUpdated", null, .{ .session_id = session_id });
}
pub fn pageDOMContentLoaded(bc: anytype, event: *const Notification.PageDOMContentLoaded) !void {
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
var cdp = bc.cdp;
// domContentEventFired event
// TODO: partially hard coded
try cdp.sendEvent(
"Page.domContentEventFired",
.{ .timestamp = timestamp },
.{ .session_id = session_id },
);
// lifecycle DOMContentLoaded event
// TODO: partially hard coded
if (bc.page_life_cycle_events) {
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "DOMContentLoaded",
@@ -532,23 +517,16 @@ pub fn pageDOMContentLoaded(bc: anytype, event: *const Notification.PageDOMConte
.loaderId = loader_id,
}, .{ .session_id = session_id });
}
}
pub fn pageLoaded(bc: anytype, event: *const Notification.PageLoaded) !void {
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
var cdp = bc.cdp;
const frame_id = &id.toFrameId(event.frame_id);
// loadEventFired event
try cdp.sendEvent(
"Page.loadEventFired",
.{ .timestamp = timestamp },
.{ .session_id = session_id },
);
// lifecycle DOMContentLoaded event
if (bc.page_life_cycle_events) {
const loader_id = &id.toLoaderId(event.req_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "load",
@@ -557,6 +535,7 @@ pub fn pageLoaded(bc: anytype, event: *const Notification.PageLoaded) !void {
}, .{ .session_id = session_id });
}
// frameStoppedLoading
return cdp.sendEvent("Page.frameStoppedLoading", .{
.frameId = frame_id,
}, .{ .session_id = session_id });