Merge pull request #1562 from lightpanda-io/robots-cdp-failure

dispatch .page_navigated event on page error callback and create HTML page
This commit is contained in:
Pierre Tachoire
2026-02-17 14:17:44 +01:00
committed by GitHub
2 changed files with 74 additions and 13 deletions

View File

@@ -38,6 +38,10 @@ pub const ContentTypeEnum = enum {
text_javascript,
text_plain,
text_css,
image_jpeg,
image_gif,
image_png,
image_webp,
application_json,
unknown,
other,
@@ -49,6 +53,10 @@ pub const ContentType = union(ContentTypeEnum) {
text_javascript: void,
text_plain: void,
text_css: void,
image_jpeg: void,
image_gif: void,
image_png: void,
image_webp: void,
application_json: void,
unknown: void,
other: struct { type: []const u8, sub_type: []const u8 },
@@ -61,6 +69,10 @@ pub fn contentTypeString(mime: *const Mime) []const u8 {
.text_javascript => "application/javascript",
.text_plain => "text/plain",
.text_css => "text/css",
.image_jpeg => "image/jpeg",
.image_png => "image/png",
.image_gif => "image/gif",
.image_webp => "image/webp",
.application_json => "application/json",
else => "",
};
@@ -243,6 +255,11 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
@"application/javascript",
@"application/x-javascript",
@"image/jpeg",
@"image/png",
@"image/gif",
@"image/webp",
@"application/json",
}, type_name)) |known_type| {
const ct: ContentType = switch (known_type) {
@@ -251,6 +268,10 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
.@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
.@"text/plain" => .{ .text_plain = {} },
.@"text/css" => .{ .text_css = {} },
.@"image/jpeg" => .{ .image_jpeg = {} },
.@"image/png" => .{ .image_png = {} },
.@"image/gif" => .{ .image_gif = {} },
.@"image/webp" => .{ .image_webp = {} },
.@"application/json" => .{ .application_json = {} },
};
return .{ ct, attribute_start };
@@ -358,6 +379,11 @@ test "Mime: parse common" {
try expect(.{ .content_type = .{ .application_json = {} } }, "application/json");
try expect(.{ .content_type = .{ .text_css = {} } }, "text/css");
try expect(.{ .content_type = .{ .image_jpeg = {} } }, "image/jpeg");
try expect(.{ .content_type = .{ .image_png = {} } }, "image/png");
try expect(.{ .content_type = .{ .image_gif = {} } }, "image/gif");
try expect(.{ .content_type = .{ .image_webp = {} } }, "image/webp");
}
test "Mime: parse uncommon" {

View File

@@ -708,7 +708,10 @@ fn pageDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
try arr.appendSlice(self.arena, "<html><head><meta charset=\"utf-8\"></head><body><pre>");
self._parse_state = .{ .text = arr };
},
else => self._parse_state = .{ .raw = .{} },
.image_jpeg, .image_gif, .image_png, .image_webp => {
self._parse_state = .{ .image = .empty };
},
else => self._parse_state = .{ .raw = .empty },
}
}
@@ -730,7 +733,7 @@ fn pageDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
v = v[index + 1 ..];
}
},
.raw => |*buf| try buf.appendSlice(self.arena, data),
.raw, .image => |*buf| try buf.appendSlice(self.arena, data),
.pre => unreachable,
.complete => unreachable,
.err => unreachable,
@@ -753,12 +756,13 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
log.debug(.page, "page.load.complete", .{ .url = self.url });
};
const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
switch (self._parse_state) {
.html => |buf| {
const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
parser.parse(buf.items);
self._script_manager.staticScriptsDone();
if (self._script_manager.isDone()) {
@@ -770,16 +774,26 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
},
.text => |*buf| {
try buf.appendSlice(self.arena, "</pre></body></html>");
const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
parser.parse(buf.items);
self.documentIsComplete();
},
.image => |buf| {
self._parse_state = .{ .raw_done = buf.items };
// Use an HTML document containing the image.
const html = try std.mem.concat(parse_arena, u8, &.{
"<html><head><meta charset=\"utf-8\"></head><body><img src=\"",
self.url,
"\"></body></htm>",
});
parser.parse(html);
self.documentIsComplete();
},
.raw => |buf| {
self._parse_state = .{ .raw_done = buf.items };
// Use an empty HTML document.
parser.parse("<html><head><meta charset=\"utf-8\"></head><body></body></htm>");
self.documentIsComplete();
},
.pre => {
@@ -787,6 +801,20 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
// We assume we have received an OK status (checked in Client.headerCallback)
// so we load a blank document to navigate away from any prior page.
self._parse_state = .{ .complete = {} };
// Use an empty HTML document.
parser.parse("<html><head><meta charset=\"utf-8\"></head><body></body></htm>");
self.documentIsComplete();
},
.err => |err| {
// Generate a pseudo HTML page indicating the failure.
const html = try std.mem.concat(parse_arena, u8, &.{
"<html><head><meta charset=\"utf-8\"></head><body><h1>Navigation failed</h1><p>Reason: ",
@errorName(err),
"</p></body></htm>",
});
parser.parse(html);
self.documentIsComplete();
},
else => unreachable,
@@ -797,8 +825,14 @@ fn pageErrorCallback(ctx: *anyopaque, err: anyerror) void {
log.err(.page, "navigate failed", .{ .err = err });
var self: *Page = @ptrCast(@alignCast(ctx));
self.clearTransferArena();
self._parse_state = .{ .err = err };
// In case of error, we want to complete the page with a custom HTML
// containing the error.
pageDoneCallback(ctx) catch |e| {
log.err(.browser, "pageErrorCallback", .{ .err = e });
return;
};
}
// The transfer arena is useful and interesting, but has a weird lifetime.
@@ -859,7 +893,7 @@ fn _wait(self: *Page, wait_ms: u32) !Session.WaitResult {
while (true) {
switch (self._parse_state) {
.pre, .raw, .text => {
.pre, .raw, .text, .image => {
// The main page hasn't started/finished navigating.
// There's no JS to run, and no reason to run the scheduler.
if (http_client.active == 0 and exit_when_done) {
@@ -2827,6 +2861,7 @@ const ParseState = union(enum) {
err: anyerror,
html: std.ArrayList(u8),
text: std.ArrayList(u8),
image: std.ArrayList(u8),
raw: std.ArrayList(u8),
raw_done: []const u8,
};