Merge pull request #1562 from lightpanda-io/robots-cdp-failure

dispatch .page_navigated event on page error callback and create HTML page
This commit is contained in:
Pierre Tachoire
2026-02-17 14:17:44 +01:00
committed by GitHub
2 changed files with 74 additions and 13 deletions

View File

@@ -38,6 +38,10 @@ pub const ContentTypeEnum = enum {
text_javascript, text_javascript,
text_plain, text_plain,
text_css, text_css,
image_jpeg,
image_gif,
image_png,
image_webp,
application_json, application_json,
unknown, unknown,
other, other,
@@ -49,6 +53,10 @@ pub const ContentType = union(ContentTypeEnum) {
text_javascript: void, text_javascript: void,
text_plain: void, text_plain: void,
text_css: void, text_css: void,
image_jpeg: void,
image_gif: void,
image_png: void,
image_webp: void,
application_json: void, application_json: void,
unknown: void, unknown: void,
other: struct { type: []const u8, sub_type: []const u8 }, other: struct { type: []const u8, sub_type: []const u8 },
@@ -61,6 +69,10 @@ pub fn contentTypeString(mime: *const Mime) []const u8 {
.text_javascript => "application/javascript", .text_javascript => "application/javascript",
.text_plain => "text/plain", .text_plain => "text/plain",
.text_css => "text/css", .text_css => "text/css",
.image_jpeg => "image/jpeg",
.image_png => "image/png",
.image_gif => "image/gif",
.image_webp => "image/webp",
.application_json => "application/json", .application_json => "application/json",
else => "", else => "",
}; };
@@ -243,6 +255,11 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
@"application/javascript", @"application/javascript",
@"application/x-javascript", @"application/x-javascript",
@"image/jpeg",
@"image/png",
@"image/gif",
@"image/webp",
@"application/json", @"application/json",
}, type_name)) |known_type| { }, type_name)) |known_type| {
const ct: ContentType = switch (known_type) { const ct: ContentType = switch (known_type) {
@@ -251,6 +268,10 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
.@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} }, .@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
.@"text/plain" => .{ .text_plain = {} }, .@"text/plain" => .{ .text_plain = {} },
.@"text/css" => .{ .text_css = {} }, .@"text/css" => .{ .text_css = {} },
.@"image/jpeg" => .{ .image_jpeg = {} },
.@"image/png" => .{ .image_png = {} },
.@"image/gif" => .{ .image_gif = {} },
.@"image/webp" => .{ .image_webp = {} },
.@"application/json" => .{ .application_json = {} }, .@"application/json" => .{ .application_json = {} },
}; };
return .{ ct, attribute_start }; return .{ ct, attribute_start };
@@ -358,6 +379,11 @@ test "Mime: parse common" {
try expect(.{ .content_type = .{ .application_json = {} } }, "application/json"); try expect(.{ .content_type = .{ .application_json = {} } }, "application/json");
try expect(.{ .content_type = .{ .text_css = {} } }, "text/css"); try expect(.{ .content_type = .{ .text_css = {} } }, "text/css");
try expect(.{ .content_type = .{ .image_jpeg = {} } }, "image/jpeg");
try expect(.{ .content_type = .{ .image_png = {} } }, "image/png");
try expect(.{ .content_type = .{ .image_gif = {} } }, "image/gif");
try expect(.{ .content_type = .{ .image_webp = {} } }, "image/webp");
} }
test "Mime: parse uncommon" { test "Mime: parse uncommon" {

View File

@@ -708,7 +708,10 @@ fn pageDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
try arr.appendSlice(self.arena, "<html><head><meta charset=\"utf-8\"></head><body><pre>"); try arr.appendSlice(self.arena, "<html><head><meta charset=\"utf-8\"></head><body><pre>");
self._parse_state = .{ .text = arr }; self._parse_state = .{ .text = arr };
}, },
else => self._parse_state = .{ .raw = .{} }, .image_jpeg, .image_gif, .image_png, .image_webp => {
self._parse_state = .{ .image = .empty };
},
else => self._parse_state = .{ .raw = .empty },
} }
} }
@@ -730,7 +733,7 @@ fn pageDataCallback(transfer: *Http.Transfer, data: []const u8) !void {
v = v[index + 1 ..]; v = v[index + 1 ..];
} }
}, },
.raw => |*buf| try buf.appendSlice(self.arena, data), .raw, .image => |*buf| try buf.appendSlice(self.arena, data),
.pre => unreachable, .pre => unreachable,
.complete => unreachable, .complete => unreachable,
.err => unreachable, .err => unreachable,
@@ -753,12 +756,13 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
log.debug(.page, "page.load.complete", .{ .url = self.url }); log.debug(.page, "page.load.complete", .{ .url = self.url });
}; };
switch (self._parse_state) {
.html => |buf| {
const parse_arena = try self.getArena(.{ .debug = "Page.parse" }); const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
defer self.releaseArena(parse_arena); defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self); var parser = Parser.init(parse_arena, self.document.asNode(), self);
switch (self._parse_state) {
.html => |buf| {
parser.parse(buf.items); parser.parse(buf.items);
self._script_manager.staticScriptsDone(); self._script_manager.staticScriptsDone();
if (self._script_manager.isDone()) { if (self._script_manager.isDone()) {
@@ -770,16 +774,26 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
}, },
.text => |*buf| { .text => |*buf| {
try buf.appendSlice(self.arena, "</pre></body></html>"); try buf.appendSlice(self.arena, "</pre></body></html>");
const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
parser.parse(buf.items); parser.parse(buf.items);
self.documentIsComplete(); self.documentIsComplete();
}, },
.image => |buf| {
self._parse_state = .{ .raw_done = buf.items };
// Use an HTML document containing the image.
const html = try std.mem.concat(parse_arena, u8, &.{
"<html><head><meta charset=\"utf-8\"></head><body><img src=\"",
self.url,
"\"></body></htm>",
});
parser.parse(html);
self.documentIsComplete();
},
.raw => |buf| { .raw => |buf| {
self._parse_state = .{ .raw_done = buf.items }; self._parse_state = .{ .raw_done = buf.items };
// Use an empty HTML document.
parser.parse("<html><head><meta charset=\"utf-8\"></head><body></body></htm>");
self.documentIsComplete(); self.documentIsComplete();
}, },
.pre => { .pre => {
@@ -787,6 +801,20 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
// We assume we have received an OK status (checked in Client.headerCallback) // We assume we have received an OK status (checked in Client.headerCallback)
// so we load a blank document to navigate away from any prior page. // so we load a blank document to navigate away from any prior page.
self._parse_state = .{ .complete = {} }; self._parse_state = .{ .complete = {} };
// Use an empty HTML document.
parser.parse("<html><head><meta charset=\"utf-8\"></head><body></body></htm>");
self.documentIsComplete();
},
.err => |err| {
// Generate a pseudo HTML page indicating the failure.
const html = try std.mem.concat(parse_arena, u8, &.{
"<html><head><meta charset=\"utf-8\"></head><body><h1>Navigation failed</h1><p>Reason: ",
@errorName(err),
"</p></body></htm>",
});
parser.parse(html);
self.documentIsComplete(); self.documentIsComplete();
}, },
else => unreachable, else => unreachable,
@@ -797,8 +825,14 @@ fn pageErrorCallback(ctx: *anyopaque, err: anyerror) void {
log.err(.page, "navigate failed", .{ .err = err }); log.err(.page, "navigate failed", .{ .err = err });
var self: *Page = @ptrCast(@alignCast(ctx)); var self: *Page = @ptrCast(@alignCast(ctx));
self.clearTransferArena();
self._parse_state = .{ .err = err }; self._parse_state = .{ .err = err };
// In case of error, we want to complete the page with a custom HTML
// containing the error.
pageDoneCallback(ctx) catch |e| {
log.err(.browser, "pageErrorCallback", .{ .err = e });
return;
};
} }
// The transfer arena is useful and interesting, but has a weird lifetime. // The transfer arena is useful and interesting, but has a weird lifetime.
@@ -859,7 +893,7 @@ fn _wait(self: *Page, wait_ms: u32) !Session.WaitResult {
while (true) { while (true) {
switch (self._parse_state) { switch (self._parse_state) {
.pre, .raw, .text => { .pre, .raw, .text, .image => {
// The main page hasn't started/finished navigating. // The main page hasn't started/finished navigating.
// There's no JS to run, and no reason to run the scheduler. // There's no JS to run, and no reason to run the scheduler.
if (http_client.active == 0 and exit_when_done) { if (http_client.active == 0 and exit_when_done) {
@@ -2827,6 +2861,7 @@ const ParseState = union(enum) {
err: anyerror, err: anyerror,
html: std.ArrayList(u8), html: std.ArrayList(u8),
text: std.ArrayList(u8), text: std.ArrayList(u8),
image: std.ArrayList(u8),
raw: std.ArrayList(u8), raw: std.ArrayList(u8),
raw_done: []const u8, raw_done: []const u8,
}; };