Create a DOM tree for non-HTML files too.

This commit is contained in:
Pierre Tachoire
2025-06-27 12:17:03 -07:00
parent 6937c8ecb4
commit 7896d274a3
2 changed files with 32 additions and 5 deletions

View File

@@ -35,6 +35,8 @@ pub const Mime = struct {
text_html,
text_javascript,
text_plain,
text_css,
application_json,
unknown,
other,
};
@@ -44,6 +46,8 @@ pub const Mime = struct {
text_html: void,
text_javascript: void,
text_plain: void,
text_css: void,
application_json: void,
unknown: void,
other: struct { type: []const u8, sub_type: []const u8 },
};
@@ -174,18 +178,22 @@ pub const Mime = struct {
if (std.meta.stringToEnum(enum {
@"text/xml",
@"text/html",
@"text/css",
@"text/plain",
@"text/javascript",
@"application/javascript",
@"application/x-javascript",
@"text/plain",
@"application/json",
}, type_name)) |known_type| {
const ct: ContentType = switch (known_type) {
.@"text/xml" => .{ .text_xml = {} },
.@"text/html" => .{ .text_html = {} },
.@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
.@"text/plain" => .{ .text_plain = {} },
.@"text/css" => .{ .text_css = {} },
.@"application/json" => .{ .application_json = {} },
};
return .{ ct, attribute_start };
}
@@ -351,6 +359,9 @@ test "Mime: parse common" {
try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript");
try expect(.{ .content_type = .{ .application_json = {} } }, "application/json");
try expect(.{ .content_type = .{ .text_css = {} } }, "text/css");
}
test "Mime: parse uncommon" {

View File

@@ -238,17 +238,33 @@ pub const Page = struct {
.reason = opts.reason,
});
if (!mime.isHTML()) {
if (mime.isHTML()) {
// the page is an HTML document, load it as is.
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
} else {
// the page isn't an HTML document
var arr: std.ArrayListUnmanaged(u8) = .{};
while (try response.next()) |data| {
try arr.appendSlice(arena, try arena.dupe(u8, data));
}
// save the body into the page.
self.raw_data = arr.items;
return;
}
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
// construct a pseudo HTML containing the response body.
var buf: std.ArrayListUnmanaged(u8) = .{};
switch (mime.content_type) {
.application_json, .text_plain, .text_javascript, .text_css => {
try buf.appendSlice(arena, "<html><head><meta charset=\"utf-8\"></head><body><pre>");
try buf.appendSlice(arena, self.raw_data.?);
try buf.appendSlice(arena, "</pre></body></html>\n");
},
// In other cases, we prefer to not integrate the content into the HTML document page itself.
else => {},
}
var fbs = std.io.fixedBufferStream(buf.items);
try self.loadHTMLDoc(fbs.reader(), mime.charset orelse "utf-8");
}
}
try self.processHTMLDoc();