create a DOM tree for non-html files too.

This commit is contained in:
Pierre Tachoire
2025-06-27 12:17:03 -07:00
parent 6937c8ecb4
commit 7896d274a3
2 changed files with 32 additions and 5 deletions

View File

@@ -35,6 +35,8 @@ pub const Mime = struct {
text_html, text_html,
text_javascript, text_javascript,
text_plain, text_plain,
text_css,
application_json,
unknown, unknown,
other, other,
}; };
@@ -44,6 +46,8 @@ pub const Mime = struct {
text_html: void, text_html: void,
text_javascript: void, text_javascript: void,
text_plain: void, text_plain: void,
text_css: void,
application_json: void,
unknown: void, unknown: void,
other: struct { type: []const u8, sub_type: []const u8 }, other: struct { type: []const u8, sub_type: []const u8 },
}; };
@@ -174,18 +178,22 @@ pub const Mime = struct {
if (std.meta.stringToEnum(enum { if (std.meta.stringToEnum(enum {
@"text/xml", @"text/xml",
@"text/html", @"text/html",
@"text/css",
@"text/plain",
@"text/javascript", @"text/javascript",
@"application/javascript", @"application/javascript",
@"application/x-javascript", @"application/x-javascript",
@"text/plain", @"application/json",
}, type_name)) |known_type| { }, type_name)) |known_type| {
const ct: ContentType = switch (known_type) { const ct: ContentType = switch (known_type) {
.@"text/xml" => .{ .text_xml = {} }, .@"text/xml" => .{ .text_xml = {} },
.@"text/html" => .{ .text_html = {} }, .@"text/html" => .{ .text_html = {} },
.@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} }, .@"text/javascript", .@"application/javascript", .@"application/x-javascript" => .{ .text_javascript = {} },
.@"text/plain" => .{ .text_plain = {} }, .@"text/plain" => .{ .text_plain = {} },
.@"text/css" => .{ .text_css = {} },
.@"application/json" => .{ .application_json = {} },
}; };
return .{ ct, attribute_start }; return .{ ct, attribute_start };
} }
@@ -351,6 +359,9 @@ test "Mime: parse common" {
try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript"); try expect(.{ .content_type = .{ .text_javascript = {} } }, "text/javascript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript"); try expect(.{ .content_type = .{ .text_javascript = {} } }, "Application/JavaScript");
try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript"); try expect(.{ .content_type = .{ .text_javascript = {} } }, "application/x-javascript");
try expect(.{ .content_type = .{ .application_json = {} } }, "application/json");
try expect(.{ .content_type = .{ .text_css = {} } }, "text/css");
} }
test "Mime: parse uncommon" { test "Mime: parse uncommon" {

View File

@@ -238,17 +238,33 @@ pub const Page = struct {
.reason = opts.reason, .reason = opts.reason,
}); });
if (!mime.isHTML()) { if (mime.isHTML()) {
// the page is an HTML document, load it as is.
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
} else {
// the page isn't an HTML document
var arr: std.ArrayListUnmanaged(u8) = .{}; var arr: std.ArrayListUnmanaged(u8) = .{};
while (try response.next()) |data| { while (try response.next()) |data| {
try arr.appendSlice(arena, try arena.dupe(u8, data)); try arr.appendSlice(arena, try arena.dupe(u8, data));
} }
// save the body into the page. // save the body into the page.
self.raw_data = arr.items; self.raw_data = arr.items;
return;
}
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8"); // construct a pseudo HTML containing the response body.
var buf: std.ArrayListUnmanaged(u8) = .{};
switch (mime.content_type) {
.application_json, .text_plain, .text_javascript, .text_css => {
try buf.appendSlice(arena, "<html><head><meta charset=\"utf-8\"></head><body><pre>");
try buf.appendSlice(arena, self.raw_data.?);
try buf.appendSlice(arena, "</pre></body></html>\n");
},
// In other cases, we prefer not to integrate the content into the HTML document page itself.
else => {},
}
var fbs = std.io.fixedBufferStream(buf.items);
try self.loadHTMLDoc(fbs.reader(), mime.charset orelse "utf-8");
}
} }
try self.processHTMLDoc(); try self.processHTMLDoc();