mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-30 15:41:48 +00:00
browser: use charset from headers to parse doc
This commit is contained in:
@@ -182,10 +182,10 @@ pub const Page = struct {
|
|||||||
log.info("no content-type HTTP header", .{});
|
log.info("no content-type HTTP header", .{});
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
log.debug("header content-type: {s}", .{ct});
|
||||||
const mime = try Mime.parse(ct);
|
const mime = try Mime.parse(ct);
|
||||||
if (mime.eql(Mime.HTML)) {
|
if (mime.eql(Mime.HTML)) {
|
||||||
// TODO check content-type
|
try self.loadHTMLDoc(req.reader(), mime.charset orelse "utf-8");
|
||||||
try self.loadHTMLDoc(req.reader());
|
|
||||||
} else {
|
} else {
|
||||||
log.info("non-HTML document: {s}", .{ct});
|
log.info("non-HTML document: {s}", .{ct});
|
||||||
|
|
||||||
@@ -195,10 +195,13 @@ pub const Page = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/#read-html
|
// https://html.spec.whatwg.org/#read-html
|
||||||
fn loadHTMLDoc(self: *Page, reader: anytype) !void {
|
fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void {
|
||||||
log.debug("parse html", .{});
|
log.debug("parse html with charset {s}", .{charset});
|
||||||
// TODO pass an encoding detected from HTTP headers.
|
|
||||||
const html_doc = try parser.documentHTMLParse(reader, "UTF-8");
|
const ccharset = try self.alloc.dupeZ(u8, charset);
|
||||||
|
defer self.alloc.free(ccharset);
|
||||||
|
|
||||||
|
const html_doc = try parser.documentHTMLParse(reader, ccharset);
|
||||||
const doc = parser.documentHTMLToDocument(html_doc);
|
const doc = parser.documentHTMLToDocument(html_doc);
|
||||||
|
|
||||||
// save a document's pointer in the page.
|
// save a document's pointer in the page.
|
||||||
|
|||||||
Reference in New Issue
Block a user