netsurf: pass encoding to the parser

This commit is contained in:
Pierre Tachoire
2024-01-12 11:58:49 +01:00
parent 1a03412635
commit 028cd2331f
5 changed files with 9 additions and 7 deletions

View File

@@ -62,7 +62,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParse(file.reader());
doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};

View File

@@ -43,7 +43,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParse(file.reader());
doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};

View File

@@ -1411,10 +1411,10 @@ fn parserErr(err: HubbubErr) ParserError!void {
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML {
var fbs = std.io.fixedBufferStream(str);
return try documentHTMLParse(fbs.reader());
return try documentHTMLParse(fbs.reader(), "UTF-8");
}
pub fn documentHTMLParse(reader: anytype) !*DocumentHTML {
pub fn documentHTMLParse(reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
var parser: ?*c.dom_hubbub_parser = undefined;
var doc: ?*c.dom_document = undefined;
var err: c.hubbub_error = undefined;
@@ -1429,6 +1429,8 @@ pub fn documentHTMLParse(reader: anytype) !*DocumentHTML {
.daf = null,
};
if (enc) |e| params.enc = e;
err = c.dom_hubbub_parser_create(&params, &parser, &doc);
try parserErr(err);
defer c.dom_hubbub_parser_destroy(parser);

View File

@@ -43,7 +43,7 @@ fn testExecFn(
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParse(file.reader());
doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};
@@ -119,6 +119,6 @@ test "bug document html parsing #4" {
const file = try std.fs.cwd().openFile("tests/html/bug-html-parsing-4.html", .{});
defer file.close();
doc = try parser.documentHTMLParse(file.reader());
doc = try parser.documentHTMLParse(file.reader(), null);
parser.documentHTMLClose(doc) catch {};
}

View File

@@ -21,7 +21,7 @@ pub fn run(arena: *std.heap.ArenaAllocator, comptime dir: []const u8, f: []const
const file = try std.fs.cwd().openFile(f, .{});
defer file.close();
const html_doc = try parser.documentHTMLParse(file.reader());
const html_doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
const doc = parser.documentHTMLToDocument(html_doc);
const dirname = fspath.dirname(f[dir.len..]) orelse unreachable;