netsurf: pass encoding to the parser

This commit is contained in:
Pierre Tachoire
2024-01-12 11:58:49 +01:00
parent 1a03412635
commit 028cd2331f
5 changed files with 9 additions and 7 deletions

View File

@@ -62,7 +62,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{}); const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close(); defer file.close();
doc = try parser.documentHTMLParse(file.reader()); doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| { defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
}; };

View File

@@ -43,7 +43,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{}); const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close(); defer file.close();
doc = try parser.documentHTMLParse(file.reader()); doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| { defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
}; };

View File

@@ -1411,10 +1411,10 @@ fn parserErr(err: HubbubErr) ParserError!void {
// The caller is responsible for closing the document. // The caller is responsible for closing the document.
pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML {
var fbs = std.io.fixedBufferStream(str); var fbs = std.io.fixedBufferStream(str);
return try documentHTMLParse(fbs.reader()); return try documentHTMLParse(fbs.reader(), "UTF-8");
} }
pub fn documentHTMLParse(reader: anytype) !*DocumentHTML { pub fn documentHTMLParse(reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
var parser: ?*c.dom_hubbub_parser = undefined; var parser: ?*c.dom_hubbub_parser = undefined;
var doc: ?*c.dom_document = undefined; var doc: ?*c.dom_document = undefined;
var err: c.hubbub_error = undefined; var err: c.hubbub_error = undefined;
@@ -1429,6 +1429,8 @@ pub fn documentHTMLParse(reader: anytype) !*DocumentHTML {
.daf = null, .daf = null,
}; };
if (enc) |e| params.enc = e;
err = c.dom_hubbub_parser_create(&params, &parser, &doc); err = c.dom_hubbub_parser_create(&params, &parser, &doc);
try parserErr(err); try parserErr(err);
defer c.dom_hubbub_parser_destroy(parser); defer c.dom_hubbub_parser_destroy(parser);

View File

@@ -43,7 +43,7 @@ fn testExecFn(
const file = try std.fs.cwd().openFile("test.html", .{}); const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close(); defer file.close();
doc = try parser.documentHTMLParse(file.reader()); doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
defer parser.documentHTMLClose(doc) catch |err| { defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
}; };
@@ -119,6 +119,6 @@ test "bug document html parsing #4" {
const file = try std.fs.cwd().openFile("tests/html/bug-html-parsing-4.html", .{}); const file = try std.fs.cwd().openFile("tests/html/bug-html-parsing-4.html", .{});
defer file.close(); defer file.close();
doc = try parser.documentHTMLParse(file.reader()); doc = try parser.documentHTMLParse(file.reader(), null);
parser.documentHTMLClose(doc) catch {}; parser.documentHTMLClose(doc) catch {};
} }

View File

@@ -21,7 +21,7 @@ pub fn run(arena: *std.heap.ArenaAllocator, comptime dir: []const u8, f: []const
const file = try std.fs.cwd().openFile(f, .{}); const file = try std.fs.cwd().openFile(f, .{});
defer file.close(); defer file.close();
const html_doc = try parser.documentHTMLParse(file.reader()); const html_doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
const doc = parser.documentHTMLToDocument(html_doc); const doc = parser.documentHTMLToDocument(html_doc);
const dirname = fspath.dirname(f[dir.len..]) orelse unreachable; const dirname = fspath.dirname(f[dir.len..]) orelse unreachable;