From cc3a7ec4a13c255c45326330e46b0a25976004b6 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 9 Jan 2024 11:33:02 +0100 Subject: [PATCH 1/2] netsurf: fix DocumentHTMLParseFromStr and add test --- src/netsurf.zig | 43 ++++++++++++------------------------------- src/run_tests.zig | 11 +++++++++++ 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index e0e05093..70232a17 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1396,6 +1396,17 @@ fn parserErr(err: HubbubErr) ParserError!void { // documentHTMLParseFromFile parses the given HTML file. // The caller is responsible for closing the document. pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { + return try documentHTMLParse(file.reader()); +} + +// documentHTMLParseFromStr parses the given HTML string. +// The caller is responsible for closing the document. +pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { + var fbs = std.io.fixedBufferStream(str); + return try documentHTMLParse(fbs.reader()); +} + +pub fn documentHTMLParse(reader: anytype) !*DocumentHTML { var parser: ?*c.dom_hubbub_parser = undefined; var doc: ?*c.dom_document = undefined; var err: c.hubbub_error = undefined; @@ -1417,7 +1428,7 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { var buffer: [1024]u8 = undefined; var ln = buffer.len; while (ln == buffer.len) { - ln = try file.readAll(&buffer); + ln = try reader.read(&buffer); err = c.dom_hubbub_parser_parse_chunk(parser, &buffer, ln); try parserErr(err); } @@ -1428,36 +1439,6 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { return @as(*DocumentHTML, @ptrCast(doc.?)); } -// documentHTMLParseFromStr parses the given HTML string. -// The caller is responsible for closing the document. 
-pub fn documentHTMLParseFromStr(str: []const u8) ParserError!*DocumentHTML { - var parser: ?*c.dom_hubbub_parser = undefined; - var doc: ?*c.dom_document = undefined; - var err: c.hubbub_error = undefined; - - var params = c.dom_hubbub_parser_params{ - .enc = null, - .fix_enc = true, - .msg = null, - .script = null, - .enable_script = false, - .ctx = null, - .daf = null, - }; - - err = c.dom_hubbub_parser_create(&params, &parser, &doc); - try parserErr(err); - defer c.dom_hubbub_parser_destroy(parser); - - err = c.dom_hubbub_parser_parse_chunk(parser, str, str.len); - try parserErr(err); - - err = c.dom_hubbub_parser_completed(parser); - try parserErr(err); - - return @as(*DocumentHTML, @ptrCast(doc.?)); -} - // documentHTMLClose closes the document. pub fn documentHTMLClose(doc: *DocumentHTML) !void { const err = documentHTMLVtable(doc).close.?(doc); diff --git a/src/run_tests.zig b/src/run_tests.zig index 8be3eff6..1e1afd90 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -98,3 +98,14 @@ test { try jsruntime.loadEnv(&arena_alloc, testsAllExecFn, apis); } + +test "DocumentHTMLParseFromStr" { + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + const str = try file.readToEndAlloc(std.testing.allocator, std.math.maxInt(u32)); + defer std.testing.allocator.free(str); + + doc = try parser.documentHTMLParseFromStr(str); + parser.documentHTMLClose(doc) catch {}; +} From 5496e68a0243f756ddaf6ca531ae8fdd8ff6f2aa Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 9 Jan 2024 11:35:18 +0100 Subject: [PATCH 2/2] netsurf: remove documentHTMLParseFromFile --- src/main.zig | 2 +- src/main_shell.zig | 2 +- src/netsurf.zig | 6 ------ src/run_tests.zig | 2 +- src/wpt/run.zig | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/main.zig b/src/main.zig index 7850c00f..fba34679 100644 --- a/src/main.zig +++ b/src/main.zig @@ -64,7 +64,7 @@ pub fn main() !void { const file = try std.fs.cwd().openFile("test.html", .{}); 
defer file.close(); - doc = try parser.documentHTMLParseFromFile(file); + doc = try parser.documentHTMLParse(file.reader()); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git a/src/main_shell.zig b/src/main_shell.zig index 066d2051..9b3611ca 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -45,7 +45,7 @@ pub fn main() !void { const file = try std.fs.cwd().openFile("test.html", .{}); defer file.close(); - doc = try parser.documentHTMLParseFromFile(file); + doc = try parser.documentHTMLParse(file.reader()); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git a/src/netsurf.zig b/src/netsurf.zig index 70232a17..88565df7 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1393,12 +1393,6 @@ fn parserErr(err: HubbubErr) ParserError!void { }; } -// documentHTMLParseFromFile parses the given HTML file. -// The caller is responsible for closing the document. -pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { - return try documentHTMLParse(file.reader()); -} - // documentHTMLParseFromStr parses the given HTML string. // The caller is responsible for closing the document. 
pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { diff --git a/src/run_tests.zig b/src/run_tests.zig index 1e1afd90..e67c23f1 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -41,7 +41,7 @@ fn testExecFn( const file = try std.fs.cwd().openFile("test.html", .{}); defer file.close(); - doc = try parser.documentHTMLParseFromFile(file); + doc = try parser.documentHTMLParse(file.reader()); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git a/src/wpt/run.zig b/src/wpt/run.zig index 68243c23..55039ef7 100644 --- a/src/wpt/run.zig +++ b/src/wpt/run.zig @@ -20,7 +20,7 @@ pub fn run(arena: *std.heap.ArenaAllocator, comptime apis: []jsruntime.API, comp const file = try std.fs.cwd().openFile(f, .{}); defer file.close(); - const html_doc = try parser.documentHTMLParseFromFile(file); + const html_doc = try parser.documentHTMLParse(file.reader()); const doc = parser.documentHTMLToDocument(html_doc); const dirname = fspath.dirname(f[dir.len..]) orelse unreachable;