From 9f77ba01bb1ebe6447c3c3b8fc136ab28e082cb8 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 15:13:07 +0100 Subject: [PATCH 1/5] netsurf: rewrite wrapper.ci func in pure zig --- src/netsurf.zig | 100 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 20 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index 2455b35a..3d2ded24 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1,7 +1,8 @@ const std = @import("std"); const c = @cImport({ - @cInclude("wrapper.h"); + @cInclude("dom/dom.h"); + @cInclude("dom/bindings/hubbub/parser.h"); }); // Vtable @@ -1284,19 +1285,52 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { // The allocator is required to create a null terminated string from filename. // The buffer is freed by the function. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromFileAlloc(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { - const cstr = try allocator.dupeZ(u8, filename); - defer allocator.free(cstr); - - return documentHTMLParseFromFile(cstr); +// DEPRECATED +pub fn documentHTMLParseFromFileAlloc(_: std.mem.Allocator, filename: []const u8) !*DocumentHTML { + return documentHTMLParseFromFile(filename); } -// documentHTMLParseFromFile parses the given filename c string (ie. with 0 sentinel). +// documentHTMLParseFromFile parses the given HTML file. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromFile(filename: [:0]const u8) !*DocumentHTML { - // create a null terminated c string. 
- const doc = c.wr_create_doc_dom_from_file(filename.ptr); - if (doc == null) return error.ParserError; +pub fn documentHTMLParseFromFile(filename: []const u8) !*DocumentHTML { + const file = try std.fs.openFileAbsolute(filename, .{}); + defer file.close(); + + var parser: ?*c.dom_hubbub_parser = undefined; + var doc: ?*c.dom_document = undefined; + var err: c.hubbub_error = undefined; + + var params = c.dom_hubbub_parser_params{ + .enc = null, + .fix_enc = true, + .msg = null, + .script = null, + .enable_script = false, + .ctx = null, + .daf = null, + }; + + err = c.dom_hubbub_parser_create(&params, &parser, &doc); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + defer c.dom_hubbub_parser_destroy(parser); + + var buffer: [1024 * 4]u8 = undefined; + var ln = buffer.len; + while (ln == buffer.len) { + ln = try file.readAll(&buffer); + err = c.dom_hubbub_parser_parse_chunk(parser, &buffer, ln); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + } + + err = c.dom_hubbub_parser_completed(parser); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + return @as(*DocumentHTML, @ptrCast(doc.?)); } @@ -1304,18 +1338,44 @@ pub fn documentHTMLParseFromFile(filename: [:0]const u8) !*DocumentHTML { // The allocator is required to create a null terminated string. // The c string allocated is freed by the function. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromStrAlloc(allocator: std.mem.Allocator, str: []const u8) !*DocumentHTML { - // create a null terminated c string. - const cstr = try allocator.dupeZ(u8, str); - defer allocator.free(cstr); - return documentHTMLParseFromStr(cstr); +// DEPRECATED +pub fn documentHTMLParseFromStrAlloc(_: std.mem.Allocator, str: []const u8) !*DocumentHTML { + return documentHTMLParseFromStr(str); } -// documentHTMLParseFromStr parses the given c string (ie. with 0 sentinel). +// documentHTMLParseFromStr parses the given HTML string. 
// The caller is responsible for closing the document. -pub fn documentHTMLParseFromStr(cstr: [:0]const u8) !*DocumentHTML { - const doc = c.wr_create_doc_dom_from_string(cstr.ptr); - if (doc == null) return error.ParserError; +pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { + var parser: ?*c.dom_hubbub_parser = undefined; + var doc: ?*c.dom_document = undefined; + var err: c.hubbub_error = undefined; + + var params = c.dom_hubbub_parser_params{ + .enc = null, + .fix_enc = true, + .msg = null, + .script = null, + .enable_script = false, + .ctx = null, + .daf = null, + }; + + err = c.dom_hubbub_parser_create(&params, &parser, &doc); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + defer c.dom_hubbub_parser_destroy(parser); + + err = c.dom_hubbub_parser_parse_chunk(parser, str, str.len); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + + err = c.dom_hubbub_parser_completed(parser); + if (err != c.DOM_HUBBUB_OK) { + return error.ParserError; + } + return @as(*DocumentHTML, @ptrCast(doc.?)); } From 0159051b17381b1345d93b25f98a05db1be32fd1 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 15:18:17 +0100 Subject: [PATCH 2/5] netsurf: remove deprecated API for parsing HTML --- src/main.zig | 5 ++++- src/main_shell.zig | 5 ++++- src/netsurf.zig | 23 +---------------------- src/run_tests.zig | 5 ++++- src/wpt/run.zig | 5 ++++- 5 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/main.zig b/src/main.zig index 2dd6403c..7850c00f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -61,7 +61,10 @@ pub fn main() !void { defer arena.deinit(); // document - doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html"); + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + doc = try parser.documentHTMLParseFromFile(file); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git 
a/src/main_shell.zig b/src/main_shell.zig index 762e97fd..066d2051 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -42,7 +42,10 @@ pub fn main() !void { defer arena.deinit(); // document - doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html"); + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + doc = try parser.documentHTMLParseFromFile(file); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git a/src/netsurf.zig b/src/netsurf.zig index 3d2ded24..86fccb2f 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1281,21 +1281,9 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } -// documentHTMLParseFromFileAlloc parses the file. -// The allocator is required to create a null terminated string from filename. -// The buffer is freed by the function. -// The caller is responsible for closing the document. -// DEPRECATED -pub fn documentHTMLParseFromFileAlloc(_: std.mem.Allocator, filename: []const u8) !*DocumentHTML { - return documentHTMLParseFromFile(filename); -} - // documentHTMLParseFromFile parses the given HTML file. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromFile(filename: []const u8) !*DocumentHTML { - const file = try std.fs.openFileAbsolute(filename, .{}); - defer file.close(); - +pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { var parser: ?*c.dom_hubbub_parser = undefined; var doc: ?*c.dom_document = undefined; var err: c.hubbub_error = undefined; @@ -1334,15 +1322,6 @@ pub fn documentHTMLParseFromFile(filename: []const u8) !*DocumentHTML { return @as(*DocumentHTML, @ptrCast(doc.?)); } -// documentHTMLParseFromStrAlloc the given string. -// The allocator is required to create a null terminated string. 
-// The c string allocated is freed by the function. -// The caller is responsible for closing the document. -// DEPRECATED -pub fn documentHTMLParseFromStrAlloc(_: std.mem.Allocator, str: []const u8) !*DocumentHTML { - return documentHTMLParseFromStr(str); -} - // documentHTMLParseFromStr parses the given HTML string. // The caller is responsible for closing the document. pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { diff --git a/src/run_tests.zig b/src/run_tests.zig index 0f541c79..3f6b17e8 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -37,7 +37,10 @@ fn testExecFn( try js_env.attachObject(try js_env.getGlobal(), "window", null); // document - doc = try parser.documentHTMLParseFromFileAlloc(std.testing.allocator, "test.html"); + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + doc = try parser.documentHTMLParseFromFile(file); defer parser.documentHTMLClose(doc) catch |err| { std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)}); }; diff --git a/src/wpt/run.zig b/src/wpt/run.zig index 1cdb74bf..68243c23 100644 --- a/src/wpt/run.zig +++ b/src/wpt/run.zig @@ -17,7 +17,10 @@ pub fn run(arena: *std.heap.ArenaAllocator, comptime apis: []jsruntime.API, comp const alloc = arena.allocator(); // document - const html_doc = try parser.documentHTMLParseFromFileAlloc(alloc, f); + const file = try std.fs.cwd().openFile(f, .{}); + defer file.close(); + + const html_doc = try parser.documentHTMLParseFromFile(file); const doc = parser.documentHTMLToDocument(html_doc); const dirname = fspath.dirname(f[dir.len..]) orelse unreachable; From ac99aab3fd43d495455084c12f3b01b94963b339 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 15:59:16 +0100 Subject: [PATCH 3/5] netsurf: better parsing error --- src/netsurf.zig | 58 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index 
86fccb2f..d657bdf8 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1281,6 +1281,38 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } +const ParserError = error{ + Reprocess, + EncodingChange, + Paused, + NoMemory, + BadParameter, + BadEncoding, + Invalid, + FileNotFound, + NeedData, + Unknown, +}; + +const HubbubErr = c.hubbub_error; + +fn parserErr(err: HubbubErr) ParserError!void { + return switch (err) { + c.HUBBUB_OK => {}, + c.HUBBUB_REPROCESS => ParserError.Reprocess, + c.HUBBUB_ENCODINGCHANGE => ParserError.EncodingChange, + c.HUBBUB_PAUSED => ParserError.Paused, + c.HUBBUB_NOMEM => ParserError.NoMemory, + c.HUBBUB_BADPARM => ParserError.BadParameter, + c.HUBBUB_BADENCODING => ParserError.BadEncoding, + c.HUBBUB_INVALID => ParserError.Invalid, + c.HUBBUB_FILENOTFOUND => ParserError.FileNotFound, + c.HUBBUB_NEEDDATA => ParserError.NeedData, + c.HUBBUB_UNKNOWN => ParserError.Unknown, + else => unreachable, + }; +} + // documentHTMLParseFromFile parses the given HTML file. // The caller is responsible for closing the document. 
pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { @@ -1299,9 +1331,7 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { }; err = c.dom_hubbub_parser_create(&params, &parser, &doc); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); defer c.dom_hubbub_parser_destroy(parser); var buffer: [1024 * 4]u8 = undefined; @@ -1309,22 +1339,18 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { while (ln == buffer.len) { ln = try file.readAll(&buffer); err = c.dom_hubbub_parser_parse_chunk(parser, &buffer, ln); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); } err = c.dom_hubbub_parser_completed(parser); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); return @as(*DocumentHTML, @ptrCast(doc.?)); } // documentHTMLParseFromStr parses the given HTML string. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { +pub fn documentHTMLParseFromStr(str: []const u8) ParserError!*DocumentHTML { var parser: ?*c.dom_hubbub_parser = undefined; var doc: ?*c.dom_document = undefined; var err: c.hubbub_error = undefined; @@ -1340,20 +1366,14 @@ pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML { }; err = c.dom_hubbub_parser_create(&params, &parser, &doc); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); defer c.dom_hubbub_parser_destroy(parser); err = c.dom_hubbub_parser_parse_chunk(parser, str, str.len); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); err = c.dom_hubbub_parser_completed(parser); - if (err != c.DOM_HUBBUB_OK) { - return error.ParserError; - } + try parserErr(err); return @as(*DocumentHTML, @ptrCast(doc.?)); } From 681dd2390c3a9f4f0880210f454dc88b0d4618b0 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 16:00:50 +0100 Subject: [PATCH 4/5] 
netsurf: remove wrapper C files --- build.zig | 9 --- vendor/netsurf/wrapper/wrapper.c | 127 ------------------------------- vendor/netsurf/wrapper/wrapper.h | 9 --- 3 files changed, 145 deletions(-) delete mode 100644 vendor/netsurf/wrapper/wrapper.c delete mode 100644 vendor/netsurf/wrapper/wrapper.h diff --git a/build.zig b/build.zig index be9cecc2..4555807b 100644 --- a/build.zig +++ b/build.zig @@ -151,13 +151,4 @@ fn linkNetSurf(step: *std.build.LibExeObjStep) void { step.addIncludePath(.{ .path = ns ++ lib ++ "/src" }); } step.addIncludePath(.{ .path = ns ++ "/include" }); - - // wrapper - const flags = [_][]const u8{}; - const files: [1][]const u8 = .{ns ++ "wrapper/wrapper.c"}; - step.addCSourceFiles(.{ - .files = &files, - .flags = &flags, - }); - step.addIncludePath(.{ .path = ns ++ "wrapper" }); } diff --git a/vendor/netsurf/wrapper/wrapper.c b/vendor/netsurf/wrapper/wrapper.c deleted file mode 100644 index ef7e8a8f..00000000 --- a/vendor/netsurf/wrapper/wrapper.c +++ /dev/null @@ -1,127 +0,0 @@ -#include -#include - -#include -#include - -/** - * Generate a LibDOM document DOM from an HTML string - * - * \param string The HTML string - * \return pointer to DOM document, or NULL on error - */ -dom_document *wr_create_doc_dom_from_string(const char *html) -{ - dom_hubbub_parser *parser = NULL; - dom_hubbub_error error; - dom_hubbub_parser_params params; - dom_document *doc; - - params.enc = NULL; - params.fix_enc = true; - params.enable_script = false; - params.msg = NULL; - params.script = NULL; - params.ctx = NULL; - params.daf = NULL; - - /* Create Hubbub parser */ - error = dom_hubbub_parser_create(&params, &parser, &doc); - if (error != DOM_HUBBUB_OK) { - printf("Can't create Hubbub Parser\n"); - return NULL; - } - - error = dom_hubbub_parser_parse_chunk(parser, html, strlen(html)); - if (error != DOM_HUBBUB_OK) { - dom_hubbub_parser_destroy(parser); - printf("Parsing errors occur\n"); - return NULL; - } - - /* Done parsing file */ - error = 
dom_hubbub_parser_completed(parser); - if (error != DOM_HUBBUB_OK) { - dom_hubbub_parser_destroy(parser); - printf("Parsing error when construct DOM\n"); - return NULL; - } - - /* Finished with parser */ - dom_hubbub_parser_destroy(parser); - - return doc; -} - -/** - * Generate a LibDOM document DOM from an HTML file - * - * \param file The file path - * \return pointer to DOM document, or NULL on error - */ -dom_document *wr_create_doc_dom_from_file(const char *filename) -{ - size_t buffer_size = 1024; - dom_hubbub_parser *parser = NULL; - FILE *handle; - int chunk_length; - dom_hubbub_error error; - dom_hubbub_parser_params params; - dom_document *doc; - unsigned char buffer[buffer_size]; - - params.enc = NULL; - params.fix_enc = true; - params.enable_script = false; - params.msg = NULL; - params.script = NULL; - params.ctx = NULL; - params.daf = NULL; - - /* Create Hubbub parser */ - error = dom_hubbub_parser_create(&params, &parser, &doc); - if (error != DOM_HUBBUB_OK) { - printf("Can't create Hubbub Parser\n"); - return NULL; - } - - /* Open input file */ - handle = fopen(filename, "rb"); - if (handle == NULL) { - dom_hubbub_parser_destroy(parser); - printf("Can't open test input file: %s\n", filename); - return NULL; - } - - /* Parse input file in chunks */ - chunk_length = buffer_size; - while (chunk_length == buffer_size) { - chunk_length = fread(buffer, 1, buffer_size, handle); - error = dom_hubbub_parser_parse_chunk(parser, buffer, - chunk_length); - if (error != DOM_HUBBUB_OK) { - dom_hubbub_parser_destroy(parser); - printf("Parsing errors occur\n"); - return NULL; - } - } - - /* Done parsing file */ - error = dom_hubbub_parser_completed(parser); - if (error != DOM_HUBBUB_OK) { - dom_hubbub_parser_destroy(parser); - printf("Parsing error when construct DOM\n"); - return NULL; - } - - /* Finished with parser */ - dom_hubbub_parser_destroy(parser); - - /* Close input file */ - if (fclose(handle) != 0) { - printf("Can't close test input file: %s\n", filename); 
- return NULL; - } - - return doc; -} diff --git a/vendor/netsurf/wrapper/wrapper.h b/vendor/netsurf/wrapper/wrapper.h deleted file mode 100644 index c65274e4..00000000 --- a/vendor/netsurf/wrapper/wrapper.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef wrapper_dom_h_ -#define wrapper_dom_h_ - -#include - -dom_document *wr_create_doc_dom_from_string(const char *html); -dom_document *wr_create_doc_dom_from_file(const char *filename); - -#endif /* wrapper_dom_h_ */ From 1c55269342e5f259297081b2f4317c29f2ca3f9c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 16:23:22 +0100 Subject: [PATCH 5/5] netsurf: use 1024 bytes for file read buffer --- src/netsurf.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index d657bdf8..84f5c363 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -1334,7 +1334,7 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML { try parserErr(err); defer c.dom_hubbub_parser_destroy(parser); - var buffer: [1024 * 4]u8 = undefined; + var buffer: [1024]u8 = undefined; var ln = buffer.len; while (ln == buffer.len) { ln = try file.readAll(&buffer);