From cd53d2604cb946879f2343c10d3f1325194500e9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 3 Oct 2023 16:46:35 +0200 Subject: [PATCH 1/8] netsurf: add a parser from string wrapper --- src/netsurf.zig | 14 ++++++--- vendor/netsurf/wrapper/wrapper.c | 50 ++++++++++++++++++++++++++++++++ vendor/netsurf/wrapper/wrapper.h | 1 + 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index 8843de9e..9c634cd6 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -553,11 +553,17 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } -pub fn documentHTMLParse(filename: []const u8) *DocumentHTML { - var f: []u8 = @constCast(filename); - const doc = c.wr_create_doc_dom_from_file(f.ptr); +pub fn documentHTMLParse(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { + var file = try std.fs.cwd().openFile(filename, .{}); + defer file.close(); + + const file_size = try file.getEndPos(); + const html = try file.readToEndAlloc(allocator, file_size); + defer allocator.free(html); + + const doc = c.wr_create_doc_dom_from_string(html.ptr); if (doc == null) { - @panic("error parser"); + return error.ParserError; } return @as(*DocumentHTML, @ptrCast(doc.?)); } diff --git a/vendor/netsurf/wrapper/wrapper.c b/vendor/netsurf/wrapper/wrapper.c index 1f7c14bd..0a9448b7 100644 --- a/vendor/netsurf/wrapper/wrapper.c +++ b/vendor/netsurf/wrapper/wrapper.c @@ -1,8 +1,58 @@ #include +#include #include #include +/** + * Generate a LibDOM document DOM from an HTML string + * + * \param string The HTML string + * \return pointer to DOM document, or NULL on error + */ +dom_document *wr_create_doc_dom_from_string(char *html) +{ + dom_hubbub_parser *parser = NULL; + dom_hubbub_error error; + dom_hubbub_parser_params params; + dom_document *doc; + + params.enc = NULL; + params.fix_enc = true; + params.enable_script = false; + params.msg = 
NULL; + params.script = NULL; + params.ctx = NULL; + params.daf = NULL; + + /* Create Hubbub parser */ + error = dom_hubbub_parser_create(&params, &parser, &doc); + if (error != DOM_HUBBUB_OK) { + printf("Can't create Hubbub Parser\n"); + return NULL; + } + + error = dom_hubbub_parser_parse_chunk(parser, html, strlen(html)); + if (error != DOM_HUBBUB_OK) { + dom_hubbub_parser_destroy(parser); + printf("Parsing errors occur\n"); + return NULL; + } + + /* Done parsing file */ + error = dom_hubbub_parser_completed(parser); + if (error != DOM_HUBBUB_OK) { + dom_hubbub_parser_destroy(parser); + printf("Parsing error when construct DOM\n"); + return NULL; + } + + /* Finished with parser */ + dom_hubbub_parser_destroy(parser); + + return doc; +} + /** * Generate a LibDOM document DOM from an HTML file * diff --git a/vendor/netsurf/wrapper/wrapper.h b/vendor/netsurf/wrapper/wrapper.h index 05904ef1..ba630a70 100644 --- a/vendor/netsurf/wrapper/wrapper.h +++ b/vendor/netsurf/wrapper/wrapper.h @@ -3,6 +3,7 @@ #include +dom_document *wr_create_doc_dom_from_string(char *html); dom_document *wr_create_doc_dom_from_file(char *filename); #endif /* wrapper_dom_h_ */ From 04892c79100d465ae68eadf988d594266eb6e548 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 16:27:32 +0200 Subject: [PATCH 2/8] netsurf: use const char w/ parser wrapper --- vendor/netsurf/wrapper/wrapper.c | 2 +- vendor/netsurf/wrapper/wrapper.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vendor/netsurf/wrapper/wrapper.c b/vendor/netsurf/wrapper/wrapper.c index 0a9448b7..e9704780 100644 --- a/vendor/netsurf/wrapper/wrapper.c +++ b/vendor/netsurf/wrapper/wrapper.c @@ -10,7 +10,7 @@ * \param string The HTML string * \return pointer to DOM document, or NULL on error */ -dom_document *wr_create_doc_dom_from_string(char *html) +dom_document *wr_create_doc_dom_from_string(const char *html) { dom_hubbub_parser *parser = NULL; dom_hubbub_error error; diff --git 
a/vendor/netsurf/wrapper/wrapper.h b/vendor/netsurf/wrapper/wrapper.h index ba630a70..c65274e4 100644 --- a/vendor/netsurf/wrapper/wrapper.h +++ b/vendor/netsurf/wrapper/wrapper.h @@ -3,7 +3,7 @@ #include -dom_document *wr_create_doc_dom_from_string(char *html); -dom_document *wr_create_doc_dom_from_file(char *filename); +dom_document *wr_create_doc_dom_from_string(const char *html); +dom_document *wr_create_doc_dom_from_file(const char *filename); #endif /* wrapper_dom_h_ */ From 818d4f4af2dfb0332f14f71929be85deb3fb6fb5 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 16:28:18 +0200 Subject: [PATCH 3/8] netsurf: update parseDocumentHtml breaking change --- src/main.zig | 10 +++++----- src/main_shell.zig | 14 +++++++------- src/netsurf.zig | 8 ++++++-- src/run_tests.zig | 3 +-- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/main.zig b/src/main.zig index 8ddd237b..17eb7108 100644 --- a/src/main.zig +++ b/src/main.zig @@ -52,8 +52,12 @@ pub fn main() !void { const vm = jsruntime.VM.init(); defer vm.deinit(); + // alloc + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + // document - doc = parser.documentHTMLParse("test.html"); + doc = try parser.documentHTMLParseFromFile(arena.allocator(), "test.html"); defer parser.documentHTMLClose(doc); // remove socket file of internal server @@ -66,10 +70,6 @@ pub fn main() !void { } }; - // alloc - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - // server var addr = try std.net.Address.initUnix(socket_path); server = std.net.StreamServer.init(.{}); diff --git a/src/main_shell.zig b/src/main_shell.zig index 26227062..619e8df7 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -31,20 +31,20 @@ pub fn main() !void { // generate APIs const apis = jsruntime.compile(DOM.Interfaces); + // allocator + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + var arena = 
std.heap.ArenaAllocator.init(gpa.allocator()); + defer arena.deinit(); + // document - doc = parser.documentHTMLParse("test.html"); + doc = try parser.documentHTMLParseFromFile(arena.allocator(), "test.html"); defer parser.documentHTMLClose(doc); // create JS vm const vm = jsruntime.VM.init(); defer vm.deinit(); - // alloc - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - var arena = std.heap.ArenaAllocator.init(gpa.allocator()); - defer arena.deinit(); - // launch shell try jsruntime.shell(&arena, apis, execJS, .{ .app_name = "browsercore" }); } diff --git a/src/netsurf.zig b/src/netsurf.zig index 9c634cd6..50800ac8 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -553,7 +553,7 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } -pub fn documentHTMLParse(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { +pub fn documentHTMLParseFromFile(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { var file = try std.fs.cwd().openFile(filename, .{}); defer file.close(); @@ -561,7 +561,11 @@ pub fn documentHTMLParse(allocator: std.mem.Allocator, filename: []const u8) !*D const html = try file.readToEndAlloc(allocator, file_size); defer allocator.free(html); - const doc = c.wr_create_doc_dom_from_string(html.ptr); + return documentHTMLParseFromString(html); +} + +pub fn documentHTMLParseFromString(s: []const u8) !*DocumentHTML { + const doc = c.wr_create_doc_dom_from_string(s.ptr); if (doc == null) { return error.ParserError; } diff --git a/src/run_tests.zig b/src/run_tests.zig index 12137743..19f190b0 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -38,10 +38,9 @@ test { const apis = jsruntime.compile(DOM.Interfaces); // document - doc = parser.documentHTMLParse("test.html"); + doc = try parser.documentHTMLParseFromFile(std.testing.allocator, "test.html"); defer parser.documentHTMLClose(doc); - // 
create JS vm const vm = jsruntime.VM.init(); defer vm.deinit(); From b7aecb72f4ce392980ba9a09b7c414d927542f28 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 17:16:47 +0200 Subject: [PATCH 4/8] netsurf: handle null terminated c string more correctly --- src/netsurf.zig | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index 50800ac8..d3bc433c 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -4,6 +4,11 @@ const c = @cImport({ @cInclude("wrapper.h"); }); +// max html file size read and loaded in memory for parsing. +// use the same as zig for reference. +// see https://github.com/ziglang/zig/blob/9f0d2f94175a131d965e86a6396f5ac508b27bf8/src/main.zig#L71C8-L71C8 +pub const max_html_size = std.math.maxInt(u32); // 4GB + // Vtable // ------ @@ -553,19 +558,39 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } +// documentHTMLParseFromFile reads the full document, loads the content in a +// buffer and parse the buffer content. +// The buffer is freed by the function. +// The caller is responsible for closing the document. pub fn documentHTMLParseFromFile(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { var file = try std.fs.cwd().openFile(filename, .{}); defer file.close(); const file_size = try file.getEndPos(); - const html = try file.readToEndAlloc(allocator, file_size); - defer allocator.free(html); - return documentHTMLParseFromString(html); + // read the file and return the result in a null terminted c string. 
+ const cstr = try file.readToEndAllocOptions(allocator, max_html_size, file_size + 1, @alignOf(u8), 0); + defer allocator.free(cstr); + + return documentHTMLParseFromCStr(cstr); } -pub fn documentHTMLParseFromString(s: []const u8) !*DocumentHTML { - const doc = c.wr_create_doc_dom_from_string(s.ptr); +// documentHTMLParseFromCStrparses the given string. +// The allocator is required to create a null terminated string. +// The c string allocated is freed by the function. +// The caller is responsible for closing the document. +pub fn documentHTMLParseFromStr(allocator: std.mem.Allocator, str: [:0]const u8) !*DocumentHTML { + // create a null terminated c string. + const cstr = std.cstr.addNullByte(allocator, str); + defer allocator.free(cstr); + + return documentHTMLParseFromCStr(cstr); +} + +// documentHTMLParseFromCStrparses the given c string (ie. with 0 sentinel). +// The caller is responsible for closing the document. +pub fn documentHTMLParseFromCStr(cstr: [:0]const u8) !*DocumentHTML { + const doc = c.wr_create_doc_dom_from_string(cstr.ptr); if (doc == null) { return error.ParserError; } From a8079ad60ea7b68db231b518c4dcf34896074d3e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 17:39:34 +0200 Subject: [PATCH 5/8] netsurf: refactor again to reuse wr_create_doc_dom_from_file --- src/netsurf.zig | 24 +++++++++--------------- vendor/netsurf/wrapper/wrapper.c | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index d3bc433c..7a4c0f7e 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -4,11 +4,6 @@ const c = @cImport({ @cInclude("wrapper.h"); }); -// max html file size read and loaded in memory for parsing. -// use the same as zig for reference. 
-// see https://github.com/ziglang/zig/blob/9f0d2f94175a131d965e86a6396f5ac508b27bf8/src/main.zig#L71C8-L71C8 -pub const max_html_size = std.math.maxInt(u32); // 4GB - // Vtable // ------ @@ -559,20 +554,19 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { } // documentHTMLParseFromFile reads the full document, loads the content in a -// buffer and parse the buffer content. +// The allocator is required to create a null terminated string from filename. // The buffer is freed by the function. // The caller is responsible for closing the document. pub fn documentHTMLParseFromFile(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { - var file = try std.fs.cwd().openFile(filename, .{}); - defer file.close(); - - const file_size = try file.getEndPos(); - - // read the file and return the result in a null terminted c string. - const cstr = try file.readToEndAllocOptions(allocator, max_html_size, file_size + 1, @alignOf(u8), 0); + // create a null terminated c string. + const cstr = try allocator.dupeZ(u8, filename); defer allocator.free(cstr); - return documentHTMLParseFromCStr(cstr); + const doc = c.wr_create_doc_dom_from_file(cstr.ptr); + if (doc == null) { + return error.ParserError; + } + return @as(*DocumentHTML, @ptrCast(doc.?)); } // documentHTMLParseFromCStrparses the given string. @@ -581,7 +575,7 @@ pub fn documentHTMLParseFromFile(allocator: std.mem.Allocator, filename: []const // The caller is responsible for closing the document. pub fn documentHTMLParseFromStr(allocator: std.mem.Allocator, str: [:0]const u8) !*DocumentHTML { // create a null terminated c string. 
- const cstr = std.cstr.addNullByte(allocator, str); + const cstr = try allocator.dupeZ(u8, str); defer allocator.free(cstr); return documentHTMLParseFromCStr(cstr); diff --git a/vendor/netsurf/wrapper/wrapper.c b/vendor/netsurf/wrapper/wrapper.c index e9704780..ef7e8a8f 100644 --- a/vendor/netsurf/wrapper/wrapper.c +++ b/vendor/netsurf/wrapper/wrapper.c @@ -59,7 +59,7 @@ dom_document *wr_create_doc_dom_from_string(const char *html) * \param file The file path * \return pointer to DOM document, or NULL on error */ -dom_document *wr_create_doc_dom_from_file(char *filename) +dom_document *wr_create_doc_dom_from_file(const char *filename) { size_t buffer_size = 1024; dom_hubbub_parser *parser = NULL; From 8a1da4e0f22092508f060dc097eb155bcd868815 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 18:14:21 +0200 Subject: [PATCH 6/8] netsurf: rename parser and create helpers --- src/netsurf.zig | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index 7a4c0f7e..a94b1f9b 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -553,37 +553,43 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } -// documentHTMLParseFromFile reads the full document, loads the content in a +// documentHTMLParseFromFileAlloc reads the full document, loads the content in a // The allocator is required to create a null terminated string from filename. // The buffer is freed by the function. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromFile(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { - // create a null terminated c string. 
+pub fn documentHTMLParseFromFileAlloc(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML { const cstr = try allocator.dupeZ(u8, filename); defer allocator.free(cstr); - const doc = c.wr_create_doc_dom_from_file(cstr.ptr); + return documentHTMLParseFromFile(cstr); +} + +// documentHTMLParseFromFile parses the given filename c string (ie. with 0 sentinel). +// The caller is responsible for closing the document. +pub fn documentHTMLParseFromFile(filename: [:0]const u8) !*DocumentHTML { + // create a null terminated c string. + const doc = c.wr_create_doc_dom_from_file(filename.ptr); if (doc == null) { return error.ParserError; } return @as(*DocumentHTML, @ptrCast(doc.?)); } -// documentHTMLParseFromCStrparses the given string. +// documentHTMLParseFromStrAlloc parses the given string. // The allocator is required to create a null terminated string. // The c string allocated is freed by the function. // The caller is responsible for closing the document. -pub fn documentHTMLParseFromStr(allocator: std.mem.Allocator, str: [:0]const u8) !*DocumentHTML { +pub fn documentHTMLParseFromStrAlloc(allocator: std.mem.Allocator, str: [:0]const u8) !*DocumentHTML { // create a null terminated c string. const cstr = try allocator.dupeZ(u8, str); defer allocator.free(cstr); - return documentHTMLParseFromCStr(cstr); + return documentHTMLParseFromStr(cstr); } -// documentHTMLParseFromCStrparses the given c string (ie. with 0 sentinel). +// documentHTMLParseFromStr parses the given c string (ie. with 0 sentinel). // The caller is responsible for closing the document. 
-pub fn documentHTMLParseFromCStr(cstr: [:0]const u8) !*DocumentHTML { +pub fn documentHTMLParseFromStr(cstr: [:0]const u8) !*DocumentHTML { const doc = c.wr_create_doc_dom_from_string(cstr.ptr); if (doc == null) { return error.ParserError; From cf8725757c81d617be195a57f88ab88a08a218ed Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 18:16:00 +0200 Subject: [PATCH 7/8] netsurf: typo fix --- src/netsurf.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netsurf.zig b/src/netsurf.zig index a94b1f9b..9ec0714e 100644 --- a/src/netsurf.zig +++ b/src/netsurf.zig @@ -553,7 +553,7 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable { return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); } -// documentHTMLParseFromFileAlloc reads the full document, loads the content in a +// documentHTMLParseFromFileAlloc parses the file. // The allocator is required to create a null terminated string from filename. // The buffer is freed by the function. // The caller is responsible for closing the document. 
From ac8317adf4f9dd13c51ae178abc7754b14350d4e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 4 Oct 2023 18:29:35 +0200 Subject: [PATCH 8/8] netsurf: use parse helper --- src/main.zig | 2 +- src/main_shell.zig | 2 +- src/run_tests.zig | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.zig b/src/main.zig index 17eb7108..a175929f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -57,7 +57,7 @@ pub fn main() !void { defer arena.deinit(); // document - doc = try parser.documentHTMLParseFromFile(arena.allocator(), "test.html"); + doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html"); defer parser.documentHTMLClose(doc); // remove socket file of internal server diff --git a/src/main_shell.zig b/src/main_shell.zig index 619e8df7..e9acdc9d 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -38,7 +38,7 @@ pub fn main() !void { defer arena.deinit(); // document - doc = try parser.documentHTMLParseFromFile(arena.allocator(), "test.html"); + doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html"); defer parser.documentHTMLClose(doc); // create JS vm diff --git a/src/run_tests.zig b/src/run_tests.zig index 19f190b0..775c7004 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -38,7 +38,7 @@ test { const apis = jsruntime.compile(DOM.Interfaces); // document - doc = try parser.documentHTMLParseFromFile(std.testing.allocator, "test.html"); + doc = try parser.documentHTMLParseFromFileAlloc(std.testing.allocator, "test.html"); defer parser.documentHTMLClose(doc); // create JS vm