From cd53d2604cb946879f2343c10d3f1325194500e9 Mon Sep 17 00:00:00 2001
From: Pierre Tachoire
Date: Tue, 3 Oct 2023 16:46:35 +0200
Subject: [PATCH] netsurf: add a parser from string wrapper

---
Review notes (below the "---" marker, so not part of the commit message):
 * The header names on the #include lines are absent from this copy of the
   patch and are reproduced as found.
 * Hunk line counts and the diffstat reflect the revised content; the blob
   ids on the "index" lines are those of the original patch.

 src/netsurf.zig                  | 21 ++++++++++--
 vendor/netsurf/wrapper/wrapper.c | 54 ++++++++++++++++++++++++++++++++
 vendor/netsurf/wrapper/wrapper.h |  1 +
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/src/netsurf.zig b/src/netsurf.zig
index 8843de9e..9c634cd6 100644
--- a/src/netsurf.zig
+++ b/src/netsurf.zig
@@ -553,11 +553,24 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable {
     return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html);
 }
 
-pub fn documentHTMLParse(filename: []const u8) *DocumentHTML {
-    var f: []u8 = @constCast(filename);
-    const doc = c.wr_create_doc_dom_from_file(f.ptr);
+/// Parse the HTML file at `filename` into a netsurf DOM document.
+///
+/// The file is read fully into memory using `allocator`; that temporary buffer
+/// is freed before returning. Returns `error.ParserError` when the C wrapper
+/// returns null; file-system and allocation errors are propagated.
+pub fn documentHTMLParse(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML {
+    const file = try std.fs.cwd().openFile(filename, .{});
+    defer file.close();
+
+    const file_size = try file.getEndPos();
+    // The C wrapper measures the input with strlen(), so the buffer must be
+    // NUL-terminated; request a 0 sentinel instead of a plain byte slice.
+    const html = try file.readToEndAllocOptions(allocator, file_size, null, @alignOf(u8), 0);
+    defer allocator.free(html);
+
+    const doc = c.wr_create_doc_dom_from_string(html.ptr);
     if (doc == null) {
-        @panic("error parser");
+        return error.ParserError;
     }
     return @as(*DocumentHTML, @ptrCast(doc.?));
 }
diff --git a/vendor/netsurf/wrapper/wrapper.c b/vendor/netsurf/wrapper/wrapper.c
index 1f7c14bd..0a9448b7 100644
--- a/vendor/netsurf/wrapper/wrapper.c
+++ b/vendor/netsurf/wrapper/wrapper.c
@@ -1,8 +1,62 @@
 #include
+#include
 
 #include
 #include
 
+/**
+ * Generate a LibDOM document DOM from an HTML string
+ *
+ * \param html   The HTML string, NUL-terminated (its length is taken with strlen)
+ * \return  pointer to DOM document, or NULL on error
+ */
+dom_document *wr_create_doc_dom_from_string(char *html)
+{
+    dom_hubbub_parser *parser = NULL;
+    dom_hubbub_error error;
+    dom_hubbub_parser_params params;
+    dom_document *doc;
+
+    params.enc = NULL;
+    params.fix_enc = true;
+    params.enable_script = false;
+    params.msg = NULL;
+    params.script = NULL;
+    params.ctx = NULL;
+    params.daf = NULL;
+
+    /* Create Hubbub parser */
+    error = dom_hubbub_parser_create(&params, &parser, &doc);
+    if (error != DOM_HUBBUB_OK) {
+        printf("Can't create Hubbub Parser\n");
+        return NULL;
+    }
+
+    error = dom_hubbub_parser_parse_chunk(parser, html, strlen(html));
+    if (error != DOM_HUBBUB_OK) {
+        dom_hubbub_parser_destroy(parser);
+        /* Drop our reference so the document is not leaked */
+        dom_node_unref(doc);
+        printf("Parsing errors occur\n");
+        return NULL;
+    }
+
+    /* Done parsing file */
+    error = dom_hubbub_parser_completed(parser);
+    if (error != DOM_HUBBUB_OK) {
+        dom_hubbub_parser_destroy(parser);
+        /* Drop our reference so the document is not leaked */
+        dom_node_unref(doc);
+        printf("Parsing error when construct DOM\n");
+        return NULL;
+    }
+
+    /* Finished with parser */
+    dom_hubbub_parser_destroy(parser);
+
+    return doc;
+}
+
 /**
  * Generate a LibDOM document DOM from an HTML file
  *
diff --git a/vendor/netsurf/wrapper/wrapper.h b/vendor/netsurf/wrapper/wrapper.h
index 05904ef1..ba630a70 100644
--- a/vendor/netsurf/wrapper/wrapper.h
+++ b/vendor/netsurf/wrapper/wrapper.h
@@ -3,6 +3,7 @@
 
 #include
 
+dom_document *wr_create_doc_dom_from_string(char *html);
 dom_document *wr_create_doc_dom_from_file(char *filename);
 
 #endif /* wrapper_dom_h_ */