Merge pull request #52 from Browsercore/netsurf-parse

refacto netsurf parse document
This commit is contained in:
Francis Bouvier
2023-10-09 14:04:37 +02:00
committed by GitHub
6 changed files with 105 additions and 20 deletions

View File

@@ -52,8 +52,12 @@ pub fn main() !void {
const vm = jsruntime.VM.init(); const vm = jsruntime.VM.init();
defer vm.deinit(); defer vm.deinit();
// alloc
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
// document // document
doc = parser.documentHTMLParse("test.html"); doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html");
defer parser.documentHTMLClose(doc); defer parser.documentHTMLClose(doc);
// remove socket file of internal server // remove socket file of internal server
@@ -66,10 +70,6 @@ pub fn main() !void {
} }
}; };
// alloc
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
// server // server
var addr = try std.net.Address.initUnix(socket_path); var addr = try std.net.Address.initUnix(socket_path);
server = std.net.StreamServer.init(.{}); server = std.net.StreamServer.init(.{});

View File

@@ -31,20 +31,20 @@ pub fn main() !void {
// generate APIs // generate APIs
const apis = jsruntime.compile(DOM.Interfaces); const apis = jsruntime.compile(DOM.Interfaces);
// allocator
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
var arena = std.heap.ArenaAllocator.init(gpa.allocator());
defer arena.deinit();
// document // document
doc = parser.documentHTMLParse("test.html"); doc = try parser.documentHTMLParseFromFileAlloc(arena.allocator(), "test.html");
defer parser.documentHTMLClose(doc); defer parser.documentHTMLClose(doc);
// create JS vm // create JS vm
const vm = jsruntime.VM.init(); const vm = jsruntime.VM.init();
defer vm.deinit(); defer vm.deinit();
// alloc
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
var arena = std.heap.ArenaAllocator.init(gpa.allocator());
defer arena.deinit();
// launch shell // launch shell
try jsruntime.shell(&arena, apis, execJS, .{ .app_name = "browsercore" }); try jsruntime.shell(&arena, apis, execJS, .{ .app_name = "browsercore" });
} }

View File

@@ -553,11 +553,46 @@ fn documentHTMLVtable(doc_html: *DocumentHTML) c.dom_html_document_vtable {
return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html); return getVtable(c.dom_html_document_vtable, DocumentHTML, doc_html);
} }
pub fn documentHTMLParse(filename: []const u8) *DocumentHTML { // documentHTMLParseFromFileAlloc parses the file.
var f: []u8 = @constCast(filename); // The allocator is required to create a null terminated string from filename.
const doc = c.wr_create_doc_dom_from_file(f.ptr); // The buffer is freed by the function.
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromFileAlloc(allocator: std.mem.Allocator, filename: []const u8) !*DocumentHTML {
    // documentHTMLParseFromFile expects a 0-terminated path, so take a
    // temporary sentinel-terminated copy of filename.
    const path_z = try allocator.dupeZ(u8, filename);
    // The copy is only needed for the duration of the parse call; it is
    // released when this function returns, on success or on error.
    defer allocator.free(path_z);

    const doc = try documentHTMLParseFromFile(path_z);
    return doc;
}
// documentHTMLParseFromFile parses the given filename c string (ie. with 0 sentinel).
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromFile(filename: [:0]const u8) !*DocumentHTML {
// filename is already null terminated: pass its pointer to the C wrapper.
const doc = c.wr_create_doc_dom_from_file(filename.ptr);
if (doc == null) { if (doc == null) {
@panic("error parser"); return error.ParserError;
}
return @as(*DocumentHTML, @ptrCast(doc.?));
}
// documentHTMLParseFromStrAlloc parses the given string.
// The string does not need to be null terminated: the allocator is used to
// create a null terminated copy, which is freed before the function returns.
// Returns an allocation error if the copy cannot be created, or any error
// returned by documentHTMLParseFromStr.
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromStrAlloc(allocator: std.mem.Allocator, str: []const u8) !*DocumentHTML {
    // create a null terminated c string.
    // NOTE(review): a 0 byte embedded in str would presumably end the input
    // early on the C side, since only the pointer is handed over.
    const cstr = try allocator.dupeZ(u8, str);
    defer allocator.free(cstr);
    return documentHTMLParseFromStr(cstr);
}
// documentHTMLParseFromStr parses the given c string (ie. with 0 sentinel).
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromStr(cstr: [:0]const u8) !*DocumentHTML {
const doc = c.wr_create_doc_dom_from_string(cstr.ptr);
if (doc == null) {
return error.ParserError;
} }
return @as(*DocumentHTML, @ptrCast(doc.?)); return @as(*DocumentHTML, @ptrCast(doc.?));
} }

View File

@@ -38,10 +38,9 @@ test {
const apis = jsruntime.compile(DOM.Interfaces); const apis = jsruntime.compile(DOM.Interfaces);
// document // document
doc = parser.documentHTMLParse("test.html"); doc = try parser.documentHTMLParseFromFileAlloc(std.testing.allocator, "test.html");
defer parser.documentHTMLClose(doc); defer parser.documentHTMLClose(doc);
// create JS vm // create JS vm
const vm = jsruntime.VM.init(); const vm = jsruntime.VM.init();
defer vm.deinit(); defer vm.deinit();

View File

@@ -1,15 +1,65 @@
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <dom/dom.h> #include <dom/dom.h>
#include <dom/bindings/hubbub/parser.h> #include <dom/bindings/hubbub/parser.h>
/**
* Generate a LibDOM document DOM from an HTML string
*
* \param string The HTML string
* \return pointer to DOM document, or NULL on error
*/
dom_document *wr_create_doc_dom_from_string(const char *html)
{
dom_hubbub_parser *parser = NULL;
dom_hubbub_error error;
dom_hubbub_parser_params params;
dom_document *doc;
params.enc = NULL;
params.fix_enc = true;
params.enable_script = false;
params.msg = NULL;
params.script = NULL;
params.ctx = NULL;
params.daf = NULL;
/* Create Hubbub parser */
error = dom_hubbub_parser_create(&params, &parser, &doc);
if (error != DOM_HUBBUB_OK) {
printf("Can't create Hubbub Parser\n");
return NULL;
}
error = dom_hubbub_parser_parse_chunk(parser, html, strlen(html));
if (error != DOM_HUBBUB_OK) {
dom_hubbub_parser_destroy(parser);
printf("Parsing errors occur\n");
return NULL;
}
/* Done parsing file */
error = dom_hubbub_parser_completed(parser);
if (error != DOM_HUBBUB_OK) {
dom_hubbub_parser_destroy(parser);
printf("Parsing error when construct DOM\n");
return NULL;
}
/* Finished with parser */
dom_hubbub_parser_destroy(parser);
return doc;
}
/** /**
* Generate a LibDOM document DOM from an HTML file * Generate a LibDOM document DOM from an HTML file
* *
* \param file The file path * \param file The file path
* \return pointer to DOM document, or NULL on error * \return pointer to DOM document, or NULL on error
*/ */
dom_document *wr_create_doc_dom_from_file(char *filename) dom_document *wr_create_doc_dom_from_file(const char *filename)
{ {
size_t buffer_size = 1024; size_t buffer_size = 1024;
dom_hubbub_parser *parser = NULL; dom_hubbub_parser *parser = NULL;

View File

@@ -3,6 +3,7 @@
#include <dom/dom.h> #include <dom/dom.h>
dom_document *wr_create_doc_dom_from_file(char *filename); dom_document *wr_create_doc_dom_from_string(const char *html);
dom_document *wr_create_doc_dom_from_file(const char *filename);
#endif /* wrapper_dom_h_ */ #endif /* wrapper_dom_h_ */