Merge pull request #146 from lightpanda-io/fix-doc-parse

netsurf: fix documentHTMLParseFromStr
This commit is contained in:
Pierre Tachoire
2024-01-09 12:05:10 +01:00
committed by GitHub
5 changed files with 23 additions and 37 deletions

View File

@@ -64,7 +64,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParseFromFile(file);
doc = try parser.documentHTMLParse(file.reader());
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};

View File

@@ -45,7 +45,7 @@ pub fn main() !void {
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParseFromFile(file);
doc = try parser.documentHTMLParse(file.reader());
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};

View File

@@ -1393,9 +1393,14 @@ fn parserErr(err: HubbubErr) ParserError!void {
};
}
// documentHTMLParseFromFile parses the given HTML file.
// documentHTMLParseFromStr parses the given HTML string.
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML {
pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML {
    // Expose the in-memory string through a reader so that the
    // reader-based documentHTMLParse does the actual parsing.
    var stream = std.io.fixedBufferStream(str);
    const reader = stream.reader();
    return documentHTMLParse(reader);
}
pub fn documentHTMLParse(reader: anytype) !*DocumentHTML {
var parser: ?*c.dom_hubbub_parser = undefined;
var doc: ?*c.dom_document = undefined;
var err: c.hubbub_error = undefined;
@@ -1417,7 +1422,7 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML {
var buffer: [1024]u8 = undefined;
var ln = buffer.len;
while (ln == buffer.len) {
ln = try file.readAll(&buffer);
ln = try reader.read(&buffer);
err = c.dom_hubbub_parser_parse_chunk(parser, &buffer, ln);
try parserErr(err);
}
@@ -1428,36 +1433,6 @@ pub fn documentHTMLParseFromFile(file: std.fs.File) !*DocumentHTML {
return @as(*DocumentHTML, @ptrCast(doc.?));
}
// documentHTMLParseFromStr parses the given HTML string.
// The whole string is handed to the hubbub parser as a single chunk.
// Any non-success status reported by the parser is returned through parserErr.
// The caller is responsible for closing the document.
pub fn documentHTMLParseFromStr(str: []const u8) ParserError!*DocumentHTML {
    var parser: ?*c.dom_hubbub_parser = undefined;
    var doc: ?*c.dom_document = undefined;
    var err: c.hubbub_error = undefined;

    // Parser settings: no forced encoding, no callbacks, scripting disabled.
    var params = c.dom_hubbub_parser_params{
        .enc = null,
        .fix_enc = true,
        .msg = null,
        .script = null,
        .enable_script = false,
        .ctx = null,
        .daf = null,
    };

    err = c.dom_hubbub_parser_create(&params, &parser, &doc);
    try parserErr(err);
    // The parser is only needed while feeding data; destroy it on every exit path.
    defer c.dom_hubbub_parser_destroy(parser);

    // A Zig slice does not coerce to a C pointer: pass the underlying
    // pointer and the length separately.
    err = c.dom_hubbub_parser_parse_chunk(parser, str.ptr, str.len);
    try parserErr(err);

    err = c.dom_hubbub_parser_completed(parser);
    try parserErr(err);

    return @as(*DocumentHTML, @ptrCast(doc.?));
}
// documentHTMLClose closes the document.
pub fn documentHTMLClose(doc: *DocumentHTML) !void {
const err = documentHTMLVtable(doc).close.?(doc);

View File

@@ -41,7 +41,7 @@ fn testExecFn(
const file = try std.fs.cwd().openFile("test.html", .{});
defer file.close();
doc = try parser.documentHTMLParseFromFile(file);
doc = try parser.documentHTMLParse(file.reader());
defer parser.documentHTMLClose(doc) catch |err| {
std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
};
@@ -98,3 +98,14 @@ test {
try jsruntime.loadEnv(&arena_alloc, testsAllExecFn, apis);
}
// Checks that an HTML document can be parsed from an in-memory string:
// reads test.html fully into memory, parses it, then closes the document.
test "DocumentHTMLParseFromStr" {
    const file = try std.fs.cwd().openFile("test.html", .{});
    defer file.close();

    // Load the whole fixture so it can be passed to the parser as a string.
    const str = try file.readToEndAlloc(std.testing.allocator, std.math.maxInt(u32));
    defer std.testing.allocator.free(str);

    // `doc` is not declared in this block; it is expected to be declared elsewhere in the file.
    doc = try parser.documentHTMLParseFromStr(str);

    // Report a close failure instead of dropping it silently, the same way
    // the other documentHTMLClose call sites do.
    parser.documentHTMLClose(doc) catch |err| {
        std.debug.print("documentHTMLClose error: {s}\n", .{@errorName(err)});
    };
}

View File

@@ -20,7 +20,7 @@ pub fn run(arena: *std.heap.ArenaAllocator, comptime apis: []jsruntime.API, comp
const file = try std.fs.cwd().openFile(f, .{});
defer file.close();
const html_doc = try parser.documentHTMLParseFromFile(file);
const html_doc = try parser.documentHTMLParse(file.reader());
const doc = parser.documentHTMLToDocument(html_doc);
const dirname = fspath.dirname(f[dir.len..]) orelse unreachable;