netsurf: factorize document parsing

This commit is contained in:
Pierre Tachoire
2024-02-28 15:15:15 +01:00
parent 25bc2d5e75
commit 63e19c7704
2 changed files with 23 additions and 41 deletions

View File

@@ -460,8 +460,11 @@ pub fn testExecFn(
.{ .src = "let h = document.getElementById('para-empty')", .ex = "undefined" }, .{ .src = "let h = document.getElementById('para-empty')", .ex = "undefined" },
.{ .src = "const prev = h.innerHTML", .ex = "undefined" }, .{ .src = "const prev = h.innerHTML", .ex = "undefined" },
.{ .src = "h.innerHTML = '<p>hello world</p>'", .ex = "<p>hello world</p>" }, .{ .src = "h.innerHTML = '<p id=\"hello\">hello world</p>'", .ex = "<p id=\"hello\">hello world</p>" },
.{ .src = "h.innerHTML", .ex = "<p>hello world</p>" }, .{ .src = "h.innerHTML", .ex = "<p id=\"hello\">hello world</p>" },
.{ .src = "h.firstChild.nodeName", .ex = "P" },
.{ .src = "h.firstChild.id", .ex = "hello" },
.{ .src = "h.firstChild.textContent", .ex = "hello world" },
.{ .src = "h.innerHTML = prev; true", .ex = "true" }, .{ .src = "h.innerHTML = prev; true", .ex = "true" },
.{ .src = "document.getElementById('para-empty').innerHTML.trim()", .ex = "<span id=\"para-empty-child\"></span>" }, .{ .src = "document.getElementById('para-empty').innerHTML.trim()", .ex = "<span id=\"para-empty-child\"></span>" },
}; };

View File

@@ -1837,40 +1837,13 @@ pub fn documentHTMLParse(reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
var parser: ?*c.dom_hubbub_parser = undefined; var parser: ?*c.dom_hubbub_parser = undefined;
var doc: ?*c.dom_document = undefined; var doc: ?*c.dom_document = undefined;
var err: c.hubbub_error = undefined; var err: c.hubbub_error = undefined;
var params = parseParams(enc);
var params = c.dom_hubbub_parser_params{
.enc = null,
.fix_enc = true,
.msg = null,
.script = null,
.enable_script = false,
.ctx = null,
.daf = null,
};
if (enc) |e| params.enc = e;
err = c.dom_hubbub_parser_create(&params, &parser, &doc); err = c.dom_hubbub_parser_create(&params, &parser, &doc);
try parserErr(err); try parserErr(err);
defer c.dom_hubbub_parser_destroy(parser); defer c.dom_hubbub_parser_destroy(parser);
var buffer: [1024]u8 = undefined; try parseData(parser.?, reader);
var ln = buffer.len;
while (ln > 0) {
ln = try reader.read(&buffer);
err = c.dom_hubbub_parser_parse_chunk(parser, &buffer, ln);
// TODO handle encoding change error return.
// When the HTML contains a META tag with a different encoding than the
// original one, a c.DOM_HUBBUB_HUBBUB_ERR_ENCODINGCHANGE error is
// returned.
// In this case, we must restart the parsing with the new detected
// encoding. The detected encoding is stored in the document and we can
// get it with documentGetInputEncoding().
try parserErr(err);
}
err = c.dom_hubbub_parser_completed(parser);
try parserErr(err);
return @as(*DocumentHTML, @ptrCast(doc.?)); return @as(*DocumentHTML, @ptrCast(doc.?));
} }
@@ -1884,9 +1857,20 @@ pub fn documentParseFragment(self: *Document, reader: anytype, enc: ?[:0]const u
var parser: ?*c.dom_hubbub_parser = undefined; var parser: ?*c.dom_hubbub_parser = undefined;
var fragment: ?*c.dom_document_fragment = undefined; var fragment: ?*c.dom_document_fragment = undefined;
var err: c.hubbub_error = undefined; var err: c.hubbub_error = undefined;
var params = parseParams(enc);
var params = c.dom_hubbub_parser_params{ err = c.dom_hubbub_fragment_parser_create(&params, self, &parser, &fragment);
.enc = null, try parserErr(err);
defer c.dom_hubbub_parser_destroy(parser);
try parseData(parser.?, reader);
return @as(*DocumentFragment, @ptrCast(fragment.?));
}
fn parseParams(enc: ?[:0]const u8) c.dom_hubbub_parser_params {
return .{
.enc = enc orelse null,
.fix_enc = true, .fix_enc = true,
.msg = null, .msg = null,
.script = null, .script = null,
@@ -1894,13 +1878,10 @@ pub fn documentParseFragment(self: *Document, reader: anytype, enc: ?[:0]const u
.ctx = null, .ctx = null,
.daf = null, .daf = null,
}; };
}
if (enc) |e| params.enc = e; fn parseData(parser: *c.dom_hubbub_parser, reader: anytype) !void {
var err: c.hubbub_error = undefined;
err = c.dom_hubbub_fragment_parser_create(&params, self, &parser, &fragment);
try parserErr(err);
defer c.dom_hubbub_parser_destroy(parser);
var buffer: [1024]u8 = undefined; var buffer: [1024]u8 = undefined;
var ln = buffer.len; var ln = buffer.len;
while (ln > 0) { while (ln > 0) {
@@ -1918,8 +1899,6 @@ pub fn documentParseFragment(self: *Document, reader: anytype, enc: ?[:0]const u
err = c.dom_hubbub_parser_completed(parser); err = c.dom_hubbub_parser_completed(parser);
try parserErr(err); try parserErr(err);
return @as(*DocumentFragment, @ptrCast(fragment.?));
} }
// documentHTMLClose closes the document. // documentHTMLClose closes the document.