mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-29 23:23:28 +00:00
Merge pull request #959 from lightpanda-io/html-pre
handle text content type with HTML
This commit is contained in:
@@ -109,6 +109,7 @@ pub const Page = struct {
|
|||||||
err: anyerror,
|
err: anyerror,
|
||||||
parsed: void,
|
parsed: void,
|
||||||
html: parser.Parser,
|
html: parser.Parser,
|
||||||
|
text: parser.Parser,
|
||||||
raw: std.ArrayListUnmanaged(u8),
|
raw: std.ArrayListUnmanaged(u8),
|
||||||
raw_done: []const u8,
|
raw_done: []const u8,
|
||||||
};
|
};
|
||||||
@@ -207,6 +208,20 @@ pub const Page = struct {
|
|||||||
return out.writeAll(buf.items);
|
return out.writeAll(buf.items);
|
||||||
},
|
},
|
||||||
.raw_done => |data| return out.writeAll(data),
|
.raw_done => |data| return out.writeAll(data),
|
||||||
|
.text => {
|
||||||
|
// returns the <pre> element from the HTML
|
||||||
|
const doc = parser.documentHTMLToDocument(self.window.document);
|
||||||
|
const list = try parser.documentGetElementsByTagName(doc, "pre");
|
||||||
|
const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML;
|
||||||
|
const walker = Walker{};
|
||||||
|
var next: ?*parser.Node = null;
|
||||||
|
while (true) {
|
||||||
|
next = try walker.get_next(pre, next) orelse break;
|
||||||
|
const v = try parser.nodeTextContent(next.?) orelse return;
|
||||||
|
try out.writeAll(v);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
},
|
||||||
.html => {
|
.html => {
|
||||||
// maybe page.wait timed-out, print what we have
|
// maybe page.wait timed-out, print what we have
|
||||||
log.warn(.http, "incomplete load", .{ .mode = "html" });
|
log.warn(.http, "incomplete load", .{ .mode = "html" });
|
||||||
@@ -284,7 +299,7 @@ pub const Page = struct {
|
|||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
SW: switch (self.mode) {
|
SW: switch (self.mode) {
|
||||||
.pre, .raw => {
|
.pre, .raw, .text => {
|
||||||
if (self.request_intercepted) {
|
if (self.request_intercepted) {
|
||||||
// the page request was intercepted.
|
// the page request was intercepted.
|
||||||
|
|
||||||
@@ -627,18 +642,44 @@ pub const Page = struct {
|
|||||||
break :blk Mime.sniff(data);
|
break :blk Mime.sniff(data);
|
||||||
} orelse .unknown;
|
} orelse .unknown;
|
||||||
|
|
||||||
const is_html = mime.isHTML();
|
log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
|
||||||
log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });
|
|
||||||
|
|
||||||
if (is_html) {
|
self.mode = switch (mime.content_type) {
|
||||||
self.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
|
.text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
|
||||||
} else {
|
|
||||||
self.mode = .{ .raw = .{} };
|
.application_json,
|
||||||
}
|
.text_javascript,
|
||||||
|
.text_css,
|
||||||
|
.text_plain,
|
||||||
|
=> blk: {
|
||||||
|
var p = try parser.Parser.init(mime.charset orelse "UTF-8");
|
||||||
|
try p.process("<html><head><meta charset=\"utf-8\"></head><body><pre>");
|
||||||
|
break :blk .{ .text = p };
|
||||||
|
},
|
||||||
|
|
||||||
|
else => .{ .raw = .{} },
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (self.mode) {
|
switch (self.mode) {
|
||||||
.html => |*p| try p.process(data),
|
.html => |*p| try p.process(data),
|
||||||
|
.text => |*p| {
|
||||||
|
// we have to escape the data...
|
||||||
|
var v = data;
|
||||||
|
while (v.len > 0) {
|
||||||
|
const index = std.mem.indexOfAnyPos(u8, v, 0, &.{ '<', '>' }) orelse {
|
||||||
|
try p.process(v);
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
try p.process(v[0..index]);
|
||||||
|
switch (v[index]) {
|
||||||
|
'<' => try p.process("&lt;"),
|
||||||
|
'>' => try p.process("&gt;"),
|
||||||
|
else => unreachable,
|
||||||
|
}
|
||||||
|
v = v[index + 1 ..];
|
||||||
|
}
|
||||||
|
},
|
||||||
.raw => |*buf| try buf.appendSlice(self.arena, data),
|
.raw => |*buf| try buf.appendSlice(self.arena, data),
|
||||||
.pre => unreachable,
|
.pre => unreachable,
|
||||||
.parsed => unreachable,
|
.parsed => unreachable,
|
||||||
@@ -658,6 +699,13 @@ pub const Page = struct {
|
|||||||
self.mode = .{ .raw_done = buf.items };
|
self.mode = .{ .raw_done = buf.items };
|
||||||
self.documentIsComplete();
|
self.documentIsComplete();
|
||||||
},
|
},
|
||||||
|
.text => |*p| {
|
||||||
|
try p.process("</pre></body></html>");
|
||||||
|
const html_doc = p.html_doc;
|
||||||
|
p.deinit(); // don't need the parser anymore
|
||||||
|
try self.setDocument(html_doc);
|
||||||
|
self.documentIsComplete();
|
||||||
|
},
|
||||||
.html => |*p| {
|
.html => |*p| {
|
||||||
const html_doc = p.html_doc;
|
const html_doc = p.html_doc;
|
||||||
p.deinit(); // don't need the parser anymore
|
p.deinit(); // don't need the parser anymore
|
||||||
@@ -719,7 +767,7 @@ pub const Page = struct {
|
|||||||
self.clearTransferArena();
|
self.clearTransferArena();
|
||||||
|
|
||||||
switch (self.mode) {
|
switch (self.mode) {
|
||||||
.html => |*p| p.deinit(), // don't need the parser anymore
|
.html, .text => |*p| p.deinit(), // don't need the parser anymore
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
self.mode = .{ .err = err };
|
self.mode = .{ .err = err };
|
||||||
@@ -1063,6 +1111,6 @@ pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) c
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.script_manager.addFromElement(element.?) catch |err| {
|
self.script_manager.addFromElement(element.?) catch |err| {
|
||||||
log.warn(.browser, "dynamcic script", .{ .err = err });
|
log.warn(.browser, "dynamic script", .{ .err = err });
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user