diff --git a/src/browser/page.zig b/src/browser/page.zig index c8f785c2..a533091f 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -109,6 +109,7 @@ pub const Page = struct { err: anyerror, parsed: void, html: parser.Parser, + text: parser.Parser, raw: std.ArrayListUnmanaged(u8), raw_done: []const u8, }; @@ -207,6 +208,20 @@ pub const Page = struct { return out.writeAll(buf.items); }, .raw_done => |data| return out.writeAll(data), + .text => { + // returns the <pre> element from the HTML
+ const doc = parser.documentHTMLToDocument(self.window.document);
+ const list = try parser.documentGetElementsByTagName(doc, "pre");
+ const pre = try parser.nodeListItem(list, 0) orelse return error.InvalidHTML; // only the first "pre" match is dumped; no match at all is an error
+ const walker = Walker{};
+ var next: ?*parser.Node = null; // walk cursor; presumably null asks get_next for the first node - confirm in Walker
+ while (true) {
+ next = try walker.get_next(pre, next) orelse break; // the walk is over once get_next yields null
+ const v = try parser.nodeTextContent(next.?) orelse return; // NOTE(review): a node without text content ends the dump silently
+ try out.writeAll(v);
+ }
+ return;
+ },
.html => {
// maybe page.wait timed-out, print what we have
log.warn(.http, "incomplete load", .{ .mode = "html" });
@@ -284,7 +299,7 @@ pub const Page = struct {
while (true) {
SW: switch (self.mode) {
- .pre, .raw => {
+ .pre, .raw, .text => {
if (self.request_intercepted) {
// the page request was intercepted.
@@ -627,18 +642,44 @@ pub const Page = struct {
break :blk Mime.sniff(data);
} orelse .unknown;
- const is_html = mime.isHTML();
- log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });
+ log.debug(.http, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len });
- if (is_html) {
- self.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
- } else {
- self.mode = .{ .raw = .{} };
- }
+ self.mode = switch (mime.content_type) {
+     .text_html => .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") },
+     // Text-like bodies are parsed as HTML inside a <pre>: dumping a .text page reads the first <pre>.
+     .application_json,
+     .text_javascript,
+     .text_css,
+     .text_plain,
+     => blk: {
+         var p = try parser.Parser.init(mime.charset orelse "UTF-8");
+         errdefer p.deinit(); // don't leak the parser when the prologue is rejected
+         try p.process("<html><head></head><body><pre>");
+         break :blk .{ .text = p };
+     },
+     else => .{ .raw = .{} },
+ };
}
switch (self.mode) {
.html => |*p| try p.process(data),
+ .text => |*p| {
+     // Escape markup-significant bytes for the HTML parser; '&' too, or "&lt;" in the body would be decoded.
+     var v = data;
+     while (std.mem.indexOfAny(u8, v, "&<>")) |index| {
+         try p.process(v[0..index]);
+         const escaped: []const u8 = switch (v[index]) {
+             '&' => "&amp;",
+             '<' => "&lt;",
+             '>' => "&gt;",
+             else => unreachable,
+         };
+         try p.process(escaped);
+         v = v[index + 1 ..];
+     }
+     // Flush the tail, then fall out of the switch like the other arms instead of returning early.
+     if (v.len > 0) try p.process(v);
+ },
.raw => |*buf| try buf.appendSlice(self.arena, data),
.pre => unreachable,
.parsed => unreachable,
@@ -658,6 +699,13 @@ pub const Page = struct {
self.mode = .{ .raw_done = buf.items };
self.documentIsComplete();
},
+ .text => |*p| {
+     try p.process("</pre></body></html>"); // explicit end tags: close the <pre> that the .text dump reads
+     const html_doc = p.html_doc;
+     p.deinit(); // don't need the parser anymore
+     try self.setDocument(html_doc);
+     self.documentIsComplete();
+ },
.html => |*p| {
const html_doc = p.html_doc;
p.deinit(); // don't need the parser anymore
@@ -719,7 +767,7 @@ pub const Page = struct {
self.clearTransferArena();
switch (self.mode) {
- .html => |*p| p.deinit(), // don't need the parser anymore
+ .html, .text => |*p| p.deinit(), // don't need the parser anymore
else => {},
}
self.mode = .{ .err = err };
@@ -1063,6 +1111,6 @@ pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) c
}
self.script_manager.addFromElement(element.?) catch |err| {
- log.warn(.browser, "dynamcic script", .{ .err = err });
+ log.warn(.browser, "dynamic script", .{ .err = err });
};
}