mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-29 23:23:28 +00:00
Try to sniff the mime type based on the body content
The synchronous body reader now exposes a peek() function to get the first few bytes from the response body. This will be no less than 100 bytes (assuming the body is that big), but could be more. The streaming API, via res.next(), continues to work as-is even if peek() is called. Introduce Mime.sniff(), which detects a few common types - the ones that we care about right now - from the body content.
This commit is contained in:
@@ -435,24 +435,19 @@ pub const Page = struct {
|
|||||||
|
|
||||||
log.info("GET {any} {d}", .{ url, header.status });
|
log.info("GET {any} {d}", .{ url, header.status });
|
||||||
|
|
||||||
const ct = blk: {
|
const content_type = header.get("content-type");
|
||||||
break :blk header.get("content-type") orelse {
|
|
||||||
// no content type in HTTP headers.
|
|
||||||
// TODO try to sniff mime type from the body.
|
|
||||||
log.info("no content-type HTTP header", .{});
|
|
||||||
|
|
||||||
// Assume it's HTML for now.
|
const mime: Mime = blk: {
|
||||||
break :blk "text/html; charset=utf-8";
|
if (content_type) |ct| {
|
||||||
};
|
break :blk try Mime.parse(arena, ct);
|
||||||
};
|
}
|
||||||
|
break :blk Mime.sniff(try response.peek());
|
||||||
log.debug("header content-type: {s}", .{ct});
|
} orelse .unknown;
|
||||||
var mime = try Mime.parse(arena, ct);
|
|
||||||
|
|
||||||
if (mime.isHTML()) {
|
if (mime.isHTML()) {
|
||||||
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
|
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
|
||||||
} else {
|
} else {
|
||||||
log.info("non-HTML document: {s}", .{ct});
|
log.info("non-HTML document: {s}", .{content_type orelse "null"});
|
||||||
var arr: std.ArrayListUnmanaged(u8) = .{};
|
var arr: std.ArrayListUnmanaged(u8) = .{};
|
||||||
while (try response.next()) |data| {
|
while (try response.next()) |data| {
|
||||||
try arr.appendSlice(arena, try arena.dupe(u8, data));
|
try arr.appendSlice(arena, try arena.dupe(u8, data));
|
||||||
|
|||||||
@@ -24,10 +24,17 @@ pub const Mime = struct {
|
|||||||
params: []const u8 = "",
|
params: []const u8 = "",
|
||||||
charset: ?[]const u8 = null,
|
charset: ?[]const u8 = null,
|
||||||
|
|
||||||
|
pub const unknown = Mime{
|
||||||
|
.params = "",
|
||||||
|
.charset = "",
|
||||||
|
.content_type = .{ .unknown = {} },
|
||||||
|
};
|
||||||
|
|
||||||
pub const ContentTypeEnum = enum {
|
pub const ContentTypeEnum = enum {
|
||||||
text_xml,
|
text_xml,
|
||||||
text_html,
|
text_html,
|
||||||
text_plain,
|
text_plain,
|
||||||
|
unknown,
|
||||||
other,
|
other,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -35,21 +42,26 @@ pub const Mime = struct {
|
|||||||
text_xml: void,
|
text_xml: void,
|
||||||
text_html: void,
|
text_html: void,
|
||||||
text_plain: void,
|
text_plain: void,
|
||||||
|
unknown: void,
|
||||||
other: struct { type: []const u8, sub_type: []const u8 },
|
other: struct { type: []const u8, sub_type: []const u8 },
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn parse(arena: Allocator, input: []const u8) !Mime {
|
pub fn parse(arena: Allocator, input: []u8) !Mime {
|
||||||
if (input.len > 255) {
|
if (input.len > 255) {
|
||||||
return error.TooBig;
|
return error.TooBig;
|
||||||
}
|
}
|
||||||
var trimmed = trim(input);
|
|
||||||
|
|
||||||
const content_type, const type_len = try parseContentType(trimmed);
|
// Zig's trim API is broken. The return type is always `[]const u8`,
|
||||||
if (type_len >= trimmed.len) {
|
// even if the input type is `[]u8`. @constCast is safe here.
|
||||||
|
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
|
||||||
|
_ = std.ascii.lowerString(normalized, normalized);
|
||||||
|
|
||||||
|
const content_type, const type_len = try parseContentType(normalized);
|
||||||
|
if (type_len >= normalized.len) {
|
||||||
return .{ .content_type = content_type };
|
return .{ .content_type = content_type };
|
||||||
}
|
}
|
||||||
|
|
||||||
const params = trimLeft(trimmed[type_len..]);
|
const params = trimLeft(normalized[type_len..]);
|
||||||
|
|
||||||
var charset: ?[]const u8 = null;
|
var charset: ?[]const u8 = null;
|
||||||
|
|
||||||
@@ -63,11 +75,12 @@ pub const Mime = struct {
|
|||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (name.len) {
|
const attribute_name = std.meta.stringToEnum(enum {
|
||||||
7 => if (isCaseEqual("charset", name)) {
|
charset,
|
||||||
charset = try parseValue(arena, value);
|
}, name) orelse continue;
|
||||||
},
|
|
||||||
else => {},
|
switch (attribute_name) {
|
||||||
|
.charset => charset = try parseAttributeValue(arena, value),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,66 +91,113 @@ pub const Mime = struct {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn sniff(body: []const u8) ?Mime {
|
||||||
|
// 0x0C is form feed
|
||||||
|
const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C });
|
||||||
|
if (content.len == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content[0] != '<') {
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
|
||||||
|
// UTF-8 BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
|
||||||
|
// UTF-16 big-endian BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
|
||||||
|
// UTF-16 little-endian BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The longest prefix we have is "<!DOCTYPE HTML ", 15 bytes. If we're
|
||||||
|
// here, we already know content[0] == '<', so we can skip that. So 14
|
||||||
|
// bytes.
|
||||||
|
|
||||||
|
// -1 because we don't need the leading '<'
|
||||||
|
var buf: [14]u8 = undefined;
|
||||||
|
|
||||||
|
const stripped = content[1..];
|
||||||
|
const prefix_len = @min(stripped.len, buf.len);
|
||||||
|
const prefix = std.ascii.lowerString(&buf, stripped[0..prefix_len]);
|
||||||
|
|
||||||
|
// we already know it starts with a <
|
||||||
|
const known_prefixes = [_]struct { []const u8, ContentType }{
|
||||||
|
.{ "!doctype html", .{ .text_html = {} } },
|
||||||
|
.{ "html", .{ .text_html = {} } },
|
||||||
|
.{ "script", .{ .text_html = {} } },
|
||||||
|
.{ "iframe", .{ .text_html = {} } },
|
||||||
|
.{ "h1", .{ .text_html = {} } },
|
||||||
|
.{ "div", .{ .text_html = {} } },
|
||||||
|
.{ "font", .{ .text_html = {} } },
|
||||||
|
.{ "table", .{ .text_html = {} } },
|
||||||
|
.{ "a", .{ .text_html = {} } },
|
||||||
|
.{ "style", .{ .text_html = {} } },
|
||||||
|
.{ "title", .{ .text_html = {} } },
|
||||||
|
.{ "b", .{ .text_html = {} } },
|
||||||
|
.{ "body", .{ .text_html = {} } },
|
||||||
|
.{ "br", .{ .text_html = {} } },
|
||||||
|
.{ "p", .{ .text_html = {} } },
|
||||||
|
.{ "!--", .{ .text_html = {} } },
|
||||||
|
.{ "xml", .{ .text_xml = {} } },
|
||||||
|
};
|
||||||
|
inline for (known_prefixes) |kp| {
|
||||||
|
const known_prefix = kp.@"0";
|
||||||
|
if (std.mem.startsWith(u8, prefix, known_prefix) and prefix.len > known_prefix.len) {
|
||||||
|
const next = prefix[known_prefix.len];
|
||||||
|
// a "tag-terminating-byte"
|
||||||
|
if (next == ' ' or next == '>') {
|
||||||
|
return .{ .content_type = kp.@"1" };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn isHTML(self: *const Mime) bool {
|
pub fn isHTML(self: *const Mime) bool {
|
||||||
return self.content_type == .text_html;
|
return self.content_type == .text_html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we expect value to be lowercase
|
||||||
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
||||||
const separator = std.mem.indexOfScalarPos(u8, value, 0, '/') orelse {
|
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
|
||||||
return error.Invalid;
|
const type_name = trimRight(value[0..end]);
|
||||||
};
|
const attribute_start = end + 1;
|
||||||
const end = std.mem.indexOfScalarPos(u8, value, separator, ';') orelse blk: {
|
|
||||||
break :blk value.len;
|
if (std.meta.stringToEnum(enum {
|
||||||
|
@"text/xml",
|
||||||
|
@"text/html",
|
||||||
|
@"text/plain",
|
||||||
|
}, type_name)) |known_type| {
|
||||||
|
const ct: ContentType = switch (known_type) {
|
||||||
|
.@"text/xml" => .{ .text_xml = {} },
|
||||||
|
.@"text/html" => .{ .text_html = {} },
|
||||||
|
.@"text/plain" => .{ .text_plain = {} },
|
||||||
};
|
};
|
||||||
|
return .{ ct, attribute_start };
|
||||||
|
}
|
||||||
|
|
||||||
|
const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid;
|
||||||
|
|
||||||
const main_type = value[0..separator];
|
const main_type = value[0..separator];
|
||||||
const sub_type = trimRight(value[separator + 1 .. end]);
|
const sub_type = trimRight(value[separator + 1 .. end]);
|
||||||
|
|
||||||
if (parseCommonContentType(main_type, sub_type)) |content_type| {
|
if (main_type.len == 0 or validType(main_type) == false) {
|
||||||
return .{ content_type, end + 1 };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (main_type.len == 0) {
|
|
||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
if (validType(main_type) == false) {
|
if (sub_type.len == 0 or validType(sub_type) == false) {
|
||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sub_type.len == 0) {
|
return .{ .{ .other = .{
|
||||||
return error.Invalid;
|
|
||||||
}
|
|
||||||
if (validType(sub_type) == false) {
|
|
||||||
return error.Invalid;
|
|
||||||
}
|
|
||||||
|
|
||||||
const content_type = ContentType{ .other = .{
|
|
||||||
.type = main_type,
|
.type = main_type,
|
||||||
.sub_type = sub_type,
|
.sub_type = sub_type,
|
||||||
} };
|
} }, attribute_start };
|
||||||
|
|
||||||
return .{ content_type, end + 1 };
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parseCommonContentType(main_type: []const u8, sub_type: []const u8) ?ContentType {
|
|
||||||
switch (main_type.len) {
|
|
||||||
4 => if (isCaseEqual("text", main_type)) {
|
|
||||||
switch (sub_type.len) {
|
|
||||||
3 => if (isCaseEqual("xml", sub_type)) {
|
|
||||||
return .{ .text_xml = {} };
|
|
||||||
},
|
|
||||||
4 => if (isCaseEqual("html", sub_type)) {
|
|
||||||
return .{ .text_html = {} };
|
|
||||||
},
|
|
||||||
5 => if (isCaseEqual("plain", sub_type)) {
|
|
||||||
return .{ .text_plain = {} };
|
|
||||||
},
|
|
||||||
else => {},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
else => {},
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const T_SPECIAL = blk: {
|
const T_SPECIAL = blk: {
|
||||||
@@ -148,7 +208,7 @@ pub const Mime = struct {
|
|||||||
break :blk v;
|
break :blk v;
|
||||||
};
|
};
|
||||||
|
|
||||||
fn parseValue(arena: Allocator, value: []const u8) ![]const u8 {
|
fn parseAttributeValue(arena: Allocator, value: []const u8) ![]const u8 {
|
||||||
if (value[0] != '"') {
|
if (value[0] != '"') {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
@@ -218,10 +278,6 @@ pub const Mime = struct {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn trim(s: []const u8) []const u8 {
|
|
||||||
return std.mem.trim(u8, s, &std.ascii.whitespace);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn trimLeft(s: []const u8) []const u8 {
|
fn trimLeft(s: []const u8) []const u8 {
|
||||||
return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
|
return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
|
||||||
}
|
}
|
||||||
@@ -229,28 +285,12 @@ pub const Mime = struct {
|
|||||||
fn trimRight(s: []const u8) []const u8 {
|
fn trimRight(s: []const u8) []const u8 {
|
||||||
return std.mem.trimRight(u8, s, &std.ascii.whitespace);
|
return std.mem.trimRight(u8, s, &std.ascii.whitespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn isCaseEqual(comptime target: anytype, value: []const u8) bool {
|
|
||||||
// - 8 beause we don't care about the sentinel
|
|
||||||
const bit_len = @bitSizeOf(@TypeOf(target.*)) - 8;
|
|
||||||
const byte_len = bit_len / 8;
|
|
||||||
|
|
||||||
const T = @Type(.{ .int = .{
|
|
||||||
.bits = bit_len,
|
|
||||||
.signedness = .unsigned,
|
|
||||||
} });
|
|
||||||
|
|
||||||
const bit_target: T = @bitCast(@as(*const [byte_len]u8, target).*);
|
|
||||||
|
|
||||||
if (@as(T, @bitCast(value[0..byte_len].*)) == bit_target) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return std.ascii.eqlIgnoreCase(value, target);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const testing = std.testing;
|
const testing = @import("../testing.zig");
|
||||||
test "Mime: invalid " {
|
test "Mime: invalid " {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const invalids = [_][]const u8{
|
const invalids = [_][]const u8{
|
||||||
"",
|
"",
|
||||||
"text",
|
"text",
|
||||||
@@ -270,11 +310,14 @@ test "Mime: invalid " {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (invalids) |invalid| {
|
for (invalids) |invalid| {
|
||||||
try testing.expectError(error.Invalid, Mime.parse(undefined, invalid));
|
const mutable_input = try testing.arena_allocator.dupe(u8, invalid);
|
||||||
|
try testing.expectError(error.Invalid, Mime.parse(undefined, mutable_input));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse common" {
|
test "Mime: parse common" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
|
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
|
||||||
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
|
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
|
||||||
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
|
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
|
||||||
@@ -297,6 +340,8 @@ test "Mime: parse common" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse uncommon" {
|
test "Mime: parse uncommon" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const text_javascript = Expectation{
|
const text_javascript = Expectation{
|
||||||
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
|
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
|
||||||
};
|
};
|
||||||
@@ -306,12 +351,14 @@ test "Mime: parse uncommon" {
|
|||||||
try expect(text_javascript, " text/javascript\t ;");
|
try expect(text_javascript, " text/javascript\t ;");
|
||||||
|
|
||||||
try expect(
|
try expect(
|
||||||
.{ .content_type = .{ .other = .{ .type = "Text", .sub_type = "Javascript" } } },
|
.{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
|
||||||
"Text/Javascript",
|
"Text/Javascript",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse charset" {
|
test "Mime: parse charset" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
try expect(.{
|
try expect(.{
|
||||||
.content_type = .{ .text_xml = {} },
|
.content_type = .{ .text_xml = {} },
|
||||||
.charset = "utf-8",
|
.charset = "utf-8",
|
||||||
@@ -332,11 +379,12 @@ test "Mime: parse charset" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: isHTML" {
|
test "Mime: isHTML" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const isHTML = struct {
|
const isHTML = struct {
|
||||||
fn isHTML(expected: bool, input: []const u8) !void {
|
fn isHTML(expected: bool, input: []const u8) !void {
|
||||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||||
defer arena.deinit();
|
var mime = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||||
var mime = try Mime.parse(arena.allocator(), input);
|
|
||||||
try testing.expectEqual(expected, mime.isHTML());
|
try testing.expectEqual(expected, mime.isHTML());
|
||||||
}
|
}
|
||||||
}.isHTML;
|
}.isHTML;
|
||||||
@@ -348,6 +396,71 @@ test "Mime: isHTML" {
|
|||||||
try isHTML(false, "over/9000");
|
try isHTML(false, "over/9000");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "Mime: sniff" {
|
||||||
|
try testing.expectEqual(null, Mime.sniff(""));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<htm"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<html!"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<a_"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<!doctype html"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<!doctype html>"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("\n <!doctype html>"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("\n \t <font/>"));
|
||||||
|
|
||||||
|
const expectHTML = struct {
|
||||||
|
fn expect(input: []const u8) !void {
|
||||||
|
try testing.expectEqual(.text_html, std.meta.activeTag(Mime.sniff(input).?.content_type));
|
||||||
|
}
|
||||||
|
}.expect;
|
||||||
|
|
||||||
|
try expectHTML("<!doctype html ");
|
||||||
|
try expectHTML("\n \t <!DOCTYPE HTML ");
|
||||||
|
|
||||||
|
try expectHTML("<html ");
|
||||||
|
try expectHTML("\n \t <HtmL> even more stufff");
|
||||||
|
|
||||||
|
try expectHTML("<script>");
|
||||||
|
try expectHTML("\n \t <SCRIpt >alert(document.cookies)</script>");
|
||||||
|
|
||||||
|
try expectHTML("<iframe>");
|
||||||
|
try expectHTML(" \t <ifRAME >");
|
||||||
|
|
||||||
|
try expectHTML("<h1>");
|
||||||
|
try expectHTML(" <H1>");
|
||||||
|
|
||||||
|
try expectHTML("<div>");
|
||||||
|
try expectHTML("\n\r\r <DiV>");
|
||||||
|
|
||||||
|
try expectHTML("<font>");
|
||||||
|
try expectHTML(" <fonT>");
|
||||||
|
|
||||||
|
try expectHTML("<table>");
|
||||||
|
try expectHTML("\t\t<TAblE>");
|
||||||
|
|
||||||
|
try expectHTML("<a>");
|
||||||
|
try expectHTML("\n\n<A>");
|
||||||
|
|
||||||
|
try expectHTML("<style>");
|
||||||
|
try expectHTML(" \n\t <STyLE>");
|
||||||
|
|
||||||
|
try expectHTML("<title>");
|
||||||
|
try expectHTML(" \n\t <TITLE>");
|
||||||
|
|
||||||
|
try expectHTML("<b>");
|
||||||
|
try expectHTML(" \n\t <B>");
|
||||||
|
|
||||||
|
try expectHTML("<body>");
|
||||||
|
try expectHTML(" \n\t <BODY>");
|
||||||
|
|
||||||
|
try expectHTML("<br>");
|
||||||
|
try expectHTML(" \n\t <BR>");
|
||||||
|
|
||||||
|
try expectHTML("<p>");
|
||||||
|
try expectHTML(" \n\t <P>");
|
||||||
|
|
||||||
|
try expectHTML("<!-->");
|
||||||
|
try expectHTML(" \n\t <!-->");
|
||||||
|
}
|
||||||
|
|
||||||
const Expectation = struct {
|
const Expectation = struct {
|
||||||
content_type: Mime.ContentType,
|
content_type: Mime.ContentType,
|
||||||
params: []const u8 = "",
|
params: []const u8 = "",
|
||||||
@@ -355,11 +468,9 @@ const Expectation = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
fn expect(expected: Expectation, input: []const u8) !void {
|
fn expect(expected: Expectation, input: []const u8) !void {
|
||||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||||
defer arena.deinit();
|
|
||||||
|
|
||||||
const actual = try Mime.parse(arena.allocator(), input);
|
|
||||||
|
|
||||||
|
const actual = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||||
try testing.expectEqual(
|
try testing.expectEqual(
|
||||||
std.meta.activeTag(expected.content_type),
|
std.meta.activeTag(expected.content_type),
|
||||||
std.meta.activeTag(actual.content_type),
|
std.meta.activeTag(actual.content_type),
|
||||||
@@ -368,16 +479,16 @@ fn expect(expected: Expectation, input: []const u8) !void {
|
|||||||
switch (expected.content_type) {
|
switch (expected.content_type) {
|
||||||
.other => |e| {
|
.other => |e| {
|
||||||
const a = actual.content_type.other;
|
const a = actual.content_type.other;
|
||||||
try testing.expectEqualStrings(e.type, a.type);
|
try testing.expectEqual(e.type, a.type);
|
||||||
try testing.expectEqualStrings(e.sub_type, a.sub_type);
|
try testing.expectEqual(e.sub_type, a.sub_type);
|
||||||
},
|
},
|
||||||
else => {}, // already asserted above
|
else => {}, // already asserted above
|
||||||
}
|
}
|
||||||
|
|
||||||
try testing.expectEqualStrings(expected.params, actual.params);
|
try testing.expectEqual(expected.params, actual.params);
|
||||||
|
|
||||||
if (expected.charset) |ec| {
|
if (expected.charset) |ec| {
|
||||||
try testing.expectEqualStrings(ec, actual.charset.?);
|
try testing.expectEqual(ec, actual.charset.?);
|
||||||
} else {
|
} else {
|
||||||
try testing.expectEqual(null, actual.charset);
|
try testing.expectEqual(null, actual.charset);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ pub const XMLHttpRequest = struct {
|
|||||||
};
|
};
|
||||||
const ResponseObj = union(ResponseObjTag) {
|
const ResponseObj = union(ResponseObjTag) {
|
||||||
Document: *parser.Document,
|
Document: *parser.Document,
|
||||||
Failure: bool,
|
Failure: void,
|
||||||
JSON: std.json.Parsed(JSONValue),
|
JSON: std.json.Parsed(JSONValue),
|
||||||
|
|
||||||
fn deinit(self: ResponseObj) void {
|
fn deinit(self: ResponseObj) void {
|
||||||
@@ -511,12 +511,8 @@ pub const XMLHttpRequest = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// extract a mime type from headers.
|
// extract a mime type from headers.
|
||||||
{
|
|
||||||
var raw: []const u8 = "text/xml";
|
|
||||||
if (header.get("content-type")) |ct| {
|
if (header.get("content-type")) |ct| {
|
||||||
raw = try self.arena.dupe(u8, ct);
|
self.response_mime = Mime.parse(self.arena, ct) catch |e| {
|
||||||
}
|
|
||||||
self.response_mime = Mime.parse(self.arena, raw) catch |e| {
|
|
||||||
return self.onErr(e);
|
return self.onErr(e);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -724,26 +720,24 @@ pub const XMLHttpRequest = struct {
|
|||||||
// TODO parse XML.
|
// TODO parse XML.
|
||||||
// https://xhr.spec.whatwg.org/#response-object
|
// https://xhr.spec.whatwg.org/#response-object
|
||||||
fn setResponseObjDocument(self: *XMLHttpRequest) void {
|
fn setResponseObjDocument(self: *XMLHttpRequest) void {
|
||||||
const response_mime = &self.response_mime.?;
|
const mime = self.response_mime orelse return;
|
||||||
const isHTML = response_mime.isHTML();
|
if (mime.isHTML() == false) {
|
||||||
|
|
||||||
// TODO If finalMIME is not an HTML MIME type or an XML MIME type, then
|
|
||||||
// return.
|
|
||||||
if (!isHTML) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var ccharset: [:0]const u8 = "utf-8";
|
var ccharset: [:0]const u8 = "utf-8";
|
||||||
if (response_mime.charset) |rc| {
|
if (mime.charset) |rc| {
|
||||||
|
if (std.mem.eql(u8, rc, "utf-8") == false) {
|
||||||
ccharset = self.arena.dupeZ(u8, rc) catch {
|
ccharset = self.arena.dupeZ(u8, rc) catch {
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
||||||
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -766,7 +760,7 @@ pub const XMLHttpRequest = struct {
|
|||||||
.{},
|
.{},
|
||||||
) catch |e| {
|
) catch |e| {
|
||||||
log.err("parse JSON: {}", .{e});
|
log.err("parse JSON: {}", .{e});
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -32,9 +32,13 @@ const Loop = @import("../runtime/loop.zig").Loop;
|
|||||||
|
|
||||||
const log = std.log.scoped(.http_client);
|
const log = std.log.scoped(.http_client);
|
||||||
|
|
||||||
|
// We might need to peek at the body to try and sniff the content-type.
|
||||||
|
// While we only need a few bytes, in most cases we need to ignore leading
|
||||||
|
// whitespace, so we want to get a reasonable-sized chunk.
|
||||||
|
const PEEK_BUF_LEN = 1024;
|
||||||
|
|
||||||
const BUFFER_LEN = 32 * 1024;
|
const BUFFER_LEN = 32 * 1024;
|
||||||
|
|
||||||
// The longest individual header line that we support
|
|
||||||
const MAX_HEADER_LINE_LEN = 4096;
|
const MAX_HEADER_LINE_LEN = 4096;
|
||||||
|
|
||||||
// Thread-safe. Holds our root certificate, connection pool and state pool
|
// Thread-safe. Holds our root certificate, connection pool and state pool
|
||||||
@@ -900,6 +904,7 @@ const SyncHandler = struct {
|
|||||||
// object which can be iterated to get the body.
|
// object which can be iterated to get the body.
|
||||||
std.debug.assert(result.done or reader.body_reader != null);
|
std.debug.assert(result.done or reader.body_reader != null);
|
||||||
std.debug.assert(result.data == null);
|
std.debug.assert(result.data == null);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
._buf = buf,
|
._buf = buf,
|
||||||
._request = request,
|
._request = request,
|
||||||
@@ -907,6 +912,8 @@ const SyncHandler = struct {
|
|||||||
._done = result.done,
|
._done = result.done,
|
||||||
._connection = connection,
|
._connection = connection,
|
||||||
._data = result.unprocessed,
|
._data = result.unprocessed,
|
||||||
|
._peek_len = 0,
|
||||||
|
._peek_buf = state.peek_buf,
|
||||||
.header = reader.response,
|
.header = reader.response,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -1046,7 +1053,7 @@ const Reader = struct {
|
|||||||
|
|
||||||
// Still parsing the header
|
// Still parsing the header
|
||||||
|
|
||||||
// what data do we have leftover in `data`.
|
// What data do we have leftover in `data`?
|
||||||
// When header_done == true, then this is part (or all) of the body
|
// When header_done == true, then this is part (or all) of the body
|
||||||
// When header_done == false, then this is a header line that we didn't
|
// When header_done == false, then this is a header line that we didn't
|
||||||
// have enough data for.
|
// have enough data for.
|
||||||
@@ -1504,23 +1511,49 @@ pub const Progress = struct {
|
|||||||
header: ResponseHeader,
|
header: ResponseHeader,
|
||||||
};
|
};
|
||||||
|
|
||||||
// The value that we return from a synchronous requst.
|
// The value that we return from a synchronous request.
|
||||||
pub const Response = struct {
|
pub const Response = struct {
|
||||||
_reader: Reader,
|
_reader: Reader,
|
||||||
_request: *Request,
|
_request: *Request,
|
||||||
|
|
||||||
_buf: []u8,
|
|
||||||
_connection: SyncHandler.Connection,
|
_connection: SyncHandler.Connection,
|
||||||
|
|
||||||
|
// the buffer to read the peeked data into
|
||||||
|
_peek_buf: []u8,
|
||||||
|
|
||||||
|
// the length of data we've peeked. The peeked_data is _peek_buf[0.._peek_len].
|
||||||
|
// It's possible for peek_len > 0 and _done == true, in which case, the
|
||||||
|
// _peeked data should be emitted once and subsequent calls to `next` should
|
||||||
|
// return null.
|
||||||
|
_peek_len: usize,
|
||||||
|
|
||||||
|
// What we'll read from the socket into. This is the State's read_buf
|
||||||
|
_buf: []u8,
|
||||||
|
|
||||||
|
// Whether or not we're done reading the response. When true, next will
|
||||||
|
// return null.
|
||||||
_done: bool,
|
_done: bool,
|
||||||
|
|
||||||
// Any data we over-read while parsing the header. This will be returned on
|
// Data that we've read. This can be set when the Response is first created
|
||||||
// the first call to next();
|
// from extra data received while parsing the header. Or, it can be set
|
||||||
|
// when `next` is called and we read more data from the socket.
|
||||||
_data: ?[]u8 = null,
|
_data: ?[]u8 = null,
|
||||||
header: ResponseHeader,
|
header: ResponseHeader,
|
||||||
|
|
||||||
pub fn next(self: *Response) !?[]u8 {
|
pub fn next(self: *Response) !?[]u8 {
|
||||||
var buf = self._buf;
|
// it's possible for peek_len > 0 and done == true. This would happen
|
||||||
|
// when, while peeking, we reached the end of the data. In that case,
|
||||||
|
// we return the peeked data once, and on subsequent calls, we'll return
|
||||||
|
// null normally, because done == true;
|
||||||
|
const pl = self._peek_len;
|
||||||
|
if (pl > 0) {
|
||||||
|
self._peek_len = 0;
|
||||||
|
return self._peek_buf[0..pl];
|
||||||
|
}
|
||||||
|
|
||||||
|
return self._nextIgnorePeek(self._buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn _nextIgnorePeek(self: *Response, buf: []u8) !?[]u8 {
|
||||||
while (true) {
|
while (true) {
|
||||||
if (try self.processData()) |data| {
|
if (try self.processData()) |data| {
|
||||||
return data;
|
return data;
|
||||||
@@ -1541,14 +1574,38 @@ pub const Response = struct {
|
|||||||
self._data = result.unprocessed; // for the next call
|
self._data = result.unprocessed; // for the next call
|
||||||
return result.data;
|
return result.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn peek(self: *Response) ![]u8 {
|
||||||
|
while (true) {
|
||||||
|
var peek_buf = self._peek_buf;
|
||||||
|
const peek_len = self._peek_len;
|
||||||
|
|
||||||
|
const data = (try self._nextIgnorePeek(peek_buf[peek_len..])) orelse {
|
||||||
|
return peek_buf[0..peek_len];
|
||||||
|
};
|
||||||
|
|
||||||
|
const peek_end = peek_len + data.len;
|
||||||
|
@memcpy(peek_buf[peek_len..peek_end], data);
|
||||||
|
self._peek_len = peek_end;
|
||||||
|
|
||||||
|
if (peek_end > 100) {
|
||||||
|
return peek_buf[peek_len..peek_end];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Pooled and re-used when creating a request
|
// Pooled and re-used when creating a request
|
||||||
const State = struct {
|
const State = struct {
|
||||||
// used for reading chunks of payload data.
|
// We might be asked to peek at the response, i.e. to sniff the mime type.
|
||||||
|
// This will require storing any peeked data so that, later, if we stream
|
||||||
|
// the body, we can present a cohesive body.
|
||||||
|
peek_buf: []u8,
|
||||||
|
|
||||||
|
// Used for reading chunks of payload data.
|
||||||
read_buf: []u8,
|
read_buf: []u8,
|
||||||
|
|
||||||
// use for writing data. If you're wondering why BOTH a read_buf and a
|
// Used for writing data. If you're wondering why BOTH a read_buf and a
|
||||||
// write_buf, even though HTTP is req -> resp, it's for TLS, which has
|
// write_buf, even though HTTP is req -> resp, it's for TLS, which has
|
||||||
// bidirectional data.
|
// bidirectional data.
|
||||||
write_buf: []u8,
|
write_buf: []u8,
|
||||||
@@ -1561,7 +1618,10 @@ const State = struct {
|
|||||||
// response headers.
|
// response headers.
|
||||||
arena: ArenaAllocator,
|
arena: ArenaAllocator,
|
||||||
|
|
||||||
fn init(allocator: Allocator, header_size: usize, buf_size: usize) !State {
|
fn init(allocator: Allocator, header_size: usize, peek_size: usize, buf_size: usize) !State {
|
||||||
|
const peek_buf = try allocator.alloc(u8, peek_size);
|
||||||
|
errdefer allocator.free(peek_buf);
|
||||||
|
|
||||||
const read_buf = try allocator.alloc(u8, buf_size);
|
const read_buf = try allocator.alloc(u8, buf_size);
|
||||||
errdefer allocator.free(read_buf);
|
errdefer allocator.free(read_buf);
|
||||||
|
|
||||||
@@ -1572,6 +1632,7 @@ const State = struct {
|
|||||||
errdefer allocator.free(header_buf);
|
errdefer allocator.free(header_buf);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
|
.peek_buf = peek_buf,
|
||||||
.read_buf = read_buf,
|
.read_buf = read_buf,
|
||||||
.write_buf = write_buf,
|
.write_buf = write_buf,
|
||||||
.header_buf = header_buf,
|
.header_buf = header_buf,
|
||||||
@@ -1585,6 +1646,7 @@ const State = struct {
|
|||||||
|
|
||||||
fn deinit(self: *State) void {
|
fn deinit(self: *State) void {
|
||||||
const allocator = self.arena.child_allocator;
|
const allocator = self.arena.child_allocator;
|
||||||
|
allocator.free(self.peek_buf);
|
||||||
allocator.free(self.read_buf);
|
allocator.free(self.read_buf);
|
||||||
allocator.free(self.write_buf);
|
allocator.free(self.write_buf);
|
||||||
allocator.free(self.header_buf);
|
allocator.free(self.header_buf);
|
||||||
@@ -1611,7 +1673,7 @@ const StatePool = struct {
|
|||||||
for (0..count) |i| {
|
for (0..count) |i| {
|
||||||
const state = try allocator.create(State);
|
const state = try allocator.create(State);
|
||||||
errdefer allocator.destroy(state);
|
errdefer allocator.destroy(state);
|
||||||
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, BUFFER_LEN);
|
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, PEEK_BUF_LEN, BUFFER_LEN);
|
||||||
states[i] = state;
|
states[i] = state;
|
||||||
started += 1;
|
started += 1;
|
||||||
}
|
}
|
||||||
@@ -1662,7 +1724,7 @@ const StatePool = struct {
|
|||||||
|
|
||||||
const testing = @import("../testing.zig");
|
const testing = @import("../testing.zig");
|
||||||
test "HttpClient Reader: fuzz" {
|
test "HttpClient Reader: fuzz" {
|
||||||
var state = try State.init(testing.allocator, 1024, 1024);
|
var state = try State.init(testing.allocator, 1024, 1024, 100);
|
||||||
defer state.deinit();
|
defer state.deinit();
|
||||||
|
|
||||||
var res = TestResponse.init();
|
var res = TestResponse.init();
|
||||||
@@ -1773,6 +1835,7 @@ test "HttpClient: sync connect error" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync no body" {
|
test "HttpClient: sync no body" {
|
||||||
|
for (0..2) |i| {
|
||||||
var client = try testClient();
|
var client = try testClient();
|
||||||
defer client.deinit();
|
defer client.deinit();
|
||||||
|
|
||||||
@@ -1780,11 +1843,15 @@ test "HttpClient: sync no body" {
|
|||||||
var req = try client.request(.GET, &uri);
|
var req = try client.request(.GET, &uri);
|
||||||
var res = try req.sendSync(.{});
|
var res = try req.sendSync(.{});
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
try testing.expectEqual("", try res.peek());
|
||||||
|
}
|
||||||
try testing.expectEqual(null, try res.next());
|
try testing.expectEqual(null, try res.next());
|
||||||
try testing.expectEqual(200, res.header.status);
|
try testing.expectEqual(200, res.header.status);
|
||||||
try testing.expectEqual(2, res.header.count());
|
try testing.expectEqual(2, res.header.count());
|
||||||
try testing.expectEqual("close", res.header.get("connection"));
|
try testing.expectEqual("close", res.header.get("connection"));
|
||||||
try testing.expectEqual("0", res.header.get("content-length"));
|
try testing.expectEqual("0", res.header.get("content-length"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync tls no body" {
|
test "HttpClient: sync tls no body" {
|
||||||
@@ -1804,6 +1871,7 @@ test "HttpClient: sync tls no body" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync with body" {
|
test "HttpClient: sync with body" {
|
||||||
|
for (0..2) |i| {
|
||||||
var client = try testClient();
|
var client = try testClient();
|
||||||
defer client.deinit();
|
defer client.deinit();
|
||||||
|
|
||||||
@@ -1811,6 +1879,9 @@ test "HttpClient: sync with body" {
|
|||||||
var req = try client.request(.GET, &uri);
|
var req = try client.request(.GET, &uri);
|
||||||
var res = try req.sendSync(.{});
|
var res = try req.sendSync(.{});
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
try testing.expectEqual("over 9000!", try res.peek());
|
||||||
|
}
|
||||||
try testing.expectEqual("over 9000!", try res.next());
|
try testing.expectEqual("over 9000!", try res.next());
|
||||||
try testing.expectEqual(201, res.header.status);
|
try testing.expectEqual(201, res.header.status);
|
||||||
try testing.expectEqual(5, res.header.count());
|
try testing.expectEqual(5, res.header.count());
|
||||||
@@ -1819,6 +1890,7 @@ test "HttpClient: sync with body" {
|
|||||||
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
||||||
try testing.expectEqual("Close", res.header.get("_connection"));
|
try testing.expectEqual("Close", res.header.get("_connection"));
|
||||||
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync tls with body" {
|
test "HttpClient: sync tls with body" {
|
||||||
|
|||||||
@@ -24,6 +24,17 @@ pub const expectError = std.testing.expectError;
|
|||||||
pub const expectString = std.testing.expectEqualStrings;
|
pub const expectString = std.testing.expectEqualStrings;
|
||||||
pub const expectEqualSlices = std.testing.expectEqualSlices;
|
pub const expectEqualSlices = std.testing.expectEqualSlices;
|
||||||
|
|
||||||
|
// sometimes it's super useful to have an arena you don't really care about
|
||||||
|
// in a test. Like, you need a mutable string, so you just want to dupe a
|
||||||
|
// string literal. It has nothing to do with the code under test, it's just
|
||||||
|
// infrastructure for the test itself.
|
||||||
|
pub var arena_instance = std.heap.ArenaAllocator.init(std.heap.c_allocator);
|
||||||
|
pub const arena_allocator = arena_instance.allocator();
|
||||||
|
|
||||||
|
pub fn reset() void {
|
||||||
|
_ = arena_instance.reset(.{ .retain_capacity = {} });
|
||||||
|
}
|
||||||
|
|
||||||
const App = @import("app.zig").App;
|
const App = @import("app.zig").App;
|
||||||
const parser = @import("browser/netsurf.zig");
|
const parser = @import("browser/netsurf.zig");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user