mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-12-16 08:18:59 +00:00
Merge pull request #553 from lightpanda-io/mime_sniffing
Try to sniff the mime type based on the body content
This commit is contained in:
@@ -435,24 +435,19 @@ pub const Page = struct {
|
|||||||
|
|
||||||
log.info("GET {any} {d}", .{ url, header.status });
|
log.info("GET {any} {d}", .{ url, header.status });
|
||||||
|
|
||||||
const ct = blk: {
|
const content_type = header.get("content-type");
|
||||||
break :blk header.get("content-type") orelse {
|
|
||||||
// no content type in HTTP headers.
|
|
||||||
// TODO try to sniff mime type from the body.
|
|
||||||
log.info("no content-type HTTP header", .{});
|
|
||||||
|
|
||||||
// Assume it's HTML for now.
|
const mime: Mime = blk: {
|
||||||
break :blk "text/html; charset=utf-8";
|
if (content_type) |ct| {
|
||||||
};
|
break :blk try Mime.parse(arena, ct);
|
||||||
};
|
}
|
||||||
|
break :blk Mime.sniff(try response.peek());
|
||||||
log.debug("header content-type: {s}", .{ct});
|
} orelse .unknown;
|
||||||
var mime = try Mime.parse(arena, ct);
|
|
||||||
|
|
||||||
if (mime.isHTML()) {
|
if (mime.isHTML()) {
|
||||||
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
|
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
|
||||||
} else {
|
} else {
|
||||||
log.info("non-HTML document: {s}", .{ct});
|
log.info("non-HTML document: {s}", .{content_type orelse "null"});
|
||||||
var arr: std.ArrayListUnmanaged(u8) = .{};
|
var arr: std.ArrayListUnmanaged(u8) = .{};
|
||||||
while (try response.next()) |data| {
|
while (try response.next()) |data| {
|
||||||
try arr.appendSlice(arena, try arena.dupe(u8, data));
|
try arr.appendSlice(arena, try arena.dupe(u8, data));
|
||||||
|
|||||||
@@ -24,10 +24,17 @@ pub const Mime = struct {
|
|||||||
params: []const u8 = "",
|
params: []const u8 = "",
|
||||||
charset: ?[]const u8 = null,
|
charset: ?[]const u8 = null,
|
||||||
|
|
||||||
|
pub const unknown = Mime{
|
||||||
|
.params = "",
|
||||||
|
.charset = "",
|
||||||
|
.content_type = .{ .unknown = {} },
|
||||||
|
};
|
||||||
|
|
||||||
pub const ContentTypeEnum = enum {
|
pub const ContentTypeEnum = enum {
|
||||||
text_xml,
|
text_xml,
|
||||||
text_html,
|
text_html,
|
||||||
text_plain,
|
text_plain,
|
||||||
|
unknown,
|
||||||
other,
|
other,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -35,21 +42,26 @@ pub const Mime = struct {
|
|||||||
text_xml: void,
|
text_xml: void,
|
||||||
text_html: void,
|
text_html: void,
|
||||||
text_plain: void,
|
text_plain: void,
|
||||||
|
unknown: void,
|
||||||
other: struct { type: []const u8, sub_type: []const u8 },
|
other: struct { type: []const u8, sub_type: []const u8 },
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn parse(arena: Allocator, input: []const u8) !Mime {
|
pub fn parse(arena: Allocator, input: []u8) !Mime {
|
||||||
if (input.len > 255) {
|
if (input.len > 255) {
|
||||||
return error.TooBig;
|
return error.TooBig;
|
||||||
}
|
}
|
||||||
var trimmed = trim(input);
|
|
||||||
|
|
||||||
const content_type, const type_len = try parseContentType(trimmed);
|
// Zig's trim API is broken. The return type is always `[]const u8`,
|
||||||
if (type_len >= trimmed.len) {
|
// even if the input type is `[]u8`. @constCast is safe here.
|
||||||
|
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
|
||||||
|
_ = std.ascii.lowerString(normalized, normalized);
|
||||||
|
|
||||||
|
const content_type, const type_len = try parseContentType(normalized);
|
||||||
|
if (type_len >= normalized.len) {
|
||||||
return .{ .content_type = content_type };
|
return .{ .content_type = content_type };
|
||||||
}
|
}
|
||||||
|
|
||||||
const params = trimLeft(trimmed[type_len..]);
|
const params = trimLeft(normalized[type_len..]);
|
||||||
|
|
||||||
var charset: ?[]const u8 = null;
|
var charset: ?[]const u8 = null;
|
||||||
|
|
||||||
@@ -63,11 +75,12 @@ pub const Mime = struct {
|
|||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (name.len) {
|
const attribute_name = std.meta.stringToEnum(enum {
|
||||||
7 => if (isCaseEqual("charset", name)) {
|
charset,
|
||||||
charset = try parseValue(arena, value);
|
}, name) orelse continue;
|
||||||
},
|
|
||||||
else => {},
|
switch (attribute_name) {
|
||||||
|
.charset => charset = try parseAttributeValue(arena, value),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,66 +91,113 @@ pub const Mime = struct {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn sniff(body: []const u8) ?Mime {
|
||||||
|
// 0x0C is form feed
|
||||||
|
const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C });
|
||||||
|
if (content.len == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content[0] != '<') {
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
|
||||||
|
// UTF-8 BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
|
||||||
|
// UTF-16 big-endian BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
|
||||||
|
// UTF-16 little-endian BOM
|
||||||
|
return .{ .content_type = .{ .text_plain = {} } };
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The longest prefix we have is "<!DOCTYPE HTML ", 15 bytes. If we're
|
||||||
|
// here, we already know content[0] == '<', so we can skip that. So 14
|
||||||
|
// bytes.
|
||||||
|
|
||||||
|
// -1 because we don't need the leading '<'
|
||||||
|
var buf: [14]u8 = undefined;
|
||||||
|
|
||||||
|
const stripped = content[1..];
|
||||||
|
const prefix_len = @min(stripped.len, buf.len);
|
||||||
|
const prefix = std.ascii.lowerString(&buf, stripped[0..prefix_len]);
|
||||||
|
|
||||||
|
// we already know it starts with a <
|
||||||
|
const known_prefixes = [_]struct { []const u8, ContentType }{
|
||||||
|
.{ "!doctype html", .{ .text_html = {} } },
|
||||||
|
.{ "html", .{ .text_html = {} } },
|
||||||
|
.{ "script", .{ .text_html = {} } },
|
||||||
|
.{ "iframe", .{ .text_html = {} } },
|
||||||
|
.{ "h1", .{ .text_html = {} } },
|
||||||
|
.{ "div", .{ .text_html = {} } },
|
||||||
|
.{ "font", .{ .text_html = {} } },
|
||||||
|
.{ "table", .{ .text_html = {} } },
|
||||||
|
.{ "a", .{ .text_html = {} } },
|
||||||
|
.{ "style", .{ .text_html = {} } },
|
||||||
|
.{ "title", .{ .text_html = {} } },
|
||||||
|
.{ "b", .{ .text_html = {} } },
|
||||||
|
.{ "body", .{ .text_html = {} } },
|
||||||
|
.{ "br", .{ .text_html = {} } },
|
||||||
|
.{ "p", .{ .text_html = {} } },
|
||||||
|
.{ "!--", .{ .text_html = {} } },
|
||||||
|
.{ "xml", .{ .text_xml = {} } },
|
||||||
|
};
|
||||||
|
inline for (known_prefixes) |kp| {
|
||||||
|
const known_prefix = kp.@"0";
|
||||||
|
if (std.mem.startsWith(u8, prefix, known_prefix) and prefix.len > known_prefix.len) {
|
||||||
|
const next = prefix[known_prefix.len];
|
||||||
|
// a "tag-terminating-byte"
|
||||||
|
if (next == ' ' or next == '>') {
|
||||||
|
return .{ .content_type = kp.@"1" };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn isHTML(self: *const Mime) bool {
|
pub fn isHTML(self: *const Mime) bool {
|
||||||
return self.content_type == .text_html;
|
return self.content_type == .text_html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we expect value to be lowercase
|
||||||
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
||||||
const separator = std.mem.indexOfScalarPos(u8, value, 0, '/') orelse {
|
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
|
||||||
return error.Invalid;
|
const type_name = trimRight(value[0..end]);
|
||||||
};
|
const attribute_start = end + 1;
|
||||||
const end = std.mem.indexOfScalarPos(u8, value, separator, ';') orelse blk: {
|
|
||||||
break :blk value.len;
|
if (std.meta.stringToEnum(enum {
|
||||||
|
@"text/xml",
|
||||||
|
@"text/html",
|
||||||
|
@"text/plain",
|
||||||
|
}, type_name)) |known_type| {
|
||||||
|
const ct: ContentType = switch (known_type) {
|
||||||
|
.@"text/xml" => .{ .text_xml = {} },
|
||||||
|
.@"text/html" => .{ .text_html = {} },
|
||||||
|
.@"text/plain" => .{ .text_plain = {} },
|
||||||
};
|
};
|
||||||
|
return .{ ct, attribute_start };
|
||||||
|
}
|
||||||
|
|
||||||
|
const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid;
|
||||||
|
|
||||||
const main_type = value[0..separator];
|
const main_type = value[0..separator];
|
||||||
const sub_type = trimRight(value[separator + 1 .. end]);
|
const sub_type = trimRight(value[separator + 1 .. end]);
|
||||||
|
|
||||||
if (parseCommonContentType(main_type, sub_type)) |content_type| {
|
if (main_type.len == 0 or validType(main_type) == false) {
|
||||||
return .{ content_type, end + 1 };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (main_type.len == 0) {
|
|
||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
if (validType(main_type) == false) {
|
if (sub_type.len == 0 or validType(sub_type) == false) {
|
||||||
return error.Invalid;
|
return error.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sub_type.len == 0) {
|
return .{ .{ .other = .{
|
||||||
return error.Invalid;
|
|
||||||
}
|
|
||||||
if (validType(sub_type) == false) {
|
|
||||||
return error.Invalid;
|
|
||||||
}
|
|
||||||
|
|
||||||
const content_type = ContentType{ .other = .{
|
|
||||||
.type = main_type,
|
.type = main_type,
|
||||||
.sub_type = sub_type,
|
.sub_type = sub_type,
|
||||||
} };
|
} }, attribute_start };
|
||||||
|
|
||||||
return .{ content_type, end + 1 };
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parseCommonContentType(main_type: []const u8, sub_type: []const u8) ?ContentType {
|
|
||||||
switch (main_type.len) {
|
|
||||||
4 => if (isCaseEqual("text", main_type)) {
|
|
||||||
switch (sub_type.len) {
|
|
||||||
3 => if (isCaseEqual("xml", sub_type)) {
|
|
||||||
return .{ .text_xml = {} };
|
|
||||||
},
|
|
||||||
4 => if (isCaseEqual("html", sub_type)) {
|
|
||||||
return .{ .text_html = {} };
|
|
||||||
},
|
|
||||||
5 => if (isCaseEqual("plain", sub_type)) {
|
|
||||||
return .{ .text_plain = {} };
|
|
||||||
},
|
|
||||||
else => {},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
else => {},
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const T_SPECIAL = blk: {
|
const T_SPECIAL = blk: {
|
||||||
@@ -148,7 +208,7 @@ pub const Mime = struct {
|
|||||||
break :blk v;
|
break :blk v;
|
||||||
};
|
};
|
||||||
|
|
||||||
fn parseValue(arena: Allocator, value: []const u8) ![]const u8 {
|
fn parseAttributeValue(arena: Allocator, value: []const u8) ![]const u8 {
|
||||||
if (value[0] != '"') {
|
if (value[0] != '"') {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
@@ -218,10 +278,6 @@ pub const Mime = struct {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn trim(s: []const u8) []const u8 {
|
|
||||||
return std.mem.trim(u8, s, &std.ascii.whitespace);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn trimLeft(s: []const u8) []const u8 {
|
fn trimLeft(s: []const u8) []const u8 {
|
||||||
return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
|
return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
|
||||||
}
|
}
|
||||||
@@ -229,28 +285,12 @@ pub const Mime = struct {
|
|||||||
fn trimRight(s: []const u8) []const u8 {
|
fn trimRight(s: []const u8) []const u8 {
|
||||||
return std.mem.trimRight(u8, s, &std.ascii.whitespace);
|
return std.mem.trimRight(u8, s, &std.ascii.whitespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn isCaseEqual(comptime target: anytype, value: []const u8) bool {
|
|
||||||
// - 8 beause we don't care about the sentinel
|
|
||||||
const bit_len = @bitSizeOf(@TypeOf(target.*)) - 8;
|
|
||||||
const byte_len = bit_len / 8;
|
|
||||||
|
|
||||||
const T = @Type(.{ .int = .{
|
|
||||||
.bits = bit_len,
|
|
||||||
.signedness = .unsigned,
|
|
||||||
} });
|
|
||||||
|
|
||||||
const bit_target: T = @bitCast(@as(*const [byte_len]u8, target).*);
|
|
||||||
|
|
||||||
if (@as(T, @bitCast(value[0..byte_len].*)) == bit_target) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return std.ascii.eqlIgnoreCase(value, target);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const testing = std.testing;
|
const testing = @import("../testing.zig");
|
||||||
test "Mime: invalid " {
|
test "Mime: invalid " {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const invalids = [_][]const u8{
|
const invalids = [_][]const u8{
|
||||||
"",
|
"",
|
||||||
"text",
|
"text",
|
||||||
@@ -270,11 +310,14 @@ test "Mime: invalid " {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (invalids) |invalid| {
|
for (invalids) |invalid| {
|
||||||
try testing.expectError(error.Invalid, Mime.parse(undefined, invalid));
|
const mutable_input = try testing.arena_allocator.dupe(u8, invalid);
|
||||||
|
try testing.expectError(error.Invalid, Mime.parse(undefined, mutable_input));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse common" {
|
test "Mime: parse common" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
|
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
|
||||||
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
|
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
|
||||||
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
|
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
|
||||||
@@ -297,6 +340,8 @@ test "Mime: parse common" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse uncommon" {
|
test "Mime: parse uncommon" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const text_javascript = Expectation{
|
const text_javascript = Expectation{
|
||||||
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
|
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
|
||||||
};
|
};
|
||||||
@@ -306,12 +351,14 @@ test "Mime: parse uncommon" {
|
|||||||
try expect(text_javascript, " text/javascript\t ;");
|
try expect(text_javascript, " text/javascript\t ;");
|
||||||
|
|
||||||
try expect(
|
try expect(
|
||||||
.{ .content_type = .{ .other = .{ .type = "Text", .sub_type = "Javascript" } } },
|
.{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
|
||||||
"Text/Javascript",
|
"Text/Javascript",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: parse charset" {
|
test "Mime: parse charset" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
try expect(.{
|
try expect(.{
|
||||||
.content_type = .{ .text_xml = {} },
|
.content_type = .{ .text_xml = {} },
|
||||||
.charset = "utf-8",
|
.charset = "utf-8",
|
||||||
@@ -332,11 +379,12 @@ test "Mime: parse charset" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "Mime: isHTML" {
|
test "Mime: isHTML" {
|
||||||
|
defer testing.reset();
|
||||||
|
|
||||||
const isHTML = struct {
|
const isHTML = struct {
|
||||||
fn isHTML(expected: bool, input: []const u8) !void {
|
fn isHTML(expected: bool, input: []const u8) !void {
|
||||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||||
defer arena.deinit();
|
var mime = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||||
var mime = try Mime.parse(arena.allocator(), input);
|
|
||||||
try testing.expectEqual(expected, mime.isHTML());
|
try testing.expectEqual(expected, mime.isHTML());
|
||||||
}
|
}
|
||||||
}.isHTML;
|
}.isHTML;
|
||||||
@@ -348,6 +396,71 @@ test "Mime: isHTML" {
|
|||||||
try isHTML(false, "over/9000");
|
try isHTML(false, "over/9000");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "Mime: sniff" {
|
||||||
|
try testing.expectEqual(null, Mime.sniff(""));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<htm"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<html!"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<a_"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<!doctype html"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("<!doctype html>"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("\n <!doctype html>"));
|
||||||
|
try testing.expectEqual(null, Mime.sniff("\n \t <font/>"));
|
||||||
|
|
||||||
|
const expectHTML = struct {
|
||||||
|
fn expect(input: []const u8) !void {
|
||||||
|
try testing.expectEqual(.text_html, std.meta.activeTag(Mime.sniff(input).?.content_type));
|
||||||
|
}
|
||||||
|
}.expect;
|
||||||
|
|
||||||
|
try expectHTML("<!doctype html ");
|
||||||
|
try expectHTML("\n \t <!DOCTYPE HTML ");
|
||||||
|
|
||||||
|
try expectHTML("<html ");
|
||||||
|
try expectHTML("\n \t <HtmL> even more stufff");
|
||||||
|
|
||||||
|
try expectHTML("<script>");
|
||||||
|
try expectHTML("\n \t <SCRIpt >alert(document.cookies)</script>");
|
||||||
|
|
||||||
|
try expectHTML("<iframe>");
|
||||||
|
try expectHTML(" \t <ifRAME >");
|
||||||
|
|
||||||
|
try expectHTML("<h1>");
|
||||||
|
try expectHTML(" <H1>");
|
||||||
|
|
||||||
|
try expectHTML("<div>");
|
||||||
|
try expectHTML("\n\r\r <DiV>");
|
||||||
|
|
||||||
|
try expectHTML("<font>");
|
||||||
|
try expectHTML(" <fonT>");
|
||||||
|
|
||||||
|
try expectHTML("<table>");
|
||||||
|
try expectHTML("\t\t<TAblE>");
|
||||||
|
|
||||||
|
try expectHTML("<a>");
|
||||||
|
try expectHTML("\n\n<A>");
|
||||||
|
|
||||||
|
try expectHTML("<style>");
|
||||||
|
try expectHTML(" \n\t <STyLE>");
|
||||||
|
|
||||||
|
try expectHTML("<title>");
|
||||||
|
try expectHTML(" \n\t <TITLE>");
|
||||||
|
|
||||||
|
try expectHTML("<b>");
|
||||||
|
try expectHTML(" \n\t <B>");
|
||||||
|
|
||||||
|
try expectHTML("<body>");
|
||||||
|
try expectHTML(" \n\t <BODY>");
|
||||||
|
|
||||||
|
try expectHTML("<br>");
|
||||||
|
try expectHTML(" \n\t <BR>");
|
||||||
|
|
||||||
|
try expectHTML("<p>");
|
||||||
|
try expectHTML(" \n\t <P>");
|
||||||
|
|
||||||
|
try expectHTML("<!-->");
|
||||||
|
try expectHTML(" \n\t <!-->");
|
||||||
|
}
|
||||||
|
|
||||||
const Expectation = struct {
|
const Expectation = struct {
|
||||||
content_type: Mime.ContentType,
|
content_type: Mime.ContentType,
|
||||||
params: []const u8 = "",
|
params: []const u8 = "",
|
||||||
@@ -355,11 +468,9 @@ const Expectation = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
fn expect(expected: Expectation, input: []const u8) !void {
|
fn expect(expected: Expectation, input: []const u8) !void {
|
||||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||||
defer arena.deinit();
|
|
||||||
|
|
||||||
const actual = try Mime.parse(arena.allocator(), input);
|
|
||||||
|
|
||||||
|
const actual = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||||
try testing.expectEqual(
|
try testing.expectEqual(
|
||||||
std.meta.activeTag(expected.content_type),
|
std.meta.activeTag(expected.content_type),
|
||||||
std.meta.activeTag(actual.content_type),
|
std.meta.activeTag(actual.content_type),
|
||||||
@@ -368,16 +479,16 @@ fn expect(expected: Expectation, input: []const u8) !void {
|
|||||||
switch (expected.content_type) {
|
switch (expected.content_type) {
|
||||||
.other => |e| {
|
.other => |e| {
|
||||||
const a = actual.content_type.other;
|
const a = actual.content_type.other;
|
||||||
try testing.expectEqualStrings(e.type, a.type);
|
try testing.expectEqual(e.type, a.type);
|
||||||
try testing.expectEqualStrings(e.sub_type, a.sub_type);
|
try testing.expectEqual(e.sub_type, a.sub_type);
|
||||||
},
|
},
|
||||||
else => {}, // already asserted above
|
else => {}, // already asserted above
|
||||||
}
|
}
|
||||||
|
|
||||||
try testing.expectEqualStrings(expected.params, actual.params);
|
try testing.expectEqual(expected.params, actual.params);
|
||||||
|
|
||||||
if (expected.charset) |ec| {
|
if (expected.charset) |ec| {
|
||||||
try testing.expectEqualStrings(ec, actual.charset.?);
|
try testing.expectEqual(ec, actual.charset.?);
|
||||||
} else {
|
} else {
|
||||||
try testing.expectEqual(null, actual.charset);
|
try testing.expectEqual(null, actual.charset);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ pub const XMLHttpRequest = struct {
|
|||||||
};
|
};
|
||||||
const ResponseObj = union(ResponseObjTag) {
|
const ResponseObj = union(ResponseObjTag) {
|
||||||
Document: *parser.Document,
|
Document: *parser.Document,
|
||||||
Failure: bool,
|
Failure: void,
|
||||||
JSON: std.json.Parsed(JSONValue),
|
JSON: std.json.Parsed(JSONValue),
|
||||||
|
|
||||||
fn deinit(self: ResponseObj) void {
|
fn deinit(self: ResponseObj) void {
|
||||||
@@ -511,12 +511,8 @@ pub const XMLHttpRequest = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// extract a mime type from headers.
|
// extract a mime type from headers.
|
||||||
{
|
|
||||||
var raw: []const u8 = "text/xml";
|
|
||||||
if (header.get("content-type")) |ct| {
|
if (header.get("content-type")) |ct| {
|
||||||
raw = try self.arena.dupe(u8, ct);
|
self.response_mime = Mime.parse(self.arena, ct) catch |e| {
|
||||||
}
|
|
||||||
self.response_mime = Mime.parse(self.arena, raw) catch |e| {
|
|
||||||
return self.onErr(e);
|
return self.onErr(e);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -724,26 +720,24 @@ pub const XMLHttpRequest = struct {
|
|||||||
// TODO parse XML.
|
// TODO parse XML.
|
||||||
// https://xhr.spec.whatwg.org/#response-object
|
// https://xhr.spec.whatwg.org/#response-object
|
||||||
fn setResponseObjDocument(self: *XMLHttpRequest) void {
|
fn setResponseObjDocument(self: *XMLHttpRequest) void {
|
||||||
const response_mime = &self.response_mime.?;
|
const mime = self.response_mime orelse return;
|
||||||
const isHTML = response_mime.isHTML();
|
if (mime.isHTML() == false) {
|
||||||
|
|
||||||
// TODO If finalMIME is not an HTML MIME type or an XML MIME type, then
|
|
||||||
// return.
|
|
||||||
if (!isHTML) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var ccharset: [:0]const u8 = "utf-8";
|
var ccharset: [:0]const u8 = "utf-8";
|
||||||
if (response_mime.charset) |rc| {
|
if (mime.charset) |rc| {
|
||||||
|
if (std.mem.eql(u8, rc, "utf-8") == false) {
|
||||||
ccharset = self.arena.dupeZ(u8, rc) catch {
|
ccharset = self.arena.dupeZ(u8, rc) catch {
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
||||||
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -766,7 +760,7 @@ pub const XMLHttpRequest = struct {
|
|||||||
.{},
|
.{},
|
||||||
) catch |e| {
|
) catch |e| {
|
||||||
log.err("parse JSON: {}", .{e});
|
log.err("parse JSON: {}", .{e});
|
||||||
self.response_obj = .{ .Failure = true };
|
self.response_obj = .{ .Failure = {} };
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -32,9 +32,13 @@ const Loop = @import("../runtime/loop.zig").Loop;
|
|||||||
|
|
||||||
const log = std.log.scoped(.http_client);
|
const log = std.log.scoped(.http_client);
|
||||||
|
|
||||||
|
// We might need to peek at the body to try and sniff the content-type.
|
||||||
|
// While we only need a few bytes, in most cases we need to ignore leading
|
||||||
|
// whitespace, so we want to get a reasonable-sized chunk.
|
||||||
|
const PEEK_BUF_LEN = 1024;
|
||||||
|
|
||||||
const BUFFER_LEN = 32 * 1024;
|
const BUFFER_LEN = 32 * 1024;
|
||||||
|
|
||||||
// The longest individual header line that we support
|
|
||||||
const MAX_HEADER_LINE_LEN = 4096;
|
const MAX_HEADER_LINE_LEN = 4096;
|
||||||
|
|
||||||
// Thread-safe. Holds our root certificate, connection pool and state pool
|
// Thread-safe. Holds our root certificate, connection pool and state pool
|
||||||
@@ -900,6 +904,7 @@ const SyncHandler = struct {
|
|||||||
// object which can be iterated to get the body.
|
// object which can be iterated to get the body.
|
||||||
std.debug.assert(result.done or reader.body_reader != null);
|
std.debug.assert(result.done or reader.body_reader != null);
|
||||||
std.debug.assert(result.data == null);
|
std.debug.assert(result.data == null);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
._buf = buf,
|
._buf = buf,
|
||||||
._request = request,
|
._request = request,
|
||||||
@@ -907,6 +912,8 @@ const SyncHandler = struct {
|
|||||||
._done = result.done,
|
._done = result.done,
|
||||||
._connection = connection,
|
._connection = connection,
|
||||||
._data = result.unprocessed,
|
._data = result.unprocessed,
|
||||||
|
._peek_len = 0,
|
||||||
|
._peek_buf = state.peek_buf,
|
||||||
.header = reader.response,
|
.header = reader.response,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -1046,7 +1053,7 @@ const Reader = struct {
|
|||||||
|
|
||||||
// Still parsing the header
|
// Still parsing the header
|
||||||
|
|
||||||
// what data do we have leftover in `data`.
|
// What data do we have leftover in `data`?
|
||||||
// When header_done == true, then this is part (or all) of the body
|
// When header_done == true, then this is part (or all) of the body
|
||||||
// When header_done == false, then this is a header line that we didn't
|
// When header_done == false, then this is a header line that we didn't
|
||||||
// have enough data for.
|
// have enough data for.
|
||||||
@@ -1504,23 +1511,49 @@ pub const Progress = struct {
|
|||||||
header: ResponseHeader,
|
header: ResponseHeader,
|
||||||
};
|
};
|
||||||
|
|
||||||
// The value that we return from a synchronous requst.
|
// The value that we return from a synchronous request.
|
||||||
pub const Response = struct {
|
pub const Response = struct {
|
||||||
_reader: Reader,
|
_reader: Reader,
|
||||||
_request: *Request,
|
_request: *Request,
|
||||||
|
|
||||||
_buf: []u8,
|
|
||||||
_connection: SyncHandler.Connection,
|
_connection: SyncHandler.Connection,
|
||||||
|
|
||||||
|
// the buffer to read the peeked data into
|
||||||
|
_peek_buf: []u8,
|
||||||
|
|
||||||
|
// the length of data we've peeked. The peeked_data is _peek_buf[0.._peek_len].
|
||||||
|
// It's possible for peek_len > 0 and _done == true, in which case, the
|
||||||
|
// _peeked data should be emitted once and subsequent calls to `next` should
|
||||||
|
// return null.
|
||||||
|
_peek_len: usize,
|
||||||
|
|
||||||
|
// What we'll read from the socket into. This is the State's read_buf
|
||||||
|
_buf: []u8,
|
||||||
|
|
||||||
|
// Whether or not we're done reading the response. When true, next will
|
||||||
|
// return null.
|
||||||
_done: bool,
|
_done: bool,
|
||||||
|
|
||||||
// Any data we over-read while parsing the header. This will be returned on
|
// Data that we've read. This can be set when the Response is first created
|
||||||
// the first call to next();
|
// from extra data received while parsing the header. Or, it can be set
|
||||||
|
// when `next` is called and we read more data from the socket.
|
||||||
_data: ?[]u8 = null,
|
_data: ?[]u8 = null,
|
||||||
header: ResponseHeader,
|
header: ResponseHeader,
|
||||||
|
|
||||||
pub fn next(self: *Response) !?[]u8 {
|
pub fn next(self: *Response) !?[]u8 {
|
||||||
var buf = self._buf;
|
// It's possible for peek_len > 0 and done == true. This would happen
|
||||||
|
// when, while peeking, we reached the end of the data. In that case,
|
||||||
|
// we return the peeked data once, and on subsequent calls, we'll return
|
||||||
|
// null normally, because done == true;
|
||||||
|
const pl = self._peek_len;
|
||||||
|
if (pl > 0) {
|
||||||
|
self._peek_len = 0;
|
||||||
|
return self._peek_buf[0..pl];
|
||||||
|
}
|
||||||
|
|
||||||
|
return self._nextIgnorePeek(self._buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn _nextIgnorePeek(self: *Response, buf: []u8) !?[]u8 {
|
||||||
while (true) {
|
while (true) {
|
||||||
if (try self.processData()) |data| {
|
if (try self.processData()) |data| {
|
||||||
return data;
|
return data;
|
||||||
@@ -1541,14 +1574,38 @@ pub const Response = struct {
|
|||||||
self._data = result.unprocessed; // for the next call
|
self._data = result.unprocessed; // for the next call
|
||||||
return result.data;
|
return result.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn peek(self: *Response) ![]u8 {
|
||||||
|
while (true) {
|
||||||
|
var peek_buf = self._peek_buf;
|
||||||
|
const peek_len = self._peek_len;
|
||||||
|
|
||||||
|
const data = (try self._nextIgnorePeek(peek_buf[peek_len..])) orelse {
|
||||||
|
return peek_buf[0..peek_len];
|
||||||
|
};
|
||||||
|
|
||||||
|
const peek_end = peek_len + data.len;
|
||||||
|
@memcpy(peek_buf[peek_len..peek_end], data);
|
||||||
|
self._peek_len = peek_end;
|
||||||
|
|
||||||
|
if (peek_end > 100) {
|
||||||
|
return peek_buf[peek_len..peek_end];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Pooled and re-used when creating a request
|
// Pooled and re-used when creating a request
|
||||||
const State = struct {
|
const State = struct {
|
||||||
// used for reading chunks of payload data.
|
// We might be asked to peek at the response, i.e. to sniff the mime type.
|
||||||
|
// This will require storing any peeked data so that, later, if we stream
|
||||||
|
// the body, we can present a cohesive body.
|
||||||
|
peek_buf: []u8,
|
||||||
|
|
||||||
|
// Used for reading chunks of payload data.
|
||||||
read_buf: []u8,
|
read_buf: []u8,
|
||||||
|
|
||||||
// use for writing data. If you're wondering why BOTH a read_buf and a
|
// Used for writing data. If you're wondering why BOTH a read_buf and a
|
||||||
// write_buf, even though HTTP is req -> resp, it's for TLS, which has
|
// write_buf, even though HTTP is req -> resp, it's for TLS, which has
|
||||||
// bidirectional data.
|
// bidirectional data.
|
||||||
write_buf: []u8,
|
write_buf: []u8,
|
||||||
@@ -1561,7 +1618,10 @@ const State = struct {
|
|||||||
// response headers.
|
// response headers.
|
||||||
arena: ArenaAllocator,
|
arena: ArenaAllocator,
|
||||||
|
|
||||||
fn init(allocator: Allocator, header_size: usize, buf_size: usize) !State {
|
fn init(allocator: Allocator, header_size: usize, peek_size: usize, buf_size: usize) !State {
|
||||||
|
const peek_buf = try allocator.alloc(u8, peek_size);
|
||||||
|
errdefer allocator.free(peek_buf);
|
||||||
|
|
||||||
const read_buf = try allocator.alloc(u8, buf_size);
|
const read_buf = try allocator.alloc(u8, buf_size);
|
||||||
errdefer allocator.free(read_buf);
|
errdefer allocator.free(read_buf);
|
||||||
|
|
||||||
@@ -1572,6 +1632,7 @@ const State = struct {
|
|||||||
errdefer allocator.free(header_buf);
|
errdefer allocator.free(header_buf);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
|
.peek_buf = peek_buf,
|
||||||
.read_buf = read_buf,
|
.read_buf = read_buf,
|
||||||
.write_buf = write_buf,
|
.write_buf = write_buf,
|
||||||
.header_buf = header_buf,
|
.header_buf = header_buf,
|
||||||
@@ -1585,6 +1646,7 @@ const State = struct {
|
|||||||
|
|
||||||
fn deinit(self: *State) void {
|
fn deinit(self: *State) void {
|
||||||
const allocator = self.arena.child_allocator;
|
const allocator = self.arena.child_allocator;
|
||||||
|
allocator.free(self.peek_buf);
|
||||||
allocator.free(self.read_buf);
|
allocator.free(self.read_buf);
|
||||||
allocator.free(self.write_buf);
|
allocator.free(self.write_buf);
|
||||||
allocator.free(self.header_buf);
|
allocator.free(self.header_buf);
|
||||||
@@ -1611,7 +1673,7 @@ const StatePool = struct {
|
|||||||
for (0..count) |i| {
|
for (0..count) |i| {
|
||||||
const state = try allocator.create(State);
|
const state = try allocator.create(State);
|
||||||
errdefer allocator.destroy(state);
|
errdefer allocator.destroy(state);
|
||||||
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, BUFFER_LEN);
|
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, PEEK_BUF_LEN, BUFFER_LEN);
|
||||||
states[i] = state;
|
states[i] = state;
|
||||||
started += 1;
|
started += 1;
|
||||||
}
|
}
|
||||||
@@ -1662,7 +1724,7 @@ const StatePool = struct {
|
|||||||
|
|
||||||
const testing = @import("../testing.zig");
|
const testing = @import("../testing.zig");
|
||||||
test "HttpClient Reader: fuzz" {
|
test "HttpClient Reader: fuzz" {
|
||||||
var state = try State.init(testing.allocator, 1024, 1024);
|
var state = try State.init(testing.allocator, 1024, 1024, 100);
|
||||||
defer state.deinit();
|
defer state.deinit();
|
||||||
|
|
||||||
var res = TestResponse.init();
|
var res = TestResponse.init();
|
||||||
@@ -1773,6 +1835,7 @@ test "HttpClient: sync connect error" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync no body" {
|
test "HttpClient: sync no body" {
|
||||||
|
for (0..2) |i| {
|
||||||
var client = try testClient();
|
var client = try testClient();
|
||||||
defer client.deinit();
|
defer client.deinit();
|
||||||
|
|
||||||
@@ -1780,11 +1843,15 @@ test "HttpClient: sync no body" {
|
|||||||
var req = try client.request(.GET, &uri);
|
var req = try client.request(.GET, &uri);
|
||||||
var res = try req.sendSync(.{});
|
var res = try req.sendSync(.{});
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
try testing.expectEqual("", try res.peek());
|
||||||
|
}
|
||||||
try testing.expectEqual(null, try res.next());
|
try testing.expectEqual(null, try res.next());
|
||||||
try testing.expectEqual(200, res.header.status);
|
try testing.expectEqual(200, res.header.status);
|
||||||
try testing.expectEqual(2, res.header.count());
|
try testing.expectEqual(2, res.header.count());
|
||||||
try testing.expectEqual("close", res.header.get("connection"));
|
try testing.expectEqual("close", res.header.get("connection"));
|
||||||
try testing.expectEqual("0", res.header.get("content-length"));
|
try testing.expectEqual("0", res.header.get("content-length"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync tls no body" {
|
test "HttpClient: sync tls no body" {
|
||||||
@@ -1804,6 +1871,7 @@ test "HttpClient: sync tls no body" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync with body" {
|
test "HttpClient: sync with body" {
|
||||||
|
for (0..2) |i| {
|
||||||
var client = try testClient();
|
var client = try testClient();
|
||||||
defer client.deinit();
|
defer client.deinit();
|
||||||
|
|
||||||
@@ -1811,6 +1879,9 @@ test "HttpClient: sync with body" {
|
|||||||
var req = try client.request(.GET, &uri);
|
var req = try client.request(.GET, &uri);
|
||||||
var res = try req.sendSync(.{});
|
var res = try req.sendSync(.{});
|
||||||
|
|
||||||
|
if (i == 0) {
|
||||||
|
try testing.expectEqual("over 9000!", try res.peek());
|
||||||
|
}
|
||||||
try testing.expectEqual("over 9000!", try res.next());
|
try testing.expectEqual("over 9000!", try res.next());
|
||||||
try testing.expectEqual(201, res.header.status);
|
try testing.expectEqual(201, res.header.status);
|
||||||
try testing.expectEqual(5, res.header.count());
|
try testing.expectEqual(5, res.header.count());
|
||||||
@@ -1819,6 +1890,7 @@ test "HttpClient: sync with body" {
|
|||||||
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
||||||
try testing.expectEqual("Close", res.header.get("_connection"));
|
try testing.expectEqual("Close", res.header.get("_connection"));
|
||||||
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
test "HttpClient: sync tls with body" {
|
test "HttpClient: sync tls with body" {
|
||||||
|
|||||||
@@ -24,6 +24,17 @@ pub const expectError = std.testing.expectError;
|
|||||||
pub const expectString = std.testing.expectEqualStrings;
|
pub const expectString = std.testing.expectEqualStrings;
|
||||||
pub const expectEqualSlices = std.testing.expectEqualSlices;
|
pub const expectEqualSlices = std.testing.expectEqualSlices;
|
||||||
|
|
||||||
|
// sometimes it's super useful to have an arena you don't really care about
|
||||||
|
// in a test. Like, you need a mutable string, so you just want to dupe a
|
||||||
|
// string literal. It has nothing to do with the code under test, it's just
|
||||||
|
// infrastructure for the test itself.
|
||||||
|
pub var arena_instance = std.heap.ArenaAllocator.init(std.heap.c_allocator);
|
||||||
|
pub const arena_allocator = arena_instance.allocator();
|
||||||
|
|
||||||
|
pub fn reset() void {
|
||||||
|
_ = arena_instance.reset(.{ .retain_capacity = {} });
|
||||||
|
}
|
||||||
|
|
||||||
const App = @import("app.zig").App;
|
const App = @import("app.zig").App;
|
||||||
const parser = @import("browser/netsurf.zig");
|
const parser = @import("browser/netsurf.zig");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user