mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-29 07:03:29 +00:00
Merge pull request #553 from lightpanda-io/mime_sniffing
Try to sniff the mime type based on the body content
This commit is contained in:
@@ -435,24 +435,19 @@ pub const Page = struct {
|
||||
|
||||
log.info("GET {any} {d}", .{ url, header.status });
|
||||
|
||||
const ct = blk: {
|
||||
break :blk header.get("content-type") orelse {
|
||||
// no content type in HTTP headers.
|
||||
// TODO try to sniff mime type from the body.
|
||||
log.info("no content-type HTTP header", .{});
|
||||
const content_type = header.get("content-type");
|
||||
|
||||
// Assume it's HTML for now.
|
||||
break :blk "text/html; charset=utf-8";
|
||||
};
|
||||
};
|
||||
|
||||
log.debug("header content-type: {s}", .{ct});
|
||||
var mime = try Mime.parse(arena, ct);
|
||||
const mime: Mime = blk: {
|
||||
if (content_type) |ct| {
|
||||
break :blk try Mime.parse(arena, ct);
|
||||
}
|
||||
break :blk Mime.sniff(try response.peek());
|
||||
} orelse .unknown;
|
||||
|
||||
if (mime.isHTML()) {
|
||||
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
|
||||
} else {
|
||||
log.info("non-HTML document: {s}", .{ct});
|
||||
log.info("non-HTML document: {s}", .{content_type orelse "null"});
|
||||
var arr: std.ArrayListUnmanaged(u8) = .{};
|
||||
while (try response.next()) |data| {
|
||||
try arr.appendSlice(arena, try arena.dupe(u8, data));
|
||||
|
||||
@@ -24,10 +24,17 @@ pub const Mime = struct {
|
||||
params: []const u8 = "",
|
||||
charset: ?[]const u8 = null,
|
||||
|
||||
pub const unknown = Mime{
|
||||
.params = "",
|
||||
.charset = "",
|
||||
.content_type = .{ .unknown = {} },
|
||||
};
|
||||
|
||||
pub const ContentTypeEnum = enum {
|
||||
text_xml,
|
||||
text_html,
|
||||
text_plain,
|
||||
unknown,
|
||||
other,
|
||||
};
|
||||
|
||||
@@ -35,21 +42,26 @@ pub const Mime = struct {
|
||||
text_xml: void,
|
||||
text_html: void,
|
||||
text_plain: void,
|
||||
unknown: void,
|
||||
other: struct { type: []const u8, sub_type: []const u8 },
|
||||
};
|
||||
|
||||
pub fn parse(arena: Allocator, input: []const u8) !Mime {
|
||||
pub fn parse(arena: Allocator, input: []u8) !Mime {
|
||||
if (input.len > 255) {
|
||||
return error.TooBig;
|
||||
}
|
||||
var trimmed = trim(input);
|
||||
|
||||
const content_type, const type_len = try parseContentType(trimmed);
|
||||
if (type_len >= trimmed.len) {
|
||||
// Zig's trim API is broken. The return type is always `[]const u8`,
|
||||
// even if the input type is `[]u8`. @constCast is safe here.
|
||||
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
|
||||
_ = std.ascii.lowerString(normalized, normalized);
|
||||
|
||||
const content_type, const type_len = try parseContentType(normalized);
|
||||
if (type_len >= normalized.len) {
|
||||
return .{ .content_type = content_type };
|
||||
}
|
||||
|
||||
const params = trimLeft(trimmed[type_len..]);
|
||||
const params = trimLeft(normalized[type_len..]);
|
||||
|
||||
var charset: ?[]const u8 = null;
|
||||
|
||||
@@ -63,11 +75,12 @@ pub const Mime = struct {
|
||||
return error.Invalid;
|
||||
}
|
||||
|
||||
switch (name.len) {
|
||||
7 => if (isCaseEqual("charset", name)) {
|
||||
charset = try parseValue(arena, value);
|
||||
},
|
||||
else => {},
|
||||
const attribute_name = std.meta.stringToEnum(enum {
|
||||
charset,
|
||||
}, name) orelse continue;
|
||||
|
||||
switch (attribute_name) {
|
||||
.charset => charset = try parseAttributeValue(arena, value),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,66 +91,113 @@ pub const Mime = struct {
|
||||
};
|
||||
}
|
||||
|
||||
/// Guesses a MIME type from the leading bytes of `body`, e.g. when a response
/// carries no content-type header. Leading whitespace is skipped, then:
///   - a UTF-8 or UTF-16 byte-order mark yields text/plain;
///   - a '<' followed by one of the known names below (compared
///     case-insensitively) and a tag-terminating byte (' ' or '>') yields
///     text/html or text/xml.
/// Returns null when nothing matches, which includes a body that ends before
/// the tag-terminating byte.
pub fn sniff(body: []const u8) ?Mime {
    // 0x0C is form feed
    const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C });
    if (content.len == 0) {
        return null;
    }

    if (content[0] != '<') {
        // Not markup: the only other signal we recognise is a byte-order mark.
        const boms = [_][]const u8{
            "\xEF\xBB\xBF", // UTF-8 BOM
            "\xFE\xFF", // UTF-16 big-endian BOM
            "\xFF\xFE", // UTF-16 little-endian BOM
        };
        for (boms) |bom| {
            if (std.mem.startsWith(u8, content, bom)) {
                return .{ .content_type = .{ .text_plain = {} } };
            }
        }
        return null;
    }

    // The leading '<' is already verified and is skipped. The longest known
    // name is "!doctype html" (13 bytes) and we need 1 more byte to inspect
    // the tag-terminating byte that follows it: 14 bytes in total.
    var buf: [14]u8 = undefined;

    const stripped = content[1..];
    const prefix_len = @min(stripped.len, buf.len);
    // Lower-cased copy so that the comparison is case-insensitive.
    const prefix = std.ascii.lowerString(&buf, stripped[0..prefix_len]);

    // Names are listed without the leading '<'.
    const known_prefixes = [_]struct { []const u8, ContentType }{
        .{ "!doctype html", .{ .text_html = {} } },
        .{ "html", .{ .text_html = {} } },
        .{ "head", .{ .text_html = {} } },
        .{ "script", .{ .text_html = {} } },
        .{ "iframe", .{ .text_html = {} } },
        .{ "h1", .{ .text_html = {} } },
        .{ "div", .{ .text_html = {} } },
        .{ "font", .{ .text_html = {} } },
        .{ "table", .{ .text_html = {} } },
        .{ "a", .{ .text_html = {} } },
        .{ "style", .{ .text_html = {} } },
        .{ "title", .{ .text_html = {} } },
        .{ "b", .{ .text_html = {} } },
        .{ "body", .{ .text_html = {} } },
        .{ "br", .{ .text_html = {} } },
        .{ "p", .{ .text_html = {} } },
        .{ "!--", .{ .text_html = {} } },
        // "<?xml" is the XML declaration; "<xml" is kept for compatibility
        // with the previous behavior.
        .{ "?xml", .{ .text_xml = {} } },
        .{ "xml", .{ .text_xml = {} } },
    };

    inline for (known_prefixes) |kp| {
        const known_prefix = kp[0];
        // Require at least one byte after the name so that it can be checked
        // as a terminator ("<a_" or a truncated "<htm" must not match).
        if (std.mem.startsWith(u8, prefix, known_prefix) and prefix.len > known_prefix.len) {
            const next = prefix[known_prefix.len];
            // a "tag-terminating-byte"
            if (next == ' ' or next == '>') {
                return .{ .content_type = kp[1] };
            }
        }
    }

    return null;
}
|
||||
|
||||
/// Reports whether the active content type of this Mime is text/html.
pub fn isHTML(self: *const Mime) bool {
    return switch (self.content_type) {
        .text_html => true,
        else => false,
    };
}
|
||||
|
||||
// we expect value to be lowercase
|
||||
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
||||
const separator = std.mem.indexOfScalarPos(u8, value, 0, '/') orelse {
|
||||
return error.Invalid;
|
||||
};
|
||||
const end = std.mem.indexOfScalarPos(u8, value, separator, ';') orelse blk: {
|
||||
break :blk value.len;
|
||||
};
|
||||
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
|
||||
const type_name = trimRight(value[0..end]);
|
||||
const attribute_start = end + 1;
|
||||
|
||||
if (std.meta.stringToEnum(enum {
|
||||
@"text/xml",
|
||||
@"text/html",
|
||||
@"text/plain",
|
||||
}, type_name)) |known_type| {
|
||||
const ct: ContentType = switch (known_type) {
|
||||
.@"text/xml" => .{ .text_xml = {} },
|
||||
.@"text/html" => .{ .text_html = {} },
|
||||
.@"text/plain" => .{ .text_plain = {} },
|
||||
};
|
||||
return .{ ct, attribute_start };
|
||||
}
|
||||
|
||||
const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid;
|
||||
|
||||
const main_type = value[0..separator];
|
||||
const sub_type = trimRight(value[separator + 1 .. end]);
|
||||
|
||||
if (parseCommonContentType(main_type, sub_type)) |content_type| {
|
||||
return .{ content_type, end + 1 };
|
||||
}
|
||||
|
||||
if (main_type.len == 0) {
|
||||
if (main_type.len == 0 or validType(main_type) == false) {
|
||||
return error.Invalid;
|
||||
}
|
||||
if (validType(main_type) == false) {
|
||||
if (sub_type.len == 0 or validType(sub_type) == false) {
|
||||
return error.Invalid;
|
||||
}
|
||||
|
||||
if (sub_type.len == 0) {
|
||||
return error.Invalid;
|
||||
}
|
||||
if (validType(sub_type) == false) {
|
||||
return error.Invalid;
|
||||
}
|
||||
|
||||
const content_type = ContentType{ .other = .{
|
||||
return .{ .{ .other = .{
|
||||
.type = main_type,
|
||||
.sub_type = sub_type,
|
||||
} };
|
||||
|
||||
return .{ content_type, end + 1 };
|
||||
}
|
||||
|
||||
fn parseCommonContentType(main_type: []const u8, sub_type: []const u8) ?ContentType {
|
||||
switch (main_type.len) {
|
||||
4 => if (isCaseEqual("text", main_type)) {
|
||||
switch (sub_type.len) {
|
||||
3 => if (isCaseEqual("xml", sub_type)) {
|
||||
return .{ .text_xml = {} };
|
||||
},
|
||||
4 => if (isCaseEqual("html", sub_type)) {
|
||||
return .{ .text_html = {} };
|
||||
},
|
||||
5 => if (isCaseEqual("plain", sub_type)) {
|
||||
return .{ .text_plain = {} };
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
return null;
|
||||
} }, attribute_start };
|
||||
}
|
||||
|
||||
const T_SPECIAL = blk: {
|
||||
@@ -148,7 +208,7 @@ pub const Mime = struct {
|
||||
break :blk v;
|
||||
};
|
||||
|
||||
fn parseValue(arena: Allocator, value: []const u8) ![]const u8 {
|
||||
fn parseAttributeValue(arena: Allocator, value: []const u8) ![]const u8 {
|
||||
if (value[0] != '"') {
|
||||
return value;
|
||||
}
|
||||
@@ -218,10 +278,6 @@ pub const Mime = struct {
|
||||
return true;
|
||||
}
|
||||
|
||||
fn trim(s: []const u8) []const u8 {
|
||||
return std.mem.trim(u8, s, &std.ascii.whitespace);
|
||||
}
|
||||
|
||||
/// Returns the sub-slice of `s` that remains once leading ASCII whitespace
/// has been skipped.
fn trimLeft(s: []const u8) []const u8 {
    const whitespace: []const u8 = &std.ascii.whitespace;
    return std.mem.trimLeft(u8, s, whitespace);
}
|
||||
@@ -229,28 +285,12 @@ pub const Mime = struct {
|
||||
/// Returns the sub-slice of `s` that remains once trailing ASCII whitespace
/// has been dropped.
fn trimRight(s: []const u8) []const u8 {
    const whitespace: []const u8 = &std.ascii.whitespace;
    return std.mem.trimRight(u8, s, whitespace);
}
|
||||
|
||||
fn isCaseEqual(comptime target: anytype, value: []const u8) bool {
|
||||
// - 8 because we don't care about the sentinel
|
||||
const bit_len = @bitSizeOf(@TypeOf(target.*)) - 8;
|
||||
const byte_len = bit_len / 8;
|
||||
|
||||
const T = @Type(.{ .int = .{
|
||||
.bits = bit_len,
|
||||
.signedness = .unsigned,
|
||||
} });
|
||||
|
||||
const bit_target: T = @bitCast(@as(*const [byte_len]u8, target).*);
|
||||
|
||||
if (@as(T, @bitCast(value[0..byte_len].*)) == bit_target) {
|
||||
return true;
|
||||
}
|
||||
return std.ascii.eqlIgnoreCase(value, target);
|
||||
}
|
||||
};
|
||||
|
||||
const testing = std.testing;
|
||||
const testing = @import("../testing.zig");
|
||||
test "Mime: invalid " {
|
||||
defer testing.reset();
|
||||
|
||||
const invalids = [_][]const u8{
|
||||
"",
|
||||
"text",
|
||||
@@ -270,11 +310,14 @@ test "Mime: invalid " {
|
||||
};
|
||||
|
||||
for (invalids) |invalid| {
|
||||
try testing.expectError(error.Invalid, Mime.parse(undefined, invalid));
|
||||
const mutable_input = try testing.arena_allocator.dupe(u8, invalid);
|
||||
try testing.expectError(error.Invalid, Mime.parse(undefined, mutable_input));
|
||||
}
|
||||
}
|
||||
|
||||
test "Mime: parse common" {
|
||||
defer testing.reset();
|
||||
|
||||
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
|
||||
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
|
||||
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
|
||||
@@ -297,6 +340,8 @@ test "Mime: parse common" {
|
||||
}
|
||||
|
||||
test "Mime: parse uncommon" {
|
||||
defer testing.reset();
|
||||
|
||||
const text_javascript = Expectation{
|
||||
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
|
||||
};
|
||||
@@ -306,12 +351,14 @@ test "Mime: parse uncommon" {
|
||||
try expect(text_javascript, " text/javascript\t ;");
|
||||
|
||||
try expect(
|
||||
.{ .content_type = .{ .other = .{ .type = "Text", .sub_type = "Javascript" } } },
|
||||
.{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
|
||||
"Text/Javascript",
|
||||
);
|
||||
}
|
||||
|
||||
test "Mime: parse charset" {
|
||||
defer testing.reset();
|
||||
|
||||
try expect(.{
|
||||
.content_type = .{ .text_xml = {} },
|
||||
.charset = "utf-8",
|
||||
@@ -332,11 +379,12 @@ test "Mime: parse charset" {
|
||||
}
|
||||
|
||||
test "Mime: isHTML" {
|
||||
defer testing.reset();
|
||||
|
||||
const isHTML = struct {
|
||||
fn isHTML(expected: bool, input: []const u8) !void {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
var mime = try Mime.parse(arena.allocator(), input);
|
||||
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||
var mime = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||
try testing.expectEqual(expected, mime.isHTML());
|
||||
}
|
||||
}.isHTML;
|
||||
@@ -348,6 +396,71 @@ test "Mime: isHTML" {
|
||||
try isHTML(false, "over/9000");
|
||||
}
|
||||
|
||||
test "Mime: sniff" {
|
||||
try testing.expectEqual(null, Mime.sniff(""));
|
||||
try testing.expectEqual(null, Mime.sniff("<htm"));
|
||||
try testing.expectEqual(null, Mime.sniff("<html!"));
|
||||
try testing.expectEqual(null, Mime.sniff("<a_"));
|
||||
try testing.expectEqual(null, Mime.sniff("<!doctype html"));
|
||||
try testing.expectEqual(null, Mime.sniff("<!doctype html>"));
|
||||
try testing.expectEqual(null, Mime.sniff("\n <!doctype html>"));
|
||||
try testing.expectEqual(null, Mime.sniff("\n \t <font/>"));
|
||||
|
||||
const expectHTML = struct {
|
||||
fn expect(input: []const u8) !void {
|
||||
try testing.expectEqual(.text_html, std.meta.activeTag(Mime.sniff(input).?.content_type));
|
||||
}
|
||||
}.expect;
|
||||
|
||||
try expectHTML("<!doctype html ");
|
||||
try expectHTML("\n \t <!DOCTYPE HTML ");
|
||||
|
||||
try expectHTML("<html ");
|
||||
try expectHTML("\n \t <HtmL> even more stufff");
|
||||
|
||||
try expectHTML("<script>");
|
||||
try expectHTML("\n \t <SCRIpt >alert(document.cookies)</script>");
|
||||
|
||||
try expectHTML("<iframe>");
|
||||
try expectHTML(" \t <ifRAME >");
|
||||
|
||||
try expectHTML("<h1>");
|
||||
try expectHTML(" <H1>");
|
||||
|
||||
try expectHTML("<div>");
|
||||
try expectHTML("\n\r\r <DiV>");
|
||||
|
||||
try expectHTML("<font>");
|
||||
try expectHTML(" <fonT>");
|
||||
|
||||
try expectHTML("<table>");
|
||||
try expectHTML("\t\t<TAblE>");
|
||||
|
||||
try expectHTML("<a>");
|
||||
try expectHTML("\n\n<A>");
|
||||
|
||||
try expectHTML("<style>");
|
||||
try expectHTML(" \n\t <STyLE>");
|
||||
|
||||
try expectHTML("<title>");
|
||||
try expectHTML(" \n\t <TITLE>");
|
||||
|
||||
try expectHTML("<b>");
|
||||
try expectHTML(" \n\t <B>");
|
||||
|
||||
try expectHTML("<body>");
|
||||
try expectHTML(" \n\t <BODY>");
|
||||
|
||||
try expectHTML("<br>");
|
||||
try expectHTML(" \n\t <BR>");
|
||||
|
||||
try expectHTML("<p>");
|
||||
try expectHTML(" \n\t <P>");
|
||||
|
||||
try expectHTML("<!-->");
|
||||
try expectHTML(" \n\t <!-->");
|
||||
}
|
||||
|
||||
const Expectation = struct {
|
||||
content_type: Mime.ContentType,
|
||||
params: []const u8 = "",
|
||||
@@ -355,11 +468,9 @@ const Expectation = struct {
|
||||
};
|
||||
|
||||
fn expect(expected: Expectation, input: []const u8) !void {
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
const actual = try Mime.parse(arena.allocator(), input);
|
||||
const mutable_input = try testing.arena_allocator.dupe(u8, input);
|
||||
|
||||
const actual = try Mime.parse(testing.arena_allocator, mutable_input);
|
||||
try testing.expectEqual(
|
||||
std.meta.activeTag(expected.content_type),
|
||||
std.meta.activeTag(actual.content_type),
|
||||
@@ -368,16 +479,16 @@ fn expect(expected: Expectation, input: []const u8) !void {
|
||||
switch (expected.content_type) {
|
||||
.other => |e| {
|
||||
const a = actual.content_type.other;
|
||||
try testing.expectEqualStrings(e.type, a.type);
|
||||
try testing.expectEqualStrings(e.sub_type, a.sub_type);
|
||||
try testing.expectEqual(e.type, a.type);
|
||||
try testing.expectEqual(e.sub_type, a.sub_type);
|
||||
},
|
||||
else => {}, // already asserted above
|
||||
}
|
||||
|
||||
try testing.expectEqualStrings(expected.params, actual.params);
|
||||
try testing.expectEqual(expected.params, actual.params);
|
||||
|
||||
if (expected.charset) |ec| {
|
||||
try testing.expectEqualStrings(ec, actual.charset.?);
|
||||
try testing.expectEqual(ec, actual.charset.?);
|
||||
} else {
|
||||
try testing.expectEqual(null, actual.charset);
|
||||
}
|
||||
|
||||
@@ -254,7 +254,7 @@ pub const XMLHttpRequest = struct {
|
||||
};
|
||||
const ResponseObj = union(ResponseObjTag) {
|
||||
Document: *parser.Document,
|
||||
Failure: bool,
|
||||
Failure: void,
|
||||
JSON: std.json.Parsed(JSONValue),
|
||||
|
||||
fn deinit(self: ResponseObj) void {
|
||||
@@ -511,12 +511,8 @@ pub const XMLHttpRequest = struct {
|
||||
}
|
||||
|
||||
// extract a mime type from headers.
|
||||
{
|
||||
var raw: []const u8 = "text/xml";
|
||||
if (header.get("content-type")) |ct| {
|
||||
raw = try self.arena.dupe(u8, ct);
|
||||
}
|
||||
self.response_mime = Mime.parse(self.arena, raw) catch |e| {
|
||||
if (header.get("content-type")) |ct| {
|
||||
self.response_mime = Mime.parse(self.arena, ct) catch |e| {
|
||||
return self.onErr(e);
|
||||
};
|
||||
}
|
||||
@@ -724,26 +720,24 @@ pub const XMLHttpRequest = struct {
|
||||
// TODO parse XML.
|
||||
// https://xhr.spec.whatwg.org/#response-object
|
||||
fn setResponseObjDocument(self: *XMLHttpRequest) void {
|
||||
const response_mime = &self.response_mime.?;
|
||||
const isHTML = response_mime.isHTML();
|
||||
|
||||
// TODO If finalMIME is not an HTML MIME type or an XML MIME type, then
|
||||
// return.
|
||||
if (!isHTML) {
|
||||
const mime = self.response_mime orelse return;
|
||||
if (mime.isHTML() == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
var ccharset: [:0]const u8 = "utf-8";
|
||||
if (response_mime.charset) |rc| {
|
||||
ccharset = self.arena.dupeZ(u8, rc) catch {
|
||||
self.response_obj = .{ .Failure = true };
|
||||
return;
|
||||
};
|
||||
if (mime.charset) |rc| {
|
||||
if (std.mem.eql(u8, rc, "utf-8") == false) {
|
||||
ccharset = self.arena.dupeZ(u8, rc) catch {
|
||||
self.response_obj = .{ .Failure = {} };
|
||||
return;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
||||
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
||||
self.response_obj = .{ .Failure = true };
|
||||
self.response_obj = .{ .Failure = {} };
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -766,7 +760,7 @@ pub const XMLHttpRequest = struct {
|
||||
.{},
|
||||
) catch |e| {
|
||||
log.err("parse JSON: {}", .{e});
|
||||
self.response_obj = .{ .Failure = true };
|
||||
self.response_obj = .{ .Failure = {} };
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
@@ -32,9 +32,13 @@ const Loop = @import("../runtime/loop.zig").Loop;
|
||||
|
||||
const log = std.log.scoped(.http_client);
|
||||
|
||||
// We might need to peek at the body to try and sniff the content-type.
|
||||
// While we only need a few bytes, in most cases we need to ignore leading
|
||||
// whitespace, so we want to get a reasonable-sized chunk.
|
||||
const PEEK_BUF_LEN = 1024;
|
||||
|
||||
const BUFFER_LEN = 32 * 1024;
|
||||
|
||||
// The longest individual header line that we support
|
||||
const MAX_HEADER_LINE_LEN = 4096;
|
||||
|
||||
// Thread-safe. Holds our root certificate, connection pool and state pool
|
||||
@@ -900,6 +904,7 @@ const SyncHandler = struct {
|
||||
// object which can be iterated to get the body.
|
||||
std.debug.assert(result.done or reader.body_reader != null);
|
||||
std.debug.assert(result.data == null);
|
||||
|
||||
return .{
|
||||
._buf = buf,
|
||||
._request = request,
|
||||
@@ -907,6 +912,8 @@ const SyncHandler = struct {
|
||||
._done = result.done,
|
||||
._connection = connection,
|
||||
._data = result.unprocessed,
|
||||
._peek_len = 0,
|
||||
._peek_buf = state.peek_buf,
|
||||
.header = reader.response,
|
||||
};
|
||||
}
|
||||
@@ -1046,7 +1053,7 @@ const Reader = struct {
|
||||
|
||||
// Still parsing the header
|
||||
|
||||
// what data do we have leftover in `data`.
|
||||
// What data do we have leftover in `data`?
|
||||
// When header_done == true, then this is part (or all) of the body
|
||||
// When header_done == false, then this is a header line that we didn't
|
||||
// have enough data for.
|
||||
@@ -1504,23 +1511,49 @@ pub const Progress = struct {
|
||||
header: ResponseHeader,
|
||||
};
|
||||
|
||||
// The value that we return from a synchronous requst.
|
||||
// The value that we return from a synchronous request.
|
||||
pub const Response = struct {
|
||||
_reader: Reader,
|
||||
_request: *Request,
|
||||
|
||||
_buf: []u8,
|
||||
_connection: SyncHandler.Connection,
|
||||
|
||||
// the buffer to read the peeked data into
|
||||
_peek_buf: []u8,
|
||||
|
||||
// the length of data we've peeked. The peeked_data is _peek_buf[0.._peek_len].
|
||||
// It's possible for peek_len > 0 and _done == true, in which case, the
|
||||
// _peeked data should be emitted once and subsequent calls to `next` should
|
||||
// return null.
|
||||
_peek_len: usize,
|
||||
|
||||
// What we'll read from the socket into. This is the State's read_buf
|
||||
_buf: []u8,
|
||||
|
||||
// Whether or not we're done reading the response. When true, next will
|
||||
// return null.
|
||||
_done: bool,
|
||||
|
||||
// Any data we over-read while parsing the header. This will be returned on
|
||||
// the first call to next();
|
||||
// Data that we've read. This can be set when the Response is first created
|
||||
// from extra data received while parsing the header. Or, it can be set
|
||||
// when `next` is called and we read more data from the socket.
|
||||
_data: ?[]u8 = null,
|
||||
header: ResponseHeader,
|
||||
|
||||
pub fn next(self: *Response) !?[]u8 {
|
||||
var buf = self._buf;
|
||||
// it's possible for peek_len > 0 and done == true. This would happen
|
||||
// when, while peeking, we reached the end of the data. In that case,
|
||||
// we return the peeked data once, and on subsequent call, we'll return
|
||||
// null normally, because done == true;
|
||||
const pl = self._peek_len;
|
||||
if (pl > 0) {
|
||||
self._peek_len = 0;
|
||||
return self._peek_buf[0..pl];
|
||||
}
|
||||
|
||||
return self._nextIgnorePeek(self._buf);
|
||||
}
|
||||
|
||||
fn _nextIgnorePeek(self: *Response, buf: []u8) !?[]u8 {
|
||||
while (true) {
|
||||
if (try self.processData()) |data| {
|
||||
return data;
|
||||
@@ -1541,14 +1574,38 @@ pub const Response = struct {
|
||||
self._data = result.unprocessed; // for the next call
|
||||
return result.data;
|
||||
}
|
||||
|
||||
/// Reads body data into the peek buffer and returns everything peeked so
/// far, without losing it: `_peek_len` records how much was peeked, and
/// `next` hands those bytes back before reading anything further.
///
/// Keeps reading until more than 100 bytes have been collected or the body
/// ends, whichever comes first. The returned slice always starts at the
/// beginning of the peek buffer, so callers see the body from its first byte.
pub fn peek(self: *Response) ![]u8 {
    const peek_buf = self._peek_buf;

    while (true) {
        const peek_len = self._peek_len;

        // Read straight into the unused tail of the peek buffer.
        const data = (try self._nextIgnorePeek(peek_buf[peek_len..])) orelse {
            // End of body: return whatever was collected (possibly empty).
            return peek_buf[0..peek_len];
        };

        const peek_end = peek_len + data.len;
        // `data` may already live inside the region we are copying into
        // (we passed that region as the read buffer), so use a copy that
        // tolerates overlap when the destination does not start after the
        // source.
        // NOTE(review): assumes `data` fits in the remaining peek buffer;
        // confirm that leftover header data can never exceed it.
        std.mem.copyForwards(u8, peek_buf[peek_len..peek_end], data);
        self._peek_len = peek_end;

        if (peek_end > 100) {
            // Return the whole peeked prefix, not just the last chunk read.
            return peek_buf[0..peek_end];
        }
    }
}
|
||||
};
|
||||
|
||||
// Pooled and re-used when creating a request
|
||||
const State = struct {
|
||||
// used for reading chunks of payload data.
|
||||
// We might be asked to peek at the response, i.e. to sniff the mime type.
|
||||
// This will require storing any peeked data so that, later, if we stream
|
||||
// the body, we can present a cohesive body.
|
||||
peek_buf: []u8,
|
||||
|
||||
// Used for reading chunks of payload data.
|
||||
read_buf: []u8,
|
||||
|
||||
// use for writing data. If you're wondering why BOTH a read_buf and a
|
||||
// Used for writing data. If you're wondering why BOTH a read_buf and a
|
||||
// write_buf, even though HTTP is req -> resp, it's for TLS, which has
|
||||
// bidirectional data.
|
||||
write_buf: []u8,
|
||||
@@ -1561,7 +1618,10 @@ const State = struct {
|
||||
// response headers.
|
||||
arena: ArenaAllocator,
|
||||
|
||||
fn init(allocator: Allocator, header_size: usize, buf_size: usize) !State {
|
||||
fn init(allocator: Allocator, header_size: usize, peek_size: usize, buf_size: usize) !State {
|
||||
const peek_buf = try allocator.alloc(u8, peek_size);
|
||||
errdefer allocator.free(peek_buf);
|
||||
|
||||
const read_buf = try allocator.alloc(u8, buf_size);
|
||||
errdefer allocator.free(read_buf);
|
||||
|
||||
@@ -1572,6 +1632,7 @@ const State = struct {
|
||||
errdefer allocator.free(header_buf);
|
||||
|
||||
return .{
|
||||
.peek_buf = peek_buf,
|
||||
.read_buf = read_buf,
|
||||
.write_buf = write_buf,
|
||||
.header_buf = header_buf,
|
||||
@@ -1585,6 +1646,7 @@ const State = struct {
|
||||
|
||||
fn deinit(self: *State) void {
|
||||
const allocator = self.arena.child_allocator;
|
||||
allocator.free(self.peek_buf);
|
||||
allocator.free(self.read_buf);
|
||||
allocator.free(self.write_buf);
|
||||
allocator.free(self.header_buf);
|
||||
@@ -1611,7 +1673,7 @@ const StatePool = struct {
|
||||
for (0..count) |i| {
|
||||
const state = try allocator.create(State);
|
||||
errdefer allocator.destroy(state);
|
||||
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, BUFFER_LEN);
|
||||
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, PEEK_BUF_LEN, BUFFER_LEN);
|
||||
states[i] = state;
|
||||
started += 1;
|
||||
}
|
||||
@@ -1662,7 +1724,7 @@ const StatePool = struct {
|
||||
|
||||
const testing = @import("../testing.zig");
|
||||
test "HttpClient Reader: fuzz" {
|
||||
var state = try State.init(testing.allocator, 1024, 1024);
|
||||
var state = try State.init(testing.allocator, 1024, 1024, 100);
|
||||
defer state.deinit();
|
||||
|
||||
var res = TestResponse.init();
|
||||
@@ -1773,18 +1835,23 @@ test "HttpClient: sync connect error" {
|
||||
}
|
||||
|
||||
test "HttpClient: sync no body" {
|
||||
var client = try testClient();
|
||||
defer client.deinit();
|
||||
for (0..2) |i| {
|
||||
var client = try testClient();
|
||||
defer client.deinit();
|
||||
|
||||
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple");
|
||||
var req = try client.request(.GET, &uri);
|
||||
var res = try req.sendSync(.{});
|
||||
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple");
|
||||
var req = try client.request(.GET, &uri);
|
||||
var res = try req.sendSync(.{});
|
||||
|
||||
try testing.expectEqual(null, try res.next());
|
||||
try testing.expectEqual(200, res.header.status);
|
||||
try testing.expectEqual(2, res.header.count());
|
||||
try testing.expectEqual("close", res.header.get("connection"));
|
||||
try testing.expectEqual("0", res.header.get("content-length"));
|
||||
if (i == 0) {
|
||||
try testing.expectEqual("", try res.peek());
|
||||
}
|
||||
try testing.expectEqual(null, try res.next());
|
||||
try testing.expectEqual(200, res.header.status);
|
||||
try testing.expectEqual(2, res.header.count());
|
||||
try testing.expectEqual("close", res.header.get("connection"));
|
||||
try testing.expectEqual("0", res.header.get("content-length"));
|
||||
}
|
||||
}
|
||||
|
||||
test "HttpClient: sync tls no body" {
|
||||
@@ -1804,21 +1871,26 @@ test "HttpClient: sync tls no body" {
|
||||
}
|
||||
|
||||
test "HttpClient: sync with body" {
|
||||
var client = try testClient();
|
||||
defer client.deinit();
|
||||
for (0..2) |i| {
|
||||
var client = try testClient();
|
||||
defer client.deinit();
|
||||
|
||||
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo");
|
||||
var req = try client.request(.GET, &uri);
|
||||
var res = try req.sendSync(.{});
|
||||
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo");
|
||||
var req = try client.request(.GET, &uri);
|
||||
var res = try req.sendSync(.{});
|
||||
|
||||
try testing.expectEqual("over 9000!", try res.next());
|
||||
try testing.expectEqual(201, res.header.status);
|
||||
try testing.expectEqual(5, res.header.count());
|
||||
try testing.expectEqual("close", res.header.get("connection"));
|
||||
try testing.expectEqual("10", res.header.get("content-length"));
|
||||
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
||||
try testing.expectEqual("Close", res.header.get("_connection"));
|
||||
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
||||
if (i == 0) {
|
||||
try testing.expectEqual("over 9000!", try res.peek());
|
||||
}
|
||||
try testing.expectEqual("over 9000!", try res.next());
|
||||
try testing.expectEqual(201, res.header.status);
|
||||
try testing.expectEqual(5, res.header.count());
|
||||
try testing.expectEqual("close", res.header.get("connection"));
|
||||
try testing.expectEqual("10", res.header.get("content-length"));
|
||||
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
|
||||
try testing.expectEqual("Close", res.header.get("_connection"));
|
||||
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
|
||||
}
|
||||
}
|
||||
|
||||
test "HttpClient: sync tls with body" {
|
||||
|
||||
@@ -24,6 +24,17 @@ pub const expectError = std.testing.expectError;
|
||||
pub const expectString = std.testing.expectEqualStrings;
|
||||
pub const expectEqualSlices = std.testing.expectEqualSlices;
|
||||
|
||||
// sometimes it's super useful to have an arena you don't really care about
|
||||
// in a test. Like, you need a mutable string, so you just want to dupe a
|
||||
// string literal. It has nothing to do with the code under test, it's just
|
||||
// infrastructure for the test itself.
|
||||
pub var arena_instance = std.heap.ArenaAllocator.init(std.heap.c_allocator);
|
||||
pub const arena_allocator = arena_instance.allocator();
|
||||
|
||||
/// Resets the shared test arena (`arena_instance`), asking it to retain its
/// capacity. The boolean result of the reset is deliberately discarded.
pub fn reset() void {
    _ = arena_instance.reset(.retain_capacity);
}
|
||||
|
||||
const App = @import("app.zig").App;
|
||||
const parser = @import("browser/netsurf.zig");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user