mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-28 14:43:28 +00:00
Compare commits
1 Commits
55e9d8d166
...
normalize-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a849e5693 |
@@ -161,7 +161,7 @@ test "matchFirst" {
|
||||
for (testcases) |tc| {
|
||||
matcher.reset();
|
||||
|
||||
const doc = try parser.documentHTMLParseFromStr(tc.html);
|
||||
const doc = try parser.documentHTMLParseFromStr(alloc, tc.html);
|
||||
defer parser.documentHTMLClose(doc) catch {};
|
||||
|
||||
const s = css.parse(alloc, tc.q, .{}) catch |e| {
|
||||
|
||||
@@ -196,7 +196,10 @@ fn testWriteFullHTML(comptime expected: []const u8, src: []const u8) !void {
|
||||
var buf = std.ArrayListUnmanaged(u8){};
|
||||
defer buf.deinit(testing.allocator);
|
||||
|
||||
const doc_html = try parser.documentHTMLParseFromStr(src);
|
||||
var aa = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer aa.deinit();
|
||||
|
||||
const doc_html = try parser.documentHTMLParseFromStr(aa.allocator(), src);
|
||||
defer parser.documentHTMLClose(doc_html) catch {};
|
||||
|
||||
const doc = parser.documentHTMLToDocument(doc_html);
|
||||
|
||||
@@ -29,6 +29,19 @@ const collection = @import("../dom/html_collection.zig");
|
||||
const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
|
||||
const Cookie = @import("../storage/cookie.zig").Cookie;
|
||||
|
||||
/// Strips leading/trailing ASCII whitespace from `title` and collapses every
/// inner run of ASCII whitespace into a single U+0020 SPACE.
///
/// "ASCII whitespace" follows the WHATWG Infra definition used by
/// `document.title`: TAB, LF, FF, CR and SPACE. Vertical tab (0x0B) is NOT
/// whitespace there, so it is kept as ordinary content. `std.ascii.whitespace`
/// is deliberately not used because it includes vertical tab.
///
/// The result is allocated from `arena` and is never freed here; the returned
/// slice stays valid for as long as the allocator's memory does. The backing
/// buffer is reserved at `title.len` bytes, so it may be larger than the
/// returned slice.
///
/// Returns `error.OutOfMemory` if the buffer cannot be allocated.
pub fn normalizeWhitespace(arena: std.mem.Allocator, title: []const u8) ![]const u8 {
    // TAB, LF, FF, CR, SPACE -- the HTML/Infra "ASCII whitespace" set.
    const html_whitespace = "\t\n\x0C\r ";

    // The output can never be longer than the input: each token is copied
    // verbatim and each separator run (>= 1 byte) becomes exactly one space.
    // That makes the *AssumeCapacity appends below safe.
    var normalized = try std.ArrayListUnmanaged(u8).initCapacity(arena, title.len);
    var tokens = std.mem.tokenizeAny(u8, title, html_whitespace);

    // Emit a single space before every token except the first one.
    var prepend = false;
    while (tokens.next()) |token| {
        if (prepend) normalized.appendAssumeCapacity(' ') else prepend = true;
        normalized.appendSliceAssumeCapacity(token);
    }

    return normalized.items;
}
|
||||
|
||||
// WEB IDL https://html.spec.whatwg.org/#the-document-object
|
||||
pub const HTMLDocument = struct {
|
||||
pub const Self = parser.DocumentHTML;
|
||||
@@ -94,9 +107,10 @@ pub const HTMLDocument = struct {
|
||||
return try parser.documentHTMLGetTitle(self);
|
||||
}
|
||||
|
||||
pub fn set_title(self: *parser.DocumentHTML, v: []const u8) ![]const u8 {
|
||||
try parser.documentHTMLSetTitle(self, v);
|
||||
return v;
|
||||
pub fn set_title(self: *parser.DocumentHTML, v: []const u8, state: *SessionState) ![]const u8 {
|
||||
const normalized = try normalizeWhitespace(state.arena, v);
|
||||
try parser.documentHTMLSetTitle(self, normalized);
|
||||
return normalized;
|
||||
}
|
||||
|
||||
pub fn _getElementsByName(self: *parser.DocumentHTML, name: []const u8, state: *SessionState) !NodeList {
|
||||
|
||||
@@ -29,6 +29,7 @@ const c = @cImport({
|
||||
});
|
||||
|
||||
const mimalloc = @import("mimalloc.zig");
|
||||
const normalizeWhitespace = @import("html/document.zig").normalizeWhitespace;
|
||||
|
||||
// init initializes netsurf lib.
|
||||
// init starts a mimalloc heap arena for the netsurf session. The caller must
|
||||
@@ -2152,12 +2153,12 @@ fn parserErr(err: HubbubErr) ParserError!void {
|
||||
|
||||
// documentHTMLParseFromStr parses the given HTML string.
|
||||
// The caller is responsible for closing the document.
|
||||
pub fn documentHTMLParseFromStr(str: []const u8) !*DocumentHTML {
|
||||
pub fn documentHTMLParseFromStr(arena: std.mem.Allocator, str: []const u8) !*DocumentHTML {
|
||||
var fbs = std.io.fixedBufferStream(str);
|
||||
return try documentHTMLParse(fbs.reader(), "UTF-8");
|
||||
return try documentHTMLParse(arena, fbs.reader(), "UTF-8");
|
||||
}
|
||||
|
||||
pub fn documentHTMLParse(reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
|
||||
pub fn documentHTMLParse(arena: std.mem.Allocator, reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
|
||||
var parser: ?*c.dom_hubbub_parser = undefined;
|
||||
var doc: ?*c.dom_document = undefined;
|
||||
var err: c.hubbub_error = undefined;
|
||||
@@ -2169,7 +2170,11 @@ pub fn documentHTMLParse(reader: anytype, enc: ?[:0]const u8) !*DocumentHTML {
|
||||
|
||||
try parseData(parser.?, reader);
|
||||
|
||||
return @as(*DocumentHTML, @ptrCast(doc.?));
|
||||
const html_doc: *DocumentHTML = @ptrCast(doc.?);
|
||||
const old_title = try documentHTMLGetTitle(html_doc);
|
||||
const normalized = try normalizeWhitespace(arena, old_title);
|
||||
try documentHTMLSetTitle(html_doc, normalized);
|
||||
return html_doc;
|
||||
}
|
||||
|
||||
pub fn documentParseFragmentFromStr(self: *Document, str: []const u8) !*DocumentFragment {
|
||||
|
||||
@@ -248,7 +248,7 @@ pub const Page = struct {
|
||||
|
||||
const ccharset = try arena.dupeZ(u8, charset);
|
||||
|
||||
const html_doc = try parser.documentHTMLParse(reader, ccharset);
|
||||
const html_doc = try parser.documentHTMLParse(arena, reader, ccharset);
|
||||
const doc = parser.documentHTMLToDocument(html_doc);
|
||||
|
||||
// save a document's pointer in the page.
|
||||
|
||||
@@ -703,7 +703,7 @@ pub const XMLHttpRequest = struct {
|
||||
}
|
||||
|
||||
var fbs = std.io.fixedBufferStream(self.response_bytes.items);
|
||||
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
|
||||
const doc = parser.documentHTMLParse(self.arena, fbs.reader(), ccharset) catch {
|
||||
self.response_obj = .{ .Failure = {} };
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -214,11 +214,13 @@ pub const Document = struct {
|
||||
parser.deinit();
|
||||
try parser.init();
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(allocator);
|
||||
|
||||
var fbs = std.io.fixedBufferStream(html);
|
||||
const html_doc = try parser.documentHTMLParse(fbs.reader(), "utf-8");
|
||||
const html_doc = try parser.documentHTMLParse(arena.allocator(), fbs.reader(), "utf-8");
|
||||
|
||||
return .{
|
||||
.arena = std.heap.ArenaAllocator.init(allocator),
|
||||
.arena = arena,
|
||||
.doc = parser.documentHTMLToDocument(html_doc),
|
||||
};
|
||||
}
|
||||
@@ -410,7 +412,7 @@ pub const JsRunner = struct {
|
||||
errdefer self.loop.deinit();
|
||||
|
||||
var html = std.io.fixedBufferStream(opts.html);
|
||||
const document = try parser.documentHTMLParse(html.reader(), "UTF-8");
|
||||
const document = try parser.documentHTMLParse(arena, html.reader(), "UTF-8");
|
||||
|
||||
self.state = .{
|
||||
.arena = arena,
|
||||
|
||||
Reference in New Issue
Block a user