Merge pull request #1326 from lightpanda-io/wp/mrdimidim/use-css-tokenizer

Use css tokenizer for parsing style attrs
2026-02-04 06:23:45 +00:00 · 2026-01-07 18:09:06 +08:00
parent dd3de6efea b908b0bf8a
commit 21d502b81f
3 changed files with 303 additions and 24 deletions
--- a/src/browser/css/Parser.zig
+++ b/src/browser/css/Parser.zig
@@ -0,0 +1,295 @@
+// Copyright (C) 2023-2025  Lightpanda (Selecy SAS)
+//
+// Francis Bouvier <francis@lightpanda.io>
+// Pierre Tachoire <pierre@lightpanda.io>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+const std = @import("std");
+const Tokenizer = @import("Tokenizer.zig");
+
+pub const Declaration = struct {
+    name: []const u8,
+    value: []const u8,
+    important: bool,
+};
+
+const TokenSpan = struct {
+    token: Tokenizer.Token,
+    start: usize,
+    end: usize,
+};
+
+const TokenStream = struct {
+    tokenizer: Tokenizer,
+    peeked: ?TokenSpan = null,
+
+    fn init(input: []const u8) TokenStream {
+        return .{ .tokenizer = .{ .input = input } };
+    }
+
+    fn nextRaw(self: *TokenStream) ?TokenSpan {
+        const start = self.tokenizer.position;
+        const token = self.tokenizer.next() orelse return null;
+        const end = self.tokenizer.position;
+        return .{ .token = token, .start = start, .end = end };
+    }
+
+    fn next(self: *TokenStream) ?TokenSpan {
+        if (self.peeked) |token| {
+            self.peeked = null;
+            return token;
+        }
+        return self.nextRaw();
+    }
+
+    fn peek(self: *TokenStream) ?TokenSpan {
+        if (self.peeked == null) {
+            self.peeked = self.nextRaw();
+        }
+        return self.peeked;
+    }
+};
+
+pub fn parseDeclarationsList(input: []const u8) DeclarationsIterator {
+    return DeclarationsIterator.init(input);
+}
+
+pub const DeclarationsIterator = struct {
+    input: []const u8,
+    stream: TokenStream,
+
+    pub fn init(input: []const u8) DeclarationsIterator {
+        return .{
+            .input = input,
+            .stream = TokenStream.init(input),
+        };
+    }
+
+    pub fn next(self: *DeclarationsIterator) ?Declaration {
+        while (true) {
+            self.skipTriviaAndSemicolons();
+            const peeked = self.stream.peek() orelse return null;
+
+            switch (peeked.token) {
+                .at_keyword => {
+                    _ = self.stream.next();
+                    self.skipAtRule();
+                },
+                .ident => |name| {
+                    _ = self.stream.next();
+                    if (self.consumeDeclaration(name)) |declaration| {
+                        return declaration;
+                    }
+                },
+                else => {
+                    _ = self.stream.next();
+                    self.skipInvalidDeclaration();
+                },
+            }
+        }
+
+        return null;
+    }
+
+    fn consumeDeclaration(self: *DeclarationsIterator, name: []const u8) ?Declaration {
+        self.skipTrivia();
+
+        const colon = self.stream.next() orelse return null;
+        if (!isColon(colon.token)) {
+            self.skipInvalidDeclaration();
+            return null;
+        }
+
+        const value = self.consumeValue() orelse return null;
+        return .{
+            .name = name,
+            .value = value.value,
+            .important = value.important,
+        };
+    }
+
+    const ValueResult = struct {
+        value: []const u8,
+        important: bool,
+    };
+
+    fn consumeValue(self: *DeclarationsIterator) ?ValueResult {
+        self.skipTrivia();
+
+        var depth: usize = 0;
+        var start: ?usize = null;
+        var last_sig: ?TokenSpan = null;
+        var prev_sig: ?TokenSpan = null;
+
+        while (true) {
+            const peeked = self.stream.peek() orelse break;
+            if (isSemicolon(peeked.token) and depth == 0) {
+                _ = self.stream.next();
+                break;
+            }
+
+            const span = self.stream.next() orelse break;
+            if (isWhitespaceOrComment(span.token)) {
+                continue;
+            }
+
+            if (start == null) start = span.start;
+            prev_sig = last_sig;
+            last_sig = span;
+            updateDepth(span.token, &depth);
+        }
+
+        const value_start = start orelse return null;
+        const last = last_sig orelse return null;
+
+        var important = false;
+        var end_pos = last.end;
+
+        if (isImportantPair(prev_sig, last)) {
+            important = true;
+            const bang = prev_sig orelse return null;
+            if (value_start >= bang.start) return null;
+            end_pos = bang.start;
+        }
+
+        var value_slice = self.input[value_start..end_pos];
+        value_slice = std.mem.trim(u8, value_slice, &std.ascii.whitespace);
+        if (value_slice.len == 0) return null;
+
+        return .{ .value = value_slice, .important = important };
+    }
+
+    fn skipTrivia(self: *DeclarationsIterator) void {
+        while (self.stream.peek()) |peeked| {
+            if (!isWhitespaceOrComment(peeked.token)) break;
+            _ = self.stream.next();
+        }
+    }
+
+    fn skipTriviaAndSemicolons(self: *DeclarationsIterator) void {
+        while (self.stream.peek()) |peeked| {
+            if (isWhitespaceOrComment(peeked.token) or isSemicolon(peeked.token)) {
+                _ = self.stream.next();
+            } else {
+                break;
+            }
+        }
+    }
+
+    fn skipAtRule(self: *DeclarationsIterator) void {
+        var depth: usize = 0;
+        var saw_block = false;
+
+        while (true) {
+            const peeked = self.stream.peek() orelse return;
+            if (!saw_block and isSemicolon(peeked.token) and depth == 0) {
+                _ = self.stream.next();
+                return;
+            }
+
+            const span = self.stream.next() orelse return;
+            if (isWhitespaceOrComment(span.token)) continue;
+
+            if (isBlockStart(span.token)) {
+                depth += 1;
+                saw_block = true;
+            } else if (isBlockEnd(span.token)) {
+                if (depth > 0) depth -= 1;
+                if (saw_block and depth == 0) return;
+            }
+        }
+    }
+
+    fn skipInvalidDeclaration(self: *DeclarationsIterator) void {
+        var depth: usize = 0;
+
+        while (self.stream.peek()) |peeked| {
+            if (isSemicolon(peeked.token) and depth == 0) {
+                _ = self.stream.next();
+                return;
+            }
+
+            const span = self.stream.next() orelse return;
+            if (isWhitespaceOrComment(span.token)) continue;
+            updateDepth(span.token, &depth);
+        }
+    }
+};
+
+fn isWhitespaceOrComment(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .white_space, .comment => true,
+        else => false,
+    };
+}
+
+fn isSemicolon(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .semicolon => true,
+        else => false,
+    };
+}
+
+fn isColon(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .colon => true,
+        else => false,
+    };
+}
+
+fn isBlockStart(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .curly_bracket_block, .square_bracket_block, .parenthesis_block, .function => true,
+        else => false,
+    };
+}
+
+fn isBlockEnd(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .close_curly_bracket, .close_parenthesis, .close_square_bracket => true,
+        else => false,
+    };
+}
+
+fn updateDepth(token: Tokenizer.Token, depth: *usize) void {
+    if (isBlockStart(token)) {
+        depth.* += 1;
+        return;
+    }
+
+    if (isBlockEnd(token)) {
+        if (depth.* > 0) depth.* -= 1;
+    }
+}
+
+fn isImportantPair(prev_sig: ?TokenSpan, last_sig: TokenSpan) bool {
+    if (!isIdentImportant(last_sig.token)) return false;
+    const prev = prev_sig orelse return false;
+    return isBang(prev.token);
+}
+
+fn isIdentImportant(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .ident => |name| std.ascii.eqlIgnoreCase(name, "important"),
+        else => false,
+    };
+}
+
+fn isBang(token: Tokenizer.Token) bool {
+    return switch (token) {
+        .delim => |c| c == '!',
+        else => false,
+    };
+}
--- a/src/browser/css/Tokenizer.zig
+++ b/src/browser/css/Tokenizer.zig
@@ -644,8 +644,10 @@ fn consumeNumeric(self: *Tokenizer) Token {
 fn consumeUnquotedUrl(self: *Tokenizer) ?Token {
    // TODO: true url parser
    if (self.nextByte()) |it| {
-        self.consumeString(it == '\'');
+        return self.consumeString(it == '\'');
    }
+
+    return null;
 }

 fn consumeIdentLike(self: *Tokenizer) Token {
--- a/src/browser/webapi/css/CSSStyleDeclaration.zig
+++ b/src/browser/webapi/css/CSSStyleDeclaration.zig
@@ -20,6 +20,8 @@ const std = @import("std");
 const log = @import("../../../log.zig");
 const String = @import("../../../string.zig").String;

+const CssParser = @import("../../css/Parser.zig");
+
 const js = @import("../../js/js.zig");
 const Page = @import("../../Page.zig");
 const Element = @import("../Element.zig");
@@ -149,30 +151,10 @@ pub fn setCssText(self: *CSSStyleDeclaration, text: []const u8, page: *Page) !vo
    }

    // Parse and set new properties
-    // This is a simple parser - a full implementation would use a proper CSS parser
-    var it = std.mem.splitScalar(u8, text, ';');
+    var it = CssParser.parseDeclarationsList(text);
    while (it.next()) |declaration| {
-        const trimmed = std.mem.trim(u8, declaration, &std.ascii.whitespace);
-        if (trimmed.len == 0) continue;
-
-        if (std.mem.indexOfScalar(u8, trimmed, ':')) |colon_pos| {
-            const name = std.mem.trim(u8, trimmed[0..colon_pos], &std.ascii.whitespace);
-            const value_part = std.mem.trim(u8, trimmed[colon_pos + 1 ..], &std.ascii.whitespace);
-
-            var value = value_part;
-            var priority: ?[]const u8 = null;
-
-            // Check for !important
-            if (std.mem.lastIndexOfScalar(u8, value_part, '!')) |bang_pos| {
-                const after_bang = std.mem.trim(u8, value_part[bang_pos + 1 ..], &std.ascii.whitespace);
-                if (std.mem.eql(u8, after_bang, "important")) {
-                    value = std.mem.trimRight(u8, value_part[0..bang_pos], &std.ascii.whitespace);
-                    priority = "important";
-                }
-            }
-
-            try self.setProperty(name, value, priority, page);
-        }
+        const priority: ?[]const u8 = if (declaration.important) "important" else null;
+        try self.setProperty(declaration.name, declaration.value, priority, page);
    }
 }