From 13b008b56ce4ce34ab64bae14b0e9fb09d55943d Mon Sep 17 00:00:00 2001
From: Pierre Tachoire <pierre@lightpanda.io>
Date: Tue, 3 Mar 2026 11:13:30 +0100
Subject: [PATCH] css: fix crash in consumeName() on UTF-8 multibyte sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

advance() asserts that each byte it steps over is either an ASCII byte
or a UTF-8 sequence leader, never a continuation byte (0x80–0xBF).
consumeName() was calling advance(1) for all non-ASCII bytes
('\x80'...'\xFF'), processing multi-byte sequences one byte at a time.
For a two-byte sequence like é (0xC3 0xA9), the second iteration landed
on the continuation byte 0xA9 and triggered the assertion, crashing the
browser in Debug mode.

Fix: replace advance(1) with consumeChar() for all non-ASCII bytes.
consumeChar() reads the lead byte, derives the sequence length via
utf8ByteSequenceLength, and advances the full code point in one step,
so the position never rests on a continuation byte.

Observed on saintcyrlecole.caliceo.com, whose root element carries an
inline style with custom property names containing French accented
characters (--color-store-bulles-été-fg, etc.). The crash aborted JS
execution before the Angular app could render any dynamic content.
---
 src/browser/css/Tokenizer.zig         |  9 +++++----
 src/browser/tests/css/stylesheet.html | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/src/browser/css/Tokenizer.zig b/src/browser/css/Tokenizer.zig
index e90c8d46..2e183ef3 100644
--- a/src/browser/css/Tokenizer.zig
+++ b/src/browser/css/Tokenizer.zig
@@ -480,10 +480,11 @@ fn consumeName(self: *Tokenizer) []const u8 {
                 self.consumeEscape();
             },
             0x0 => self.advance(1),
-            '\x80'...'\xBF', '\xC0'...'\xEF', '\xF0'...'\xFF' => {
-                // This byte *is* part of a multi-byte code point,
-                // we’ll end up copying the whole code point before this loop does something else.
-                self.advance(1);
+            '\x80'...'\xFF' => {
+                // Non-ASCII: advance over the complete UTF-8 code point in one step.
+                // Using consumeChar() instead of advance(1) ensures we never land on
+                // a continuation byte, which advance() asserts against.
+                self.consumeChar();
             },
             else => {
                 if (self.hasNonAsciiAt(0)) {
diff --git a/src/browser/tests/css/stylesheet.html b/src/browser/tests/css/stylesheet.html
index ec14f4fc..59f04d47 100644
--- a/src/browser/tests/css/stylesheet.html
+++ b/src/browser/tests/css/stylesheet.html
@@ -256,3 +256,22 @@
     testing.expectTrue(!html.includes('opacity:0'));
 }
 </script>
+
+<script id="CSSStyleDeclaration_non_ascii_custom_property">
+{
+    // Regression test: reading element.style must not crash when the inline
+    // style attribute declares custom properties whose names contain UTF-8
+    // multibyte characters. \u00e9 is "é", encoded in UTF-8 as 0xC3 0xA9.
+    // The CSS Tokenizer's consumeName() must step over each code point whole;
+    // going byte-by-byte lands on the continuation byte and trips an assertion.
+    const div = document.createElement('div');
+    div.setAttribute('style',
+        '--color-store-bulles-\u00e9t\u00e9-fg: #6a818f;' +
+        '--color-store-soir\u00e9es-odl-fg: #56b3b3;' +
+        'color: red;'
+    );
+
+    // Only the trailing ASCII property is asserted: reading it back shows parsing got past the non-ASCII names.
+    testing.expectEqual('red', div.style.getPropertyValue('color'));
+}
+</script>