feat(messages): KaTeX math rendering (P4-4)

Renders LaTeX via spec data-mx-maths spans/divs (KaTeX render of the attr, children as fallback) and conservative $…$ / $$…$$ text detection (escape-aware, currency-guarded, never inside code/pre). KaTeX + CSS load lazily on first math (ReactPrism pattern) — verified absent from the eager bundle. Sanitizer unchanged by design (we render post-sanitize from attr/text; no incoming MathML accepted). +14 unit tests. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-01 21:19:02 -04:00
parent c1efa7b94e
commit ed51c39fe7
6 changed files with 377 additions and 10 deletions
@@ -0,0 +1,136 @@
+/** Kind of segment: literal text, inline math (`$…$`), or block math (`$$…$$`). */
+export type MathSegmentType = 'text' | 'inline' | 'block';
+
+/** One piece of a split string: either literal text or a LaTeX source fragment. */
+export type MathSegment = {
+  /** Tells literal text apart from inline and block math. */
+  type: MathSegmentType;
+  /**
+   * For `text` segments this is the literal text. For `inline`/`block` segments
+   * this is the LaTeX source WITHOUT its surrounding `$`/`$$` delimiters.
+   */
+  value: string;
+};
+
+/**
+ * Attempt to match an inline `$…$` span starting at `start` (the index of the
+ * opening `$`).
+ *
+ * Conservative rules (chosen to keep false positives low for prose that merely
+ * mentions currency, e.g. `$5 and $10`):
+ *  - The char immediately AFTER the opening `$` must exist, be non-space and not
+ *    another `$` (a lone `$` before whitespace, or `$$`, never opens inline math).
+ *  - The char immediately BEFORE the closing `$` must be non-space (so `x $` is
+ *    not a valid close; we keep scanning for a better `$`).
+ *  - The char immediately AFTER the closing `$` must not be a digit (so
+ *    `$5 and $10` reads as currency, never math).
+ *  - A backslash escapes the following char inside the span, so `\$` is not
+ *    treated as a delimiter and stays part of the LaTeX.
+ *  - Inline math may not span a newline, including a newline that directly
+ *    follows a backslash.
+ *  - The LaTeX content is never empty: the opening rule guarantees at least one
+ *    non-`$` character before any candidate closer.
+ *
+ * @param text  Full string being scanned.
+ * @param start Index of the candidate opening `$` in `text`.
+ * @returns The LaTeX source between the delimiters and the index just past the
+ *   closing `$`, or `null` when no valid inline span starts at `start`.
+ */
+const matchInline = (text: string, start: number): { value: string; end: number } | null => {
+  // Built once per call instead of once per loop iteration.
+  const whitespace = /\s/;
+  const digit = /\d/;
+
+  const nextChar = text[start + 1];
+  if (nextChar === undefined || nextChar === '$' || whitespace.test(nextChar)) return null;
+
+  let j = start + 1;
+  while (j < text.length) {
+    const c = text[j];
+    if (c === '\\') {
+      // A backslash must not smuggle a line break into the span: without this
+      // check the two-char skip below would jump over the newline test.
+      if (text[j + 1] === '\n') return null;
+      // Skip the escaped char (covers `\$` inside the span).
+      j += 2;
+      continue;
+    }
+    if (c === '\n') return null;
+    if (c === '$') {
+      const prev = text[j - 1];
+      const after = text[j + 1];
+      // Closing `$` must hug non-space; otherwise this `$` cannot close.
+      const hugsContent = prev === undefined || !whitespace.test(prev);
+      // A `$` directly followed by a digit is treated as currency, not a closer.
+      const looksLikeCurrency = after !== undefined && digit.test(after);
+      if (hugsContent && !looksLikeCurrency) {
+        return { value: text.slice(start + 1, j), end: j + 1 };
+      }
+      // Not a valid closer: fall through and keep scanning for a better `$`.
+    }
+    j += 1;
+  }
+  return null;
+};
+
+/**
+ * Find the index of the first unescaped `$$` at or after `from`.
+ *
+ * A backslash escapes the following char, so the `$` in `\$` can never start a
+ * closing `$$`. Returns -1 when no unescaped `$$` exists.
+ */
+const findBlockClose = (text: string, from: number): number => {
+  let k = from;
+  while (k < text.length - 1) {
+    if (text[k] === '\\') {
+      // Skip the backslash together with the char it escapes.
+      k += 2;
+      continue;
+    }
+    if (text[k] === '$' && text[k + 1] === '$') return k;
+    k += 1;
+  }
+  return -1;
+};
+
+/**
+ * Split a plain-text string into text/inline-math/block-math segments.
+ *
+ * Delimiter rules:
+ *  - `$$…$$` (possibly multi-line) is block math; the first following unescaped
+ *    `$$` closes it, so an escaped dollar such as the one in `$$a\$$$` stays
+ *    part of the LaTeX instead of ending the block early.
+ *  - `$…$` is inline math, subject to the conservative adjacency rules in
+ *    {@link matchInline}.
+ *  - `\$` is an escaped literal dollar: it never acts as a delimiter and, when
+ *    it appears outside math, is emitted as a plain `$` in the surrounding text.
+ *  - Any `$`/`$$` run that cannot be balanced is left verbatim as text.
+ *
+ * This is a PURE function used by the HTML parser to render math with KaTeX. It
+ * must never be applied to text inside `<pre>`/`<code>` (the caller guards that).
+ *
+ * @param text Plain text to split.
+ * @returns Segments in source order; adjacent literal text is merged into one
+ *   `text` segment and empty text segments are never emitted.
+ */
+export const splitMathSegments = (text: string): MathSegment[] => {
+  const segments: MathSegment[] = [];
+  let buffer = '';
+  let i = 0;
+
+  // Emit any pending literal text as one segment before a math segment or at the end.
+  const flushText = () => {
+    if (buffer.length > 0) {
+      segments.push({ type: 'text', value: buffer });
+      buffer = '';
+    }
+  };
+
+  while (i < text.length) {
+    // Escaped dollar: consume `\$` and emit a literal `$` as text.
+    if (text[i] === '\\' && text[i + 1] === '$') {
+      buffer += '$';
+      i += 2;
+      continue;
+    }
+
+    // Block math `$$…$$`.
+    if (text.startsWith('$$', i)) {
+      // Escape-aware search: a plain indexOf would stop at the `$` of `\$$`.
+      const close = findBlockClose(text, i + 2);
+      if (close !== -1) {
+        const value = text.slice(i + 2, close);
+        if (value.trim().length > 0) {
+          flushText();
+          segments.push({ type: 'block', value });
+          i = close + 2;
+          continue;
+        }
+      }
+      // Unbalanced/empty `$$` — emit a single `$` and continue scanning.
+      buffer += text[i];
+      i += 1;
+      continue;
+    }
+
+    // Inline math `$…$`.
+    if (text[i] === '$') {
+      const match = matchInline(text, i);
+      if (match) {
+        flushText();
+        segments.push({ type: 'inline', value: match.value });
+        i = match.end;
+        continue;
+      }
+      // Not a valid opener: keep the `$` as literal text.
+      buffer += text[i];
+      i += 1;
+      continue;
+    }
+
+    buffer += text[i];
+    i += 1;
+  }
+
+  flushText();
+  return segments;
+};