From ed51c39fe79d6111d4ecc13d2cdf3b4726a6dd90 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Wed, 1 Jul 2026 21:19:02 -0400 Subject: [PATCH] feat(messages): KaTeX math rendering (P4-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renders LaTeX via spec data-mx-maths spans/divs (KaTeX render of the attr, children as fallback) and conservative $…$ / $$…$$ text detection (escape-aware, currency-guarded, never inside code/pre). KaTeX + CSS load lazily on first math (ReactPrism pattern) — verified absent from the eager bundle. Sanitizer unchanged by design (we render post-sanitize from attr/text; no incoming MathML accepted). +14 unit tests. Co-Authored-By: Claude Opus 4.8 --- package-lock.json | 34 +++++ package.json | 2 + src/app/components/math/KaTeX.tsx | 41 ++++++ src/app/plugins/react-custom-html-parser.tsx | 91 +++++++++++-- src/app/utils/mathParse.test.ts | 83 +++++++++++ src/app/utils/mathParse.ts | 136 +++++++++++++++++++ 6 files changed, 377 insertions(+), 10 deletions(-) create mode 100644 src/app/components/math/KaTeX.tsx create mode 100644 src/app/utils/mathParse.test.ts create mode 100644 src/app/utils/mathParse.ts diff --git a/package-lock.json b/package-lock.json index 23bac371b..a0f4c488f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -51,6 +51,7 @@ "immer": "11.1.8", "is-hotkey": "0.2.0", "jotai": "2.20.0", + "katex": "0.16.11", "linkify-react": "4.3.3", "linkifyjs": "4.3.3", "matrix-js-sdk": "41.6.0-rc.0", @@ -83,6 +84,7 @@ "@types/chroma-js": "3.1.2", "@types/file-saver": "2.0.7", "@types/is-hotkey": "0.1.10", + "@types/katex": "0.16.8", "@types/node": "25.9.1", "@types/prismjs": "1.26.6", "@types/react": "19.2.15", @@ -3974,6 +3976,13 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/katex": { + "version": "0.16.8", + "resolved": 
"https://registry.npmjs.org/@types/katex/-/katex-0.16.8.tgz", + "integrity": "sha512-trgaNyfU+Xh2Tc+ABIb44a5AYUpicB3uwirOioeOkNPPbmgRNtcWyDeeFRzjPZENO9Vq8gvVqfhaaXWLlevVwg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/node": { "version": "25.9.1", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.9.1.tgz", @@ -9087,6 +9096,31 @@ "node": ">=18" } }, + "node_modules/katex": { + "version": "0.16.11", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.11.tgz", + "integrity": "sha512-RQrI8rlHY92OLf3rho/Ts8i/XvjgguEjOkO1BEXcU3N8BqPpSzBNwV/G0Ukr+P/l3ivvJUE/Fa/CwbS6HesGNQ==", + "funding": [ + "https://opencollective.com/katex", + "https://github.com/sponsors/katex" + ], + "license": "MIT", + "dependencies": { + "commander": "^8.3.0" + }, + "bin": { + "katex": "cli.js" + } + }, + "node_modules/katex/node_modules/commander": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", + "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", diff --git a/package.json b/package.json index ca9d5726a..b94e088c7 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "immer": "11.1.8", "is-hotkey": "0.2.0", "jotai": "2.20.0", + "katex": "0.16.11", "linkify-react": "4.3.3", "linkifyjs": "4.3.3", "matrix-js-sdk": "41.6.0-rc.0", @@ -108,6 +109,7 @@ "@types/chroma-js": "3.1.2", "@types/file-saver": "2.0.7", "@types/is-hotkey": "0.1.10", + "@types/katex": "0.16.8", "@types/node": "25.9.1", "@types/prismjs": "1.26.6", "@types/react": "19.2.15", diff --git a/src/app/components/math/KaTeX.tsx b/src/app/components/math/KaTeX.tsx new file mode 100644 index 000000000..ebaf17e7d --- /dev/null +++ b/src/app/components/math/KaTeX.tsx @@ -0,0 +1,41 @@ +import React from 'react'; +import 
katex from 'katex'; +import 'katex/dist/katex.min.css'; + +type KaTeXProps = { + /** Raw LaTeX source (without `$`/`$$` delimiters). */ + latex: string; + /** Render as block (display) math when true, inline otherwise. */ + displayMode?: boolean; +}; + +/** + * Lazily-loaded KaTeX renderer. + * + * This module statically imports `katex` and its stylesheet, so both only enter + * the bundle via the dynamic `import()` of this file (see the `lazy()` wrapper + * in `react-custom-html-parser.tsx`). They are therefore NOT part of the eager + * import graph. + * + * We render with `throwOnError: false`, so KaTeX itself renders a parse error + * inline (in its error colour) rather than throwing. The HTML returned by + * `renderToString` is produced by our own trusted call from a fixed options + * object — it is safe to inject via `dangerouslySetInnerHTML`. + */ +export default function KaTeX({ latex, displayMode = false }: KaTeXProps) { + const html = katex.renderToString(latex, { + displayMode, + throwOnError: false, + output: 'htmlAndMathml', + }); + + const Wrapper = displayMode ? 'div' : 'span'; + + return ( + + ); +} diff --git a/src/app/plugins/react-custom-html-parser.tsx b/src/app/plugins/react-custom-html-parser.tsx index 5f3a33531..35bc4bff1 100644 --- a/src/app/plugins/react-custom-html-parser.tsx +++ b/src/app/plugins/react-custom-html-parser.tsx @@ -43,9 +43,14 @@ import { onEnterOrSpace } from '../utils/keyboard'; import { copyToClipboard, tryDecodeURIComponent } from '../utils/dom'; import { useTimeoutToggle } from '../hooks/useTimeoutToggle'; import { tokenize, tokenStyle } from '../utils/syntaxHighlight'; +import { splitMathSegments } from '../utils/mathParse'; const ReactPrism = lazy(() => import('./react-prism/ReactPrism')); +// KaTeX (and its CSS) is heavy, so it is code-split behind this dynamic import +// and is NOT part of the eager import graph — see src/app/components/math/KaTeX.tsx. 
+const KaTeXMath = lazy(() => import('../components/math/KaTeX')); + /** Languages handled by the custom TDS tokenizer. */ const TDS_TOKENIZER_LANGS = new Set([ 'js', @@ -78,6 +83,27 @@ function renderTokenizedCode(code: string, lang: string): React.ReactNode { )); } +/** + * Renders LaTeX via the lazily-loaded KaTeX component. + * + * `suspenseFallback` is shown while the KaTeX chunk loads (the raw LaTeX text). + * `errorFallback` is shown if rendering fails outright — for the spec + * `data-mx-maths` path this is the element's original children (the spec + * fallback content); for the plain-text `$…$` path it is the raw source. + */ +const renderMath = ( + latex: string, + displayMode: boolean, + suspenseFallback: React.ReactNode, + errorFallback: React.ReactNode, +): JSX.Element => ( + {errorFallback}}> + {suspenseFallback}}> + + + +); + const EMOJI_REG_G = new RegExp(`${URL_NEG_LB}(${EMOJI_PATTERN})`, 'g'); export const LINKIFY_OPTS: LinkifyOpts = { @@ -503,6 +529,21 @@ export const getReactCustomHtmlParser = ( if (mention) return mention; } + if ((name === 'span' || name === 'div') && 'data-mx-maths' in props) { + // Spec (CS-API §11.5): render the `data-mx-maths` LaTeX with KaTeX + // (block for
, inline for ). On failure fall back to the + // element's existing children, which the spec defines as the fallback + // representation. + const latex = String(props['data-mx-maths']); + const displayMode = name === 'div'; + const fallback = displayMode ? ( +
{domToReact(children as unknown as DOMNode[], opts)}
+ ) : ( + {domToReact(children as unknown as DOMNode[], opts)} + ); + return renderMath(latex, displayMode, latex, fallback); + } + if (name === 'span' && 'data-mx-spoiler' in props) { return ( / (verbatim regions). + const mathAllowed = parentName !== 'code' && parentName !== 'pre'; - let jsx = scaleSystemEmoji(domNode.data); + const renderTextChunk = (text: string): (string | JSX.Element)[] | JSX.Element => { + let jsx = scaleSystemEmoji(text); + if (params.highlightRegex) { + jsx = highlightText(params.highlightRegex, jsx); + } + if (linkify) { + return {jsx}; + } + return jsx; + }; - if (params.highlightRegex) { - jsx = highlightText(params.highlightRegex, jsx); + if (mathAllowed) { + const segments = splitMathSegments(domNode.data); + if (segments.some((segment) => segment.type !== 'text')) { + return ( + <> + {segments.map((segment, index) => { + if (segment.type === 'text') { + // eslint-disable-next-line react/no-array-index-key + return ( + {renderTextChunk(segment.value)} + ); + } + const raw = + segment.type === 'block' ? 
`$$${segment.value}$$` : `$${segment.value}$`; + return ( + // eslint-disable-next-line react/no-array-index-key + + {renderMath(segment.value, segment.type === 'block', raw, raw)} + + ); + })} + + ); + } } - if (linkify) { - return {jsx}; - } - return jsx; + return renderTextChunk(domNode.data); } return undefined; }, diff --git a/src/app/utils/mathParse.test.ts b/src/app/utils/mathParse.test.ts new file mode 100644 index 000000000..4824fe09a --- /dev/null +++ b/src/app/utils/mathParse.test.ts @@ -0,0 +1,83 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { splitMathSegments } from './mathParse'; + +test('plain text with no dollars is a single text segment', () => { + assert.deepEqual(splitMathSegments('hello world'), [{ type: 'text', value: 'hello world' }]); +}); + +test('empty string yields no segments', () => { + assert.deepEqual(splitMathSegments(''), []); +}); + +test('inline $…$ is extracted between surrounding text', () => { + assert.deepEqual(splitMathSegments('a $x^2$ b'), [ + { type: 'text', value: 'a ' }, + { type: 'inline', value: 'x^2' }, + { type: 'text', value: ' b' }, + ]); +}); + +test('block $$…$$ is extracted', () => { + assert.deepEqual(splitMathSegments('$$block$$'), [{ type: 'block', value: 'block' }]); +}); + +test('block math may span newlines', () => { + assert.deepEqual(splitMathSegments('$$\na=b\n$$'), [{ type: 'block', value: '\na=b\n' }]); +}); + +test('currency "$5 and $10" is NOT treated as math', () => { + assert.deepEqual(splitMathSegments('$5 and $10'), [{ type: 'text', value: '$5 and $10' }]); +}); + +test('escaped \\$ never opens or closes math', () => { + assert.deepEqual(splitMathSegments('cost \\$5 today'), [ + { type: 'text', value: 'cost $5 today' }, + ]); + assert.deepEqual(splitMathSegments('\\$x\\$'), [{ type: 'text', value: '$x$' }]); +}); + +test('unbalanced single $ stays as text', () => { + assert.deepEqual(splitMathSegments('price is $ here'), [ + { type: 'text', value: 'price 
is $ here' }, + ]); +}); + +test('unbalanced $$ stays as text', () => { + assert.deepEqual(splitMathSegments('$$x'), [{ type: 'text', value: '$$x' }]); +}); + +test('inline requires non-space adjacency on both delimiters', () => { + // Space right after opening $ -> not math. + assert.deepEqual(splitMathSegments('$ x$'), [{ type: 'text', value: '$ x$' }]); + // Space right before closing $ -> not math. + assert.deepEqual(splitMathSegments('$x $'), [{ type: 'text', value: '$x $' }]); +}); + +test('multiple inline spans on one line', () => { + assert.deepEqual(splitMathSegments('$a$ and $b$'), [ + { type: 'inline', value: 'a' }, + { type: 'text', value: ' and ' }, + { type: 'inline', value: 'b' }, + ]); +}); + +test('escaped dollar inside inline math is preserved in LaTeX', () => { + assert.deepEqual(splitMathSegments('$a\\$b$'), [{ type: 'inline', value: 'a\\$b' }]); +}); + +test('closing $ followed by a digit is skipped (currency guard) then recovers', () => { + // The first candidate closer is followed by `2` so it is skipped; the later + // `$` closes the span. + assert.deepEqual(splitMathSegments('$x$2 + y$'), [{ type: 'inline', value: 'x$2 + y' }]); +}); + +test('block and inline mixed with text', () => { + assert.deepEqual(splitMathSegments('see $$E=mc^2$$ and $a$ ok'), [ + { type: 'text', value: 'see ' }, + { type: 'block', value: 'E=mc^2' }, + { type: 'text', value: ' and ' }, + { type: 'inline', value: 'a' }, + { type: 'text', value: ' ok' }, + ]); +}); diff --git a/src/app/utils/mathParse.ts b/src/app/utils/mathParse.ts new file mode 100644 index 000000000..c228b3748 --- /dev/null +++ b/src/app/utils/mathParse.ts @@ -0,0 +1,136 @@ +export type MathSegmentType = 'text' | 'inline' | 'block'; + +export type MathSegment = { + type: MathSegmentType; + /** + * For `text` segments this is the literal text. For `inline`/`block` segments + * this is the LaTeX source WITHOUT its surrounding `$`/`$$` delimiters. 
+ */ + value: string; +}; + +/** + * Attempt to match an inline `$…$` span starting at `start` (the index of the + * opening `$`). + * + * Conservative rules (chosen to keep false positives low for prose that merely + * mentions currency, e.g. `$5 and $10`): + * - The char immediately AFTER the opening `$` must exist, be non-space and not + * another `$` (a lone `$` before whitespace, or `$$`, never opens inline math). + * - The char immediately BEFORE the closing `$` must be non-space (so `x $` is + * not a valid close; we keep scanning for a better `$`). + * - The char immediately AFTER the closing `$` must not be a digit (so + * `$5 and $10` reads as currency, never math). + * - A backslash escapes the following char inside the span, so `\$` is not + * treated as a delimiter and stays part of the LaTeX. + * - Inline math may not span a newline. + * - The LaTeX content must be non-empty. + */ +const matchInline = (text: string, start: number): { value: string; end: number } | null => { + const nextChar = text[start + 1]; + if (nextChar === undefined || /\s/.test(nextChar) || nextChar === '$') return null; + + let j = start + 1; + while (j < text.length) { + const c = text[j]; + if (c === '\\') { + // Skip the escaped char (covers `\$` inside the span). + j += 2; + continue; + } + if (c === '\n') return null; + if (c === '$') { + const prev = text[j - 1]; + // Closing `$` must hug non-space; otherwise this `$` cannot close, keep scanning. + if (prev !== undefined && /\s/.test(prev)) { + j += 1; + continue; + } + const after = text[j + 1]; + // A `$` directly followed by a digit is treated as currency, not a closer. + if (after !== undefined && /\d/.test(after)) { + j += 1; + continue; + } + const value = text.slice(start + 1, j); + if (value.length === 0) return null; + return { value, end: j + 1 }; + } + j += 1; + } + return null; +}; + +/** + * Split a plain-text string into text/inline-math/block-math segments. 
+ * + * Delimiter rules: + * - `$$…$$` (possibly multi-line) is block math; the first following `$$` closes it. + * - `$…$` is inline math, subject to the conservative adjacency rules in + * {@link matchInline}. + * - `\$` is an escaped literal dollar: it never acts as a delimiter and is + * emitted as a plain `$` in the surrounding text. + * - Any `$`/`$$` run that cannot be balanced is left verbatim as text. + * + * This is a PURE function used by the HTML parser to render math with KaTeX. It + * must never be applied to text inside `<pre>`/`<code>` (the caller guards that).
+ */
+export const splitMathSegments = (text: string): MathSegment[] => { // returns [] for '' (loop never runs, nothing to flush)
+  const segments: MathSegment[] = []; // output, in source order
+  let buffer = ''; // literal text accumulated since the last emitted segment
+  let i = 0; // index of the next unprocessed UTF-16 code unit in `text`
+  // Emits the pending literal text (if any) as one `text` segment and resets the buffer.
+  const flushText = () => {
+    if (buffer.length > 0) {
+      segments.push({ type: 'text', value: buffer });
+      buffer = '';
+    }
+  };
+  // Single left-to-right pass; every branch below advances `i` by at least 1, so the loop terminates.
+  while (i < text.length) {
+    // Escaped dollar: consume `\$` and emit a literal `$` as text (the backslash is dropped). Checked first so it wins over both math branches.
+    if (text[i] === '\\' && text[i + 1] === '$') {
+      buffer += '$';
+      i += 2;
+      continue;
+    }
+
+    // Block math `$$…$$` — tried before inline math, so a `$$` run is first considered as a block opener.
+    if (text.startsWith('$$', i)) {
+      const close = text.indexOf('$$', i + 2); // first `$$` after the opener. NOTE(review): plain search, not escape-aware (unlike the inline scan) — confirm intended
+      if (close !== -1) {
+        const value = text.slice(i + 2, close); // LaTeX between the delimiters; may contain newlines
+        if (value.trim().length > 0) { // empty or whitespace-only content is not treated as math
+          flushText();
+          segments.push({ type: 'block', value });
+          i = close + 2; // resume just past the closing `$$`
+          continue;
+        }
+      }
+      // Unbalanced/empty `$$` — emit only the first `$` as text; the second `$` is re-examined on the next iteration.
+      buffer += text[i];
+      i += 1;
+      continue;
+    }
+
+    // Inline math `$…$`: `matchInline` decides whether this `$` opens a valid span.
+    if (text[i] === '$') {
+      const match = matchInline(text, i);
+      if (match) {
+        flushText();
+        segments.push({ type: 'inline', value: match.value });
+        i = match.end; // index just past the closing `$`
+        continue;
+      }
+      buffer += text[i]; // no valid span: keep this `$` as literal text
+      i += 1;
+      continue;
+    }
+    // Any other character is literal text.
+    buffer += text[i];
+    i += 1;
+  }
+  // Emit whatever literal text trails the last math segment.
+  flushText();
+  return segments;
+};