feat(messages): KaTeX math rendering (P4-4)
Renders LaTeX via spec data-mx-maths spans/divs (KaTeX render of the attr, children as fallback) and conservative $…$ / $$…$$ text detection (escape-aware, currency-guarded, never inside code/pre). KaTeX + CSS load lazily on first math (ReactPrism pattern) — verified absent from the eager bundle. Sanitizer unchanged by design (we render post-sanitize from attr/text; no incoming MathML accepted). +14 unit tests. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { splitMathSegments } from './mathParse';
|
||||
|
||||
// Table-driven suite for `splitMathSegments`: every scenario is a test title
// plus one or more (input, expected segments) checks, registered in order.
type Segment = { type: 'text' | 'inline' | 'block'; value: string };

// Tiny constructors so the expectation tables stay readable.
const txt = (value: string): Segment => ({ type: 'text', value });
const inl = (value: string): Segment => ({ type: 'inline', value });
const blk = (value: string): Segment => ({ type: 'block', value });

const scenarios: ReadonlyArray<readonly [string, ReadonlyArray<readonly [string, Segment[]]>]> = [
  ['plain text with no dollars is a single text segment', [['hello world', [txt('hello world')]]]],
  ['empty string yields no segments', [['', []]]],
  [
    'inline $…$ is extracted between surrounding text',
    [['a $x^2$ b', [txt('a '), inl('x^2'), txt(' b')]]],
  ],
  ['block $$…$$ is extracted', [['$$block$$', [blk('block')]]]],
  ['block math may span newlines', [['$$\na=b\n$$', [blk('\na=b\n')]]]],
  ['currency "$5 and $10" is NOT treated as math', [['$5 and $10', [txt('$5 and $10')]]]],
  [
    'escaped \\$ never opens or closes math',
    [
      ['cost \\$5 today', [txt('cost $5 today')]],
      ['\\$x\\$', [txt('$x$')]],
    ],
  ],
  ['unbalanced single $ stays as text', [['price is $ here', [txt('price is $ here')]]]],
  ['unbalanced $$ stays as text', [['$$x', [txt('$$x')]]]],
  [
    'inline requires non-space adjacency on both delimiters',
    [
      // Whitespace directly after the opening `$` disqualifies the span.
      ['$ x$', [txt('$ x$')]],
      // Whitespace directly before the closing `$` disqualifies the span.
      ['$x $', [txt('$x $')]],
    ],
  ],
  ['multiple inline spans on one line', [['$a$ and $b$', [inl('a'), txt(' and '), inl('b')]]]],
  ['escaped dollar inside inline math is preserved in LaTeX', [['$a\\$b$', [inl('a\\$b')]]]],
  [
    'closing $ followed by a digit is skipped (currency guard) then recovers',
    // The first candidate closer is followed by `2`, so it is passed over and
    // the final `$` ends the span instead.
    [['$x$2 + y$', [inl('x$2 + y')]]],
  ],
  [
    'block and inline mixed with text',
    [
      [
        'see $$E=mc^2$$ and $a$ ok',
        [txt('see '), blk('E=mc^2'), txt(' and '), inl('a'), txt(' ok')],
      ],
    ],
  ],
];

for (const [title, checks] of scenarios) {
  test(title, () => {
    for (const [input, expected] of checks) {
      assert.deepEqual(splitMathSegments(input), expected);
    }
  });
}
|
||||
@@ -0,0 +1,136 @@
|
||||
/** The three kinds of segment a string can be split into. */
export type MathSegmentType =
  | 'text'
  | 'inline'
  | 'block';

/** One contiguous piece of a split string. */
export type MathSegment = {
  /** Which kind of segment this is. */
  type: MathSegmentType;
  /**
   * Literal text for `text` segments; for `inline`/`block` segments, the LaTeX
   * source with its surrounding `$`/`$$` delimiters already stripped.
   */
  value: string;
};
|
||||
|
||||
/**
 * Attempt to match an inline `$…$` span starting at `start` (the index of the
 * opening `$`).
 *
 * Conservative rules (chosen to keep false positives low for prose that merely
 * mentions currency, e.g. `$5 and $10`):
 * - The char immediately AFTER the opening `$` must exist, be non-space and not
 *   another `$` (a lone `$` before whitespace, or `$$`, never opens inline math).
 * - The char immediately BEFORE the closing `$` must be non-space (so `x $` is
 *   not a valid close; we keep scanning for a better `$`).
 * - The char immediately AFTER the closing `$` must not be a digit (so
 *   `$5 and $10` reads as currency, never math).
 * - A backslash escapes the following char inside the span, so `\$` is not
 *   treated as a delimiter and stays part of the LaTeX.
 * - Inline math may not span a newline. This holds even when the newline is
 *   directly preceded by a backslash: the escape skip must not jump over it.
 * - The LaTeX content must be non-empty.
 *
 * @param text  The full string being scanned.
 * @param start Index of the candidate opening `$` in `text`.
 * @returns The LaTeX between the delimiters and `end`, the index just past the
 *   closing `$`; or `null` when no valid inline span starts at `start`.
 */
const matchInline = (text: string, start: number): { value: string; end: number } | null => {
  // Built once per call rather than once per loop iteration.
  const whitespace = /\s/;
  const digit = /\d/;

  const nextChar = text[start + 1];
  if (nextChar === undefined || nextChar === '$' || whitespace.test(nextChar)) return null;

  let j = start + 1;
  while (j < text.length) {
    const c = text[j];

    if (c === '\n') return null;

    if (c === '\\') {
      // A backslash right before a newline must not let the span cross the
      // line break; without this check the two-char skip would hop over it.
      if (text[j + 1] === '\n') return null;
      // Skip the escaped char (covers `\$` inside the span).
      j += 2;
      continue;
    }

    if (c === '$') {
      const prev = text[j - 1];
      const after = text[j + 1];
      // Closing `$` must hug non-space content.
      const hugsContent = prev === undefined || !whitespace.test(prev);
      // A `$` directly followed by a digit is treated as currency, not a closer.
      const looksLikeCurrency = after !== undefined && digit.test(after);

      if (hugsContent && !looksLikeCurrency) {
        const value = text.slice(start + 1, j);
        return value.length === 0 ? null : { value, end: j + 1 };
      }
      // Not a valid closer; fall through and keep scanning for a later `$`.
    }

    j += 1;
  }

  // Ran off the end of the string without finding a valid closer.
  return null;
};
|
||||
|
||||
/**
 * Find the index of the `$$` that closes a block opened before `from`.
 *
 * Scans left to right from `from`; a backslash consumes the char after it, so
 * an escaped `\$` can never form part of the closing delimiter.
 *
 * @returns Index of the first unescaped `$$` at or after `from`, or -1 if none.
 */
const findBlockClose = (text: string, from: number): number => {
  let k = from;
  while (k < text.length - 1) {
    if (text[k] === '\\') {
      k += 2;
      continue;
    }
    if (text[k] === '$' && text[k + 1] === '$') return k;
    k += 1;
  }
  return -1;
};

/**
 * Split a plain-text string into text/inline-math/block-math segments.
 *
 * Delimiter rules:
 * - `$$…$$` (possibly multi-line) is block math; the first following unescaped
 *   `$$` closes it. A `\$` inside the block stays part of the LaTeX and is never
 *   read as (half of) the closer, so `$$5\$$$` yields the block `5\$`.
 * - `$…$` is inline math, subject to the conservative adjacency rules in
 *   {@link matchInline}.
 * - `\$` in surrounding text is an escaped literal dollar: it never acts as a
 *   delimiter and is emitted as a plain `$`.
 * - Any `$`/`$$` run that cannot be balanced, or a block whose content is empty
 *   or whitespace-only, is left verbatim as text.
 *
 * The function only reads `text` and builds a fresh array. Per the original
 * note, callers are expected not to apply it to text inside `<pre>`/`<code>`.
 *
 * @param text The plain text to split.
 * @returns Segments in source order; adjacent literal text is coalesced into a
 *   single `text` segment and an empty input yields an empty array.
 */
export const splitMathSegments = (text: string): MathSegment[] => {
  const segments: MathSegment[] = [];
  let buffer = '';
  let i = 0;

  // Emit any pending literal text as one segment.
  const flushText = () => {
    if (buffer.length > 0) {
      segments.push({ type: 'text', value: buffer });
      buffer = '';
    }
  };

  while (i < text.length) {
    // Escaped dollar: consume `\$` and emit a literal `$` as text.
    if (text[i] === '\\' && text[i + 1] === '$') {
      buffer += '$';
      i += 2;
      continue;
    }

    if (text.startsWith('$$', i)) {
      // Block math `$$…$$`, closed by the first unescaped `$$`.
      const close = findBlockClose(text, i + 2);
      if (close !== -1) {
        const value = text.slice(i + 2, close);
        if (value.trim().length > 0) {
          flushText();
          segments.push({ type: 'block', value });
          i = close + 2;
          continue;
        }
      }
      // Unbalanced/empty `$$`: fall through to emit one `$` as text. The
      // second `$` is re-examined on the next iteration.
    } else if (text[i] === '$') {
      // Inline math `$…$`.
      const match = matchInline(text, i);
      if (match) {
        flushText();
        segments.push({ type: 'inline', value: match.value });
        i = match.end;
        continue;
      }
    }

    // Ordinary character, or a `$` that did not open any math.
    buffer += text[i];
    i += 1;
  }

  flushText();
  return segments;
};
|
||||
Reference in New Issue
Block a user