export type MathSegmentType = 'text' | 'inline' | 'block';

export type MathSegment = {
  type: MathSegmentType;
  /**
   * For `text` segments this is the literal text. For `inline`/`block` segments
   * this is the LaTeX source WITHOUT its surrounding `$`/`$$` delimiters.
   */
  value: string;
};

// Hoisted so the scan loop in `matchInline` does not rebuild them per character.
// Neither uses the `g` flag, so `.test()` carries no state between calls.
const WHITESPACE_RE = /\s/;
const DIGIT_RE = /\d/;

/**
 * Attempt to match an inline `$…$` span starting at `start` (the index of the
 * opening `$`).
 *
 * Conservative rules (chosen to keep false positives low for prose that merely
 * mentions currency, e.g. `$5 and $10`):
 * - The char immediately AFTER the opening `$` must exist, be non-space and not
 *   another `$` (a lone `$` before whitespace, or `$$`, never opens inline math).
 * - The char immediately BEFORE the closing `$` must be non-space (so `x $` is
 *   not a valid close; we keep scanning for a better `$`).
 * - The char immediately AFTER the closing `$` must not be a digit (so
 *   `$5 and $10` reads as currency, never math).
 * - A backslash escapes the following char inside the span, so `\$` is not
 *   treated as a delimiter and stays part of the LaTeX.
 * - Inline math may not span a newline. This holds even when the newline is
 *   directly preceded by a backslash: an "escaped" newline still aborts the match.
 * - The LaTeX content must be non-empty.
 *
 * @param text - The full string being scanned.
 * @param start - Index of the candidate opening `$` within `text`.
 * @returns The LaTeX between the delimiters plus `end`, the index just past the
 *   closing `$`; or `null` when no valid inline span starts at `start`.
 */
const matchInline = (text: string, start: number): { value: string; end: number } | null => {
  const nextChar = text[start + 1];
  if (nextChar === undefined || WHITESPACE_RE.test(nextChar) || nextChar === '$') return null;

  let j = start + 1;
  while (j < text.length) {
    const c = text[j];
    if (c === '\\') {
      // The escape must not hide a line break: inline math never spans a newline.
      if (text[j + 1] === '\n') return null;
      // Skip the escaped char (covers `\$` inside the span).
      j += 2;
      continue;
    }
    if (c === '\n') return null;
    if (c === '$') {
      const prev = text[j - 1];
      // Closing `$` must hug non-space; otherwise this `$` cannot close, keep scanning.
      if (prev !== undefined && WHITESPACE_RE.test(prev)) {
        j += 1;
        continue;
      }
      const after = text[j + 1];
      // A `$` directly followed by a digit is treated as currency, not a closer.
      if (after !== undefined && DIGIT_RE.test(after)) {
        j += 1;
        continue;
      }
      const value = text.slice(start + 1, j);
      // Defensive: the opening-char rule already keeps the content non-empty.
      if (value.length === 0) return null;
      return { value, end: j + 1 };
    }
    j += 1;
  }
  return null;
};

/**
 * Split a plain-text string into text/inline-math/block-math segments.
 *
 * Delimiter rules:
 * - `$$…$$` (possibly multi-line) is block math; the first following `$$` closes it.
 * - `$…$` is inline math, subject to the conservative adjacency rules in
 *   {@link matchInline}.
 * - `\$` is an escaped literal dollar: it never acts as a delimiter and is
 *   emitted as a plain `$` in the surrounding text.
 * - Any `$`/`$$` run that cannot be balanced is left verbatim as text.
 *
 * This is a PURE function used by the HTML parser to render math with KaTeX. It
 * must never be applied to text inside code-like elements (the caller guards that).
 * NOTE(review): the element names were lost from this comment — presumably
 * `<pre>`/`<code>`; confirm against the caller.
 */
export const splitMathSegments = (text: string): MathSegment[] => {
  const out: MathSegment[] = [];
  // Plain text accumulated since the last math segment was emitted.
  let pending = '';

  // Emit the accumulated plain text (if any) as a single `text` segment.
  const emitPending = (): void => {
    if (pending.length === 0) return;
    out.push({ type: 'text', value: pending });
    pending = '';
  };

  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];

    // `\$` is an escaped dollar: drop the backslash, keep a literal `$` as text.
    if (ch === '\\' && text[pos + 1] === '$') {
      pending += '$';
      pos += 2;
      continue;
    }

    // Anything that is not a `$` is ordinary text.
    if (ch !== '$') {
      pending += ch;
      pos += 1;
      continue;
    }

    // `$$` opens block math when a later `$$` closes it around non-blank content.
    if (text[pos + 1] === '$') {
      const closeAt = text.indexOf('$$', pos + 2);
      const latex = closeAt === -1 ? '' : text.slice(pos + 2, closeAt);
      if (latex.trim().length > 0) {
        emitPending();
        out.push({ type: 'block', value: latex });
        pos = closeAt + 2;
      } else {
        // Unbalanced or empty `$$`: keep just this `$` as text and rescan from
        // the next character, which may still open something else.
        pending += '$';
        pos += 1;
      }
      continue;
    }

    // A single `$`: inline math if `matchInline` accepts it, otherwise literal text.
    const inline = matchInline(text, pos);
    if (inline !== null) {
      emitPending();
      out.push({ type: 'inline', value: inline.value });
      pos = inline.end;
    } else {
      pending += '$';
      pos += 1;
    }
  }

  emitPending();
  return out;
};