fix(utils): findAndReplace infinite loop on non-global regex + tests (+28)

Prevention work surfaced a real latent bug: findAndReplace looped forever
(OOM) on any non-global regex with a match — `match` was only reassigned
inside `if (regex.global)`, so a non-global regex never advanced. Fixed by
treating a non-global regex as a single match (`match = regex.global ?
regex.exec(text) : null`) and added a regression test. Latent in practice
(all current callers pass global regexes), but a crash waiting to happen.

New suites (tsx + node:test), verified empirically:
- utils/findAndReplace (10, incl. the regression)
- utils/AsyncSearch (9): normalize + matchQuery (the timer-based class is
  skipped — needs window.performance/setTimeout, unavailable in node)
- utils/ASCIILexicalTable (10): orderKeys gap-filling + invariants

Full suite now 103 tests, all passing.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 11:15:39 -04:00
parent ae1d30bc5a
commit a926487f5e
4 changed files with 256 additions and 1 deletions
+90
View File
@@ -0,0 +1,90 @@
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { ASCIILexicalTable, orderKeys } from './ASCIILexicalTable';
const a = 'a'.charCodeAt(0);
const z = 'z'.charCodeAt(0);
const makeLex = (maxWidth = 4) => new ASCIILexicalTable(a, z, maxWidth);
const isStrictlyIncreasing = (keys: string[]): boolean =>
keys.every((key, i) => i === 0 || keys[i - 1] < key);
test('orderKeys returns an empty array for empty input', () => {
const lex = makeLex();
assert.deepEqual(orderKeys(lex, []), []);
});
test('orderKeys preserves all existing keys when none are missing', () => {
const lex = makeLex();
assert.deepEqual(orderKeys(lex, ['a', 'b', 'c']), ['a', 'b', 'c']);
});
test('orderKeys keeps existing keys and fills a single interior gap', () => {
const lex = makeLex();
const result = orderKeys(lex, ['b', undefined, 'd']);
assert.deepEqual(result, ['b', 'c', 'd']);
});
test('orderKeys output length always matches input length', () => {
const lex = makeLex();
const inputs: Array<Array<string | undefined>> = [
[undefined],
[undefined, undefined],
[undefined, undefined, undefined],
['b', undefined, undefined, 'y'],
];
inputs.forEach((input) => {
const result = orderKeys(lex, input);
assert.ok(result, 'expected a defined result');
assert.equal(result?.length, input.length);
});
});
test('orderKeys produces strictly increasing, valid keys', () => {
const lex = makeLex();
const result = orderKeys(lex, [undefined, undefined, undefined, undefined]);
assert.ok(result);
assert.equal(result?.length, 4);
assert.ok(isStrictlyIncreasing(result as string[]));
assert.ok((result as string[]).every((key) => lex.has(key)));
});
test('orderKeys keeps fixed keys at their positions when filling gaps', () => {
const lex = makeLex();
const result = orderKeys(lex, ['b', undefined, undefined, 'y']) as string[];
assert.equal(result[0], 'b');
assert.equal(result[3], 'y');
assert.ok(isStrictlyIncreasing(result));
});
test('orderKeys is deterministic for the same input', () => {
const lex = makeLex();
const first = orderKeys(lex, [undefined, undefined, undefined]);
const second = orderKeys(lex, [undefined, undefined, undefined]);
assert.deepEqual(first, second);
});
test('orderKeys handles a leading gap before an existing key', () => {
const lex = makeLex();
const result = orderKeys(lex, [undefined, 'm']) as string[];
assert.equal(result.length, 2);
assert.equal(result[1], 'm');
assert.ok(result[0] < 'm');
});
test('orderKeys handles a trailing gap after an existing key', () => {
const lex = makeLex();
const result = orderKeys(lex, ['m', undefined]) as string[];
assert.equal(result.length, 2);
assert.equal(result[0], 'm');
assert.ok(result[1] > 'm');
});
test('orderKeys works with a tiny table', () => {
const tiny = new ASCIILexicalTable('a'.charCodeAt(0), 'b'.charCodeAt(0), 2);
const result = orderKeys(tiny, [undefined, undefined]) as string[];
assert.equal(result.length, 2);
assert.ok(isStrictlyIncreasing(result));
assert.ok(result.every((key) => tiny.has(key)));
});
+52
View File
@@ -0,0 +1,52 @@
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { normalize, matchQuery } from './AsyncSearch';
test('normalize lowercases and strips whitespace by default', () => {
assert.equal(normalize('Hello World'), 'helloworld');
assert.equal(normalize(' A B '), 'ab');
assert.equal(normalize(''), '');
});
test('normalize strips all whitespace kinds by default', () => {
assert.equal(normalize('a\tb\nc\r d'), 'abcd');
});
test('normalize caseSensitive keeps original case', () => {
assert.equal(normalize('Hello World', { caseSensitive: true }), 'HelloWorld');
});
test('normalize ignoreWhitespace=false keeps whitespace', () => {
assert.equal(normalize('Hello World', { ignoreWhitespace: false }), 'hello world');
});
test('normalize default uses NFKC (compatibility) unicode normalization', () => {
// U+FB01 (fi ligature) decomposes to "fi" under NFKC but not NFC.
assert.equal(normalize('file'), 'file');
// With normalizeUnicode disabled (NFC), the ligature is preserved.
assert.equal(normalize('file', { normalizeUnicode: false }), 'file');
});
test('normalize does not strip diacritics', () => {
// normalize only does NFKC/NFC + lowercase; it does not remove accents.
assert.equal(normalize('Café'), 'café');
});
test('matchQuery default matches by prefix', () => {
assert.equal(matchQuery('hello world', 'hello'), true);
assert.equal(matchQuery('hello world', 'world'), false);
assert.equal(matchQuery('hello', ''), true);
assert.equal(matchQuery('', 'a'), false);
});
test('matchQuery contain option matches anywhere', () => {
assert.equal(matchQuery('hello world', 'world', { contain: true }), true);
assert.equal(matchQuery('hello world', 'lo wo', { contain: true }), true);
assert.equal(matchQuery('hello world', 'xyz', { contain: true }), false);
assert.equal(matchQuery('hello', '', { contain: true }), true);
});
test('matchQuery is case sensitive (does not normalize internally)', () => {
assert.equal(matchQuery('Hello', 'hello'), false);
assert.equal(matchQuery('Hello', 'Hello'), true);
});
+110
View File
@@ -0,0 +1,110 @@
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { findAndReplace } from './findAndReplace';
// Helpers that record exactly what the callbacks receive.
const tagPart = (text: string, pushIndex: number) => ({ part: text, at: pushIndex });
const tagMatch = (match: RegExpExecArray | RegExpMatchArray, pushIndex: number) => ({
match: match[0],
at: pushIndex,
});
test('findAndReplace interleaves converted parts and replacements (global regex)', () => {
const result = findAndReplace('a1b2c', /\d/g, tagMatch, tagPart);
assert.deepEqual(result, [
{ part: 'a', at: 0 },
{ match: '1', at: 1 },
{ part: 'b', at: 2 },
{ match: '2', at: 3 },
{ part: 'c', at: 4 },
]);
});
test('findAndReplace handles a NON-global regex as a single match (regression: no infinite loop)', () => {
// Before the fix, a non-global regex with a match looped forever (match was
// only reassigned inside `if (regex.global)`), OOM-crashing the process.
const result = findAndReplace('a1b2c', /\d/, tagMatch, tagPart);
assert.deepEqual(result, [
{ part: 'a', at: 0 },
{ match: '1', at: 1 },
// remainder after the first (only) match is a single converted part
{ part: 'b2c', at: 2 },
]);
});
test('findAndReplace pushIndex reflects result.length at push time', () => {
// The indices above already assert this; here we double-check the trailing part.
const result = findAndReplace('x9', /\d/g, tagMatch, tagPart);
// 'x' at 0, '9' at 1, '' at 2
assert.equal(result[result.length - 1].at, 2);
});
test('findAndReplace with no match returns the whole text as a single converted part', () => {
const result = findAndReplace('hello', /\d/g, tagMatch, tagPart);
assert.deepEqual(result, [{ part: 'hello', at: 0 }]);
});
test('findAndReplace on empty input returns a single empty converted part', () => {
const result = findAndReplace('', /\d/g, tagMatch, tagPart);
assert.deepEqual(result, [{ part: '', at: 0 }]);
});
test('findAndReplace emits empty parts between adjacent matches and at edges', () => {
const result = findAndReplace('12', /\d/g, tagMatch, tagPart);
assert.deepEqual(result, [
{ part: '', at: 0 },
{ match: '1', at: 1 },
{ part: '', at: 2 },
{ match: '2', at: 3 },
{ part: '', at: 4 },
]);
});
test('findAndReplace with a leading match emits an empty leading part', () => {
const result = findAndReplace('1abc', /\d/g, tagMatch, tagPart);
assert.deepEqual(result, [
{ part: '', at: 0 },
{ match: '1', at: 1 },
{ part: 'abc', at: 2 },
]);
});
// NOTE: A non-global regex is intentionally NOT tested here. With `regex.global`
// false, the source's `while (match !== null)` loop never reassigns `match`
// (reassignment is guarded by `if (regex.global)`), so it loops forever on any
// matching input. See the report. All real callers pass global regexes.
test('findAndReplace supports multi-character matches', () => {
const result = findAndReplace('foo<<bar>>baz', /<<|>>/g, tagMatch, tagPart);
assert.deepEqual(result, [
{ part: 'foo', at: 0 },
{ match: '<<', at: 1 },
{ part: 'bar', at: 2 },
{ match: '>>', at: 3 },
{ part: 'baz', at: 4 },
]);
});
test('findAndReplace can build a transformed string', () => {
const out = findAndReplace<string, string>(
'cat and dog',
/cat|dog/g,
(m) => (m[0] === 'cat' ? 'CAT' : 'DOG'),
(text) => text,
).join('');
assert.equal(out, 'CAT and DOG');
});
test('findAndReplace passes the full match object to replace', () => {
const captured: Array<string | undefined> = [];
findAndReplace(
'name=value',
/(\w+)=(\w+)/g,
(m) => {
captured.push(m[1], m[2]);
return null;
},
() => null,
);
assert.deepEqual(captured, ['name', 'value']);
});
+4 -1
View File
@@ -19,7 +19,10 @@ export const findAndReplace = <ReplaceReturnType, ConvertReturnType>(
result.push(replace(match, result.length));
lastEnd = match.index + match[0].length;
if (regex.global) match = regex.exec(text);
// A non-global regex always returns the same first match from exec() (its
// lastIndex never advances), so re-running it would loop forever. Treat a
// non-global regex as a single match and stop after processing it.
match = regex.global ? regex.exec(text) : null;
}
result.push(convertPart(text.slice(lastEnd), result.length));