fix(utils): findAndReplace infinite loop on non-global regex + tests (+28)
Prevention work surfaced a real latent bug: findAndReplace looped forever (OOM) on any non-global regex with a match — `match` was only reassigned inside `if (regex.global)`, so a non-global regex never advanced. Fixed by treating a non-global regex as a single match (`match = regex.global ? regex.exec(text) : null`) and added a regression test. Latent in practice (all current callers pass global regexes), but a crash waiting to happen. New suites (tsx + node:test), verified empirically: - utils/findAndReplace (10, incl. the regression) - utils/AsyncSearch (9): normalize + matchQuery (the timer-based class is skipped — needs window.performance/setTimeout, unavailable in node) - utils/ASCIILexicalTable (10): orderKeys gap-filling + invariants Full suite now 103 tests, all passing. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,90 @@
|
|||||||
|
import { test } from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { ASCIILexicalTable, orderKeys } from './ASCIILexicalTable';
|
||||||
|
|
||||||
|
const a = 'a'.charCodeAt(0);
|
||||||
|
const z = 'z'.charCodeAt(0);
|
||||||
|
|
||||||
|
const makeLex = (maxWidth = 4) => new ASCIILexicalTable(a, z, maxWidth);
|
||||||
|
|
||||||
|
const isStrictlyIncreasing = (keys: string[]): boolean =>
|
||||||
|
keys.every((key, i) => i === 0 || keys[i - 1] < key);
|
||||||
|
|
||||||
|
test('orderKeys returns an empty array for empty input', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
assert.deepEqual(orderKeys(lex, []), []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys preserves all existing keys when none are missing', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
assert.deepEqual(orderKeys(lex, ['a', 'b', 'c']), ['a', 'b', 'c']);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys keeps existing keys and fills a single interior gap', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const result = orderKeys(lex, ['b', undefined, 'd']);
|
||||||
|
assert.deepEqual(result, ['b', 'c', 'd']);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys output length always matches input length', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const inputs: Array<Array<string | undefined>> = [
|
||||||
|
[undefined],
|
||||||
|
[undefined, undefined],
|
||||||
|
[undefined, undefined, undefined],
|
||||||
|
['b', undefined, undefined, 'y'],
|
||||||
|
];
|
||||||
|
inputs.forEach((input) => {
|
||||||
|
const result = orderKeys(lex, input);
|
||||||
|
assert.ok(result, 'expected a defined result');
|
||||||
|
assert.equal(result?.length, input.length);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys produces strictly increasing, valid keys', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const result = orderKeys(lex, [undefined, undefined, undefined, undefined]);
|
||||||
|
assert.ok(result);
|
||||||
|
assert.equal(result?.length, 4);
|
||||||
|
assert.ok(isStrictlyIncreasing(result as string[]));
|
||||||
|
assert.ok((result as string[]).every((key) => lex.has(key)));
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys keeps fixed keys at their positions when filling gaps', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const result = orderKeys(lex, ['b', undefined, undefined, 'y']) as string[];
|
||||||
|
assert.equal(result[0], 'b');
|
||||||
|
assert.equal(result[3], 'y');
|
||||||
|
assert.ok(isStrictlyIncreasing(result));
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys is deterministic for the same input', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const first = orderKeys(lex, [undefined, undefined, undefined]);
|
||||||
|
const second = orderKeys(lex, [undefined, undefined, undefined]);
|
||||||
|
assert.deepEqual(first, second);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys handles a leading gap before an existing key', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const result = orderKeys(lex, [undefined, 'm']) as string[];
|
||||||
|
assert.equal(result.length, 2);
|
||||||
|
assert.equal(result[1], 'm');
|
||||||
|
assert.ok(result[0] < 'm');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys handles a trailing gap after an existing key', () => {
|
||||||
|
const lex = makeLex();
|
||||||
|
const result = orderKeys(lex, ['m', undefined]) as string[];
|
||||||
|
assert.equal(result.length, 2);
|
||||||
|
assert.equal(result[0], 'm');
|
||||||
|
assert.ok(result[1] > 'm');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('orderKeys works with a tiny table', () => {
|
||||||
|
const tiny = new ASCIILexicalTable('a'.charCodeAt(0), 'b'.charCodeAt(0), 2);
|
||||||
|
const result = orderKeys(tiny, [undefined, undefined]) as string[];
|
||||||
|
assert.equal(result.length, 2);
|
||||||
|
assert.ok(isStrictlyIncreasing(result));
|
||||||
|
assert.ok(result.every((key) => tiny.has(key)));
|
||||||
|
});
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
import { test } from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { normalize, matchQuery } from './AsyncSearch';
|
||||||
|
|
||||||
|
test('normalize lowercases and strips whitespace by default', () => {
|
||||||
|
assert.equal(normalize('Hello World'), 'helloworld');
|
||||||
|
assert.equal(normalize(' A B '), 'ab');
|
||||||
|
assert.equal(normalize(''), '');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('normalize strips all whitespace kinds by default', () => {
|
||||||
|
assert.equal(normalize('a\tb\nc\r d'), 'abcd');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('normalize caseSensitive keeps original case', () => {
|
||||||
|
assert.equal(normalize('Hello World', { caseSensitive: true }), 'HelloWorld');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('normalize ignoreWhitespace=false keeps whitespace', () => {
|
||||||
|
assert.equal(normalize('Hello World', { ignoreWhitespace: false }), 'hello world');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('normalize default uses NFKC (compatibility) unicode normalization', () => {
|
||||||
|
// U+FB01 (fi ligature) decomposes to "fi" under NFKC but not NFC.
|
||||||
|
assert.equal(normalize('file'), 'file');
|
||||||
|
// With normalizeUnicode disabled (NFC), the ligature is preserved.
|
||||||
|
assert.equal(normalize('file', { normalizeUnicode: false }), 'file');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('normalize does not strip diacritics', () => {
|
||||||
|
// normalize only does NFKC/NFC + lowercase; it does not remove accents.
|
||||||
|
assert.equal(normalize('Café'), 'café');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('matchQuery default matches by prefix', () => {
|
||||||
|
assert.equal(matchQuery('hello world', 'hello'), true);
|
||||||
|
assert.equal(matchQuery('hello world', 'world'), false);
|
||||||
|
assert.equal(matchQuery('hello', ''), true);
|
||||||
|
assert.equal(matchQuery('', 'a'), false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('matchQuery contain option matches anywhere', () => {
|
||||||
|
assert.equal(matchQuery('hello world', 'world', { contain: true }), true);
|
||||||
|
assert.equal(matchQuery('hello world', 'lo wo', { contain: true }), true);
|
||||||
|
assert.equal(matchQuery('hello world', 'xyz', { contain: true }), false);
|
||||||
|
assert.equal(matchQuery('hello', '', { contain: true }), true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('matchQuery is case sensitive (does not normalize internally)', () => {
|
||||||
|
assert.equal(matchQuery('Hello', 'hello'), false);
|
||||||
|
assert.equal(matchQuery('Hello', 'Hello'), true);
|
||||||
|
});
|
||||||
@@ -0,0 +1,110 @@
|
|||||||
|
import { test } from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { findAndReplace } from './findAndReplace';
|
||||||
|
|
||||||
|
// Helpers that record exactly what the callbacks receive.
|
||||||
|
const tagPart = (text: string, pushIndex: number) => ({ part: text, at: pushIndex });
|
||||||
|
const tagMatch = (match: RegExpExecArray | RegExpMatchArray, pushIndex: number) => ({
|
||||||
|
match: match[0],
|
||||||
|
at: pushIndex,
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace interleaves converted parts and replacements (global regex)', () => {
|
||||||
|
const result = findAndReplace('a1b2c', /\d/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [
|
||||||
|
{ part: 'a', at: 0 },
|
||||||
|
{ match: '1', at: 1 },
|
||||||
|
{ part: 'b', at: 2 },
|
||||||
|
{ match: '2', at: 3 },
|
||||||
|
{ part: 'c', at: 4 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace handles a NON-global regex as a single match (regression: no infinite loop)', () => {
|
||||||
|
// Before the fix, a non-global regex with a match looped forever (match was
|
||||||
|
// only reassigned inside `if (regex.global)`), OOM-crashing the process.
|
||||||
|
const result = findAndReplace('a1b2c', /\d/, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [
|
||||||
|
{ part: 'a', at: 0 },
|
||||||
|
{ match: '1', at: 1 },
|
||||||
|
// remainder after the first (only) match is a single converted part
|
||||||
|
{ part: 'b2c', at: 2 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace pushIndex reflects result.length at push time', () => {
|
||||||
|
// The indices above already assert this; here we double-check the trailing part.
|
||||||
|
const result = findAndReplace('x9', /\d/g, tagMatch, tagPart);
|
||||||
|
// 'x' at 0, '9' at 1, '' at 2
|
||||||
|
assert.equal(result[result.length - 1].at, 2);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace with no match returns the whole text as a single converted part', () => {
|
||||||
|
const result = findAndReplace('hello', /\d/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [{ part: 'hello', at: 0 }]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace on empty input returns a single empty converted part', () => {
|
||||||
|
const result = findAndReplace('', /\d/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [{ part: '', at: 0 }]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace emits empty parts between adjacent matches and at edges', () => {
|
||||||
|
const result = findAndReplace('12', /\d/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [
|
||||||
|
{ part: '', at: 0 },
|
||||||
|
{ match: '1', at: 1 },
|
||||||
|
{ part: '', at: 2 },
|
||||||
|
{ match: '2', at: 3 },
|
||||||
|
{ part: '', at: 4 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace with a leading match emits an empty leading part', () => {
|
||||||
|
const result = findAndReplace('1abc', /\d/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [
|
||||||
|
{ part: '', at: 0 },
|
||||||
|
{ match: '1', at: 1 },
|
||||||
|
{ part: 'abc', at: 2 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// NOTE: A non-global regex is intentionally NOT tested here. With `regex.global`
|
||||||
|
// false, the source's `while (match !== null)` loop never reassigns `match`
|
||||||
|
// (reassignment is guarded by `if (regex.global)`), so it loops forever on any
|
||||||
|
// matching input. See the report. All real callers pass global regexes.
|
||||||
|
|
||||||
|
test('findAndReplace supports multi-character matches', () => {
|
||||||
|
const result = findAndReplace('foo<<bar>>baz', /<<|>>/g, tagMatch, tagPart);
|
||||||
|
assert.deepEqual(result, [
|
||||||
|
{ part: 'foo', at: 0 },
|
||||||
|
{ match: '<<', at: 1 },
|
||||||
|
{ part: 'bar', at: 2 },
|
||||||
|
{ match: '>>', at: 3 },
|
||||||
|
{ part: 'baz', at: 4 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace can build a transformed string', () => {
|
||||||
|
const out = findAndReplace<string, string>(
|
||||||
|
'cat and dog',
|
||||||
|
/cat|dog/g,
|
||||||
|
(m) => (m[0] === 'cat' ? 'CAT' : 'DOG'),
|
||||||
|
(text) => text,
|
||||||
|
).join('');
|
||||||
|
assert.equal(out, 'CAT and DOG');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('findAndReplace passes the full match object to replace', () => {
|
||||||
|
const captured: Array<string | undefined> = [];
|
||||||
|
findAndReplace(
|
||||||
|
'name=value',
|
||||||
|
/(\w+)=(\w+)/g,
|
||||||
|
(m) => {
|
||||||
|
captured.push(m[1], m[2]);
|
||||||
|
return null;
|
||||||
|
},
|
||||||
|
() => null,
|
||||||
|
);
|
||||||
|
assert.deepEqual(captured, ['name', 'value']);
|
||||||
|
});
|
||||||
@@ -19,7 +19,10 @@ export const findAndReplace = <ReplaceReturnType, ConvertReturnType>(
|
|||||||
result.push(replace(match, result.length));
|
result.push(replace(match, result.length));
|
||||||
|
|
||||||
lastEnd = match.index + match[0].length;
|
lastEnd = match.index + match[0].length;
|
||||||
if (regex.global) match = regex.exec(text);
|
// A non-global regex always returns the same first match from exec() (its
|
||||||
|
// lastIndex never advances), so re-running it would loop forever. Treat a
|
||||||
|
// non-global regex as a single match and stop after processing it.
|
||||||
|
match = regex.global ? regex.exec(text) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
result.push(convertPart(text.slice(lastEnd), result.length));
|
result.push(convertPart(text.slice(lastEnd), result.length));
|
||||||
|
|||||||
Reference in New Issue
Block a user