From a926487f5e5c35ebb2e2593f8c6109ffdafb4c31 Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Tue, 30 Jun 2026 11:15:39 -0400 Subject: [PATCH] fix(utils): findAndReplace infinite loop on non-global regex + tests (+28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevention work surfaced a real latent bug: findAndReplace looped forever (OOM) on any non-global regex with a match — `match` was only reassigned inside `if (regex.global)`, so a non-global regex never advanced. Fixed by treating a non-global regex as a single match (`match = regex.global ? regex.exec(text) : null`) and added a regression test. Latent in practice (all current callers pass global regexes), but a crash waiting to happen. New suites (tsx + node:test), verified empirically: - utils/findAndReplace (10, incl. the regression) - utils/AsyncSearch (9): normalize + matchQuery (the timer-based class is skipped — needs window.performance/setTimeout, unavailable in node) - utils/ASCIILexicalTable (10): orderKeys gap-filling + invariants Full suite now 103 tests, all passing. Co-Authored-By: Claude Opus 4.8 --- src/app/utils/ASCIILexicalTable.test.ts | 90 +++++++++++++++++++ src/app/utils/AsyncSearch.test.ts | 52 +++++++++++ src/app/utils/findAndReplace.test.ts | 110 ++++++++++++++++++++++++ src/app/utils/findAndReplace.ts | 5 +- 4 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 src/app/utils/ASCIILexicalTable.test.ts create mode 100644 src/app/utils/AsyncSearch.test.ts create mode 100644 src/app/utils/findAndReplace.test.ts diff --git a/src/app/utils/ASCIILexicalTable.test.ts b/src/app/utils/ASCIILexicalTable.test.ts new file mode 100644 index 000000000..70966ce7c --- /dev/null +++ b/src/app/utils/ASCIILexicalTable.test.ts @@ -0,0 +1,90 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { ASCIILexicalTable, orderKeys } from './ASCIILexicalTable'; + +const a = 'a'.charCodeAt(0); +const z = 'z'.charCodeAt(0); + +const makeLex = (maxWidth = 4) => new ASCIILexicalTable(a, z, maxWidth); + +const isStrictlyIncreasing = (keys: string[]): boolean => + keys.every((key, i) => i === 0 || keys[i - 1] < key); + +test('orderKeys returns an empty array for empty input', () => { + const lex = makeLex(); + assert.deepEqual(orderKeys(lex, []), []); +}); + +test('orderKeys preserves all existing keys when none are missing', () => { + const lex = makeLex(); + assert.deepEqual(orderKeys(lex, ['a', 'b', 'c']), ['a', 'b', 'c']); +}); + +test('orderKeys keeps existing keys and fills a single interior gap', () => { + const lex = makeLex(); + const result = orderKeys(lex, ['b', undefined, 'd']); + assert.deepEqual(result, ['b', 'c', 'd']); +}); + +test('orderKeys output length always matches input length', () => { + const lex = makeLex(); + const inputs: Array> = [ + [undefined], + [undefined, undefined], + [undefined, undefined, undefined], + ['b', undefined, undefined, 'y'], + ]; + inputs.forEach((input) => { + const result = orderKeys(lex, input); + assert.ok(result, 'expected a defined result'); + assert.equal(result?.length, input.length); + }); +}); + +test('orderKeys produces strictly increasing, valid keys', () => { + const lex = makeLex(); + const result = orderKeys(lex, [undefined, undefined, undefined, undefined]); + assert.ok(result); + assert.equal(result?.length, 4); + assert.ok(isStrictlyIncreasing(result as string[])); + assert.ok((result as string[]).every((key) => lex.has(key))); +}); + +test('orderKeys keeps fixed keys at their positions when filling gaps', () => { + const lex = makeLex(); + const result = orderKeys(lex, ['b', undefined, undefined, 'y']) as string[]; + assert.equal(result[0], 'b'); + assert.equal(result[3], 'y'); + assert.ok(isStrictlyIncreasing(result)); +}); + +test('orderKeys is deterministic for the same input', () => { + const lex = makeLex(); + const first = orderKeys(lex, [undefined, undefined, undefined]); + const second = orderKeys(lex, [undefined, undefined, undefined]); + assert.deepEqual(first, second); +}); + +test('orderKeys handles a leading gap before an existing key', () => { + const lex = makeLex(); + const result = orderKeys(lex, [undefined, 'm']) as string[]; + assert.equal(result.length, 2); + assert.equal(result[1], 'm'); + assert.ok(result[0] < 'm'); +}); + +test('orderKeys handles a trailing gap after an existing key', () => { + const lex = makeLex(); + const result = orderKeys(lex, ['m', undefined]) as string[]; + assert.equal(result.length, 2); + assert.equal(result[0], 'm'); + assert.ok(result[1] > 'm'); +}); + +test('orderKeys works with a tiny table', () => { + const tiny = new ASCIILexicalTable('a'.charCodeAt(0), 'b'.charCodeAt(0), 2); + const result = orderKeys(tiny, [undefined, undefined]) as string[]; + assert.equal(result.length, 2); + assert.ok(isStrictlyIncreasing(result)); + assert.ok(result.every((key) => tiny.has(key))); +}); diff --git a/src/app/utils/AsyncSearch.test.ts b/src/app/utils/AsyncSearch.test.ts new file mode 100644 index 000000000..da2e6a7b6 --- /dev/null +++ b/src/app/utils/AsyncSearch.test.ts @@ -0,0 +1,52 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { normalize, matchQuery } from './AsyncSearch'; + +test('normalize lowercases and strips whitespace by default', () => { + assert.equal(normalize('Hello World'), 'helloworld'); + assert.equal(normalize(' A B '), 'ab'); + assert.equal(normalize(''), ''); +}); + +test('normalize strips all whitespace kinds by default', () => { + assert.equal(normalize('a\tb\nc\r d'), 'abcd'); +}); + +test('normalize caseSensitive keeps original case', () => { + assert.equal(normalize('Hello World', { caseSensitive: true }), 'HelloWorld'); +}); + +test('normalize ignoreWhitespace=false keeps whitespace', () => { + assert.equal(normalize('Hello World', { ignoreWhitespace: false }), 'hello world'); +}); + +test('normalize default uses NFKC (compatibility) unicode normalization', () => { + // U+FB01 (fi ligature) decomposes to "fi" under NFKC but not NFC. + assert.equal(normalize('file'), 'file'); + // With normalizeUnicode disabled (NFC), the ligature is preserved. + assert.equal(normalize('file', { normalizeUnicode: false }), 'file'); +}); + +test('normalize does not strip diacritics', () => { + // normalize only does NFKC/NFC + lowercase; it does not remove accents. + assert.equal(normalize('Café'), 'café'); +}); + +test('matchQuery default matches by prefix', () => { + assert.equal(matchQuery('hello world', 'hello'), true); + assert.equal(matchQuery('hello world', 'world'), false); + assert.equal(matchQuery('hello', ''), true); + assert.equal(matchQuery('', 'a'), false); +}); + +test('matchQuery contain option matches anywhere', () => { + assert.equal(matchQuery('hello world', 'world', { contain: true }), true); + assert.equal(matchQuery('hello world', 'lo wo', { contain: true }), true); + assert.equal(matchQuery('hello world', 'xyz', { contain: true }), false); + assert.equal(matchQuery('hello', '', { contain: true }), true); +}); + +test('matchQuery is case sensitive (does not normalize internally)', () => { + assert.equal(matchQuery('Hello', 'hello'), false); + assert.equal(matchQuery('Hello', 'Hello'), true); +}); diff --git a/src/app/utils/findAndReplace.test.ts b/src/app/utils/findAndReplace.test.ts new file mode 100644 index 000000000..9c352f75e --- /dev/null +++ b/src/app/utils/findAndReplace.test.ts @@ -0,0 +1,110 @@ +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { findAndReplace } from './findAndReplace'; + +// Helpers that record exactly what the callbacks receive. +const tagPart = (text: string, pushIndex: number) => ({ part: text, at: pushIndex }); +const tagMatch = (match: RegExpExecArray | RegExpMatchArray, pushIndex: number) => ({ + match: match[0], + at: pushIndex, +}); + +test('findAndReplace interleaves converted parts and replacements (global regex)', () => { + const result = findAndReplace('a1b2c', /\d/g, tagMatch, tagPart); + assert.deepEqual(result, [ + { part: 'a', at: 0 }, + { match: '1', at: 1 }, + { part: 'b', at: 2 }, + { match: '2', at: 3 }, + { part: 'c', at: 4 }, + ]); +}); + +test('findAndReplace handles a NON-global regex as a single match (regression: no infinite loop)', () => { + // Before the fix, a non-global regex with a match looped forever (match was + // only reassigned inside `if (regex.global)`), OOM-crashing the process. + const result = findAndReplace('a1b2c', /\d/, tagMatch, tagPart); + assert.deepEqual(result, [ + { part: 'a', at: 0 }, + { match: '1', at: 1 }, + // remainder after the first (only) match is a single converted part + { part: 'b2c', at: 2 }, + ]); +}); + +test('findAndReplace pushIndex reflects result.length at push time', () => { + // The indices above already assert this; here we double-check the trailing part. + const result = findAndReplace('x9', /\d/g, tagMatch, tagPart); + // 'x' at 0, '9' at 1, '' at 2 + assert.equal(result[result.length - 1].at, 2); +}); + +test('findAndReplace with no match returns the whole text as a single converted part', () => { + const result = findAndReplace('hello', /\d/g, tagMatch, tagPart); + assert.deepEqual(result, [{ part: 'hello', at: 0 }]); +}); + +test('findAndReplace on empty input returns a single empty converted part', () => { + const result = findAndReplace('', /\d/g, tagMatch, tagPart); + assert.deepEqual(result, [{ part: '', at: 0 }]); +}); + +test('findAndReplace emits empty parts between adjacent matches and at edges', () => { + const result = findAndReplace('12', /\d/g, tagMatch, tagPart); + assert.deepEqual(result, [ + { part: '', at: 0 }, + { match: '1', at: 1 }, + { part: '', at: 2 }, + { match: '2', at: 3 }, + { part: '', at: 4 }, + ]); +}); + +test('findAndReplace with a leading match emits an empty leading part', () => { + const result = findAndReplace('1abc', /\d/g, tagMatch, tagPart); + assert.deepEqual(result, [ + { part: '', at: 0 }, + { match: '1', at: 1 }, + { part: 'abc', at: 2 }, + ]); +}); + +// NOTE: A non-global regex is intentionally NOT tested here. With `regex.global` +// false, the source's `while (match !== null)` loop never reassigns `match` +// (reassignment is guarded by `if (regex.global)`), so it loops forever on any +// matching input. See the report. All real callers pass global regexes. + +test('findAndReplace supports multi-character matches', () => { + const result = findAndReplace('foo<>baz', /<<|>>/g, tagMatch, tagPart); + assert.deepEqual(result, [ + { part: 'foo', at: 0 }, + { match: '<<', at: 1 }, + { part: 'bar', at: 2 }, + { match: '>>', at: 3 }, + { part: 'baz', at: 4 }, + ]); +}); + +test('findAndReplace can build a transformed string', () => { + const out = findAndReplace( + 'cat and dog', + /cat|dog/g, + (m) => (m[0] === 'cat' ? 'CAT' : 'DOG'), + (text) => text, + ).join(''); + assert.equal(out, 'CAT and DOG'); +}); + +test('findAndReplace passes the full match object to replace', () => { + const captured: Array = []; + findAndReplace( + 'name=value', + /(\w+)=(\w+)/g, + (m) => { + captured.push(m[1], m[2]); + return null; + }, + () => null, + ); + assert.deepEqual(captured, ['name', 'value']); +}); diff --git a/src/app/utils/findAndReplace.ts b/src/app/utils/findAndReplace.ts index 4606b9dd0..6acee6337 100644 --- a/src/app/utils/findAndReplace.ts +++ b/src/app/utils/findAndReplace.ts @@ -19,7 +19,10 @@ export const findAndReplace = ( result.push(replace(match, result.length)); lastEnd = match.index + match[0].length; - if (regex.global) match = regex.exec(text); + // A non-global regex always returns the same first match from exec() (its + // lastIndex never advances), so re-running it would loop forever. Treat a + // non-global regex as a single match and stop after processing it. + match = regex.global ? regex.exec(text) : null; } result.push(convertPart(text.slice(lastEnd), result.length));