From a926487f5e5c35ebb2e2593f8c6109ffdafb4c31 Mon Sep 17 00:00:00 2001
From: Jared Vititoe <jjvititoe1@gmail.com>
Date: Tue, 30 Jun 2026 11:15:39 -0400
Subject: [PATCH] fix(utils): findAndReplace infinite loop on non-global regex
 + tests (+28)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prevention work surfaced a real latent bug: findAndReplace looped forever
(OOM) on any non-global regex with a match — `match` was only reassigned
inside `if (regex.global)`, so a non-global regex never advanced. Fixed by
treating a non-global regex as a single match (`match = regex.global ?
regex.exec(text) : null`) and added a regression test. Latent in practice
(all current callers pass global regexes), but a crash waiting to happen.

New suites (tsx + node:test), verified empirically:
- utils/findAndReplace (10, incl. the regression)
- utils/AsyncSearch (9): normalize + matchQuery (the timer-based class is
  skipped — needs window.performance/setTimeout, unavailable in node)
- utils/ASCIILexicalTable (10): orderKeys gap-filling + invariants

Full suite now 103 tests, all passing.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/app/utils/ASCIILexicalTable.test.ts |  90 +++++++++++++++++++
 src/app/utils/AsyncSearch.test.ts       |  52 +++++++++++
 src/app/utils/findAndReplace.test.ts    | 110 ++++++++++++++++++++++++
 src/app/utils/findAndReplace.ts         |   5 +-
 4 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 src/app/utils/ASCIILexicalTable.test.ts
 create mode 100644 src/app/utils/AsyncSearch.test.ts
 create mode 100644 src/app/utils/findAndReplace.test.ts
diff --git a/src/app/utils/ASCIILexicalTable.test.ts b/src/app/utils/ASCIILexicalTable.test.ts
new file mode 100644
index 000000000..70966ce7c
--- /dev/null
+++ b/src/app/utils/ASCIILexicalTable.test.ts
@@ -0,0 +1,90 @@
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { ASCIILexicalTable, orderKeys } from './ASCIILexicalTable';
+
+const a = 'a'.charCodeAt(0);
+const z = 'z'.charCodeAt(0);
+
+const makeLex = (maxWidth = 4) => new ASCIILexicalTable(a, z, maxWidth);
+
+const isStrictlyIncreasing = (keys: string[]): boolean =>
+  keys.every((key, i) => i === 0 || keys[i - 1] < key);
+
+test('orderKeys returns an empty array for empty input', () => {
+  const lex = makeLex();
+  assert.deepEqual(orderKeys(lex, []), []);
+});
+
+test('orderKeys preserves all existing keys when none are missing', () => {
+  const lex = makeLex();
+  assert.deepEqual(orderKeys(lex, ['a', 'b', 'c']), ['a', 'b', 'c']);
+});
+
+test('orderKeys keeps existing keys and fills a single interior gap', () => {
+  const lex = makeLex();
+  const result = orderKeys(lex, ['b', undefined, 'd']);
+  assert.deepEqual(result, ['b', 'c', 'd']);
+});
+
+test('orderKeys output length always matches input length', () => {
+  const lex = makeLex();
+  const inputs: Array<Array<string | undefined>> = [
+    [undefined],
+    [undefined, undefined],
+    [undefined, undefined, undefined],
+    ['b', undefined, undefined, 'y'],
+  ];
+  inputs.forEach((input) => {
+    const result = orderKeys(lex, input);
+    assert.ok(result, 'expected a defined result');
+    assert.equal(result?.length, input.length);
+  });
+});
+
+test('orderKeys produces strictly increasing, valid keys', () => {
+  const lex = makeLex();
+  const result = orderKeys(lex, [undefined, undefined, undefined, undefined]);
+  assert.ok(result);
+  assert.equal(result?.length, 4);
+  assert.ok(isStrictlyIncreasing(result as string[]));
+  assert.ok((result as string[]).every((key) => lex.has(key)));
+});
+
+test('orderKeys keeps fixed keys at their positions when filling gaps', () => {
+  const lex = makeLex();
+  const result = orderKeys(lex, ['b', undefined, undefined, 'y']) as string[];
+  assert.equal(result[0], 'b');
+  assert.equal(result[3], 'y');
+  assert.ok(isStrictlyIncreasing(result));
+});
+
+test('orderKeys is deterministic for the same input', () => {
+  const lex = makeLex();
+  const first = orderKeys(lex, [undefined, undefined, undefined]);
+  const second = orderKeys(lex, [undefined, undefined, undefined]);
+  assert.deepEqual(first, second);
+});
+
+test('orderKeys handles a leading gap before an existing key', () => {
+  const lex = makeLex();
+  const result = orderKeys(lex, [undefined, 'm']) as string[];
+  assert.equal(result.length, 2);
+  assert.equal(result[1], 'm');
+  assert.ok(result[0] < 'm');
+});
+
+test('orderKeys handles a trailing gap after an existing key', () => {
+  const lex = makeLex();
+  const result = orderKeys(lex, ['m', undefined]) as string[];
+  assert.equal(result.length, 2);
+  assert.equal(result[0], 'm');
+  assert.ok(result[1] > 'm');
+});
+
+test('orderKeys works with a tiny table', () => {
+  const tiny = new ASCIILexicalTable('a'.charCodeAt(0), 'b'.charCodeAt(0), 2);
+  const result = orderKeys(tiny, [undefined, undefined]) as string[];
+  assert.equal(result.length, 2);
+  assert.ok(isStrictlyIncreasing(result));
+  assert.ok(result.every((key) => tiny.has(key)));
+});
diff --git a/src/app/utils/AsyncSearch.test.ts b/src/app/utils/AsyncSearch.test.ts
new file mode 100644
index 000000000..da2e6a7b6
--- /dev/null
+++ b/src/app/utils/AsyncSearch.test.ts
@@ -0,0 +1,52 @@
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { normalize, matchQuery } from './AsyncSearch';
+
+test('normalize lowercases and strips whitespace by default', () => {
+  assert.equal(normalize('Hello World'), 'helloworld');
+  assert.equal(normalize('  A  B  '), 'ab');
+  assert.equal(normalize(''), '');
+});
+
+test('normalize strips all whitespace kinds by default', () => {
+  assert.equal(normalize('a\tb\nc\r d'), 'abcd');
+});
+
+test('normalize caseSensitive keeps original case', () => {
+  assert.equal(normalize('Hello World', { caseSensitive: true }), 'HelloWorld');
+});
+
+test('normalize ignoreWhitespace=false keeps whitespace', () => {
+  assert.equal(normalize('Hello World', { ignoreWhitespace: false }), 'hello world');
+});
+
+test('normalize default uses NFKC (compatibility) unicode normalization', () => {
+  // U+FB01 (ﬁ ligature) decomposes to "fi" under NFKC but not NFC.
+  assert.equal(normalize('ﬁle'), 'file');
+  // With normalizeUnicode disabled (NFC), the ligature is preserved.
+  assert.equal(normalize('ﬁle', { normalizeUnicode: false }), 'ﬁle');
+});
+
+test('normalize does not strip diacritics', () => {
+  // normalize only does NFKC/NFC + lowercase; it does not remove accents.
+  assert.equal(normalize('Café'), 'café');
+});
+
+test('matchQuery default matches by prefix', () => {
+  assert.equal(matchQuery('hello world', 'hello'), true);
+  assert.equal(matchQuery('hello world', 'world'), false);
+  assert.equal(matchQuery('hello', ''), true);
+  assert.equal(matchQuery('', 'a'), false);
+});
+
+test('matchQuery contain option matches anywhere', () => {
+  assert.equal(matchQuery('hello world', 'world', { contain: true }), true);
+  assert.equal(matchQuery('hello world', 'lo wo', { contain: true }), true);
+  assert.equal(matchQuery('hello world', 'xyz', { contain: true }), false);
+  assert.equal(matchQuery('hello', '', { contain: true }), true);
+});
+
+test('matchQuery is case sensitive (does not normalize internally)', () => {
+  assert.equal(matchQuery('Hello', 'hello'), false);
+  assert.equal(matchQuery('Hello', 'Hello'), true);
+});
diff --git a/src/app/utils/findAndReplace.test.ts b/src/app/utils/findAndReplace.test.ts
new file mode 100644
index 000000000..9c352f75e
--- /dev/null
+++ b/src/app/utils/findAndReplace.test.ts
@@ -0,0 +1,110 @@
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { findAndReplace } from './findAndReplace';
+
+// Helpers that record exactly what the callbacks receive.
+const tagPart = (text: string, pushIndex: number) => ({ part: text, at: pushIndex });
+const tagMatch = (match: RegExpExecArray | RegExpMatchArray, pushIndex: number) => ({
+  match: match[0],
+  at: pushIndex,
+});
+
+test('findAndReplace interleaves converted parts and replacements (global regex)', () => {
+  const result = findAndReplace('a1b2c', /\d/g, tagMatch, tagPart);
+  assert.deepEqual(result, [
+    { part: 'a', at: 0 },
+    { match: '1', at: 1 },
+    { part: 'b', at: 2 },
+    { match: '2', at: 3 },
+    { part: 'c', at: 4 },
+  ]);
+});
+
+test('findAndReplace handles a NON-global regex as a single match (regression: no infinite loop)', () => {
+  // Before the fix, a non-global regex with a match looped forever (match was
+  // only reassigned inside `if (regex.global)`), OOM-crashing the process.
+  const result = findAndReplace('a1b2c', /\d/, tagMatch, tagPart);
+  assert.deepEqual(result, [
+    { part: 'a', at: 0 },
+    { match: '1', at: 1 },
+    // remainder after the first (only) match is a single converted part
+    { part: 'b2c', at: 2 },
+  ]);
+});
+
+test('findAndReplace pushIndex reflects result.length at push time', () => {
+  // The indices above already assert this; here we double-check the trailing part.
+  const result = findAndReplace('x9', /\d/g, tagMatch, tagPart);
+  // 'x' at 0, '9' at 1, '' at 2
+  assert.equal(result[result.length - 1].at, 2);
+});
+
+test('findAndReplace with no match returns the whole text as a single converted part', () => {
+  const result = findAndReplace('hello', /\d/g, tagMatch, tagPart);
+  assert.deepEqual(result, [{ part: 'hello', at: 0 }]);
+});
+
+test('findAndReplace on empty input returns a single empty converted part', () => {
+  const result = findAndReplace('', /\d/g, tagMatch, tagPart);
+  assert.deepEqual(result, [{ part: '', at: 0 }]);
+});
+
+test('findAndReplace emits empty parts between adjacent matches and at edges', () => {
+  const result = findAndReplace('12', /\d/g, tagMatch, tagPart);
+  assert.deepEqual(result, [
+    { part: '', at: 0 },
+    { match: '1', at: 1 },
+    { part: '', at: 2 },
+    { match: '2', at: 3 },
+    { part: '', at: 4 },
+  ]);
+});
+
+test('findAndReplace with a leading match emits an empty leading part', () => {
+  const result = findAndReplace('1abc', /\d/g, tagMatch, tagPart);
+  assert.deepEqual(result, [
+    { part: '', at: 0 },
+    { match: '1', at: 1 },
+    { part: 'abc', at: 2 },
+  ]);
+});
+
+// NOTE: A non-global regex is intentionally NOT tested here. With `regex.global`
+// false, the source's `while (match !== null)` loop never reassigns `match`
+// (reassignment is guarded by `if (regex.global)`), so it loops forever on any
+// matching input. See the report. All real callers pass global regexes.
+
+test('findAndReplace supports multi-character matches', () => {
+  const result = findAndReplace('foo<<bar>>baz', /<<|>>/g, tagMatch, tagPart);
+  assert.deepEqual(result, [
+    { part: 'foo', at: 0 },
+    { match: '<<', at: 1 },
+    { part: 'bar', at: 2 },
+    { match: '>>', at: 3 },
+    { part: 'baz', at: 4 },
+  ]);
+});
+
+test('findAndReplace can build a transformed string', () => {
+  const out = findAndReplace<string, string>(
+    'cat and dog',
+    /cat|dog/g,
+    (m) => (m[0] === 'cat' ? 'CAT' : 'DOG'),
+    (text) => text,
+  ).join('');
+  assert.equal(out, 'CAT and DOG');
+});
+
+test('findAndReplace passes the full match object to replace', () => {
+  const captured: Array<string | undefined> = [];
+  findAndReplace(
+    'name=value',
+    /(\w+)=(\w+)/g,
+    (m) => {
+      captured.push(m[1], m[2]);
+      return null;
+    },
+    () => null,
+  );
+  assert.deepEqual(captured, ['name', 'value']);
+});
diff --git a/src/app/utils/findAndReplace.ts b/src/app/utils/findAndReplace.ts
index 4606b9dd0..6acee6337 100644
--- a/src/app/utils/findAndReplace.ts
+++ b/src/app/utils/findAndReplace.ts
@@ -19,7 +19,10 @@ export const findAndReplace = <ReplaceReturnType, ConvertReturnType>(
     result.push(replace(match, result.length));
 
     lastEnd = match.index + match[0].length;
-    if (regex.global) match = regex.exec(text);
+    // A non-global regex always returns the same first match from exec() (its
+    // lastIndex never advances), so re-running it would loop forever. Treat a
+    // non-global regex as a single match and stop after processing it.
+    match = regex.global ? regex.exec(text) : null;
   }
 
   result.push(convertPart(text.slice(lastEnd), result.length));