fix(calls): harden ML denoise shim against static; fix lint/format

ML noise suppression produced loud static on real calls. RNNoise requires
mono 48 kHz float input; feeding it stereo or wrong-rate data is the classic
cause of that static.

Harden the shim:
- request mono (channelCount: 1) + 48 kHz capture
- run a 48 kHz AudioContext and BAIL to the raw mic if the browser won't
  give a true 48 kHz context (wrong-rate data -> static)
- force the worklet node to explicit mono in/out
- use the non-SIMD rnnoise.wasm (the SIMD build has produced artifacts on
  some GPUs)
- share one AudioContext across captures

Also fix the two CI-blocking eslint errors (unused vars in UrlPreviewCard and
useLocalMessageSearch) and apply repo-wide prettier formatting so that
check:eslint and check:prettier pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 20:50:00 -04:00
parent 5deed79b42
commit 4a401cf816
13 changed files with 388 additions and 357 deletions
@@ -10,12 +10,14 @@
 * captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
 * and hand the processed track back to EC/LiveKit.
 *
- * This mirrors Element Call's own (still-unmerged) PR #3892 pipeline, executed
- * from the realm we already control instead of forking and rebuilding EC.
+ * RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
+ * or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
+ *   - request mono + 48 kHz capture,
+ *   - run a 48 kHz AudioContext and BAIL to the raw mic if the browser refuses
+ *     to give us a real 48 kHz context,
+ *   - use the non-SIMD wasm (the SIMD build has produced artifacts on some GPUs).
 *
- * Known beta caveat: routing capture through WebAudio can weaken the browser's
- * acoustic echo cancellation (AEC operates on the native capture track). We keep
- * echoCancellation/autoGainControl enabled on the raw capture to mitigate.
+ * Any failure falls back to the unprocessed mic so calls never break.
 */
 (function () {
  'use strict';
@@ -37,25 +39,13 @@

  var origGetUserMedia = md.getUserMedia.bind(md);
  var wasmPromise = null;
-
-  // SIMD feature detection (bytes from @sapphi-red/web-noise-suppressor / wasm-feature-detect)
-  function hasSimd() {
-    try {
-      return WebAssembly.validate(
-        new Uint8Array([
-          0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
-          253, 15, 253, 98, 11,
-        ])
-      );
-    } catch (e) {
-      return false;
-    }
-  }
+  var ctxPromise = null; // shared AudioContext + worklet module, created once

  function loadWasm() {
    if (!wasmPromise) {
-      var url = ASSET_BASE + (hasSimd() ? 'rnnoise_simd.wasm' : 'rnnoise.wasm');
-      wasmPromise = fetch(url).then(function (r) {
+      // Non-SIMD build for maximum compatibility — the SIMD wasm has produced
+      // static on some browser/GPU combinations.
+      wasmPromise = fetch(ASSET_BASE + 'rnnoise.wasm').then(function (r) {
        if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
        return r.arrayBuffer();
      });
@@ -63,71 +53,98 @@
    return wasmPromise;
  }

+  function getContext() { // Returns a cached promise for one shared AudioContext (requested at SAMPLE_RATE) with the RNNoise worklet module loaded.
+    if (!ctxPromise) {
+      ctxPromise = (function () {
+        var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
+        // If the browser ignored our 48 kHz request, RNNoise would receive
+        // wrong-rate data and emit static. Refuse to process in that case.
+        if (ctx.sampleRate !== SAMPLE_RATE) {
+          try {
+            ctx.close();
+          } catch (e) {} // best-effort: only a synchronous throw from close() is swallowed here
+          return Promise.reject(
+            new Error('AudioContext sampleRate is ' + ctx.sampleRate + ', need ' + SAMPLE_RATE),
+          );
+        }
+        return ctx.audioWorklet.addModule(ASSET_BASE + 'rnnoiseWorklet.js').then(function () {
+          return ctx.state === 'suspended' // checked once, at creation; later callers get the cached ctx as-is
+            ? ctx.resume().then(function () {
+                return ctx;
+              })
+            : ctx;
+        }); // NOTE(review): ctx is not closed if addModule()/resume() rejects, and each retry creates a new AudioContext
+      })();
+      // Don't cache a rejected context forever — allow a later retry.
+      ctxPromise.catch(function () {
+        ctxPromise = null; // callers already holding the rejected promise still see the rejection
+      });
+    }
+    return ctxPromise;
+  }
+
  function processStream(stream) {
    var audioTracks = stream.getAudioTracks();
    if (audioTracks.length === 0) return Promise.resolve(stream);

-    return loadWasm()
-      .then(function (wasmBinary) {
-        var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
-        return ctx.audioWorklet
-          .addModule(ASSET_BASE + 'rnnoiseWorklet.js')
-          .then(function () {
-            if (ctx.state === 'suspended') return ctx.resume().then(function () { return ctx; });
-            return ctx;
-          })
-          .then(function () {
-            var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
-              processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
-            });
-            var source = ctx.createMediaStreamSource(stream);
-            var dest = ctx.createMediaStreamDestination();
-            source.connect(node).connect(dest);
+    return Promise.all([loadWasm(), getContext()])
+      .then(function (res) {
+        var wasmBinary = res[0];
+        var ctx = res[1];

-            var origTrack = audioTracks[0];
-            var processedTrack = dest.stream.getAudioTracks()[0];
+        var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
+          channelCount: 1,
+          channelCountMode: 'explicit',
+          channelInterpretation: 'speakers',
+          numberOfInputs: 1,
+          numberOfOutputs: 1,
+          outputChannelCount: [1],
+          processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
+        });
+        var source = ctx.createMediaStreamSource(stream);
+        var dest = ctx.createMediaStreamDestination();
+        source.connect(node).connect(dest);

-            var torndown = false;
-            function cleanup() {
-              if (torndown) return;
-              torndown = true;
-              try {
-                node.port.postMessage('destroy');
-              } catch (e) {}
-              try {
-                source.disconnect();
-                node.disconnect();
-              } catch (e) {}
-              try {
-                origTrack.stop();
-              } catch (e) {}
-              try {
-                ctx.close();
-              } catch (e) {}
-            }
+        var origTrack = audioTracks[0];
+        var processedTrack = dest.stream.getAudioTracks()[0];

-            // When EC stops the track we handed it, release the raw capture + graph.
-            var rawStop = processedTrack.stop.bind(processedTrack);
-            processedTrack.stop = function () {
-              cleanup();
-              rawStop();
-            };
-            // Device unplugged / capture ended involuntarily.
-            origTrack.addEventListener('ended', function () {
-              try {
-                rawStop();
-              } catch (e) {}
-              cleanup();
-            });
+        var torndown = false;
+        function cleanup() {
+          if (torndown) return;
+          torndown = true;
+          try {
+            node.port.postMessage('destroy');
+          } catch (e) {}
+          try {
+            source.disconnect();
+            node.disconnect();
+          } catch (e) {}
+          try {
+            origTrack.stop();
+          } catch (e) {}
+          // Keep the shared AudioContext alive for the next capture.
+        }

-            // Return a stream with the processed audio plus any original video.
-            var out = new MediaStream();
-            out.addTrack(processedTrack);
-            stream.getVideoTracks().forEach(function (t) {
-              out.addTrack(t);
-            });
-            return out;
-          });
+        // When EC stops the track we handed it, release the raw capture + graph.
+        var rawStop = processedTrack.stop.bind(processedTrack);
+        processedTrack.stop = function () {
+          cleanup();
+          rawStop();
+        };
+        origTrack.addEventListener('ended', function () {
+          try {
+            rawStop();
+          } catch (e) {}
+          cleanup();
+        });
+
+        // Return a stream with the processed audio plus any original video.
+        var out = new MediaStream();
+        out.addTrack(processedTrack);
+        stream.getVideoTracks().forEach(function (t) {
+          out.addTrack(t);
+        });
+        return out;
      })
      .catch(function (e) {
        // Any failure -> fall back to the raw mic so calls never break.
@@ -141,9 +158,13 @@
    var wantsAudio = !!(constraints && constraints.audio);
    var effective = constraints;
    if (wantsAudio) {
-      // RNNoise owns noise suppression; keep AEC + AGC on the raw capture.
-      var audioC = typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
+      // RNNoise needs mono 48 kHz; it owns suppression. Keep AEC + AGC on the
+      // raw capture (they run before our processing).
+      var audioC =
+        typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
      audioC.noiseSuppression = false;
+      audioC.channelCount = 1;
+      audioC.sampleRate = SAMPLE_RATE;
      if (audioC.echoCancellation === undefined) audioC.echoCancellation = true;
      if (audioC.autoGainControl === undefined) audioC.autoGainControl = true;
      effective = Object.assign({}, constraints, { audio: audioC });