feat(calls): 3-tier mic noise suppression with on-device ML (P5-30)

Replace the boolean call noise-suppression setting with a 3-way control (Off / Browser-native / ML beta) in Settings -> General -> Calls.

- Off: noiseSuppression=false to Element Call
- Browser-native: EC's built-in WebRTC suppressor (prior default)
- ML (beta): on-device RNNoise (@sapphi-red/web-noise-suppressor)

Element Call captures the mic inside its iframe and publishes to LiveKit, so the host can't reach that track; LiveKit's Krisp filter is Cloud-only (we self-host the SFU) and EC's own RNNoise PR #3892 is unmerged. The ML tier instead injects a same-origin pre-init shim into the vendored EC index.html (build/lotus-denoise.js, wired by the lotusDenoise vite plugin) that patches getUserMedia and routes the captured mic through an RNNoise AudioWorklet before LiveKit sees it -- the same post-capture pipeline as #3892, with no EC fork/AGPL/rebase burden. Falls back to the raw mic if setup fails; keeps echoCancellation/AGC on the raw capture.

- settings.ts: callNoiseSuppression -> 'off'|'browser'|'ml' + legacy boolean migration (true->browser, false->off)
- CallEmbed/useCallEmbed: tier maps to noiseSuppression param and appends lotusDenoise=ml (native suppressor off in ML mode)
- vite.config.js: copy RNNoise worklet/wasm + shim into the EC bundle and inject the shim <script> before EC's module entry
- docs: LOTUS_FEATURES.md, LOTUS_TODO.md (P5-30 done)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 20:29:59 -04:00
parent f9edd2023d
commit 5deed79b42
10 changed files with 299 additions and 38 deletions
@@ -0,0 +1,155 @@
+/*
+ * Lotus Chat — client-side ML noise suppression shim for Element Call.
+ *
+ * Element Call runs as a same-origin iframe widget that captures the mic
+ * internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
+ * We can't reach that track from the host. Instead this classic <script> is
+ * injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
+ * deferred module entry, so it runs first and monkeypatches getUserMedia. When
+ * the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
+ * captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
+ * and hand the processed track back to EC/LiveKit.
+ *
+ * This mirrors Element Call's own (still-unmerged) PR #3892 pipeline, executed
+ * from the realm we already control instead of forking and rebuilding EC.
+ *
+ * Known beta caveat: routing capture through WebAudio can weaken the browser's
+ * acoustic echo cancellation (AEC operates on the native capture track). We keep
+ * echoCancellation/autoGainControl enabled on the raw capture to mitigate.
+ */
+(function () {
+  'use strict';
+
+  // Opt-in gate: do nothing unless the widget URL carries lotusDenoise=ml.
+  // Failing to read or parse the query string is treated the same as "not ml".
+  var requestedTier = null;
+  try {
+    requestedTier = new URLSearchParams(window.location.search).get('lotusDenoise');
+  } catch (e) {
+    requestedTier = null;
+  }
+  if (requestedTier !== 'ml') return;
+
+  // Capability gate: leave getUserMedia untouched when either mic capture or
+  // the AudioWorklet/AudioContext APIs are missing.
+  var mediaDevices = navigator.mediaDevices;
+  var canCapture = Boolean(mediaDevices) && typeof mediaDevices.getUserMedia === 'function';
+  var canProcess = typeof AudioContext !== 'undefined' && typeof AudioWorkletNode !== 'undefined';
+  if (!canCapture || !canProcess) return;
+
+  var SAMPLE_RATE = 48000; // RNNoise worklet assumes 48kHz
+  var ASSET_BASE = './denoise/';
+  var PROCESSOR_NAME = '@sapphi-red/web-noise-suppressor/rnnoise';
+
+  // Memo for the wasm download, shared by every capture (see loadWasm).
+  var wasmPromise = null;
+  // Unpatched getUserMedia, bound so it can be called after the override is installed.
+  var origGetUserMedia = mediaDevices.getUserMedia.bind(mediaDevices);
+
+  /**
+   * Report whether this engine accepts WebAssembly SIMD.
+   *
+   * Validates a tiny probe module (bytes from @sapphi-red/web-noise-suppressor /
+   * wasm-feature-detect). Any exception raised while probing counts as "no SIMD".
+   *
+   * @returns {boolean} result of WebAssembly.validate on the probe, or false if
+   *   probing throws.
+   */
+  function hasSimd() {
+    var probeBytes = [
+      0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
+      253, 15, 253, 98, 11,
+    ];
+    var supported = false;
+    try {
+      supported = WebAssembly.validate(new Uint8Array(probeBytes));
+    } catch (e) {
+      supported = false;
+    }
+    return supported;
+  }
+
+  /**
+   * Fetch the RNNoise wasm binary once and share it between captures.
+   *
+   * Picks the SIMD build when hasSimd() reports support. The in-flight or
+   * resolved promise is memoised in `wasmPromise`. A failed download clears the
+   * memo, so a later getUserMedia call retries instead of being handed the same
+   * cached rejection for the rest of the page's life.
+   *
+   * @returns {Promise<ArrayBuffer>} resolves with the wasm bytes; rejects when
+   *   the fetch fails or the response status is not ok.
+   */
+  function loadWasm() {
+    if (wasmPromise) return wasmPromise;
+
+    var url = ASSET_BASE + (hasSimd() ? 'rnnoise_simd.wasm' : 'rnnoise.wasm');
+    var pending = fetch(url)
+      .then(function (r) {
+        if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
+        return r.arrayBuffer();
+      })
+      .catch(function (err) {
+        // Drop the memo only if it still points at this attempt, then let the
+        // current caller see the failure.
+        if (wasmPromise === pending) wasmPromise = null;
+        throw err;
+      });
+    wasmPromise = pending;
+    return pending;
+  }
+
+  /**
+   * Swap the captured microphone track for an RNNoise-processed one.
+   *
+   * Builds source -> RNNoise AudioWorkletNode -> MediaStreamDestination on a
+   * dedicated AudioContext and resolves with a new MediaStream holding the
+   * processed audio track plus the original video tracks. A stream without
+   * audio tracks is resolved unchanged.
+   *
+   * Lifetime: stopping the returned audio track tears down the graph, stops the
+   * raw capture track and closes the context. If the raw capture ends on its
+   * own, the processed track is stopped and an `ended` event is dispatched on
+   * it, because MediaStreamTrack.stop() does not fire `ended` by itself.
+   *
+   * @param {MediaStream} stream - stream resolved by the original getUserMedia.
+   * @returns {Promise<MediaStream>} the processed stream; any setup failure is
+   *   logged and resolves with the unprocessed `stream` instead.
+   */
+  function processStream(stream) {
+    var audioTracks = stream.getAudioTracks();
+    if (audioTracks.length === 0) return Promise.resolve(stream);
+
+    // Best-effort AudioContext shutdown. close() reports failure through the
+    // promise it returns, so a bare try/catch would leave an unhandled rejection.
+    function closeContext(ctx) {
+      try {
+        var closing = ctx.close();
+        if (closing && typeof closing.catch === 'function') closing.catch(function () {});
+      } catch (e) {}
+    }
+
+    return loadWasm()
+      .then(function (wasmBinary) {
+        var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
+        return ctx.audioWorklet
+          .addModule(ASSET_BASE + 'rnnoiseWorklet.js')
+          .then(function () {
+            if (ctx.state === 'suspended') return ctx.resume();
+            return undefined;
+          })
+          .then(function () {
+            var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
+              processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
+            });
+            var source = ctx.createMediaStreamSource(stream);
+            var dest = ctx.createMediaStreamDestination();
+            source.connect(node).connect(dest);
+
+            var origTrack = audioTracks[0];
+            var processedTrack = dest.stream.getAudioTracks()[0];
+
+            // Idempotent teardown; each step is best-effort so one failure does
+            // not prevent the remaining resources from being released.
+            var torndown = false;
+            function cleanup() {
+              if (torndown) return;
+              torndown = true;
+              try {
+                node.port.postMessage('destroy');
+              } catch (e) {}
+              try {
+                source.disconnect();
+                node.disconnect();
+              } catch (e) {}
+              try {
+                origTrack.stop();
+              } catch (e) {}
+              closeContext(ctx);
+            }
+
+            // When EC stops the track we handed it, release the raw capture + graph.
+            var rawStop = processedTrack.stop.bind(processedTrack);
+            processedTrack.stop = function () {
+              cleanup();
+              rawStop();
+            };
+            // Device unplugged / capture ended involuntarily.
+            origTrack.addEventListener('ended', function () {
+              try {
+                rawStop();
+              } catch (e) {}
+              cleanup();
+              // stop() never fires 'ended', so signal the consumer explicitly;
+              // otherwise it cannot tell that the capture is gone.
+              try {
+                processedTrack.dispatchEvent(new Event('ended'));
+              } catch (e) {}
+            });
+
+            // Return a stream with the processed audio plus any original video.
+            var out = new MediaStream();
+            out.addTrack(processedTrack);
+            stream.getVideoTracks().forEach(function (t) {
+              out.addTrack(t);
+            });
+            return out;
+          })
+          .catch(function (err) {
+            // Setup failed after the context was created: release it before
+            // falling back, otherwise every failed capture leaks a context.
+            closeContext(ctx);
+            throw err;
+          });
+      })
+      .catch(function (e) {
+        // Any failure -> fall back to the raw mic so calls never break.
+        // eslint-disable-next-line no-console
+        console.error('[lotus-denoise] RNNoise setup failed, using raw mic', e);
+        return stream;
+      });
+  }
+
+  /**
+   * Patched getUserMedia installed for the "ml" tier.
+   *
+   * Requests without audio are forwarded to the original getUserMedia with
+   * their constraints untouched. Requests with audio get a copied constraint set
+   * in which noiseSuppression is forced off (RNNoise owns noise suppression)
+   * and echoCancellation / autoGainControl default to true when the caller left
+   * them undefined; the resulting stream is then passed through processStream.
+   *
+   * @param {MediaStreamConstraints} constraints - caller's constraints; never mutated.
+   * @returns {Promise<MediaStream>} the stream to hand back to the caller.
+   */
+  navigator.mediaDevices.getUserMedia = function (constraints) {
+    var audioRequested = Boolean(constraints && constraints.audio);
+    var request = constraints;
+
+    if (audioRequested) {
+      var requestedAudio = constraints.audio;
+      // `audio: true` carries no options, so start from an empty object then.
+      var audioConstraints = Object.assign(
+        {},
+        typeof requestedAudio === 'object' ? requestedAudio : null
+      );
+      audioConstraints.noiseSuppression = false;
+      // Keep AEC + AGC on the raw capture unless the caller chose explicitly.
+      ['echoCancellation', 'autoGainControl'].forEach(function (key) {
+        if (audioConstraints[key] === undefined) audioConstraints[key] = true;
+      });
+      request = Object.assign({}, constraints, { audio: audioConstraints });
+    }
+
+    return origGetUserMedia(request).then(function (stream) {
+      if (!audioRequested) return stream;
+      return processStream(stream);
+    });
+  };
+})();