feat(calls): 3-tier mic noise suppression with on-device ML (P5-30)
CI / Build & Quality Checks (push) Successful in 10m33s
Trigger Desktop Build / trigger (push) Successful in 6s

Replace the boolean call noise-suppression setting with a 3-way control
(Off / Browser-native / ML beta) in Settings -> General -> Calls.

- Off: noiseSuppression=false to Element Call
- Browser-native: EC's built-in WebRTC suppressor (prior default)
- ML (beta): on-device RNNoise (@sapphi-red/web-noise-suppressor)

Element Call captures the mic inside its iframe and publishes to LiveKit,
so the host can't reach that track; LiveKit's Krisp filter is Cloud-only
(we self-host the SFU) and EC's own RNNoise PR #3892 is unmerged. The ML
tier instead injects a same-origin pre-init shim into the vendored EC
index.html (build/lotus-denoise.js, wired by the lotusDenoise vite plugin)
that patches getUserMedia and routes the captured mic through an RNNoise
AudioWorklet before LiveKit sees it -- the same post-capture pipeline as
#3892, with no EC fork/AGPL/rebase burden. Falls back to the raw mic if
setup fails; keeps echoCancellation/AGC on the raw capture.

- settings.ts: callNoiseSuppression -> 'off'|'browser'|'ml' + legacy
  boolean migration (true->browser, false->off)
- CallEmbed/useCallEmbed: tier maps to noiseSuppression param and appends
  lotusDenoise=ml (native suppressor off in ML mode)
- vite.config.js: copy RNNoise worklet/wasm + shim into the EC bundle and
  inject the shim <script> before EC's module entry
- docs: LOTUS_FEATURES.md, LOTUS_TODO.md (P5-30 done)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 20:29:59 -04:00
parent f9edd2023d
commit 5deed79b42
10 changed files with 299 additions and 38 deletions
+155
View File
@@ -0,0 +1,155 @@
/*
* Lotus Chat — client-side ML noise suppression shim for Element Call.
*
* Element Call runs as a same-origin iframe widget that captures the mic
* internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
* We can't reach that track from the host. Instead this classic <script> is
* injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
* deferred module entry, so it runs first and monkeypatches getUserMedia. When
* the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
* captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
* and hand the processed track back to EC/LiveKit.
*
* This mirrors Element Call's own (still-unmerged) PR #3892 pipeline, executed
* from the realm we already control instead of forking and rebuilding EC.
*
* Known beta caveat: routing capture through WebAudio can weaken the browser's
* acoustic echo cancellation (AEC operates on the native capture track). To
* mitigate, echoCancellation/autoGainControl default to on for the raw capture
* unless the caller set them explicitly; only noiseSuppression is forced off.
*/
(function () {
'use strict';
try {
var params = new URLSearchParams(window.location.search);
if (params.get('lotusDenoise') !== 'ml') return;
} catch (e) {
return;
}
var md = navigator.mediaDevices;
if (!md || typeof md.getUserMedia !== 'function') return;
if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;
var PROCESSOR_NAME = '@sapphi-red/web-noise-suppressor/rnnoise';
var ASSET_BASE = './denoise/';
var SAMPLE_RATE = 48000; // RNNoise worklet assumes 48kHz
var origGetUserMedia = md.getUserMedia.bind(md);
var wasmPromise = null;
/**
 * Report whether the runtime accepts WebAssembly SIMD.
 *
 * Validates a tiny probe module (bytes from @sapphi-red/web-noise-suppressor /
 * wasm-feature-detect). The probe starts with the "\0asm" magic + version 1
 * header; the rest is copied verbatim from upstream.
 *
 * @returns {boolean} Result of WebAssembly.validate on the probe, or false if
 *   validation throws (including when WebAssembly itself is unavailable).
 */
function hasSimd() {
  var simdProbe = [
    0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
    253, 15, 253, 98, 11,
  ];
  var supported = false;
  try {
    supported = WebAssembly.validate(new Uint8Array(simdProbe));
  } catch (err) {
    supported = false;
  }
  return supported;
}
/**
 * Fetch the RNNoise wasm binary, choosing the SIMD build when hasSimd()
 * reports support, and memoize the in-flight/successful request in
 * `wasmPromise` so repeated captures share one download.
 *
 * A failed attempt is NOT memoized: the cache is cleared before the rejection
 * reaches the caller, so a transient network/HTTP error does not disable
 * denoising for the rest of the page's lifetime — the next call retries.
 *
 * @returns {Promise<ArrayBuffer>} Resolves with the wasm bytes; rejects when
 *   the fetch fails or the response status is not ok.
 */
function loadWasm() {
  if (!wasmPromise) {
    var url = ASSET_BASE + (hasSimd() ? 'rnnoise_simd.wasm' : 'rnnoise.wasm');
    wasmPromise = fetch(url)
      .then(function (r) {
        if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
        return r.arrayBuffer();
      })
      .catch(function (err) {
        // Drop the poisoned cache entry so a later call can retry, then
        // propagate the original failure to this caller.
        wasmPromise = null;
        throw err;
      });
  }
  return wasmPromise;
}
/**
 * Route the audio of a captured stream through the RNNoise AudioWorklet and
 * return a stream carrying the processed audio.
 *
 * Graph: MediaStreamSource(stream) -> AudioWorkletNode(PROCESSOR_NAME) ->
 * MediaStreamDestination. The destination's audio track is handed to the
 * caller; its stop() is wrapped so stopping it also tears down the graph and
 * stops the raw capture track.
 *
 * @param {MediaStream} stream - Stream resolved by the original getUserMedia.
 * @returns {Promise<MediaStream>} A new stream with the processed audio track
 *   plus the input's video tracks. Resolves with the untouched input stream
 *   when it has no audio track or when any setup step fails (the failure is
 *   logged, and any AudioContext created along the way is closed).
 */
function processStream(stream) {
  var audioTracks = stream.getAudioTracks();
  if (audioTracks.length === 0) return Promise.resolve(stream);

  // Hoisted so the failure path can release a context created before a later
  // setup step threw or rejected.
  var ctx = null;

  // Best-effort close. AudioContext#close() reports failure through its
  // returned promise, so both the sync and async error paths are swallowed.
  function closeContext() {
    if (!ctx) return;
    try {
      var closing = ctx.close();
      if (closing && typeof closing.catch === 'function') closing.catch(function () {});
    } catch (e) {}
  }

  // Start from a resolved promise so a synchronous throw inside loadWasm is
  // routed to the fallback below instead of escaping to the caller.
  return Promise.resolve()
    .then(loadWasm)
    .then(function (wasmBinary) {
      ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
      return ctx.audioWorklet
        .addModule(ASSET_BASE + 'rnnoiseWorklet.js')
        .then(function () {
          // A context may start suspended; the graph needs it running.
          if (ctx.state === 'suspended') return ctx.resume();
          return undefined;
        })
        .then(function () {
          var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
            processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
          });
          var source = ctx.createMediaStreamSource(stream);
          var dest = ctx.createMediaStreamDestination();
          source.connect(node).connect(dest);
          var origTrack = audioTracks[0];
          var processedTrack = dest.stream.getAudioTracks()[0];
          var torndown = false;

          // Idempotent teardown; every step is best-effort so one failing
          // step cannot prevent the others from running.
          function cleanup() {
            if (torndown) return;
            torndown = true;
            try {
              node.port.postMessage('destroy');
            } catch (e) {}
            try {
              source.disconnect();
              node.disconnect();
            } catch (e) {}
            try {
              origTrack.stop();
            } catch (e) {}
            closeContext();
          }

          // When EC stops the track we handed it, release the raw capture + graph.
          var rawStop = processedTrack.stop.bind(processedTrack);
          processedTrack.stop = function () {
            cleanup();
            rawStop();
          };

          // Device unplugged / capture ended involuntarily.
          // NOTE(review): stop() does not itself fire 'ended' on the processed
          // track — confirm whether the consumer needs to be notified.
          origTrack.addEventListener('ended', function () {
            try {
              rawStop();
            } catch (e) {}
            cleanup();
          });

          // Return a stream with the processed audio plus any original video.
          var out = new MediaStream();
          out.addTrack(processedTrack);
          stream.getVideoTracks().forEach(function (t) {
            out.addTrack(t);
          });
          return out;
        });
    })
    .catch(function (e) {
      // Any failure -> fall back to the raw mic so calls never break. Release
      // the context first so repeated failures do not accumulate contexts.
      closeContext();
      // eslint-disable-next-line no-console
      console.error('[lotus-denoise] RNNoise setup failed, using raw mic', e);
      return stream;
    });
}
// Install the wrapper: audio requests are captured with the browser's own
// noise suppression switched off and then piped through processStream;
// requests without audio are forwarded with their constraints untouched.
navigator.mediaDevices.getUserMedia = function (constraints) {
  var audioRequested = Boolean(constraints && constraints.audio);
  var request = constraints;

  if (audioRequested) {
    // Work on a copy so the caller's constraint objects are never mutated.
    // `audio: true` (or any non-object truthy value) starts from an empty set.
    var requestedAudio = constraints.audio;
    var audioOverrides = {};
    if (typeof requestedAudio === 'object') {
      audioOverrides = Object.assign({}, requestedAudio);
    }

    // RNNoise owns noise suppression; keep AEC + AGC on the raw capture
    // unless the caller already chose a value for them.
    audioOverrides.noiseSuppression = false;
    if (typeof audioOverrides.echoCancellation === 'undefined') {
      audioOverrides.echoCancellation = true;
    }
    if (typeof audioOverrides.autoGainControl === 'undefined') {
      audioOverrides.autoGainControl = true;
    }

    request = Object.assign({}, constraints, { audio: audioOverrides });
  }

  var pendingCapture = origGetUserMedia(request);
  return pendingCapture.then(function (captured) {
    if (!audioRequested) return captured;
    return processStream(captured);
  });
};
})();