Files
cinny/build/lotus-denoise.js
T
jared 4a401cf816
CI / Build & Quality Checks (push) Successful in 10m26s
Trigger Desktop Build / trigger (push) Successful in 17s
fix(calls): harden ML denoise shim against static; fix lint/format
ML noise suppression produced loud static on real calls. RNNoise requires
mono 48kHz float input; feeding it stereo or wrong-rate data is the classic
cause of that static. Harden the shim:
- request mono (channelCount:1) + 48kHz capture
- run a 48kHz AudioContext and BAIL to the raw mic if the browser won't
  give a true 48kHz context (wrong-rate data -> static)
- force the worklet node to explicit mono in/out
- use the non-SIMD rnnoise.wasm (the SIMD build has produced artifacts on some GPUs)
- share one AudioContext across captures

Also fix the two CI-blocking eslint errors (unused vars in UrlPreviewCard
and useLocalMessageSearch) and apply repo-wide prettier formatting so
check:eslint and check:prettier pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 20:50:00 -04:00

177 lines
6.3 KiB
JavaScript

/*
* Lotus Chat — client-side ML noise suppression shim for Element Call.
*
* Element Call runs as a same-origin iframe widget that captures the mic
* internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
* We can't reach that track from the host. Instead this classic <script> is
* injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
* deferred module entry, so it runs first and monkeypatches getUserMedia. When
* the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
* captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
* and hand the processed track back to EC/LiveKit.
*
* RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
* or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
* - request mono + 48 kHz capture,
* - run a 48 kHz AudioContext and BAIL to the raw mic if the browser refuses
* to give us a real 48 kHz context,
* - use the non-SIMD wasm (the SIMD build has produced artifacts on some GPUs).
*
* Any failure falls back to the unprocessed mic so calls never break.
*/
(function () {
'use strict';
try {
var params = new URLSearchParams(window.location.search);
if (params.get('lotusDenoise') !== 'ml') return;
} catch (e) {
return;
}
var md = navigator.mediaDevices;
if (!md || typeof md.getUserMedia !== 'function') return;
if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;
var PROCESSOR_NAME = '@sapphi-red/web-noise-suppressor/rnnoise';
var ASSET_BASE = './denoise/';
var SAMPLE_RATE = 48000; // RNNoise worklet assumes 48kHz
var origGetUserMedia = md.getUserMedia.bind(md);
var wasmPromise = null;
var ctxPromise = null; // shared AudioContext + worklet module, created once
/**
 * Fetch the RNNoise wasm binary, sharing one download between all captures.
 *
 * A successful (or still in-flight) download is cached in `wasmPromise`. A
 * failed download is NOT kept: the cache is cleared on rejection so a
 * transient network error does not disable denoising until the page reloads.
 *
 * @returns {Promise<ArrayBuffer>} The raw wasm bytes. Rejects when the fetch
 *   fails or the response status is not ok.
 */
function loadWasm() {
  if (!wasmPromise) {
    // Non-SIMD build for maximum compatibility — the SIMD wasm has produced
    // static on some browser/GPU combinations.
    var pending = fetch(ASSET_BASE + 'rnnoise.wasm').then(function (r) {
      if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
      return r.arrayBuffer();
    });
    wasmPromise = pending;
    // Don't cache a failed download forever — allow a later retry. The
    // identity check keeps a stale failure from clobbering a newer attempt.
    pending.catch(function () {
      if (wasmPromise === pending) wasmPromise = null;
    });
  }
  return wasmPromise;
}
/**
 * Lazily create the shared AudioContext and register the RNNoise worklet
 * module on it. The resulting promise is cached in `ctxPromise` so every
 * capture reuses the same context.
 *
 * Every failure is reported as a rejected promise, never as a synchronous
 * throw, so callers can rely on a single `.catch` for their fallback. On
 * failure the cache is cleared (a later call retries from scratch) and any
 * context that was already created is closed so retries do not pile up
 * open contexts.
 *
 * @returns {Promise<AudioContext>} Resolves with a context whose sampleRate
 *   equals SAMPLE_RATE and that has the worklet module loaded. Rejects when
 *   the context cannot be constructed, runs at a different rate, or the
 *   module load / resume fails.
 */
function getContext() {
  if (ctxPromise) return ctxPromise;

  var ctx = null;

  // Best-effort close of a context we are abandoning. close() returns a
  // promise, so a bare try/catch would leave its rejection unhandled.
  function discardContext() {
    if (!ctx) return;
    try {
      var closing = ctx.close();
      if (closing && typeof closing.catch === 'function') {
        closing.catch(function () {
          // Already closed or closing — nothing left to release.
        });
      }
    } catch (e) {
      // close() itself threw; the context is unusable either way.
    }
  }

  // The executor runs synchronously, but anything it throws (including the
  // AudioContext constructor rejecting the requested rate) becomes a
  // rejection instead of escaping to the caller.
  var pending = new Promise(function (resolve) {
    ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
    // If the browser ignored our 48 kHz request, RNNoise would receive
    // wrong-rate data and emit static. Refuse to process in that case.
    if (ctx.sampleRate !== SAMPLE_RATE) {
      throw new Error('AudioContext sampleRate is ' + ctx.sampleRate + ', need ' + SAMPLE_RATE);
    }
    resolve(ctx.audioWorklet.addModule(ASSET_BASE + 'rnnoiseWorklet.js'));
  })
    .then(function () {
      return ctx.state === 'suspended' ? ctx.resume() : undefined;
    })
    .then(
      function () {
        return ctx;
      },
      function (err) {
        // Don't cache a rejected context forever — allow a later retry.
        if (ctxPromise === pending) ctxPromise = null;
        discardContext();
        throw err;
      },
    );

  ctxPromise = pending;
  return pending;
}
/**
 * Route the first audio track of a captured stream through the RNNoise
 * worklet and return a new stream carrying the processed audio plus any
 * original video tracks.
 *
 * Stopping the processed track (or the raw track ending on its own) tears the
 * audio graph down and stops the raw capture; the shared AudioContext is left
 * open for the next capture.
 *
 * This function never throws and never rejects: every failure, including a
 * synchronous exception from the wasm/context helpers, resolves to the
 * untouched input stream, and any partially built graph is released first.
 *
 * @param {MediaStream} stream Stream returned by the native getUserMedia.
 * @returns {Promise<MediaStream>} The processed stream, or `stream` itself
 *   when it has no audio track or the denoise setup fails.
 */
function processStream(stream) {
  var audioTracks = stream.getAudioTracks();
  if (audioTracks.length === 0) return Promise.resolve(stream);

  var node = null;
  var source = null;

  // Destroy and detach whatever part of the audio graph currently exists.
  // Safe to call repeatedly and with a half-built graph.
  function releaseGraph() {
    if (node) {
      try {
        node.port.postMessage('destroy');
      } catch (e) {
        // Port already closed — the processor is gone anyway.
      }
    }
    if (source) {
      try {
        source.disconnect();
      } catch (e) {
        // Already disconnected.
      }
    }
    if (node) {
      try {
        node.disconnect();
      } catch (e) {
        // Already disconnected.
      }
    }
    node = null;
    source = null;
  }

  // Start the chain from an already-resolved promise so that a synchronous
  // throw inside loadWasm()/getContext() lands in the .catch below instead of
  // escaping and rejecting the caller's getUserMedia.
  return Promise.resolve()
    .then(function () {
      return Promise.all([loadWasm(), getContext()]);
    })
    .then(function (res) {
      var wasmBinary = res[0];
      var ctx = res[1];
      node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
        channelCount: 1,
        channelCountMode: 'explicit',
        channelInterpretation: 'speakers',
        numberOfInputs: 1,
        numberOfOutputs: 1,
        outputChannelCount: [1],
        processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
      });
      source = ctx.createMediaStreamSource(stream);
      var dest = ctx.createMediaStreamDestination();
      source.connect(node).connect(dest);
      var origTrack = audioTracks[0];
      var processedTrack = dest.stream.getAudioTracks()[0];

      // Assemble the output first: if this fails we fall back to the raw
      // stream without having patched or hooked any track.
      var out = new MediaStream();
      out.addTrack(processedTrack);
      stream.getVideoTracks().forEach(function (t) {
        out.addTrack(t);
      });

      var torndown = false;
      function cleanup() {
        if (torndown) return;
        torndown = true;
        releaseGraph();
        try {
          origTrack.stop();
        } catch (e) {
          // Raw track already stopped.
        }
        // Keep the shared AudioContext alive for the next capture.
      }

      // When EC stops the track we handed it, release the raw capture + graph.
      var rawStop = processedTrack.stop.bind(processedTrack);
      processedTrack.stop = function () {
        cleanup();
        rawStop();
      };
      // If the raw capture ends by itself, end the processed track as well.
      origTrack.addEventListener('ended', function () {
        try {
          rawStop();
        } catch (e) {
          // Processed track already stopped.
        }
        cleanup();
      });
      return out;
    })
    .catch(function (e) {
      // Drop any half-built graph, but leave the raw track running: it is
      // what we hand back.
      releaseGraph();
      // Any failure -> fall back to the raw mic so calls never break.
      // eslint-disable-next-line no-console
      console.error('[lotus-denoise] RNNoise setup failed, using raw mic', e);
      return stream;
    });
}
// Replacement getUserMedia: audio requests are re-issued with RNNoise-friendly
// constraints and their result is run through processStream(); requests
// without audio are forwarded unchanged.
navigator.mediaDevices.getUserMedia = function (constraints) {
  var audioRequest = constraints ? constraints.audio : undefined;

  if (!audioRequest) {
    return origGetUserMedia(constraints).then(function (stream) {
      return stream;
    });
  }

  // Start from the caller's audio constraints when they are an object
  // (`audio: true` carries nothing to copy).
  var audioC = {};
  if (typeof audioRequest === 'object') {
    Object.assign(audioC, audioRequest);
  }

  // RNNoise needs mono 48 kHz; it owns suppression.
  audioC.noiseSuppression = false;
  audioC.channelCount = 1;
  audioC.sampleRate = SAMPLE_RATE;

  // Keep AEC + AGC on the raw capture (they run before our processing),
  // unless the caller expressed a preference of their own.
  if (typeof audioC.echoCancellation === 'undefined') {
    audioC.echoCancellation = true;
  }
  if (typeof audioC.autoGainControl === 'undefined') {
    audioC.autoGainControl = true;
  }

  // Copy the outer constraints so the caller's object is never mutated.
  var effective = Object.assign({}, constraints, { audio: audioC });
  return origGetUserMedia(effective).then(function (stream) {
    return processStream(stream);
  });
};
})();