/*
 * Lotus Chat — client-side ML noise suppression shim for Element Call.
 *
 * Element Call runs as a same-origin iframe widget that captures the mic
 * internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
 * We can't reach that track from the host. Instead this classic <script> is
 * injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
 * deferred module entry, so it runs first and monkeypatches getUserMedia. When
 * the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
 * captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
 * and hand the processed track back to EC/LiveKit.
 *
 * This mirrors Element Call's own (still-unmerged) PR #3892 pipeline, executed
 * from the realm we already control instead of forking and rebuilding EC.
 *
 * Known beta caveat: routing capture through WebAudio can weaken the browser's
 * acoustic echo cancellation (AEC operates on the native capture track). We keep
 * echoCancellation/autoGainControl enabled on the raw capture to mitigate.
 */
(function () {
  'use strict';

  // Activate only when the widget URL explicitly selects the "ml" tier.
  // Any failure while reading the URL (including a missing `window`) leaves
  // the page untouched.
  try {
    var params = new URLSearchParams(window.location.search);
    if (params.get('lotusDenoise') !== 'ml') return;
  } catch (e) {
    return;
  }

  // Feature gates: without getUserMedia or AudioWorklet support there is
  // nothing to patch, so bail out before touching anything.
  var md = navigator.mediaDevices;
  if (!md || typeof md.getUserMedia !== 'function') return;
  if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;

  var PROCESSOR_NAME = '@sapphi-red/web-noise-suppressor/rnnoise';
  var ASSET_BASE = './denoise/';
  var SAMPLE_RATE = 48000; // RNNoise worklet assumes 48kHz

  var origGetUserMedia = md.getUserMedia.bind(md);
  // Cached promise for the wasm binary; shared by every capture.
  var wasmPromise = null;

  /**
   * SIMD feature detection (bytes from @sapphi-red/web-noise-suppressor /
   * wasm-feature-detect).
   *
   * @returns {boolean} true when the probe module validates; false when it
   *   does not or when WebAssembly.validate throws.
   */
  function hasSimd() {
    try {
      return WebAssembly.validate(
        new Uint8Array([
          0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
          253, 15, 253, 98, 11,
        ])
      );
    } catch (e) {
      return false;
    }
  }

  /**
   * Fetch the RNNoise wasm binary (SIMD build when supported) once and reuse
   * the result for later captures.
   *
   * A failed fetch is NOT cached: the cache is cleared on rejection so the
   * next capture retries instead of being stuck with a permanently rejected
   * promise after a transient network error.
   *
   * @returns {Promise<ArrayBuffer>} resolves with the wasm bytes; rejects on a
   *   network error or a non-OK HTTP status.
   */
  function loadWasm() {
    if (!wasmPromise) {
      var url = ASSET_BASE + (hasSimd() ? 'rnnoise_simd.wasm' : 'rnnoise.wasm');
      wasmPromise = fetch(url)
        .then(function (r) {
          if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
          return r.arrayBuffer();
        })
        .catch(function (err) {
          wasmPromise = null;
          throw err;
        });
    }
    return wasmPromise;
  }

  /**
   * Best-effort close of an AudioContext. close() reports failure through the
   * returned promise (e.g. when the context is already closed), so both the
   * synchronous throw and the asynchronous rejection are swallowed here.
   *
   * @param {AudioContext} ctx
   */
  function closeContext(ctx) {
    try {
      var closing = ctx.close();
      if (closing && typeof closing.catch === 'function') {
        closing.catch(function () {});
      }
    } catch (e) {
      // Deliberately ignored: teardown must never throw into the caller.
    }
  }

  /**
   * Wire `stream` through the RNNoise worklet and build the stream handed back
   * to the caller of getUserMedia.
   *
   * @param {AudioContext} ctx - context with the worklet module already added.
   * @param {MediaStream} stream - raw capture stream.
   * @param {MediaStreamTrack} origTrack - raw audio track feeding the graph.
   * @param {ArrayBuffer} wasmBinary - RNNoise wasm bytes for the processor.
   * @returns {MediaStream} processed audio track plus any original video tracks.
   */
  function buildProcessedStream(ctx, stream, origTrack, wasmBinary) {
    var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
      processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
    });
    var source = ctx.createMediaStreamSource(stream);
    var dest = ctx.createMediaStreamDestination();
    source.connect(node).connect(dest);

    var processedTrack = dest.stream.getAudioTracks()[0];

    var torndown = false;
    // Idempotent teardown of the worklet, the graph, the raw capture and the
    // context. Each step is isolated so one failure cannot skip the rest.
    function cleanup() {
      if (torndown) return;
      torndown = true;
      try {
        node.port.postMessage('destroy');
      } catch (e) {}
      try {
        source.disconnect();
        node.disconnect();
      } catch (e) {}
      try {
        origTrack.stop();
      } catch (e) {}
      closeContext(ctx);
    }

    // When EC stops the track we handed it, release the raw capture + graph.
    var rawStop = processedTrack.stop.bind(processedTrack);
    processedTrack.stop = function () {
      cleanup();
      rawStop();
    };

    // Device unplugged / capture ended involuntarily.
    origTrack.addEventListener('ended', function () {
      var alreadyStopped = torndown;
      try {
        rawStop();
      } catch (e) {}
      cleanup();
      // stop() never fires 'ended', so without this the consumer of the
      // processed track would not learn that the capture died. Skip it when
      // the consumer had already stopped the track itself.
      if (!alreadyStopped) {
        try {
          processedTrack.dispatchEvent(new Event('ended'));
        } catch (e) {}
      }
    });

    // Return a stream with the processed audio plus any original video.
    var out = new MediaStream();
    out.addTrack(processedTrack);
    stream.getVideoTracks().forEach(function (t) {
      out.addTrack(t);
    });
    return out;
  }

  /**
   * Route the first audio track of `stream` through RNNoise.
   *
   * Never rejects: on any setup failure the error is logged, the
   * AudioContext created for this attempt (if any) is closed, and the raw
   * stream is returned so calls never break. Note that the raw stream was
   * captured with noiseSuppression forced off by the getUserMedia override.
   *
   * @param {MediaStream} stream - stream returned by the real getUserMedia.
   * @returns {Promise<MediaStream>} processed stream, or `stream` itself when
   *   it has no audio track or setup failed.
   */
  function processStream(stream) {
    var audioTracks = stream.getAudioTracks();
    if (audioTracks.length === 0) return Promise.resolve(stream);

    // Hoisted so the failure path can release it.
    var ctx = null;

    return loadWasm()
      .then(function (wasmBinary) {
        ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
        return ctx.audioWorklet
          .addModule(ASSET_BASE + 'rnnoiseWorklet.js')
          .then(function () {
            if (ctx.state === 'suspended') return ctx.resume();
            return undefined;
          })
          .then(function () {
            return buildProcessedStream(ctx, stream, audioTracks[0], wasmBinary);
          });
      })
      .catch(function (e) {
        // Any failure -> fall back to the raw mic so calls never break.
        if (ctx) closeContext(ctx);
        // eslint-disable-next-line no-console
        console.error('[lotus-denoise] RNNoise setup failed, using raw mic', e);
        return stream;
      });
  }

  /**
   * Replacement for navigator.mediaDevices.getUserMedia.
   *
   * For audio requests it copies the constraints (the caller's object is not
   * mutated), forces noiseSuppression off, defaults echoCancellation and
   * autoGainControl to true when unset, then pipes the captured stream
   * through processStream. Non-audio requests pass straight through.
   *
   * @param {MediaStreamConstraints} constraints
   * @returns {Promise<MediaStream>}
   */
  navigator.mediaDevices.getUserMedia = function (constraints) {
    var wantsAudio = !!(constraints && constraints.audio);
    var effective = constraints;
    if (wantsAudio) {
      // RNNoise owns noise suppression; keep AEC + AGC on the raw capture.
      var audioC = typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
      audioC.noiseSuppression = false;
      if (audioC.echoCancellation === undefined) audioC.echoCancellation = true;
      if (audioC.autoGainControl === undefined) audioC.autoGainControl = true;
      effective = Object.assign({}, constraints, { audio: audioC });
    }
    return origGetUserMedia(effective).then(function (stream) {
      return wantsAudio ? processStream(stream) : stream;
    });
  };
})();