fix(calls): harden ML denoise shim against static; fix lint/format
ML noise suppression produced loud static on real calls. RNNoise requires mono 48kHz float input; feeding it stereo or wrong-rate data is the classic cause of that static.

Harden the shim:
- request mono (channelCount:1) + 48kHz capture
- run a 48kHz AudioContext and BAIL to the raw mic if the browser won't give a true 48kHz context (wrong-rate data -> static)
- force the worklet node to explicit mono in/out
- use the non-SIMD rnnoise.wasm (the SIMD build has produced artifacts on some GPUs)
- share one AudioContext across captures

Also fix the two CI-blocking eslint errors (unused vars in UrlPreviewCard and useLocalMessageSearch) and apply repo-wide prettier formatting so check:eslint and check:prettier pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+101
-80
@@ -10,12 +10,14 @@
|
||||
* captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
|
||||
* and hand the processed track back to EC/LiveKit.
|
||||
*
|
||||
* This mirrors Element Call's own (still-unmerged) PR #3892 pipeline, executed
|
||||
* from the realm we already control instead of forking and rebuilding EC.
|
||||
* RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
|
||||
* or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
|
||||
* - request mono + 48 kHz capture,
|
||||
* - run a 48 kHz AudioContext and BAIL to the raw mic if the browser refuses
|
||||
* to give us a real 48 kHz context,
|
||||
* - use the non-SIMD wasm (the SIMD build has produced artifacts on some GPUs).
|
||||
*
|
||||
* Known beta caveat: routing capture through WebAudio can weaken the browser's
|
||||
* acoustic echo cancellation (AEC operates on the native capture track). We keep
|
||||
* echoCancellation/autoGainControl enabled on the raw capture to mitigate.
|
||||
* Any failure falls back to the unprocessed mic so calls never break.
|
||||
*/
|
||||
(function () {
|
||||
'use strict';
|
||||
@@ -37,25 +39,13 @@
|
||||
|
||||
var origGetUserMedia = md.getUserMedia.bind(md);
|
||||
var wasmPromise = null;
|
||||
|
||||
// SIMD feature detection (bytes from @sapphi-red/web-noise-suppressor / wasm-feature-detect)
|
||||
function hasSimd() {
|
||||
try {
|
||||
return WebAssembly.validate(
|
||||
new Uint8Array([
|
||||
0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
|
||||
253, 15, 253, 98, 11,
|
||||
])
|
||||
);
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
var ctxPromise = null; // shared AudioContext + worklet module, created once
|
||||
|
||||
function loadWasm() {
|
||||
if (!wasmPromise) {
|
||||
var url = ASSET_BASE + (hasSimd() ? 'rnnoise_simd.wasm' : 'rnnoise.wasm');
|
||||
wasmPromise = fetch(url).then(function (r) {
|
||||
// Non-SIMD build for maximum compatibility — the SIMD wasm has produced
|
||||
// static on some browser/GPU combinations.
|
||||
wasmPromise = fetch(ASSET_BASE + 'rnnoise.wasm').then(function (r) {
|
||||
if (!r.ok) throw new Error('rnnoise wasm fetch failed: ' + r.status);
|
||||
return r.arrayBuffer();
|
||||
});
|
||||
@@ -63,71 +53,98 @@
|
||||
return wasmPromise;
|
||||
}
|
||||
|
||||
function getContext() {
|
||||
if (!ctxPromise) {
|
||||
ctxPromise = (function () {
|
||||
var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
|
||||
// If the browser ignored our 48 kHz request, RNNoise would receive
|
||||
// wrong-rate data and emit static. Refuse to process in that case.
|
||||
if (ctx.sampleRate !== SAMPLE_RATE) {
|
||||
try {
|
||||
ctx.close();
|
||||
} catch (e) {}
|
||||
return Promise.reject(
|
||||
new Error('AudioContext sampleRate is ' + ctx.sampleRate + ', need ' + SAMPLE_RATE),
|
||||
);
|
||||
}
|
||||
return ctx.audioWorklet.addModule(ASSET_BASE + 'rnnoiseWorklet.js').then(function () {
|
||||
return ctx.state === 'suspended'
|
||||
? ctx.resume().then(function () {
|
||||
return ctx;
|
||||
})
|
||||
: ctx;
|
||||
});
|
||||
})();
|
||||
// Don't cache a rejected context forever — allow a later retry.
|
||||
ctxPromise.catch(function () {
|
||||
ctxPromise = null;
|
||||
});
|
||||
}
|
||||
return ctxPromise;
|
||||
}
|
||||
|
||||
function processStream(stream) {
|
||||
var audioTracks = stream.getAudioTracks();
|
||||
if (audioTracks.length === 0) return Promise.resolve(stream);
|
||||
|
||||
return loadWasm()
|
||||
.then(function (wasmBinary) {
|
||||
var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
|
||||
return ctx.audioWorklet
|
||||
.addModule(ASSET_BASE + 'rnnoiseWorklet.js')
|
||||
.then(function () {
|
||||
if (ctx.state === 'suspended') return ctx.resume().then(function () { return ctx; });
|
||||
return ctx;
|
||||
})
|
||||
.then(function () {
|
||||
var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
|
||||
processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
|
||||
});
|
||||
var source = ctx.createMediaStreamSource(stream);
|
||||
var dest = ctx.createMediaStreamDestination();
|
||||
source.connect(node).connect(dest);
|
||||
return Promise.all([loadWasm(), getContext()])
|
||||
.then(function (res) {
|
||||
var wasmBinary = res[0];
|
||||
var ctx = res[1];
|
||||
|
||||
var origTrack = audioTracks[0];
|
||||
var processedTrack = dest.stream.getAudioTracks()[0];
|
||||
var node = new AudioWorkletNode(ctx, PROCESSOR_NAME, {
|
||||
channelCount: 1,
|
||||
channelCountMode: 'explicit',
|
||||
channelInterpretation: 'speakers',
|
||||
numberOfInputs: 1,
|
||||
numberOfOutputs: 1,
|
||||
outputChannelCount: [1],
|
||||
processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
|
||||
});
|
||||
var source = ctx.createMediaStreamSource(stream);
|
||||
var dest = ctx.createMediaStreamDestination();
|
||||
source.connect(node).connect(dest);
|
||||
|
||||
var torndown = false;
|
||||
function cleanup() {
|
||||
if (torndown) return;
|
||||
torndown = true;
|
||||
try {
|
||||
node.port.postMessage('destroy');
|
||||
} catch (e) {}
|
||||
try {
|
||||
source.disconnect();
|
||||
node.disconnect();
|
||||
} catch (e) {}
|
||||
try {
|
||||
origTrack.stop();
|
||||
} catch (e) {}
|
||||
try {
|
||||
ctx.close();
|
||||
} catch (e) {}
|
||||
}
|
||||
var origTrack = audioTracks[0];
|
||||
var processedTrack = dest.stream.getAudioTracks()[0];
|
||||
|
||||
// When EC stops the track we handed it, release the raw capture + graph.
|
||||
var rawStop = processedTrack.stop.bind(processedTrack);
|
||||
processedTrack.stop = function () {
|
||||
cleanup();
|
||||
rawStop();
|
||||
};
|
||||
// Device unplugged / capture ended involuntarily.
|
||||
origTrack.addEventListener('ended', function () {
|
||||
try {
|
||||
rawStop();
|
||||
} catch (e) {}
|
||||
cleanup();
|
||||
});
|
||||
var torndown = false;
|
||||
function cleanup() {
|
||||
if (torndown) return;
|
||||
torndown = true;
|
||||
try {
|
||||
node.port.postMessage('destroy');
|
||||
} catch (e) {}
|
||||
try {
|
||||
source.disconnect();
|
||||
node.disconnect();
|
||||
} catch (e) {}
|
||||
try {
|
||||
origTrack.stop();
|
||||
} catch (e) {}
|
||||
// Keep the shared AudioContext alive for the next capture.
|
||||
}
|
||||
|
||||
// Return a stream with the processed audio plus any original video.
|
||||
var out = new MediaStream();
|
||||
out.addTrack(processedTrack);
|
||||
stream.getVideoTracks().forEach(function (t) {
|
||||
out.addTrack(t);
|
||||
});
|
||||
return out;
|
||||
});
|
||||
// When EC stops the track we handed it, release the raw capture + graph.
|
||||
var rawStop = processedTrack.stop.bind(processedTrack);
|
||||
processedTrack.stop = function () {
|
||||
cleanup();
|
||||
rawStop();
|
||||
};
|
||||
origTrack.addEventListener('ended', function () {
|
||||
try {
|
||||
rawStop();
|
||||
} catch (e) {}
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// Return a stream with the processed audio plus any original video.
|
||||
var out = new MediaStream();
|
||||
out.addTrack(processedTrack);
|
||||
stream.getVideoTracks().forEach(function (t) {
|
||||
out.addTrack(t);
|
||||
});
|
||||
return out;
|
||||
})
|
||||
.catch(function (e) {
|
||||
// Any failure -> fall back to the raw mic so calls never break.
|
||||
@@ -141,9 +158,13 @@
|
||||
var wantsAudio = !!(constraints && constraints.audio);
|
||||
var effective = constraints;
|
||||
if (wantsAudio) {
|
||||
// RNNoise owns noise suppression; keep AEC + AGC on the raw capture.
|
||||
var audioC = typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
|
||||
// RNNoise needs mono 48 kHz; it owns suppression. Keep AEC + AGC on the
|
||||
// raw capture (they run before our processing).
|
||||
var audioC =
|
||||
typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
|
||||
audioC.noiseSuppression = false;
|
||||
audioC.channelCount = 1;
|
||||
audioC.sampleRate = SAMPLE_RATE;
|
||||
if (audioC.echoCancellation === undefined) audioC.echoCancellation = true;
|
||||
if (audioC.autoGainControl === undefined) audioC.autoGainControl = true;
|
||||
effective = Object.assign({}, constraints, { audio: audioC });
|
||||
|
||||
Reference in New Issue
Block a user