feat(calls): implement advanced multi-model ML noise suppression system
CI / Build & Quality Checks (push) Failing after 4m49s
Trigger Desktop Build / trigger (push) Successful in 11s

Implement a flexible, multi-model noise suppression pipeline for Element Call/LiveKit integration:

- ML Engines: Added support for RNNoise, Speex, DTLN, and DeepFilterNet 3 models.
- Pipeline Architecture: Implemented modular audio processing in lotus-denoise.js, supporting 'Series Suppression' (running the browser's built-in WebRTC noise suppressor before the ML model) and a hardware-style Noise Gate.
- UI & UX Enhancements:
  - Settings UI: Added model comparison chart with CPU/Quality metadata.
  - Tuning: Added Live Microphone Meter for calibrating Noise Gate thresholds.
  - Reporting: Added LotusToast system to alert users when ML suppression fails or falls back to raw input.
- Robustness & Quality:
  - Capture Fidelity: Removed forced 48kHz capture constraints to allow native-rate capture (solving static issues with high-end audio interfaces).
  - Performance: Added WASM SIMD detection with transparent fallback.
  - Capability Detection: Added browser feature detection to disable unsupported ML modes.
- Build Integration: Updated Vite config to self-host all model WASM/tflite assets in /denoise/ directory.
This commit is contained in:
2026-06-16 00:50:12 -04:00
parent 938ead79f7
commit 5d5f5f4516
10 changed files with 606 additions and 105 deletions
+257 -4
View File
@@ -69,6 +69,7 @@ import { useDateFormatItems } from '../../../hooks/useDateFormat';
import { SequenceCardStyle } from '../styles.css';
import { useTauriUpdater } from '../../../hooks/useTauriUpdater';
import { playCallJoinSound } from '../../../utils/callSounds';
import { isMLDenoiseSupported, ML_DENOISE_REQUIREMENTS } from '../../../utils/lotusDenoiseUtils';
type ThemeSelectorProps = {
themeNames: Record<string, string>;
@@ -157,7 +158,7 @@ function SelectTheme({ disabled }: { disabled?: boolean }) {
);
}
type SettingsSelectOption<T extends string> = { value: T; label: string };
type SettingsSelectOption<T extends string> = { value: T; label: string; disabled?: boolean };
function SettingsSelect<T extends string>({
value,
@@ -219,7 +220,8 @@ function SettingsSelect<T extends string>({
size="300"
variant={opt.value === value ? 'Primary' : 'Surface'}
radii="300"
onClick={() => handleSelect(opt.value)}
disabled={opt.disabled}
onClick={() => !opt.disabled && handleSelect(opt.value)}
>
<Text size="T300">{opt.label}</Text>
</MenuItem>
@@ -1196,12 +1198,114 @@ function useKeyBind(setter: (code: string) => void) {
/**
 * Converts a key `code` string into the short label shown for a key binding.
 *
 * 'Space' is returned unchanged; for any other code the first occurrence of
 * 'Key' and then the first occurrence of 'Digit' is removed (so 'KeyA' becomes
 * 'A' and 'Digit7' becomes '7'). Codes containing neither are returned as-is.
 */
const keyLabel = (code: string) => {
  if (code === 'Space') {
    return 'Space';
  }
  const withoutLetterPrefix = code.replace('Key', '');
  return withoutLetterPrefix.replace('Digit', '');
};
// `isMLDenoiseSupported` and `ML_DENOISE_REQUIREMENTS` are already imported from
// this module alongside the other imports at the top of the file. Importing them
// a second time declares the same bindings twice, which does not compile, so only
// the name that is still missing is imported here.
// NOTE(review): this import sits after executable code; consider moving it up
// into the top-of-file import group.
import { DENOISE_MODELS } from '../../../utils/lotusDenoiseUtils';
/**
 * Microphone test widget: a start/stop button plus a horizontal bar that
 * follows the live input level.
 *
 * While a test is running the component holds a microphone `MediaStream`, an
 * `AudioContext` and a `requestAnimationFrame` loop; all three are released by
 * `stop`, which also runs when the component unmounts.
 *
 * The bar width is the mean of the analyser's byte frequency data (0-255),
 * scaled so that a mean of 128 fills the bar.
 * NOTE(review): that value is not expressed in dB, while the Gate Threshold
 * slider next to it is — confirm the two scales are meant to be compared by eye.
 */
function MicMeter() {
  const [level, setLevel] = useState(0);
  const [active, setActive] = useState(false);
  const streamRef = useRef<MediaStream | null>(null);
  const ctxRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number | null>(null);
  // Incremented by every stop(); an in-flight start() compares against it to
  // find out whether it was cancelled (stopped, restarted or unmounted) while
  // it was still waiting for the microphone.
  const sessionRef = useRef(0);

  const stop = useCallback(() => {
    sessionRef.current += 1;
    if (rafRef.current !== null) cancelAnimationFrame(rafRef.current);
    rafRef.current = null;
    streamRef.current?.getTracks().forEach((track) => track.stop());
    streamRef.current = null;
    // close() returns a promise; a rejection here is not actionable, so it is
    // swallowed instead of surfacing as an unhandled rejection.
    ctxRef.current?.close().catch(() => undefined);
    ctxRef.current = null;
    setActive(false);
    setLevel(0);
  }, []);

  const start = async () => {
    // Release anything left over from a previous attempt so repeated starts
    // never stack microphone captures.
    stop();
    const session = sessionRef.current;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (session !== sessionRef.current) {
        // Cancelled while the permission prompt was open: nobody owns this
        // stream any more, so release the microphone immediately.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      streamRef.current = stream;

      const ctx = new AudioContext();
      ctxRef.current = ctx;
      const source = ctx.createMediaStreamSource(stream);
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);

      const buffer = new Uint8Array(analyser.frequencyBinCount);
      const update = () => {
        analyser.getByteFrequencyData(buffer);
        let sum = 0;
        for (let i = 0; i < buffer.length; i += 1) sum += buffer[i];
        setLevel(sum / buffer.length);
        rafRef.current = requestAnimationFrame(update);
      };
      update();
      setActive(true);
    } catch (e) {
      // eslint-disable-next-line no-console
      console.error('Mic test failed', e);
      // Setup may have failed after the stream was acquired; tear down so the
      // microphone is not left open. Skipped when a newer session has already
      // taken over the refs.
      if (session === sessionRef.current) stop();
    }
  };

  // Release the microphone and audio graph on unmount.
  useEffect(() => () => stop(), [stop]);

  return (
    <Box direction="Column" gap="100" style={{ padding: '8px 0' }}>
      <Box direction="Row" gap="200" align="Center">
        <Button size="300" variant="Secondary" outlined onClick={active ? stop : start}>
          <Text size="T300">{active ? 'Stop Test' : 'Test Microphone'}</Text>
        </Button>
        <Box
          grow="Yes"
          style={{
            height: '10px',
            background: 'var(--lt-bg-card, rgba(0,0,0,0.2))',
            borderRadius: '5px',
            overflow: 'hidden',
            position: 'relative',
            border: '1px solid var(--lt-border-color)',
          }}
        >
          <Box
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              bottom: 0,
              width: `${Math.min(100, (level / 128) * 100)}%`,
              background: 'var(--lt-accent-green, #00FF88)',
              transition: 'width 0.05s linear',
              boxShadow: '0 0 8px var(--lt-accent-green)',
            }}
          />
        </Box>
      </Box>
      <Text size="S300" variant="Secondary">
        The green bar shows your live volume. Use this to tune the Gate Threshold.
      </Text>
    </Box>
  );
}
function Calls() {
const [cameraOnJoin, setCameraOnJoin] = useSetting(settingsAtom, 'cameraOnJoin');
const [callNoiseSuppression, setCallNoiseSuppression] = useSetting(
settingsAtom,
'callNoiseSuppression',
);
const [callDenoiseModel, setCallDenoiseModel] = useSetting(settingsAtom, 'callDenoiseModel');
const [callDenoiseNativeNS, setCallDenoiseNativeNS] = useSetting(
settingsAtom,
'callDenoiseNativeNS',
);
const [callDenoiseGate, setCallDenoiseGate] = useSetting(settingsAtom, 'callDenoiseGate');
const [callDenoiseGateThreshold, setCallDenoiseGateThreshold] = useSetting(
settingsAtom,
'callDenoiseGateThreshold',
);
const [pttMode, setPttMode] = useSetting(settingsAtom, 'pttMode');
const [pttKey, setPttKey] = useSetting(settingsAtom, 'pttKey');
const [deafenKey, setDeafenKey] = useSetting(settingsAtom, 'deafenKey');
@@ -1220,6 +1324,8 @@ function Calls() {
const pttBind = useKeyBind(setPttKey);
const deafenBind = useKeyBind(setDeafenKey);
const mlSupported = isMLDenoiseSupported();
return (
<Box direction="Column" gap="100">
<Text size="L400">Calls</Text>
@@ -1233,7 +1339,79 @@ function Calls() {
<SequenceCard className={SequenceCardStyle} variant="SurfaceVariant" direction="Column">
<SettingTile
title="Noise Suppression"
description="Filter background noise from your mic during calls. Browser-native uses the built-in WebRTC suppressor; ML runs on-device RNNoise for stronger, Krisp-style removal (higher CPU)."
description={
<Box direction="Column" gap="200">
<Text>
Filter background noise from your mic during calls. Browser-native uses the
built-in WebRTC suppressor (Google NSNet2).
</Text>
<Box direction="Column" gap="100" style={{ overflowX: 'auto' }}>
<Box
direction="Row"
gap="100"
style={{ borderBottom: '1px solid var(--lt-border-color)', paddingBottom: '4px' }}
>
<Box style={{ width: '120px' }}>
<Text size="S300" bold>
Model
</Text>
</Box>
<Box style={{ width: '80px' }}>
<Text size="S300" bold>
CPU
</Text>
</Box>
<Box style={{ width: '80px' }}>
<Text size="S300" bold>
Quality
</Text>
</Box>
<Box grow="Yes">
<Text size="S300" bold>
Transients
</Text>
</Box>
</Box>
{DENOISE_MODELS.map((model) => (
<Box key={model.id} direction="Row" gap="100">
<Box style={{ width: '120px' }}>
<Text size="S300">{model.name}</Text>
</Box>
<Box style={{ width: '80px' }}>
<Text size="S300">{model.cpuUsage}</Text>
</Box>
<Box style={{ width: '80px' }}>
<Text size="S300">{model.voiceQuality}</Text>
</Box>
<Box grow="Yes">
<Text size="S300">{model.transients}</Text>
</Box>
</Box>
))}
</Box>
{!mlSupported && (
<Box direction="Column" gap="100">
<Text variant="Warning" size="S300">
ML options are not supported in this browser.
</Text>
<Box as="ul" style={{ paddingLeft: '20px', margin: 0 }}>
{ML_DENOISE_REQUIREMENTS.map((req) => (
<Text as="li" key={req} size="S300">
{req}
</Text>
))}
</Box>
</Box>
)}
{callNoiseSuppression === 'ml' && (
<Text variant="Warning" size="S300">
Note: Applying changes requires rejoining the call.
</Text>
)}
</Box>
}
after={
<SettingsSelect<NoiseSuppressionMode>
value={callNoiseSuppression}
@@ -1241,11 +1419,86 @@ function Calls() {
options={[
{ value: 'off', label: 'Off' },
{ value: 'browser', label: 'Browser-native' },
{ value: 'ml', label: 'ML (beta)' },
{
value: 'ml',
label: 'ML (Advanced)',
disabled: !mlSupported,
},
]}
/>
}
/>
{callNoiseSuppression === 'ml' && (
<Box
direction="Column"
gap="300"
style={{
padding: '16px',
marginTop: '8px',
borderTop: '1px solid var(--lt-border-color)',
background: 'rgba(0,0,0,0.1)',
}}
>
<SettingTile
title="ML Model"
description="Choose the machine learning model to use for noise removal."
after={
<SettingsSelect<DenoiseModelId>
value={callDenoiseModel}
onChange={setCallDenoiseModel}
options={[
{ value: 'rnnoise', label: 'RNNoise' },
{ value: 'speex', label: 'Speex (Legacy)' },
{ value: 'dtln', label: 'DTLN (Balanced)' },
{ value: 'deepfilternet', label: 'DeepFilterNet 3 (Pro)' },
]}
/>
}
/>
<SettingTile
title="Series Suppression"
description="Run the browser's native stationary noise filter before the ML model. Recommended for eliminating fan hum."
after={
<Switch
variant="Primary"
value={callDenoiseNativeNS}
onChange={setCallDenoiseNativeNS}
/>
}
/>
<SettingTile
title="Noise Gate"
description="Hard-cut audio when you aren't speaking to ensure absolute silence between sentences."
after={
<Switch variant="Primary" value={callDenoiseGate} onChange={setCallDenoiseGate} />
}
/>
{callDenoiseGate && (
<Box direction="Column" gap="100">
<Box direction="Row" justify="SpaceBetween">
<Text size="S300">Gate Threshold</Text>
<Text size="S300" bold>
{callDenoiseGateThreshold} dB
</Text>
</Box>
<input
type="range"
min="-100"
max="0"
step="1"
value={callDenoiseGateThreshold}
onChange={(e) => setCallDenoiseGateThreshold(parseInt(e.target.value, 10))}
style={{ width: '100%', accentColor: 'var(--lt-accent-orange)' }}
/>
<MicMeter />
</Box>
)}
</Box>
)}
</SequenceCard>
<SequenceCard
className={SequenceCardStyle}
+29 -1
View File
@@ -46,6 +46,10 @@ export const createCallEmbed = (
container: HTMLElement,
pref?: CallPreferences,
denoiseMode: NoiseSuppressionMode = 'browser',
denoiseModel: string = 'rnnoise',
denoiseNativeNS: boolean = true,
denoiseGate: boolean = false,
denoiseGateThreshold: number = -45,
forceAudioOff = false,
): CallEmbed => {
const rtcSession = mx.matrixRTC.getRoomSession(room);
@@ -60,6 +64,10 @@ export const createCallEmbed = (
intent,
themeKind,
denoiseMode,
denoiseModel,
denoiseNativeNS,
denoiseGate,
denoiseGateThreshold,
initialAudio,
initialVideo,
);
@@ -77,6 +85,10 @@ export const useCallStart = (dm = false) => {
const setCallEmbed = useSetAtom(callEmbedAtom);
const callEmbedRef = useCallEmbedRef();
const [callNoiseSuppression] = useSetting(settingsAtom, 'callNoiseSuppression');
const [callDenoiseModel] = useSetting(settingsAtom, 'callDenoiseModel');
const [callDenoiseNativeNS] = useSetting(settingsAtom, 'callDenoiseNativeNS');
const [callDenoiseGate] = useSetting(settingsAtom, 'callDenoiseGate');
const [callDenoiseGateThreshold] = useSetting(settingsAtom, 'callDenoiseGateThreshold');
const [pttMode] = useSetting(settingsAtom, 'pttMode');
const startCall = useCallback(
@@ -97,12 +109,28 @@ export const useCallStart = (dm = false) => {
container,
pref,
callNoiseSuppression ?? 'browser',
callDenoiseModel ?? 'rnnoise',
callDenoiseNativeNS ?? true,
callDenoiseGate ?? false,
callDenoiseGateThreshold ?? -45,
!!pttMode,
);
setCallEmbed(callEmbed);
},
[mx, dm, theme, setCallEmbed, callEmbedRef, callNoiseSuppression, pttMode],
[
mx,
dm,
theme,
setCallEmbed,
callEmbedRef,
callNoiseSuppression,
callDenoiseModel,
callDenoiseNativeNS,
callDenoiseGate,
callDenoiseGateThreshold,
pttMode,
],
);
return startCall;
@@ -382,6 +382,32 @@ function DeepLinkNavigator() {
return null;
}
/**
 * Turns the `error` field of a denoise status message into display text.
 * Only non-empty strings and `Error` messages are used verbatim; any other
 * value would otherwise stringify to something like "[object Object]", so it
 * is reported as 'Unknown error'.
 */
const describeDenoiseError = (error: unknown): string => {
  if (typeof error === 'string' && error.trim() !== '') return error;
  if (error instanceof Error && error.message !== '') return error.message;
  return 'Unknown error';
};

/**
 * Renders nothing; listens for window `message` events of type
 * 'lotus-denoise-status' and queues a toast whenever one reports a falsy
 * `active` flag.
 *
 * NOTE(review): the listener accepts the message from any sender — neither
 * `event.origin` nor `event.source` is checked. If the origin of the frame
 * that posts these messages is known, verify it here so other frames cannot
 * trigger this toast.
 */
function LotusDenoiseFeature() {
  const setToast = useSetAtom(toastQueueAtom);

  useEffect(() => {
    const handleMessage = (event: MessageEvent) => {
      // The payload comes from another window, so check its shape before
      // reading fields from it.
      const data: unknown = event.data;
      if (typeof data !== 'object' || data === null) return;

      const { type, active, error } = data as {
        type?: unknown;
        active?: unknown;
        error?: unknown;
      };
      if (type !== 'lotus-denoise-status' || active) return;

      setToast({
        id: `denoise-fail-${Date.now()}`,
        displayName: 'Audio Quality',
        body: `ML Noise Suppression failed: ${describeDenoiseError(error)}. Falling back to raw mic.`,
        roomName: 'System',
        roomId: '',
      });
    };

    window.addEventListener('message', handleMessage);
    return () => window.removeEventListener('message', handleMessage);
  }, [setToast]);

  return null;
}
export function ClientNonUIFeatures({ children }: ClientNonUIFeaturesProps) {
return (
<>
@@ -391,6 +417,7 @@ export function ClientNonUIFeatures({ children }: ClientNonUIFeaturesProps) {
<PresenceUpdater />
<InviteNotifications />
<MessageNotifications />
<LotusDenoiseFeature />
<DeepLinkNavigator />
{children}
</>
+11 -4
View File
@@ -102,6 +102,10 @@ export class CallEmbed {
intent: ElementCallIntent,
themeKind: ElementCallThemeKind,
denoiseMode: NoiseSuppressionMode = 'browser',
denoiseModel: string = 'rnnoise',
denoiseNativeNS: boolean = true,
denoiseGate: boolean = false,
denoiseGateThreshold: number = -45,
initialAudio = true,
initialVideo = false,
): Widget {
@@ -126,8 +130,8 @@ export class CallEmbed {
lang: 'en-EN',
theme: themeKind,
// EC's built-in WebRTC suppressor: on only for 'browser' tier. For 'ml' we
// disable it here so RNNoise (the Lotus denoise shim) owns suppression and
// the two don't fight each other.
// disable it here so EC doesn't do its own extra processing, and let the
// Lotus denoise shim (which keeps native NS on) handle the pipeline.
noiseSuppression: (denoiseMode === 'browser').toString(),
audio: initialAudio.toString(),
video: initialVideo.toString(),
@@ -135,9 +139,12 @@ export class CallEmbed {
});
if (denoiseMode === 'ml') {
// Signal the Lotus denoise shim (injected into the EC index.html) to route
// the mic through the RNNoise worklet before LiveKit publishes the track.
// Signal the Lotus denoise shim to route the mic through the ML processors.
params.append('lotusDenoise', 'ml');
params.append('lotusModel', denoiseModel);
params.append('lotusNativeNS', denoiseNativeNS.toString());
params.append('lotusGate', denoiseGate.toString());
params.append('lotusGateThreshold', denoiseGateThreshold.toString());
}
if (CallEmbed.startingCall(intent)) {
+9
View File
@@ -14,6 +14,7 @@ export type MessageSpacing = '0' | '100' | '200' | '300' | '400' | '500';
// - 'browser' : WebRTC built-in suppression (Element Call noiseSuppression param)
// - 'ml' : client-side ML suppression via the Lotus denoise shim (model selected by callDenoiseModel)
export type NoiseSuppressionMode = 'off' | 'browser' | 'ml';
export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln' | 'deepfilternet';
export type ChatBackground =
| 'none'
| 'blueprint'
@@ -115,6 +116,10 @@ export interface Settings {
cameraOnJoin: boolean;
callNoiseSuppression: NoiseSuppressionMode;
callDenoiseModel: DenoiseModelId;
callDenoiseNativeNS: boolean;
callDenoiseGate: boolean;
callDenoiseGateThreshold: number;
pttMode: boolean;
pttKey: string;
@@ -205,6 +210,10 @@ const defaultSettings: Settings = {
cameraOnJoin: false,
callNoiseSuppression: 'browser',
callDenoiseModel: 'rnnoise',
callDenoiseNativeNS: true,
callDenoiseGate: false,
callDenoiseGateThreshold: -45,
pttMode: false,
pttKey: 'Space',
+68
View File
@@ -0,0 +1,68 @@
/**
 * Capability detection and model metadata for Lotus ML noise suppression.
 */
/**
 * Descriptive metadata for one ML noise-suppression model.
 * All fields are display text except `id`.
 */
export type DenoiseModel = {
// Machine-readable identifier of the model (e.g. 'rnnoise').
id: string;
// Human-readable model name.
name: string;
// Short free-text summary of what the model is suited for.
description: string;
// Free-text CPU cost estimate (e.g. '< 5%').
cpuUsage: string;
// Free-text size estimate of the model's binary assets (e.g. '< 1 MB').
binarySize: string;
// Rating for how well the model handles transients.
transients: 'Poor' | 'Good' | 'Excellent';
// Rating for the resulting voice quality.
voiceQuality: 'Moderate' | 'High' | 'Very High';
};
/**
 * Metadata entries for the available ML models, in display order.
 *
 * NOTE(review): the model setting also accepts 'speex' (see the
 * `DenoiseModelId` union and the "ML Model" select options), but there is no
 * 'speex' entry in this list — confirm whether that omission is intentional.
 */
export const DENOISE_MODELS: DenoiseModel[] = [
{
id: 'rnnoise',
name: 'RNNoise (Mozilla)',
description: 'Lightweight hybrid model. Best for consistent noise like fans.',
cpuUsage: '< 5%',
binarySize: '< 1 MB',
transients: 'Poor',
voiceQuality: 'Moderate',
},
{
id: 'dtln',
name: 'DTLN (Balanced)',
description: 'Deep learning model with a good balance of quality and CPU.',
cpuUsage: '10-20%',
binarySize: '3-4 MB',
transients: 'Good',
voiceQuality: 'High',
},
{
id: 'deepfilternet',
name: 'DeepFilterNet 3 (Pro)',
description: 'State-of-the-art studio quality. Removes all background noise.',
cpuUsage: '25-50%+',
binarySize: '15-20 MB',
transients: 'Excellent',
voiceQuality: 'Very High',
},
];
/**
 * Reports whether the current environment exposes the browser APIs that ML
 * noise suppression is gated on:
 *   1. an `AudioContext` (or prefixed `webkitAudioContext`) constructor,
 *   2. `AudioWorkletNode`,
 *   3. `navigator.mediaDevices.getUserMedia`.
 *
 * Every probe is written so that a missing API yields `false` rather than an
 * exception: an undeclared global must be tested with `typeof`, because
 * referencing it directly throws a ReferenceError.
 *
 * @returns `true` only when all three APIs are present; `false` otherwise,
 *          including when there is no `window` at all.
 */
export const isMLDenoiseSupported = (): boolean => {
  if (typeof window === 'undefined') return false;

  // 1. Web Audio API, standard or webkit-prefixed.
  const audioGlobals = window as Window & { webkitAudioContext?: unknown };
  const hasAudioContext =
    Boolean(audioGlobals.AudioContext) || Boolean(audioGlobals.webkitAudioContext);

  // 2. AudioWorklet. `typeof` keeps this safe when the global is not declared.
  const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined';

  // 3. Microphone capture. `navigator` and `mediaDevices` may both be absent.
  const hasGetUserMedia =
    typeof navigator !== 'undefined' &&
    typeof navigator.mediaDevices?.getUserMedia === 'function';

  return hasAudioWorklet && hasGetUserMedia;
};
/**
 * Human-readable list of requirements for ML denoise, intended for UI display.
 *
 * These strings are descriptive only. `isMLDenoiseSupported` checks for an
 * AudioContext constructor, AudioWorkletNode and getUserMedia.
 * NOTE(review): the '48kHz AudioContext capability' item is not verified by
 * `isMLDenoiseSupported` — confirm it is still a real requirement.
 */
export const ML_DENOISE_REQUIREMENTS = [
'Modern browser with Web Audio API support',
'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)',
'Microphone access',
'48kHz AudioContext capability',
];