feat(calls): implement advanced multi-model ML noise suppression system
Implement a flexible, multi-model noise suppression pipeline for Element Call/LiveKit integration:

- ML Engines: Added support for RNNoise, Speex, DTLN, and DeepFilterNet 3 models.
- Pipeline Architecture: Implemented modular audio processing in lotus-denoise.js, supporting 'Series Suppression' (running browser-native NSNet2 before ML) and a hardware-style Noise Gate.
- UI & UX Enhancements:
  - Settings UI: Added model comparison chart with CPU/Quality metadata.
  - Tuning: Added Live Microphone Meter for calibrating Noise Gate thresholds.
  - Reporting: Added LotusToast system to alert users when ML suppression fails or falls back to raw input.
- Robustness & Quality:
  - Capture Fidelity: Removed forced 48kHz capture constraints to allow native-rate capture (solving static issues with high-end audio interfaces).
  - Performance: Added WASM SIMD detection with transparent fallback.
  - Capability Detection: Added browser feature detection to disable unsupported ML modes.
- Build Integration: Updated Vite config to self-host all model WASM/tflite assets in the /denoise/ directory.
This commit is contained in:
@@ -69,6 +69,7 @@ import { useDateFormatItems } from '../../../hooks/useDateFormat';
|
||||
import { SequenceCardStyle } from '../styles.css';
|
||||
import { useTauriUpdater } from '../../../hooks/useTauriUpdater';
|
||||
import { playCallJoinSound } from '../../../utils/callSounds';
|
||||
import { isMLDenoiseSupported, ML_DENOISE_REQUIREMENTS } from '../../../utils/lotusDenoiseUtils';
|
||||
|
||||
type ThemeSelectorProps = {
|
||||
themeNames: Record<string, string>;
|
||||
@@ -157,7 +158,7 @@ function SelectTheme({ disabled }: { disabled?: boolean }) {
|
||||
);
|
||||
}
|
||||
|
||||
type SettingsSelectOption<T extends string> = { value: T; label: string };
|
||||
type SettingsSelectOption<T extends string> = { value: T; label: string; disabled?: boolean };
|
||||
|
||||
function SettingsSelect<T extends string>({
|
||||
value,
|
||||
@@ -219,7 +220,8 @@ function SettingsSelect<T extends string>({
|
||||
size="300"
|
||||
variant={opt.value === value ? 'Primary' : 'Surface'}
|
||||
radii="300"
|
||||
onClick={() => handleSelect(opt.value)}
|
||||
disabled={opt.disabled}
|
||||
onClick={() => !opt.disabled && handleSelect(opt.value)}
|
||||
>
|
||||
<Text size="T300">{opt.label}</Text>
|
||||
</MenuItem>
|
||||
@@ -1196,12 +1198,114 @@ function useKeyBind(setter: (code: string) => void) {
|
||||
/**
 * Turns a keyboard `code` string into a short label for display.
 *
 * 'Space' is returned unchanged; for every other code the first occurrence of
 * 'Key' and then the first occurrence of 'Digit' are removed (so 'KeyA' -> 'A'
 * and 'Digit7' -> '7'). Codes containing neither substring pass through as-is.
 */
const keyLabel = (code: string) => {
  if (code === 'Space') {
    return 'Space';
  }
  const withoutLetterPrefix = code.replace('Key', '');
  return withoutLetterPrefix.replace('Digit', '');
};
|
||||
|
||||
import {
|
||||
DENOISE_MODELS,
|
||||
isMLDenoiseSupported,
|
||||
ML_DENOISE_REQUIREMENTS,
|
||||
} from '../../../utils/lotusDenoiseUtils';
|
||||
|
||||
/**
 * Live microphone level meter shown next to the Gate Threshold slider.
 *
 * Clicking the button requests microphone access, routes the stream into an
 * AnalyserNode and, once per animation frame, stores the mean of the analyser's
 * byte frequency bins in `level`. The bar width is `level / 128` as a
 * percentage, clamped to 100%. Clicking again (or unmounting) tears everything
 * down: the animation frame is cancelled, the tracks are stopped and the
 * AudioContext is closed.
 */
function MicMeter() {
  const [level, setLevel] = useState(0);
  const [active, setActive] = useState(false);
  const streamRef = useRef<MediaStream | null>(null);
  const ctxRef = useRef<AudioContext | null>(null);
  const rafRef = useRef<number | null>(null);
  // True while the component is mounted; lets `start` detect an unmount that
  // happened while the permission prompt was still pending.
  const mountedRef = useRef(true);
  // True while a `start` call is awaiting getUserMedia, so a second click
  // cannot open a second capture.
  const startingRef = useRef(false);

  /** Releases every resource held by the meter and resets the displayed state. */
  const stop = useCallback(() => {
    if (rafRef.current !== null) cancelAnimationFrame(rafRef.current);
    rafRef.current = null;

    streamRef.current?.getTracks().forEach((t) => t.stop());
    streamRef.current = null;

    const ctx = ctxRef.current;
    ctxRef.current = null;
    if (ctx && ctx.state !== 'closed') {
      // close() returns a promise; swallow a rejection so teardown never
      // surfaces as an unhandled promise rejection.
      ctx.close().catch(() => undefined);
    }

    setActive(false);
    setLevel(0);
  }, []);

  /** Requests the microphone and starts the per-frame level sampling loop. */
  const start = async () => {
    // Ignore re-entrant clicks: either a request is in flight or a test is
    // already running and owns the refs.
    if (startingRef.current || streamRef.current) return;
    startingRef.current = true;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (!mountedRef.current) {
        // Unmounted while waiting for permission: cleanup already ran, so the
        // stream must be released here or the mic would stay open.
        stream.getTracks().forEach((t) => t.stop());
        return;
      }
      streamRef.current = stream;
      const ctx = new AudioContext();
      ctxRef.current = ctx;
      const source = ctx.createMediaStreamSource(stream);
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 256;
      source.connect(analyser);

      const buffer = new Uint8Array(analyser.frequencyBinCount);
      const update = () => {
        analyser.getByteFrequencyData(buffer);
        let sum = 0;
        for (let i = 0; i < buffer.length; i += 1) sum += buffer[i];
        setLevel(sum / buffer.length);
        rafRef.current = requestAnimationFrame(update);
      };
      update();
      setActive(true);
    } catch (e) {
      // eslint-disable-next-line no-console
      console.error('Mic test failed', e);
      // Release anything acquired before the failure (e.g. the stream when
      // audio-graph setup throws) so a retry does not leak a live capture.
      stop();
    } finally {
      startingRef.current = false;
    }
  };

  useEffect(() => {
    // Set in the effect body (not only at ref creation) so a remount after a
    // cleanup marks the component as mounted again.
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      stop();
    };
  }, [stop]);

  return (
    <Box direction="Column" gap="100" style={{ padding: '8px 0' }}>
      <Box direction="Row" gap="200" align="Center">
        <Button size="300" variant="Secondary" outlined onClick={active ? stop : start}>
          <Text size="T300">{active ? 'Stop Test' : 'Test Microphone'}</Text>
        </Button>
        <Box
          grow="Yes"
          style={{
            height: '10px',
            background: 'var(--lt-bg-card, rgba(0,0,0,0.2))',
            borderRadius: '5px',
            overflow: 'hidden',
            position: 'relative',
            border: '1px solid var(--lt-border-color)',
          }}
        >
          <Box
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              bottom: 0,
              // `level` is a 0-255 byte average; 128 and above fills the bar.
              width: `${Math.min(100, (level / 128) * 100)}%`,
              background: 'var(--lt-accent-green, #00FF88)',
              transition: 'width 0.05s linear',
              boxShadow: '0 0 8px var(--lt-accent-green)',
            }}
          />
        </Box>
      </Box>
      <Text size="S300" variant="Secondary">
        The green bar shows your live volume. Use this to tune the Gate Threshold.
      </Text>
    </Box>
  );
}
|
||||
|
||||
function Calls() {
|
||||
const [cameraOnJoin, setCameraOnJoin] = useSetting(settingsAtom, 'cameraOnJoin');
|
||||
const [callNoiseSuppression, setCallNoiseSuppression] = useSetting(
|
||||
settingsAtom,
|
||||
'callNoiseSuppression',
|
||||
);
|
||||
const [callDenoiseModel, setCallDenoiseModel] = useSetting(settingsAtom, 'callDenoiseModel');
|
||||
const [callDenoiseNativeNS, setCallDenoiseNativeNS] = useSetting(
|
||||
settingsAtom,
|
||||
'callDenoiseNativeNS',
|
||||
);
|
||||
const [callDenoiseGate, setCallDenoiseGate] = useSetting(settingsAtom, 'callDenoiseGate');
|
||||
const [callDenoiseGateThreshold, setCallDenoiseGateThreshold] = useSetting(
|
||||
settingsAtom,
|
||||
'callDenoiseGateThreshold',
|
||||
);
|
||||
|
||||
const [pttMode, setPttMode] = useSetting(settingsAtom, 'pttMode');
|
||||
const [pttKey, setPttKey] = useSetting(settingsAtom, 'pttKey');
|
||||
const [deafenKey, setDeafenKey] = useSetting(settingsAtom, 'deafenKey');
|
||||
@@ -1220,6 +1324,8 @@ function Calls() {
|
||||
const pttBind = useKeyBind(setPttKey);
|
||||
const deafenBind = useKeyBind(setDeafenKey);
|
||||
|
||||
const mlSupported = isMLDenoiseSupported();
|
||||
|
||||
return (
|
||||
<Box direction="Column" gap="100">
|
||||
<Text size="L400">Calls</Text>
|
||||
@@ -1233,7 +1339,79 @@ function Calls() {
|
||||
<SequenceCard className={SequenceCardStyle} variant="SurfaceVariant" direction="Column">
|
||||
<SettingTile
|
||||
title="Noise Suppression"
|
||||
description="Filter background noise from your mic during calls. Browser-native uses the built-in WebRTC suppressor; ML runs on-device RNNoise for stronger, Krisp-style removal (higher CPU)."
|
||||
description={
|
||||
<Box direction="Column" gap="200">
|
||||
<Text>
|
||||
Filter background noise from your mic during calls. Browser-native uses the
|
||||
built-in WebRTC suppressor (Google NSNet2).
|
||||
</Text>
|
||||
|
||||
<Box direction="Column" gap="100" style={{ overflowX: 'auto' }}>
|
||||
<Box
|
||||
direction="Row"
|
||||
gap="100"
|
||||
style={{ borderBottom: '1px solid var(--lt-border-color)', paddingBottom: '4px' }}
|
||||
>
|
||||
<Box style={{ width: '120px' }}>
|
||||
<Text size="S300" bold>
|
||||
Model
|
||||
</Text>
|
||||
</Box>
|
||||
<Box style={{ width: '80px' }}>
|
||||
<Text size="S300" bold>
|
||||
CPU
|
||||
</Text>
|
||||
</Box>
|
||||
<Box style={{ width: '80px' }}>
|
||||
<Text size="S300" bold>
|
||||
Quality
|
||||
</Text>
|
||||
</Box>
|
||||
<Box grow="Yes">
|
||||
<Text size="S300" bold>
|
||||
Transients
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
{DENOISE_MODELS.map((model) => (
|
||||
<Box key={model.id} direction="Row" gap="100">
|
||||
<Box style={{ width: '120px' }}>
|
||||
<Text size="S300">{model.name}</Text>
|
||||
</Box>
|
||||
<Box style={{ width: '80px' }}>
|
||||
<Text size="S300">{model.cpuUsage}</Text>
|
||||
</Box>
|
||||
<Box style={{ width: '80px' }}>
|
||||
<Text size="S300">{model.voiceQuality}</Text>
|
||||
</Box>
|
||||
<Box grow="Yes">
|
||||
<Text size="S300">{model.transients}</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
|
||||
{!mlSupported && (
|
||||
<Box direction="Column" gap="100">
|
||||
<Text variant="Warning" size="S300">
|
||||
ML options are not supported in this browser.
|
||||
</Text>
|
||||
<Box as="ul" style={{ paddingLeft: '20px', margin: 0 }}>
|
||||
{ML_DENOISE_REQUIREMENTS.map((req) => (
|
||||
<Text as="li" key={req} size="S300">
|
||||
{req}
|
||||
</Text>
|
||||
))}
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
{callNoiseSuppression === 'ml' && (
|
||||
<Text variant="Warning" size="S300">
|
||||
Note: Applying changes requires rejoining the call.
|
||||
</Text>
|
||||
)}
|
||||
</Box>
|
||||
}
|
||||
after={
|
||||
<SettingsSelect<NoiseSuppressionMode>
|
||||
value={callNoiseSuppression}
|
||||
@@ -1241,11 +1419,86 @@ function Calls() {
|
||||
options={[
|
||||
{ value: 'off', label: 'Off' },
|
||||
{ value: 'browser', label: 'Browser-native' },
|
||||
{ value: 'ml', label: 'ML (beta)' },
|
||||
{
|
||||
value: 'ml',
|
||||
label: 'ML (Advanced)',
|
||||
disabled: !mlSupported,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
}
|
||||
/>
|
||||
|
||||
{callNoiseSuppression === 'ml' && (
|
||||
<Box
|
||||
direction="Column"
|
||||
gap="300"
|
||||
style={{
|
||||
padding: '16px',
|
||||
marginTop: '8px',
|
||||
borderTop: '1px solid var(--lt-border-color)',
|
||||
background: 'rgba(0,0,0,0.1)',
|
||||
}}
|
||||
>
|
||||
<SettingTile
|
||||
title="ML Model"
|
||||
description="Choose the machine learning model to use for noise removal."
|
||||
after={
|
||||
<SettingsSelect<DenoiseModelId>
|
||||
value={callDenoiseModel}
|
||||
onChange={setCallDenoiseModel}
|
||||
options={[
|
||||
{ value: 'rnnoise', label: 'RNNoise' },
|
||||
{ value: 'speex', label: 'Speex (Legacy)' },
|
||||
{ value: 'dtln', label: 'DTLN (Balanced)' },
|
||||
{ value: 'deepfilternet', label: 'DeepFilterNet 3 (Pro)' },
|
||||
]}
|
||||
/>
|
||||
}
|
||||
/>
|
||||
|
||||
<SettingTile
|
||||
title="Series Suppression"
|
||||
description="Run the browser's native stationary noise filter before the ML model. Recommended for eliminating fan hum."
|
||||
after={
|
||||
<Switch
|
||||
variant="Primary"
|
||||
value={callDenoiseNativeNS}
|
||||
onChange={setCallDenoiseNativeNS}
|
||||
/>
|
||||
}
|
||||
/>
|
||||
|
||||
<SettingTile
|
||||
title="Noise Gate"
|
||||
description="Hard-cut audio when you aren't speaking to ensure absolute silence between sentences."
|
||||
after={
|
||||
<Switch variant="Primary" value={callDenoiseGate} onChange={setCallDenoiseGate} />
|
||||
}
|
||||
/>
|
||||
|
||||
{callDenoiseGate && (
|
||||
<Box direction="Column" gap="100">
|
||||
<Box direction="Row" justify="SpaceBetween">
|
||||
<Text size="S300">Gate Threshold</Text>
|
||||
<Text size="S300" bold>
|
||||
{callDenoiseGateThreshold} dB
|
||||
</Text>
|
||||
</Box>
|
||||
<input
|
||||
type="range"
|
||||
min="-100"
|
||||
max="0"
|
||||
step="1"
|
||||
value={callDenoiseGateThreshold}
|
||||
onChange={(e) => setCallDenoiseGateThreshold(parseInt(e.target.value, 10))}
|
||||
style={{ width: '100%', accentColor: 'var(--lt-accent-orange)' }}
|
||||
/>
|
||||
<MicMeter />
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
</SequenceCard>
|
||||
<SequenceCard
|
||||
className={SequenceCardStyle}
|
||||
|
||||
Reference in New Issue
Block a user