feat(calls): implement advanced multi-model ML noise suppression system

Implement a flexible, multi-model noise suppression pipeline for Element Call/LiveKit integration:
- ML Engines: Added support for RNNoise, Speex, DTLN, and DeepFilterNet 3 models.
- Pipeline Architecture: Implemented modular audio processing in lotus-denoise.js, supporting 'Series Suppression' (running browser-native NSNet2 before ML) and a hardware-style Noise Gate.
- UI & UX Enhancements:
  - Settings UI: Added model comparison chart with CPU/Quality metadata.
  - Tuning: Added Live Microphone Meter for calibrating Noise Gate thresholds.
  - Reporting: Added LotusToast system to alert users when ML suppression fails or falls back to raw input.
- Robustness & Quality:
  - Capture Fidelity: Removed forced 48kHz capture constraints to allow native-rate capture (solving static issues with high-end audio interfaces).
  - Performance: Added WASM SIMD detection with transparent fallback.
  - Capability Detection: Added browser feature detection to disable unsupported ML modes.
- Build Integration: Updated Vite config to self-host all model WASM/tflite assets in the /denoise/ directory.
2026-06-16 00:50:12 -04:00
parent 938ead79f7
commit 5d5f5f4516
10 changed files with 606 additions and 105 deletions
@@ -69,6 +69,7 @@ import { useDateFormatItems } from '../../../hooks/useDateFormat';
 import { SequenceCardStyle } from '../styles.css';
 import { useTauriUpdater } from '../../../hooks/useTauriUpdater';
 import { playCallJoinSound } from '../../../utils/callSounds';
+import { isMLDenoiseSupported, ML_DENOISE_REQUIREMENTS } from '../../../utils/lotusDenoiseUtils';

 type ThemeSelectorProps = {
  themeNames: Record<string, string>;
@@ -157,7 +158,7 @@ function SelectTheme({ disabled }: { disabled?: boolean }) {
  );
 }

-type SettingsSelectOption<T extends string> = { value: T; label: string };
+// One entry of a SettingsSelect menu. `disabled` marks an option that cannot be
+// picked: the menu item is rendered disabled and clicks on it are ignored.
+type SettingsSelectOption<T extends string> = { value: T; label: string; disabled?: boolean };

 function SettingsSelect<T extends string>({
  value,
@@ -219,7 +220,8 @@ function SettingsSelect<T extends string>({
                    size="300"
                    variant={opt.value === value ? 'Primary' : 'Surface'}
                    radii="300"
-                    onClick={() => handleSelect(opt.value)}
+                    disabled={opt.disabled}
+                    onClick={() => !opt.disabled && handleSelect(opt.value)}
                  >
                    <Text size="T300">{opt.label}</Text>
                  </MenuItem>
@@ -1196,12 +1198,114 @@ function useKeyBind(setter: (code: string) => void) {
 const keyLabel = (code: string) =>
  code === 'Space' ? 'Space' : code.replace('Key', '').replace('Digit', '');

+// `isMLDenoiseSupported` and `ML_DENOISE_REQUIREMENTS` are already imported
+// from this module near the top of the file; importing them a second time
+// would declare the same bindings twice, which is an error. Only the model
+// table still has to be brought into scope here.
+import { DENOISE_MODELS } from '../../../utils/lotusDenoiseUtils';
+
+/**
+ * Live microphone level meter shown next to the Gate Threshold slider.
+ *
+ * Pressing the button opens the microphone, feeds it into an AnalyserNode and,
+ * once per animation frame, stores the mean of the analyser's byte frequency
+ * bins in `level`, which drives the width of the green bar. Pressing the
+ * button again, or unmounting the component, stops the capture tracks, closes
+ * the AudioContext and resets the bar.
+ */
+function MicMeter() {
+  const [level, setLevel] = useState(0);
+  const [active, setActive] = useState(false);
+  const streamRef = useRef<MediaStream | null>(null);
+  const ctxRef = useRef<AudioContext | null>(null);
+  const rafRef = useRef<number | null>(null);
+  // Incremented by every start() and stop(). A start() that is still awaiting
+  // getUserMedia compares its own number against this one to detect that it
+  // was stopped, unmounted or superseded while the request was pending.
+  const sessionRef = useRef(0);
+
+  const stop = useCallback(() => {
+    sessionRef.current += 1;
+    if (rafRef.current !== null) cancelAnimationFrame(rafRef.current);
+    rafRef.current = null;
+    streamRef.current?.getTracks().forEach((t) => t.stop());
+    streamRef.current = null;
+    // close() returns a promise; absorb a rejection so teardown never surfaces
+    // as an unhandled promise rejection.
+    ctxRef.current?.close().catch(() => undefined);
+    ctxRef.current = null;
+    setActive(false);
+    setLevel(0);
+  }, []);
+
+  const start = async () => {
+    sessionRef.current += 1;
+    const session = sessionRef.current;
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      if (session !== sessionRef.current) {
+        // Nothing owns this stream any more: release the microphone right away
+        // instead of leaving it open with no reference left to stop it.
+        stream.getTracks().forEach((t) => t.stop());
+        return;
+      }
+      streamRef.current = stream;
+      const ctx = new AudioContext();
+      ctxRef.current = ctx;
+      const source = ctx.createMediaStreamSource(stream);
+      const analyser = ctx.createAnalyser();
+      analyser.fftSize = 256;
+      source.connect(analyser);
+
+      const buffer = new Uint8Array(analyser.frequencyBinCount);
+      // Self-rescheduling frame callback; stop() cancels the pending frame.
+      const update = () => {
+        analyser.getByteFrequencyData(buffer);
+        let sum = 0;
+        for (let i = 0; i < buffer.length; i += 1) sum += buffer[i];
+        setLevel(sum / buffer.length);
+        rafRef.current = requestAnimationFrame(update);
+      };
+      update();
+      setActive(true);
+    } catch (e) {
+      // eslint-disable-next-line no-console
+      console.error('Mic test failed', e);
+      // Release anything acquired before the failure, unless a newer
+      // start()/stop() has already taken over the refs.
+      if (session === sessionRef.current) stop();
+    }
+  };
+
+  // Tear everything down when the component unmounts.
+  useEffect(() => () => stop(), [stop]);
+
+  return (
+    <Box direction="Column" gap="100" style={{ padding: '8px 0' }}>
+      <Box direction="Row" gap="200" align="Center">
+        <Button size="300" variant="Secondary" outlined onClick={active ? stop : start}>
+          <Text size="T300">{active ? 'Stop Test' : 'Test Microphone'}</Text>
+        </Button>
+        <Box
+          grow="Yes"
+          style={{
+            height: '10px',
+            background: 'var(--lt-bg-card, rgba(0,0,0,0.2))',
+            borderRadius: '5px',
+            overflow: 'hidden',
+            position: 'relative',
+            border: '1px solid var(--lt-border-color)',
+          }}
+        >
+          <Box
+            style={{
+              position: 'absolute',
+              top: 0,
+              left: 0,
+              bottom: 0,
+              width: `${Math.min(100, (level / 128) * 100)}%`,
+              background: 'var(--lt-accent-green, #00FF88)',
+              transition: 'width 0.05s linear',
+              boxShadow: '0 0 8px var(--lt-accent-green)',
+            }}
+          />
+        </Box>
+      </Box>
+      <Text size="S300" variant="Secondary">
+        The green bar shows your live volume. Use this to tune the Gate Threshold.
+      </Text>
+    </Box>
+  );
+}
+
 function Calls() {
  const [cameraOnJoin, setCameraOnJoin] = useSetting(settingsAtom, 'cameraOnJoin');
  const [callNoiseSuppression, setCallNoiseSuppression] = useSetting(
    settingsAtom,
    'callNoiseSuppression',
  );
+  const [callDenoiseModel, setCallDenoiseModel] = useSetting(settingsAtom, 'callDenoiseModel');
+  const [callDenoiseNativeNS, setCallDenoiseNativeNS] = useSetting(
+    settingsAtom,
+    'callDenoiseNativeNS',
+  );
+  const [callDenoiseGate, setCallDenoiseGate] = useSetting(settingsAtom, 'callDenoiseGate');
+  const [callDenoiseGateThreshold, setCallDenoiseGateThreshold] = useSetting(
+    settingsAtom,
+    'callDenoiseGateThreshold',
+  );
+
  const [pttMode, setPttMode] = useSetting(settingsAtom, 'pttMode');
  const [pttKey, setPttKey] = useSetting(settingsAtom, 'pttKey');
  const [deafenKey, setDeafenKey] = useSetting(settingsAtom, 'deafenKey');
@@ -1220,6 +1324,8 @@ function Calls() {
  const pttBind = useKeyBind(setPttKey);
  const deafenBind = useKeyBind(setDeafenKey);

+  const mlSupported = isMLDenoiseSupported();
+
  return (
    <Box direction="Column" gap="100">
      <Text size="L400">Calls</Text>
@@ -1233,7 +1339,79 @@ function Calls() {
      <SequenceCard className={SequenceCardStyle} variant="SurfaceVariant" direction="Column">
        <SettingTile
          title="Noise Suppression"
-          description="Filter background noise from your mic during calls. Browser-native uses the built-in WebRTC suppressor; ML runs on-device RNNoise for stronger, Krisp-style removal (higher CPU)."
+          description={
+            <Box direction="Column" gap="200">
+              <Text>
+                Filter background noise from your mic during calls. Browser-native uses the
+                built-in WebRTC suppressor (Google NSNet2).
+              </Text>
+
+              <Box direction="Column" gap="100" style={{ overflowX: 'auto' }}>
+                <Box
+                  direction="Row"
+                  gap="100"
+                  style={{ borderBottom: '1px solid var(--lt-border-color)', paddingBottom: '4px' }}
+                >
+                  <Box style={{ width: '120px' }}>
+                    <Text size="S300" bold>
+                      Model
+                    </Text>
+                  </Box>
+                  <Box style={{ width: '80px' }}>
+                    <Text size="S300" bold>
+                      CPU
+                    </Text>
+                  </Box>
+                  <Box style={{ width: '80px' }}>
+                    <Text size="S300" bold>
+                      Quality
+                    </Text>
+                  </Box>
+                  <Box grow="Yes">
+                    <Text size="S300" bold>
+                      Transients
+                    </Text>
+                  </Box>
+                </Box>
+                {DENOISE_MODELS.map((model) => (
+                  <Box key={model.id} direction="Row" gap="100">
+                    <Box style={{ width: '120px' }}>
+                      <Text size="S300">{model.name}</Text>
+                    </Box>
+                    <Box style={{ width: '80px' }}>
+                      <Text size="S300">{model.cpuUsage}</Text>
+                    </Box>
+                    <Box style={{ width: '80px' }}>
+                      <Text size="S300">{model.voiceQuality}</Text>
+                    </Box>
+                    <Box grow="Yes">
+                      <Text size="S300">{model.transients}</Text>
+                    </Box>
+                  </Box>
+                ))}
+              </Box>
+
+              {!mlSupported && (
+                <Box direction="Column" gap="100">
+                  <Text variant="Warning" size="S300">
+                    ML options are not supported in this browser.
+                  </Text>
+                  <Box as="ul" style={{ paddingLeft: '20px', margin: 0 }}>
+                    {ML_DENOISE_REQUIREMENTS.map((req) => (
+                      <Text as="li" key={req} size="S300">
+                        {req}
+                      </Text>
+                    ))}
+                  </Box>
+                </Box>
+              )}
+              {callNoiseSuppression === 'ml' && (
+                <Text variant="Warning" size="S300">
+                  Note: Applying changes requires rejoining the call.
+                </Text>
+              )}
+            </Box>
+          }
          after={
            <SettingsSelect<NoiseSuppressionMode>
              value={callNoiseSuppression}
@@ -1241,11 +1419,86 @@ function Calls() {
              options={[
                { value: 'off', label: 'Off' },
                { value: 'browser', label: 'Browser-native' },
-                { value: 'ml', label: 'ML (beta)' },
+                {
+                  value: 'ml',
+                  label: 'ML (Advanced)',
+                  disabled: !mlSupported,
+                },
              ]}
            />
          }
        />
+
+        {callNoiseSuppression === 'ml' && (
+          <Box
+            direction="Column"
+            gap="300"
+            style={{
+              padding: '16px',
+              marginTop: '8px',
+              borderTop: '1px solid var(--lt-border-color)',
+              background: 'rgba(0,0,0,0.1)',
+            }}
+          >
+            <SettingTile
+              title="ML Model"
+              description="Choose the machine learning model to use for noise removal."
+              after={
+                <SettingsSelect<DenoiseModelId>
+                  value={callDenoiseModel}
+                  onChange={setCallDenoiseModel}
+                  options={[
+                    { value: 'rnnoise', label: 'RNNoise' },
+                    { value: 'speex', label: 'Speex (Legacy)' },
+                    { value: 'dtln', label: 'DTLN (Balanced)' },
+                    { value: 'deepfilternet', label: 'DeepFilterNet 3 (Pro)' },
+                  ]}
+                />
+              }
+            />
+
+            <SettingTile
+              title="Series Suppression"
+              description="Run the browser's native stationary noise filter before the ML model. Recommended for eliminating fan hum."
+              after={
+                <Switch
+                  variant="Primary"
+                  value={callDenoiseNativeNS}
+                  onChange={setCallDenoiseNativeNS}
+                />
+              }
+            />
+
+            <SettingTile
+              title="Noise Gate"
+              description="Hard-cut audio when you aren't speaking to ensure absolute silence between sentences."
+              after={
+                <Switch variant="Primary" value={callDenoiseGate} onChange={setCallDenoiseGate} />
+              }
+            />
+
+            {callDenoiseGate && (
+              <Box direction="Column" gap="100">
+                <Box direction="Row" justify="SpaceBetween">
+                  <Text size="S300">Gate Threshold</Text>
+                  <Text size="S300" bold>
+                    {callDenoiseGateThreshold} dB
+                  </Text>
+                </Box>
+                <input
+                  type="range"
+                  min="-100"
+                  max="0"
+                  step="1"
+                  value={callDenoiseGateThreshold}
+                  onChange={(e) => setCallDenoiseGateThreshold(parseInt(e.target.value, 10))}
+                  style={{ width: '100%', accentColor: 'var(--lt-accent-orange)' }}
+                />
+                <MicMeter />
+              </Box>
+            )}
+          </Box>
+        )}
      </SequenceCard>
      <SequenceCard
        className={SequenceCardStyle}
@@ -46,6 +46,10 @@ export const createCallEmbed = (
  container: HTMLElement,
  pref?: CallPreferences,
  denoiseMode: NoiseSuppressionMode = 'browser',
+  denoiseModel: string = 'rnnoise',
+  denoiseNativeNS: boolean = true,
+  denoiseGate: boolean = false,
+  denoiseGateThreshold: number = -45,
  forceAudioOff = false,
 ): CallEmbed => {
  const rtcSession = mx.matrixRTC.getRoomSession(room);
@@ -60,6 +64,10 @@ export const createCallEmbed = (
    intent,
    themeKind,
    denoiseMode,
+    denoiseModel,
+    denoiseNativeNS,
+    denoiseGate,
+    denoiseGateThreshold,
    initialAudio,
    initialVideo,
  );
@@ -77,6 +85,10 @@ export const useCallStart = (dm = false) => {
  const setCallEmbed = useSetAtom(callEmbedAtom);
  const callEmbedRef = useCallEmbedRef();
  const [callNoiseSuppression] = useSetting(settingsAtom, 'callNoiseSuppression');
+  const [callDenoiseModel] = useSetting(settingsAtom, 'callDenoiseModel');
+  const [callDenoiseNativeNS] = useSetting(settingsAtom, 'callDenoiseNativeNS');
+  const [callDenoiseGate] = useSetting(settingsAtom, 'callDenoiseGate');
+  const [callDenoiseGateThreshold] = useSetting(settingsAtom, 'callDenoiseGateThreshold');
  const [pttMode] = useSetting(settingsAtom, 'pttMode');

  const startCall = useCallback(
@@ -97,12 +109,28 @@ export const useCallStart = (dm = false) => {
        container,
        pref,
        callNoiseSuppression ?? 'browser',
+        callDenoiseModel ?? 'rnnoise',
+        callDenoiseNativeNS ?? true,
+        callDenoiseGate ?? false,
+        callDenoiseGateThreshold ?? -45,
        !!pttMode,
      );

      setCallEmbed(callEmbed);
    },
-    [mx, dm, theme, setCallEmbed, callEmbedRef, callNoiseSuppression, pttMode],
+    [
+      mx,
+      dm,
+      theme,
+      setCallEmbed,
+      callEmbedRef,
+      callNoiseSuppression,
+      callDenoiseModel,
+      callDenoiseNativeNS,
+      callDenoiseGate,
+      callDenoiseGateThreshold,
+      pttMode,
+    ],
  );

  return startCall;
@@ -382,6 +382,32 @@ function DeepLinkNavigator() {
  return null;
 }

+/**
+ * Non-UI feature that listens for `lotus-denoise-status` window messages and
+ * queues a toast whenever such a message reports `active` as falsy.
+ *
+ * NOTE(review): the listener accepts messages from any origin/source. If the
+ * origin of the call widget is known, check `event.origin` (or `event.source`)
+ * before trusting the payload — confirm against the embed setup.
+ */
+function LotusDenoiseFeature() {
+  const setToast = useSetAtom(toastQueueAtom);
+
+  useEffect(() => {
+    const handleMessage = (event: MessageEvent) => {
+      const data: unknown = event.data;
+      if (typeof data !== 'object' || data === null) return;
+
+      const { type, active, error } = data as {
+        type?: unknown;
+        active?: unknown;
+        error?: unknown;
+      };
+      if (type !== 'lotus-denoise-status' || active) return;
+
+      // The payload is untyped: only interpolate real text so that a non-string
+      // `error` cannot show up in the toast as "[object Object]".
+      let reason = 'Unknown error';
+      if (typeof error === 'string' && error !== '') {
+        reason = error;
+      } else if (error instanceof Error && error.message !== '') {
+        reason = error.message;
+      }
+
+      setToast({
+        id: `denoise-fail-${Date.now()}`,
+        displayName: 'Audio Quality',
+        body: `ML Noise Suppression failed: ${reason}. Falling back to raw mic.`,
+        roomName: 'System',
+        roomId: '',
+      });
+    };
+
+    window.addEventListener('message', handleMessage);
+    return () => window.removeEventListener('message', handleMessage);
+  }, [setToast]);
+
+  return null;
+}
+
 export function ClientNonUIFeatures({ children }: ClientNonUIFeaturesProps) {
  return (
    <>
@@ -391,6 +417,7 @@ export function ClientNonUIFeatures({ children }: ClientNonUIFeaturesProps) {
      <PresenceUpdater />
      <InviteNotifications />
      <MessageNotifications />
+      <LotusDenoiseFeature />
      <DeepLinkNavigator />
      {children}
    </>
@@ -102,6 +102,10 @@ export class CallEmbed {
    intent: ElementCallIntent,
    themeKind: ElementCallThemeKind,
    denoiseMode: NoiseSuppressionMode = 'browser',
+    denoiseModel: string = 'rnnoise',
+    denoiseNativeNS: boolean = true,
+    denoiseGate: boolean = false,
+    denoiseGateThreshold: number = -45,
    initialAudio = true,
    initialVideo = false,
  ): Widget {
@@ -126,8 +130,8 @@ export class CallEmbed {
      lang: 'en-EN',
      theme: themeKind,
      // EC's built-in WebRTC suppressor: on only for 'browser' tier. For 'ml' we
-      // disable it here so RNNoise (the Lotus denoise shim) owns suppression and
-      // the two don't fight each other.
+      // disable it here so EC doesn't do its own extra processing; the Lotus
+      // denoise shim is told via `lotusNativeNS` whether to keep native NS on.
      noiseSuppression: (denoiseMode === 'browser').toString(),
      audio: initialAudio.toString(),
      video: initialVideo.toString(),
@@ -135,9 +139,12 @@ export class CallEmbed {
    });

    if (denoiseMode === 'ml') {
-      // Signal the Lotus denoise shim (injected into the EC index.html) to route
-      // the mic through the RNNoise worklet before LiveKit publishes the track.
+      // Signal the Lotus denoise shim to route the mic through the ML processors.
      params.append('lotusDenoise', 'ml');
+      params.append('lotusModel', denoiseModel);
+      params.append('lotusNativeNS', denoiseNativeNS.toString());
+      params.append('lotusGate', denoiseGate.toString());
+      params.append('lotusGateThreshold', denoiseGateThreshold.toString());
    }

    if (CallEmbed.startingCall(intent)) {
@@ -14,6 +14,7 @@ export type MessageSpacing = '0' | '100' | '200' | '300' | '400' | '500';
 //  - 'browser' : WebRTC built-in suppression (Element Call noiseSuppression param)
 //  - 'ml'      : client-side RNNoise ML suppression (Lotus denoise shim)
 export type NoiseSuppressionMode = 'off' | 'browser' | 'ml';
+// Identifier of the ML model stored in the `callDenoiseModel` setting.
+export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln' | 'deepfilternet';
 export type ChatBackground =
  | 'none'
  | 'blueprint'
@@ -115,6 +116,10 @@ export interface Settings {

  cameraOnJoin: boolean;
  callNoiseSuppression: NoiseSuppressionMode;
+  callDenoiseModel: DenoiseModelId;
+  callDenoiseNativeNS: boolean;
+  callDenoiseGate: boolean;
+  callDenoiseGateThreshold: number;
  pttMode: boolean;
  pttKey: string;

@@ -205,6 +210,10 @@ const defaultSettings: Settings = {

  cameraOnJoin: false,
  callNoiseSuppression: 'browser',
+  callDenoiseModel: 'rnnoise',
+  callDenoiseNativeNS: true,
+  callDenoiseGate: false,
+  callDenoiseGateThreshold: -45,
  pttMode: false,
  pttKey: 'Space',

@@ -0,0 +1,68 @@
+/**
+ * Model metadata and browser capability detection for Lotus ML noise suppression.
+ */
+
+/** Metadata describing one ML noise-suppression model. */
+export type DenoiseModel = {
+  /** Model identifier, e.g. 'rnnoise'. */
+  id: string;
+  /** Display name; shown in the "Model" column of the settings comparison chart. */
+  name: string;
+  /** Short human-readable summary of the model. */
+  description: string;
+  /** CPU usage label; shown in the "CPU" column of the chart. */
+  cpuUsage: string;
+  /** Size label for the model binary. */
+  binarySize: string;
+  /** Rating shown in the "Transients" column of the chart. */
+  transients: 'Poor' | 'Good' | 'Excellent';
+  /** Rating shown in the "Quality" column of the chart. */
+  voiceQuality: 'Moderate' | 'High' | 'Very High';
+};
+
+/**
+ * Model metadata table; the Calls settings comparison chart renders one row
+ * per entry.
+ *
+ * NOTE(review): the settings model picker also offers 'speex', which has no
+ * entry here and is therefore missing from the chart — confirm whether that
+ * is intended.
+ */
+export const DENOISE_MODELS: DenoiseModel[] = [
+  {
+    id: 'rnnoise',
+    name: 'RNNoise (Mozilla)',
+    description: 'Lightweight hybrid model. Best for consistent noise like fans.',
+    cpuUsage: '< 5%',
+    binarySize: '< 1 MB',
+    transients: 'Poor',
+    voiceQuality: 'Moderate',
+  },
+  {
+    id: 'dtln',
+    name: 'DTLN (Balanced)',
+    description: 'Deep learning model with a good balance of quality and CPU.',
+    cpuUsage: '10-20%',
+    binarySize: '3-4 MB',
+    transients: 'Good',
+    voiceQuality: 'High',
+  },
+  {
+    id: 'deepfilternet',
+    name: 'DeepFilterNet 3 (Pro)',
+    description: 'State-of-the-art studio quality. Removes all background noise.',
+    cpuUsage: '25-50%+',
+    binarySize: '15-20 MB',
+    transients: 'Excellent',
+    voiceQuality: 'Very High',
+  },
+];
+
+/**
+ * Reports whether this environment exposes the browser APIs the ML denoise
+ * pipeline requires: an AudioContext (standard or webkit-prefixed),
+ * AudioWorkletNode, and `navigator.mediaDevices.getUserMedia`.
+ *
+ * @returns `true` only when all three are present; always `false` when
+ *   `window` is undefined.
+ */
+export const isMLDenoiseSupported = (): boolean => {
+  if (typeof window === 'undefined') return false;
+
+  // 1. Web Audio API (standard or webkit-prefixed constructor).
+  const hasAudioContext = !!(window.AudioContext || (window as any).webkitAudioContext);
+  // 2. AudioWorklet. Probe with `typeof`: evaluating the bare identifier throws
+  //    a ReferenceError in browsers that have an AudioContext but no
+  //    AudioWorklet support, instead of reporting "unsupported".
+  const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined';
+  // 3. Microphone access.
+  const hasGetUserMedia =
+    typeof navigator !== 'undefined' && !!navigator.mediaDevices?.getUserMedia;
+
+  return hasAudioWorklet && hasGetUserMedia;
+};
+
+/**
+ * Human-readable requirements for ML denoise. The Calls settings render these
+ * as a bullet list when `isMLDenoiseSupported()` returns false.
+ *
+ * NOTE(review): `isMLDenoiseSupported` probes the Web Audio API, AudioWorklet
+ * and getUserMedia only; the 48kHz entry is not checked there — confirm it is
+ * still a real requirement.
+ */
+export const ML_DENOISE_REQUIREMENTS = [
+  'Modern browser with Web Audio API support',
+  'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)',
+  'Microphone access',
+  '48kHz AudioContext capability',
+];
+