feat(denoise): browser-native default, quality-ordered model picker, wire native-NS

- Model dropdown is now ordered by quality/CPU, best first (DeepFilterNet 3 → DTLN → RNNoise → Speex); fix RNNoise's inaccurate "High" voice-quality label.
- When a user opts into the ML tier, default to the highest-quality model (DeepFilterNet 3). The tier default stays browser-native (known-good, best perceived in testing so far).
- Wire the "Series Suppression" (native-NS-before-ML) toggle into the real call path — it was applied only in the settings tester, so the tester could sound better than the actual call. Default it OFF (a single NS stage is best practice; it's an opt-in test aid).
- isMLDenoiseSupported now also requires WebAssembly, so ML isn't offered on strict-CSP shells where it would silently fall back to the raw mic.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 23:02:41 -04:00
parent 7939dc92d4
commit ebc782b16c
4 changed files with 61 additions and 43 deletions
@@ -138,9 +138,9 @@ export class CallEmbed {
    themeKind: ElementCallThemeKind,
    denoiseMode: NoiseSuppressionMode = 'browser',
    denoiseModel: string = 'rnnoise',
-    // [lotus] no longer used by the in-source denoise path; kept positionally
-    // for callers. Prefixed with _ to satisfy no-unused-vars.
-    _denoiseNativeNS: boolean = true,
+    // [lotus] "Series suppression": also run EC's built-in WebRTC NS before the
+    // in-source ML model (opt-in test aid for stacking browser NS + ML).
+    denoiseNativeNS: boolean = false,
    denoiseGate: boolean = false,
    denoiseGateThreshold: number = -45,
    initialAudio = true,
@@ -166,10 +166,14 @@ export class CallEmbed {
      perParticipantE2EE: room.hasEncryptionStateEvent().toString(),
      lang: 'en-EN',
      theme: themeKind,
-      // EC's built-in WebRTC suppressor: on only for 'browser' tier. For 'ml'
-      // we disable it so EC captures a raw mic and the fork's in-source denoise
-      // TrackProcessor (lotusDenoiseSource) handles the pipeline.
-      noiseSuppression: (denoiseMode === 'browser').toString(),
+      // EC's built-in WebRTC suppressor: on for the 'browser' tier, and for the
+      // 'ml' tier only when "series suppression" is opted into (stack browser NS
+      // before the fork's in-source ML model). Plain 'ml' keeps it OFF so the
+      // fork's TrackProcessor (lotusDenoiseSource) gets a raw mic.
+      noiseSuppression: (
+        denoiseMode === 'browser' ||
+        (denoiseMode === 'ml' && denoiseNativeNS)
+      ).toString(),
      audio: initialAudio.toString(),
      video: initialVideo.toString(),
      header: 'none',
@@ -236,9 +236,13 @@ const defaultSettings: Settings = {
  perMessageProfiles: false,

  cameraOnJoin: false,
+  // Tier default stays browser-native (known-good; best-perceived in testing so
+  // far). If a user opts into the ML tier, default to the highest-quality model.
  callNoiseSuppression: 'browser',
-  callDenoiseModel: 'rnnoise',
-  callDenoiseNativeNS: true,
+  callDenoiseModel: 'deepfilternet',
+  // "Series suppression" (stack the browser's native NS before the ML model) is
+  // off by default — best practice is a single NS stage; it's an opt-in test aid.
+  callDenoiseNativeNS: false,
  callDenoiseGate: false,
  callDenoiseGateThreshold: -45,
  pttMode: false,
@@ -1,18 +1,14 @@
 import { test, beforeEach, afterEach } from 'node:test';
 import assert from 'node:assert/strict';

-import {
-  DENOISE_MODELS,
-  ML_DENOISE_REQUIREMENTS,
-  isMLDenoiseSupported,
-} from './lotusDenoiseUtils';
+import { DENOISE_MODELS, ML_DENOISE_REQUIREMENTS, isMLDenoiseSupported } from './lotusDenoiseUtils';

 // ── Model catalog (data integrity) ──────────────────────────────────────────

-test('DENOISE_MODELS lists the four expected models in order', () => {
+test('DENOISE_MODELS lists the four models ordered best-quality (highest CPU) first', () => {
  assert.deepEqual(
    DENOISE_MODELS.map((m) => m.id),
-    ['rnnoise', 'speex', 'dtln', 'deepfilternet'],
+    ['deepfilternet', 'dtln', 'rnnoise', 'speex'],
  );
 });

@@ -1,5 +1,8 @@
 /**
- * Detection utilities for Lotus ML noise suppression (RNNoise).
+ * Detection utilities + model catalog for Lotus ML noise suppression
+ * (DeepFilterNet 3 / DTLN / RNNoise / Speex). The catalog is ordered by
+ * quality (and, correspondingly, CPU cost) — highest first — and drives the
+ * order of the model dropdown in settings.
 */

 import { DenoiseModelId } from '../state/settings';
@@ -14,42 +17,47 @@ export type DenoiseModel = {
  voiceQuality: 'Moderate' | 'High' | 'Very High';
 };

+// Ordered best-quality (highest CPU) first — this is the dropdown order.
 export const DENOISE_MODELS: DenoiseModel[] = [
  {
-    id: 'rnnoise',
-    name: 'RNNoise',
-    description: 'Lightweight hybrid model. Best for consistent noise like fans.',
-    cpuUsage: '< 5%',
-    binarySize: '< 1 MB',
-    transients: 'Good',
-    voiceQuality: 'High',
-  },
-  {
-    id: 'speex',
-    name: 'Speex (Legacy)',
-    description: 'Classic DSP noise suppressor. Minimal CPU, gentler on voice.',
-    cpuUsage: '< 2%',
-    binarySize: '< 1 MB',
-    transients: 'Poor',
-    voiceQuality: 'Moderate',
+    id: 'deepfilternet',
+    name: 'DeepFilterNet 3 (beta)',
+    description:
+      'Studio-grade deep-learning model (48 kHz fullband, ONNX). Best quality; highest CPU and a larger one-time download.',
+    cpuUsage: '25-50%',
+    binarySize: '~18 MB',
+    transients: 'Excellent',
+    voiceQuality: 'Very High',
  },
  {
    id: 'dtln',
    name: 'DTLN (beta)',
-    description: 'Deep-learning model (TFLite). Stronger on transient noise; higher CPU.',
+    description:
+      'Dual-signal deep-learning model (16 kHz). Strong on transient noise; moderate CPU.',
    cpuUsage: '10-20%',
    binarySize: '~4 MB',
    transients: 'Excellent',
    voiceQuality: 'High',
  },
  {
-    id: 'deepfilternet',
-    name: 'DeepFilterNet 3 (beta)',
-    description: 'Studio-grade deep-learning model (48 kHz, ONNX). Best quality; highest CPU.',
-    cpuUsage: '25-50%',
-    binarySize: '~18 MB',
-    transients: 'Excellent',
-    voiceQuality: 'Very High',
+    id: 'rnnoise',
+    name: 'RNNoise',
+    description:
+      'Lightweight hybrid model (48 kHz). Very low CPU; good for steady noise like fans, but can sound processed at full strength.',
+    cpuUsage: '< 5%',
+    binarySize: '< 1 MB',
+    transients: 'Good',
+    voiceQuality: 'Moderate',
+  },
+  {
+    id: 'speex',
+    name: 'Speex (Legacy)',
+    description:
+      'Classic DSP noise suppressor. Minimal CPU, gentlest on voice; weakest suppression.',
+    cpuUsage: '< 2%',
+    binarySize: '< 1 MB',
+    transients: 'Poor',
+    voiceQuality: 'Moderate',
  },
 ];

@@ -67,8 +75,14 @@ export const isMLDenoiseSupported = (): boolean => {
  // instead of returning false.
  const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined';
  const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
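+  // Every ML model compiles WebAssembly (and DFN/DTLN load worklets via blob
+  // URLs), so require the WebAssembly API to be present — otherwise we'd offer
+  // ML and then silently fall back to the raw mic in-call.
+  // NOTE(review): this is a presence check only. Under a strict CSP without
+  // `wasm-unsafe-eval` (e.g. some desktop/Tauri shells) the `WebAssembly` global
+  // and `instantiate` are usually still defined and only compilation is
+  // rejected, so this may not catch that case — confirm, and consider a tiny
+  // compile probe in a try/catch if it does not.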
+  const hasWasm =
+    typeof WebAssembly !== 'undefined' && typeof WebAssembly.instantiate === 'function';

-  return hasAudioWorklet && hasGetUserMedia;
+  return hasAudioWorklet && hasGetUserMedia && hasWasm;
 };

 /**
@@ -77,6 +91,6 @@ export const isMLDenoiseSupported = (): boolean => {
 export const ML_DENOISE_REQUIREMENTS = [
  'Modern browser with Web Audio API support',
  'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)',
+  'WebAssembly (WASM) support',
  'Microphone access',
-  '48kHz AudioContext capability',
 ];