diff --git a/src/app/plugins/call/CallEmbed.ts b/src/app/plugins/call/CallEmbed.ts index 4a81dd34d..3500b1d05 100644 --- a/src/app/plugins/call/CallEmbed.ts +++ b/src/app/plugins/call/CallEmbed.ts @@ -138,9 +138,9 @@ export class CallEmbed { themeKind: ElementCallThemeKind, denoiseMode: NoiseSuppressionMode = 'browser', denoiseModel: string = 'rnnoise', - // [lotus] no longer used by the in-source denoise path; kept positionally - // for callers. Prefixed with _ to satisfy no-unused-vars. - _denoiseNativeNS: boolean = true, + // [lotus] "Series suppression": also run EC's built-in WebRTC NS before the + // in-source ML model (opt-in test aid for stacking browser NS + ML). + denoiseNativeNS: boolean = false, denoiseGate: boolean = false, denoiseGateThreshold: number = -45, initialAudio = true, @@ -166,10 +166,14 @@ export class CallEmbed { perParticipantE2EE: room.hasEncryptionStateEvent().toString(), lang: 'en-EN', theme: themeKind, - // EC's built-in WebRTC suppressor: on only for 'browser' tier. For 'ml' - // we disable it so EC captures a raw mic and the fork's in-source denoise - // TrackProcessor (lotusDenoiseSource) handles the pipeline. - noiseSuppression: (denoiseMode === 'browser').toString(), + // EC's built-in WebRTC suppressor: on for the 'browser' tier, and for the + // 'ml' tier only when "series suppression" is opted into (stack browser NS + // before the fork's in-source ML model). Plain 'ml' keeps it OFF so the + // fork's TrackProcessor (lotusDenoiseSource) gets a raw mic. + noiseSuppression: ( + denoiseMode === 'browser' || + (denoiseMode === 'ml' && denoiseNativeNS) + ).toString(), audio: initialAudio.toString(), video: initialVideo.toString(), header: 'none', diff --git a/src/app/state/settings.ts b/src/app/state/settings.ts index 947f8b4e1..6d083c013 100644 --- a/src/app/state/settings.ts +++ b/src/app/state/settings.ts @@ -236,9 +236,13 @@ const defaultSettings: Settings = { perMessageProfiles: false, cameraOnJoin: false, + // Tier default stays browser-native (known-good; best-perceived in testing so + // far). If a user opts into the ML tier, default to the highest-quality model. callNoiseSuppression: 'browser', - callDenoiseModel: 'rnnoise', - callDenoiseNativeNS: true, + callDenoiseModel: 'deepfilternet', + // "Series suppression" (stack the browser's native NS before the ML model) is + // off by default — best practice is a single NS stage; it's an opt-in test aid. + callDenoiseNativeNS: false, callDenoiseGate: false, callDenoiseGateThreshold: -45, pttMode: false, diff --git a/src/app/utils/lotusDenoiseUtils.test.ts b/src/app/utils/lotusDenoiseUtils.test.ts index ad5e8ed42..201ab2947 100644 --- a/src/app/utils/lotusDenoiseUtils.test.ts +++ b/src/app/utils/lotusDenoiseUtils.test.ts @@ -1,18 +1,14 @@ import { test, beforeEach, afterEach } from 'node:test'; import assert from 'node:assert/strict'; -import { - DENOISE_MODELS, - ML_DENOISE_REQUIREMENTS, - isMLDenoiseSupported, -} from './lotusDenoiseUtils'; +import { DENOISE_MODELS, ML_DENOISE_REQUIREMENTS, isMLDenoiseSupported } from './lotusDenoiseUtils'; // ── Model catalog (data integrity) ────────────────────────────────────────── -test('DENOISE_MODELS lists the four expected models in order', () => { +test('DENOISE_MODELS lists the four models ordered best-quality (highest CPU) first', () => { assert.deepEqual( DENOISE_MODELS.map((m) => m.id), - ['rnnoise', 'speex', 'dtln', 'deepfilternet'], + ['deepfilternet', 'dtln', 'rnnoise', 'speex'], ); }); diff --git a/src/app/utils/lotusDenoiseUtils.ts b/src/app/utils/lotusDenoiseUtils.ts index 73c559ab2..d4f721a2e 100644 --- a/src/app/utils/lotusDenoiseUtils.ts +++ b/src/app/utils/lotusDenoiseUtils.ts @@ -1,5 +1,8 @@ /** - * Detection utilities for Lotus ML noise suppression (RNNoise). + * Detection utilities + model catalog for Lotus ML noise suppression + * (DeepFilterNet 3 / DTLN / RNNoise / Speex). The catalog is ordered by + * quality (and, correspondingly, CPU cost) — highest first — and drives the + * order of the model dropdown in settings. */ import { DenoiseModelId } from '../state/settings'; @@ -14,42 +17,47 @@ export type DenoiseModel = { voiceQuality: 'Moderate' | 'High' | 'Very High'; }; +// Ordered best-quality (highest CPU) first — this is the dropdown order. export const DENOISE_MODELS: DenoiseModel[] = [ { - id: 'rnnoise', - name: 'RNNoise', - description: 'Lightweight hybrid model. Best for consistent noise like fans.', - cpuUsage: '< 5%', - binarySize: '< 1 MB', - transients: 'Good', - voiceQuality: 'High', - }, - { - id: 'speex', - name: 'Speex (Legacy)', - description: 'Classic DSP noise suppressor. Minimal CPU, gentler on voice.', - cpuUsage: '< 2%', - binarySize: '< 1 MB', - transients: 'Poor', - voiceQuality: 'Moderate', + id: 'deepfilternet', + name: 'DeepFilterNet 3 (beta)', + description: + 'Studio-grade deep-learning model (48 kHz fullband, ONNX). Best quality; highest CPU and a larger one-time download.', + cpuUsage: '25-50%', + binarySize: '~18 MB', + transients: 'Excellent', + voiceQuality: 'Very High', }, { id: 'dtln', name: 'DTLN (beta)', - description: 'Deep-learning model (TFLite). Stronger on transient noise; higher CPU.', + description: + 'Dual-signal deep-learning model (16 kHz). Strong on transient noise; moderate CPU.', cpuUsage: '10-20%', binarySize: '~4 MB', transients: 'Excellent', voiceQuality: 'High', }, { - id: 'deepfilternet', - name: 'DeepFilterNet 3 (beta)', - description: 'Studio-grade deep-learning model (48 kHz, ONNX). Best quality; highest CPU.', - cpuUsage: '25-50%', - binarySize: '~18 MB', - transients: 'Excellent', - voiceQuality: 'Very High', + id: 'rnnoise', + name: 'RNNoise', + description: + 'Lightweight hybrid model (48 kHz). Very low CPU; good for steady noise like fans, but can sound processed at full strength.', + cpuUsage: '< 5%', + binarySize: '< 1 MB', + transients: 'Good', + voiceQuality: 'Moderate', + }, + { + id: 'speex', + name: 'Speex (Legacy)', + description: + 'Classic DSP noise suppressor. Minimal CPU, gentlest on voice; weakest suppression.', + cpuUsage: '< 2%', + binarySize: '< 1 MB', + transients: 'Poor', + voiceQuality: 'Moderate', }, ]; @@ -67,8 +75,14 @@ export const isMLDenoiseSupported = (): boolean => { // instead of returning false. const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined'; const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia); + // Every ML model compiles WebAssembly (and DFN/DTLN load worklets via blob + // URLs). Under a strict CSP without `wasm-unsafe-eval` (e.g. some desktop/Tauri + // shells) WASM is unavailable, so gate on it — otherwise we'd offer ML and then + // silently fall back to the raw mic in-call. + const hasWasm = + typeof WebAssembly !== 'undefined' && typeof WebAssembly.instantiate === 'function'; - return hasAudioWorklet && hasGetUserMedia; + return hasAudioWorklet && hasGetUserMedia && hasWasm; }; /** @@ -77,6 +91,6 @@ export const isMLDenoiseSupported = (): boolean => { export const ML_DENOISE_REQUIREMENTS = [ 'Modern browser with Web Audio API support', 'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)', + 'WebAssembly (WASM) support', 'Microphone access', - '48kHz AudioContext capability', ];