feat(denoise): browser-native default, quality-ordered model picker, wire native-NS
CI / Build & Quality Checks (push) Successful in 11m15s
CI / Trigger Desktop Build (push) Successful in 18s

- Model dropdown is now ordered by quality/CPU, best first (DeepFilterNet 3 →
  DTLN → RNNoise → Speex); fix RNNoise's inaccurate "High" voice-quality label.
- When a user opts into the ML tier, default to the highest-quality model
  (DeepFilterNet 3). The tier default itself stays browser-native (known-good
  and best-perceived in testing so far).
- Wire the "Series Suppression" (native-NS-before-ML) toggle into the real call
  path — it was applied only in the settings tester, so the tester could sound
  better than the actual call. Default it OFF (a single NS stage is best
  practice; it's an opt-in test aid).
- isMLDenoiseSupported now also requires WebAssembly, so ML isn't offered on
  strict-CSP shells where it would silently fall back to the raw mic.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 23:02:41 -04:00
parent 7939dc92d4
commit ebc782b16c
4 changed files with 61 additions and 43 deletions
+11 -7
View File
@@ -138,9 +138,9 @@ export class CallEmbed {
themeKind: ElementCallThemeKind, themeKind: ElementCallThemeKind,
denoiseMode: NoiseSuppressionMode = 'browser', denoiseMode: NoiseSuppressionMode = 'browser',
denoiseModel: string = 'rnnoise', denoiseModel: string = 'rnnoise',
// [lotus] no longer used by the in-source denoise path; kept positionally // [lotus] "Series suppression": also run EC's built-in WebRTC NS before the
// for callers. Prefixed with _ to satisfy no-unused-vars. // in-source ML model (opt-in test aid for stacking browser NS + ML).
_denoiseNativeNS: boolean = true, denoiseNativeNS: boolean = false,
denoiseGate: boolean = false, denoiseGate: boolean = false,
denoiseGateThreshold: number = -45, denoiseGateThreshold: number = -45,
initialAudio = true, initialAudio = true,
@@ -166,10 +166,14 @@ export class CallEmbed {
perParticipantE2EE: room.hasEncryptionStateEvent().toString(), perParticipantE2EE: room.hasEncryptionStateEvent().toString(),
lang: 'en-EN', lang: 'en-EN',
theme: themeKind, theme: themeKind,
// EC's built-in WebRTC suppressor: on only for 'browser' tier. For 'ml' // EC's built-in WebRTC suppressor: on for the 'browser' tier, and for the
// we disable it so EC captures a raw mic and the fork's in-source denoise // 'ml' tier only when "series suppression" is opted into (stack browser NS
// TrackProcessor (lotusDenoiseSource) handles the pipeline. // before the fork's in-source ML model). Plain 'ml' keeps it OFF so the
noiseSuppression: (denoiseMode === 'browser').toString(), // fork's TrackProcessor (lotusDenoiseSource) gets a raw mic.
noiseSuppression: (
denoiseMode === 'browser' ||
(denoiseMode === 'ml' && denoiseNativeNS)
).toString(),
audio: initialAudio.toString(), audio: initialAudio.toString(),
video: initialVideo.toString(), video: initialVideo.toString(),
header: 'none', header: 'none',
+6 -2
View File
@@ -236,9 +236,13 @@ const defaultSettings: Settings = {
perMessageProfiles: false, perMessageProfiles: false,
cameraOnJoin: false, cameraOnJoin: false,
// Tier default stays browser-native (known-good; best-perceived in testing so
// far). If a user opts into the ML tier, default to the highest-quality model.
callNoiseSuppression: 'browser', callNoiseSuppression: 'browser',
callDenoiseModel: 'rnnoise', callDenoiseModel: 'deepfilternet',
callDenoiseNativeNS: true, // "Series suppression" (stack the browser's native NS before the ML model) is
// off by default — best practice is a single NS stage; it's an opt-in test aid.
callDenoiseNativeNS: false,
callDenoiseGate: false, callDenoiseGate: false,
callDenoiseGateThreshold: -45, callDenoiseGateThreshold: -45,
pttMode: false, pttMode: false,
+3 -7
View File
@@ -1,18 +1,14 @@
import { test, beforeEach, afterEach } from 'node:test'; import { test, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import { import { DENOISE_MODELS, ML_DENOISE_REQUIREMENTS, isMLDenoiseSupported } from './lotusDenoiseUtils';
DENOISE_MODELS,
ML_DENOISE_REQUIREMENTS,
isMLDenoiseSupported,
} from './lotusDenoiseUtils';
// ── Model catalog (data integrity) ────────────────────────────────────────── // ── Model catalog (data integrity) ──────────────────────────────────────────
test('DENOISE_MODELS lists the four expected models in order', () => { test('DENOISE_MODELS lists the four models ordered best-quality (highest CPU) first', () => {
assert.deepEqual( assert.deepEqual(
DENOISE_MODELS.map((m) => m.id), DENOISE_MODELS.map((m) => m.id),
['rnnoise', 'speex', 'dtln', 'deepfilternet'], ['deepfilternet', 'dtln', 'rnnoise', 'speex'],
); );
}); });
+41 -27
View File
@@ -1,5 +1,8 @@
/** /**
* Detection utilities for Lotus ML noise suppression (RNNoise). * Detection utilities + model catalog for Lotus ML noise suppression
* (DeepFilterNet 3 / DTLN / RNNoise / Speex). The catalog is ordered by
* quality (and, correspondingly, CPU cost) — highest first — and drives the
* order of the model dropdown in settings.
*/ */
import { DenoiseModelId } from '../state/settings'; import { DenoiseModelId } from '../state/settings';
@@ -14,42 +17,47 @@ export type DenoiseModel = {
voiceQuality: 'Moderate' | 'High' | 'Very High'; voiceQuality: 'Moderate' | 'High' | 'Very High';
}; };
// Ordered best-quality (highest CPU) first — this is the dropdown order.
export const DENOISE_MODELS: DenoiseModel[] = [ export const DENOISE_MODELS: DenoiseModel[] = [
{ {
id: 'rnnoise', id: 'deepfilternet',
name: 'RNNoise', name: 'DeepFilterNet 3 (beta)',
description: 'Lightweight hybrid model. Best for consistent noise like fans.', description:
cpuUsage: '< 5%', 'Studio-grade deep-learning model (48 kHz fullband, ONNX). Best quality; highest CPU and a larger one-time download.',
binarySize: '< 1 MB', cpuUsage: '25-50%',
transients: 'Good', binarySize: '~18 MB',
voiceQuality: 'High', transients: 'Excellent',
}, voiceQuality: 'Very High',
{
id: 'speex',
name: 'Speex (Legacy)',
description: 'Classic DSP noise suppressor. Minimal CPU, gentler on voice.',
cpuUsage: '< 2%',
binarySize: '< 1 MB',
transients: 'Poor',
voiceQuality: 'Moderate',
}, },
{ {
id: 'dtln', id: 'dtln',
name: 'DTLN (beta)', name: 'DTLN (beta)',
description: 'Deep-learning model (TFLite). Stronger on transient noise; higher CPU.', description:
'Dual-signal deep-learning model (16 kHz). Strong on transient noise; moderate CPU.',
cpuUsage: '10-20%', cpuUsage: '10-20%',
binarySize: '~4 MB', binarySize: '~4 MB',
transients: 'Excellent', transients: 'Excellent',
voiceQuality: 'High', voiceQuality: 'High',
}, },
{ {
id: 'deepfilternet', id: 'rnnoise',
name: 'DeepFilterNet 3 (beta)', name: 'RNNoise',
description: 'Studio-grade deep-learning model (48 kHz, ONNX). Best quality; highest CPU.', description:
cpuUsage: '25-50%', 'Lightweight hybrid model (48 kHz). Very low CPU; good for steady noise like fans, but can sound processed at full strength.',
binarySize: '~18 MB', cpuUsage: '< 5%',
transients: 'Excellent', binarySize: '< 1 MB',
voiceQuality: 'Very High', transients: 'Good',
voiceQuality: 'Moderate',
},
{
id: 'speex',
name: 'Speex (Legacy)',
description:
'Classic DSP noise suppressor. Minimal CPU, gentlest on voice; weakest suppression.',
cpuUsage: '< 2%',
binarySize: '< 1 MB',
transients: 'Poor',
voiceQuality: 'Moderate',
}, },
]; ];
@@ -67,8 +75,14 @@ export const isMLDenoiseSupported = (): boolean => {
// instead of returning false. // instead of returning false.
const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined'; const hasAudioWorklet = hasAudioContext && typeof AudioWorkletNode !== 'undefined';
const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia); const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
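// Every ML model compiles WebAssembly (and DFN/DTLN load worklets via blob
// URLs), so require the WebAssembly API to be present — otherwise we'd offer
// ML and then silently fall back to the raw mic in-call.
// NOTE(review): this only detects a *missing* API. Under a strict CSP without
// `wasm-unsafe-eval` (e.g. some desktop/Tauri shells) the `WebAssembly` global
// is typically still defined and compilation is rejected at runtime instead,
// so this typeof check alone may not exclude those shells — confirm, and
// consider a probe compile if CSP-blocked environments must be gated out.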
const hasWasm =
typeof WebAssembly !== 'undefined' && typeof WebAssembly.instantiate === 'function';
return hasAudioWorklet && hasGetUserMedia; return hasAudioWorklet && hasGetUserMedia && hasWasm;
}; };
/** /**
@@ -77,6 +91,6 @@ export const isMLDenoiseSupported = (): boolean => {
export const ML_DENOISE_REQUIREMENTS = [ export const ML_DENOISE_REQUIREMENTS = [
'Modern browser with Web Audio API support', 'Modern browser with Web Audio API support',
'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)', 'AudioWorklet support (Chrome 66+, Firefox 76+, Safari 14.1+)',
'WebAssembly (WASM) support',
'Microphone access', 'Microphone access',
'48kHz AudioContext capability',
]; ];