diff --git a/build/lotus-denoise.js b/build/lotus-denoise.js index fa93227ae..66b18e170 100644 --- a/build/lotus-denoise.js +++ b/build/lotus-denoise.js @@ -35,9 +35,13 @@ if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return; var ASSET_BASE = './denoise/'; - var SAMPLE_RATE = 48000; var MODEL = params.get('lotusModel') || 'rnnoise'; + // DTLN (@workadventure) targets 16 kHz and does not resample internally, so + // its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) need + // 48 kHz. The processed MediaStreamTrack is published to LiveKit either way + // (WebRTC/Opus resamples as needed). + var SAMPLE_RATE = MODEL === 'dtln' ? 16000 : 48000; var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true'; var USE_GATE = params.get('lotusGate') === 'true'; var GATE_THRESHOLD = parseFloat(params.get('lotusGateThreshold') || '-45'); diff --git a/src/app/features/settings/general/DenoiseTester.tsx b/src/app/features/settings/general/DenoiseTester.tsx index 1f584b25b..e3d48531f 100644 --- a/src/app/features/settings/general/DenoiseTester.tsx +++ b/src/app/features/settings/general/DenoiseTester.tsx @@ -2,15 +2,16 @@ import React, { useCallback, useEffect, useRef, useState } from 'react'; import { Box, Button, Text } from 'folds'; import { DenoiseModelId } from '../../../state/settings'; import { - DENOISE_SAMPLE_RATE, DenoiseNode, buildGateNode, buildModelNode, readDb, + sampleRateFor, } from '../../../utils/denoisePipeline'; const MAX_RECORD_MS = 6000; +// Live monitor mirrors the call's capture (respects the user's native-NS choice). const MIC_CONSTRAINTS = (nativeNS: boolean): MediaStreamConstraints => ({ audio: { noiseSuppression: nativeNS, @@ -20,6 +21,19 @@ const MIC_CONSTRAINTS = (nativeNS: boolean): MediaStreamConstraints => ({ }, }); +// Record & compare captures fully RAW audio (no browser noise suppression / AGC +// / echo cancel) so each model's effect on real background noise is audible. +// Capturing with native NS on would pre-clean the clip and make Raw/RNNoise/ +// Speex sound identical. +const RAW_CONSTRAINTS: MediaStreamConstraints = { + audio: { + noiseSuppression: false, + echoCancellation: false, + autoGainControl: false, + channelCount: 1, + }, +}; + /** A -100..0 dBFS bar with optional threshold marker. */ function DbMeter({ label, db, threshold }: { label: string; db: number; threshold?: number }) { const pct = Math.max(0, Math.min(100, db + 100)); @@ -112,7 +126,7 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi const startLive = async () => { try { - const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE }); + const ctx = new AudioContext({ sampleRate: sampleRateFor(model) }); const stream = await navigator.mediaDevices.getUserMedia(MIC_CONSTRAINTS(nativeNS)); const source = ctx.createMediaStreamSource(stream); const inAnalyser = ctx.createAnalyser(); @@ -192,8 +206,8 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi const startRecord = async () => { try { - const stream = await navigator.mediaDevices.getUserMedia(MIC_CONSTRAINTS(nativeNS)); - const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE }); + const stream = await navigator.mediaDevices.getUserMedia(RAW_CONSTRAINTS); + const ctx = new AudioContext(); const source = ctx.createMediaStreamSource(stream); const analyser = ctx.createAnalyser(); analyser.fftSize = 1024; @@ -209,7 +223,7 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi teardownRecorder(); setRecDb(-100); try { - const decodeCtx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE }); + const decodeCtx = new AudioContext({ sampleRate: 48000 }); clipRef.current = await decodeCtx.decodeAudioData(await blob.arrayBuffer()); decodeCtx.close().catch(() => undefined); setHasClip(true); @@ -254,7 +268,9 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi const clip = clipRef.current; if (!clip) return; try { - const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE }); + // bufferSource auto-resamples the 48 kHz clip to the context rate, so DTLN + // gets the 16 kHz it needs while raw/RNNoise/Speex stay at 48 kHz. + const ctx = new AudioContext({ sampleRate: sampleRateFor(playModel ?? 'rnnoise') }); const source = ctx.createBufferSource(); source.buffer = clip; if (playModel) { @@ -327,8 +343,9 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi Record & compare Record up to {MAX_RECORD_MS / 1000}s of yourself with your usual background noise, then - play the same clip back raw vs through each model to A/B them. (Uses the gate when it's - enabled above.) + play the same clip back raw vs through each model to A/B them. Captured fully raw (browser + noise suppression off) so each model's effect is audible; uses the gate when enabled + above.