fix(calls): DTLN at 16 kHz + raw-capture A/B; explains weak/robotic results
Two issues found from real testing of the in-app tester:
1. Raw ≈ RNNoise ≈ Speex sounded identical in Record & compare because the clip
was captured with browser noise suppression ON (the user's native-NS
setting), so "Raw" was already cleaned and the models had nothing left to
remove. Record & compare now captures fully raw audio (noiseSuppression /
AGC / echoCancellation off) so each model's effect on real noise is audible.
(Friends still heard differences in calls — the models work; the test was
feeding them pre-cleaned audio.)
2. DTLN was robotic/choppy/quiet because @workadventure/noise-suppression
targets 16 kHz (AUDIO_CONFIG.sampleRate) and does NOT resample internally,
while we ran it at 48 kHz. Run DTLN's whole graph in a 16 kHz context:
- denoisePipeline: add sampleRateFor(model) (16k for dtln, 48k otherwise);
tester live-monitor + playback contexts use it (bufferSource resamples the
48k clip down for DTLN).
- shim (build/lotus-denoise.js): SAMPLE_RATE is now model-aware, so DTLN is
correct in real calls too (it was previously broken at 48 kHz). The 16 kHz
processed track is still published to LiveKit (WebRTC/Opus resamples).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -35,9 +35,13 @@
|
||||
if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;
|
||||
|
||||
var ASSET_BASE = './denoise/';
|
||||
var SAMPLE_RATE = 48000;
|
||||
|
||||
var MODEL = params.get('lotusModel') || 'rnnoise';
|
||||
// DTLN (@workadventure) targets 16 kHz and does not resample internally, so
|
||||
// its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) need
|
||||
// 48 kHz. The processed MediaStreamTrack is published to LiveKit either way
|
||||
// (WebRTC/Opus resamples as needed).
|
||||
var SAMPLE_RATE = MODEL === 'dtln' ? 16000 : 48000;
|
||||
var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true';
|
||||
var USE_GATE = params.get('lotusGate') === 'true';
|
||||
var GATE_THRESHOLD = parseFloat(params.get('lotusGateThreshold') || '-45');
|
||||
|
||||
@@ -2,15 +2,16 @@ import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { Box, Button, Text } from 'folds';
|
||||
import { DenoiseModelId } from '../../../state/settings';
|
||||
import {
|
||||
DENOISE_SAMPLE_RATE,
|
||||
DenoiseNode,
|
||||
buildGateNode,
|
||||
buildModelNode,
|
||||
readDb,
|
||||
sampleRateFor,
|
||||
} from '../../../utils/denoisePipeline';
|
||||
|
||||
const MAX_RECORD_MS = 6000;
|
||||
|
||||
// Live monitor mirrors the call's capture (respects the user's native-NS choice).
|
||||
const MIC_CONSTRAINTS = (nativeNS: boolean): MediaStreamConstraints => ({
|
||||
audio: {
|
||||
noiseSuppression: nativeNS,
|
||||
@@ -20,6 +21,19 @@ const MIC_CONSTRAINTS = (nativeNS: boolean): MediaStreamConstraints => ({
|
||||
},
|
||||
});
|
||||
|
||||
// Record & compare captures fully RAW audio (no browser noise suppression / AGC
|
||||
// / echo cancel) so each model's effect on real background noise is audible.
|
||||
// Capturing with native NS on would pre-clean the clip and make Raw/RNNoise/
|
||||
// Speex sound identical.
|
||||
const RAW_CONSTRAINTS: MediaStreamConstraints = {
|
||||
audio: {
|
||||
noiseSuppression: false,
|
||||
echoCancellation: false,
|
||||
autoGainControl: false,
|
||||
channelCount: 1,
|
||||
},
|
||||
};
|
||||
|
||||
/** A -100..0 dBFS bar with optional threshold marker. */
|
||||
function DbMeter({ label, db, threshold }: { label: string; db: number; threshold?: number }) {
|
||||
const pct = Math.max(0, Math.min(100, db + 100));
|
||||
@@ -112,7 +126,7 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi
|
||||
|
||||
const startLive = async () => {
|
||||
try {
|
||||
const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE });
|
||||
const ctx = new AudioContext({ sampleRate: sampleRateFor(model) });
|
||||
const stream = await navigator.mediaDevices.getUserMedia(MIC_CONSTRAINTS(nativeNS));
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
const inAnalyser = ctx.createAnalyser();
|
||||
@@ -192,8 +206,8 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi
|
||||
|
||||
const startRecord = async () => {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia(MIC_CONSTRAINTS(nativeNS));
|
||||
const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE });
|
||||
const stream = await navigator.mediaDevices.getUserMedia(RAW_CONSTRAINTS);
|
||||
const ctx = new AudioContext();
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
const analyser = ctx.createAnalyser();
|
||||
analyser.fftSize = 1024;
|
||||
@@ -209,7 +223,7 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi
|
||||
teardownRecorder();
|
||||
setRecDb(-100);
|
||||
try {
|
||||
const decodeCtx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE });
|
||||
const decodeCtx = new AudioContext({ sampleRate: 48000 });
|
||||
clipRef.current = await decodeCtx.decodeAudioData(await blob.arrayBuffer());
|
||||
decodeCtx.close().catch(() => undefined);
|
||||
setHasClip(true);
|
||||
@@ -254,7 +268,9 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi
|
||||
const clip = clipRef.current;
|
||||
if (!clip) return;
|
||||
try {
|
||||
const ctx = new AudioContext({ sampleRate: DENOISE_SAMPLE_RATE });
|
||||
// bufferSource auto-resamples the 48 kHz clip to the context rate, so DTLN
|
||||
// gets the 16 kHz it needs while raw/RNNoise/Speex stay at 48 kHz.
|
||||
const ctx = new AudioContext({ sampleRate: sampleRateFor(playModel ?? 'rnnoise') });
|
||||
const source = ctx.createBufferSource();
|
||||
source.buffer = clip;
|
||||
if (playModel) {
|
||||
@@ -327,8 +343,9 @@ export function DenoiseTester({ model, useGate, gateThreshold, nativeNS }: Denoi
|
||||
<Text size="T300">Record & compare</Text>
|
||||
<Text size="T200" priority="300">
|
||||
Record up to {MAX_RECORD_MS / 1000}s of yourself with your usual background noise, then
|
||||
play the same clip back raw vs through each model to A/B them. (Uses the gate when it's
|
||||
enabled above.)
|
||||
play the same clip back raw vs through each model to A/B them. Captured fully raw (browser
|
||||
noise suppression off) so each model's effect is audible; uses the gate when enabled
|
||||
above.
|
||||
</Text>
|
||||
<Box direction="Row" gap="200" alignItems="Center">
|
||||
<Button
|
||||
|
||||
@@ -13,8 +13,13 @@ import { DenoiseModelId } from '../state/settings';
|
||||
// Mirror CallEmbed's widget-base resolution so assets resolve under any base.
|
||||
const BASE = `${import.meta.env.BASE_URL.replace(/\/+$/, '')}/public/element-call/denoise/`;
|
||||
|
||||
/** RNNoise/Speex/DTLN all assume mono 48 kHz, matching the call pipeline. */
|
||||
export const DENOISE_SAMPLE_RATE = 48000;
|
||||
/**
|
||||
* Required AudioContext sample rate per model. RNNoise/Speex (sapphi) assume
|
||||
* 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT resample internally
|
||||
* — running it at 48 kHz produces robotic/choppy/quiet output, so its whole
|
||||
* graph must run in a 16 kHz context.
|
||||
*/
|
||||
export const sampleRateFor = (model: DenoiseModelId): number => {
  // DTLN is the only model mapped to 16 kHz; every other model id gets 48 kHz.
  if (model === 'dtln') {
    return 16000;
  }
  return 48000;
};
|
||||
|
||||
export type DenoiseNode = {
|
||||
node: AudioWorkletNode;
|
||||
|
||||
Reference in New Issue
Block a user