2026-06-16 17:53:57 -04:00
|
|
|
/**
 * Shared client-side denoise pipeline for the in-app model tester.
 *
 * The same RNNoise/Speex/DTLN worklets that the Element Call shim
 * (build/lotus-denoise.js) injects are shipped under
 * /public/element-call/denoise/. Here we load them into a normal main-app
 * AudioContext so users can audition the models and calibrate the noise gate
 * without joining a real call. The graph mirrors the shim:
 *   source -> [noise gate] -> model -> output
 */
|
|
|
|
|
import { DenoiseModelId } from '../state/settings';
|
|
|
|
|
|
|
|
|
|
// Root URL of the self-hosted denoise assets (worklet scripts, wasm binaries,
// ES-module helpers). Mirrors CallEmbed's widget-base resolution so assets
// resolve under any base: trailing slashes are stripped from BASE_URL before
// the fixed asset path is appended, so the result always ends in a single "/".
const BASE = `${import.meta.env.BASE_URL.replace(/\/+$/, '')}/public/element-call/denoise/`;
|
|
|
|
|
|
2026-06-17 17:27:15 -04:00
|
|
|
/**
 * Required AudioContext sample rate per model. RNNoise/Speex (sapphi) and
 * DeepFilterNet 3 are 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT
 * resample internally — running it at 48 kHz produces robotic/choppy/quiet
 * output, so its whole graph must run in a 16 kHz context.
 *
 * @param model - Denoise model the audio graph is being built for.
 * @returns 16000 for `'dtln'`, 48000 for every other model id.
 */
export const sampleRateFor = (model: DenoiseModelId): number => (model === 'dtln' ? 16000 : 48000);
|
2026-06-16 17:53:57 -04:00
|
|
|
|
|
|
|
|
/** A denoise worklet node paired with the teardown routine for whatever backs it. */
export type DenoiseNode = {
  /** Worklet node to wire into the audio graph. */
  node: AudioWorkletNode;
  /** Releases the resources behind `node`; what that involves depends on the model. */
  dispose: () => void;
};
|
|
|
|
|
|
|
|
|
|
// Pending and completed wasm downloads, keyed by file name relative to BASE,
// so each binary is requested once no matter how many nodes are built.
const wasmCache: Record<string, Promise<ArrayBuffer>> = {};

/**
 * Fetch a wasm binary from the denoise asset directory, sharing one download
 * between all callers that ask for the same file.
 *
 * A failed download is evicted from the cache, so a later call retries instead
 * of replaying the cached rejection for the rest of the session.
 *
 * @param file - File name relative to BASE, e.g. `rnnoise.wasm`.
 * @returns The bytes of the binary.
 * @throws Error when the response status is not OK; rejections from `fetch`
 *   itself propagate unchanged.
 */
function fetchWasm(file: string): Promise<ArrayBuffer> {
  const cached = wasmCache[file];
  if (cached) return cached;

  const download: Promise<ArrayBuffer> = fetch(BASE + file)
    .then((r) => {
      if (!r.ok) throw new Error(`denoise asset ${file} unavailable (${r.status})`);
      return r.arrayBuffer();
    })
    .catch((err: unknown) => {
      // Only evict our own entry; a newer attempt may already have replaced it.
      if (wasmCache[file] === download) delete wasmCache[file];
      throw err;
    });
  wasmCache[file] = download;
  return download;
}
|
|
|
|
|
|
|
|
|
|
// addModule throws if the same module URL is added twice to one context, so
// every load is remembered per context. The promise is stored as soon as the
// load starts, which lets concurrent callers share it instead of each calling
// addModule for the same script.
const addedModules = new WeakMap<BaseAudioContext, Map<string, Promise<void>>>();

/**
 * Load a worklet script into `ctx` at most once.
 *
 * Calls made while a load is still in flight wait on that same load. A failed
 * load is forgotten so the next call can try again.
 *
 * @param ctx - Context whose AudioWorklet receives the module.
 * @param script - Script file name relative to BASE.
 * @throws Whatever `audioWorklet.addModule` rejects with.
 */
async function addModuleOnce(ctx: BaseAudioContext, script: string): Promise<void> {
  const known = addedModules.get(ctx);
  const loads = known ?? new Map<string, Promise<void>>();
  if (!known) addedModules.set(ctx, loads);

  let load = loads.get(script);
  if (!load) {
    load = ctx.audioWorklet.addModule(BASE + script).catch((err: unknown) => {
      // Runs before any waiter resumes, so a retry never sees the stale entry.
      loads.delete(script);
      throw err;
    });
    loads.set(script, load);
  }
  await load;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Asset table for the flat sapphi worklets. `proc` is the processor name handed
 * to `new AudioWorkletNode`; `script` (worklet module) and `wasm` (binary) are
 * file names relative to BASE.
 */
const SAPPHI: Record<'rnnoise' | 'speex', { proc: string; script: string; wasm: string }> = {
  rnnoise: {
    proc: '@sapphi-red/web-noise-suppressor/rnnoise',
    script: 'rnnoiseWorklet.js',
    wasm: 'rnnoise.wasm',
  },
  speex: {
    proc: '@sapphi-red/web-noise-suppressor/speex',
    script: 'speexWorklet.js',
    wasm: 'speex.wasm',
  },
};
|
|
|
|
|
|
|
|
|
|
/** Build the DTLN node through @workadventure's self-resolving ES-module helper. */
async function buildDtlnNode(ctx: BaseAudioContext): Promise<DenoiseNode> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const mod: any = await import(/* @vite-ignore */ `${BASE}workadventure/audio-worklet.js`);
  const handle = await mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true });
  return { node: handle.node, dispose: () => handle.dispose() };
}

/** Build the DeepFilterNet 3 node, destroying the core again if setup fails. */
async function buildDeepFilterNode(ctx: BaseAudioContext): Promise<DenoiseNode> {
  // deepfilternet3-noise-filter ESM: inlines its worklet/wasm-bindgen glue and
  // fetches only df_bg.wasm + the ONNX model, which we self-host under
  // deepfilternet/v2/... Override its cdnUrl to our absolute base so nothing
  // hits the upstream CDN. DeepFilterNet3Core builds the worklet node directly.
  const dfnBase = new URL(`${BASE}deepfilternet`, window.location.href).href;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const mod: any = await import(/* @vite-ignore */ `${BASE}deepfilternet/index.esm.js`);
  const core = new mod.DeepFilterNet3Core({
    sampleRate: sampleRateFor('deepfilternet'),
    noiseReductionLevel: 80,
    assetConfig: { cdnUrl: dfnBase },
  });

  // Best-effort teardown, shared by the failure path and the returned handle.
  const destroyCore = (): void => {
    try {
      core.destroy();
    } catch {
      /* noop */
    }
  };

  let node: AudioWorkletNode;
  try {
    await core.initialize();
    node = await core.createAudioWorkletNode(ctx);
  } catch (err) {
    // Without this the half-built core would stay alive with no owner.
    destroyCore();
    throw err;
  }
  return { node, dispose: destroyCore };
}

/** Build an RNNoise/Speex node from the flat sapphi worklet plus its wasm binary. */
async function buildSapphiNode(
  ctx: BaseAudioContext,
  model: keyof typeof SAPPHI,
): Promise<DenoiseNode> {
  const cfg = SAPPHI[model];
  // The types rule this out, but a model id from outside the type system
  // (e.g. stale stored settings) would otherwise fail as an opaque TypeError.
  if (!cfg) throw new Error(`unknown denoise model: ${String(model)}`);

  // The worklet script and its wasm binary are independent, so load them together.
  const [, wasmBinary] = await Promise.all([addModuleOnce(ctx, cfg.script), fetchWasm(cfg.wasm)]);
  const node = new AudioWorkletNode(ctx, cfg.proc, {
    channelCount: 1,
    numberOfInputs: 1,
    numberOfOutputs: 1,
    processorOptions: { maxChannels: 1, wasmBinary },
  });
  return {
    node,
    dispose: () => {
      try {
        node.port.postMessage('destroy');
      } catch {
        /* noop */
      }
    },
  };
}

/**
 * Build the model denoise node. RNNoise/Speex are flat sapphi worklets; DTLN
 * uses @workadventure's self-resolving ES-module helper; DeepFilterNet 3 is
 * created through the self-hosted deepfilternet3-noise-filter module.
 *
 * @param ctx - Audio context the node is created in. It should run at
 *   `sampleRateFor(model)`.
 * @param model - Which denoise model to instantiate.
 * @returns The worklet node plus a `dispose` callback that tears the model down.
 * @throws When an asset cannot be loaded, the model fails to initialise, or
 *   `model` is not a known id.
 */
export async function buildModelNode(
  ctx: BaseAudioContext,
  model: DenoiseModelId,
): Promise<DenoiseNode> {
  if (model === 'dtln') return buildDtlnNode(ctx);
  if (model === 'deepfilternet') return buildDeepFilterNode(ctx);
  return buildSapphiNode(ctx, model);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Build the sapphi noise-gate worklet node, loading its worklet script into
 * `ctx` first if that has not happened yet.
 *
 * @param ctx - Audio context the gate is created in.
 * @param thresholdDb - Level passed to the worklet as `openThreshold`; the
 *   `closeThreshold` is set 5 below it.
 * @returns The configured noise-gate node (single channel, `holdMs` of 150).
 * @throws When the worklet script cannot be loaded.
 */
export async function buildGateNode(
  ctx: BaseAudioContext,
  thresholdDb: number,
): Promise<AudioWorkletNode> {
  await addModuleOnce(ctx, 'noiseGateWorklet.js');
  return new AudioWorkletNode(ctx, '@sapphi-red/web-noise-suppressor/noise-gate', {
    processorOptions: {
      openThreshold: thresholdDb,
      // NOTE(review): presumably hysteresis so the gate does not chatter
      // around the threshold — confirm against the worklet's documentation.
      closeThreshold: thresholdDb - 5,
      holdMs: 150,
      maxChannels: 1,
    },
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * RMS level of an analyser as dBFS, clamped to [-100, 0].
 *
 * Reads one time-domain frame of `analyser.fftSize` samples. Frames louder than
 * full scale (RMS above 1) report 0 rather than a positive value, and silent
 * frames report the -100 floor.
 *
 * @param analyser - Analyser node to sample.
 * @returns Level in dBFS within [-100, 0].
 */
export function readDb(analyser: AnalyserNode): number {
  const buf = new Float32Array(analyser.fftSize);
  analyser.getFloatTimeDomainData(buf);

  let sum = 0;
  for (const sample of buf) sum += sample * sample;
  const rms = Math.sqrt(sum / buf.length);

  // Silence has no finite dB value; a NaN frame also lands on the floor here.
  if (!(rms > 0)) return -100;
  return Math.min(0, Math.max(-100, 20 * Math.log10(rms)));
}
|