feat(denoise): add self-hosted DeepFilterNet 3 ML noise-suppression model
Integrate DeepFilterNet 3 (deepfilternet3-noise-filter@1.2.1) as a new client-side denoise model id 'deepfilternet', mirroring the DTLN pattern. The npm package ships only an ESM whose AudioWorklet processor + wasm-bindgen glue are inlined as a string (loaded via a Blob URL — no CDN for the worklet). Its only runtime fetches are a single-threaded df_bg.wasm and an ONNX model tarball, which previously loaded from an external CDN. We now VENDOR both (build/denoise-vendor/deepfilternet/v2/...) and self-host them under denoise/deepfilternet/, overriding the package's cdnUrl so nothing hits the upstream CDN — keeping it self-hosted / Tauri-CSP safe. The wasm is single-threaded (no SharedArrayBuffer / atomics / imported shared memory), so it needs no COOP/COEP cross-origin isolation and runs fine in EC's non-isolated iframe. Runs at 48 kHz fullband. Any init/runtime failure falls back to the raw mic, like the other models.

- vite.config.js: copy ESM + vendored wasm/model into the EC denoise dir with a required-asset guard that aborts the build if any entry is missing.
- build/lotus-denoise.js: 'deepfilternet' branch — dynamic-import the ESM, build a DeepFilterNet3Core pointed at the self-hosted base, await init, return the worklet node; 48 kHz; raw-mic fail-safe preserved.
- denoisePipeline.ts: 'deepfilternet' branch for the in-app tester + sampleRate.
- settings.ts: add 'deepfilternet' to DenoiseModelId + getSettings whitelist.
- lotusDenoiseUtils.ts: add the comparison-chart row.
- General.tsx: add the "DeepFilterNet 3 (beta)" dropdown option.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
Binary file not shown.
+40
-3
@@ -38,9 +38,9 @@
|
||||
|
||||
var MODEL = params.get('lotusModel') || 'rnnoise';
|
||||
// DTLN (@workadventure) targets 16 kHz and does not resample internally, so
|
||||
// its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) need
|
||||
// 48 kHz. The processed MediaStreamTrack is published to LiveKit either way
|
||||
// (WebRTC/Opus resamples as needed).
|
||||
// its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) and
|
||||
// DeepFilterNet 3 are 48 kHz fullband. The processed MediaStreamTrack is
|
||||
// published to LiveKit either way (WebRTC/Opus resamples as needed).
|
||||
var SAMPLE_RATE = MODEL === 'dtln' ? 16000 : 48000;
|
||||
var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true';
|
||||
var USE_GATE = params.get('lotusGate') === 'true';
|
||||
@@ -65,6 +65,15 @@
|
||||
// node, rather than addModule-ing a flat worklet ourselves.
|
||||
helper: 'workadventure/audio-worklet.js',
|
||||
},
|
||||
deepfilternet: {
|
||||
// deepfilternet3-noise-filter ships an ESM whose AudioWorklet processor +
|
||||
// wasm-bindgen glue are INLINED as a string (loaded via a Blob URL — no
|
||||
// CDN for the worklet). The only assets it fetches are its single-threaded
|
||||
// df_bg.wasm + ONNX model, which we vendor + self-host under
|
||||
// deepfilternet/v2/... We dynamic-import the ESM, build a DeepFilterNet3Core
|
||||
// pointed at the self-hosted base, and let it create the worklet node.
|
||||
esm: 'deepfilternet/index.esm.js',
|
||||
},
|
||||
gate: {
|
||||
name: '@sapphi-red/web-noise-suppressor/noise-gate',
|
||||
script: 'noiseGateWorklet.js',
|
||||
@@ -164,6 +173,34 @@
|
||||
return mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true });
|
||||
});
|
||||
}
|
||||
if (MODEL === 'deepfilternet') {
|
||||
// Resolve an absolute self-hosted base so the package's cdnUrl override
|
||||
// fetches our vendored df_bg.wasm + ONNX model (never the upstream CDN).
|
||||
var dfnBase = new URL(ASSET_BASE + 'deepfilternet', window.location.href).href;
|
||||
return import(ASSET_BASE + PROCESSORS.deepfilternet.esm).then(function (mod) {
|
||||
var core = new mod.DeepFilterNet3Core({
|
||||
sampleRate: SAMPLE_RATE,
|
||||
noiseReductionLevel: 80,
|
||||
assetConfig: { cdnUrl: dfnBase },
|
||||
});
|
||||
// initialize() fetches + compiles the wasm and loads the model on the
|
||||
// main thread; the worklet node only exists once that resolves, so the
|
||||
// graph is connected with a ready model (no half-initialised passthrough).
|
||||
return core.initialize().then(function () {
|
||||
return core.createAudioWorkletNode(ctx).then(function (node) {
|
||||
return {
|
||||
node: node,
|
||||
ready: Promise.resolve(),
|
||||
dispose: function () {
|
||||
try {
|
||||
core.destroy();
|
||||
} catch (e) {}
|
||||
},
|
||||
};
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
var node = new AudioWorkletNode(ctx, PROCESSORS[MODEL].name, {
|
||||
channelCount: 1,
|
||||
numberOfInputs: 1,
|
||||
|
||||
Generated
+13
@@ -35,6 +35,7 @@
|
||||
"classnames": "2.5.1",
|
||||
"dateformat": "5.0.3",
|
||||
"dayjs": "1.11.20",
|
||||
"deepfilternet3-noise-filter": "1.2.1",
|
||||
"domhandler": "6.0.1",
|
||||
"dompurify": "3.4.5",
|
||||
"emojibase": "17.0.0",
|
||||
@@ -6399,6 +6400,18 @@
|
||||
"integrity": "sha512-Rn+RuwkmkDwCi2/oXOFS9Gsr5lJZu/yTGpK7wAaAIE75CC+LCGEZHpY6VQJa/RoJcrmaA/docWJZvYohlNkWPA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/deepfilternet3-noise-filter": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/deepfilternet3-noise-filter/-/deepfilternet3-noise-filter-1.2.1.tgz",
|
||||
"integrity": "sha512-OAyrHTDlUHH+AhfpVNKYEOhVqb9cZpu0fdNThplA/tB/Ts4PF/UsI+abl2n1IbSxUkhiF0OqDejEhk1n42Oqpw==",
|
||||
"license": "(Apache-2.0 OR MIT)",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"livekit-client": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/deepmerge": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
||||
|
||||
@@ -59,6 +59,7 @@
|
||||
"classnames": "2.5.1",
|
||||
"dateformat": "5.0.3",
|
||||
"dayjs": "1.11.20",
|
||||
"deepfilternet3-noise-filter": "1.2.1",
|
||||
"domhandler": "6.0.1",
|
||||
"dompurify": "3.4.5",
|
||||
"emojibase": "17.0.0",
|
||||
|
||||
@@ -1358,6 +1358,7 @@ function Calls() {
|
||||
{ value: 'rnnoise', label: 'RNNoise' },
|
||||
{ value: 'speex', label: 'Speex (Legacy)' },
|
||||
{ value: 'dtln', label: 'DTLN (beta)' },
|
||||
{ value: 'deepfilternet', label: 'DeepFilterNet 3 (beta)' },
|
||||
]}
|
||||
/>
|
||||
}
|
||||
|
||||
@@ -14,10 +14,12 @@ export type MessageSpacing = '0' | '100' | '200' | '300' | '400' | '500';
|
||||
// - 'browser' : WebRTC built-in suppression (Element Call noiseSuppression param)
|
||||
// - 'ml' : client-side RNNoise ML suppression (Lotus denoise shim)
|
||||
export type NoiseSuppressionMode = 'off' | 'browser' | 'ml';
|
||||
// Self-hostable, build-bundled ML models. DeepFilterNet remains excluded — it
|
||||
// loads its runtime/models from external CDNs, which breaks the self-hosted /
|
||||
// Tauri-CSP strategy (see LOTUS_DENOISE_ENGINEERING_REVIEW.md).
|
||||
export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln';
|
||||
// Self-hostable, build-bundled ML models. DeepFilterNet 3 is included via
|
||||
// deepfilternet3-noise-filter with its df_bg.wasm + ONNX model VENDORED and
|
||||
// self-hosted (its cdnUrl is overridden), so it no longer depends on an external
|
||||
// CDN. Its wasm is single-threaded, so no COOP/COEP cross-origin isolation is
|
||||
// required (see LOTUS_DENOISE_ENGINEERING_REVIEW.md).
|
||||
export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln' | 'deepfilternet';
|
||||
export type ChatBackground =
|
||||
| 'none'
|
||||
| 'blueprint'
|
||||
@@ -260,12 +262,13 @@ export const getSettings = (): Settings => {
|
||||
? 'browser'
|
||||
: 'off'
|
||||
: (saved.callNoiseSuppression ?? defaultSettings.callNoiseSuppression),
|
||||
// Coerce any retired/unknown persisted model (e.g. 'dtln', 'deepfilternet'
|
||||
// from earlier beta builds) back to the default working model.
|
||||
// Coerce any retired/unknown persisted model back to the default working
|
||||
// model; only whitelisted ids pass through.
|
||||
callDenoiseModel:
|
||||
saved.callDenoiseModel === 'rnnoise' ||
|
||||
saved.callDenoiseModel === 'speex' ||
|
||||
saved.callDenoiseModel === 'dtln'
|
||||
saved.callDenoiseModel === 'dtln' ||
|
||||
saved.callDenoiseModel === 'deepfilternet'
|
||||
? saved.callDenoiseModel
|
||||
: defaultSettings.callDenoiseModel,
|
||||
composerToolbarButtons: {
|
||||
|
||||
@@ -14,10 +14,10 @@ import { DenoiseModelId } from '../state/settings';
|
||||
const BASE = `${import.meta.env.BASE_URL.replace(/\/+$/, '')}/public/element-call/denoise/`;
|
||||
|
||||
/**
|
||||
* Required AudioContext sample rate per model. RNNoise/Speex (sapphi) assume
|
||||
* 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT resample internally
|
||||
* — running it at 48 kHz produces robotic/choppy/quiet output, so its whole
|
||||
* graph must run in a 16 kHz context.
|
||||
* Required AudioContext sample rate per model. RNNoise/Speex (sapphi) and
|
||||
* DeepFilterNet 3 are 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT
|
||||
* resample internally — running it at 48 kHz produces robotic/choppy/quiet
|
||||
* output, so its whole graph must run in a 16 kHz context.
|
||||
*/
|
||||
export const sampleRateFor = (model: DenoiseModelId): number => (model === 'dtln' ? 16000 : 48000);
|
||||
|
||||
@@ -75,6 +75,32 @@ export async function buildModelNode(
|
||||
const handle = await mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true });
|
||||
return { node: handle.node, dispose: () => handle.dispose() };
|
||||
}
|
||||
if (model === 'deepfilternet') {
|
||||
// deepfilternet3-noise-filter ESM: inlines its worklet/wasm-bindgen glue and
|
||||
// fetches only df_bg.wasm + the ONNX model, which we self-host under
|
||||
// deepfilternet/v2/... Override its cdnUrl to our absolute base so nothing
|
||||
// hits the upstream CDN. DeepFilterNet3Core builds the worklet node directly.
|
||||
const dfnBase = new URL(`${BASE}deepfilternet`, window.location.href).href;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const mod: any = await import(/* @vite-ignore */ `${BASE}deepfilternet/index.esm.js`);
|
||||
const core = new mod.DeepFilterNet3Core({
|
||||
sampleRate: sampleRateFor(model),
|
||||
noiseReductionLevel: 80,
|
||||
assetConfig: { cdnUrl: dfnBase },
|
||||
});
|
||||
await core.initialize();
|
||||
const node: AudioWorkletNode = await core.createAudioWorkletNode(ctx);
|
||||
return {
|
||||
node,
|
||||
dispose: () => {
|
||||
try {
|
||||
core.destroy();
|
||||
} catch {
|
||||
/* noop */
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
const cfg = SAPPHI[model];
|
||||
const [, wasmBinary] = await Promise.all([addModuleOnce(ctx, cfg.script), fetchWasm(cfg.wasm)]);
|
||||
const node = new AudioWorkletNode(ctx, cfg.proc, {
|
||||
|
||||
@@ -40,6 +40,15 @@ export const DENOISE_MODELS: DenoiseModel[] = [
|
||||
transients: 'Excellent',
|
||||
voiceQuality: 'High',
|
||||
},
|
||||
{
|
||||
id: 'deepfilternet',
|
||||
name: 'DeepFilterNet 3 (beta)',
|
||||
description: 'Studio-grade deep-learning model (48 kHz, ONNX). Best quality; highest CPU.',
|
||||
cpuUsage: '25-50%',
|
||||
binarySize: '~18 MB',
|
||||
transients: 'Excellent',
|
||||
voiceQuality: 'Very High',
|
||||
},
|
||||
];
|
||||
|
||||
export const isMLDenoiseSupported = (): boolean => {
|
||||
|
||||
@@ -123,6 +123,43 @@ function lotusDenoise() {
|
||||
}
|
||||
fs.cpSync(dtlnSrc, path.join(denoiseDir, 'workadventure'), { recursive: true });
|
||||
|
||||
// DeepFilterNet 3 (deepfilternet3-noise-filter): the npm package ships only
|
||||
// its ESM (index.esm.js) with the AudioWorklet processor + wasm-bindgen glue
|
||||
// INLINED as a string (loaded via a Blob URL, no CDN for the worklet). The
|
||||
// only runtime CDN fetches are its single-threaded `df_bg.wasm` and the
|
||||
// ONNX `DeepFilterNet3_onnx.tar.gz` model — which we VENDOR locally (under
|
||||
// build/denoise-vendor/deepfilternet/) and serve, overriding the package's
|
||||
// cdnUrl to our self-hosted base. This keeps the feature CDN-free / Tauri-CSP
|
||||
// safe. We copy the ESM (the shim dynamic-imports it, mirroring DTLN) plus
|
||||
// the vendored assets, preserving the package's expected v2/... layout. All
|
||||
// are required — a missing entry means a broken install, so fail the build.
|
||||
const dfnEsm = path.resolve('node_modules/deepfilternet3-noise-filter/dist/index.esm.js');
|
||||
const dfnVendor = path.resolve('build/denoise-vendor/deepfilternet');
|
||||
const dfnAssets = [
|
||||
[dfnEsm, path.join(denoiseDir, 'deepfilternet/index.esm.js')],
|
||||
[
|
||||
path.join(dfnVendor, 'v2/pkg/df_bg.wasm'),
|
||||
path.join(denoiseDir, 'deepfilternet/v2/pkg/df_bg.wasm'),
|
||||
],
|
||||
[
|
||||
path.join(dfnVendor, 'v2/models/DeepFilterNet3_onnx.tar.gz'),
|
||||
path.join(denoiseDir, 'deepfilternet/v2/models/DeepFilterNet3_onnx.tar.gz'),
|
||||
],
|
||||
];
|
||||
const dfnMissing = dfnAssets.filter(([s]) => !fs.existsSync(s)).map(([s]) => s);
|
||||
if (dfnMissing.length > 0) {
|
||||
throw new Error(
|
||||
'[lotus-denoise] DeepFilterNet 3 asset(s) missing — build aborted to avoid ' +
|
||||
'shipping a broken ML feature:\n ' +
|
||||
dfnMissing.join('\n ') +
|
||||
'\n(Run `npm ci`; the vendored wasm/model live under build/denoise-vendor/deepfilternet/.)',
|
||||
);
|
||||
}
|
||||
dfnAssets.forEach(([s, d]) => {
|
||||
fs.mkdirSync(path.dirname(d), { recursive: true });
|
||||
fs.copyFileSync(s, d);
|
||||
});
|
||||
|
||||
const shimSrc = path.resolve('build/lotus-denoise.js');
|
||||
if (!fs.existsSync(shimSrc)) {
|
||||
throw new Error(`[lotus-denoise] Missing shim source ${shimSrc} — build aborted.`);
|
||||
|
||||
Reference in New Issue
Block a user