diff --git a/build/denoise-vendor/deepfilternet/v2/models/DeepFilterNet3_onnx.tar.gz b/build/denoise-vendor/deepfilternet/v2/models/DeepFilterNet3_onnx.tar.gz new file mode 100644 index 000000000..1c4f4ffe1 Binary files /dev/null and b/build/denoise-vendor/deepfilternet/v2/models/DeepFilterNet3_onnx.tar.gz differ diff --git a/build/denoise-vendor/deepfilternet/v2/pkg/df_bg.wasm b/build/denoise-vendor/deepfilternet/v2/pkg/df_bg.wasm new file mode 100644 index 000000000..fac0f9e70 Binary files /dev/null and b/build/denoise-vendor/deepfilternet/v2/pkg/df_bg.wasm differ diff --git a/build/lotus-denoise.js b/build/lotus-denoise.js index 66b18e170..9a997288b 100644 --- a/build/lotus-denoise.js +++ b/build/lotus-denoise.js @@ -38,9 +38,9 @@ var MODEL = params.get('lotusModel') || 'rnnoise'; // DTLN (@workadventure) targets 16 kHz and does not resample internally, so - // its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) need - // 48 kHz. The processed MediaStreamTrack is published to LiveKit either way - // (WebRTC/Opus resamples as needed). + // its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) and + // DeepFilterNet 3 are 48 kHz fullband. The processed MediaStreamTrack is + // published to LiveKit either way (WebRTC/Opus resamples as needed). var SAMPLE_RATE = MODEL === 'dtln' ? 16000 : 48000; var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true'; var USE_GATE = params.get('lotusGate') === 'true'; @@ -65,6 +65,15 @@ // node, rather than addModule-ing a flat worklet ourselves. helper: 'workadventure/audio-worklet.js', }, + deepfilternet: { + // deepfilternet3-noise-filter ships an ESM whose AudioWorklet processor + + // wasm-bindgen glue are INLINED as a string (loaded via a Blob URL — no + // CDN for the worklet). The only assets it fetches are its single-threaded + // df_bg.wasm + ONNX model, which we vendor + self-host under + // deepfilternet/v2/... We dynamic-import the ESM, build a DeepFilterNet3Core + // pointed at the self-hosted base, and let it create the worklet node. + esm: 'deepfilternet/index.esm.js', + }, gate: { name: '@sapphi-red/web-noise-suppressor/noise-gate', script: 'noiseGateWorklet.js', @@ -164,6 +173,34 @@ return mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true }); }); } + if (MODEL === 'deepfilternet') { + // Resolve an absolute self-hosted base so the package's cdnUrl override + // fetches our vendored df_bg.wasm + ONNX model (never the upstream CDN). + var dfnBase = new URL(ASSET_BASE + 'deepfilternet', window.location.href).href; + return import(ASSET_BASE + PROCESSORS.deepfilternet.esm).then(function (mod) { + var core = new mod.DeepFilterNet3Core({ + sampleRate: SAMPLE_RATE, + noiseReductionLevel: 80, + assetConfig: { cdnUrl: dfnBase }, + }); + // initialize() fetches + compiles the wasm and loads the model on the + // main thread; the worklet node only exists once that resolves, so the + // graph is connected with a ready model (no half-initialised passthrough). + return core.initialize().then(function () { + return core.createAudioWorkletNode(ctx).then(function (node) { + return { + node: node, + ready: Promise.resolve(), + dispose: function () { + try { + core.destroy(); + } catch (e) {} + }, + }; + }); + }); + }); + } var node = new AudioWorkletNode(ctx, PROCESSORS[MODEL].name, { channelCount: 1, numberOfInputs: 1, diff --git a/package-lock.json b/package-lock.json index 2f94804d1..970749d6a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -35,6 +35,7 @@ "classnames": "2.5.1", "dateformat": "5.0.3", "dayjs": "1.11.20", + "deepfilternet3-noise-filter": "1.2.1", "domhandler": "6.0.1", "dompurify": "3.4.5", "emojibase": "17.0.0", @@ -6399,6 +6400,18 @@ "integrity": "sha512-Rn+RuwkmkDwCi2/oXOFS9Gsr5lJZu/yTGpK7wAaAIE75CC+LCGEZHpY6VQJa/RoJcrmaA/docWJZvYohlNkWPA==", "dev": true }, + "node_modules/deepfilternet3-noise-filter": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/deepfilternet3-noise-filter/-/deepfilternet3-noise-filter-1.2.1.tgz", + "integrity": "sha512-OAyrHTDlUHH+AhfpVNKYEOhVqb9cZpu0fdNThplA/tB/Ts4PF/UsI+abl2n1IbSxUkhiF0OqDejEhk1n42Oqpw==", + "license": "(Apache-2.0 OR MIT)", + "engines": { + "node": ">=18.0.0" + }, + "peerDependencies": { + "livekit-client": "^2.0.0" + } + }, "node_modules/deepmerge": { "version": "4.3.1", "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", diff --git a/package.json b/package.json index 48390ad0a..c28d25b5e 100644 --- a/package.json +++ b/package.json @@ -59,6 +59,7 @@ "classnames": "2.5.1", "dateformat": "5.0.3", "dayjs": "1.11.20", + "deepfilternet3-noise-filter": "1.2.1", "domhandler": "6.0.1", "dompurify": "3.4.5", "emojibase": "17.0.0", diff --git a/src/app/features/settings/general/General.tsx b/src/app/features/settings/general/General.tsx index 5d3482ded..bfdf9a6cc 100644 --- a/src/app/features/settings/general/General.tsx +++ b/src/app/features/settings/general/General.tsx @@ -1358,6 +1358,7 @@ function Calls() { { value: 'rnnoise', label: 'RNNoise' }, { value: 'speex', label: 'Speex (Legacy)' }, { value: 'dtln', label: 'DTLN (beta)' }, + { value: 'deepfilternet', label: 'DeepFilterNet 3 (beta)' }, ]} /> } diff --git a/src/app/state/settings.ts b/src/app/state/settings.ts index 2440aee45..689c9ae87 100644 --- a/src/app/state/settings.ts +++ b/src/app/state/settings.ts @@ -14,10 +14,12 @@ export type MessageSpacing = '0' | '100' | '200' | '300' | '400' | '500'; // - 'browser' : WebRTC built-in suppression (Element Call noiseSuppression param) // - 'ml' : client-side RNNoise ML suppression (Lotus denoise shim) export type NoiseSuppressionMode = 'off' | 'browser' | 'ml'; -// Self-hostable, build-bundled ML models. DeepFilterNet remains excluded — it -// loads its runtime/models from external CDNs, which breaks the self-hosted / -// Tauri-CSP strategy (see LOTUS_DENOISE_ENGINEERING_REVIEW.md). -export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln'; +// Self-hostable, build-bundled ML models. DeepFilterNet 3 is included via +// deepfilternet3-noise-filter with its df_bg.wasm + ONNX model VENDORED and +// self-hosted (its cdnUrl is overridden), so it no longer depends on an external +// CDN. Its wasm is single-threaded, so no COOP/COEP cross-origin isolation is +// required (see LOTUS_DENOISE_ENGINEERING_REVIEW.md). +export type DenoiseModelId = 'rnnoise' | 'speex' | 'dtln' | 'deepfilternet'; export type ChatBackground = | 'none' | 'blueprint' @@ -260,12 +262,13 @@ export const getSettings = (): Settings => { ? 'browser' : 'off' : (saved.callNoiseSuppression ?? defaultSettings.callNoiseSuppression), - // Coerce any retired/unknown persisted model (e.g. 'dtln', 'deepfilternet' - // from earlier beta builds) back to the default working model. + // Coerce any retired/unknown persisted model back to the default working + // model; only whitelisted ids pass through. callDenoiseModel: saved.callDenoiseModel === 'rnnoise' || saved.callDenoiseModel === 'speex' || - saved.callDenoiseModel === 'dtln' + saved.callDenoiseModel === 'dtln' || + saved.callDenoiseModel === 'deepfilternet' ? saved.callDenoiseModel : defaultSettings.callDenoiseModel, composerToolbarButtons: { diff --git a/src/app/utils/denoisePipeline.ts b/src/app/utils/denoisePipeline.ts index d0b42aaa9..ea9129d2f 100644 --- a/src/app/utils/denoisePipeline.ts +++ b/src/app/utils/denoisePipeline.ts @@ -14,10 +14,10 @@ import { DenoiseModelId } from '../state/settings'; const BASE = `${import.meta.env.BASE_URL.replace(/\/+$/, '')}/public/element-call/denoise/`; /** - * Required AudioContext sample rate per model. RNNoise/Speex (sapphi) assume - * 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT resample internally - * — running it at 48 kHz produces robotic/choppy/quiet output, so its whole - * graph must run in a 16 kHz context. + * Required AudioContext sample rate per model. RNNoise/Speex (sapphi) and + * DeepFilterNet 3 are 48 kHz. DTLN (@workadventure) targets 16 kHz and does NOT + * resample internally — running it at 48 kHz produces robotic/choppy/quiet + * output, so its whole graph must run in a 16 kHz context. */ export const sampleRateFor = (model: DenoiseModelId): number => (model === 'dtln' ? 16000 : 48000); @@ -75,6 +75,32 @@ export async function buildModelNode( const handle = await mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true }); return { node: handle.node, dispose: () => handle.dispose() }; } + if (model === 'deepfilternet') { + // deepfilternet3-noise-filter ESM: inlines its worklet/wasm-bindgen glue and + // fetches only df_bg.wasm + the ONNX model, which we self-host under + // deepfilternet/v2/... Override its cdnUrl to our absolute base so nothing + // hits the upstream CDN. DeepFilterNet3Core builds the worklet node directly. + const dfnBase = new URL(`${BASE}deepfilternet`, window.location.href).href; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const mod: any = await import(/* @vite-ignore */ `${BASE}deepfilternet/index.esm.js`); + const core = new mod.DeepFilterNet3Core({ + sampleRate: sampleRateFor(model), + noiseReductionLevel: 80, + assetConfig: { cdnUrl: dfnBase }, + }); + await core.initialize(); + const node: AudioWorkletNode = await core.createAudioWorkletNode(ctx); + return { + node, + dispose: () => { + try { + core.destroy(); + } catch { + /* noop */ + } + }, + }; + } const cfg = SAPPHI[model]; const [, wasmBinary] = await Promise.all([addModuleOnce(ctx, cfg.script), fetchWasm(cfg.wasm)]); const node = new AudioWorkletNode(ctx, cfg.proc, { diff --git a/src/app/utils/lotusDenoiseUtils.ts b/src/app/utils/lotusDenoiseUtils.ts index 4955bd763..cc1d5d259 100644 --- a/src/app/utils/lotusDenoiseUtils.ts +++ b/src/app/utils/lotusDenoiseUtils.ts @@ -40,6 +40,15 @@ export const DENOISE_MODELS: DenoiseModel[] = [ transients: 'Excellent', voiceQuality: 'High', }, + { + id: 'deepfilternet', + name: 'DeepFilterNet 3 (beta)', + description: 'Studio-grade deep-learning model (48 kHz, ONNX). Best quality; highest CPU.', + cpuUsage: '25-50%', + binarySize: '~18 MB', + transients: 'Excellent', + voiceQuality: 'Very High', + }, ]; export const isMLDenoiseSupported = (): boolean => { diff --git a/vite.config.js b/vite.config.js index 7b3fc4243..4b78188e9 100644 --- a/vite.config.js +++ b/vite.config.js @@ -123,6 +123,43 @@ function lotusDenoise() { } fs.cpSync(dtlnSrc, path.join(denoiseDir, 'workadventure'), { recursive: true }); + // DeepFilterNet 3 (deepfilternet3-noise-filter): the npm package ships only + // its ESM (index.esm.js) with the AudioWorklet processor + wasm-bindgen glue + // INLINED as a string (loaded via a Blob URL, no CDN for the worklet). The + // only runtime CDN fetches are its single-threaded `df_bg.wasm` and the + // ONNX `DeepFilterNet3_onnx.tar.gz` model — which we VENDOR locally (under + // build/denoise-vendor/deepfilternet/) and serve, overriding the package's + // cdnUrl to our self-hosted base. This keeps the feature CDN-free / Tauri-CSP + // safe. We copy the ESM (the shim dynamic-imports it, mirroring DTLN) plus + // the vendored assets, preserving the package's expected v2/... layout. All + // are required — a missing entry means a broken install, so fail the build. + const dfnEsm = path.resolve('node_modules/deepfilternet3-noise-filter/dist/index.esm.js'); + const dfnVendor = path.resolve('build/denoise-vendor/deepfilternet'); + const dfnAssets = [ + [dfnEsm, path.join(denoiseDir, 'deepfilternet/index.esm.js')], + [ + path.join(dfnVendor, 'v2/pkg/df_bg.wasm'), + path.join(denoiseDir, 'deepfilternet/v2/pkg/df_bg.wasm'), + ], + [ + path.join(dfnVendor, 'v2/models/DeepFilterNet3_onnx.tar.gz'), + path.join(denoiseDir, 'deepfilternet/v2/models/DeepFilterNet3_onnx.tar.gz'), + ], + ]; + const dfnMissing = dfnAssets.filter(([s]) => !fs.existsSync(s)).map(([s]) => s); + if (dfnMissing.length > 0) { + throw new Error( + '[lotus-denoise] DeepFilterNet 3 asset(s) missing — build aborted to avoid ' + + 'shipping a broken ML feature:\n ' + + dfnMissing.join('\n ') + + '\n(Run `npm ci`; the vendored wasm/model live under build/denoise-vendor/deepfilternet/.)', + ); + } + dfnAssets.forEach(([s, d]) => { + fs.mkdirSync(path.dirname(d), { recursive: true }); + fs.copyFileSync(s, d); + }); + const shimSrc = path.resolve('build/lotus-denoise.js'); if (!fs.existsSync(shimSrc)) { throw new Error(`[lotus-denoise] Missing shim source ${shimSrc} — build aborted.`);