04b56ffacd
Integrate DeepFilterNet 3 (deepfilternet3-noise-filter@1.2.1) as a new client-side denoise model id 'deepfilternet', mirroring the DTLN pattern. The npm package ships only an ESM whose AudioWorklet processor + wasm-bindgen glue are inlined as a string (loaded via a Blob URL — no CDN for the worklet). Its only runtime fetches are a single-threaded df_bg.wasm and an ONNX model tarball, which previously loaded from an external CDN. We now VENDOR both (build/denoise-vendor/deepfilternet/v2/...) and self-host them under denoise/deepfilternet/, overriding the package's cdnUrl so nothing hits the upstream CDN — keeping it self-hosted / Tauri-CSP safe. The wasm is single-threaded (no SharedArrayBuffer / atomics / imported shared memory), so it needs no COOP/COEP cross-origin isolation and runs fine in EC's non-isolated iframe. Runs at 48 kHz fullband. Any init/runtime failure falls back to the raw mic, like the other models. - vite.config.js: copy ESM + vendored wasm/model into the EC denoise dir with a required-asset guard that aborts the build if any entry is missing. - build/lotus-denoise.js: 'deepfilternet' branch — dynamic-import the ESM, build a DeepFilterNet3Core pointed at the self-hosted base, await init, return the worklet node; 48 kHz; raw-mic fail-safe preserved. - denoisePipeline.ts: 'deepfilternet' branch for the in-app tester + sampleRate. - settings.ts: add 'deepfilternet' to DenoiseModelId + getSettings whitelist. - lotusDenoiseUtils.ts: add the comparison-chart row. - General.tsx: add the "DeepFilterNet 3 (beta)" dropdown option. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
341 lines
12 KiB
JavaScript
341 lines
12 KiB
JavaScript
/*
 * Lotus Chat — client-side ML noise suppression shim for Element Call.
 *
 * Element Call runs as a same-origin iframe widget that captures the mic
 * internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
 * We can't reach that track from the host. Instead this classic <script> is
 * injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
 * deferred module entry, so it runs first and monkeypatches getUserMedia. When
 * the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
 * captured mic through a denoise AudioWorklet and hand the processed track
 * back to EC/LiveKit. The model is picked with lotusModel: 'rnnoise' (the
 * default) or 'speex' (@sapphi-red/web-noise-suppressor), 'dtln'
 * (@workadventure/noise-suppression) or 'deepfilternet'
 * (deepfilternet3-noise-filter).
 *
 * RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
 * or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
 *  - run the AudioContext at the model's rate (16 kHz for DTLN, 48 kHz for
 *    every other model), which handles resampling from the hardware,
 *  - request a mono capture (channelCount: 1),
 *  - use the SIMD build of RNNoise if supported for better performance,
 *  - optionally keep browser-native stationary suppression on
 *    (lotusNativeNS=true) so the fans are removed before the model focuses on
 *    transient noises (keyboard, dogs, etc.),
 *  - optionally insert a noise gate ahead of the model (lotusGate=true,
 *    lotusGateThreshold).
 *
 * Any failure falls back to the unprocessed mic so calls never break.
 */
|
|
(function () {
|
|
'use strict';
|
|
|
|
var params;
|
|
try {
|
|
params = new URLSearchParams(window.location.search);
|
|
if (params.get('lotusDenoise') !== 'ml') return;
|
|
} catch (e) {
|
|
return;
|
|
}
|
|
|
|
var md = navigator.mediaDevices;
|
|
if (!md || typeof md.getUserMedia !== 'function') return;
|
|
if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;
|
|
|
|
var ASSET_BASE = './denoise/';
|
|
|
|
var MODEL = params.get('lotusModel') || 'rnnoise';
|
|
// DTLN (@workadventure) targets 16 kHz and does not resample internally, so
|
|
// its whole graph runs in a 16 kHz context; RNNoise/Speex (sapphi) and
|
|
// DeepFilterNet 3 are 48 kHz fullband. The processed MediaStreamTrack is
|
|
// published to LiveKit either way (WebRTC/Opus resamples as needed).
|
|
var SAMPLE_RATE = MODEL === 'dtln' ? 16000 : 48000;
|
|
var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true';
|
|
var USE_GATE = params.get('lotusGate') === 'true';
|
|
var GATE_THRESHOLD = parseFloat(params.get('lotusGateThreshold') || '-45');
|
|
|
|
// Registry of the processors this shim can load, keyed by model id (plus the
// optional 'gate' stage). Paths are relative to ASSET_BASE.
var SAPPHI_PREFIX = '@sapphi-red/web-noise-suppressor/';
var PROCESSORS = {};

// Flat sapphi worklets: `name` is the registered processor name, `script` the
// worklet module, `wasm` (and `simdWasm` where a SIMD build exists) the binary.
PROCESSORS.rnnoise = {
  name: SAPPHI_PREFIX + 'rnnoise',
  script: 'rnnoiseWorklet.js',
  wasm: 'rnnoise.wasm',
  simdWasm: 'rnnoise_simd.wasm',
};
PROCESSORS.speex = {
  name: SAPPHI_PREFIX + 'speex',
  script: 'speexWorklet.js',
  wasm: 'speex.wasm',
};

// @workadventure/noise-suppression is a self-contained ES module that resolves
// its own AudioWorklet processor + LiteRT WASM + TFLite models via
// import.meta.url. The helper is dynamic-imported and builds the node itself,
// so there is no flat worklet to addModule here.
PROCESSORS.dtln = {
  helper: 'workadventure/audio-worklet.js',
};

// deepfilternet3-noise-filter ships an ESM whose AudioWorklet processor +
// wasm-bindgen glue are INLINED as a string (loaded via a Blob URL — no CDN
// for the worklet). The only assets it fetches are its single-threaded
// df_bg.wasm + ONNX model, which are vendored + self-hosted under
// deepfilternet/v2/... The ESM is dynamic-imported, a DeepFilterNet3Core is
// pointed at the self-hosted base, and it creates the worklet node.
PROCESSORS.deepfilternet = {
  esm: 'deepfilternet/index.esm.js',
};

// Optional noise gate stage placed ahead of the ML model.
PROCESSORS.gate = {
  name: SAPPHI_PREFIX + 'noise-gate',
  script: 'noiseGateWorklet.js',
};
|
|
|
|
// Native getUserMedia, bound to mediaDevices. It must be captured before the
// method is replaced further down in this file, otherwise the patched version
// would end up calling itself.
var origGetUserMedia = md.getUserMedia.bind(md);
// Per-model cache of in-flight/finished WASM downloads. The model id comes
// from the widget URL, so a prototype-less object keeps ids such as
// 'constructor' from resolving to inherited Object.prototype members.
var wasmPromises = Object.create(null);
// Shared AudioContext promise; null until first use and after a failed setup.
var ctxPromise = null;
|
|
|
|
/**
 * Feature-detects WebAssembly SIMD by asking the engine to validate a tiny
 * hand-assembled module.
 *
 * @returns {Promise<boolean>} resolves to the validation result, or to false
 *   when validation itself throws; never rejects.
 */
function checkSimd() {
  var supported = false;
  try {
    // Minimal probe module: the engine only accepts it when it understands
    // the SIMD instructions in the function body.
    var probe = new Uint8Array([
      0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
      253, 15, 253, 98, 11,
    ]);
    supported = WebAssembly.validate(probe);
  } catch (e) {
    supported = false;
  }
  return Promise.resolve(supported);
}
|
|
|
|
/**
 * Downloads the WASM binary for a model, at most once per model id.
 *
 * For 'rnnoise' the SIMD build is tried first when checkSimd() reports
 * support; a non-OK response for the SIMD file falls back to the baseline
 * binary. A failed download is evicted from the cache so a later capture can
 * retry instead of being stuck on the cached rejection.
 *
 * @param {string} modelId key into PROCESSORS
 * @returns {Promise<ArrayBuffer|null>} the binary, or null when the model has
 *   no flat WASM entry (unknown id, DTLN, DeepFilterNet, gate). Rejects with
 *   an Error('<modelId> wasm failed') on a non-OK response.
 */
function loadWasm(modelId) {
  // Own-property checks: modelId originates from the URL, so inherited keys
  // such as 'constructor' must not be treated as cache hits or processors.
  var hasOwn = Object.prototype.hasOwnProperty;
  if (hasOwn.call(wasmPromises, modelId)) return wasmPromises[modelId];
  var p = hasOwn.call(PROCESSORS, modelId) ? PROCESSORS[modelId] : null;
  if (!p || !p.wasm) return Promise.resolve(null);

  function fetchBaseline() {
    return fetch(ASSET_BASE + p.wasm).then(function (r) {
      if (!r.ok) throw new Error(modelId + ' wasm failed');
      return r.arrayBuffer();
    });
  }

  var simdCheck = modelId === 'rnnoise' ? checkSimd() : Promise.resolve(false);
  var pending = simdCheck.then(function (simd) {
    if (!(simd && p.simdWasm)) return fetchBaseline();
    return fetch(ASSET_BASE + p.simdWasm).then(function (r) {
      // SIMD asset missing on the server: retry with the baseline build.
      return r.ok ? r.arrayBuffer() : fetchBaseline();
    });
  });

  wasmPromises[modelId] = pending;
  // Drop a failed attempt from the cache (callers still see the rejection
  // through the promise they were handed).
  pending.catch(function () {
    if (wasmPromises[modelId] === pending) delete wasmPromises[modelId];
  });
  return pending;
}
|
|
|
|
/**
 * Returns the shared AudioContext (running at SAMPLE_RATE) with the flat
 * worklet modules loaded, creating it on first use.
 *
 * Every failure — including the AudioContext constructor throwing — is
 * delivered as a rejected promise so callers can fall back to the raw mic.
 * A context that was created but could not be fully set up is closed again,
 * and the cached promise is cleared so the next capture retries.
 *
 * @returns {Promise<AudioContext>} the ready (resumed) context.
 */
function getContext() {
  if (ctxPromise) return ctxPromise;

  var ctx;
  try {
    ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
  } catch (err) {
    // Nothing was created, so there is nothing to cache or close.
    return Promise.reject(err);
  }

  function closeQuietly() {
    try {
      var closing = ctx.close();
      // close() is promise-based; swallow its rejection as well.
      if (closing && typeof closing.catch === 'function') closing.catch(function () {});
    } catch (e) {}
  }

  var pending = Promise.resolve()
    .then(function () {
      if (ctx.sampleRate !== SAMPLE_RATE) {
        throw new Error('SampleRate mismatch: ' + ctx.sampleRate);
      }
      // Load worklet modules. Only the flat sapphi worklets (RNNoise, Speex,
      // noise gate) are added here; DTLN and DeepFilterNet bring their own
      // processor through the dynamic-imported module (see buildMlNode).
      var scripts = [];
      if (MODEL === 'rnnoise' || MODEL === 'speex') scripts.push(PROCESSORS[MODEL].script);
      if (USE_GATE) scripts.push(PROCESSORS.gate.script);
      return Promise.all(
        scripts.map(function (s) {
          return ctx.audioWorklet.addModule(ASSET_BASE + s);
        }),
      );
    })
    .then(function () {
      return ctx.state === 'suspended' ? ctx.resume() : undefined;
    })
    .then(
      function () {
        return ctx;
      },
      function (err) {
        // Do not leak a half-configured context.
        closeQuietly();
        throw err;
      },
    );

  ctxPromise = pending;
  pending.catch(function () {
    if (ctxPromise === pending) ctxPromise = null;
  });
  return pending;
}
|
|
|
|
// Set once the first successfully processed stream has been reported to the
// host window, so the "active" status message is only posted once.
var hasNotifiedActive = false;

/**
 * Builds the ML denoise AudioWorkletNode for the selected MODEL.
 *
 * - RNNoise/Speex are flat sapphi worklets instantiated directly with the
 *   fetched WASM binary.
 * - DTLN comes from @workadventure's self-contained helper, which is
 *   dynamic-imported; it resolves its own processor + LiteRT WASM + TFLite
 *   models internally and returns the node wrapper.
 * - DeepFilterNet is dynamic-imported and initialised before its node is
 *   created; if that fails the core is destroyed again before rejecting.
 *
 * @param {AudioContext} ctx context the node is created in
 * @param {ArrayBuffer|null} wasmBinary binary from loadWasm (RNNoise/Speex only)
 * @returns {Promise<{node: AudioNode, ready: Promise, dispose: Function}>}
 *   Rejects when the model id is unknown or the model fails to load.
 */
function buildMlNode(ctx, wasmBinary) {
  if (MODEL === 'dtln') {
    return import(ASSET_BASE + PROCESSORS.dtln.helper).then(function (mod) {
      // bypassUntilReady: pass raw audio through until the model is loaded so
      // the call never has a silent/missing track during init.
      return mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true });
    });
  }

  if (MODEL === 'deepfilternet') {
    // Resolve an absolute self-hosted base so the package's cdnUrl override
    // fetches our vendored df_bg.wasm + ONNX model (never the upstream CDN).
    var dfnBase = new URL(ASSET_BASE + 'deepfilternet', window.location.href).href;
    return import(ASSET_BASE + PROCESSORS.deepfilternet.esm).then(function (mod) {
      var core = new mod.DeepFilterNet3Core({
        sampleRate: SAMPLE_RATE,
        noiseReductionLevel: 80,
        assetConfig: { cdnUrl: dfnBase },
      });
      function destroyCore() {
        try {
          core.destroy();
        } catch (e) {}
      }
      // initialize() fetches + compiles the wasm and loads the model on the
      // main thread; the worklet node only exists once that resolves, so the
      // graph is connected with a ready model (no half-initialised passthrough).
      return core
        .initialize()
        .then(function () {
          return core.createAudioWorkletNode(ctx);
        })
        .then(
          function (node) {
            return { node: node, ready: Promise.resolve(), dispose: destroyCore };
          },
          function (err) {
            // Release whatever the core allocated before the caller falls
            // back to the raw mic.
            destroyCore();
            throw err;
          },
        );
    });
  }

  // Flat sapphi worklet. Only registry entries that ship a WASM binary are
  // ML processors; anything else (unknown ids, 'gate', inherited keys) is
  // rejected with a clear message instead of an opaque TypeError.
  var proc = Object.prototype.hasOwnProperty.call(PROCESSORS, MODEL) ? PROCESSORS[MODEL] : null;
  if (!proc || !proc.name || !proc.wasm) {
    return Promise.reject(new Error('Unknown denoise model: ' + MODEL));
  }

  var node = new AudioWorkletNode(ctx, proc.name, {
    channelCount: 1,
    numberOfInputs: 1,
    numberOfOutputs: 1,
    processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
  });
  return Promise.resolve({
    node: node,
    ready: Promise.resolve(),
    dispose: function () {
      try {
        node.port.postMessage('destroy');
      } catch (e) {}
    },
  });
}
|
|
|
|
/**
 * Routes the first audio track of a captured stream through the denoise graph
 * (source -> optional noise gate -> ML node -> destination) and returns a new
 * stream holding the processed audio track plus the original video tracks.
 *
 * Stopping the processed track — or the original track ending — tears the
 * graph down and stops the original capture. On ANY setup failure the partial
 * graph is released, the host window is told the filter is inactive, and the
 * unprocessed stream is returned so the call keeps working.
 *
 * @param {MediaStream} stream stream returned by the native getUserMedia
 * @returns {Promise<MediaStream>} processed stream, or `stream` itself when it
 *   has no audio track or setup failed. Never rejects.
 */
function processStream(stream) {
  var audioTracks = stream.getAudioTracks();
  if (audioTracks.length === 0) return Promise.resolve(stream);

  // Everything created so far, so it can be released both on normal teardown
  // and when setup fails half-way.
  var graphNodes = [];
  var mlHandle = null;
  function releaseGraph() {
    if (mlHandle) {
      try {
        mlHandle.dispose();
      } catch (e) {}
      mlHandle = null;
    }
    graphNodes.forEach(function (n) {
      try {
        n.disconnect();
      } catch (e) {}
    });
    graphNodes = [];
  }

  // A helper throwing synchronously must still end in the raw-mic fallback.
  var setup;
  try {
    setup = Promise.all([loadWasm(MODEL), getContext()]);
  } catch (err) {
    setup = Promise.reject(err);
  }

  return setup
    .then(function (res) {
      var wasmBinary = res[0];
      var ctx = res[1];

      var source = ctx.createMediaStreamSource(stream);
      graphNodes.push(source);
      var dest = ctx.createMediaStreamDestination();
      var head = source;

      // 1. Optional Noise Gate
      if (USE_GATE) {
        var gateNode = new AudioWorkletNode(ctx, PROCESSORS.gate.name, {
          processorOptions: {
            openThreshold: GATE_THRESHOLD,
            closeThreshold: GATE_THRESHOLD - 5,
            holdMs: 150,
            maxChannels: 1,
          },
        });
        graphNodes.push(gateNode);
        head.connect(gateNode);
        head = gateNode;
      }

      // 2. ML Processor
      return buildMlNode(ctx, wasmBinary).then(function (ml) {
        mlHandle = ml;
        var mlNode = ml.node;
        graphNodes.push(mlNode);
        head.connect(mlNode);
        mlNode.connect(dest);

        // Surface async init failures (e.g. DTLN model load) without blocking
        // the track handoff — audio flows via bypassUntilReady meanwhile.
        if (ml.ready && typeof ml.ready.then === 'function') {
          ml.ready.catch(function (err) {
            var m = err instanceof Error ? err.message : String(err);
            console.error('[lotus-denoise] ' + MODEL + ' init failed:', m);
          });
        }

        var origTrack = audioTracks[0];
        var processedTrack = dest.stream.getAudioTracks()[0];

        var torndown = false;
        function cleanup() {
          if (torndown) return;
          torndown = true;
          // Disposes the model and disconnects source, gate and ML node.
          releaseGraph();
          try {
            origTrack.stop();
          } catch (e) {}
        }

        // The consumer only sees the processed track, so stopping it must
        // also release the graph and the real microphone capture.
        var rawStop = processedTrack.stop.bind(processedTrack);
        processedTrack.stop = function () {
          cleanup();
          rawStop();
        };
        origTrack.addEventListener('ended', function () {
          try {
            rawStop();
          } catch (e) {}
          cleanup();
        });

        if (!hasNotifiedActive) {
          hasNotifiedActive = true;
          window.parent.postMessage(
            {
              type: 'lotus-denoise-status',
              active: true,
              model: MODEL,
              nativeNS: USE_NATIVE_NS,
              gate: USE_GATE,
            },
            '*',
          );
        }

        var out = new MediaStream();
        out.addTrack(processedTrack);
        stream.getVideoTracks().forEach(function (t) {
          out.addTrack(t);
        });
        return out;
      });
    })
    .catch(function (e) {
      // Do not leave a dangling source/gate/model behind the raw fallback.
      releaseGraph();
      var msg = e instanceof Error ? e.message : String(e);
      console.error('[lotus-denoise] Setup failed:', msg);
      window.parent.postMessage({ type: 'lotus-denoise-status', active: false, error: msg }, '*');
      return stream;
    });
}
|
|
|
|
// Patched getUserMedia. Audio requests are forced to mono with native noise
// suppression set from lotusNativeNS (echo cancellation / auto gain default to
// on unless the caller specified them), then the captured stream is passed
// through processStream. Requests without audio go to the native
// implementation with their constraints untouched.
navigator.mediaDevices.getUserMedia = function (constraints) {
  var requestedAudio = constraints ? constraints.audio : undefined;
  var wantsAudio = Boolean(requestedAudio);

  var effective = constraints;
  if (wantsAudio) {
    // `audio: true` carries no caller preferences; start from an empty set.
    var callerPrefs = typeof requestedAudio === 'object' ? requestedAudio : {};
    var audioC = Object.assign({}, callerPrefs, {
      noiseSuppression: USE_NATIVE_NS,
      channelCount: 1,
    });
    ['echoCancellation', 'autoGainControl'].forEach(function (key) {
      if (audioC[key] === undefined) audioC[key] = true;
    });
    effective = Object.assign({}, constraints, { audio: audioC });
  }

  return origGetUserMedia(effective).then(function (stream) {
    if (!wantsAudio) return stream;
    return processStream(stream);
  });
};
|
|
})();
|