Files
cinny/build/lotus-denoise.js
T
jared 86272b6b08
CI / Build & Quality Checks (push) Successful in 10m25s
CI / Trigger Desktop Build (push) Successful in 6s
fix(calls): wire DTLN ML denoise correctly via @workadventure JS API
The prior DTLN attempt (89a2321d) broke the build (missing dep, wrong
`cinny/` asset paths) and typecheck (`'dtln'` not in DenoiseModelId), and was
wired against an API the package doesn't expose. @workadventure/noise-
suppression is not a flat AudioWorklet — it's a self-contained ES module whose
processor name is `workadventure-noise-suppression` and which resolves its own
LiteRT WASM + TFLite models via import.meta.url. Driving it by hand-rolled
addModule + processorOptions cannot work.

- Re-add @workadventure/noise-suppression@0.0.4 (package.json + lockfile).
- vite: copy the package's whole dist/ tree intact to
  denoise/workadventure/ (preserving assets/ + vendor/litert) so import.meta
  resolution works at runtime; fail the build if the entry module is missing.
- shim: for the DTLN model, dynamic-import denoise/workadventure/audio-worklet
  .js and use createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady })
  to build the node; RNNoise/Speex keep their direct flat-worklet path. Async
  init errors are logged + reported and fall back to the raw mic.
- Restore 'dtln' in DenoiseModelId (+ settings coercion), the model chart, and
  the settings dropdown, labelled "(beta)".

DTLN builds and is fully self-hosted, but its in-call audio is UNVERIFIED in
this environment — needs a real-call test. DeepFilterNet stays excluded (CDN
asset loading, incompatible with self-hosting / Tauri CSP).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 17:11:45 -04:00

300 lines
10 KiB
JavaScript

/*
* Lotus Chat — client-side ML noise suppression shim for Element Call.
*
* Element Call runs as a same-origin iframe widget that captures the mic
* internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
* We can't reach that track from the host. Instead this classic <script> is
* injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
* deferred module entry, so it runs first and monkeypatches getUserMedia. When
* the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
* captured mic through an RNNoise AudioWorklet (@sapphi-red/web-noise-suppressor)
* and hand the processed track back to EC/LiveKit.
*
* RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
* or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
* - run a 48 kHz AudioContext (which handles resampling from the hardware),
* - use the SIMD build if supported for better performance,
* - keep browser-native stationary suppression ON so the fans are removed
* before RNNoise focuses on transient noises (keyboard, dogs, etc.).
*
* Any failure falls back to the unprocessed mic so calls never break.
*/
(function () {
'use strict';
var params;
try {
params = new URLSearchParams(window.location.search);
if (params.get('lotusDenoise') !== 'ml') return;
} catch (e) {
return;
}
var md = navigator.mediaDevices;
if (!md || typeof md.getUserMedia !== 'function') return;
if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;
var ASSET_BASE = './denoise/';
var SAMPLE_RATE = 48000;
var MODEL = params.get('lotusModel') || 'rnnoise';
var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true';
var USE_GATE = params.get('lotusGate') === 'true';
var GATE_THRESHOLD = parseFloat(params.get('lotusGateThreshold') || '-45');
var PROCESSORS = {
rnnoise: {
name: '@sapphi-red/web-noise-suppressor/rnnoise',
script: 'rnnoiseWorklet.js',
wasm: 'rnnoise.wasm',
simdWasm: 'rnnoise_simd.wasm',
},
speex: {
name: '@sapphi-red/web-noise-suppressor/speex',
script: 'speexWorklet.js',
wasm: 'speex.wasm',
},
dtln: {
// @workadventure/noise-suppression is a self-contained ES module that
// resolves its own AudioWorklet processor + LiteRT WASM + TFLite models
// via import.meta.url. We dynamic-import this helper and let it build the
// node, rather than addModule-ing a flat worklet ourselves.
helper: 'workadventure/audio-worklet.js',
},
gate: {
name: '@sapphi-red/web-noise-suppressor/noise-gate',
script: 'noiseGateWorklet.js',
},
};
var origGetUserMedia = md.getUserMedia.bind(md);
var wasmPromises = {};
var ctxPromise = null;
function checkSimd() {
try {
return WebAssembly.validate(
new Uint8Array([
0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0,
253, 15, 253, 98, 11,
]),
)
? Promise.resolve(true)
: Promise.resolve(false);
} catch (e) {
return Promise.resolve(false);
}
}
function loadWasm(modelId) {
if (wasmPromises[modelId]) return wasmPromises[modelId];
var p = PROCESSORS[modelId];
if (!p || !p.wasm) return Promise.resolve(null);
wasmPromises[modelId] = (modelId === 'rnnoise' ? checkSimd() : Promise.resolve(false)).then(
function (simd) {
var file = simd && p.simdWasm ? p.simdWasm : p.wasm;
return fetch(ASSET_BASE + file).then(function (r) {
if (!r.ok) {
if (simd && p.simdWasm)
return fetch(ASSET_BASE + p.wasm).then(function (r2) {
if (!r2.ok) throw new Error(modelId + ' wasm failed');
return r2.arrayBuffer();
});
throw new Error(modelId + ' wasm failed');
}
return r.arrayBuffer();
});
},
);
return wasmPromises[modelId];
}
function getContext() {
if (!ctxPromise) {
ctxPromise = (function () {
var ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
if (ctx.sampleRate !== SAMPLE_RATE) {
try {
ctx.close();
} catch (e) {}
return Promise.reject(new Error('SampleRate mismatch: ' + ctx.sampleRate));
}
// Load worklet modules. DTLN registers its own processor via the
// dynamic-imported helper (see buildMlNode), so it needs nothing here.
var scripts = [];
if (MODEL === 'rnnoise' || MODEL === 'speex') scripts.push(PROCESSORS[MODEL].script);
if (USE_GATE) scripts.push(PROCESSORS.gate.script);
return Promise.all(
scripts.map(function (s) {
return ctx.audioWorklet.addModule(ASSET_BASE + s);
}),
).then(function () {
return ctx.state === 'suspended'
? ctx.resume().then(function () {
return ctx;
})
: ctx;
});
})();
ctxPromise.catch(function () {
ctxPromise = null;
});
}
return ctxPromise;
}
var hasNotifiedActive = false;
// Build the ML denoise AudioWorkletNode. RNNoise/Speex are flat sapphi
// worklets we instantiate directly with the fetched WASM binary. DTLN comes
// from @workadventure's self-contained helper, which we dynamic-import; it
// resolves its own processor + LiteRT WASM + TFLite models internally and
// returns the node. Resolves to { node, ready, dispose }.
function buildMlNode(ctx, wasmBinary) {
if (MODEL === 'dtln') {
return import(ASSET_BASE + PROCESSORS.dtln.helper).then(function (mod) {
// bypassUntilReady: pass raw audio through until the model is loaded so
// the call never has a silent/missing track during init.
return mod.createNoiseSuppressionAudioWorklet(ctx, { bypassUntilReady: true });
});
}
var node = new AudioWorkletNode(ctx, PROCESSORS[MODEL].name, {
channelCount: 1,
numberOfInputs: 1,
numberOfOutputs: 1,
processorOptions: { maxChannels: 1, wasmBinary: wasmBinary },
});
return Promise.resolve({
node: node,
ready: Promise.resolve(),
dispose: function () {
try {
node.port.postMessage('destroy');
} catch (e) {}
},
});
}
function processStream(stream) {
var audioTracks = stream.getAudioTracks();
if (audioTracks.length === 0) return Promise.resolve(stream);
return Promise.all([loadWasm(MODEL), getContext()])
.then(function (res) {
var wasmBinary = res[0];
var ctx = res[1];
var source = ctx.createMediaStreamSource(stream);
var dest = ctx.createMediaStreamDestination();
var head = source;
// 1. Optional Noise Gate
if (USE_GATE) {
var gateNode = new AudioWorkletNode(ctx, PROCESSORS.gate.name, {
processorOptions: {
openThreshold: GATE_THRESHOLD,
closeThreshold: GATE_THRESHOLD - 5,
holdMs: 150,
maxChannels: 1,
},
});
head.connect(gateNode);
head = gateNode;
}
// 2. ML Processor
return buildMlNode(ctx, wasmBinary).then(function (ml) {
var mlNode = ml.node;
head.connect(mlNode);
mlNode.connect(dest);
// Surface async init failures (e.g. DTLN model load) without blocking
// the track handoff — audio flows via bypassUntilReady meanwhile.
if (ml.ready && typeof ml.ready.then === 'function') {
ml.ready.catch(function (err) {
var m = err instanceof Error ? err.message : String(err);
console.error('[lotus-denoise] ' + MODEL + ' init failed:', m);
});
}
var origTrack = audioTracks[0];
var processedTrack = dest.stream.getAudioTracks()[0];
var torndown = false;
function cleanup() {
if (torndown) return;
torndown = true;
try {
ml.dispose();
} catch (e) {}
try {
source.disconnect();
mlNode.disconnect();
} catch (e) {}
try {
origTrack.stop();
} catch (e) {}
}
var rawStop = processedTrack.stop.bind(processedTrack);
processedTrack.stop = function () {
cleanup();
rawStop();
};
origTrack.addEventListener('ended', function () {
try {
rawStop();
} catch (e) {}
cleanup();
});
if (!hasNotifiedActive) {
hasNotifiedActive = true;
window.parent.postMessage(
{
type: 'lotus-denoise-status',
active: true,
model: MODEL,
nativeNS: USE_NATIVE_NS,
gate: USE_GATE,
},
'*',
);
}
var out = new MediaStream();
out.addTrack(processedTrack);
stream.getVideoTracks().forEach(function (t) {
out.addTrack(t);
});
return out;
});
})
.catch(function (e) {
var msg = e instanceof Error ? e.message : String(e);
console.error('[lotus-denoise] Setup failed:', msg);
window.parent.postMessage({ type: 'lotus-denoise-status', active: false, error: msg }, '*');
return stream;
});
}
navigator.mediaDevices.getUserMedia = function (constraints) {
var wantsAudio = !!(constraints && constraints.audio);
var effective = constraints;
if (wantsAudio) {
var audioC =
typeof constraints.audio === 'object' ? Object.assign({}, constraints.audio) : {};
audioC.noiseSuppression = USE_NATIVE_NS;
audioC.channelCount = 1;
if (audioC.echoCancellation === undefined) audioC.echoCancellation = true;
if (audioC.autoGainControl === undefined) audioC.autoGainControl = true;
effective = Object.assign({}, constraints, { audio: audioC });
}
return origGetUserMedia(effective).then(function (stream) {
return wantsAudio ? processStream(stream) : stream;
});
};
})();