// cinny/build/lotus-denoise.js

/*
 * Lotus Chat — client-side ML noise suppression shim for Element Call.
 *
 * Element Call runs as a same-origin iframe widget that captures the mic
 * internally (via livekit-client -> getUserMedia) and publishes it to LiveKit.
 * We can't reach that track from the host. Instead this classic <script> is
 * injected (by the vite `lotus-denoise` plugin) into EC's index.html BEFORE its
 * deferred module entry, so it runs first and monkeypatches getUserMedia. When
 * the "ml" tier is selected (lotusDenoise=ml in the widget URL) we route the
 * captured mic through a noise-suppression AudioWorklet (RNNoise from
 * @sapphi-red/web-noise-suppressor by default; the lotusModel URL parameter
 * selects another processor) and hand the processed track back to EC/LiveKit.
 *
 * RNNoise REQUIRES mono, 48 kHz float audio. Feeding it anything else (stereo,
 * or 44.1 kHz data the model treats as 48 kHz) produces loud static. So we:
 *   - run a 48 kHz AudioContext (which handles resampling from the hardware),
 *   - use the SIMD build if supported for better performance,
 *   - leave browser-native suppression OFF unless lotusNativeNS=true is set;
 *     when enabled it removes stationary noise (e.g. fans) before RNNoise
 *     deals with transient noises (keyboard, dogs, etc.).
 *
 * Any failure falls back to the unprocessed mic so calls never break.
 */
(function () {
  'use strict';

  var params;
  try {
    params = new URLSearchParams(window.location.search);
    if (params.get('lotusDenoise') !== 'ml') return;
  } catch (e) {
    return;
  }

  var md = navigator.mediaDevices;
  if (!md || typeof md.getUserMedia !== 'function') return;
  if (typeof AudioWorkletNode === 'undefined' || typeof AudioContext === 'undefined') return;

  // Relative URL prefix under which the worklet scripts, wasm binaries and
  // model files are fetched.
  var ASSET_BASE = './denoise/';
  // Sample rate requested for the processing AudioContext.
  var SAMPLE_RATE = 48000;
  // Fallbacks used when the corresponding URL parameter is absent or invalid.
  var DEFAULT_MODEL = 'rnnoise';
  var DEFAULT_GATE_THRESHOLD = -45;

  // Registry of the available worklet processors.
  //   name     - processor name handed to the AudioWorkletNode constructor
  //   script   - worklet module file, loaded from ASSET_BASE via addModule()
  //   wasm     - (optional) wasm binary fetched by loadWasm()
  //   simdWasm - (optional) SIMD build preferred when the engine supports it
  var PROCESSORS = {
    rnnoise: {
      name: '@sapphi-red/web-noise-suppressor/rnnoise',
      script: 'rnnoiseWorklet.js',
      wasm: 'rnnoise.wasm',
      simdWasm: 'rnnoise_simd.wasm',
    },
    speex: {
      name: '@sapphi-red/web-noise-suppressor/speex',
      script: 'speexWorklet.js',
      wasm: 'speex.wasm',
    },
    dtln: {
      name: '@workadventure/noise-suppression/processor',
      script: 'dtlnWorklet.js',
    },
    gate: {
      name: '@sapphi-red/web-noise-suppressor/noise-gate',
      script: 'noiseGateWorklet.js',
    },
  };

  // The model id comes straight from the URL, so only accept ids that are own
  // keys of PROCESSORS. Anything else (a typo, or an inherited name such as
  // "constructor") would later dereference a missing entry.
  var requestedModel = params.get('lotusModel') || DEFAULT_MODEL;
  var MODEL = Object.prototype.hasOwnProperty.call(PROCESSORS, requestedModel)
    ? requestedModel
    : DEFAULT_MODEL;
  if (MODEL !== requestedModel) {
    console.warn(
      '[lotus-denoise] Unknown lotusModel "' + requestedModel + '", using ' + DEFAULT_MODEL
    );
  }

  // Value written to the audio constraint `noiseSuppression` (off unless
  // lotusNativeNS=true).
  var USE_NATIVE_NS = params.get('lotusNativeNS') === 'true';
  // Whether a noise-gate worklet is inserted ahead of the model.
  var USE_GATE = params.get('lotusGate') === 'true';
  // Gate open threshold (presumably dB - confirm against the gate worklet).
  // A missing, empty or non-numeric parameter falls back to the default
  // instead of propagating NaN into the worklet options.
  var parsedGateThreshold = parseFloat(params.get('lotusGateThreshold'));
  var GATE_THRESHOLD = Number.isFinite(parsedGateThreshold)
    ? parsedGateThreshold
    : DEFAULT_GATE_THRESHOLD;

  // Unpatched getUserMedia, bound so it can be called as a plain function.
  var origGetUserMedia = md.getUserMedia.bind(md);
  // Cache of in-flight/completed wasm downloads, keyed by model id.
  var wasmPromises = {};
  // Promise for the shared AudioContext, created lazily by getContext().
  var ctxPromise = null;

  /**
   * Probes whether the engine accepts WebAssembly SIMD.
   *
   * Validates a tiny hand-assembled module whose only function returns a
   * v128 (0x7B) and uses 0xFD-prefixed instructions. Never rejects: any
   * exception (e.g. WebAssembly being unavailable) counts as "no SIMD".
   *
   * @returns {Promise<boolean>} resolves to true when the probe validates.
   */
  function checkSimd() {
    var probeBytes = [
      0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0,
      10, 10, 1, 8, 0, 65, 0, 253, 15, 253, 98, 11,
    ];
    var supported = false;
    try {
      if (WebAssembly.validate(new Uint8Array(probeBytes))) {
        supported = true;
      }
    } catch (e) {
      supported = false;
    }
    return Promise.resolve(supported);
  }

  /**
   * Downloads (once) the wasm binary for a processor.
   *
   * The SIMD build is preferred when the processor declares one and the
   * engine supports it; if that download fails for any reason (HTTP error or
   * network failure) the baseline build is tried instead.
   *
   * Successful and in-flight downloads are cached per model id. A failed
   * download is evicted from the cache so a later call can retry, rather
   * than replaying the same rejection forever.
   *
   * @param {string} modelId key into PROCESSORS.
   * @returns {Promise<ArrayBuffer|null>} the binary, or null when the
   *   processor is unknown or has no wasm file. Rejects when the download
   *   fails.
   */
  function loadWasm(modelId) {
    // Own-property checks keep inherited names ("constructor", "toString", ...)
    // from being mistaken for cache entries or processors.
    var hasOwn = Object.prototype.hasOwnProperty;
    if (hasOwn.call(wasmPromises, modelId) && wasmPromises[modelId]) {
      return wasmPromises[modelId];
    }
    var p = hasOwn.call(PROCESSORS, modelId) ? PROCESSORS[modelId] : null;
    if (!p || !p.wasm) return Promise.resolve(null);

    function fetchBinary(file) {
      return fetch(ASSET_BASE + file).then(function (r) {
        if (!r.ok) throw new Error(modelId + ' wasm failed');
        return r.arrayBuffer();
      });
    }

    // Only probe for SIMD when there is a SIMD build to pick.
    var simdProbe = p.simdWasm ? checkSimd() : Promise.resolve(false);
    var pending = simdProbe.then(function (simd) {
      if (!simd || !p.simdWasm) return fetchBinary(p.wasm);
      return fetchBinary(p.simdWasm).catch(function () {
        return fetchBinary(p.wasm);
      });
    });

    wasmPromises[modelId] = pending;
    // Evict on failure; callers still observe the rejection through `pending`.
    pending.catch(function () {
      if (wasmPromises[modelId] === pending) delete wasmPromises[modelId];
    });
    return pending;
  }

  /**
   * Lazily creates the shared AudioContext, loads the worklet modules
   * needed for the selected model (plus the gate when enabled) and resumes
   * the context if it starts suspended.
   *
   * The promise is cached while pending or fulfilled. On failure the cache is
   * cleared so a later call can retry, and the half-initialised context is
   * closed so retries do not accumulate open AudioContexts.
   *
   * Never throws synchronously: constructor errors and an unknown MODEL are
   * reported as a rejection.
   *
   * @returns {Promise<AudioContext>} the ready context.
   */
  function getContext() {
    if (ctxPromise) return ctxPromise;

    var ctx = null;
    var pending;
    try {
      var processor = Object.prototype.hasOwnProperty.call(PROCESSORS, MODEL)
        ? PROCESSORS[MODEL]
        : null;
      if (!processor || !processor.script) throw new Error('Unknown model: ' + MODEL);

      ctx = new AudioContext({ sampleRate: SAMPLE_RATE });
      // The browser may not honour the requested rate; refuse to continue
      // with a mismatched one.
      if (ctx.sampleRate !== SAMPLE_RATE) {
        throw new Error('SampleRate mismatch: ' + ctx.sampleRate);
      }

      var scripts = [processor.script];
      if (USE_GATE) scripts.push(PROCESSORS.gate.script);

      pending = Promise.all(scripts.map(function (s) {
        return ctx.audioWorklet.addModule(ASSET_BASE + s);
      })).then(function () {
        return ctx.state === 'suspended' ? ctx.resume().then(function () { return ctx; }) : ctx;
      });
    } catch (e) {
      pending = Promise.reject(e);
    }

    ctxPromise = pending;
    pending.catch(function () {
      if (ctxPromise === pending) ctxPromise = null;
      if (!ctx) return;
      // Best-effort release; a failure to close must not mask the real error.
      try {
        var closing = ctx.close();
        if (closing && typeof closing.catch === 'function') closing.catch(function () {});
      } catch (e) {}
    });
    return pending;
  }

  // True once the host window has been told that processing is active, so the
  // "active" status is not re-posted for every captured stream. Reset after a
  // failure so that a later success is reported again.
  var hasNotifiedActive = false;

  /**
   * Routes the first audio track of `stream` through the optional noise gate
   * and the selected model worklet, and returns a new stream carrying the
   * processed audio track plus the original video tracks.
   *
   * Any setup failure - including synchronous throws from loadWasm() or
   * getContext() - is logged, reported to the parent window and answered with
   * the unprocessed input stream, so the returned promise does not reject
   * because of denoise setup.
   *
   * @param {MediaStream} stream stream obtained from getUserMedia.
   * @returns {Promise<MediaStream>} processed stream, or `stream` itself when
   *   it has no audio track or setup failed.
   */
  function processStream(stream) {
    var audioTracks = stream.getAudioTracks();
    if (audioTracks.length === 0) return Promise.resolve(stream);

    // Every graph node created so far, so both normal teardown and a failed
    // setup can detach all of them (gate included).
    var nodes = [];
    function disconnectNodes() {
      nodes.forEach(function (node) {
        try { node.disconnect(); } catch (e) {}
      });
    }

    // Start from a resolved promise so that a synchronous throw inside
    // loadWasm()/getContext() lands in the .catch() fallback below instead of
    // escaping to the getUserMedia caller.
    return Promise.resolve()
      .then(function () {
        return Promise.all([loadWasm(MODEL), getContext()]);
      })
      .then(function (res) {
        var wasmBinary = res[0];
        var ctx = res[1];

        var source = ctx.createMediaStreamSource(stream);
        nodes.push(source);
        var dest = ctx.createMediaStreamDestination();
        var head = source;

        // 1. Optional noise gate, placed ahead of the model.
        if (USE_GATE) {
          var gateNode = new AudioWorkletNode(ctx, PROCESSORS.gate.name, {
            processorOptions: {
              openThreshold: GATE_THRESHOLD,
              closeThreshold: GATE_THRESHOLD - 5,
              holdMs: 150,
              maxChannels: 1
            }
          });
          nodes.push(gateNode);
          head.connect(gateNode);
          head = gateNode;
        }

        // 2. Model worklet.
        // NOTE(review): channelCount is set without channelCountMode; with the
        // default mode the node may still receive multi-channel input - confirm
        // whether 'explicit' is wanted here.
        var mlOptions = {
          channelCount: 1,
          numberOfInputs: 1,
          numberOfOutputs: 1,
          processorOptions: { maxChannels: 1 }
        };

        if (MODEL === 'rnnoise' || MODEL === 'speex') {
          mlOptions.processorOptions.wasmBinary = wasmBinary;
        } else if (MODEL === 'dtln') {
          mlOptions.processorOptions = {
            wasmUrl: ASSET_BASE + 'litert_wasm_internal.wasm',
            model1Url: ASSET_BASE + 'model_1.tflite',
            model2Url: ASSET_BASE + 'model_2.tflite',
          };
        } else if (MODEL === 'deepfilternet') {
          // NOTE(review): PROCESSORS has no 'deepfilternet' entry, so this
          // branch cannot currently lead to a working node - confirm whether
          // it is work in progress or should be removed.
          mlOptions.processorOptions = {
            wasmModule: wasmBinary,
            modelBytes: new Uint8Array(wasmBinary),
            suppressionLevel: 50
          };
        }

        var mlNode = new AudioWorkletNode(ctx, PROCESSORS[MODEL].name, mlOptions);
        nodes.push(mlNode);
        head.connect(mlNode);
        mlNode.connect(dest);

        var origTrack = audioTracks[0];
        var processedTrack = dest.stream.getAudioTracks()[0];

        // Idempotent teardown: tell the model worklet to release its
        // resources, detach the whole graph and release the microphone.
        var torndown = false;
        function cleanup() {
          if (torndown) return;
          torndown = true;
          try { mlNode.port.postMessage('destroy'); } catch (e) {}
          disconnectNodes();
          try { origTrack.stop(); } catch (e) {}
        }

        // Stopping the processed track must also stop the real microphone,
        // and the microphone ending must end the processed track.
        var rawStop = processedTrack.stop.bind(processedTrack);
        processedTrack.stop = function () { cleanup(); rawStop(); };
        origTrack.addEventListener('ended', function () {
          try { rawStop(); } catch (e) {}
          cleanup();
        });

        if (!hasNotifiedActive) {
          hasNotifiedActive = true;
          window.parent.postMessage({
            type: 'lotus-denoise-status',
            active: true,
            model: MODEL,
            nativeNS: USE_NATIVE_NS,
            gate: USE_GATE
          }, '*');
        }

        var out = new MediaStream();
        out.addTrack(processedTrack);
        stream.getVideoTracks().forEach(function (t) { out.addTrack(t); });
        return out;
      })
      .catch(function (e) {
        // Detach anything built before the failure; the raw stream itself is
        // left untouched because it is handed back to the caller.
        disconnectNodes();
        hasNotifiedActive = false;
        var msg = e instanceof Error ? e.message : String(e);
        console.error('[lotus-denoise] Setup failed:', msg);
        window.parent.postMessage({ type: 'lotus-denoise-status', active: false, error: msg }, '*');
        return stream;
      });
  }

  // Replacement getUserMedia: audio requests are captured with adjusted
  // constraints (mono, native suppression per USE_NATIVE_NS, echo cancellation
  // and auto gain defaulted on) and then passed through processStream().
  // Requests without audio go straight to the original implementation.
  navigator.mediaDevices.getUserMedia = function (constraints) {
    var wantsAudio = Boolean(constraints && constraints.audio);

    // Builds a copy of the caller's constraints with the audio part adjusted;
    // the caller's own objects are never mutated.
    function withAdjustedAudio() {
      var requestedAudio = typeof constraints.audio === 'object' ? constraints.audio : {};
      var audioC = Object.assign({}, requestedAudio);
      audioC.noiseSuppression = USE_NATIVE_NS;
      audioC.channelCount = 1;
      ['echoCancellation', 'autoGainControl'].forEach(function (key) {
        if (audioC[key] === undefined) {
          audioC[key] = true;
        }
      });
      return Object.assign({}, constraints, { audio: audioC });
    }

    var effective = wantsAudio ? withAdjustedAudio() : constraints;
    return origGetUserMedia(effective).then(function (stream) {
      if (!wantsAudio) {
        return stream;
      }
      return processStream(stream);
    });
  };
})();