fix(call): Wave-1 audit fixes (calls host side)

- C-H1: forceState only on FIRST join; on EC reconnect re-arm the fork handlers (resendForkState — deafen+quality only) instead of clobbering live mic/video/ deafen back to the join-time snapshot. - C-H2: AFK auto-mute reads the fork's io.lotus.call_state VAD of the LOCAL published track instead of getUserMedia on the browser DEFAULT mic (which could measure silence while the user spoke on another device → auto-mute an active speaker). Fails safe (never mutes) when call_state is null OR empty. - C-H3: control observer re-binds after EC re-renders (body subtree:true + 100ms debounce) with an early-return so unchanged state doesn't re-render. - C-M3 setQuality join-gated; C-M4 hangup 4s fallback dispose (idempotent); C-M5 PTT no longer silently un-deafens; C-M6 screenshare-audio mute resets on stop; C-L4 deafen key works in the iframe; C-L6 setState-after-unmount guards. Reviewed (C-H2 [] fail-safe + C-H3 re-render guard applied). tsc/eslint/prettier clean, build OK, 677 tests. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-02 20:20:07 -04:00
parent 0bbdd7ce94
commit ee6bdd8241
7 changed files with 214 additions and 66 deletions
@@ -37,7 +37,7 @@ Completed features are documented in [LOTUS_FEATURES.md](./LOTUS_FEATURES.md).

 Bug-hunt of the Tier-1 high-risk areas (notifications/unread/receipts, threads, calls host-side, Element Call fork) by 4 parallel deep-audit agents. `[T#]`=threads, `[N#]`=notifications, `[C#]`=calls host, `[EC#]`=fork.

-**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2) + web 🟠 (T2, T4, N3, N4). Web fixes are gate-green (678 tests incl. the new `threadReceipt.test.ts` locking the T1 regression). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (needs a republish). **Still open:** calls-host 🟠/🟡 (C-H1/2/3, C-M*, C-L*) — see below; the remaining 🟡 notification/thread tail (N5, N6, T5, T6, T7).
+**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2); web 🟠 (T2, T4, N3, N4); calls-host 🟠 (C-H1, C-H2, C-H3) + 🟡 (C-M3, C-M4, C-M5, C-M6, C-L4, C-L6) — reviewed (the C-H2 AFK rewrite + C-H1 rejoin guard verified). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (**needs a republish**). Web + calls gate-green (677 tests + `threadReceipt.test.ts` locking the T1 regression). **Still open (low tail):** C-M1/C-M2 (DOM-hack fragility — retire via the fork), C-L1/L2/L3/L5/L7/L8, and N5, N6, T5, T6, T7.

 ### 🔴 High — data-integrity / broken core UX

@@ -413,6 +413,16 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps)
  const dm = callInfo ? directs.has(callInfo.room.roomId) : false;
  const startCall = useCallStart(dm);

+  // C-L6: handleTimelineEvent awaits decryption before calling setState; guard
+  // against the component unmounting during that await.
+  const mountedRef = useRef(true);
+  useEffect(
+    () => () => {
+      mountedRef.current = false;
+    },
+    [],
+  );
+
  const handleTimelineEvent: EventTimelineSetHandlerMap[RoomEvent.Timeline] = useCallback(
    async (event, room, toStartOfTimeline, removed, data) => {
      // only process rtc notification reference events.
@@ -427,6 +437,9 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps)
        await event.getDecryptionPromise();
      }

+      // C-L6: bail if we unmounted while awaiting decryption above.
+      if (!mountedRef.current) return;
+
      // Caller-side: a participant declined a call we're hosting in this room.
      // Without this the caller's UI keeps "ringing" until the notification
      // lifetime expires, with no indication the callee said no.
@@ -1,4 +1,5 @@
 import React, { MouseEventHandler, useCallback, useEffect, useRef, useState } from 'react';
+import { useSetAtom } from 'jotai';
 import {
  Box,
  Button,
@@ -32,6 +33,7 @@ import {
 import { CallEmbed, useCallControlState } from '../../plugins/call';
 import { useSetting } from '../../state/hooks/settings';
 import { settingsAtom } from '../../state/settings';
+import { callEmbedAtom } from '../../state/callEmbed';
 import { useResizeObserver } from '../../hooks/useResizeObserver';
 import { stopPropagation } from '../../utils/keyboard';
 import { AsyncStatus, useAsyncCallback } from '../../hooks/useAsyncCallback';
@@ -48,6 +50,7 @@ type CallControlsProps = {
 export function CallControls({ callEmbed }: CallControlsProps) {
  const controlRef = useRef<HTMLDivElement>(null);
  const callEmbedRef = useCallEmbedRef();
+  const setCallEmbed = useSetAtom(callEmbedAtom);
  const [compact, setCompact] = useState(document.body.clientWidth < 500);
  const [isFullscreen, setIsFullscreen] = useState(false);

@@ -175,22 +178,28 @@ export function CallControls({ callEmbed }: CallControlsProps) {
      };
      if (isEditable(target)) return;
      e.preventDefault();
+      // C-M5: mark PTT active BEFORE unmuting so the mic echo (onMediaState)
+      // doesn't treat this transient unmute as a user-initiated undeafen.
+      callEmbed.control.pttActive = true;
      if (!microphoneRef.current) callEmbed.control.setMicrophone(true);
      pttActiveRef.current = true;
      setPttActive(true);
    };
    const onKeyUp = (e: KeyboardEvent) => {
      if (e.code !== pttKey) return;
+      callEmbed.control.pttActive = false;
      callEmbed.control.setMicrophone(false);
      pttActiveRef.current = false;
      setPttActive(false);
    };
    const onBlur = () => {
+      callEmbed.control.pttActive = false;
      callEmbed.control.setMicrophone(false);
      pttActiveRef.current = false;
      setPttActive(false);
    };
    const onFocus = () => {
+      callEmbed.control.pttActive = false;
      callEmbed.control.setMicrophone(false);
      pttActiveRef.current = false;
      setPttActive(false);
@@ -215,6 +224,7 @@ export function CallControls({ callEmbed }: CallControlsProps) {
      iframeWindow?.removeEventListener('focus', onFocus);
      // BUG-8: if callEmbed changes while PTT is active, release mic on cleanup
      if (pttActiveRef.current) {
+        callEmbed.control.pttActive = false;
        callEmbed.control.setMicrophone(false);
        pttActiveRef.current = false;
        setPttActive(false);
@@ -242,8 +252,15 @@ export function CallControls({ callEmbed }: CallControlsProps) {
      e.preventDefault();
      callEmbed.control.toggleSound();
    };
+    // C-L4: also bind the EC iframe window so the deafen key works when focus is
+    // inside the iframe (mirrors the PTT binding above).
+    const iframeWindow = callEmbed.iframe.contentWindow;
    window.addEventListener('keydown', onKeyDown);
-    return () => window.removeEventListener('keydown', onKeyDown);
+    iframeWindow?.addEventListener('keydown', onKeyDown);
+    return () => {
+      window.removeEventListener('keydown', onKeyDown);
+      iframeWindow?.removeEventListener('keydown', onKeyDown);
+    };
  }, [callEmbed, deafenKey]);

  const [hangupState, hangup] = useAsyncCallback(
@@ -252,6 +269,19 @@ export function CallControls({ callEmbed }: CallControlsProps) {
  const exiting =
    hangupState.status === AsyncStatus.Loading || hangupState.status === AsyncStatus.Success;

+  // C-M4: the normal teardown relies on EC echoing a Close/Hangup action after
+  // it ACKs HangupCall (useCallHangupEvent -> clears callEmbedAtom -> dispose).
+  // If EC ACKs but never echoes, the End button would spin forever. Fall back to
+  // disposing the embed a few seconds after a successful hangup send, unless it
+  // was already torn down by the normal path.
+  useEffect(() => {
+    if (hangupState.status !== AsyncStatus.Success) return undefined;
+    const id = setTimeout(() => {
+      if (!callEmbed.disposed) setCallEmbed(undefined);
+    }, 4000);
+    return () => clearTimeout(id);
+  }, [hangupState.status, callEmbed, setCallEmbed]);
+
  const pttKeyLabel = pttKey === 'Space' ? 'SPACE' : pttKey.replace('Key', '').replace('Digit', '');

  return (
@@ -1,4 +1,4 @@
-import React, { MouseEventHandler, useCallback, useMemo, useState } from 'react';
+import React, { MouseEventHandler, useCallback, useEffect, useMemo, useRef, useState } from 'react';
 import {
  Box,
  Icon,
@@ -64,6 +64,16 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) {
  const [playingKey, setPlayingKey] = useState<string>(); // host-side spam guard
  const [error, setError] = useState<string>();

+  // C-L6: the play() flow schedules a 30s safety timeout that clears playingKey;
+  // guard those setState calls against the component unmounting first.
+  const mountedRef = useRef(true);
+  useEffect(
+    () => () => {
+      mountedRef.current = false;
+    },
+    [],
+  );
+
  const groups = useMemo(
    () =>
      packs
@@ -86,7 +96,10 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) {
      if (playingKey) return; // one at a time (fork also enforces this)
      setPlayingKey(flat.key);
      setError(undefined);
-      const done = () => setPlayingKey((k) => (k === flat.key ? undefined : k));
+      const done = () => {
+        if (!mountedRef.current) return;
+        setPlayingKey((k) => (k === flat.key ? undefined : k));
+      };
      try {
        const url = await resolveClipObjectUrl(mx, flat.clip.url);
        const vol = (flat.clip.volume / 100) * master;
@@ -4,84 +4,86 @@ import { CallEmbed, useCallControlState } from '../plugins/call';
 import { useSetting } from '../state/hooks/settings';
 import { settingsAtom } from '../state/settings';
 import { toastQueueAtom } from '../state/toast';
+import { useMatrixClient } from './useMatrixClient';

-const SILENCE_RMS_THRESHOLD = 0.008;
 const CHECK_INTERVAL_MS = 500;

 /**
- * Monitors microphone audio while in a call. If the mic stays unmuted but
- * silent for longer than the configured timeout, the mic is muted and a toast
- * is shown.
+ * Monitors microphone activity while in a call. If the mic stays unmuted but
+ * the user is not speaking for longer than the configured timeout, the mic is
+ * muted and a toast is shown.
 *
- * The level-monitoring capture (`getUserMedia`) is opened ONLY while the mic is
- * unmuted — there is nothing to auto-mute once you are already muted, so
- * holding the capture would keep the OS recording indicator lit even though the
- * UI shows you as muted (N95). Muting therefore releases our stream; unmuting
- * re-acquires it. The AudioContext + stream are also torn down on unmount.
+ * [C-H2] Activity is read from the EC fork's `io.lotus.call_state` stream
+ * (getLotusParticipants) — i.e. the VAD state of the user's ACTUAL published
+ * track on their SELECTED input device. The previous implementation opened its
+ * own `getUserMedia({ audio: true })`, which captured the browser DEFAULT mic
+ * (not necessarily the device EC publishes from): it could measure silence
+ * while the user spoke on a different device (auto-muting an active speaker) and
+ * lit a second OS microphone indicator. Sourcing from the fork removes both
+ * problems and needs no extra capture.
+ *
+ * If the fork hasn't reported call-state yet (getLotusParticipants() === null —
+ * e.g. plain EC, or immediately after join), we cannot tell whether the user is
+ * publishing, so we fail SAFE and never auto-mute during that window.
 */
 export function useAfkAutoMute(callEmbed: CallEmbed | undefined): void {
+  const mx = useMatrixClient();
  const [enabled] = useSetting(settingsAtom, 'afkAutoMute');
  const [timeoutMinutes] = useSetting(settingsAtom, 'afkTimeoutMinutes');
  const setToast = useSetAtom(toastQueueAtom);
  const { microphone } = useCallControlState(callEmbed?.control);

  useEffect(() => {
-    // Only capture while in a call, enabled, AND unmuted (see N95 note above).
+    // Only monitor while in a call, enabled, AND unmuted — there is nothing to
+    // auto-mute once you are already muted.
    if (!callEmbed || !enabled || !microphone) return undefined;

-    let stream: MediaStream | undefined;
-    let audioCtx: AudioContext | undefined;
-    let intervalId: ReturnType<typeof setInterval> | undefined;
+    const localUserId = mx.getSafeUserId();
+    const timeoutMs = timeoutMinutes * 60 * 1000;
    let silenceStart: number | null = null;
    let active = true;
-    const timeoutMs = timeoutMinutes * 60 * 1000;

-    navigator.mediaDevices
-      .getUserMedia({ audio: true, video: false })
-      .then((s) => {
-        if (!active) {
-          s.getTracks().forEach((t) => t.stop());
-          return;
-        }
-        stream = s;
-        audioCtx = new AudioContext();
-        const source = audioCtx.createMediaStreamSource(stream);
-        const analyser = audioCtx.createAnalyser();
-        analyser.fftSize = 256;
-        source.connect(analyser);
-        const buffer = new Float32Array(analyser.fftSize);
+    // undefined = fork hasn't reported call-state yet (can't tell — fail safe).
+    const isLocalSpeaking = (): boolean | undefined => {
+      const participants = callEmbed.getLotusParticipants();
+      // null = fork not reported; [] = malformed/spurious payload (CallEmbed
+      // stores [] for a non-array). You are ALWAYS present in your own joined
+      // call, so an empty list means "no usable data", NOT "silent" — matching
+      // useCallSpeakers / useRemoteAllMuted. Treating [] as silent would let the
+      // timer mute an active speaker. Fail safe on both.
+      if (participants === null || participants.length === 0) return undefined;
+      return participants.some((p) => p.userId === localUserId && p.audioEnabled && p.speaking);
+    };

-        intervalId = setInterval(() => {
-          if (!active) return;
-          analyser.getFloatTimeDomainData(buffer);
-          const rms = Math.sqrt(buffer.reduce((sum, v) => sum + v * v, 0) / buffer.length);
+    const intervalId = setInterval(() => {
+      if (!active) return;
+      const speaking = isLocalSpeaking();

-          if (rms > SILENCE_RMS_THRESHOLD) {
-            // Audio detected — reset the silence timer.
-            silenceStart = null;
-          } else if (silenceStart === null) {
-            // Mic is unmuted (effect only runs while unmuted) but silent — start the timer.
-            silenceStart = Date.now();
-          } else if (Date.now() - silenceStart >= timeoutMs) {
-            callEmbed.control.setMicrophone(false);
-            setToast({
-              id: `afk-mute-${Date.now()}`,
-              displayName: 'Lotus Chat',
-              body: 'Your microphone was muted after inactivity.',
-              roomName: 'Voice call',
-              roomId: callEmbed.roomId,
-            });
-            silenceStart = null;
-          }
-        }, CHECK_INTERVAL_MS);
-      })
-      .catch(() => undefined);
+      if (speaking === undefined) {
+        // No usable signal — don't risk muting an active speaker.
+        silenceStart = null;
+      } else if (speaking) {
+        // Voice detected on the published track — reset the silence timer.
+        silenceStart = null;
+      } else if (silenceStart === null) {
+        // Mic is unmuted (effect only runs while unmuted) but silent — start the timer.
+        silenceStart = Date.now();
+      } else if (Date.now() - silenceStart >= timeoutMs) {
+        callEmbed.control.setMicrophone(false);
+        setToast({
+          id: `afk-mute-${Date.now()}`,
+          displayName: 'Lotus Chat',
+          body: 'Your microphone was muted after inactivity.',
+          roomName: 'Voice call',
+          roomId: callEmbed.roomId,
+        });
+        silenceStart = null;
+      }
+    }, CHECK_INTERVAL_MS);

    return () => {
      active = false;
-      if (intervalId !== undefined) clearInterval(intervalId);
-      stream?.getTracks().forEach((t) => t.stop());
-      audioCtx?.close().catch(() => undefined);
+      clearInterval(intervalId);
    };
-  }, [callEmbed, enabled, timeoutMinutes, setToast, microphone]);
+  }, [callEmbed, enabled, timeoutMinutes, setToast, microphone, mx]);
 }
@@ -29,8 +29,22 @@ export class CallControl extends EventEmitter implements CallControlState {

  private controlMutationObserver: MutationObserver;

+  // C-H3: coalesces bursts of body-subtree mutations into a single debounced
+  // re-observe pass so a busy EC re-render doesn't thrash the control observer.
+  private bodyMutationTimer?: ReturnType<typeof setTimeout>;
+
  private _pipMode = false;

+  // C-M3: last quality payload requested via setQuality(). Held so we can (re)send
+  // it once joined (io.lotus.set_quality must not be sent before call-join — a
+  // pre-join send pends to a 10s widget timeout, mirroring the deafen gate).
+  private lastQuality: LotusQualityPayload | null = null;
+
+  // C-M5: set true by CallControls while a push-to-talk key is held. A PTT hold
+  // unmutes the mic transiently, and onMediaState() must NOT treat that as a
+  // user-initiated unmute that auto-undeafens the user.
+  public pttActive = false;
+
  // P6-2: mirrors CallEmbed.joined. Set true from forceState(), which CallEmbed
  // invokes only from onCallJoined(). Gates io.lotus.set_deafen so we never send
  // before the fork's widget handler mounts (pre-join sends pend to a 10s
@@ -153,19 +167,43 @@ export class CallControl extends EventEmitter implements CallControlState {
    // this.joined was still false, so it was gated — this is the first send.)
    this.joined = true;
    this.sendDeafenState();
+    this.sendQuality();
+  }
+
+  /**
+   * C-H1 / C-M3: re-push the sticky fork-side state (deafen + quality) after an
+   * EC reconnect. Unlike forceState() this does NOT touch mic/video, so a
+   * reconnect can't clobber the user's live media state — it only re-arms the
+   * fork handlers that remount on reconnect.
+   */
+  public resendForkState(): void {
+    this.sendDeafenState();
+    this.sendQuality();
  }

  public startObserving() {
    if (!this.document) return;

+    // C-H3: watch the whole body subtree (not just direct children) so we
+    // re-bind the control observer when EC re-renders its controls deeper in the
+    // tree. Debounced via onBodyMutation() to avoid thrashing on busy renders.
    this.bodyMutationObserver.observe(this.document.body, {
      childList: true,
-      subtree: false, // only direct children of body
+      subtree: true,
    });
-    this.onBodyMutation();
+    this.applyBodyMutation();
  }

  private onBodyMutation() {
+    // C-H3: coalesce a burst of subtree mutations into one debounced pass.
+    if (this.bodyMutationTimer !== undefined) return;
+    this.bodyMutationTimer = setTimeout(() => {
+      this.bodyMutationTimer = undefined;
+      this.applyBodyMutation();
+    }, 100);
+  }
+
+  private applyBodyMutation() {
    if (!this.document) return;

    this.document.body.style.setProperty('background', 'none', 'important');
@@ -266,22 +304,43 @@ export class CallControl extends EventEmitter implements CallControlState {
    this.state = state;
    this.emitStateUpdate();

-    if (this.microphone && !this.sound) {
+    // C-M5: auto-undeafen when the mic turns on, but NOT for a transient
+    // push-to-talk unmute — a PTT tap while deafened must not silently
+    // un-deafen the user.
+    if (this.microphone && !this.sound && !this.pttActive) {
      this.toggleSound();
    }
  }

  private onControlMutation() {
+    const wasScreensharing = this.screenshare;
    const screenshare: boolean = this.screenshareButton?.getAttribute('data-kind') === 'primary';
    const spotlight: boolean = this.spotlightButton?.checked ?? false;

+    // C-M6: when a screenshare stops, clear the screenshare-audio mute so a
+    // later screenshare doesn't start pre-muted.
+    const screenshareAudioMuted =
+      wasScreensharing && !screenshare ? false : this.screenshareAudioMuted;
+
+    // C-H3: the body observer now watches subtree:true, so this fires on any DOM
+    // churn in EC's controls. Only re-emit (→ re-render every consumer) when one
+    // of the values this method derives actually changed — microphone/video/sound
+    // are copied unchanged from the current state here.
+    if (
+      this.state.screenshare === screenshare &&
+      this.state.spotlight === spotlight &&
+      this.state.screenshareAudioMuted === screenshareAudioMuted
+    ) {
+      return;
+    }
+
    this.state = new CallControlState(
      this.microphone,
      this.video,
      this.sound,
      screenshare,
      spotlight,
-      this.screenshareAudioMuted,
+      screenshareAudioMuted,
    );
    this.emitStateUpdate();
  }
@@ -423,10 +482,25 @@ export class CallControl extends EventEmitter implements CallControlState {
   * clamped fork-side, so out-of-range input can't brick the encoder.
   */
  public setQuality(settings: LotusQualityPayload): void {
-    this.call.transport.send('io.lotus.set_quality', settings).catch(() => undefined);
+    // C-M3: remember the request and only send once joined; sendQuality() gates
+    // on this.joined so a pre-join call is a no-op that we replay on join.
+    this.lastQuality = settings;
+    this.sendQuality();
+  }
+
+  // C-M3: push the last-requested quality to the fork. Gated on this.joined so
+  // we never send io.lotus.set_quality before the fork's handler mounts (a
+  // pre-join send would pend to a 10s widget timeout).
+  private sendQuality(): void {
+    if (!this.joined || !this.lastQuality) return;
+    this.call.transport.send('io.lotus.set_quality', this.lastQuality).catch(() => undefined);
  }

  public dispose() {
+    if (this.bodyMutationTimer !== undefined) {
+      clearTimeout(this.bodyMutationTimer);
+      this.bodyMutationTimer = undefined;
+    }
    this.bodyMutationObserver.disconnect();
    this.controlMutationObserver.disconnect();
  }
@@ -57,6 +57,10 @@ export class CallEmbed {

  public joined = false;

+  // C-M4: set once dispose() has run so the hangup fallback timer can tell
+  // whether the embed was already torn down by the normal Close/Hangup echo.
+  public disposed = false;
+
  // [lotus #2] Latest per-participant state from io.lotus.call_state, or null
  // until the fork sends the first one. When non-null, the speaker/mute hooks
  // read it instead of scraping the EC iframe DOM.
@@ -403,6 +407,8 @@ export class CallEmbed {
   * @param opts
   */
  public dispose(): void {
+    if (this.disposed) return;
+    this.disposed = true;
    this.disposables.forEach((disposable) => {
      disposable();
    });
@@ -501,9 +507,19 @@ export class CallEmbed {

  private onCallJoined(): void {
    this.settleLoad();
-    this.joined = true;
    this.applyStyles();
    this.control.startObserving();
+
+    // C-H1: EC fires JoinCall again on an EC reconnect (this action has no
+    // once-guard). forceState() would reset live mic/video/deafen back to the
+    // join-time snapshot, so only run it on the FIRST join. On a rejoin we just
+    // re-apply styles/observers (above) and re-push the sticky fork state
+    // (deafen/quality), leaving the user's live media state untouched.
+    if (this.joined) {
+      this.control.resendForkState();
+      return;
+    }
+    this.joined = true;
    // EC ignores io.element.device_mute before join; re-apply desired state now that EC is live
    this.control.forceState(this.initialState);
  }