From ee6bdd824150ec6937b55fb36c2353c76398f61d Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Thu, 2 Jul 2026 20:20:07 -0400 Subject: [PATCH] fix(call): Wave-1 audit fixes (calls host side) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - C-H1: forceState only on FIRST join; on EC reconnect re-arm the fork handlers (resendForkState — deafen+quality only) instead of clobbering live mic/video/ deafen back to the join-time snapshot. - C-H2: AFK auto-mute reads the fork's io.lotus.call_state VAD of the LOCAL published track instead of getUserMedia on the browser DEFAULT mic (which could measure silence while the user spoke on another device → auto-mute an active speaker). Fails safe (never mutes) when call_state is null OR empty. - C-H3: control observer re-binds after EC re-renders (body subtree:true + 100ms debounce) with an early-return so unchanged state doesn't re-render. - C-M3 setQuality join-gated; C-M4 hangup 4s fallback dispose (idempotent); C-M5 PTT no longer silently un-deafens; C-M6 screenshare-audio mute resets on stop; C-L4 deafen key works in the iframe; C-L6 setState-after-unmount guards. Reviewed (C-H2 [] fail-safe + C-H3 re-render guard applied). tsc/eslint/prettier clean, build OK, 677 tests. Co-Authored-By: Claude Opus 4.8 --- LOTUS_TODO.md | 2 +- src/app/components/CallEmbedProvider.tsx | 13 +++ src/app/features/call/CallControls.tsx | 32 ++++++- src/app/features/call/CallSoundboard.tsx | 17 +++- src/app/hooks/useAfkAutoMute.ts | 114 ++++++++++++----------- src/app/plugins/call/CallControl.ts | 84 ++++++++++++++++- src/app/plugins/call/CallEmbed.ts | 18 +++- 7 files changed, 214 insertions(+), 66 deletions(-) diff --git a/LOTUS_TODO.md b/LOTUS_TODO.md index 7fa9db146..05127a8c1 100644 --- a/LOTUS_TODO.md +++ b/LOTUS_TODO.md @@ -37,7 +37,7 @@ Completed features are documented in [LOTUS_FEATURES.md](./LOTUS_FEATURES.md). Bug-hunt of the Tier-1 high-risk areas (notifications/unread/receipts, threads, calls host-side, Element Call fork) by 4 parallel deep-audit agents. `[T#]`=threads, `[N#]`=notifications, `[C#]`=calls host, `[EC#]`=fork. -**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2) + web 🟠 (T2, T4, N3, N4). Web fixes are gate-green (678 tests incl. the new `threadReceipt.test.ts` locking the T1 regression). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (needs a republish). **Still open:** calls-host 🟠/🟡 (C-H1/2/3, C-M*, C-L*) — see below; the remaining 🟡 notification/thread tail (N5, N6, T5, T6, T7). +**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2); web 🟠 (T2, T4, N3, N4); calls-host 🟠 (C-H1, C-H2, C-H3) + 🟡 (C-M3, C-M4, C-M5, C-M6, C-L4, C-L6) — reviewed (the C-H2 AFK rewrite + C-H1 rejoin guard verified). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (**needs a republish**). Web + calls gate-green (677 tests + `threadReceipt.test.ts` locking the T1 regression). **Still open (low tail):** C-M1/C-M2 (DOM-hack fragility — retire via the fork), C-L1/L2/L3/L5/L7/L8, and N5, N6, T5, T6, T7. ### 🔴 High — data-integrity / broken core UX diff --git a/src/app/components/CallEmbedProvider.tsx b/src/app/components/CallEmbedProvider.tsx index f9ce307ac..65e559c75 100644 --- a/src/app/components/CallEmbedProvider.tsx +++ b/src/app/components/CallEmbedProvider.tsx @@ -413,6 +413,16 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps) const dm = callInfo ? directs.has(callInfo.room.roomId) : false; const startCall = useCallStart(dm); + // C-L6: handleTimelineEvent awaits decryption before calling setState; guard + // against the component unmounting during that await. + const mountedRef = useRef(true); + useEffect( + () => () => { + mountedRef.current = false; + }, + [], + ); + const handleTimelineEvent: EventTimelineSetHandlerMap[RoomEvent.Timeline] = useCallback( async (event, room, toStartOfTimeline, removed, data) => { // only process rtc notification reference events. @@ -427,6 +437,9 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps) await event.getDecryptionPromise(); } + // C-L6: bail if we unmounted while awaiting decryption above. + if (!mountedRef.current) return; + // Caller-side: a participant declined a call we're hosting in this room. // Without this the caller's UI keeps "ringing" until the notification // lifetime expires, with no indication the callee said no. diff --git a/src/app/features/call/CallControls.tsx b/src/app/features/call/CallControls.tsx index 882754993..0660fde53 100644 --- a/src/app/features/call/CallControls.tsx +++ b/src/app/features/call/CallControls.tsx @@ -1,4 +1,5 @@ import React, { MouseEventHandler, useCallback, useEffect, useRef, useState } from 'react'; +import { useSetAtom } from 'jotai'; import { Box, Button, @@ -32,6 +33,7 @@ import { import { CallEmbed, useCallControlState } from '../../plugins/call'; import { useSetting } from '../../state/hooks/settings'; import { settingsAtom } from '../../state/settings'; +import { callEmbedAtom } from '../../state/callEmbed'; import { useResizeObserver } from '../../hooks/useResizeObserver'; import { stopPropagation } from '../../utils/keyboard'; import { AsyncStatus, useAsyncCallback } from '../../hooks/useAsyncCallback'; @@ -48,6 +50,7 @@ type CallControlsProps = { export function CallControls({ callEmbed }: CallControlsProps) { const controlRef = useRef(null); const callEmbedRef = useCallEmbedRef(); + const setCallEmbed = useSetAtom(callEmbedAtom); const [compact, setCompact] = useState(document.body.clientWidth < 500); const [isFullscreen, setIsFullscreen] = useState(false); @@ -175,22 +178,28 @@ export function CallControls({ callEmbed }: CallControlsProps) { }; if (isEditable(target)) return; e.preventDefault(); + // C-M5: mark PTT active BEFORE unmuting so the mic echo (onMediaState) + // doesn't treat this transient unmute as a user-initiated undeafen. + callEmbed.control.pttActive = true; if (!microphoneRef.current) callEmbed.control.setMicrophone(true); pttActiveRef.current = true; setPttActive(true); }; const onKeyUp = (e: KeyboardEvent) => { if (e.code !== pttKey) return; + callEmbed.control.pttActive = false; callEmbed.control.setMicrophone(false); pttActiveRef.current = false; setPttActive(false); }; const onBlur = () => { + callEmbed.control.pttActive = false; callEmbed.control.setMicrophone(false); pttActiveRef.current = false; setPttActive(false); }; const onFocus = () => { + callEmbed.control.pttActive = false; callEmbed.control.setMicrophone(false); pttActiveRef.current = false; setPttActive(false); @@ -215,6 +224,7 @@ export function CallControls({ callEmbed }: CallControlsProps) { iframeWindow?.removeEventListener('focus', onFocus); // BUG-8: if callEmbed changes while PTT is active, release mic on cleanup if (pttActiveRef.current) { + callEmbed.control.pttActive = false; callEmbed.control.setMicrophone(false); pttActiveRef.current = false; setPttActive(false); @@ -242,8 +252,15 @@ export function CallControls({ callEmbed }: CallControlsProps) { e.preventDefault(); callEmbed.control.toggleSound(); }; + // C-L4: also bind the EC iframe window so the deafen key works when focus is + // inside the iframe (mirrors the PTT binding above). + const iframeWindow = callEmbed.iframe.contentWindow; window.addEventListener('keydown', onKeyDown); - return () => window.removeEventListener('keydown', onKeyDown); + iframeWindow?.addEventListener('keydown', onKeyDown); + return () => { + window.removeEventListener('keydown', onKeyDown); + iframeWindow?.removeEventListener('keydown', onKeyDown); + }; }, [callEmbed, deafenKey]); const [hangupState, hangup] = useAsyncCallback( @@ -252,6 +269,19 @@ export function CallControls({ callEmbed }: CallControlsProps) { const exiting = hangupState.status === AsyncStatus.Loading || hangupState.status === AsyncStatus.Success; + // C-M4: the normal teardown relies on EC echoing a Close/Hangup action after + // it ACKs HangupCall (useCallHangupEvent -> clears callEmbedAtom -> dispose). + // If EC ACKs but never echoes, the End button would spin forever. Fall back to + // disposing the embed a few seconds after a successful hangup send, unless it + // was already torn down by the normal path. + useEffect(() => { + if (hangupState.status !== AsyncStatus.Success) return undefined; + const id = setTimeout(() => { + if (!callEmbed.disposed) setCallEmbed(undefined); + }, 4000); + return () => clearTimeout(id); + }, [hangupState.status, callEmbed, setCallEmbed]); + const pttKeyLabel = pttKey === 'Space' ? 'SPACE' : pttKey.replace('Key', '').replace('Digit', ''); return ( diff --git a/src/app/features/call/CallSoundboard.tsx b/src/app/features/call/CallSoundboard.tsx index c69e41d8a..999e7e944 100644 --- a/src/app/features/call/CallSoundboard.tsx +++ b/src/app/features/call/CallSoundboard.tsx @@ -1,4 +1,4 @@ -import React, { MouseEventHandler, useCallback, useMemo, useState } from 'react'; +import React, { MouseEventHandler, useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { Box, Icon, @@ -64,6 +64,16 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) { const [playingKey, setPlayingKey] = useState(); // host-side spam guard const [error, setError] = useState(); + // C-L6: the play() flow schedules a 30s safety timeout that clears playingKey; + // guard those setState calls against the component unmounting first. + const mountedRef = useRef(true); + useEffect( + () => () => { + mountedRef.current = false; + }, + [], + ); + const groups = useMemo( () => packs @@ -86,7 +96,10 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) { if (playingKey) return; // one at a time (fork also enforces this) setPlayingKey(flat.key); setError(undefined); - const done = () => setPlayingKey((k) => (k === flat.key ? undefined : k)); + const done = () => { + if (!mountedRef.current) return; + setPlayingKey((k) => (k === flat.key ? undefined : k)); + }; try { const url = await resolveClipObjectUrl(mx, flat.clip.url); const vol = (flat.clip.volume / 100) * master; diff --git a/src/app/hooks/useAfkAutoMute.ts b/src/app/hooks/useAfkAutoMute.ts index f91890490..1e25f808a 100644 --- a/src/app/hooks/useAfkAutoMute.ts +++ b/src/app/hooks/useAfkAutoMute.ts @@ -4,84 +4,86 @@ import { CallEmbed, useCallControlState } from '../plugins/call'; import { useSetting } from '../state/hooks/settings'; import { settingsAtom } from '../state/settings'; import { toastQueueAtom } from '../state/toast'; +import { useMatrixClient } from './useMatrixClient'; -const SILENCE_RMS_THRESHOLD = 0.008; const CHECK_INTERVAL_MS = 500; /** - * Monitors microphone audio while in a call. If the mic stays unmuted but - * silent for longer than the configured timeout, the mic is muted and a toast - * is shown. + * Monitors microphone activity while in a call. If the mic stays unmuted but + * the user is not speaking for longer than the configured timeout, the mic is + * muted and a toast is shown. * - * The level-monitoring capture (`getUserMedia`) is opened ONLY while the mic is - * unmuted — there is nothing to auto-mute once you are already muted, so - * holding the capture would keep the OS recording indicator lit even though the - * UI shows you as muted (N95). Muting therefore releases our stream; unmuting - * re-acquires it. The AudioContext + stream are also torn down on unmount. + * [C-H2] Activity is read from the EC fork's `io.lotus.call_state` stream + * (getLotusParticipants) — i.e. the VAD state of the user's ACTUAL published + * track on their SELECTED input device. The previous implementation opened its + * own `getUserMedia({ audio: true })`, which captured the browser DEFAULT mic + * (not necessarily the device EC publishes from): it could measure silence + * while the user spoke on a different device (auto-muting an active speaker) and + * lit a second OS microphone indicator. Sourcing from the fork removes both + * problems and needs no extra capture. + * + * If the fork hasn't reported call-state yet (getLotusParticipants() === null — + * e.g. plain EC, or immediately after join), we cannot tell whether the user is + * publishing, so we fail SAFE and never auto-mute during that window. */ export function useAfkAutoMute(callEmbed: CallEmbed | undefined): void { + const mx = useMatrixClient(); const [enabled] = useSetting(settingsAtom, 'afkAutoMute'); const [timeoutMinutes] = useSetting(settingsAtom, 'afkTimeoutMinutes'); const setToast = useSetAtom(toastQueueAtom); const { microphone } = useCallControlState(callEmbed?.control); useEffect(() => { - // Only capture while in a call, enabled, AND unmuted (see N95 note above). + // Only monitor while in a call, enabled, AND unmuted — there is nothing to + // auto-mute once you are already muted. if (!callEmbed || !enabled || !microphone) return undefined; - let stream: MediaStream | undefined; - let audioCtx: AudioContext | undefined; - let intervalId: ReturnType | undefined; + const localUserId = mx.getSafeUserId(); + const timeoutMs = timeoutMinutes * 60 * 1000; let silenceStart: number | null = null; let active = true; - const timeoutMs = timeoutMinutes * 60 * 1000; - navigator.mediaDevices - .getUserMedia({ audio: true, video: false }) - .then((s) => { - if (!active) { - s.getTracks().forEach((t) => t.stop()); - return; - } - stream = s; - audioCtx = new AudioContext(); - const source = audioCtx.createMediaStreamSource(stream); - const analyser = audioCtx.createAnalyser(); - analyser.fftSize = 256; - source.connect(analyser); - const buffer = new Float32Array(analyser.fftSize); + // undefined = fork hasn't reported call-state yet (can't tell — fail safe). + const isLocalSpeaking = (): boolean | undefined => { + const participants = callEmbed.getLotusParticipants(); + // null = fork not reported; [] = malformed/spurious payload (CallEmbed + // stores [] for a non-array). You are ALWAYS present in your own joined + // call, so an empty list means "no usable data", NOT "silent" — matching + // useCallSpeakers / useRemoteAllMuted. Treating [] as silent would let the + // timer mute an active speaker. Fail safe on both. + if (participants === null || participants.length === 0) return undefined; + return participants.some((p) => p.userId === localUserId && p.audioEnabled && p.speaking); + }; - intervalId = setInterval(() => { - if (!active) return; - analyser.getFloatTimeDomainData(buffer); - const rms = Math.sqrt(buffer.reduce((sum, v) => sum + v * v, 0) / buffer.length); + const intervalId = setInterval(() => { + if (!active) return; + const speaking = isLocalSpeaking(); - if (rms > SILENCE_RMS_THRESHOLD) { - // Audio detected — reset the silence timer. - silenceStart = null; - } else if (silenceStart === null) { - // Mic is unmuted (effect only runs while unmuted) but silent — start the timer. - silenceStart = Date.now(); - } else if (Date.now() - silenceStart >= timeoutMs) { - callEmbed.control.setMicrophone(false); - setToast({ - id: `afk-mute-${Date.now()}`, - displayName: 'Lotus Chat', - body: 'Your microphone was muted after inactivity.', - roomName: 'Voice call', - roomId: callEmbed.roomId, - }); - silenceStart = null; - } - }, CHECK_INTERVAL_MS); - }) - .catch(() => undefined); + if (speaking === undefined) { + // No usable signal — don't risk muting an active speaker. + silenceStart = null; + } else if (speaking) { + // Voice detected on the published track — reset the silence timer. + silenceStart = null; + } else if (silenceStart === null) { + // Mic is unmuted (effect only runs while unmuted) but silent — start the timer. + silenceStart = Date.now(); + } else if (Date.now() - silenceStart >= timeoutMs) { + callEmbed.control.setMicrophone(false); + setToast({ + id: `afk-mute-${Date.now()}`, + displayName: 'Lotus Chat', + body: 'Your microphone was muted after inactivity.', + roomName: 'Voice call', + roomId: callEmbed.roomId, + }); + silenceStart = null; + } + }, CHECK_INTERVAL_MS); return () => { active = false; - if (intervalId !== undefined) clearInterval(intervalId); - stream?.getTracks().forEach((t) => t.stop()); - audioCtx?.close().catch(() => undefined); + clearInterval(intervalId); }; - }, [callEmbed, enabled, timeoutMinutes, setToast, microphone]); + }, [callEmbed, enabled, timeoutMinutes, setToast, microphone, mx]); } diff --git a/src/app/plugins/call/CallControl.ts b/src/app/plugins/call/CallControl.ts index fda6c608c..d4bdd30f8 100644 --- a/src/app/plugins/call/CallControl.ts +++ b/src/app/plugins/call/CallControl.ts @@ -29,8 +29,22 @@ export class CallControl extends EventEmitter implements CallControlState { private controlMutationObserver: MutationObserver; + // C-H3: coalesces bursts of body-subtree mutations into a single debounced + // re-observe pass so a busy EC re-render doesn't thrash the control observer. + private bodyMutationTimer?: ReturnType; + private _pipMode = false; + // C-M3: last quality payload requested via setQuality(). Held so we can (re)send + // it once joined (io.lotus.set_quality must not be sent before call-join — a + // pre-join send pends to a 10s widget timeout, mirroring the deafen gate). + private lastQuality: LotusQualityPayload | null = null; + + // C-M5: set true by CallControls while a push-to-talk key is held. A PTT hold + // unmutes the mic transiently, and onMediaState() must NOT treat that as a + // user-initiated unmute that auto-undeafens the user. + public pttActive = false; + // P6-2: mirrors CallEmbed.joined. Set true from forceState(), which CallEmbed // invokes only from onCallJoined(). Gates io.lotus.set_deafen so we never send // before the fork's widget handler mounts (pre-join sends pend to a 10s @@ -153,19 +167,43 @@ export class CallControl extends EventEmitter implements CallControlState { // this.joined was still false, so it was gated — this is the first send.) this.joined = true; this.sendDeafenState(); + this.sendQuality(); + } + + /** + * C-H1 / C-M3: re-push the sticky fork-side state (deafen + quality) after an + * EC reconnect. Unlike forceState() this does NOT touch mic/video, so a + * reconnect can't clobber the user's live media state — it only re-arms the + * fork handlers that remount on reconnect. + */ + public resendForkState(): void { + this.sendDeafenState(); + this.sendQuality(); } public startObserving() { if (!this.document) return; + // C-H3: watch the whole body subtree (not just direct children) so we + // re-bind the control observer when EC re-renders its controls deeper in the + // tree. Debounced via onBodyMutation() to avoid thrashing on busy renders. this.bodyMutationObserver.observe(this.document.body, { childList: true, - subtree: false, // only direct children of body + subtree: true, }); - this.onBodyMutation(); + this.applyBodyMutation(); } private onBodyMutation() { + // C-H3: coalesce a burst of subtree mutations into one debounced pass. + if (this.bodyMutationTimer !== undefined) return; + this.bodyMutationTimer = setTimeout(() => { + this.bodyMutationTimer = undefined; + this.applyBodyMutation(); + }, 100); + } + + private applyBodyMutation() { if (!this.document) return; this.document.body.style.setProperty('background', 'none', 'important'); @@ -266,22 +304,43 @@ export class CallControl extends EventEmitter implements CallControlState { this.state = state; this.emitStateUpdate(); - if (this.microphone && !this.sound) { + // C-M5: auto-undeafen when the mic turns on, but NOT for a transient + // push-to-talk unmute — a PTT tap while deafened must not silently + // un-deafen the user. + if (this.microphone && !this.sound && !this.pttActive) { this.toggleSound(); } } private onControlMutation() { + const wasScreensharing = this.screenshare; const screenshare: boolean = this.screenshareButton?.getAttribute('data-kind') === 'primary'; const spotlight: boolean = this.spotlightButton?.checked ?? false; + // C-M6: when a screenshare stops, clear the screenshare-audio mute so a + // later screenshare doesn't start pre-muted. + const screenshareAudioMuted = + wasScreensharing && !screenshare ? false : this.screenshareAudioMuted; + + // C-H3: the body observer now watches subtree:true, so this fires on any DOM + // churn in EC's controls. Only re-emit (→ re-render every consumer) when one + // of the values this method derives actually changed — microphone/video/sound + // are copied unchanged from the current state here. + if ( + this.state.screenshare === screenshare && + this.state.spotlight === spotlight && + this.state.screenshareAudioMuted === screenshareAudioMuted + ) { + return; + } + this.state = new CallControlState( this.microphone, this.video, this.sound, screenshare, spotlight, - this.screenshareAudioMuted, + screenshareAudioMuted, ); this.emitStateUpdate(); } @@ -423,10 +482,25 @@ export class CallControl extends EventEmitter implements CallControlState { * clamped fork-side, so out-of-range input can't brick the encoder. */ public setQuality(settings: LotusQualityPayload): void { - this.call.transport.send('io.lotus.set_quality', settings).catch(() => undefined); + // C-M3: remember the request and only send once joined; sendQuality() gates + // on this.joined so a pre-join call is a no-op that we replay on join. + this.lastQuality = settings; + this.sendQuality(); + } + + // C-M3: push the last-requested quality to the fork. Gated on this.joined so + // we never send io.lotus.set_quality before the fork's handler mounts (a + // pre-join send would pend to a 10s widget timeout). + private sendQuality(): void { + if (!this.joined || !this.lastQuality) return; + this.call.transport.send('io.lotus.set_quality', this.lastQuality).catch(() => undefined); } public dispose() { + if (this.bodyMutationTimer !== undefined) { + clearTimeout(this.bodyMutationTimer); + this.bodyMutationTimer = undefined; + } this.bodyMutationObserver.disconnect(); this.controlMutationObserver.disconnect(); } diff --git a/src/app/plugins/call/CallEmbed.ts b/src/app/plugins/call/CallEmbed.ts index 8b2c6f588..cc54748b0 100644 --- a/src/app/plugins/call/CallEmbed.ts +++ b/src/app/plugins/call/CallEmbed.ts @@ -57,6 +57,10 @@ export class CallEmbed { public joined = false; + // C-M4: set once dispose() has run so the hangup fallback timer can tell + // whether the embed was already torn down by the normal Close/Hangup echo. + public disposed = false; + // [lotus #2] Latest per-participant state from io.lotus.call_state, or null // until the fork sends the first one. When non-null, the speaker/mute hooks // read it instead of scraping the EC iframe DOM. @@ -403,6 +407,8 @@ export class CallEmbed { * @param opts */ public dispose(): void { + if (this.disposed) return; + this.disposed = true; this.disposables.forEach((disposable) => { disposable(); }); @@ -501,9 +507,19 @@ export class CallEmbed { private onCallJoined(): void { this.settleLoad(); - this.joined = true; this.applyStyles(); this.control.startObserving(); + + // C-H1: EC fires JoinCall again on an EC reconnect (this action has no + // once-guard). forceState() would reset live mic/video/deafen back to the + // join-time snapshot, so only run it on the FIRST join. On a rejoin we just + // re-apply styles/observers (above) and re-push the sticky fork state + // (deafen/quality), leaving the user's live media state untouched. + if (this.joined) { + this.control.resendForkState(); + return; + } + this.joined = true; // EC ignores io.element.device_mute before join; re-apply desired state now that EC is live this.control.forceState(this.initialState); }