fix(call): Wave-1 audit fixes (calls host side)

- C-H1: forceState only on FIRST join; on EC reconnect re-arm the fork handlers
  (resendForkState — deafen+quality only) instead of clobbering live mic/video/
  deafen back to the join-time snapshot.
- C-H2: AFK auto-mute reads the fork's io.lotus.call_state VAD of the LOCAL
  published track instead of getUserMedia on the browser DEFAULT mic (which could
  measure silence while the user spoke on another device → auto-mute an active
  speaker). Fails safe (never mutes) when call_state is null OR empty.
- C-H3: control observer re-binds after EC re-renders (body subtree:true + 100ms
  debounce) with an early-return so unchanged state doesn't re-render.
- C-M3 setQuality join-gated; C-M4 hangup 4s fallback dispose (idempotent);
  C-M5 PTT no longer silently un-deafens; C-M6 screenshare-audio mute resets on
  stop; C-L4 deafen key works in the iframe; C-L6 setState-after-unmount guards.

Reviewed (C-H2 [] fail-safe + C-H3 re-render guard applied). tsc/eslint/prettier
clean, build OK, 677 tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-07-02 20:20:07 -04:00
parent 0bbdd7ce94
commit ee6bdd8241
7 changed files with 214 additions and 66 deletions
+1 -1
View File
@@ -37,7 +37,7 @@ Completed features are documented in [LOTUS_FEATURES.md](./LOTUS_FEATURES.md).
Bug-hunt of the Tier-1 high-risk areas (notifications/unread/receipts, threads, calls host-side, Element Call fork) by 4 parallel deep-audit agents. `[T#]`=threads, `[N#]`=notifications, `[C#]`=calls host, `[EC#]`=fork.
**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2) + web 🟠 (T2, T4, N3, N4). Web fixes are gate-green (678 tests incl. the new `threadReceipt.test.ts` locking the T1 regression). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (needs a republish). **Still open:** calls-host 🟠/🟡 (C-H1/2/3, C-M*, C-L*) — see below; the remaining 🟡 notification/thread tail (N5, N6, T5, T6, T7).
**✅ FIXED (2026-07):** all 🔴 (T1, N1, N2); web 🟠 (T2, T4, N3, N4); calls-host 🟠 (C-H1, C-H2, C-H3) + 🟡 (C-M3, C-M4, C-M5, C-M6, C-L4, C-L6) — reviewed (the C-H2 AFK rewrite + C-H1 rejoin guard verified). EC-fork 🟡 (EC1–EC6) fixed on `element-call:lotus` (**needs a republish**). Web + calls gate-green (677 tests + `threadReceipt.test.ts` locking the T1 regression). **Still open (low tail):** C-M1/C-M2 (DOM-hack fragility — retire via the fork), C-L1/L2/L3/L5/L7/L8, and N5, N6, T5, T6, T7.
### 🔴 High — data-integrity / broken core UX
+13
View File
@@ -413,6 +413,16 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps)
const dm = callInfo ? directs.has(callInfo.room.roomId) : false;
const startCall = useCallStart(dm);
// C-L6: handleTimelineEvent awaits decryption before calling setState; guard
// against the component unmounting during that await.
const mountedRef = useRef(true);
useEffect(
() => () => {
mountedRef.current = false;
},
[],
);
const handleTimelineEvent: EventTimelineSetHandlerMap[RoomEvent.Timeline] = useCallback(
async (event, room, toStartOfTimeline, removed, data) => {
// only process rtc notification reference events.
@@ -427,6 +437,9 @@ function IncomingCallListener({ callEmbed, joined }: IncomingCallListenerProps)
await event.getDecryptionPromise();
}
// C-L6: bail if we unmounted while awaiting decryption above.
if (!mountedRef.current) return;
// Caller-side: a participant declined a call we're hosting in this room.
// Without this the caller's UI keeps "ringing" until the notification
// lifetime expires, with no indication the callee said no.
+31 -1
View File
@@ -1,4 +1,5 @@
import React, { MouseEventHandler, useCallback, useEffect, useRef, useState } from 'react';
import { useSetAtom } from 'jotai';
import {
Box,
Button,
@@ -32,6 +33,7 @@ import {
import { CallEmbed, useCallControlState } from '../../plugins/call';
import { useSetting } from '../../state/hooks/settings';
import { settingsAtom } from '../../state/settings';
import { callEmbedAtom } from '../../state/callEmbed';
import { useResizeObserver } from '../../hooks/useResizeObserver';
import { stopPropagation } from '../../utils/keyboard';
import { AsyncStatus, useAsyncCallback } from '../../hooks/useAsyncCallback';
@@ -48,6 +50,7 @@ type CallControlsProps = {
export function CallControls({ callEmbed }: CallControlsProps) {
const controlRef = useRef<HTMLDivElement>(null);
const callEmbedRef = useCallEmbedRef();
const setCallEmbed = useSetAtom(callEmbedAtom);
const [compact, setCompact] = useState(document.body.clientWidth < 500);
const [isFullscreen, setIsFullscreen] = useState(false);
@@ -175,22 +178,28 @@ export function CallControls({ callEmbed }: CallControlsProps) {
};
if (isEditable(target)) return;
e.preventDefault();
// C-M5: mark PTT active BEFORE unmuting so the mic echo (onMediaState)
// doesn't treat this transient unmute as a user-initiated undeafen.
callEmbed.control.pttActive = true;
if (!microphoneRef.current) callEmbed.control.setMicrophone(true);
pttActiveRef.current = true;
setPttActive(true);
};
const onKeyUp = (e: KeyboardEvent) => {
if (e.code !== pttKey) return;
callEmbed.control.pttActive = false;
callEmbed.control.setMicrophone(false);
pttActiveRef.current = false;
setPttActive(false);
};
const onBlur = () => {
callEmbed.control.pttActive = false;
callEmbed.control.setMicrophone(false);
pttActiveRef.current = false;
setPttActive(false);
};
const onFocus = () => {
callEmbed.control.pttActive = false;
callEmbed.control.setMicrophone(false);
pttActiveRef.current = false;
setPttActive(false);
@@ -215,6 +224,7 @@ export function CallControls({ callEmbed }: CallControlsProps) {
iframeWindow?.removeEventListener('focus', onFocus);
// BUG-8: if callEmbed changes while PTT is active, release mic on cleanup
if (pttActiveRef.current) {
callEmbed.control.pttActive = false;
callEmbed.control.setMicrophone(false);
pttActiveRef.current = false;
setPttActive(false);
@@ -242,8 +252,15 @@ export function CallControls({ callEmbed }: CallControlsProps) {
e.preventDefault();
callEmbed.control.toggleSound();
};
// C-L4: also bind the EC iframe window so the deafen key works when focus is
// inside the iframe (mirrors the PTT binding above).
const iframeWindow = callEmbed.iframe.contentWindow;
window.addEventListener('keydown', onKeyDown);
return () => window.removeEventListener('keydown', onKeyDown);
iframeWindow?.addEventListener('keydown', onKeyDown);
return () => {
window.removeEventListener('keydown', onKeyDown);
iframeWindow?.removeEventListener('keydown', onKeyDown);
};
}, [callEmbed, deafenKey]);
const [hangupState, hangup] = useAsyncCallback(
@@ -252,6 +269,19 @@ export function CallControls({ callEmbed }: CallControlsProps) {
const exiting =
hangupState.status === AsyncStatus.Loading || hangupState.status === AsyncStatus.Success;
// C-M4: the normal teardown relies on EC echoing a Close/Hangup action after
// it ACKs HangupCall (useCallHangupEvent -> clears callEmbedAtom -> dispose).
// If EC ACKs but never echoes, the End button would spin forever. Fall back to
// disposing the embed a few seconds after a successful hangup send, unless it
// was already torn down by the normal path.
useEffect(() => {
if (hangupState.status !== AsyncStatus.Success) return undefined;
const id = setTimeout(() => {
if (!callEmbed.disposed) setCallEmbed(undefined);
}, 4000);
return () => clearTimeout(id);
}, [hangupState.status, callEmbed, setCallEmbed]);
const pttKeyLabel = pttKey === 'Space' ? 'SPACE' : pttKey.replace('Key', '').replace('Digit', '');
return (
+15 -2
View File
@@ -1,4 +1,4 @@
import React, { MouseEventHandler, useCallback, useMemo, useState } from 'react';
import React, { MouseEventHandler, useCallback, useEffect, useMemo, useRef, useState } from 'react';
import {
Box,
Icon,
@@ -64,6 +64,16 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) {
const [playingKey, setPlayingKey] = useState<string>(); // host-side spam guard
const [error, setError] = useState<string>();
// C-L6: the play() flow schedules a 30s safety timeout that clears playingKey;
// guard those setState calls against the component unmounting first.
const mountedRef = useRef(true);
useEffect(
() => () => {
mountedRef.current = false;
},
[],
);
const groups = useMemo(
() =>
packs
@@ -86,7 +96,10 @@ export function CallSoundboard({ callEmbed }: CallSoundboardProps) {
if (playingKey) return; // one at a time (fork also enforces this)
setPlayingKey(flat.key);
setError(undefined);
const done = () => setPlayingKey((k) => (k === flat.key ? undefined : k));
const done = () => {
if (!mountedRef.current) return;
setPlayingKey((k) => (k === flat.key ? undefined : k));
};
try {
const url = await resolveClipObjectUrl(mx, flat.clip.url);
const vol = (flat.clip.volume / 100) * master;
+58 -56
View File
@@ -4,84 +4,86 @@ import { CallEmbed, useCallControlState } from '../plugins/call';
import { useSetting } from '../state/hooks/settings';
import { settingsAtom } from '../state/settings';
import { toastQueueAtom } from '../state/toast';
import { useMatrixClient } from './useMatrixClient';
const SILENCE_RMS_THRESHOLD = 0.008;
const CHECK_INTERVAL_MS = 500;
/**
* Monitors microphone audio while in a call. If the mic stays unmuted but
* silent for longer than the configured timeout, the mic is muted and a toast
* is shown.
* Monitors microphone activity while in a call. If the mic stays unmuted but
* the user is not speaking for longer than the configured timeout, the mic is
* muted and a toast is shown.
*
* The level-monitoring capture (`getUserMedia`) is opened ONLY while the mic is
* unmuted – there is nothing to auto-mute once you are already muted, so
* holding the capture would keep the OS recording indicator lit even though the
* UI shows you as muted (N95). Muting therefore releases our stream; unmuting
* re-acquires it. The AudioContext + stream are also torn down on unmount.
* [C-H2] Activity is read from the EC fork's `io.lotus.call_state` stream
* (getLotusParticipants) – i.e. the VAD state of the user's ACTUAL published
* track on their SELECTED input device. The previous implementation opened its
* own `getUserMedia({ audio: true })`, which captured the browser DEFAULT mic
* (not necessarily the device EC publishes from): it could measure silence
* while the user spoke on a different device (auto-muting an active speaker) and
* lit a second OS microphone indicator. Sourcing from the fork removes both
* problems and needs no extra capture.
*
* If the fork hasn't reported call-state yet (getLotusParticipants() === null –
* e.g. plain EC, or immediately after join), we cannot tell whether the user is
* publishing, so we fail SAFE and never auto-mute during that window.
*/
export function useAfkAutoMute(callEmbed: CallEmbed | undefined): void {
const mx = useMatrixClient();
const [enabled] = useSetting(settingsAtom, 'afkAutoMute');
const [timeoutMinutes] = useSetting(settingsAtom, 'afkTimeoutMinutes');
const setToast = useSetAtom(toastQueueAtom);
const { microphone } = useCallControlState(callEmbed?.control);
useEffect(() => {
// Only capture while in a call, enabled, AND unmuted (see N95 note above).
// Only monitor while in a call, enabled, AND unmuted — there is nothing to
// auto-mute once you are already muted.
if (!callEmbed || !enabled || !microphone) return undefined;
let stream: MediaStream | undefined;
let audioCtx: AudioContext | undefined;
let intervalId: ReturnType<typeof setInterval> | undefined;
const localUserId = mx.getSafeUserId();
const timeoutMs = timeoutMinutes * 60 * 1000;
let silenceStart: number | null = null;
let active = true;
const timeoutMs = timeoutMinutes * 60 * 1000;
navigator.mediaDevices
.getUserMedia({ audio: true, video: false })
.then((s) => {
if (!active) {
s.getTracks().forEach((t) => t.stop());
return;
}
stream = s;
audioCtx = new AudioContext();
const source = audioCtx.createMediaStreamSource(stream);
const analyser = audioCtx.createAnalyser();
analyser.fftSize = 256;
source.connect(analyser);
const buffer = new Float32Array(analyser.fftSize);
// undefined = fork hasn't reported call-state yet (can't tell — fail safe).
const isLocalSpeaking = (): boolean | undefined => {
const participants = callEmbed.getLotusParticipants();
// null = fork not reported; [] = malformed/spurious payload (CallEmbed
// stores [] for a non-array). You are ALWAYS present in your own joined
// call, so an empty list means "no usable data", NOT "silent" — matching
// useCallSpeakers / useRemoteAllMuted. Treating [] as silent would let the
// timer mute an active speaker. Fail safe on both.
if (participants === null || participants.length === 0) return undefined;
return participants.some((p) => p.userId === localUserId && p.audioEnabled && p.speaking);
};
intervalId = setInterval(() => {
if (!active) return;
analyser.getFloatTimeDomainData(buffer);
const rms = Math.sqrt(buffer.reduce((sum, v) => sum + v * v, 0) / buffer.length);
const intervalId = setInterval(() => {
if (!active) return;
const speaking = isLocalSpeaking();
if (rms > SILENCE_RMS_THRESHOLD) {
// Audio detected — reset the silence timer.
silenceStart = null;
} else if (silenceStart === null) {
// Mic is unmuted (effect only runs while unmuted) but silent — start the timer.
silenceStart = Date.now();
} else if (Date.now() - silenceStart >= timeoutMs) {
callEmbed.control.setMicrophone(false);
setToast({
id: `afk-mute-${Date.now()}`,
displayName: 'Lotus Chat',
body: 'Your microphone was muted after inactivity.',
roomName: 'Voice call',
roomId: callEmbed.roomId,
});
silenceStart = null;
}
}, CHECK_INTERVAL_MS);
})
.catch(() => undefined);
if (speaking === undefined) {
// No usable signal — don't risk muting an active speaker.
silenceStart = null;
} else if (speaking) {
// Voice detected on the published track — reset the silence timer.
silenceStart = null;
} else if (silenceStart === null) {
// Mic is unmuted (effect only runs while unmuted) but silent — start the timer.
silenceStart = Date.now();
} else if (Date.now() - silenceStart >= timeoutMs) {
callEmbed.control.setMicrophone(false);
setToast({
id: `afk-mute-${Date.now()}`,
displayName: 'Lotus Chat',
body: 'Your microphone was muted after inactivity.',
roomName: 'Voice call',
roomId: callEmbed.roomId,
});
silenceStart = null;
}
}, CHECK_INTERVAL_MS);
return () => {
active = false;
if (intervalId !== undefined) clearInterval(intervalId);
stream?.getTracks().forEach((t) => t.stop());
audioCtx?.close().catch(() => undefined);
clearInterval(intervalId);
};
}, [callEmbed, enabled, timeoutMinutes, setToast, microphone]);
}, [callEmbed, enabled, timeoutMinutes, setToast, microphone, mx]);
}
+79 -5
View File
@@ -29,8 +29,22 @@ export class CallControl extends EventEmitter implements CallControlState {
private controlMutationObserver: MutationObserver;
// C-H3: coalesces bursts of body-subtree mutations into a single debounced
// re-observe pass so a busy EC re-render doesn't thrash the control observer.
private bodyMutationTimer?: ReturnType<typeof setTimeout>;
private _pipMode = false;
// C-M3: last quality payload requested via setQuality(). Held so we can (re)send
// it once joined (io.lotus.set_quality must not be sent before call-join — a
// pre-join send pends to a 10s widget timeout, mirroring the deafen gate).
private lastQuality: LotusQualityPayload | null = null;
// C-M5: set true by CallControls while a push-to-talk key is held. A PTT hold
// unmutes the mic transiently, and onMediaState() must NOT treat that as a
// user-initiated unmute that auto-undeafens the user.
public pttActive = false;
// P6-2: mirrors CallEmbed.joined. Set true from forceState(), which CallEmbed
// invokes only from onCallJoined(). Gates io.lotus.set_deafen so we never send
// before the fork's widget handler mounts (pre-join sends pend to a 10s
@@ -153,19 +167,43 @@ export class CallControl extends EventEmitter implements CallControlState {
// this.joined was still false, so it was gated — this is the first send.)
this.joined = true;
this.sendDeafenState();
this.sendQuality();
}
/**
 * C-H1 / C-M3: re-push the sticky fork-side state (deafen + quality) after an
 * EC reconnect. Unlike forceState() this does NOT touch mic/video, so a
 * reconnect can't clobber the user's live media state – it only re-arms the
 * fork handlers that remount on reconnect.
 *
 * Invoked by CallEmbed.onCallJoined() when a join is signalled for an embed
 * that is already joined. The quality half is a no-op unless a quality was
 * previously requested through setQuality().
 */
public resendForkState(): void {
// Deafen first, then quality: the same order forceState() uses on first join.
this.sendDeafenState();
this.sendQuality();
}
public startObserving() {
if (!this.document) return;
// C-H3: watch the whole body subtree (not just direct children) so we
// re-bind the control observer when EC re-renders its controls deeper in the
// tree. Debounced via onBodyMutation() to avoid thrashing on busy renders.
this.bodyMutationObserver.observe(this.document.body, {
childList: true,
subtree: false, // only direct children of body
subtree: true,
});
this.onBodyMutation();
this.applyBodyMutation();
}
private onBodyMutation() {
  // C-H3: collapse a burst of body-subtree mutations into a single pass.
  // The first mutation of a burst schedules applyBodyMutation() 100ms later;
  // any mutation arriving while that pass is still pending is ignored (the
  // timer is not restarted), so a busy EC re-render triggers one pass.
  const passAlreadyScheduled = this.bodyMutationTimer !== undefined;
  if (!passAlreadyScheduled) {
    this.bodyMutationTimer = setTimeout(() => {
      // Clear the handle before running so mutations raised during the pass
      // can schedule the next one.
      this.bodyMutationTimer = undefined;
      this.applyBodyMutation();
    }, 100);
  }
}
private applyBodyMutation() {
if (!this.document) return;
this.document.body.style.setProperty('background', 'none', 'important');
@@ -266,22 +304,43 @@ export class CallControl extends EventEmitter implements CallControlState {
this.state = state;
this.emitStateUpdate();
if (this.microphone && !this.sound) {
// C-M5: auto-undeafen when the mic turns on, but NOT for a transient
// push-to-talk unmute — a PTT tap while deafened must not silently
// un-deafen the user.
if (this.microphone && !this.sound && !this.pttActive) {
this.toggleSound();
}
}
private onControlMutation() {
const wasScreensharing = this.screenshare;
const screenshare: boolean = this.screenshareButton?.getAttribute('data-kind') === 'primary';
const spotlight: boolean = this.spotlightButton?.checked ?? false;
// C-M6: when a screenshare stops, clear the screenshare-audio mute so a
// later screenshare doesn't start pre-muted.
const screenshareAudioMuted =
wasScreensharing && !screenshare ? false : this.screenshareAudioMuted;
// C-H3: the body observer now watches subtree:true, so this fires on any DOM
// churn in EC's controls. Only re-emit (→ re-render every consumer) when one
// of the values this method derives actually changed — microphone/video/sound
// are copied unchanged from the current state here.
if (
this.state.screenshare === screenshare &&
this.state.spotlight === spotlight &&
this.state.screenshareAudioMuted === screenshareAudioMuted
) {
return;
}
this.state = new CallControlState(
this.microphone,
this.video,
this.sound,
screenshare,
spotlight,
this.screenshareAudioMuted,
screenshareAudioMuted,
);
this.emitStateUpdate();
}
@@ -423,10 +482,25 @@ export class CallControl extends EventEmitter implements CallControlState {
* clamped fork-side, so out-of-range input can't brick the encoder.
*/
public setQuality(settings: LotusQualityPayload): void {
this.call.transport.send('io.lotus.set_quality', settings).catch(() => undefined);
// C-M3: remember the request and only send once joined; sendQuality() gates
// on this.joined so a pre-join call is a no-op that we replay on join.
this.lastQuality = settings;
this.sendQuality();
}
/**
 * C-M3: forward the most recently requested quality settings to the fork.
 *
 * Does nothing until the call is joined and a quality has been requested, so
 * io.lotus.set_quality is never sent before the fork's handler mounts (a
 * pre-join send would pend to a 10s widget timeout). Transport failures are
 * deliberately ignored: this is a best-effort push.
 */
private sendQuality(): void {
  const payload = this.lastQuality;
  if (this.joined && payload) {
    this.call.transport.send('io.lotus.set_quality', payload).catch(() => undefined);
  }
}
/**
 * Releases everything this control holds: cancels a pending body-mutation
 * pass (if one is scheduled) and disconnects both mutation observers.
 */
public dispose() {
  const pendingPass = this.bodyMutationTimer;
  if (pendingPass !== undefined) {
    // A coalesced pass is still queued; drop it so it cannot fire after teardown.
    clearTimeout(pendingPass);
    this.bodyMutationTimer = undefined;
  }

  this.bodyMutationObserver.disconnect();
  this.controlMutationObserver.disconnect();
}
+17 -1
View File
@@ -57,6 +57,10 @@ export class CallEmbed {
public joined = false;
// C-M4: set once dispose() has run so the hangup fallback timer can tell
// whether the embed was already torn down by the normal Close/Hangup echo.
public disposed = false;
// [lotus #2] Latest per-participant state from io.lotus.call_state, or null
// until the fork sends the first one. When non-null, the speaker/mute hooks
// read it instead of scraping the EC iframe DOM.
@@ -403,6 +407,8 @@ export class CallEmbed {
* @param opts
*/
public dispose(): void {
if (this.disposed) return;
this.disposed = true;
this.disposables.forEach((disposable) => {
disposable();
});
@@ -501,9 +507,19 @@ export class CallEmbed {
private onCallJoined(): void {
this.settleLoad();
this.joined = true;
this.applyStyles();
this.control.startObserving();
// C-H1: EC fires JoinCall again on an EC reconnect (this action has no
// once-guard). forceState() would reset live mic/video/deafen back to the
// join-time snapshot, so only run it on the FIRST join. On a rejoin we just
// re-apply styles/observers (above) and re-push the sticky fork state
// (deafen/quality), leaving the user's live media state untouched.
if (this.joined) {
this.control.resendForkState();
return;
}
this.joined = true;
// EC ignores io.element.device_mute before join; re-apply desired state now that EC is live
this.control.forceState(this.initialState);
}