Files
cinny/src/app/plugins/call/CallControl.ts
T
jared ee6bdd8241 fix(call): Wave-1 audit fixes (calls host side)
- C-H1: forceState only on FIRST join; on EC reconnect re-arm the fork handlers
  (resendForkState — deafen+quality only) instead of clobbering live mic/video/
  deafen back to the join-time snapshot.
- C-H2: AFK auto-mute reads the fork's io.lotus.call_state VAD of the LOCAL
  published track instead of getUserMedia on the browser DEFAULT mic (which could
  measure silence while the user spoke on another device → auto-mute an active
  speaker). Fails safe (never mutes) when call_state is null OR empty.
- C-H3: control observer re-binds after EC re-renders (body subtree:true + 100ms
  debounce) with an early-return so unchanged state doesn't re-render.
- C-M3 setQuality join-gated; C-M4 hangup 4s fallback dispose (idempotent);
  C-M5 PTT no longer silently un-deafens; C-M6 screenshare-audio mute resets on
  stop; C-L4 deafen key works in the iframe; C-L6 setState-after-unmount guards.

Reviewed (C-H2 [] fail-safe + C-H3 re-render guard applied). tsc/eslint/prettier
clean, build OK, 677 tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-02 20:20:07 -04:00

512 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { ClientWidgetApi } from 'matrix-widget-api';
import { EventEmitter } from 'events';
import { CallControlState } from './CallControlState';
import { ElementMediaStateDetail, ElementMediaStatePayload, ElementWidgetActions } from './types';
/**
 * Event names emitted by `CallControl` (an `EventEmitter`).
 */
export enum CallControlEvent {
// Emitted by CallControl.emitStateUpdate(); carries no payload, so listeners
// read the new values back from the CallControl instance.
StateUpdate = 'state_update',
}
/**
 * [lotus #7 / P5-31] Payload for the fork's `io.lotus.set_quality` action.
 * All fields optional; `null` clears that cap. Bits/sec for bitrates, fps for
 * framerate.
 */
export type LotusQualityPayload = {
// Audio bitrate cap in bits/sec; `null` clears the cap, omitted leaves it as is.
audioMaxBitrate?: number | null;
// Screenshare bitrate cap in bits/sec; `null` clears the cap.
screenshareMaxBitrate?: number | null;
// Screenshare framerate cap in frames/sec; `null` clears the cap.
screenshareMaxFramerate?: number | null;
};
/**
 * Host-side controller for an embedded Element Call (EC) widget.
 *
 * It keeps a `CallControlState` snapshot (microphone / video / sound /
 * screenshare / spotlight / screenshare-audio mute), forwards commands to the
 * widget over `call.transport`, and mirrors the widget's own control buttons by
 * observing the iframe DOM. `CallControlEvent.StateUpdate` is emitted whenever
 * the snapshot is replaced.
 */
export class CallControl extends EventEmitter implements CallControlState {
  /** Current snapshot. This class always replaces it with a new instance. */
  private state: CallControlState;

  private readonly call: ClientWidgetApi;

  private readonly iframe: HTMLIFrameElement;

  private readonly bodyMutationObserver: MutationObserver;

  private readonly controlMutationObserver: MutationObserver;

  // C-H3: coalesces bursts of body-subtree mutations into a single delayed
  // re-observe pass so a busy EC re-render doesn't thrash the control observer.
  private bodyMutationTimer?: ReturnType<typeof setTimeout>;

  // Stored by setPipMode(); nothing in this class reads it.
  private _pipMode = false;

  // C-M3: last quality payload requested via setQuality(). Held so we can (re)send
  // it once joined (io.lotus.set_quality must not be sent before call-join — a
  // pre-join send pends to a 10s widget timeout, mirroring the deafen gate).
  private lastQuality: LotusQualityPayload | null = null;

  // C-M5: set true by CallControls while a push-to-talk key is held. A PTT hold
  // unmutes the mic transiently, and onMediaState() must NOT treat that as a
  // user-initiated unmute that auto-undeafens the user.
  public pttActive = false;

  // P6-2: mirrors CallEmbed.joined. Set true from forceState(), which CallEmbed
  // invokes only from onCallJoined(). Gates io.lotus.set_deafen so we never send
  // before the fork's widget handler mounts (pre-join sends pend to a 10s
  // timeout — io.lotus toWidget actions must only be sent after call-join).
  private joined = false;

  /** The iframe's document, or `undefined` while it is not reachable. */
  private get document(): Document | undefined {
    return this.iframe.contentDocument ?? this.iframe.contentWindow?.document;
  }

  /** EC's in-call screenshare button, if currently rendered. */
  private get screenshareButton(): HTMLElement | undefined {
    return (
      this.document?.querySelector<HTMLElement>('[data-testid="incall_screenshare"]') ?? undefined
    );
  }

  /** EC's in-call leave button, if currently rendered. */
  private get leaveButton(): Element | undefined {
    return this.document?.querySelector('[data-testid="incall_leave"]') ?? undefined;
  }

  /** EC's settings button, if currently rendered. */
  private get settingsButton(): HTMLElement | undefined {
    // EC 0.20.1: settings button moved to bottom-left; fall back to bottom-center.
    const doc = this.document;
    return (
      doc?.querySelector<HTMLButtonElement>('[data-testid="settings-bottom-left"]') ??
      doc?.querySelector<HTMLButtonElement>('[data-testid="settings-bottom-center"]') ??
      undefined
    );
  }

  /** EC's reactions button, located relative to the leave button. */
  private get reactionsButton(): HTMLElement | undefined {
    // EC 0.20.1: reactions/raise-hand button sits just before the leave button.
    const sibling = this.leaveButton?.previousElementSibling as HTMLElement | null | undefined;
    return sibling ?? undefined;
  }

  /** The layout input whose value is "spotlight", if rendered. */
  private get spotlightButton(): HTMLInputElement | undefined {
    return this.document?.querySelector<HTMLInputElement>('input[value="spotlight"]') ?? undefined;
  }

  /** The layout input whose value is "grid", if rendered. */
  private get gridButton(): HTMLInputElement | undefined {
    return this.document?.querySelector<HTMLInputElement>('input[value="grid"]') ?? undefined;
  }

  /**
   * @param state  Initial control state.
   * @param call   Widget API whose `transport` is used to send actions to EC.
   * @param iframe The iframe hosting the EC widget.
   */
  constructor(state: CallControlState, call: ClientWidgetApi, iframe: HTMLIFrameElement) {
    super();
    this.state = state;
    this.call = call;
    this.iframe = iframe;
    this.bodyMutationObserver = new MutationObserver(this.onBodyMutation.bind(this));
    this.controlMutationObserver = new MutationObserver(this.onControlMutation.bind(this));
  }

  /** Returns the current state snapshot. */
  public getState(): CallControlState {
    return this.state;
  }

  public get microphone(): boolean {
    return this.state.microphone;
  }

  public get video(): boolean {
    return this.state.video;
  }

  /** `true` when call audio is audible, `false` when the user is deafened. */
  public get sound(): boolean {
    return this.state.sound;
  }

  public get screenshare(): boolean {
    return this.state.screenshare;
  }

  public get spotlight(): boolean {
    return this.state.spotlight;
  }

  public get screenshareAudioMuted(): boolean {
    return this.state.screenshareAudioMuted;
  }

  /**
   * Push the current microphone/video state to the widget, then apply the
   * sound (deafen) state and emit a state update.
   *
   * The sound application and the state update run even when the DeviceMute
   * send rejects, so a transport failure cannot leave the local deafen state
   * unapplied. The rejection is still propagated to the caller.
   */
  public async applyState() {
    try {
      await this.setMediaState({
        audio_enabled: this.microphone,
        video_enabled: this.video,
      });
    } finally {
      this.setSound(this.sound);
      this.emitStateUpdate();
    }
  }

  /**
   * Replace microphone/video/sound with `desired`, keeping the DOM-derived
   * screenshare/spotlight flags and the current screenshare-audio mute, apply
   * the result, and open the join gate.
   *
   * @param desired State whose `microphone`, `video` and `sound` are adopted.
   */
  public async forceState(desired: CallControlState) {
    this.state = new CallControlState(
      desired.microphone,
      desired.video,
      desired.sound,
      this.screenshare,
      this.spotlight,
      // Carry the mute flag over like every other state rebuild in this class.
      this.screenshareAudioMuted,
    );
    try {
      await this.applyState();
    } finally {
      // P6-2: CallEmbed calls forceState() only from onCallJoined(), so this is
      // the join transition. Flip the gate open, then push the current deafen
      // state to the fork's freshly-mounted handler. (setSound() inside
      // applyState() ran while this.joined was still false, so it was gated —
      // this is the first send.) Done in `finally` so a rejected DeviceMute
      // send cannot leave the gate closed for the rest of the call.
      this.joined = true;
      this.sendDeafenState();
      this.sendQuality();
    }
  }

  /**
   * C-H1 / C-M3: re-push the sticky fork-side state (deafen + quality) after an
   * EC reconnect. Unlike forceState() this does NOT touch mic/video, so a
   * reconnect can't clobber the user's live media state — it only re-arms the
   * fork handlers that remount on reconnect.
   */
  public resendForkState(): void {
    this.sendDeafenState();
    this.sendQuality();
  }

  /**
   * Start watching the iframe body for re-renders and bind the control
   * observer. Does nothing while the iframe document or its body is missing.
   */
  public startObserving() {
    // `document.body` can be null until the iframe document has been parsed.
    const body = this.document?.body;
    if (!body) return;
    // C-H3: watch the whole body subtree (not just direct children) so we
    // re-bind the control observer when EC re-renders its controls deeper in the
    // tree. Coalesced via onBodyMutation() to avoid thrashing on busy renders.
    this.bodyMutationObserver.observe(body, {
      childList: true,
      subtree: true,
    });
    this.applyBodyMutation();
  }

  /** Body observer callback: schedules at most one pass per 100 ms window. */
  private onBodyMutation() {
    // C-H3: coalesce a burst of subtree mutations into one delayed pass.
    if (this.bodyMutationTimer !== undefined) return;
    this.bodyMutationTimer = setTimeout(() => {
      this.bodyMutationTimer = undefined;
      this.applyBodyMutation();
    }, 100);
  }

  /**
   * Restyle the iframe body, hide EC's own control bar (the leave button's
   * grandparent) and re-bind the control observer.
   */
  private applyBodyMutation() {
    const body = this.document?.body;
    if (!body) return;
    body.style.setProperty('background', 'none', 'important');
    const controls = this.leaveButton?.parentElement?.parentElement;
    if (controls) {
      controls.style.setProperty('position', 'absolute');
      controls.style.setProperty('visibility', 'hidden');
    }
    this.observeControls();
  }

  /**
   * Re-attach the control observer to the current screenshare and spotlight
   * elements, then sync state from them immediately.
   */
  private observeControls() {
    this.controlMutationObserver.disconnect();
    const screenshareBtn = this.screenshareButton;
    if (screenshareBtn) {
      this.controlMutationObserver.observe(screenshareBtn, {
        attributes: true,
        attributeFilter: ['data-kind'],
      });
    }
    const spotlightBtn = this.spotlightButton;
    if (spotlightBtn) {
      this.controlMutationObserver.observe(spotlightBtn, {
        attributes: true,
      });
    }
    this.onControlMutation();
  }

  /** Re-apply the current sound (deafen) state to the widget. */
  public applySound() {
    this.setSound(this.sound);
  }

  /** Send a DeviceMute action with the given audio/video flags. */
  private async setMediaState(state: ElementMediaStatePayload) {
    // transport.send resolves once EC has ACK'd the command, which is enough to
    // consider the mute applied. We deliberately do NOT gate completion on a
    // follow-up DeviceMute state-echo: EC may elide it (e.g. when the requested
    // state already matches its current state) or skip it during teardown,
    // which would strand this promise forever and block applyState(). The echo,
    // when it does arrive, is still handled authoritatively by onMediaState().
    return this.call.transport.send(ElementWidgetActions.DeviceMute, state);
  }

  /**
   * Mute or unmute every `<audio>` element in the iframe and report the deafen
   * state to the fork. Screenshare audio stays muted while
   * `screenshareAudioMuted` is set, even when `sound` is true.
   */
  private setSound(sound: boolean): void {
    this.document?.querySelectorAll('audio').forEach((el) => {
      const isScreenshareAudio = el.getAttribute('data-lk-source') === 'screen_share_audio';
      el.muted = !sound || (isScreenshareAudio && this.screenshareAudioMuted);
    });
    this.sendDeafenState();
  }

  /**
   * Apply `screenshareAudioMuted` to the screenshare `<audio>` elements and
   * report it to the fork. Skipped while deafened: everything is muted then,
   * and setSound() re-applies the flag on un-deafen.
   */
  private applyScreenshareAudioMuted(): void {
    if (!this.sound) return;
    this.document
      ?.querySelectorAll<HTMLAudioElement>('audio[data-lk-source="screen_share_audio"]')
      .forEach((el) => {
        el.muted = this.screenshareAudioMuted;
      });
    this.sendDeafenState();
  }

  // P6-2: send deafen state to the fork (io.lotus.set_deafen). The DOM .muted
  // code above is a transitional fallback — remove once the fork ships & the
  // pin is bumped.
  private sendDeafenState(): void {
    if (!this.joined) return;
    this.call.transport
      .send('io.lotus.set_deafen', {
        deafened: !this.sound,
        screenshareAudioMuted: this.screenshareAudioMuted,
      })
      .catch(() => undefined);
  }

  /**
   * Handle a media-state event from the widget: adopt its audio/video flags
   * (keeping the current value for any flag it omits) and emit an update.
   */
  public onMediaState(evt: CustomEvent<ElementMediaStateDetail>) {
    const { data } = evt.detail;
    if (!data) return;
    this.state = new CallControlState(
      data.audio_enabled ?? this.microphone,
      data.video_enabled ?? this.video,
      this.sound,
      this.screenshare,
      this.spotlight,
      this.screenshareAudioMuted,
    );
    this.emitStateUpdate();
    // C-M5: auto-undeafen when the mic turns on, but NOT for a transient
    // push-to-talk unmute — a PTT tap while deafened must not silently
    // un-deafen the user.
    if (this.microphone && !this.sound && !this.pttActive) {
      this.toggleSound();
    }
  }

  /**
   * Derive screenshare/spotlight from EC's controls and update state when
   * something actually changed.
   */
  private onControlMutation() {
    const wasScreensharing = this.screenshare;
    const screenshare = this.screenshareButton?.getAttribute('data-kind') === 'primary';
    const spotlight = this.spotlightButton?.checked ?? false;
    // C-M6: when a screenshare stops, clear the screenshare-audio mute so a
    // later screenshare doesn't start pre-muted.
    const screenshareAudioMuted =
      wasScreensharing && !screenshare ? false : this.screenshareAudioMuted;
    const muteChanged = this.state.screenshareAudioMuted !== screenshareAudioMuted;
    // C-H3: the body observer now watches subtree:true, so this fires on any DOM
    // churn in EC's controls. Only re-emit (→ re-render every consumer) when one
    // of the values this method derives actually changed — microphone/video/sound
    // are copied unchanged from the current state here.
    if (
      this.state.screenshare === screenshare &&
      this.state.spotlight === spotlight &&
      !muteChanged
    ) {
      return;
    }
    this.state = new CallControlState(
      this.microphone,
      this.video,
      this.sound,
      screenshare,
      spotlight,
      screenshareAudioMuted,
    );
    // Push the C-M6 reset to the <audio> elements and the fork as well;
    // otherwise the host reports "unmuted" while both stay muted.
    if (muteChanged) this.applyScreenshareAudioMuted();
    this.emitStateUpdate();
  }

  /** Request an explicit microphone state, leaving video as it is. */
  public setMicrophone(enabled: boolean) {
    const payload: ElementMediaStatePayload = {
      audio_enabled: enabled,
      video_enabled: this.video,
    };
    return this.setMediaState(payload);
  }

  /** Request the opposite of the current microphone state. */
  public toggleMicrophone() {
    const payload: ElementMediaStatePayload = {
      audio_enabled: !this.microphone,
      video_enabled: this.video,
    };
    return this.setMediaState(payload);
  }

  /** Request the opposite of the current video state. */
  public toggleVideo() {
    const payload: ElementMediaStatePayload = {
      audio_enabled: this.microphone,
      video_enabled: !this.video,
    };
    return this.setMediaState(payload);
  }

  /**
   * Toggle deafen. Deafening while the microphone is on also requests a
   * microphone toggle (mute).
   */
  public toggleSound() {
    const sound = !this.sound;
    // P6-2: commit state before setSound() so sendDeafenState() (which reads
    // this.sound) reports the new value.
    this.state = new CallControlState(
      this.microphone,
      this.video,
      sound,
      this.screenshare,
      this.spotlight,
      this.screenshareAudioMuted,
    );
    // setSound() already keeps screenshare audio muted while
    // screenshareAudioMuted is set, so no separate re-apply (and no second
    // io.lotus.set_deafen send) is needed after un-deafening.
    this.setSound(sound);
    this.emitStateUpdate();
    if (!this.sound && this.microphone) {
      // Fire-and-forget, matching the other transport sends in this class.
      this.toggleMicrophone().catch(() => undefined);
    }
  }

  /** Toggle the screenshare-audio mute and apply it. */
  public toggleScreenshareAudio() {
    const screenshareAudioMuted = !this.screenshareAudioMuted;
    this.state = new CallControlState(
      this.microphone,
      this.video,
      this.sound,
      this.screenshare,
      this.spotlight,
      screenshareAudioMuted,
    );
    this.emitStateUpdate();
    this.applyScreenshareAudioMuted();
  }

  /** Click EC's screenshare button, if rendered. */
  public toggleScreenshare() {
    this.screenshareButton?.click();
  }

  /** Switch between spotlight and grid by clicking the matching EC input. */
  public toggleSpotlight() {
    if (this.spotlight) {
      this.gridButton?.click();
      return;
    }
    this.spotlightButton?.click();
  }

  /** Record the picture-in-picture flag. */
  public setPipMode(pip: boolean) {
    this._pipMode = pip;
  }

  /** Click EC's reactions button, if rendered. */
  public toggleReactions() {
    this.reactionsButton?.click();
  }

  /** Click EC's settings button, if rendered. */
  public toggleSettings() {
    this.settingsButton?.click();
  }

  /**
   * Focus a specific participant in Element Call.
   *
   * Sends the fork's `io.lotus.focus_participant` widget action with the
   * participant's user ID; no DOM interaction is involved. Send failures are
   * ignored.
   *
   * @param userId User ID of the participant to pin.
   */
  public focusCameraParticipant(userId: string): void {
    // [lotus #4] Pin the participant via the fork's widget action instead of
    // DOM-poking tiles. EC's layout honors it — including surfacing the camera
    // alongside a screenshare (A5) — and it's version-stable. The fork always
    // acks, so the promise resolves regardless.
    this.call.transport.send('io.lotus.focus_participant', { userId }).catch(() => undefined);
  }

  /** [lotus #4] Clear any manual spotlight pin and return to speaker-follows. */
  public clearFocusParticipant(): void {
    this.call.transport.send('io.lotus.focus_participant', { userId: null }).catch(() => undefined);
  }

  /**
   * [lotus #3 / P5-15] Inject a soundboard clip into the call so other
   * participants hear it. The fork publishes it as a separate LiveKit audio
   * track (`io.lotus.inject_audio`) rather than splicing the mic. `url` must be
   * an https/blob URL the widget can fetch WITHOUT credentials — the host
   * resolves an mxc clip to a `blob:` object URL first (authenticated media
   * can't be fetched cross-realm by the widget). `volume` is in the range
   * 0 to 1.
   *
   * The local user does not hear their own published track, so callers should
   * also play the clip locally for feedback.
   */
  public injectAudio(url: string, volume = 1): void {
    this.call.transport.send('io.lotus.inject_audio', { url, volume }).catch(() => undefined);
  }

  /**
   * [lotus #7 / P5-31] Apply audio/screenshare encoding limits to the local
   * published tracks (the fork's `io.lotus.set_quality` action, via
   * `RTCRtpSender.setParameters` — no republish). Bitrates are bits/sec,
   * framerate is fps. A field set to `null` clears that cap. Settings are
   * sticky fork-side (re-applied on every re-publish / reconnect). Values are
   * clamped fork-side, so out-of-range input can't brick the encoder.
   */
  public setQuality(settings: LotusQualityPayload): void {
    // C-M3: remember the request and only send once joined; sendQuality() gates
    // on this.joined so a pre-join call is a no-op that we replay on join.
    this.lastQuality = settings;
    this.sendQuality();
  }

  // C-M3: push the last-requested quality to the fork. Gated on this.joined so
  // we never send io.lotus.set_quality before the fork's handler mounts (a
  // pre-join send would pend to a 10s widget timeout).
  private sendQuality(): void {
    if (!this.joined || !this.lastQuality) return;
    this.call.transport.send('io.lotus.set_quality', this.lastQuality).catch(() => undefined);
  }

  /** Cancel any pending re-observe pass and disconnect both observers. */
  public dispose() {
    if (this.bodyMutationTimer !== undefined) {
      clearTimeout(this.bodyMutationTimer);
      this.bodyMutationTimer = undefined;
    }
    this.bodyMutationObserver.disconnect();
    this.controlMutationObserver.disconnect();
  }

  private emitStateUpdate() {
    this.emit(CallControlEvent.StateUpdate);
  }
}