fix(calls): make speaker detection scan full DOM via body observer (N116/N117)

useCallSpeakers rebuilt the speaker Set from only the mutated tiles in each
batch (so a still-speaking participant whose tile didn't mutate was dropped),
and observed a static querySelectorAll NodeList (so tiles for participants who
joined mid-call were never watched). Rewritten to mirror useRemoteAllMuted in
the same file: a single body-level MutationObserver (subtree+childList+attrs)
re-scans ALL [data-video-fit] tiles on each relevant mutation. The speaking
criterion (::before background-image !== 'none') and the id (aria-label +
isUserId) are unchanged, so behavior on real EC DOM is a strict superset.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-28 10:55:19 -04:00
parent adbda094e7
commit 19feca4964
+95 -39
View File
@@ -1,60 +1,116 @@
import { useCallback, useEffect, useMemo, useState } from 'react'; import { useEffect, useState } from 'react';
import { CallEmbed } from '../plugins/call'; import { CallEmbed } from '../plugins/call';
import { useMutationObserver } from './useMutationObserver';
import { isUserId } from '../utils/matrix'; import { isUserId } from '../utils/matrix';
import { useCallMembers, useCallSession } from './useCall'; import { useCallMembers, useCallSession } from './useCall';
import { useCallJoined } from './useCallEmbed'; import { useCallJoined } from './useCallEmbed';
/** Selector matching the wrapper Element Call renders around each participant's video tile. */
const VIDEO_TILE_SELECTOR = '[data-video-fit]';

/**
 * Whether `node` is a video tile or contains one.
 *
 * Nodes reported by the observer belong to the iframe's realm, so an
 * `instanceof Element` check against the parent window's constructor is always
 * false for them. `nodeType` is realm-independent. `matches` is needed because
 * `querySelector` only searches descendants and would miss a node that is
 * itself a tile.
 */
const isOrContainsVideoTile = (node: Node): boolean => {
  if (node.nodeType !== Node.ELEMENT_NODE) return false;
  const el = node as Element;
  return el.matches(VIDEO_TILE_SELECTOR) || el.querySelector(VIDEO_TILE_SELECTOR) !== null;
};

/** Whether two sets hold exactly the same user IDs. */
const hasSameMembers = (a: ReadonlySet<string>, b: ReadonlySet<string>): boolean =>
  a.size === b.size && Array.from(a).every((id) => b.has(id));

/**
 * Returns the set of Matrix user IDs currently speaking in the Element Call
 * iframe.
 *
 * EC renders each participant's video tile with a `[data-video-fit]` wrapper.
 * When a participant is speaking, EC draws a speaking indicator via the tile's
 * `::before` pseudo-element `background-image` (anything other than `none`).
 * The participant's Matrix user ID is read from the first descendant carrying
 * an `aria-label`, and is only accepted if it passes `isUserId`.
 *
 * We watch the whole iframe body so tiles added/removed mid-call are picked up
 * automatically, and on every relevant mutation we re-scan ALL tiles and
 * rebuild the set from the full current DOM state (rather than just the tiles
 * in the mutation batch).
 *
 * The returned `Set` keeps its identity while the speaker membership is
 * unchanged, so unrelated class/style mutations inside the iframe do not
 * re-render consumers.
 *
 * @param callEmbed - The call embed whose iframe hosts Element Call.
 * @returns The user IDs of the participants currently detected as speaking;
 *   empty while the call is not joined or has no members.
 */
export const useCallSpeakers = (callEmbed: CallEmbed): Set<string> => {
  const [speakers, setSpeakers] = useState(new Set<string>());
  const callSession = useCallSession(callEmbed.room);
  const callMembers = useCallMembers(callSession);
  const joined = useCallJoined(callEmbed);

  useEffect(() => {
    // Only replace the state when membership actually changed; a fresh Set on
    // every mutation would re-render on each class/style change in the iframe.
    const publish = (next: Set<string>): void => {
      setSpeakers((prev) => (hasSameMembers(prev, next) ? prev : next));
    };

    if (!callMembers || !joined) {
      publish(new Set<string>());
      return undefined;
    }

    const { iframe } = callEmbed;

    const getDoc = (): Document | undefined =>
      iframe.contentDocument ?? iframe.contentWindow?.document ?? undefined;

    // Re-scan every tile and build the set from the full current DOM state,
    // not just the tiles that mutated in this batch.
    const syncState = (): void => {
      const doc = getDoc();
      const win = iframe.contentWindow;
      const next = new Set<string>();
      if (doc && win) {
        doc.querySelectorAll<HTMLElement>(VIDEO_TILE_SELECTOR).forEach((tile) => {
          const tileBackgroundImage = win
            .getComputedStyle(tile, '::before')
            .getPropertyValue('background-image');
          if (tileBackgroundImage === 'none') return;
          const speakerId = tile.querySelector('[aria-label]')?.getAttribute('aria-label');
          if (speakerId && isUserId(speakerId)) {
            next.add(speakerId);
          }
        });
      }
      publish(next);
    };

    let tileObserver: MutationObserver | undefined;
    let bodyWatcher: MutationObserver | undefined;

    const detach = (): void => {
      tileObserver?.disconnect();
      tileObserver = undefined;
      bodyWatcher?.disconnect();
      bodyWatcher = undefined;
    };

    const attachObserver = (): void => {
      detach();
      const doc = getDoc();
      if (!doc) {
        // No document yet: the iframe `load` listener below retries.
        publish(new Set<string>());
        return;
      }

      // `body` is null while the document is still being parsed; observing
      // null would throw, so wait for it instead.
      const body: HTMLElement | null = doc.body;
      if (!body) {
        // <body> is inserted under <html>, not directly under the document,
        // so `subtree` is required to see it appear.
        const watcher = new MutationObserver(() => {
          if (doc.body) attachObserver();
        });
        watcher.observe(doc, { childList: true, subtree: true });
        bodyWatcher = watcher;
        return;
      }

      // Watch the whole body for class/style changes (which drive the speaking
      // indicator) and for tiles being added or removed.
      const observer = new MutationObserver((mutations) => {
        const relevant = mutations.some(
          (m) =>
            m.type === 'attributes' ||
            (m.type === 'childList' &&
              (Array.from(m.addedNodes).some(isOrContainsVideoTile) ||
                Array.from(m.removedNodes).some(isOrContainsVideoTile))),
        );
        if (relevant) syncState();
      });
      observer.observe(body, {
        subtree: true,
        childList: true,
        attributes: true,
        attributeFilter: ['class', 'style'],
      });
      tileObserver = observer;
      syncState();
    };

    attachObserver();

    // A (re)load gives the iframe a new document; observers bound to the old
    // one go silent, so re-attach whenever the iframe finishes loading.
    iframe.addEventListener('load', attachObserver);

    return () => {
      iframe.removeEventListener('load', attachObserver);
      detach();
    };
  }, [callEmbed, callMembers, joined]);

  return speakers;
};