fix(calls): make speaker detection scan full DOM via body observer (N116/N117)

useCallSpeakers had two bugs. First, it rebuilt the speaker Set from only the tiles that mutated in each batch, so a still-speaking participant whose tile didn't mutate was dropped. Second, it observed a static querySelectorAll NodeList, so tiles for participants who joined mid-call were never watched. It is rewritten to mirror useRemoteAllMuted in the same file: a single body-level MutationObserver (subtree + childList + attributes, filtered to class/style) re-scans ALL [data-video-fit] tiles on each relevant mutation. The speaking criterion (::before background-image !== 'none') and the id (aria-label + isUserId) are unchanged, so on real EC DOM every speaker the old code detected is still detected, plus the ones it missed. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 10:55:19 -04:00
parent adbda094e7
commit 19feca4964
1 changed files with 95 additions and 39 deletions
@@ -1,60 +1,116 @@
-import { useCallback, useEffect, useMemo, useState } from 'react';
+import { useEffect, useState } from 'react';
 import { CallEmbed } from '../plugins/call';
 import { useMutationObserver } from './useMutationObserver';
 import { isUserId } from '../utils/matrix';
 import { useCallMembers, useCallSession } from './useCall';
 import { useCallJoined } from './useCallEmbed';
 /**
 * Returns the set of Matrix user IDs currently speaking in the Element Call
 * iframe.
 *
 * EC renders each participant's video tile with a `[data-video-fit]` wrapper.
 * When a participant is speaking, EC draws a speaking indicator via the tile's
 * `::before` pseudo-element `background-image` (anything other than `none`).
 * The participant's Matrix user ID is exposed on the first descendant carrying
 * an `aria-label`.
 *
 * We watch the whole iframe document so tiles added/removed mid-call are picked
 * up automatically, and on every relevant mutation we re-scan ALL `[data-video-fit]`
 * tiles and rebuild the set from the full current DOM state (rather than just the
 * tiles in the mutation batch).
 */
 export const useCallSpeakers = (callEmbed: CallEmbed): Set<string> => {
  // Latest set of speaking user IDs; it is replaced wholesale (never mutated
  // in place) each time a scan runs.
  const [speakers, setSpeakers] = useState(new Set<string>());
  const callSession = useCallSession(callEmbed.room);
  const callMembers = useCallMembers(callSession);
  const joined = useCallJoined(callEmbed);
  // NOTE(review): from here down to the closing `);` of useMutationObserver the
  // code uses useMemo/useCallback, which the updated 'react' import in this
  // hunk no longer brings in. Presumably this is the pre-change implementation
  // rendered without '-' markers -- confirm against the actual file.
  const videoContainers = useMemo(() => {
    if (callMembers && joined) return callEmbed.document?.querySelectorAll('[data-video-fit]');
    return undefined;
  }, [callEmbed, callMembers, joined]);
  const mutationObserver = useMutationObserver(
    useCallback(
      (mutations) => {
        // Starts from an empty set and only adds targets of attribute
        // mutations in this batch, so tiles absent from the batch are dropped.
        const s = new Set<string>();
        mutations.forEach((mutation) => {
          if (mutation.type !== 'attributes') return;
          const el = mutation.target as HTMLElement;
          const style = callEmbed.iframe.contentWindow?.getComputedStyle(el, '::before');
          if (!style) return;
          const tileBackgroundImage = style.getPropertyValue('background-image');
          const speaking = tileBackgroundImage !== 'none';
          if (!speaking) return;
          const speakerId = el.querySelector('[aria-label]')?.getAttribute('aria-label');
          if (speakerId && isUserId(speakerId)) {
            s.add(speakerId);
          }
        });
        setSpeakers(s);
      },
      [callEmbed],
    ),
  );
  useEffect(() => {
    // When there are no call members or the call is not joined, the '+' lines
    // below reset the speaker set and return without attaching any observer.
-    videoContainers?.forEach((element) => {
+    if (!callMembers || !joined) {
-      mutationObserver.observe(element, {
+      setSpeakers(new Set<string>());
      return undefined;
    }
    // Resolves the iframe's document, preferring contentDocument and falling
    // back to contentWindow.document; yields undefined when neither is set.
    const getDoc = (): Document | undefined =>
      callEmbed.iframe.contentDocument ?? callEmbed.iframe.contentWindow?.document ?? undefined;
    // Rebuilds the speaker set from every tile currently in the iframe
    // document, or clears it when the document is unavailable.
    const syncState = (): void => {
      const doc = getDoc();
      if (!doc) {
        setSpeakers(new Set<string>());
        return;
      }
      const s = new Set<string>();
      // Re-scan every tile on each mutation and build the set from the full
      // current DOM state, not just the tiles that mutated this batch.
      const tiles = doc.querySelectorAll<HTMLElement>('[data-video-fit]');
      tiles.forEach((el) => {
        // A tile counts as speaking when its ::before computed
        // background-image is anything other than 'none'.
        const style = callEmbed.iframe.contentWindow?.getComputedStyle(el, '::before');
        if (!style) return;
        const tileBackgroundImage = style.getPropertyValue('background-image');
        const speaking = tileBackgroundImage !== 'none';
        if (!speaking) return;
        // The first descendant with an aria-label supplies the candidate id;
        // it is kept only if isUserId accepts it.
        const speakerId = el.querySelector('[aria-label]')?.getAttribute('aria-label');
        if (speakerId && isUserId(speakerId)) {
          s.add(speakerId);
        }
      });
      // NOTE(review): a fresh Set is stored on every scan, even when its
      // members match the previous one. With attribute mutations observed
      // across the whole body this may cause frequent re-renders -- consider
      // comparing against the previous set before updating.
      setSpeakers(s);
    };
    let tileObserver: MutationObserver | undefined;
    // Replaces any existing tile observer with a new one on the document body
    // and then performs an immediate scan.
    const attachObserver = (): void => {
      const doc = getDoc();
      // NOTE(review): only `doc` is checked here, yet `doc.body` is what gets
      // passed to observe() below. The body watcher further down implies a
      // document without a body is an expected state; in that state observe()
      // would receive null and presumably throw -- confirm and guard on body.
      if (!doc) return;
      tileObserver?.disconnect();
      // Watch the whole document for attribute changes on tiles (which carry
      // the speaking indicator) and for new tiles being added/removed.
      tileObserver = new MutationObserver((mutations) => {
        // A batch is relevant if it contains any attribute mutation, or a
        // childList mutation whose added/removed nodes contain a tile.
        // NOTE(review): `n instanceof Element` tests against this window's
        // Element constructor, while the observed nodes come from the iframe's
        // document; cross-realm instanceof likely evaluates to false, which
        // would make the childList branch never match -- verify (a nodeType
        // check would avoid this). Also, querySelector only searches
        // descendants, so an added/removed node that is itself a
        // [data-video-fit] tile is not matched.
        const relevant = mutations.some(
          (m) =>
            m.type === 'attributes' ||
            (m.type === 'childList' &&
              (Array.from(m.addedNodes).some(
                (n) => n instanceof Element && n.querySelector('[data-video-fit]'),
              ) ||
                Array.from(m.removedNodes).some(
                  (n) => n instanceof Element && n.querySelector('[data-video-fit]'),
                ))),
        );
        if (relevant) syncState();
      });
      // attributeFilter restricts attribute notifications to class and style
      // changes anywhere under the body.
      tileObserver.observe(doc.body, {
        subtree: true,
        childList: true,
        attributes: true,
        attributeFilter: ['class', 'style'],
      });
-    });
+      syncState();
    };
    attachObserver();
    // If iframe isn't ready yet, wait for body to be available.
    // NOTE(review): this watcher observes `doc` with childList only (no
    // subtree). <body> is normally inserted under <html>, not directly under
    // the document node, so its insertion may not be reported -- confirm.
    // Nothing is observed at all when getDoc() returns undefined.
    let bodyWatcher: MutationObserver | undefined;
    if (!getDoc()?.body) {
      bodyWatcher = new MutationObserver(() => {
        if (getDoc()?.body) {
          bodyWatcher?.disconnect();
          bodyWatcher = undefined;
          attachObserver();
        }
      });
      const doc = getDoc();
      if (doc) bodyWatcher.observe(doc, { childList: true });
    }
    // Cleanup: stop both observers when the effect re-runs or unmounts.
    return () => {
-      mutationObserver.disconnect();
+      tileObserver?.disconnect();
      bodyWatcher?.disconnect();
    };
    // NOTE(review): with `callMembers` in the dependency list the observers
    // are torn down and re-attached whenever its identity changes -- TODO
    // confirm useCallMembers returns a stable reference between renders.
-  }, [videoContainers, mutationObserver]);
+  }, [callEmbed, callMembers, joined]);
  return speakers;
 };