Files
cinny/src/app/features/message-search/useLocalMessageSearch.ts
T
jared 7da960ac8c feat(search): opt-in persistent index for encrypted-room search (P4-8)
Raw-IndexedDB cache (lotus-search-cache: messages keyed [roomId,eventId] +
per-room coverage) merged into local search with in-memory-wins dedupe. OPT-IN
(default off) via a standalone atom — stores decrypted text at rest, so it ships
with a privacy note, a Clear button, and an unconditional wipe on logout
(initMatrix). All IDB errors degrade to cache-miss. +8 tests (1 IDB skip in node).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-01 21:19:02 -04:00

266 lines
9.2 KiB
TypeScript

import { EventType, MatrixEvent } from 'matrix-js-sdk';
import { useCallback } from 'react';
import { useAtomValue } from 'jotai';
import { useMatrixClient } from '../../hooks/useMatrixClient';
import { ResultGroup, ResultItem } from './useMessageSearch';
import { searchCacheEnabledAtom } from '../../state/searchCacheEnabled';
import {
mergeSearchResults,
queryRoom,
saveRoomIndex,
SearchCacheRow,
} from '../../utils/searchCache';
export type LocalSearchParams = {
term: string;
roomIds: string[];
senders?: string[];
/** Optional date-range filter (ms). Applied to both memory and cached rows. */
fromTs?: number;
toTs?: number;
};
export type LocalSearchResult = {
groups: ResultGroup[];
/** How many rooms in scope are encrypted */
encryptedRoomsCount: number;
/** How many of those had locally cached events to scan */
searchedRoomsCount: number;
};
/** Extracted, searchable plaintext for a single message event. */
type ExtractedText = {
body: string;
formattedBody: string;
pollText: string;
};
const POLL_START_TYPES = ['m.poll.start', 'org.matrix.msc3381.poll.start'];
/**
* Pull the text we index/search from a decrypted event's content. Returns
* `null` for events that carry no searchable text (e.g. stickers).
*/
const extractText = (event: MatrixEvent): ExtractedText | null => {
const evType = event.getType();
const content = event.getContent();
if (POLL_START_TYPES.includes(evType)) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const poll = (content['m.poll'] ?? content['org.matrix.msc3381.poll.start']) as any;
if (!poll) return null;
const qBody =
(poll.question?.['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
(poll.question?.body as string | undefined) ??
'';
const answerBodies = ((poll.answers ?? []) as Array<Record<string, unknown>>)
.map(
(a) =>
((a['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(a['org.matrix.msc3381.poll.answer'] as any)?.body ??
'') as string,
)
.join(' ');
const pollText = `${qBody} ${answerBodies}`.trim();
return pollText ? { body: '', formattedBody: '', pollText } : null;
}
if (evType !== EventType.RoomMessage) return null;
const body = (content.body as string | undefined) ?? '';
const formattedBody = (content.formatted_body as string | undefined) ?? '';
if (!body && !formattedBody) return null;
return { body, formattedBody, pollText: '' };
};
/** Does the extracted text contain the (already-lowercased) term? */
const matchesTerm = (text: ExtractedText, termLower: string): boolean =>
text.body.toLowerCase().includes(termLower) ||
text.formattedBody.toLowerCase().includes(termLower) ||
text.pollText.toLowerCase().includes(termLower);
const rowMatchesTerm = (row: SearchCacheRow, termLower: string): boolean =>
row.body.toLowerCase().includes(termLower) ||
(row.formattedBody ?? '').toLowerCase().includes(termLower) ||
(row.pollText ?? '').toLowerCase().includes(termLower);
/** Build the synthetic result item a cached row renders as (text message). */
const rowToResultItem = (row: SearchCacheRow): ResultItem => {
const bodyText = row.body || row.pollText || '';
const content: Record<string, unknown> = { msgtype: 'm.text', body: bodyText };
if (row.formattedBody) {
content.format = 'org.matrix.custom.html';
content.formatted_body = row.formattedBody;
}
const syntheticEvent = {
room_id: row.roomId,
event_id: row.eventId,
type: EventType.RoomMessage,
sender: row.sender,
origin_server_ts: row.ts,
content,
unsigned: {},
};
return {
rank: 0,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
event: syntheticEvent as any,
context: { events_before: [], events_after: [], profile_info: {} },
};
};
/**
* Client-side full-text search over locally cached events in encrypted rooms.
* The homeserver cannot search E2EE message content, so we scan whatever the
* client has already received and decrypted in memory.
*
* When the persistent search cache is enabled (opt-in), the in-memory scan is
* also persisted to IndexedDB (fire-and-forget) and merged with prior cached
* coverage so results survive reloads. When disabled, zero cache reads/writes
* occur.
*/
export const useLocalMessageSearch = () => {
const mx = useMatrixClient();
const cacheEnabled = useAtomValue(searchCacheEnabledAtom);
const search = useCallback(
async ({
term,
roomIds,
senders,
fromTs,
toTs,
}: LocalSearchParams): Promise<LocalSearchResult> => {
const trimmedTerm = term.trim();
const senderSet = senders && senders.length > 0 ? new Set(senders) : null;
// Sender-only mode: no text filter, search all rooms (server can't filter by sender alone)
const senderOnlyMode = !trimmedTerm && !!senderSet;
if (!trimmedTerm && !senderSet) {
return { groups: [], encryptedRoomsCount: 0, searchedRoomsCount: 0 };
}
const termLower = trimmedTerm.toLowerCase();
const inRange = (ts: number): boolean =>
(fromTs === undefined || ts >= fromTs) && (toTs === undefined || ts <= toTs);
const groups: ResultGroup[] = [];
let encryptedRoomsCount = 0;
let searchedRoomsCount = 0;
for (const roomId of roomIds) {
const room = mx.getRoom(roomId);
if (!room) continue;
const isEncrypted = !!room.currentState.getStateEvents(EventType.RoomEncryption, '');
// Text search: encrypted rooms only — server already covers plaintext rooms
// Sender-only: all rooms — server has no sender-only search
if (!senderOnlyMode && !isEncrypted) continue;
if (isEncrypted) encryptedRoomsCount += 1;
const events = room
.getUnfilteredTimelineSet()
.getTimelines()
.flatMap((tl) => tl.getEvents());
// eslint-disable-next-line no-await-in-loop
const cachedRows = cacheEnabled ? await queryRoom(roomId) : [];
if (events.length === 0 && cachedRows.length === 0) continue;
searchedRoomsCount += 1;
const memoryItems: ResultItem[] = [];
const rowsToPersist: SearchCacheRow[] = [];
for (let i = 0; i < events.length; i += 1) {
const event = events[i];
if (event.isDecryptionFailure()) continue;
if (event.isRedacted()) continue;
const evType = event.getType();
const isSticker = evType === 'm.sticker';
const isMessageLike =
evType === EventType.RoomMessage || POLL_START_TYPES.includes(evType);
// Sender-only mode indexes/returns all message types; text mode needs text.
if (!senderOnlyMode && !isMessageLike && !isSticker) continue;
const sender = event.getSender() ?? '';
const ts = event.getTs();
const text = extractText(event);
// Persist every indexable (text-bearing) event we scanned, regardless
// of whether it matches the current term — future searches benefit.
if (cacheEnabled && text && event.getId()) {
rowsToPersist.push({
roomId,
eventId: event.getId() as string,
ts,
sender,
body: text.body,
...(text.formattedBody ? { formattedBody: text.formattedBody } : {}),
...(text.pollText ? { pollText: text.pollText } : {}),
});
}
if (senderSet && !senderSet.has(sender)) continue;
if (!inRange(ts)) continue;
if (!senderOnlyMode) {
if (!text || !matchesTerm(text, termLower)) continue;
}
const content = event.getContent();
const syntheticEvent = {
room_id: roomId,
event_id: event.getId() ?? '',
type: evType,
sender,
origin_server_ts: ts,
content,
unsigned: event.getUnsigned(),
};
memoryItems.push({
rank: 0,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
event: syntheticEvent as any,
context: { events_before: [], events_after: [], profile_info: {} },
});
}
// Match cached rows (skip ids already present in memory happens in merge).
const cachedItems: ResultItem[] = [];
cachedRows.forEach((row) => {
if (senderSet && !senderSet.has(row.sender)) return;
if (!inRange(row.ts)) return;
if (!senderOnlyMode && !rowMatchesTerm(row, termLower)) return;
cachedItems.push(rowToResultItem(row));
});
const items = mergeSearchResults(memoryItems, cachedItems);
if (items.length > 0) {
groups.push({ roomId, items });
}
// Fire-and-forget persist of freshly scanned rows + coverage.
// saveRoomIndex swallows all errors internally, so a floating promise
// here can never reject.
if (cacheEnabled && rowsToPersist.length > 0) {
saveRoomIndex(roomId, rowsToPersist);
}
}
return { groups, encryptedRoomsCount, searchedRoomsCount };
},
[mx, cacheEnabled],
);
return search;
};