feat(search): opt-in persistent index for encrypted-room search (P4-8)

Raw-IndexedDB cache (lotus-search-cache: messages keyed [roomId,eventId] +
per-room coverage) merged into local search with in-memory-wins dedupe. OPT-IN
(default off) via a standalone atom — stores decrypted text at rest, so it ships
with a privacy note, a Clear button, and an unconditional wipe on logout
(initMatrix). All IDB errors degrade to cache-miss. +8 tests (1 IDB skip in node).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-07-01 21:19:02 -04:00
parent ed51c39fe7
commit 7da960ac8c
6 changed files with 700 additions and 78 deletions
@@ -1,12 +1,23 @@
import { EventType } from 'matrix-js-sdk';
import { EventType, MatrixEvent } from 'matrix-js-sdk';
import { useCallback } from 'react';
import { useAtomValue } from 'jotai';
import { useMatrixClient } from '../../hooks/useMatrixClient';
import { ResultGroup, ResultItem } from './useMessageSearch';
import { searchCacheEnabledAtom } from '../../state/searchCacheEnabled';
import {
mergeSearchResults,
queryRoom,
saveRoomIndex,
SearchCacheRow,
} from '../../utils/searchCache';
/** Input accepted by the local (client-side) message search callback. */
export type LocalSearchParams = {
/** Search text; it is trimmed and lowercased before being matched as a substring. */
term: string;
/** Ids of the rooms to search. */
roomIds: string[];
/** Optional sender filter; an absent or empty list applies no sender filtering. */
senders?: string[];
/** Optional date-range filter (ms). Applied to both memory and cached rows. */
fromTs?: number;
/** Inclusive upper bound (ms) of the date-range filter; unbounded when omitted. */
toTs?: number;
};
export type LocalSearchResult = {
@@ -17,19 +28,110 @@ export type LocalSearchResult = {
searchedRoomsCount: number;
};
/**
 * Extracted, searchable plaintext for a single message event.
 * Fields that do not apply to the event kind are empty strings.
 */
type ExtractedText = {
/** Plain-text `body` of a room message; empty for polls. */
body: string;
/** `formatted_body` of a room message; empty when absent and for polls. */
formattedBody: string;
/** Poll question followed by its answer texts, space-joined; empty for non-poll events. */
pollText: string;
};
/** Event types that are treated as poll-start events. */
const POLL_START_TYPES = ['m.poll.start', 'org.matrix.msc3381.poll.start'];

/** Narrow an arbitrary JSON value to a non-null object so it can be indexed safely. */
const isJsonObject = (value: unknown): value is Record<string, unknown> =>
  typeof value === 'object' && value !== null;

/** Read `holder[key]`, or `undefined` when `holder` is not an object. */
const readField = (holder: unknown, key: string): unknown =>
  isJsonObject(holder) ? holder[key] : undefined;

/** Return `value` when it is a string, otherwise `undefined`. */
const asString = (value: unknown): string | undefined =>
  typeof value === 'string' ? value : undefined;

/** `body` of the first entry of an `m.text`-style array, when it is well-formed. */
const firstTextBody = (textList: unknown): string | undefined =>
  Array.isArray(textList) ? asString(readField(textList[0], 'body')) : undefined;

/**
 * Pull the text we index/search from a decrypted event's content. Returns
 * `null` for events that carry no searchable text (e.g. stickers).
 *
 * Event content is remote, sender-controlled JSON, so every field is checked
 * at runtime instead of being cast: a non-string `body`/`formatted_body`, a
 * non-array `answers`, or a `null` answer entry is treated as "no text" rather
 * than being passed on to `.toLowerCase()` / `.map()` where it would throw and
 * abort the whole search.
 *
 * @param event - Event whose `getType()` and `getContent()` are inspected.
 * @returns The extracted text fields, or `null` when nothing searchable exists.
 */
const extractText = (event: MatrixEvent): ExtractedText | null => {
  const evType = event.getType();
  const content: unknown = event.getContent();

  if (POLL_START_TYPES.includes(evType)) {
    const poll =
      readField(content, 'm.poll') ?? readField(content, 'org.matrix.msc3381.poll.start');
    if (!isJsonObject(poll)) return null;

    // Question text: prefer the first `m.text` entry, then a plain `body`.
    const { question } = poll;
    const qBody =
      firstTextBody(readField(question, 'm.text')) ?? asString(readField(question, 'body')) ?? '';

    // Answer texts: same preference order per answer; malformed entries yield ''.
    const answers: unknown[] = Array.isArray(poll.answers) ? poll.answers : [];
    const answerBodies = answers
      .map(
        (answer) =>
          firstTextBody(readField(answer, 'm.text')) ??
          asString(readField(readField(answer, 'org.matrix.msc3381.poll.answer'), 'body')) ??
          '',
      )
      .join(' ');

    const pollText = `${qBody} ${answerBodies}`.trim();
    return pollText ? { body: '', formattedBody: '', pollText } : null;
  }

  if (evType !== EventType.RoomMessage) return null;

  const body = asString(readField(content, 'body')) ?? '';
  const formattedBody = asString(readField(content, 'formatted_body')) ?? '';
  if (!body && !formattedBody) return null;
  return { body, formattedBody, pollText: '' };
};
/**
 * Case-insensitive substring test of an event's extracted text.
 * `termLower` must already be lowercased by the caller; the body, formatted
 * body and poll text are checked in that order and the first hit wins.
 */
const matchesTerm = (text: ExtractedText, termLower: string): boolean => {
  const fields = [text.body, text.formattedBody, text.pollText];
  return fields.some((field) => field.toLowerCase().includes(termLower));
};
/**
 * Case-insensitive substring test of a cached row's text.
 * `termLower` must already be lowercased by the caller. The optional
 * `formattedBody` / `pollText` fields are treated as empty when missing.
 */
const rowMatchesTerm = (row: SearchCacheRow, termLower: string): boolean => {
  const haystacks = [row.body, row.formattedBody ?? '', row.pollText ?? ''];
  return haystacks.some((haystack) => haystack.toLowerCase().includes(termLower));
};
/**
 * Convert a cached row into the result item shape used for search hits.
 * The row is always presented as an `m.text` room message: the displayed body
 * is the row's body, falling back to its poll text, and HTML formatting fields
 * are attached only when the row has a formatted body.
 */
const rowToResultItem = (row: SearchCacheRow): ResultItem => {
  const { roomId, eventId, sender, ts, body, pollText, formattedBody } = row;

  const htmlFields: Record<string, unknown> = formattedBody
    ? { format: 'org.matrix.custom.html', formatted_body: formattedBody }
    : {};
  const messageContent: Record<string, unknown> = {
    msgtype: 'm.text',
    body: body || pollText || '',
    ...htmlFields,
  };

  return {
    rank: 0,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    event: {
      room_id: roomId,
      event_id: eventId,
      type: EventType.RoomMessage,
      sender,
      origin_server_ts: ts,
      content: messageContent,
      unsigned: {},
    } as any,
    context: { events_before: [], events_after: [], profile_info: {} },
  };
};
/**
* Client-side full-text search over locally cached events in encrypted rooms.
* The homeserver cannot search E2EE message content, so we scan whatever the
* client has already received and decrypted in memory.
*
* Limitation: only messages present in the live timeline window are covered.
* Rooms that haven't been opened yet will return no results.
* When the persistent search cache is enabled (opt-in), the in-memory scan is
* also persisted to IndexedDB (fire-and-forget) and merged with prior cached
* coverage so results survive reloads. When disabled, zero cache reads/writes
* occur.
*/
export const useLocalMessageSearch = () => {
const mx = useMatrixClient();
const cacheEnabled = useAtomValue(searchCacheEnabledAtom);
const search = useCallback(
({ term, roomIds, senders }: LocalSearchParams): LocalSearchResult => {
async ({
term,
roomIds,
senders,
fromTs,
toTs,
}: LocalSearchParams): Promise<LocalSearchResult> => {
const trimmedTerm = term.trim();
const senderSet = senders && senders.length > 0 ? new Set(senders) : null;
@@ -41,6 +143,9 @@ export const useLocalMessageSearch = () => {
}
const termLower = trimmedTerm.toLowerCase();
const inRange = (ts: number): boolean =>
(fromTs === undefined || ts >= fromTs) && (toTs === undefined || ts <= toTs);
const groups: ResultGroup[] = [];
let encryptedRoomsCount = 0;
let searchedRoomsCount = 0;
@@ -61,106 +166,99 @@ export const useLocalMessageSearch = () => {
.getUnfilteredTimelineSet()
.getTimelines()
.flatMap((tl) => tl.getEvents());
if (events.length === 0) continue;
// eslint-disable-next-line no-await-in-loop
const cachedRows = cacheEnabled ? await queryRoom(roomId) : [];
if (events.length === 0 && cachedRows.length === 0) continue;
searchedRoomsCount += 1;
const items: ResultItem[] = [];
const memoryItems: ResultItem[] = [];
const rowsToPersist: SearchCacheRow[] = [];
for (let i = 0; i < events.length; i += 1) {
const event = events[i];
// In sender-only mode: include all message types; skip non-message events
if (event.getType() !== EventType.RoomMessage) {
if (senderOnlyMode) continue;
const evType = event.getType();
const isSticker = evType === 'm.sticker';
const isPoll = evType === 'm.poll.start' || evType === 'org.matrix.msc3381.poll.start';
if (!isSticker && !isPoll) continue;
}
if (event.isDecryptionFailure()) continue;
if (event.isRedacted()) continue;
if (senderSet && !senderSet.has(event.getSender() ?? '')) continue;
// getContent() returns decrypted plaintext regardless of encryption
const content = event.getContent();
const evType = event.getType();
const isSticker = evType === 'm.sticker';
const isMessageLike =
evType === EventType.RoomMessage || POLL_START_TYPES.includes(evType);
// Sender-only mode: no text filter needed
if (!senderOnlyMode) {
const evType = event.getType();
const isPoll = evType === 'm.poll.start' || evType === 'org.matrix.msc3381.poll.start';
// Sender-only mode indexes/returns all message types; text mode needs text.
if (!senderOnlyMode && !isMessageLike && !isSticker) continue;
let body = '';
let formattedBody = '';
if (!isPoll) {
body = (content.body as string | undefined) ?? '';
formattedBody = (content.formatted_body as string | undefined) ?? '';
} else {
// Poll — index question text and all answer options
const poll = (content['m.poll'] ??
// eslint-disable-next-line @typescript-eslint/no-explicit-any
content['org.matrix.msc3381.poll.start']) as any;
if (poll) {
const qBody =
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(poll.question?.['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
(poll.question?.body as string | undefined) ??
'';
const answerBodies = ((poll.answers ?? []) as Array<Record<string, unknown>>)
.map(
(a) =>
// eslint-disable-next-line @typescript-eslint/no-explicit-any
((a['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(a['org.matrix.msc3381.poll.answer'] as any)?.body ??
'') as string,
)
.join(' ');
body = `${qBody} ${answerBodies}`.trim();
}
}
const sender = event.getSender() ?? '';
const ts = event.getTs();
const text = extractText(event);
if (
!body.toLowerCase().includes(termLower) &&
!formattedBody.toLowerCase().includes(termLower)
)
continue;
// Persist every indexable (text-bearing) event we scanned, regardless
// of whether it matches the current term — future searches benefit.
if (cacheEnabled && text && event.getId()) {
rowsToPersist.push({
roomId,
eventId: event.getId() as string,
ts,
sender,
body: text.body,
...(text.formattedBody ? { formattedBody: text.formattedBody } : {}),
...(text.pollText ? { pollText: text.pollText } : {}),
});
}
// Build a synthetic IEventWithRoomId using decrypted content so the
// existing SearchResultGroup renderer works without modification.
if (senderSet && !senderSet.has(sender)) continue;
if (!inRange(ts)) continue;
if (!senderOnlyMode) {
if (!text || !matchesTerm(text, termLower)) continue;
}
const content = event.getContent();
const syntheticEvent = {
room_id: roomId,
event_id: event.getId() ?? '',
type: event.getType(),
sender: event.getSender() ?? '',
origin_server_ts: event.getTs(),
type: evType,
sender,
origin_server_ts: ts,
content,
unsigned: event.getUnsigned(),
};
items.push({
memoryItems.push({
rank: 0,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
event: syntheticEvent as any,
context: {
events_before: [],
events_after: [],
profile_info: {},
},
context: { events_before: [], events_after: [], profile_info: {} },
});
}
// Match cached rows (skip ids already present in memory happens in merge).
const cachedItems: ResultItem[] = [];
cachedRows.forEach((row) => {
if (senderSet && !senderSet.has(row.sender)) return;
if (!inRange(row.ts)) return;
if (!senderOnlyMode && !rowMatchesTerm(row, termLower)) return;
cachedItems.push(rowToResultItem(row));
});
const items = mergeSearchResults(memoryItems, cachedItems);
if (items.length > 0) {
items.sort((a, b) => (b.event.origin_server_ts ?? 0) - (a.event.origin_server_ts ?? 0));
groups.push({ roomId, items });
}
// Fire-and-forget persist of freshly scanned rows + coverage.
// saveRoomIndex swallows all errors internally, so a floating promise
// here can never reject.
if (cacheEnabled && rowsToPersist.length > 0) {
saveRoomIndex(roomId, rowsToPersist);
}
}
return { groups, encryptedRoomsCount, searchedRoomsCount };
},
[mx],
[mx, cacheEnabled],
);
return search;