feat(search): opt-in persistent index for encrypted-room search (P4-8)
Raw-IndexedDB cache (lotus-search-cache: messages keyed [roomId,eventId] + per-room coverage) merged into local search with in-memory-wins dedupe. OPT-IN (default off) via a standalone atom — stores decrypted text at rest, so it ships with a privacy note, a Clear button, and an unconditional wipe on logout (initMatrix). All IDB errors degrade to cache-miss. +8 tests (1 IDB skip in node). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,12 +1,23 @@
|
||||
import { EventType } from 'matrix-js-sdk';
|
||||
import { EventType, MatrixEvent } from 'matrix-js-sdk';
|
||||
import { useCallback } from 'react';
|
||||
import { useAtomValue } from 'jotai';
|
||||
import { useMatrixClient } from '../../hooks/useMatrixClient';
|
||||
import { ResultGroup, ResultItem } from './useMessageSearch';
|
||||
import { searchCacheEnabledAtom } from '../../state/searchCacheEnabled';
|
||||
import {
|
||||
mergeSearchResults,
|
||||
queryRoom,
|
||||
saveRoomIndex,
|
||||
SearchCacheRow,
|
||||
} from '../../utils/searchCache';
|
||||
|
||||
export type LocalSearchParams = {
|
||||
term: string;
|
||||
roomIds: string[];
|
||||
senders?: string[];
|
||||
/** Optional date-range filter (ms). Applied to both memory and cached rows. */
|
||||
fromTs?: number;
|
||||
toTs?: number;
|
||||
};
|
||||
|
||||
export type LocalSearchResult = {
|
||||
@@ -17,19 +28,110 @@ export type LocalSearchResult = {
|
||||
searchedRoomsCount: number;
|
||||
};
|
||||
|
||||
/** Extracted, searchable plaintext for a single message event. */
type ExtractedText = {
  /** Plain-text `body` of an `m.room.message` event ('' for polls). */
  body: string;
  /** HTML `formatted_body` of an `m.room.message` event ('' when absent). */
  formattedBody: string;
  /** Poll question plus all answer labels, space-joined ('' for non-polls). */
  pollText: string;
};

const POLL_START_TYPES = ['m.poll.start', 'org.matrix.msc3381.poll.start'];

/** Narrow an untrusted value to a plain indexable object, or `undefined`. */
const asContentRecord = (value: unknown): Record<string, unknown> | undefined =>
  typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : undefined;

/** Return the value only if it really is a string; anything else counts as absent. */
const asContentString = (value: unknown): string | undefined =>
  typeof value === 'string' ? value : undefined;

/** Read `node['m.text'][0].body`, tolerating any malformed level along the way. */
const firstMTextBody = (node: Record<string, unknown> | undefined): string | undefined => {
  const parts = node?.['m.text'];
  if (!Array.isArray(parts)) return undefined;
  return asContentString(asContentRecord(parts[0])?.body);
};

/**
 * Pull the text we index/search from a decrypted event's content. Returns
 * `null` for events that carry no searchable text (e.g. stickers).
 *
 * Event content is supplied by remote users and is not schema-validated, so
 * every field is checked at runtime: a non-string `body`/`formatted_body`, a
 * non-array `answers`, or a non-object answer entry is treated as absent
 * instead of being passed on (where a later `.toLowerCase()` or `.map()` would
 * throw and abort the search).
 *
 * @param event - The event to extract text from.
 * @returns The extracted text, or `null` when there is nothing searchable.
 */
const extractText = (event: MatrixEvent): ExtractedText | null => {
  const evType = event.getType();
  const content = event.getContent();

  if (POLL_START_TYPES.includes(evType)) {
    const poll = asContentRecord(content['m.poll'] ?? content['org.matrix.msc3381.poll.start']);
    if (!poll) return null;

    const question = asContentRecord(poll.question);
    const qBody = firstMTextBody(question) ?? asContentString(question?.body) ?? '';

    const answers: unknown[] = Array.isArray(poll.answers) ? poll.answers : [];
    const answerBodies = answers
      .map((entry) => {
        const answer = asContentRecord(entry);
        return (
          firstMTextBody(answer) ??
          asContentString(asContentRecord(answer?.['org.matrix.msc3381.poll.answer'])?.body) ??
          ''
        );
      })
      .join(' ');

    const pollText = `${qBody} ${answerBodies}`.trim();
    return pollText ? { body: '', formattedBody: '', pollText } : null;
  }

  if (evType !== EventType.RoomMessage) return null;

  const body = asContentString(content.body) ?? '';
  const formattedBody = asContentString(content.formatted_body) ?? '';
  if (!body && !formattedBody) return null;
  return { body, formattedBody, pollText: '' };
};
|
||||
|
||||
/**
 * Case-insensitive substring test across every extracted field.
 *
 * @param text - Extracted text of one event.
 * @param termLower - Search term; the caller must already have lowercased it.
 * @returns `true` when the body, formatted body or poll text contains the term.
 */
const matchesTerm = (text: ExtractedText, termLower: string): boolean => {
  const haystacks = [text.body, text.formattedBody, text.pollText];
  return haystacks.some((field) => field.toLowerCase().includes(termLower));
};
|
||||
|
||||
/**
 * Case-insensitive substring test for a persisted cache row.
 *
 * @param row - Cached row; `formattedBody` and `pollText` may be missing.
 * @param termLower - Search term; the caller must already have lowercased it.
 * @returns `true` when any stored text field contains the term.
 */
const rowMatchesTerm = (row: SearchCacheRow, termLower: string): boolean => {
  // Optional fields fall back to '' so they simply never match a non-empty term.
  const haystacks = [row.body, row.formattedBody ?? '', row.pollText ?? ''];
  return haystacks.some((field) => field.toLowerCase().includes(termLower));
};
|
||||
|
||||
/**
 * Turn a cached row into the synthetic result item it is rendered as.
 *
 * The row is always presented as an `m.text` room message: the visible body is
 * the stored body, falling back to the poll text, and HTML formatting is
 * attached only when the row has a formatted body.
 *
 * @param row - Persisted search-cache row.
 * @returns A rank-0 result item with empty context.
 */
const rowToResultItem = (row: SearchCacheRow): ResultItem => {
  const { roomId, eventId, sender, ts, formattedBody } = row;

  const content: Record<string, unknown> = {
    msgtype: 'm.text',
    body: row.body || row.pollText || '',
    ...(formattedBody
      ? { format: 'org.matrix.custom.html', formatted_body: formattedBody }
      : {}),
  };

  return {
    rank: 0,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    event: {
      room_id: roomId,
      event_id: eventId,
      type: EventType.RoomMessage,
      sender,
      origin_server_ts: ts,
      content,
      unsigned: {},
    } as any,
    context: { events_before: [], events_after: [], profile_info: {} },
  };
};
|
||||
|
||||
/**
|
||||
* Client-side full-text search over locally cached events in encrypted rooms.
|
||||
* The homeserver cannot search E2EE message content, so we scan whatever the
|
||||
* client has already received and decrypted in memory.
|
||||
*
|
||||
* Limitation: only messages present in the live timeline window are covered.
|
||||
* Rooms that haven't been opened yet will return no results.
|
||||
* When the persistent search cache is enabled (opt-in), the in-memory scan is
|
||||
* also persisted to IndexedDB (fire-and-forget) and merged with prior cached
|
||||
* coverage so results survive reloads. When disabled, zero cache reads/writes
|
||||
* occur.
|
||||
*/
|
||||
export const useLocalMessageSearch = () => {
|
||||
const mx = useMatrixClient();
|
||||
const cacheEnabled = useAtomValue(searchCacheEnabledAtom);
|
||||
|
||||
const search = useCallback(
|
||||
({ term, roomIds, senders }: LocalSearchParams): LocalSearchResult => {
|
||||
async ({
|
||||
term,
|
||||
roomIds,
|
||||
senders,
|
||||
fromTs,
|
||||
toTs,
|
||||
}: LocalSearchParams): Promise<LocalSearchResult> => {
|
||||
const trimmedTerm = term.trim();
|
||||
const senderSet = senders && senders.length > 0 ? new Set(senders) : null;
|
||||
|
||||
@@ -41,6 +143,9 @@ export const useLocalMessageSearch = () => {
|
||||
}
|
||||
|
||||
const termLower = trimmedTerm.toLowerCase();
|
||||
const inRange = (ts: number): boolean =>
|
||||
(fromTs === undefined || ts >= fromTs) && (toTs === undefined || ts <= toTs);
|
||||
|
||||
const groups: ResultGroup[] = [];
|
||||
let encryptedRoomsCount = 0;
|
||||
let searchedRoomsCount = 0;
|
||||
@@ -61,106 +166,99 @@ export const useLocalMessageSearch = () => {
|
||||
.getUnfilteredTimelineSet()
|
||||
.getTimelines()
|
||||
.flatMap((tl) => tl.getEvents());
|
||||
if (events.length === 0) continue;
|
||||
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
const cachedRows = cacheEnabled ? await queryRoom(roomId) : [];
|
||||
|
||||
if (events.length === 0 && cachedRows.length === 0) continue;
|
||||
|
||||
searchedRoomsCount += 1;
|
||||
|
||||
const items: ResultItem[] = [];
|
||||
const memoryItems: ResultItem[] = [];
|
||||
const rowsToPersist: SearchCacheRow[] = [];
|
||||
|
||||
for (let i = 0; i < events.length; i += 1) {
|
||||
const event = events[i];
|
||||
|
||||
// In sender-only mode: include all message types; skip non-message events
|
||||
if (event.getType() !== EventType.RoomMessage) {
|
||||
if (senderOnlyMode) continue;
|
||||
const evType = event.getType();
|
||||
const isSticker = evType === 'm.sticker';
|
||||
const isPoll = evType === 'm.poll.start' || evType === 'org.matrix.msc3381.poll.start';
|
||||
if (!isSticker && !isPoll) continue;
|
||||
}
|
||||
|
||||
if (event.isDecryptionFailure()) continue;
|
||||
if (event.isRedacted()) continue;
|
||||
if (senderSet && !senderSet.has(event.getSender() ?? '')) continue;
|
||||
|
||||
// getContent() returns decrypted plaintext regardless of encryption
|
||||
const content = event.getContent();
|
||||
const evType = event.getType();
|
||||
const isSticker = evType === 'm.sticker';
|
||||
const isMessageLike =
|
||||
evType === EventType.RoomMessage || POLL_START_TYPES.includes(evType);
|
||||
|
||||
// Sender-only mode: no text filter needed
|
||||
if (!senderOnlyMode) {
|
||||
const evType = event.getType();
|
||||
const isPoll = evType === 'm.poll.start' || evType === 'org.matrix.msc3381.poll.start';
|
||||
// Sender-only mode indexes/returns all message types; text mode needs text.
|
||||
if (!senderOnlyMode && !isMessageLike && !isSticker) continue;
|
||||
|
||||
let body = '';
|
||||
let formattedBody = '';
|
||||
if (!isPoll) {
|
||||
body = (content.body as string | undefined) ?? '';
|
||||
formattedBody = (content.formatted_body as string | undefined) ?? '';
|
||||
} else {
|
||||
// Poll — index question text and all answer options
|
||||
const poll = (content['m.poll'] ??
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
content['org.matrix.msc3381.poll.start']) as any;
|
||||
if (poll) {
|
||||
const qBody =
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(poll.question?.['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
|
||||
(poll.question?.body as string | undefined) ??
|
||||
'';
|
||||
const answerBodies = ((poll.answers ?? []) as Array<Record<string, unknown>>)
|
||||
.map(
|
||||
(a) =>
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
((a['m.text'] as Array<{ body: string }> | undefined)?.[0]?.body ??
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(a['org.matrix.msc3381.poll.answer'] as any)?.body ??
|
||||
'') as string,
|
||||
)
|
||||
.join(' ');
|
||||
body = `${qBody} ${answerBodies}`.trim();
|
||||
}
|
||||
}
|
||||
const sender = event.getSender() ?? '';
|
||||
const ts = event.getTs();
|
||||
const text = extractText(event);
|
||||
|
||||
if (
|
||||
!body.toLowerCase().includes(termLower) &&
|
||||
!formattedBody.toLowerCase().includes(termLower)
|
||||
)
|
||||
continue;
|
||||
// Persist every indexable (text-bearing) event we scanned, regardless
|
||||
// of whether it matches the current term — future searches benefit.
|
||||
if (cacheEnabled && text && event.getId()) {
|
||||
rowsToPersist.push({
|
||||
roomId,
|
||||
eventId: event.getId() as string,
|
||||
ts,
|
||||
sender,
|
||||
body: text.body,
|
||||
...(text.formattedBody ? { formattedBody: text.formattedBody } : {}),
|
||||
...(text.pollText ? { pollText: text.pollText } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
// Build a synthetic IEventWithRoomId using decrypted content so the
|
||||
// existing SearchResultGroup renderer works without modification.
|
||||
if (senderSet && !senderSet.has(sender)) continue;
|
||||
if (!inRange(ts)) continue;
|
||||
|
||||
if (!senderOnlyMode) {
|
||||
if (!text || !matchesTerm(text, termLower)) continue;
|
||||
}
|
||||
|
||||
const content = event.getContent();
|
||||
const syntheticEvent = {
|
||||
room_id: roomId,
|
||||
event_id: event.getId() ?? '',
|
||||
type: event.getType(),
|
||||
sender: event.getSender() ?? '',
|
||||
origin_server_ts: event.getTs(),
|
||||
type: evType,
|
||||
sender,
|
||||
origin_server_ts: ts,
|
||||
content,
|
||||
unsigned: event.getUnsigned(),
|
||||
};
|
||||
|
||||
items.push({
|
||||
memoryItems.push({
|
||||
rank: 0,
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
event: syntheticEvent as any,
|
||||
context: {
|
||||
events_before: [],
|
||||
events_after: [],
|
||||
profile_info: {},
|
||||
},
|
||||
context: { events_before: [], events_after: [], profile_info: {} },
|
||||
});
|
||||
}
|
||||
|
||||
// Match cached rows; ids already present in memory are deduplicated later by mergeSearchResults.
|
||||
const cachedItems: ResultItem[] = [];
|
||||
cachedRows.forEach((row) => {
|
||||
if (senderSet && !senderSet.has(row.sender)) return;
|
||||
if (!inRange(row.ts)) return;
|
||||
if (!senderOnlyMode && !rowMatchesTerm(row, termLower)) return;
|
||||
cachedItems.push(rowToResultItem(row));
|
||||
});
|
||||
|
||||
const items = mergeSearchResults(memoryItems, cachedItems);
|
||||
|
||||
if (items.length > 0) {
|
||||
items.sort((a, b) => (b.event.origin_server_ts ?? 0) - (a.event.origin_server_ts ?? 0));
|
||||
groups.push({ roomId, items });
|
||||
}
|
||||
|
||||
// Fire-and-forget persist of freshly scanned rows + coverage.
|
||||
// saveRoomIndex swallows all errors internally, so a floating promise
|
||||
// here can never reject.
|
||||
if (cacheEnabled && rowsToPersist.length > 0) {
|
||||
saveRoomIndex(roomId, rowsToPersist);
|
||||
}
|
||||
}
|
||||
|
||||
return { groups, encryptedRoomsCount, searchedRoomsCount };
|
||||
},
|
||||
[mx],
|
||||
[mx, cacheEnabled],
|
||||
);
|
||||
|
||||
return search;
|
||||
|
||||
Reference in New Issue
Block a user