From 9940ee3beed2cb9bebdb956508f8f04c004f4dde Mon Sep 17 00:00:00 2001 From: perfectra1n Date: Fri, 28 Nov 2025 20:57:18 -0800 Subject: [PATCH] feat(fts): break up the huge fts_search into smaller files --- apps/server/src/services/search/fts/errors.ts | 30 + apps/server/src/services/search/fts/index.ts | 90 ++ .../src/services/search/fts/index_manager.ts | 262 ++++ .../src/services/search/fts/query_builder.ts | 160 +++ .../src/services/search/fts/search_service.ts | 655 ++++++++++ apps/server/src/services/search/fts/types.ts | 62 + apps/server/src/services/search/fts_search.ts | 1163 +---------------- 7 files changed, 1302 insertions(+), 1120 deletions(-) create mode 100644 apps/server/src/services/search/fts/errors.ts create mode 100644 apps/server/src/services/search/fts/index.ts create mode 100644 apps/server/src/services/search/fts/index_manager.ts create mode 100644 apps/server/src/services/search/fts/query_builder.ts create mode 100644 apps/server/src/services/search/fts/search_service.ts create mode 100644 apps/server/src/services/search/fts/types.ts diff --git a/apps/server/src/services/search/fts/errors.ts b/apps/server/src/services/search/fts/errors.ts new file mode 100644 index 000000000..5241fd6d1 --- /dev/null +++ b/apps/server/src/services/search/fts/errors.ts @@ -0,0 +1,30 @@ +/** + * FTS5 Error Classes + * + * Custom error types for FTS5 operations to enable proper error handling + * and recovery strategies. + */ + +/** + * Base error class for FTS operations + */ +export class FTSError extends Error { + constructor( + message: string, + public readonly code: string, + public readonly recoverable: boolean = true + ) { + super(message); + this.name = 'FTSError'; + } +} + +/** + * Error thrown when an FTS query is malformed or invalid + */ +export class FTSQueryError extends FTSError { + constructor(message: string, public readonly query?: string) { + super(message, 'FTS_QUERY_ERROR', true); + this.name = 'FTSQueryError'; + } +} diff --git a/apps/server/src/services/search/fts/index.ts b/apps/server/src/services/search/fts/index.ts new file mode 100644 index 000000000..32610fc93 --- /dev/null +++ b/apps/server/src/services/search/fts/index.ts @@ -0,0 +1,90 @@ +/** + * FTS5 Search Module + * + * Barrel export for all FTS5 functionality. + * This module provides full-text search using SQLite's FTS5 extension. + */ + +// Error classes +export { FTSError, FTSQueryError } from "./errors.js"; + +// Types and configuration +export { + FTS_CONFIG, + type FTSSearchResult, + type FTSSearchOptions, + type FTSErrorInfo, + type FTSIndexStats +} from "./types.js"; + +// Query building utilities +export { + convertToFTS5Query, + sanitizeFTS5Token, + escapeLikeWildcards, + containsExactPhrase, + generateSnippet +} from "./query_builder.js"; + +// Index management +export { + assertFTS5Available, + checkFTS5Availability, + updateNoteIndex, + removeNoteFromIndex, + syncMissingNotes, + rebuildIndex, + getIndexStats, + filterNonProtectedNoteIds +} from "./index_manager.js"; + +// Search operations +export { + searchWithLike, + searchSync, + searchAttributesSync, + searchProtectedNotesSync +} from "./search_service.js"; + +// Legacy class-based API for backward compatibility +import { + assertFTS5Available, + checkFTS5Availability, + updateNoteIndex, + removeNoteFromIndex, + syncMissingNotes, + rebuildIndex, + getIndexStats +} from "./index_manager.js"; +import { + searchWithLike, + searchSync, + searchAttributesSync, + searchProtectedNotesSync +} from "./search_service.js"; +import { convertToFTS5Query } from "./query_builder.js"; + +/** + * FTS Search Service class + * + * Provides a class-based API for backward compatibility. + * New code should prefer the individual exported functions. + */ +class FTSSearchService { + assertFTS5Available = assertFTS5Available; + checkFTS5Availability = checkFTS5Availability; + convertToFTS5Query = convertToFTS5Query; + searchWithLike = searchWithLike; + searchSync = searchSync; + searchAttributesSync = searchAttributesSync; + searchProtectedNotesSync = searchProtectedNotesSync; + updateNoteIndex = updateNoteIndex; + removeNoteFromIndex = removeNoteFromIndex; + syncMissingNotes = syncMissingNotes; + rebuildIndex = rebuildIndex; + getIndexStats = getIndexStats; +} + +// Export singleton instance for backward compatibility +export const ftsSearchService = new FTSSearchService(); +export default ftsSearchService; diff --git a/apps/server/src/services/search/fts/index_manager.ts b/apps/server/src/services/search/fts/index_manager.ts new file mode 100644 index 000000000..53d7d24be --- /dev/null +++ b/apps/server/src/services/search/fts/index_manager.ts @@ -0,0 +1,262 @@ +/** + * FTS5 Index Manager + * + * Handles FTS5 index CRUD operations including: + * - Index availability verification + * - Note indexing and removal + * - Index synchronization and rebuilding + * - Index statistics + */ + +import sql from "../../sql.js"; +import log from "../../log.js"; +import type { FTSIndexStats } from "./types.js"; + +/** + * Asserts that FTS5 is available. Should be called at application startup. + * Throws an error if FTS5 tables are not found. + */ +export function assertFTS5Available(): void { + const result = sql.getValue(` + SELECT COUNT(*) + FROM sqlite_master + WHERE type = 'table' + AND name = 'notes_fts' + `); + + if (result === 0) { + throw new Error("CRITICAL: FTS5 table 'notes_fts' not found. Run database migration."); + } + + log.info("FTS5 tables verified - full-text search is available"); +} + +/** + * Checks if FTS5 is available. + * @returns Always returns true - FTS5 is required and validated at startup. + * @deprecated This method is kept for API compatibility. FTS5 is now required. + */ +export function checkFTS5Availability(): boolean { + return true; +} + +/** + * Updates the FTS index for a specific note (synchronous) + * + * @param noteId - The note ID to update + * @param title - The note title + * @param content - The note content + */ +export function updateNoteIndex(noteId: string, title: string, content: string): void { + try { + sql.transactional(() => { + // Delete existing entry + sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + + // Insert new entry + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) + VALUES (?, ?, ?) + `, [noteId, title, content]); + }); + } catch (error) { + log.error(`Failed to update FTS index for note ${noteId}: ${error}`); + } +} + +/** + * Removes a note from the FTS index (synchronous) + * + * @param noteId - The note ID to remove + */ +export function removeNoteFromIndex(noteId: string): void { + try { + sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + } catch (error) { + log.error(`Failed to remove note ${noteId} from FTS index: ${error}`); + } +} + +/** + * Syncs missing notes to the FTS index (synchronous) + * This is useful after bulk operations like imports where triggers might not fire + * + * @param noteIds - Optional array of specific note IDs to sync. If not provided, syncs all missing notes. + * @returns The number of notes that were synced + */ +export function syncMissingNotes(noteIds?: string[]): number { + try { + let syncedCount = 0; + + sql.transactional(() => { + let query: string; + let params: any[] = []; + + if (noteIds && noteIds.length > 0) { + // Sync specific notes that are missing from FTS + const placeholders = noteIds.map(() => '?').join(','); + query = ` + WITH missing_notes AS ( + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.noteId IN (${placeholders}) + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts (noteId, title, content) + SELECT noteId, title, content FROM missing_notes + `; + params = noteIds; + } else { + // Sync all missing notes + query = ` + WITH missing_notes AS ( + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts (noteId, title, content) + SELECT noteId, title, content FROM missing_notes + `; + } + + const result = sql.execute(query, params); + syncedCount = result.changes; + + if (syncedCount > 0) { + log.info(`Synced ${syncedCount} missing notes to FTS index`); + // Optimize if we synced a significant number of notes + if (syncedCount > 100) { + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + } + } + }); + + return syncedCount; + } catch (error) { + log.error(`Failed to sync missing notes to FTS index: ${error}`); + return 0; + } +} + +/** + * Rebuilds the entire FTS index (synchronous) + * This is useful for maintenance or after bulk operations + */ +export function rebuildIndex(): void { + log.info("Rebuilding FTS5 index..."); + + try { + sql.transactional(() => { + // Clear existing index + sql.execute(`DELETE FROM notes_fts`); + + // Rebuild from notes + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) + SELECT + n.noteId, + n.title, + b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + `); + + // Optimize the FTS table + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + }); + + log.info("FTS5 index rebuild completed"); + } catch (error) { + log.error(`Failed to rebuild FTS index: ${error}`); + throw error; + } +} + +/** + * Gets statistics about the FTS index (synchronous) + * Includes fallback when dbstat is not available + */ +export function getIndexStats(): FTSIndexStats { + const totalDocuments = sql.getValue(` + SELECT COUNT(*) FROM notes_fts + `) || 0; + + let indexSize = 0; + let dbstatAvailable = false; + + try { + // Try to get index size from dbstat + // dbstat is a virtual table that may not be available in all SQLite builds + indexSize = sql.getValue(` + SELECT SUM(pgsize) + FROM dbstat + WHERE name LIKE 'notes_fts%' + `) || 0; + dbstatAvailable = true; + } catch (error: any) { + // dbstat not available, use fallback + if (error.message?.includes('no such table: dbstat')) { + log.info("dbstat virtual table not available, using fallback for index size estimation"); + + // Fallback: Estimate based on number of documents and average content size + try { + const avgContentSize = sql.getValue(` + SELECT AVG(LENGTH(content) + LENGTH(title)) + FROM notes_fts + LIMIT 1000 + `) || 0; + + // Rough estimate: avg size * document count * overhead factor + indexSize = Math.round(avgContentSize * totalDocuments * 1.5); + } catch (fallbackError) { + log.info(`Could not estimate index size: ${fallbackError}`); + indexSize = 0; + } + } else { + log.error(`Error accessing dbstat: ${error}`); + } + } + + return { + totalDocuments, + indexSize, + isOptimized: true, // FTS5 manages optimization internally + dbstatAvailable + }; +} + +/** + * Filters out protected note IDs from the given set + */ +export function filterNonProtectedNoteIds(noteIds: Set): string[] { + const noteIdList = Array.from(noteIds); + const placeholders = noteIdList.map(() => '?').join(','); + + const nonProtectedNotes = sql.getColumn(` + SELECT noteId + FROM notes + WHERE noteId IN (${placeholders}) + AND isProtected = 0 + `, noteIdList); + + return nonProtectedNotes; +} diff --git a/apps/server/src/services/search/fts/query_builder.ts b/apps/server/src/services/search/fts/query_builder.ts new file mode 100644 index 000000000..27dda74af --- /dev/null +++ b/apps/server/src/services/search/fts/query_builder.ts @@ -0,0 +1,160 @@ +/** + * FTS5 Query Builder + * + * Utilities for converting Trilium search syntax to FTS5 MATCH syntax, + * sanitizing tokens, and handling text matching operations. + */ + +import striptags from "striptags"; +import log from "../../log.js"; +import { FTSQueryError } from "./errors.js"; + +/** + * Converts Trilium search syntax to FTS5 MATCH syntax + * + * @param tokens - Array of search tokens + * @param operator - Trilium search operator + * @returns FTS5 MATCH query string + */ +export function convertToFTS5Query(tokens: string[], operator: string): string { + if (!tokens || tokens.length === 0) { + throw new Error("No search tokens provided"); + } + + // Substring operators (*=*, *=, =*) use LIKE queries now, not MATCH + if (operator === "*=*" || operator === "*=" || operator === "=*") { + throw new Error("Substring operators should use searchWithLike(), not MATCH queries"); + } + + // Trigram tokenizer requires minimum 3 characters + const shortTokens = tokens.filter(token => token.length < 3); + if (shortTokens.length > 0) { + const shortList = shortTokens.join(', '); + log.info(`Tokens shorter than 3 characters detected (${shortList}) - cannot use trigram FTS5`); + throw new FTSQueryError( + `Trigram tokenizer requires tokens of at least 3 characters. Short tokens: ${shortList}` + ); + } + + // Sanitize tokens to prevent FTS5 syntax injection + const sanitizedTokens = tokens.map(token => sanitizeFTS5Token(token)); + + // Only handle operators that work with MATCH + switch (operator) { + case "=": // Exact phrase match + return `"${sanitizedTokens.join(" ")}"`; + + case "!=": // Does not contain + return `NOT (${sanitizedTokens.join(" OR ")})`; + + case "~=": // Fuzzy match (use OR) + case "~*": + return sanitizedTokens.join(" OR "); + + case "%=": // Regex - uses traditional SQL iteration fallback + throw new FTSQueryError("Regex search not supported in FTS5 - use traditional search path"); + + default: + throw new FTSQueryError(`Unsupported MATCH operator: ${operator}`); + } +} + +/** + * Sanitizes a token for safe use in FTS5 queries + * Validates that the token is not empty after sanitization + */ +export function sanitizeFTS5Token(token: string): string { + // Remove special FTS5 characters that could break syntax + const sanitized = token + .replace(/["\(\)\*]/g, '') // Remove quotes, parens, wildcards + .replace(/\s+/g, ' ') // Normalize whitespace + .trim(); + + // Validate that token is not empty after sanitization + if (!sanitized || sanitized.length === 0) { + log.info(`Token became empty after sanitization: "${token}"`); + // Return a safe placeholder that won't match anything + return "__empty_token__"; + } + + // Additional validation: ensure token doesn't contain SQL injection attempts + if (sanitized.includes(';') || sanitized.includes('--')) { + log.error(`Potential SQL injection attempt detected in token: "${token}"`); + return "__invalid_token__"; + } + + return sanitized; +} + +/** + * Escapes LIKE wildcards (% and _) in user input to treat them as literals + * @param str - User input string + * @returns String with LIKE wildcards escaped + */ +export function escapeLikeWildcards(str: string): string { + return str.replace(/[%_]/g, '\\$&'); +} + +/** + * Checks if a phrase appears as exact words in text (respecting word boundaries) + * @param phrase - The phrase to search for (case-insensitive) + * @param text - The text to search in + * @returns true if the phrase appears as complete words, false otherwise + */ +export function containsExactPhrase(phrase: string, text: string | null | undefined): boolean { + if (!text || !phrase || typeof text !== 'string') { + return false; + } + + // Normalize both to lowercase for case-insensitive comparison + const normalizedPhrase = phrase.toLowerCase().trim(); + const normalizedText = text.toLowerCase(); + + // Strip HTML tags for content matching + const plainText = striptags(normalizedText); + + // For single words, use word-boundary matching + if (!normalizedPhrase.includes(' ')) { + // Split text into words and check for exact match + const words = plainText.split(/\s+/); + return words.some(word => word === normalizedPhrase); + } + + // For multi-word phrases, check if the phrase appears as consecutive words + // Split text into words, then check if the phrase appears in the word sequence + const textWords = plainText.split(/\s+/); + const phraseWords = normalizedPhrase.split(/\s+/); + + // Sliding window to find exact phrase match + for (let i = 0; i <= textWords.length - phraseWords.length; i++) { + let match = true; + for (let j = 0; j < phraseWords.length; j++) { + if (textWords[i + j] !== phraseWords[j]) { + match = false; + break; + } + } + if (match) { + return true; + } + } + + return false; +} + +/** + * Generates a snippet from content + */ +export function generateSnippet(content: string, maxLength: number = 30): string { + // Strip HTML tags for snippet + const plainText = striptags(content); + // Simple normalization - just trim and collapse whitespace + const normalized = plainText.replace(/\s+/g, ' ').trim(); + + if (normalized.length <= maxLength * 10) { + return normalized; + } + + // Extract snippet around first occurrence + return normalized.substring(0, maxLength * 10) + '...'; +} diff --git a/apps/server/src/services/search/fts/search_service.ts b/apps/server/src/services/search/fts/search_service.ts new file mode 100644 index 000000000..717a7ee6d --- /dev/null +++ b/apps/server/src/services/search/fts/search_service.ts @@ -0,0 +1,655 @@ +/** + * FTS5 Search Service + * + * Core search operations using SQLite's FTS5 extension with: + * - Trigram tokenization for fast substring matching + * - Snippet extraction for context + * - Highlighting of matched terms + * - LIKE-based substring searches + * - Protected notes search + * - Attribute search + */ + +import sql from "../../sql.js"; +import log from "../../log.js"; +import protectedSessionService from "../../protected_session.js"; +import { FTSError, FTSQueryError } from "./errors.js"; +import { FTS_CONFIG, type FTSSearchResult, type FTSSearchOptions } from "./types.js"; +import { + convertToFTS5Query, + sanitizeFTS5Token, + escapeLikeWildcards, + containsExactPhrase, + generateSnippet +} from "./query_builder.js"; +import { filterNonProtectedNoteIds } from "./index_manager.js"; + +/** + * Performs substring search using LIKE queries optimized by trigram index + * This is used for *=*, *=, and =* operators with detail='none' + * + * @param tokens - Search tokens + * @param operator - Search operator (*=*, *=, =*) + * @param noteIds - Optional set of note IDs to filter + * @param options - Search options + * @returns Array of search results (noteIds only, no scoring) + */ +export function searchWithLike( + tokens: string[], + operator: string, + noteIds?: Set, + options: FTSSearchOptions = {} +): FTSSearchResult[] { + // Handle empty tokens efficiently - return all notes without running diagnostics + if (tokens.length === 0) { + // Empty query means return all indexed notes (optionally filtered by noteIds) + log.info('[FTS-OPTIMIZATION] Empty token array - returning all indexed notes without diagnostics'); + + const results: FTSSearchResult[] = []; + let query: string; + const params: any[] = []; + + if (noteIds && noteIds.size > 0) { + const nonProtectedNoteIds = filterNonProtectedNoteIds(noteIds); + if (nonProtectedNoteIds.length === 0) { + return []; // No non-protected notes to search + } + query = `SELECT noteId, title FROM notes_fts WHERE noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`; + params.push(...nonProtectedNoteIds); + } else { + // Return all indexed notes + query = `SELECT noteId, title FROM notes_fts`; + } + + for (const row of sql.iterateRows<{ noteId: string; title: string }>(query, params)) { + results.push({ + noteId: row.noteId, + title: row.title, + score: 0, // No ranking for empty query + snippet: undefined + }); + } + + log.info(`[FTS-OPTIMIZATION] Empty token search returned ${results.length} results`); + return results; + } + + // Normalize tokens to lowercase for case-insensitive search + const normalizedTokens = tokens.map(t => t.toLowerCase()); + + // Validate token lengths to prevent memory issues + const longTokens = normalizedTokens.filter(t => t.length > FTS_CONFIG.MAX_TOKEN_LENGTH); + if (longTokens.length > 0) { + throw new FTSQueryError( + `Search tokens too long (max ${FTS_CONFIG.MAX_TOKEN_LENGTH} characters). ` + + `Long tokens: ${longTokens.map(t => t.substring(0, 50) + '...').join(', ')}` + ); + } + + const { + limit, // No default limit - return all results + offset = 0, + skipDiagnostics = false + } = options; + + // Run diagnostics BEFORE the actual search (not counted in performance timing) + if (!skipDiagnostics) { + log.info('[FTS-DIAGNOSTICS] Running index completeness checks (not counted in search timing)...'); + const totalInFts = sql.getValue(`SELECT COUNT(*) FROM notes_fts`); + const totalNotes = sql.getValue(` + SELECT COUNT(*) + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `); + + if (totalInFts < totalNotes) { + log.info(`[FTS-DIAGNOSTICS] FTS index incomplete: ${totalInFts} indexed out of ${totalNotes} total notes. Run syncMissingNotes().`); + } else { + log.info(`[FTS-DIAGNOSTICS] FTS index complete: ${totalInFts} notes indexed`); + } + } + + try { + // Start timing for actual search (excludes diagnostics) + const searchStartTime = Date.now(); + + // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses + // The FTS table already excludes protected notes, so we can search all notes + const isLargeNoteSet = noteIds && noteIds.size > FTS_CONFIG.LARGE_SET_THRESHOLD; + + if (isLargeNoteSet) { + log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); + } + + // Only filter noteIds if the set is small enough to benefit from it + const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; + const nonProtectedNoteIds = shouldFilterByNoteIds + ? filterNonProtectedNoteIds(noteIds) + : []; + + let whereConditions: string[] = []; + const params: any[] = []; + + // Build LIKE conditions for each token - search BOTH title and content + switch (operator) { + case "*=*": // Contains (substring) + normalizedTokens.forEach(token => { + // Search in BOTH title and content with escaped wildcards + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = escapeLikeWildcards(token); + params.push(`%${escapedToken}%`, `%${escapedToken}%`); + }); + break; + + case "*=": // Ends with + normalizedTokens.forEach(token => { + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = escapeLikeWildcards(token); + params.push(`%${escapedToken}`, `%${escapedToken}`); + }); + break; + + case "=*": // Starts with + normalizedTokens.forEach(token => { + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = escapeLikeWildcards(token); + params.push(`${escapedToken}%`, `${escapedToken}%`); + }); + break; + + default: + throw new FTSQueryError(`Unsupported LIKE operator: ${operator}`); + } + + // Validate that we have search criteria + if (whereConditions.length === 0 && nonProtectedNoteIds.length === 0) { + throw new FTSQueryError("No search criteria provided (empty tokens and no note filter)"); + } + + // Add noteId filter if provided + if (nonProtectedNoteIds.length > 0) { + const tokenParamCount = params.length; + const additionalParams = 2; // For limit and offset + + if (nonProtectedNoteIds.length <= FTS_CONFIG.MAX_PARAMS_PER_QUERY - tokenParamCount - additionalParams) { + // Normal case: all IDs fit in one query + whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); + params.push(...nonProtectedNoteIds); + } else { + // Large noteIds set: split into chunks and execute multiple queries + const chunks: string[][] = []; + for (let i = 0; i < nonProtectedNoteIds.length; i += FTS_CONFIG.MAX_PARAMS_PER_QUERY) { + chunks.push(nonProtectedNoteIds.slice(i, i + FTS_CONFIG.MAX_PARAMS_PER_QUERY)); + } + + log.info(`Large noteIds set detected (${nonProtectedNoteIds.length} notes), splitting into ${chunks.length} chunks`); + + // Execute a query for each chunk and combine results + const allResults: FTSSearchResult[] = []; + let remainingLimit = limit !== undefined ? limit : Number.MAX_SAFE_INTEGER; + let currentOffset = offset; + + for (const chunk of chunks) { + if (remainingLimit <= 0) break; + + const chunkWhereConditions = [...whereConditions]; + const chunkParams: any[] = [...params]; + + chunkWhereConditions.push(`noteId IN (${chunk.map(() => '?').join(',')})`); + chunkParams.push(...chunk); + + // Build chunk query + const chunkQuery = ` + SELECT noteId, title + FROM notes_fts + WHERE ${chunkWhereConditions.join(' AND ')} + ${remainingLimit !== Number.MAX_SAFE_INTEGER ? 'LIMIT ?' : ''} + ${currentOffset > 0 ? 'OFFSET ?' : ''} + `; + + if (remainingLimit !== Number.MAX_SAFE_INTEGER) chunkParams.push(remainingLimit); + if (currentOffset > 0) chunkParams.push(currentOffset); + + const chunkResults = sql.getRows<{ noteId: string; title: string }>(chunkQuery, chunkParams); + allResults.push(...chunkResults.map(row => ({ + noteId: row.noteId, + title: row.title, + score: 1.0 + }))); + + if (remainingLimit !== Number.MAX_SAFE_INTEGER) { + remainingLimit -= chunkResults.length; + } + currentOffset = 0; // Only apply offset to first chunk + } + + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 LIKE search (chunked) returned ${allResults.length} results in ${searchTime}ms`); + + return allResults; + } + } + + // Build query - LIKE queries are automatically optimized by trigram index + // Only add LIMIT/OFFSET if specified + const query = ` + SELECT noteId, title + FROM notes_fts + WHERE ${whereConditions.join(' AND ')} + ${limit !== undefined ? 'LIMIT ?' : ''} + ${offset > 0 ? 'OFFSET ?' : ''} + `; + + // Only add limit/offset params if specified + if (limit !== undefined) params.push(limit); + if (offset > 0) params.push(offset); + + // Log the search parameters + log.info(`FTS5 LIKE search: tokens=[${normalizedTokens.join(', ')}], operator=${operator}, limit=${limit || 'none'}, offset=${offset}`); + + const rows = sql.getRows<{ noteId: string; title: string }>(query, params); + + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 LIKE search returned ${rows.length} results in ${searchTime}ms`); + + return rows.map(row => ({ + noteId: row.noteId, + title: row.title, + score: 1.0 // LIKE queries don't have ranking + })); + + } catch (error: any) { + log.error(`FTS5 LIKE search error: ${error}`); + throw new FTSQueryError( + `FTS5 LIKE search failed: ${error.message}`, + undefined + ); + } +} + +/** + * Performs a synchronous full-text search using FTS5 + * + * @param tokens - Search tokens + * @param operator - Search operator + * @param noteIds - Optional set of note IDs to search within + * @param options - Search options + * @returns Array of search results + */ +export function searchSync( + tokens: string[], + operator: string, + noteIds?: Set, + options: FTSSearchOptions = {} +): FTSSearchResult[] { + // Handle empty tokens efficiently - return all notes without MATCH query + if (tokens.length === 0) { + log.info('[FTS-OPTIMIZATION] Empty token array in searchSync - returning all indexed notes'); + + // Reuse the empty token logic from searchWithLike + const results: FTSSearchResult[] = []; + let query: string; + const params: any[] = []; + + if (noteIds && noteIds.size > 0) { + const nonProtectedNoteIds = filterNonProtectedNoteIds(noteIds); + if (nonProtectedNoteIds.length === 0) { + return []; // No non-protected notes to search + } + query = `SELECT noteId, title FROM notes_fts WHERE noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`; + params.push(...nonProtectedNoteIds); + } else { + // Return all indexed notes + query = `SELECT noteId, title FROM notes_fts`; + } + + for (const row of sql.iterateRows<{ noteId: string; title: string }>(query, params)) { + results.push({ + noteId: row.noteId, + title: row.title, + score: 0, // No ranking for empty query + snippet: undefined + }); + } + + log.info(`[FTS-OPTIMIZATION] Empty token search returned ${results.length} results`); + return results; + } + + const { + limit = FTS_CONFIG.DEFAULT_LIMIT, + offset = 0, + includeSnippets = true, + snippetLength = FTS_CONFIG.DEFAULT_SNIPPET_LENGTH, + highlightTag = FTS_CONFIG.DEFAULT_HIGHLIGHT_START, + searchProtected = false + } = options; + + try { + // Start timing for actual search + const searchStartTime = Date.now(); + + const ftsQuery = convertToFTS5Query(tokens, operator); + + // Validate query length + if (ftsQuery.length > FTS_CONFIG.MAX_QUERY_LENGTH) { + throw new FTSQueryError( + `Query too long: ${ftsQuery.length} characters (max: ${FTS_CONFIG.MAX_QUERY_LENGTH})`, + ftsQuery + ); + } + + // Check if we're searching for protected notes + // Protected notes are NOT in the FTS index, so we need to handle them separately + if (searchProtected && protectedSessionService.isProtectedSessionAvailable()) { + log.info("Protected session available - will search protected notes separately"); + // Return empty results from FTS and let the caller handle protected notes + // The caller should use a fallback search method for protected notes + return []; + } + + // Build the SQL query + let whereConditions = [`notes_fts MATCH ?`]; + const params: any[] = [ftsQuery]; + + // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses + // The FTS table already excludes protected notes, so we can search all notes + const isLargeNoteSet = noteIds && noteIds.size > FTS_CONFIG.LARGE_SET_THRESHOLD; + + if (isLargeNoteSet) { + log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); + } + + // Filter by noteIds if provided and set is small enough + const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; + if (shouldFilterByNoteIds) { + // First filter out any protected notes from the noteIds + const nonProtectedNoteIds = filterNonProtectedNoteIds(noteIds!); + if (nonProtectedNoteIds.length === 0) { + // All provided notes are protected, return empty results + return []; + } + whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); + params.push(...nonProtectedNoteIds); + } + + // Build snippet extraction if requested + const snippetSelect = includeSnippets + ? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '(query, params); + + // Post-filter for exact match operator (=) to handle word boundaries + // Trigram FTS5 doesn't respect word boundaries in phrase queries, + // so "test123" matches "test1234" due to shared trigrams. + // We need to post-filter results to only include exact word matches. + if (operator === "=") { + const phrase = tokens.join(" "); + results = results.filter(result => { + // Use content from result if available, otherwise fetch it + let noteContent = result.content; + if (!noteContent) { + noteContent = sql.getValue(` + SELECT b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.noteId = ? + `, [result.noteId]); + } + + if (!noteContent) { + return false; + } + + // Check if phrase appears as exact words in content or title + return containsExactPhrase(phrase, result.title) || + containsExactPhrase(phrase, noteContent); + }); + } + + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`); + + return results; + + } catch (error: any) { + // Provide structured error information + if (error instanceof FTSError) { + throw error; + } + + log.error(`FTS5 search error: ${error}`); + + // Determine if this is a recoverable error + const isRecoverable = + error.message?.includes('syntax error') || + error.message?.includes('malformed MATCH') || + error.message?.includes('no such table'); + + throw new FTSQueryError( + `FTS5 search failed: ${error.message}. ${isRecoverable ? 'Falling back to standard search.' : ''}`, + undefined + ); + } +} + +/** + * Searches attributes using FTS5 + * Returns noteIds of notes that have matching attributes + */ +export function searchAttributesSync( + tokens: string[], + operator: string, + noteIds?: Set +): Set { + // Check if attributes_fts table exists + const tableExists = sql.getValue(` + SELECT COUNT(*) + FROM sqlite_master + WHERE type='table' AND name='attributes_fts' + `); + + if (!tableExists) { + log.info("attributes_fts table does not exist - skipping FTS attribute search"); + return new Set(); + } + + try { + // Sanitize tokens to prevent FTS5 syntax injection + const sanitizedTokens = tokens.map(token => sanitizeFTS5Token(token)); + + // Check if any tokens became invalid after sanitization + if (sanitizedTokens.some(t => t === '__empty_token__' || t === '__invalid_token__')) { + return new Set(); + } + + const phrase = sanitizedTokens.join(" "); + + // Build FTS5 query for exact match + const ftsQuery = operator === "=" ? `"${phrase}"` : phrase; + + // Search both name and value columns + const whereConditions: string[] = [ + `attributes_fts MATCH '${ftsQuery.replace(/'/g, "''")}'` + ]; + + const params: any[] = []; + + // Filter by noteIds if provided + if (noteIds && noteIds.size > 0 && noteIds.size < 1000) { + const noteIdList = Array.from(noteIds); + whereConditions.push(`noteId IN (${noteIdList.map(() => '?').join(',')})`); + params.push(...noteIdList); + } + + const query = ` + SELECT DISTINCT noteId, name, value + FROM attributes_fts + WHERE ${whereConditions.join(' AND ')} + `; + + const results = sql.getRows<{ + noteId: string; + name: string; + value: string; + }>(query, params); + + // Post-filter for exact word matches when operator is "=" + if (operator === "=") { + const matchingNoteIds = new Set(); + for (const result of results) { + // Check if phrase matches attribute name or value with word boundaries + // For attribute names, check exact match (attribute name "test125" matches search "test125") + // For attribute values, check if phrase appears as exact words + const nameMatch = result.name.toLowerCase() === phrase.toLowerCase(); + const valueMatch = result.value ? containsExactPhrase(phrase, result.value) : false; + + if (nameMatch || valueMatch) { + matchingNoteIds.add(result.noteId); + } + } + return matchingNoteIds; + } + + // For other operators, return all matching noteIds + const matchingNoteIds = new Set(results.map(r => r.noteId)); + return matchingNoteIds; + + } catch (error: any) { + log.error(`FTS5 attribute search error: ${error}`); + return new Set(); + } +} + +/** + * Searches protected notes separately (not in FTS index) + * This is a fallback method for protected notes + */ +export function searchProtectedNotesSync( + tokens: string[], + operator: string, + noteIds?: Set, + options: FTSSearchOptions = {} +): FTSSearchResult[] { + if (!protectedSessionService.isProtectedSessionAvailable()) { + return []; + } + + const { + limit = FTS_CONFIG.DEFAULT_LIMIT, + offset = 0 + } = options; + + try { + // Build query for protected notes only + let whereConditions = [`n.isProtected = 1`, `n.isDeleted = 0`]; + const params: any[] = []; + + if (noteIds && noteIds.size > 0) { + const noteIdList = Array.from(noteIds); + whereConditions.push(`n.noteId IN (${noteIdList.map(() => '?').join(',')})`); + params.push(...noteIdList); + } + + // Get protected notes + const protectedNotes = sql.getRows<{ + noteId: string; + title: string; + content: string | null; + }>(` + SELECT n.noteId, n.title, b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE ${whereConditions.join(' AND ')} + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + LIMIT ? OFFSET ? + `, [...params, limit, offset]); + + const results: FTSSearchResult[] = []; + + for (const note of protectedNotes) { + if (!note.content) continue; + + try { + // Decrypt content + const decryptedContent = protectedSessionService.decryptString(note.content); + if (!decryptedContent) continue; + + // Simple token matching for protected notes + const contentLower = decryptedContent.toLowerCase(); + const titleLower = note.title.toLowerCase(); + let matches = false; + + switch (operator) { + case "=": // Exact match + const phrase = tokens.join(' ').toLowerCase(); + matches = contentLower.includes(phrase) || titleLower.includes(phrase); + break; + case "*=*": // Contains all tokens + matches = tokens.every(token => + contentLower.includes(token.toLowerCase()) || + titleLower.includes(token.toLowerCase()) + ); + break; + case "~=": // Contains any token + case "~*": + matches = tokens.some(token => + contentLower.includes(token.toLowerCase()) || + titleLower.includes(token.toLowerCase()) + ); + break; + default: + matches = tokens.every(token => + contentLower.includes(token.toLowerCase()) || + titleLower.includes(token.toLowerCase()) + ); + } + + if (matches) { + results.push({ + noteId: note.noteId, + title: note.title, + score: 1.0, // Simple scoring for protected notes + snippet: generateSnippet(decryptedContent) + }); + } + } catch (error) { + log.info(`Could not decrypt protected note ${note.noteId}`); + } + } + + return results; + } catch (error: any) { + log.error(`Protected notes search error: ${error}`); + return []; + } +} diff --git a/apps/server/src/services/search/fts/types.ts b/apps/server/src/services/search/fts/types.ts new file mode 100644 index 000000000..2ff845bee --- /dev/null +++ b/apps/server/src/services/search/fts/types.ts @@ -0,0 +1,62 @@ +/** + * FTS5 Types and Configuration + * + * Shared interfaces and configuration constants for FTS5 operations. + */ + +import type { FTSError } from "./errors.js"; + +export interface FTSSearchResult { + noteId: string; + title: string; + score: number; + snippet?: string; + highlights?: string[]; +} + +export interface FTSSearchOptions { + limit?: number; + offset?: number; + includeSnippets?: boolean; + snippetLength?: number; + highlightTag?: string; + searchProtected?: boolean; + skipDiagnostics?: boolean; +} + +export interface FTSErrorInfo { + error: FTSError; + fallbackUsed: boolean; + message: string; +} + +export interface FTSIndexStats { + totalDocuments: number; + indexSize: number; + isOptimized: boolean; + dbstatAvailable: boolean; +} + +/** + * Configuration for FTS5 search operations + */ +export const FTS_CONFIG = { + /** Maximum number of results to return by default */ + DEFAULT_LIMIT: 100, + /** Default snippet length in tokens */ + DEFAULT_SNIPPET_LENGTH: 30, + /** Default highlight tags */ + DEFAULT_HIGHLIGHT_START: '', + DEFAULT_HIGHLIGHT_END: '', + /** Maximum query length to prevent DoS */ + MAX_QUERY_LENGTH: 1000, + /** Maximum token length to prevent memory issues */ + MAX_TOKEN_LENGTH: 1000, + /** Threshold for considering a noteIds set as "large" */ + LARGE_SET_THRESHOLD: 1000, + /** SQLite parameter limit (with margin) */ + MAX_PARAMS_PER_QUERY: 900, + /** Snippet column indices */ + SNIPPET_COLUMN_TITLE: 1, + SNIPPET_COLUMN_CONTENT: 2, +} as const; diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index f7f1e5fa3..4c9f58dc8 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -1,1125 +1,48 @@ /** * FTS5 Search Service * - * Encapsulates all FTS5-specific operations for full-text searching. - * Provides efficient text search using SQLite's FTS5 extension with: - * - Trigram tokenization for fast substring matching - * - Snippet extraction for context - * - Highlighting of matched terms - * - Query syntax conversion from Trilium to FTS5 + * This module re-exports from the fts/ folder for backward compatibility. + * New code should import directly from './fts/index.js' or './fts/.js'. */ -import sql from "../sql.js"; -import log from "../log.js"; -import protectedSessionService from "../protected_session.js"; -import striptags from "striptags"; -import { normalize } from "../utils.js"; - -/** - * Custom error classes for FTS operations - */ -export class FTSError extends Error { - constructor(message: string, public readonly code: string, public readonly recoverable: boolean = true) { - super(message); - this.name = 'FTSError'; - } -} - -// FTSNotAvailableError removed - FTS5 is now required and validated at startup - -export class FTSQueryError extends FTSError { - constructor(message: string, public readonly query?: string) { - super(message, 'FTS_QUERY_ERROR', true); - this.name = 'FTSQueryError'; - } -} - -export interface FTSSearchResult { - noteId: string; - title: string; - score: number; - snippet?: string; - highlights?: string[]; -} - -export interface FTSSearchOptions { - limit?: number; - offset?: number; - includeSnippets?: boolean; - snippetLength?: number; - highlightTag?: string; - searchProtected?: boolean; - skipDiagnostics?: boolean; // Skip diagnostic queries for performance measurements -} - -export interface FTSErrorInfo { - error: FTSError; - fallbackUsed: boolean; - message: string; -} - -/** - * Configuration for FTS5 search operations - */ -const FTS_CONFIG = { - /** Maximum number of results to return by default */ - DEFAULT_LIMIT: 100, - /** Default snippet length in tokens */ - DEFAULT_SNIPPET_LENGTH: 30, - /** Default highlight tags */ - DEFAULT_HIGHLIGHT_START: '', - DEFAULT_HIGHLIGHT_END: '', - /** Maximum query length to prevent DoS */ - MAX_QUERY_LENGTH: 1000, - /** Snippet column indices */ - SNIPPET_COLUMN_TITLE: 1, - SNIPPET_COLUMN_CONTENT: 2, -}; - -class FTSSearchService { - /** - * Asserts that FTS5 is available. Should be called at application startup. - * Throws an error if FTS5 tables are not found. - */ - assertFTS5Available(): void { - const result = sql.getValue(` - SELECT COUNT(*) - FROM sqlite_master - WHERE type = 'table' - AND name = 'notes_fts' - `); - - if (result === 0) { - throw new Error("CRITICAL: FTS5 table 'notes_fts' not found. Run database migration."); - } - - log.info("FTS5 tables verified - full-text search is available"); - } - - /** - * Checks if FTS5 is available. - * @returns Always returns true - FTS5 is required and validated at startup. - * @deprecated This method is kept for API compatibility. FTS5 is now required. - */ - checkFTS5Availability(): boolean { - return true; - } - - /** - * Converts Trilium search syntax to FTS5 MATCH syntax - * - * @param tokens - Array of search tokens - * @param operator - Trilium search operator - * @returns FTS5 MATCH query string - */ - convertToFTS5Query(tokens: string[], operator: string): string { - if (!tokens || tokens.length === 0) { - throw new Error("No search tokens provided"); - } - - // Substring operators (*=*, *=, =*) use LIKE queries now, not MATCH - if (operator === "*=*" || operator === "*=" || operator === "=*") { - throw new Error("Substring operators should use searchWithLike(), not MATCH queries"); - } - - // Trigram tokenizer requires minimum 3 characters - const shortTokens = tokens.filter(token => token.length < 3); - if (shortTokens.length > 0) { - const shortList = shortTokens.join(', '); - log.info(`Tokens shorter than 3 characters detected (${shortList}) - cannot use trigram FTS5`); - throw new FTSQueryError( - `Trigram tokenizer requires tokens of at least 3 characters. Short tokens: ${shortList}` - ); - } - - // Sanitize tokens to prevent FTS5 syntax injection - const sanitizedTokens = tokens.map(token => - this.sanitizeFTS5Token(token) - ); - - // Only handle operators that work with MATCH - switch (operator) { - case "=": // Exact phrase match - return `"${sanitizedTokens.join(" ")}"`; - - case "!=": // Does not contain - return `NOT (${sanitizedTokens.join(" OR ")})`; - - case "~=": // Fuzzy match (use OR) - case "~*": - return sanitizedTokens.join(" OR "); - - case "%=": // Regex - uses traditional SQL iteration fallback - throw new FTSQueryError("Regex search not supported in FTS5 - use traditional search path"); - - default: - throw new FTSQueryError(`Unsupported MATCH operator: ${operator}`); - } - } - - /** - * Sanitizes a token for safe use in FTS5 queries - * Validates that the token is not empty after sanitization - */ - private sanitizeFTS5Token(token: string): string { - // Remove special FTS5 characters that could break syntax - const sanitized = token - .replace(/["\(\)\*]/g, '') // Remove quotes, parens, wildcards - .replace(/\s+/g, ' ') // Normalize whitespace - .trim(); - - // Validate that token is not empty after sanitization - if (!sanitized || sanitized.length === 0) { - log.info(`Token became empty after sanitization: "${token}"`); - // Return a safe placeholder that won't match anything - return "__empty_token__"; - } - - // Additional validation: ensure token doesn't contain SQL injection attempts - if (sanitized.includes(';') || sanitized.includes('--')) { - log.error(`Potential SQL injection attempt detected in token: "${token}"`); - return "__invalid_token__"; - } - - return sanitized; - } - - /** - * Escapes LIKE wildcards (% and _) in user input to treat them as literals - * @param str - User input string - * @returns String with LIKE wildcards escaped - */ - private escapeLikeWildcards(str: string): string { - return str.replace(/[%_]/g, '\\$&'); - } - - /** - * Performs substring search using LIKE queries optimized by trigram index - * This is used for *=*, *=, and =* operators with detail='none' - * - * @param tokens - Search tokens - * @param operator - Search operator (*=*, *=, =*) - * @param noteIds - Optional set of note IDs to filter - * @param options - Search options - * @returns Array of search results (noteIds only, no scoring) - */ - searchWithLike( - tokens: string[], - operator: string, - noteIds?: Set, - options: FTSSearchOptions = {} - ): FTSSearchResult[] { - // Handle empty tokens efficiently - return all notes without running diagnostics - if (tokens.length === 0) { - // Empty query means return all indexed notes (optionally filtered by noteIds) - log.info('[FTS-OPTIMIZATION] Empty token array - returning all indexed notes without diagnostics'); - - const results: FTSSearchResult[] = []; - let query: string; - const params: any[] = []; - - if (noteIds && noteIds.size > 0) { - const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds); - if (nonProtectedNoteIds.length === 0) { - return []; // No non-protected notes to search - } - query = `SELECT noteId, title FROM notes_fts WHERE noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`; - params.push(...nonProtectedNoteIds); - } else { - // Return all indexed notes - query = `SELECT noteId, title FROM notes_fts`; - } - - for (const row of sql.iterateRows<{ noteId: string; title: string }>(query, params)) { - results.push({ - noteId: row.noteId, - title: row.title, - score: 0, // No ranking for empty query - snippet: undefined - }); - } - - log.info(`[FTS-OPTIMIZATION] Empty token search returned ${results.length} results`); - return results; - } - - // Normalize tokens to lowercase for case-insensitive search - const normalizedTokens = tokens.map(t => t.toLowerCase()); - - // Validate token lengths to prevent memory issues - const MAX_TOKEN_LENGTH = 1000; - const longTokens = normalizedTokens.filter(t => t.length > MAX_TOKEN_LENGTH); - if (longTokens.length > 0) { - throw new FTSQueryError( - `Search tokens too long (max ${MAX_TOKEN_LENGTH} characters). ` + - `Long tokens: ${longTokens.map(t => t.substring(0, 50) + '...').join(', ')}` - ); - } - - const { - limit, // No default limit - return all results - offset = 0, - skipDiagnostics = false - } = options; - - // Run diagnostics BEFORE the actual search (not counted in performance timing) - if (!skipDiagnostics) { - log.info('[FTS-DIAGNOSTICS] Running index completeness checks (not counted in search timing)...'); - const totalInFts = sql.getValue(`SELECT COUNT(*) FROM notes_fts`); - const totalNotes = sql.getValue(` - SELECT COUNT(*) - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - `); - - if (totalInFts < totalNotes) { - log.info(`[FTS-DIAGNOSTICS] FTS index incomplete: ${totalInFts} indexed out of ${totalNotes} total notes. Run syncMissingNotes().`); - } else { - log.info(`[FTS-DIAGNOSTICS] FTS index complete: ${totalInFts} notes indexed`); - } - } - - try { - // Start timing for actual search (excludes diagnostics) - const searchStartTime = Date.now(); - - // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses - // The FTS table already excludes protected notes, so we can search all notes - const LARGE_SET_THRESHOLD = 1000; - const isLargeNoteSet = noteIds && noteIds.size > LARGE_SET_THRESHOLD; - - if (isLargeNoteSet) { - log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); - } - - // Only filter noteIds if the set is small enough to benefit from it - const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; - const nonProtectedNoteIds = shouldFilterByNoteIds - ? this.filterNonProtectedNoteIds(noteIds) - : []; - - let whereConditions: string[] = []; - const params: any[] = []; - - // Build LIKE conditions for each token - search BOTH title and content - switch (operator) { - case "*=*": // Contains (substring) - normalizedTokens.forEach(token => { - // Search in BOTH title and content with escaped wildcards - whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); - const escapedToken = this.escapeLikeWildcards(token); - params.push(`%${escapedToken}%`, `%${escapedToken}%`); - }); - break; - - case "*=": // Ends with - normalizedTokens.forEach(token => { - whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); - const escapedToken = this.escapeLikeWildcards(token); - params.push(`%${escapedToken}`, `%${escapedToken}`); - }); - break; - - case "=*": // Starts with - normalizedTokens.forEach(token => { - whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); - const escapedToken = this.escapeLikeWildcards(token); - params.push(`${escapedToken}%`, `${escapedToken}%`); - }); - break; - - default: - throw new FTSQueryError(`Unsupported LIKE operator: ${operator}`); - } - - // Validate that we have search criteria - if (whereConditions.length === 0 && nonProtectedNoteIds.length === 0) { - throw new FTSQueryError("No search criteria provided (empty tokens and no note filter)"); - } - - // SQLite parameter limit handling (999 params max) - const MAX_PARAMS_PER_QUERY = 900; // Leave margin for other params - - // Add noteId filter if provided - if (nonProtectedNoteIds.length > 0) { - const tokenParamCount = params.length; - const additionalParams = 2; // For limit and offset - - if (nonProtectedNoteIds.length <= MAX_PARAMS_PER_QUERY - tokenParamCount - additionalParams) { - // Normal case: all IDs fit in one query - whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); - params.push(...nonProtectedNoteIds); - } else { - // Large noteIds set: split into chunks and execute multiple queries - const chunks: string[][] = []; - for (let i = 0; i < nonProtectedNoteIds.length; i += MAX_PARAMS_PER_QUERY) { - chunks.push(nonProtectedNoteIds.slice(i, i + MAX_PARAMS_PER_QUERY)); - } - - log.info(`Large noteIds set detected (${nonProtectedNoteIds.length} notes), splitting into ${chunks.length} chunks`); - - // Execute a query for each chunk and combine results - const allResults: FTSSearchResult[] = []; - let remainingLimit = limit !== undefined ? limit : Number.MAX_SAFE_INTEGER; - let currentOffset = offset; - - for (const chunk of chunks) { - if (remainingLimit <= 0) break; - - const chunkWhereConditions = [...whereConditions]; - const chunkParams: any[] = [...params]; - - chunkWhereConditions.push(`noteId IN (${chunk.map(() => '?').join(',')})`); - chunkParams.push(...chunk); - - // Build chunk query - const chunkQuery = ` - SELECT noteId, title - FROM notes_fts - WHERE ${chunkWhereConditions.join(' AND ')} - ${remainingLimit !== Number.MAX_SAFE_INTEGER ? 'LIMIT ?' : ''} - ${currentOffset > 0 ? 'OFFSET ?' : ''} - `; - - if (remainingLimit !== Number.MAX_SAFE_INTEGER) chunkParams.push(remainingLimit); - if (currentOffset > 0) chunkParams.push(currentOffset); - - const chunkResults = sql.getRows<{ noteId: string; title: string }>(chunkQuery, chunkParams); - allResults.push(...chunkResults.map(row => ({ - noteId: row.noteId, - title: row.title, - score: 1.0 - }))); - - if (remainingLimit !== Number.MAX_SAFE_INTEGER) { - remainingLimit -= chunkResults.length; - } - currentOffset = 0; // Only apply offset to first chunk - } - - const searchTime = Date.now() - searchStartTime; - log.info(`FTS5 LIKE search (chunked) returned ${allResults.length} results in ${searchTime}ms`); - - return allResults; - } - } - - // Build query - LIKE queries are automatically optimized by trigram index - // Only add LIMIT/OFFSET if specified - const query = ` - SELECT noteId, title - FROM notes_fts - WHERE ${whereConditions.join(' AND ')} - ${limit !== undefined ? 'LIMIT ?' : ''} - ${offset > 0 ? 'OFFSET ?' : ''} - `; - - // Only add limit/offset params if specified - if (limit !== undefined) params.push(limit); - if (offset > 0) params.push(offset); - - // Log the search parameters - log.info(`FTS5 LIKE search: tokens=[${normalizedTokens.join(', ')}], operator=${operator}, limit=${limit || 'none'}, offset=${offset}`); - - const rows = sql.getRows<{ noteId: string; title: string }>(query, params); - - const searchTime = Date.now() - searchStartTime; - log.info(`FTS5 LIKE search returned ${rows.length} results in ${searchTime}ms`); - - return rows.map(row => ({ - noteId: row.noteId, - title: row.title, - score: 1.0 // LIKE queries don't have ranking - })); - - } catch (error: any) { - log.error(`FTS5 LIKE search error: ${error}`); - throw new FTSQueryError( - `FTS5 LIKE search failed: ${error.message}`, - undefined - ); - } - } - - /** - * Performs a synchronous full-text search using FTS5 - * - * @param tokens - Search tokens - * @param operator - Search operator - * @param noteIds - Optional set of note IDs to search within - * @param options - Search options - * @returns Array of search results - */ - searchSync( - tokens: string[], - operator: string, - noteIds?: Set, - options: FTSSearchOptions = {} - ): FTSSearchResult[] { - // Handle empty tokens efficiently - return all notes without MATCH query - if (tokens.length === 0) { - log.info('[FTS-OPTIMIZATION] Empty token array in searchSync - returning all indexed notes'); - - // Reuse the empty token logic from searchWithLike - const results: FTSSearchResult[] = []; - let query: string; - const params: any[] = []; - - if (noteIds && noteIds.size > 0) { - const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds); - if (nonProtectedNoteIds.length === 0) { - return []; // No non-protected notes to search - } - query = `SELECT noteId, title FROM notes_fts WHERE noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`; - params.push(...nonProtectedNoteIds); - } else { - // Return all indexed notes - query = `SELECT noteId, title FROM notes_fts`; - } - - for (const row of sql.iterateRows<{ noteId: string; title: string }>(query, params)) { - results.push({ - noteId: row.noteId, - title: row.title, - score: 0, // No ranking for empty query - snippet: undefined - }); - } - - log.info(`[FTS-OPTIMIZATION] Empty token search returned ${results.length} results`); - return results; - } - - const { - limit = FTS_CONFIG.DEFAULT_LIMIT, - offset = 0, - includeSnippets = true, - snippetLength = FTS_CONFIG.DEFAULT_SNIPPET_LENGTH, - highlightTag = FTS_CONFIG.DEFAULT_HIGHLIGHT_START, - searchProtected = false - } = options; - - try { - // Start timing for actual search - const searchStartTime = Date.now(); - - const ftsQuery = this.convertToFTS5Query(tokens, operator); - - // Validate query length - if (ftsQuery.length > FTS_CONFIG.MAX_QUERY_LENGTH) { - throw new FTSQueryError( - `Query too long: ${ftsQuery.length} characters (max: ${FTS_CONFIG.MAX_QUERY_LENGTH})`, - ftsQuery - ); - } - - // Check if we're searching for protected notes - // Protected notes are NOT in the FTS index, so we need to handle them separately - if (searchProtected && protectedSessionService.isProtectedSessionAvailable()) { - log.info("Protected session available - will search protected notes separately"); - // Return empty results from FTS and let the caller handle protected notes - // The caller should use a fallback search method for protected notes - return []; - } - - // Build the SQL query - let whereConditions = [`notes_fts MATCH ?`]; - const params: any[] = [ftsQuery]; - - // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses - // The FTS table already excludes protected notes, so we can search all notes - const LARGE_SET_THRESHOLD = 1000; - const isLargeNoteSet = noteIds && noteIds.size > LARGE_SET_THRESHOLD; - - if (isLargeNoteSet) { - log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); - } - - // Filter by noteIds if provided and set is small enough - const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; - if (shouldFilterByNoteIds) { - // First filter out any protected notes from the noteIds - const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds!); - if (nonProtectedNoteIds.length === 0) { - // All provided notes are protected, return empty results - return []; - } - whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); - params.push(...nonProtectedNoteIds); - } - - // Build snippet extraction if requested - const snippetSelect = includeSnippets - ? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '(query, params); - - // Post-filter for exact match operator (=) to handle word boundaries - // Trigram FTS5 doesn't respect word boundaries in phrase queries, - // so "test123" matches "test1234" due to shared trigrams. - // We need to post-filter results to only include exact word matches. - if (operator === "=") { - const phrase = tokens.join(" "); - results = results.filter(result => { - // Use content from result if available, otherwise fetch it - let noteContent = result.content; - if (!noteContent) { - noteContent = sql.getValue(` - SELECT b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId = ? - `, [result.noteId]); - } - - if (!noteContent) { - return false; - } - - // Check if phrase appears as exact words in content or title - return this.containsExactPhrase(phrase, result.title) || - this.containsExactPhrase(phrase, noteContent); - }); - } - - const searchTime = Date.now() - searchStartTime; - log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`); - - return results; - - } catch (error: any) { - // Provide structured error information - if (error instanceof FTSError) { - throw error; - } - - log.error(`FTS5 search error: ${error}`); - - // Determine if this is a recoverable error - const isRecoverable = - error.message?.includes('syntax error') || - error.message?.includes('malformed MATCH') || - error.message?.includes('no such table'); - - throw new FTSQueryError( - `FTS5 search failed: ${error.message}. ${isRecoverable ? 'Falling back to standard search.' : ''}`, - undefined - ); - } - } - - /** - * Filters out protected note IDs from the given set - */ - private filterNonProtectedNoteIds(noteIds: Set): string[] { - const noteIdList = Array.from(noteIds); - const placeholders = noteIdList.map(() => '?').join(','); - - const nonProtectedNotes = sql.getColumn(` - SELECT noteId - FROM notes - WHERE noteId IN (${placeholders}) - AND isProtected = 0 - `, noteIdList); - - return nonProtectedNotes; - } - - /** - * Checks if a phrase appears as exact words in text (respecting word boundaries) - * @param phrase - The phrase to search for (case-insensitive) - * @param text - The text to search in - * @returns true if the phrase appears as complete words, false otherwise - */ - private containsExactPhrase(phrase: string, text: string | null | undefined): boolean { - if (!text || !phrase || typeof text !== 'string') { - return false; - } - - // Normalize both to lowercase for case-insensitive comparison - const normalizedPhrase = phrase.toLowerCase().trim(); - const normalizedText = text.toLowerCase(); - - // Strip HTML tags for content matching - const plainText = striptags(normalizedText); - - // For single words, use word-boundary matching - if (!normalizedPhrase.includes(' ')) { - // Split text into words and check for exact match - const words = plainText.split(/\s+/); - return words.some(word => word === normalizedPhrase); - } - - // For multi-word phrases, check if the phrase appears as consecutive words - // Split text into words, then check if the phrase appears in the word sequence - const textWords = plainText.split(/\s+/); - const phraseWords = normalizedPhrase.split(/\s+/); - - // Sliding window to find exact phrase match - for (let i = 0; i <= textWords.length - phraseWords.length; i++) { - let match = true; - for (let j = 0; j < phraseWords.length; j++) { - if (textWords[i + j] !== phraseWords[j]) { - match = false; - break; - } - } - if (match) { - return true; - } - } - - return false; - } - - /** - * Searches attributes using FTS5 - * Returns noteIds of notes that have matching attributes - */ - searchAttributesSync( - tokens: string[], - operator: string, - noteIds?: Set - ): Set { - // Check if attributes_fts table exists - const tableExists = sql.getValue(` - SELECT COUNT(*) - FROM sqlite_master - WHERE type='table' AND name='attributes_fts' - `); - - if (!tableExists) { - log.info("attributes_fts table does not exist - skipping FTS attribute search"); - return new Set(); - } - - try { - // Sanitize tokens to prevent FTS5 syntax injection - const sanitizedTokens = tokens.map(token => this.sanitizeFTS5Token(token)); - - // Check if any tokens became invalid after sanitization - if (sanitizedTokens.some(t => t === '__empty_token__' || t === '__invalid_token__')) { - return new Set(); - } - - const phrase = sanitizedTokens.join(" "); - - // Build FTS5 query for exact match - const ftsQuery = operator === "=" ? `"${phrase}"` : phrase; - - // Search both name and value columns - const whereConditions: string[] = [ - `attributes_fts MATCH '${ftsQuery.replace(/'/g, "''")}'` - ]; - - const params: any[] = []; - - // Filter by noteIds if provided - if (noteIds && noteIds.size > 0 && noteIds.size < 1000) { - const noteIdList = Array.from(noteIds); - whereConditions.push(`noteId IN (${noteIdList.map(() => '?').join(',')})`); - params.push(...noteIdList); - } - - const query = ` - SELECT DISTINCT noteId, name, value - FROM attributes_fts - WHERE ${whereConditions.join(' AND ')} - `; - - const results = sql.getRows<{ - noteId: string; - name: string; - value: string; - }>(query, params); - - // Post-filter for exact word matches when operator is "=" - if (operator === "=") { - const matchingNoteIds = new Set(); - for (const result of results) { - // Check if phrase matches attribute name or value with word boundaries - // For attribute names, check exact match (attribute name "test125" matches search "test125") - // For attribute values, check if phrase appears as exact words - const nameMatch = result.name.toLowerCase() === phrase.toLowerCase(); - const valueMatch = result.value ? this.containsExactPhrase(phrase, result.value) : false; - - if (nameMatch || valueMatch) { - matchingNoteIds.add(result.noteId); - } - } - return matchingNoteIds; - } - - // For other operators, return all matching noteIds - const matchingNoteIds = new Set(results.map(r => r.noteId)); - return matchingNoteIds; - - } catch (error: any) { - log.error(`FTS5 attribute search error: ${error}`); - return new Set(); - } - } - - /** - * Searches protected notes separately (not in FTS index) - * This is a fallback method for protected notes - */ - searchProtectedNotesSync( - tokens: string[], - operator: string, - noteIds?: Set, - options: FTSSearchOptions = {} - ): FTSSearchResult[] { - if (!protectedSessionService.isProtectedSessionAvailable()) { - return []; - } - - const { - limit = FTS_CONFIG.DEFAULT_LIMIT, - offset = 0 - } = options; - - try { - // Build query for protected notes only - let whereConditions = [`n.isProtected = 1`, `n.isDeleted = 0`]; - const params: any[] = []; - - if (noteIds && noteIds.size > 0) { - const noteIdList = Array.from(noteIds); - whereConditions.push(`n.noteId IN (${noteIdList.map(() => '?').join(',')})`); - params.push(...noteIdList); - } - - // Get protected notes - const protectedNotes = sql.getRows<{ - noteId: string; - title: string; - content: string | null; - }>(` - SELECT n.noteId, n.title, b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE ${whereConditions.join(' AND ')} - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - LIMIT ? OFFSET ? - `, [...params, limit, offset]); - - const results: FTSSearchResult[] = []; - - for (const note of protectedNotes) { - if (!note.content) continue; - - try { - // Decrypt content - const decryptedContent = protectedSessionService.decryptString(note.content); - if (!decryptedContent) continue; - - // Simple token matching for protected notes - const contentLower = decryptedContent.toLowerCase(); - const titleLower = note.title.toLowerCase(); - let matches = false; - - switch (operator) { - case "=": // Exact match - const phrase = tokens.join(' ').toLowerCase(); - matches = contentLower.includes(phrase) || titleLower.includes(phrase); - break; - case "*=*": // Contains all tokens - matches = tokens.every(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - break; - case "~=": // Contains any token - case "~*": - matches = tokens.some(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - break; - default: - matches = tokens.every(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - } - - if (matches) { - results.push({ - noteId: note.noteId, - title: note.title, - score: 1.0, // Simple scoring for protected notes - snippet: this.generateSnippet(decryptedContent) - }); - } - } catch (error) { - log.info(`Could not decrypt protected note ${note.noteId}`); - } - } - - return results; - } catch (error: any) { - log.error(`Protected notes search error: ${error}`); - return []; - } - } - - /** - * Generates a snippet from content - */ - private generateSnippet(content: string, maxLength: number = 30): string { - // Strip HTML tags for snippet - const plainText = striptags(content); - const normalized = normalize(plainText); - - if (normalized.length <= maxLength * 10) { - return normalized; - } - - // Extract snippet around first occurrence - return normalized.substring(0, maxLength * 10) + '...'; - } - - /** - * Updates the FTS index for a specific note (synchronous) - * - * @param noteId - The note ID to update - * @param title - The note title - * @param content - The note content - */ - updateNoteIndex(noteId: string, title: string, content: string): void { - try { - sql.transactional(() => { - // Delete existing entry - sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); - - // Insert new entry - sql.execute(` - INSERT INTO notes_fts (noteId, title, content) - VALUES (?, ?, ?) - `, [noteId, title, content]); - }); - } catch (error) { - log.error(`Failed to update FTS index for note ${noteId}: ${error}`); - } - } - - /** - * Removes a note from the FTS index (synchronous) - * - * @param noteId - The note ID to remove - */ - removeNoteFromIndex(noteId: string): void { - try { - sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); - } catch (error) { - log.error(`Failed to remove note ${noteId} from FTS index: ${error}`); - } - } - - /** - * Syncs missing notes to the FTS index (synchronous) - * This is useful after bulk operations like imports where triggers might not fire - * - * @param noteIds - Optional array of specific note IDs to sync. If not provided, syncs all missing notes. - * @returns The number of notes that were synced - */ - syncMissingNotes(noteIds?: string[]): number { - try { - let syncedCount = 0; - - sql.transactional(() => { - let query: string; - let params: any[] = []; - - if (noteIds && noteIds.length > 0) { - // Sync specific notes that are missing from FTS - const placeholders = noteIds.map(() => '?').join(','); - query = ` - WITH missing_notes AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId IN (${placeholders}) - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `; - params = noteIds; - } else { - // Sync all missing notes - query = ` - WITH missing_notes AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `; - } - - const result = sql.execute(query, params); - syncedCount = result.changes; - - if (syncedCount > 0) { - log.info(`Synced ${syncedCount} missing notes to FTS index`); - // Optimize if we synced a significant number of notes - if (syncedCount > 100) { - sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - } - } - }); - - return syncedCount; - } catch (error) { - log.error(`Failed to sync missing notes to FTS index: ${error}`); - return 0; - } - } - - /** - * Rebuilds the entire FTS index (synchronous) - * This is useful for maintenance or after bulk operations - */ - rebuildIndex(): void { - log.info("Rebuilding FTS5 index..."); - - try { - sql.transactional(() => { - // Clear existing index - sql.execute(`DELETE FROM notes_fts`); - - // Rebuild from notes - sql.execute(` - INSERT INTO notes_fts (noteId, title, content) - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - `); - - // Optimize the FTS table - sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - }); - - log.info("FTS5 index rebuild completed"); - } catch (error) { - log.error(`Failed to rebuild FTS index: ${error}`); - throw error; - } - } - - /** - * Gets statistics about the FTS index (synchronous) - * Includes fallback when dbstat is not available - */ - getIndexStats(): { - totalDocuments: number; - indexSize: number; - isOptimized: boolean; - dbstatAvailable: boolean; - } { - const totalDocuments = sql.getValue(` - SELECT COUNT(*) FROM notes_fts - `) || 0; - - let indexSize = 0; - let dbstatAvailable = false; - - try { - // Try to get index size from dbstat - // dbstat is a virtual table that may not be available in all SQLite builds - indexSize = sql.getValue(` - SELECT SUM(pgsize) - FROM dbstat - WHERE name LIKE 'notes_fts%' - `) || 0; - dbstatAvailable = true; - } catch (error: any) { - // dbstat not available, use fallback - if (error.message?.includes('no such table: dbstat')) { - log.info("dbstat virtual table not available, using fallback for index size estimation"); - - // Fallback: Estimate based on number of documents and average content size - try { - const avgContentSize = sql.getValue(` - SELECT AVG(LENGTH(content) + LENGTH(title)) - FROM notes_fts - LIMIT 1000 - `) || 0; - - // Rough estimate: avg size * document count * overhead factor - indexSize = Math.round(avgContentSize * totalDocuments * 1.5); - } catch (fallbackError) { - log.info(`Could not estimate index size: ${fallbackError}`); - indexSize = 0; - } - } else { - log.error(`Error accessing dbstat: ${error}`); - } - } - - return { - totalDocuments, - indexSize, - isOptimized: true, // FTS5 manages optimization internally - dbstatAvailable - }; - } -} - -// Export singleton instance -export const ftsSearchService = new FTSSearchService(); - -export default ftsSearchService; \ No newline at end of file +export { + // Error classes + FTSError, + FTSQueryError, + + // Types and configuration + FTS_CONFIG, + type FTSSearchResult, + type FTSSearchOptions, + type FTSErrorInfo, + type FTSIndexStats, + + // Query building utilities + convertToFTS5Query, + sanitizeFTS5Token, + escapeLikeWildcards, + containsExactPhrase, + generateSnippet, + + // Index management + assertFTS5Available, + checkFTS5Availability, + updateNoteIndex, + removeNoteFromIndex, + syncMissingNotes, + rebuildIndex, + getIndexStats, + filterNonProtectedNoteIds, + + // Search operations + searchWithLike, + searchSync, + searchAttributesSync, + searchProtectedNotesSync, + + // Legacy class-based API + ftsSearchService +} from "./fts/index.js"; + +// Default export for backward compatibility +export { default } from "./fts/index.js";