From 7c5553bd4b0784e979e966501c4d9488defe90c5 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Mon, 1 Sep 2025 21:40:05 -0700 Subject: [PATCH] feat(search): further improve fts search --- .../src/migrations/0234__add_fts5_search.ts | 59 +++----------- apps/server/src/routes/api/search.ts | 3 +- apps/server/src/services/search/fts_search.ts | 78 ++++++++++++++++--- 3 files changed, 83 insertions(+), 57 deletions(-) diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts index cf0116313..366531572 100644 --- a/apps/server/src/migrations/0234__add_fts5_search.ts +++ b/apps/server/src/migrations/0234__add_fts5_search.ts @@ -17,18 +17,9 @@ export default function addFTS5SearchAndPerformanceIndexes() { // Create FTS5 virtual table with porter tokenizer log.info("Creating FTS5 virtual table..."); - // Set optimal SQLite pragmas for FTS5 operations with millions of notes + // Note: Transaction-safe pragmas are excluded here. + // They should be set at database initialization, not during migration. sql.executeScript(` - -- Memory and performance pragmas for large-scale FTS operations - PRAGMA cache_size = -262144; -- 256MB cache for better performance - PRAGMA temp_store = MEMORY; -- Use RAM for temporary storage - PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O - PRAGMA synchronous = NORMAL; -- Faster writes with good safety - PRAGMA journal_mode = WAL; -- Write-ahead logging for better concurrency - PRAGMA wal_autocheckpoint = 1000; -- Auto-checkpoint every 1000 pages - PRAGMA automatic_index = ON; -- Allow automatic indexes - PRAGMA threads = 4; -- Use multiple threads for sorting - -- Drop existing FTS tables if they exist DROP TABLE IF EXISTS notes_fts; DROP TABLE IF EXISTS notes_fts_trigram; @@ -70,11 +61,6 @@ export default function addFTS5SearchAndPerformanceIndexes() { // Process in optimized batches using a prepared statement sql.transactional(() => { - // Prepare statement for batch inserts - const insertStmt = sql.prepare(` - INSERT OR REPLACE INTO notes_fts (noteId, title, content) - VALUES (?, ?, ?) - `); let offset = 0; while (offset < totalNotes) { @@ -98,9 +84,12 @@ export default function addFTS5SearchAndPerformanceIndexes() { break; } - // Batch insert using prepared statement + // Batch insert for (const note of notesBatch) { - insertStmt.run(note.noteId, note.title, note.content); + sql.execute( + `INSERT OR REPLACE INTO notes_fts (noteId, title, content) VALUES (?, ?, ?)`, + [note.noteId, note.title, note.content] + ); } offset += notesBatch.length; @@ -116,9 +105,6 @@ export default function addFTS5SearchAndPerformanceIndexes() { break; } } - - // Finalize prepared statement - insertStmt.finalize(); }); } catch (error) { log.error(`Failed to populate FTS index: ${error}`); @@ -243,32 +229,11 @@ export default function addFTS5SearchAndPerformanceIndexes() { log.info("Optimizing FTS5 index..."); sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - // Set comprehensive SQLite pragmas optimized for millions of notes - log.info("Configuring SQLite pragmas for large-scale FTS performance..."); - - sql.executeScript(` - -- Memory Management (Critical for large databases) - PRAGMA cache_size = -262144; -- 256MB cache (was 50MB) - critical for FTS performance - PRAGMA temp_store = MEMORY; -- Use memory for temporary tables and indices - PRAGMA mmap_size = 536870912; -- 512MB memory-mapped I/O for better read performance - - -- Write Optimization (Important for batch operations) - PRAGMA synchronous = NORMAL; -- Balance between safety and performance (was FULL) - PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency - PRAGMA wal_autocheckpoint = 1000; -- Checkpoint every 1000 pages for memory management - - -- Query Optimization (Essential for FTS queries) - PRAGMA automatic_index = ON; -- Allow SQLite to create automatic indexes - PRAGMA optimize; -- Update query planner statistics - - -- FTS-Specific Optimizations - PRAGMA threads = 4; -- Use multiple threads for FTS operations (if available) - - -- Run comprehensive ANALYZE on all FTS-related tables - ANALYZE notes_fts; - ANALYZE notes; - ANALYZE blobs; - `); + // Run ANALYZE on FTS-related tables (these are safe within transactions) + log.info("Analyzing FTS tables for query optimization..."); + sql.execute(`ANALYZE notes_fts`); + sql.execute(`ANALYZE notes`); + sql.execute(`ANALYZE blobs`); log.info("FTS5 migration completed successfully"); } \ No newline at end of file diff --git a/apps/server/src/routes/api/search.ts b/apps/server/src/routes/api/search.ts index 49c1fadbc..5a83e5e9b 100644 --- a/apps/server/src/routes/api/search.ts +++ b/apps/server/src/routes/api/search.ts @@ -141,7 +141,8 @@ function syncFtsIndex(req: Request) { log.info(`FTS sync requested for ${noteIds?.length || 'all'} notes`); - const syncedCount = ftsSearchService.syncMissingNotes(noteIds); + // syncMissingNotes doesn't accept parameters - it syncs all missing notes + const syncedCount = ftsSearchService.syncMissingNotes(); return { success: true, diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index e31fc6e93..6205b7ca2 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -30,6 +30,7 @@ export interface FTSSearchOptions { limit?: number; offset?: number; searchProtected?: boolean; + includeSnippets?: boolean; } /** @@ -289,17 +290,12 @@ class FTSSearchService { // Insert missing notes using efficient batch processing sql.transactional(() => { - // Use prepared statement for better performance - const insertStmt = sql.prepare(` - INSERT OR REPLACE INTO notes_fts (noteId, title, content) - VALUES (?, ?, ?) - `); - for (const note of missingNotes) { - insertStmt.run(note.noteId, note.title, note.content); + sql.execute( + `INSERT OR REPLACE INTO notes_fts (noteId, title, content) VALUES (?, ?, ?)`, + [note.noteId, note.title, note.content] + ); } - - insertStmt.finalize(); }); log.info(`Synced ${missingNotes.length} missing notes to FTS index`); @@ -485,6 +481,70 @@ class FTSSearchService { return { documentCount: 0, indexSize: 0 }; } } + + /** + * Get FTS index statistics (alias for getStatistics for API compatibility) + */ + getIndexStats(): { totalDocuments: number; indexSize: number } { + const stats = this.getStatistics(); + return { + totalDocuments: stats.documentCount, + indexSize: stats.indexSize + }; + } + + /** + * Rebuild the entire FTS index from scratch + */ + rebuildIndex(): void { + if (!this.checkFTS5Availability()) { + throw new FTSNotAvailableError(); + } + + try { + log.info("Starting FTS index rebuild"); + + sql.transactional(() => { + // Clear existing index + sql.execute(`DELETE FROM notes_fts`); + + // Rebuild from all eligible notes + const notes = sql.getRows<{noteId: string, title: string, content: string}>(` + SELECT n.noteId, n.title, b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `); + + if (notes && notes.length > 0) { + // Process in batches for better performance + const batchSize = FTS_CONFIG.BATCH_SIZE; + + for (let i = 0; i < notes.length; i += batchSize) { + const batch = notes.slice(i, i + batchSize); + + for (const note of batch) { + sql.execute( + `INSERT INTO notes_fts (noteId, title, content) VALUES (?, ?, ?)`, + [note.noteId, note.title, note.content] + ); + } + } + + log.info(`Rebuilt FTS index with ${notes.length} notes`); + } + }); + + // Optimize after rebuild + this.optimizeIndex(); + } catch (error) { + log.error(`Error rebuilding FTS index: ${error}`); + throw new FTSError(`Failed to rebuild FTS index: ${error}`, 'FTS_REBUILD_ERROR'); + } + } } // Export singleton instance