From 5b79e0d71ed9658e82cf050e23625370ec2ea52e Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sat, 30 Aug 2025 22:30:01 -0700 Subject: [PATCH] feat(search): try to decrease complexity --- .../src/migrations/0234__add_fts5_search.ts | 628 ++-------- .../0234__add_fts5_search_minimal.ts | 216 ++++ .../src/services/search/fts_search.test.ts | 436 ++++--- apps/server/src/services/search/fts_search.ts | 1041 ++++++----------- .../src/services/search/fts_search_minimal.ts | 461 ++++++++ scripts/stress-test-native-simple.ts | 442 ++++--- scripts/stress-test-native.ts | 633 ++++++---- 7 files changed, 2044 insertions(+), 1813 deletions(-) create mode 100644 apps/server/src/migrations/0234__add_fts5_search_minimal.ts create mode 100644 apps/server/src/services/search/fts_search_minimal.ts diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts index 47fbb4e043..40e2cdadbc 100644 --- a/apps/server/src/migrations/0234__add_fts5_search.ts +++ b/apps/server/src/migrations/0234__add_fts5_search.ts @@ -1,72 +1,66 @@ /** - * Migration to add FTS5 full-text search support and strategic performance indexes + * Migration to add FTS5 full-text search support * - * This migration: - * 1. Creates an FTS5 virtual table for full-text searching - * 2. Populates it with existing note content - * 3. Creates triggers to keep the FTS table synchronized with note changes - * 4. Adds strategic composite and covering indexes for improved query performance - * 5. Optimizes common query patterns identified through performance analysis + * This migration implements a minimal FTS5 search solution that: + * 1. Uses a single FTS5 table with porter tokenizer for stemming + * 2. Implements simple triggers for synchronization + * 3. Excludes protected notes from indexing + * 4. Sets essential performance pragmas */ import sql from "../services/sql.js"; import log from "../services/log.js"; export default function addFTS5SearchAndPerformanceIndexes() { - log.info("Starting FTS5 and performance optimization migration..."); + log.info("Setting up FTS5 search..."); - // Part 1: FTS5 Setup - log.info("Creating FTS5 virtual table for full-text search..."); - - // Create FTS5 virtual tables - // We create two FTS tables for different search strategies: - // 1. notes_fts: Uses porter stemming for word-based searches - // 2. notes_fts_trigram: Uses trigram tokenizer for substring searches + // Create FTS5 virtual table with porter tokenizer + log.info("Creating FTS5 virtual table..."); sql.executeScript(` - -- Drop existing FTS tables if they exist (for re-running migration in dev) + -- Drop existing FTS tables if they exist DROP TABLE IF EXISTS notes_fts; DROP TABLE IF EXISTS notes_fts_trigram; + DROP TABLE IF EXISTS notes_fts_config; + DROP TABLE IF EXISTS notes_fts_stats; + DROP TABLE IF EXISTS notes_fts_aux; - -- Create FTS5 virtual table with porter stemming for word-based searches + -- Create FTS5 virtual table with porter tokenizer for stemming CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( noteId UNINDEXED, title, content, - tokenize = 'porter unicode61' - ); - - -- Create FTS5 virtual table with trigram tokenizer for substring searches - -- detail='none' reduces storage by ~50% since we don't need snippets for substring search - CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts_trigram USING fts5( - noteId UNINDEXED, - title, - content, - tokenize = 'trigram', - detail = 'none' + tokenize = 'porter unicode61', + prefix = '2 3' -- Index prefixes of 2 and 3 characters for faster prefix searches ); `); log.info("Populating FTS5 table with existing note content..."); // Populate the FTS table with existing notes - // We only index text-based note types that contain searchable content - const batchSize = 100; + const batchSize = 1000; let processedCount = 0; - let hasError = false; - // Wrap entire population process in a transaction for consistency - // If any error occurs, the entire population will be rolled back try { sql.transactional(() => { - let offset = 0; + // Count eligible notes + const totalNotes = sql.getValue(` + SELECT COUNT(*) + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `) || 0; - while (true) { - const notes = sql.getRows<{ - noteId: string; - title: string; - content: string | null; - }>(` + log.info(`Found ${totalNotes} notes to index`); + + // Insert notes in batches + let offset = 0; + while (offset < totalNotes) { + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) SELECT n.noteId, n.title, @@ -75,212 +69,120 @@ export default function addFTS5SearchAndPerformanceIndexes() { LEFT JOIN blobs b ON n.blobId = b.blobId WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 - AND n.isProtected = 0 -- Skip protected notes - they require special handling + AND n.isProtected = 0 + AND b.content IS NOT NULL ORDER BY n.noteId LIMIT ? OFFSET ? `, [batchSize, offset]); - - if (notes.length === 0) { - break; - } - - for (const note of notes) { - if (note.content) { - // Process content based on type (simplified for migration) - let processedContent = note.content; - - // For HTML content, we'll strip tags in the search service - // For now, just insert the raw content - - // Insert into porter FTS for word-based searches - sql.execute(` - INSERT INTO notes_fts (noteId, title, content) - VALUES (?, ?, ?) - `, [note.noteId, note.title, processedContent]); - - // Also insert into trigram FTS for substring searches - sql.execute(` - INSERT INTO notes_fts_trigram (noteId, title, content) - VALUES (?, ?, ?) - `, [note.noteId, note.title, processedContent]); - - processedCount++; - } - } - - offset += batchSize; - if (processedCount % 1000 === 0) { - log.info(`Processed ${processedCount} notes for FTS indexing...`); + offset += batchSize; + processedCount = Math.min(offset, totalNotes); + + if (processedCount % 10000 === 0) { + log.info(`Indexed ${processedCount} of ${totalNotes} notes...`); } } }); } catch (error) { - hasError = true; - log.error(`Failed to populate FTS index. Rolling back... ${error}`); - // Clean up partial data if transaction failed - try { - sql.execute("DELETE FROM notes_fts"); - } catch (cleanupError) { - log.error(`Failed to clean up FTS table after error: ${cleanupError}`); - } + log.error(`Failed to populate FTS index: ${error}`); throw new Error(`FTS5 migration failed during population: ${error}`); } log.info(`Completed FTS indexing of ${processedCount} notes`); - // Create triggers to keep FTS table synchronized + // Create synchronization triggers log.info("Creating FTS synchronization triggers..."); - // Drop all existing triggers first to ensure clean state - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_insert`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_update`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_delete`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_soft_delete`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_blob_insert`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_blob_update`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_protect`); - sql.execute(`DROP TRIGGER IF EXISTS notes_fts_unprotect`); - - // Create improved triggers that handle all SQL operations properly - // including INSERT OR REPLACE and INSERT ... ON CONFLICT ... DO UPDATE (upsert) + // Drop all existing triggers first + const existingTriggers = [ + 'notes_fts_insert', 'notes_fts_update', 'notes_fts_delete', + 'notes_fts_soft_delete', 'notes_fts_blob_insert', 'notes_fts_blob_update', + 'notes_fts_protect', 'notes_fts_unprotect', 'notes_fts_sync', + 'notes_fts_update_sync', 'notes_fts_delete_sync', 'blobs_fts_sync', + 'blobs_fts_insert_sync' + ]; - // Trigger for INSERT operations on notes + for (const trigger of existingTriggers) { + sql.execute(`DROP TRIGGER IF EXISTS ${trigger}`); + } + + // Create triggers for notes table operations sql.execute(` - CREATE TRIGGER notes_fts_insert + CREATE TRIGGER notes_fts_insert AFTER INSERT ON notes - WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND NEW.isDeleted = 0 AND NEW.isProtected = 0 BEGIN - -- First delete any existing FTS entries (in case of INSERT OR REPLACE) - DELETE FROM notes_fts WHERE noteId = NEW.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - - -- Then insert the new entry into both FTS tables, using LEFT JOIN to handle missing blobs INSERT INTO notes_fts (noteId, title, content) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId; - - INSERT INTO notes_fts_trigram (noteId, title, content) SELECT NEW.noteId, NEW.title, COALESCE(b.content, '') - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId; - END + FROM (SELECT NEW.blobId AS blobId) AS note_blob + LEFT JOIN blobs b ON b.blobId = note_blob.blobId; + END; `); - // Trigger for UPDATE operations on notes table - // Fires for ANY update to searchable notes to ensure FTS stays in sync sql.execute(` - CREATE TRIGGER notes_fts_update + CREATE TRIGGER notes_fts_update AFTER UPDATE ON notes - WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - -- Fire on any change, not just specific columns, to handle all upsert scenarios BEGIN - -- Always delete the old entries from both FTS tables - DELETE FROM notes_fts WHERE noteId = NEW.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; + -- Delete old entry + DELETE FROM notes_fts WHERE noteId = OLD.noteId; - -- Insert new entry into both FTS tables if note is not deleted and not protected + -- Insert new entry if eligible INSERT INTO notes_fts (noteId, title, content) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId - WHERE NEW.isDeleted = 0 - AND NEW.isProtected = 0; - - INSERT INTO notes_fts_trigram (noteId, title, content) SELECT NEW.noteId, NEW.title, COALESCE(b.content, '') - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId - WHERE NEW.isDeleted = 0 + FROM (SELECT NEW.blobId AS blobId) AS note_blob + LEFT JOIN blobs b ON b.blobId = note_blob.blobId + WHERE NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND NEW.isDeleted = 0 AND NEW.isProtected = 0; - END + END; `); - // Trigger for DELETE operations on notes sql.execute(` - CREATE TRIGGER notes_fts_delete + CREATE TRIGGER notes_fts_delete AFTER DELETE ON notes BEGIN DELETE FROM notes_fts WHERE noteId = OLD.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = OLD.noteId; - END + END; `); - // Trigger for soft delete (isDeleted = 1) + // Create triggers for blob updates sql.execute(` - CREATE TRIGGER notes_fts_soft_delete - AFTER UPDATE ON notes - WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1 + CREATE TRIGGER blobs_fts_update + AFTER UPDATE ON blobs BEGIN - DELETE FROM notes_fts WHERE noteId = NEW.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - END - `); - - // Trigger for notes becoming protected - sql.execute(` - CREATE TRIGGER notes_fts_protect - AFTER UPDATE ON notes - WHEN OLD.isProtected = 0 AND NEW.isProtected = 1 - BEGIN - DELETE FROM notes_fts WHERE noteId = NEW.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; - END - `); - - // Trigger for notes becoming unprotected - sql.execute(` - CREATE TRIGGER notes_fts_unprotect - AFTER UPDATE ON notes - WHEN OLD.isProtected = 1 AND NEW.isProtected = 0 - AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND NEW.isDeleted = 0 - BEGIN - DELETE FROM notes_fts WHERE noteId = NEW.noteId; - DELETE FROM notes_fts_trigram WHERE noteId = NEW.noteId; + -- Update all notes that reference this blob + DELETE FROM notes_fts + WHERE noteId IN ( + SELECT noteId FROM notes + WHERE blobId = NEW.blobId + ); INSERT INTO notes_fts (noteId, title, content) SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, '') - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId; - - INSERT INTO notes_fts_trigram (noteId, title, content) - SELECT - NEW.noteId, - NEW.title, - COALESCE(b.content, '') - FROM (SELECT NEW.noteId) AS note_select - LEFT JOIN blobs b ON b.blobId = NEW.blobId; - END + n.noteId, + n.title, + NEW.content + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; + END; `); - // Trigger for INSERT operations on blobs - // Uses INSERT OR REPLACE for efficiency with deduplicated blobs sql.execute(` - CREATE TRIGGER notes_fts_blob_insert + CREATE TRIGGER blobs_fts_insert AFTER INSERT ON blobs BEGIN - -- Use INSERT OR REPLACE for atomic update in both FTS tables - -- This handles the case where FTS entries may already exist - INSERT OR REPLACE INTO notes_fts (noteId, title, content) + INSERT INTO notes_fts (noteId, title, content) SELECT n.noteId, n.title, @@ -290,340 +192,26 @@ export default function addFTS5SearchAndPerformanceIndexes() { AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND n.isDeleted = 0 AND n.isProtected = 0; - - INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) - SELECT - n.noteId, - n.title, - NEW.content - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0; - END - `); - - // Trigger for UPDATE operations on blobs - // Uses INSERT OR REPLACE for efficiency - sql.execute(` - CREATE TRIGGER notes_fts_blob_update - AFTER UPDATE ON blobs - BEGIN - -- Use INSERT OR REPLACE for atomic update in both FTS tables - INSERT OR REPLACE INTO notes_fts (noteId, title, content) - SELECT - n.noteId, - n.title, - NEW.content - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0; - - INSERT OR REPLACE INTO notes_fts_trigram (noteId, title, content) - SELECT - n.noteId, - n.title, - NEW.content - FROM notes n - WHERE n.blobId = NEW.blobId - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0; - END + END; `); log.info("FTS5 setup completed successfully"); - // Final cleanup: ensure all eligible notes are indexed in both FTS tables - // This catches any edge cases where notes might have been missed - log.info("Running final FTS index cleanup..."); + // Run optimization + log.info("Optimizing FTS5 index..."); + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - // Check and fix porter FTS table - const missingPorterCount = sql.getValue(` - SELECT COUNT(*) FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - `) || 0; + // Set essential SQLite pragmas for better performance + sql.executeScript(` + -- Increase cache size (50MB) + PRAGMA cache_size = -50000; + + -- Use memory for temp storage + PRAGMA temp_store = 2; + + -- Run ANALYZE on FTS tables + ANALYZE notes_fts; + `); - if (missingPorterCount > 0) { - sql.execute(` - WITH missing_notes AS ( - SELECT n.noteId, n.title, b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `); - log.info(`Indexed ${missingPorterCount} additional notes in porter FTS during cleanup`); - } - - // Check and fix trigram FTS table - const missingTrigramCount = sql.getValue(` - SELECT COUNT(*) FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) - `) || 0; - - if (missingTrigramCount > 0) { - sql.execute(` - WITH missing_notes AS ( - SELECT n.noteId, n.title, b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts_trigram (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `); - log.info(`Indexed ${missingTrigramCount} additional notes in trigram FTS during cleanup`); - } - - // ======================================== - // Part 2: Strategic Performance Indexes - // ======================================== - - log.info("Adding strategic performance indexes..."); - const startTime = Date.now(); - const indexesCreated: string[] = []; - - try { - // ======================================== - // NOTES TABLE INDEXES - // ======================================== - - // Composite index for common search filters - log.info("Creating composite index on notes table for search filters..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_notes_search_composite; - CREATE INDEX IF NOT EXISTS IDX_notes_search_composite - ON notes (isDeleted, type, mime, dateModified DESC); - `); - indexesCreated.push("IDX_notes_search_composite"); - - // Covering index for note metadata queries - log.info("Creating covering index for note metadata..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_notes_metadata_covering; - CREATE INDEX IF NOT EXISTS IDX_notes_metadata_covering - ON notes (noteId, isDeleted, type, mime, title, dateModified, isProtected); - `); - indexesCreated.push("IDX_notes_metadata_covering"); - - // Index for protected notes filtering - log.info("Creating index for protected notes..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_notes_protected_deleted; - CREATE INDEX IF NOT EXISTS IDX_notes_protected_deleted - ON notes (isProtected, isDeleted) - WHERE isProtected = 1; - `); - indexesCreated.push("IDX_notes_protected_deleted"); - - // ======================================== - // BRANCHES TABLE INDEXES - // ======================================== - - // Composite index for tree traversal - log.info("Creating composite index on branches for tree traversal..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_branches_tree_traversal; - CREATE INDEX IF NOT EXISTS IDX_branches_tree_traversal - ON branches (parentNoteId, isDeleted, notePosition); - `); - indexesCreated.push("IDX_branches_tree_traversal"); - - // Covering index for branch queries - log.info("Creating covering index for branch queries..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_branches_covering; - CREATE INDEX IF NOT EXISTS IDX_branches_covering - ON branches (noteId, parentNoteId, isDeleted, notePosition, prefix); - `); - indexesCreated.push("IDX_branches_covering"); - - // Index for finding all parents of a note - log.info("Creating index for reverse tree lookup..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_branches_note_parents; - CREATE INDEX IF NOT EXISTS IDX_branches_note_parents - ON branches (noteId, isDeleted) - WHERE isDeleted = 0; - `); - indexesCreated.push("IDX_branches_note_parents"); - - // ======================================== - // ATTRIBUTES TABLE INDEXES - // ======================================== - - // Composite index for attribute searches - log.info("Creating composite index on attributes for search..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attributes_search_composite; - CREATE INDEX IF NOT EXISTS IDX_attributes_search_composite - ON attributes (name, value, isDeleted); - `); - indexesCreated.push("IDX_attributes_search_composite"); - - // Covering index for attribute queries - log.info("Creating covering index for attribute queries..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attributes_covering; - CREATE INDEX IF NOT EXISTS IDX_attributes_covering - ON attributes (noteId, name, value, type, isDeleted, position); - `); - indexesCreated.push("IDX_attributes_covering"); - - // Index for inherited attributes - log.info("Creating index for inherited attributes..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attributes_inheritable; - CREATE INDEX IF NOT EXISTS IDX_attributes_inheritable - ON attributes (isInheritable, isDeleted) - WHERE isInheritable = 1 AND isDeleted = 0; - `); - indexesCreated.push("IDX_attributes_inheritable"); - - // Index for specific attribute types - log.info("Creating index for label attributes..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attributes_labels; - CREATE INDEX IF NOT EXISTS IDX_attributes_labels - ON attributes (type, name, value) - WHERE type = 'label' AND isDeleted = 0; - `); - indexesCreated.push("IDX_attributes_labels"); - - log.info("Creating index for relation attributes..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attributes_relations; - CREATE INDEX IF NOT EXISTS IDX_attributes_relations - ON attributes (type, name, value) - WHERE type = 'relation' AND isDeleted = 0; - `); - indexesCreated.push("IDX_attributes_relations"); - - // ======================================== - // BLOBS TABLE INDEXES - // ======================================== - - // Index for blob content size filtering - log.info("Creating index for blob content size..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_blobs_content_size; - CREATE INDEX IF NOT EXISTS IDX_blobs_content_size - ON blobs (blobId, LENGTH(content)); - `); - indexesCreated.push("IDX_blobs_content_size"); - - // ======================================== - // ATTACHMENTS TABLE INDEXES - // ======================================== - - // Composite index for attachment queries - log.info("Creating composite index for attachments..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_attachments_composite; - CREATE INDEX IF NOT EXISTS IDX_attachments_composite - ON attachments (ownerId, role, isDeleted, position); - `); - indexesCreated.push("IDX_attachments_composite"); - - // ======================================== - // REVISIONS TABLE INDEXES - // ======================================== - - // Composite index for revision queries - log.info("Creating composite index for revisions..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_revisions_note_date; - CREATE INDEX IF NOT EXISTS IDX_revisions_note_date - ON revisions (noteId, utcDateCreated DESC); - `); - indexesCreated.push("IDX_revisions_note_date"); - - // ======================================== - // ENTITY_CHANGES TABLE INDEXES - // ======================================== - - // Composite index for sync operations - log.info("Creating composite index for entity changes sync..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_entity_changes_sync; - CREATE INDEX IF NOT EXISTS IDX_entity_changes_sync - ON entity_changes (isSynced, utcDateChanged); - `); - indexesCreated.push("IDX_entity_changes_sync"); - - // Index for component-based queries - log.info("Creating index for component-based entity change queries..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_entity_changes_component; - CREATE INDEX IF NOT EXISTS IDX_entity_changes_component - ON entity_changes (componentId, utcDateChanged DESC); - `); - indexesCreated.push("IDX_entity_changes_component"); - - // ======================================== - // RECENT_NOTES TABLE INDEXES - // ======================================== - - // Index for recent notes ordering - log.info("Creating index for recent notes..."); - sql.executeScript(` - DROP INDEX IF EXISTS IDX_recent_notes_date; - CREATE INDEX IF NOT EXISTS IDX_recent_notes_date - ON recent_notes (utcDateCreated DESC); - `); - indexesCreated.push("IDX_recent_notes_date"); - - // ======================================== - // ANALYZE TABLES FOR QUERY PLANNER - // ======================================== - - log.info("Running ANALYZE to update SQLite query planner statistics..."); - sql.executeScript(` - ANALYZE notes; - ANALYZE branches; - ANALYZE attributes; - ANALYZE blobs; - ANALYZE attachments; - ANALYZE revisions; - ANALYZE entity_changes; - ANALYZE recent_notes; - ANALYZE notes_fts; - `); - - const endTime = Date.now(); - const duration = endTime - startTime; - - log.info(`Performance index creation completed in ${duration}ms`); - log.info(`Created ${indexesCreated.length} indexes: ${indexesCreated.join(", ")}`); - - } catch (error) { - log.error(`Error creating performance indexes: ${error}`); - throw error; - } - - log.info("FTS5 and performance optimization migration completed successfully"); + log.info("FTS5 migration completed successfully"); } \ No newline at end of file diff --git a/apps/server/src/migrations/0234__add_fts5_search_minimal.ts b/apps/server/src/migrations/0234__add_fts5_search_minimal.ts new file mode 100644 index 0000000000..32cef4c6ed --- /dev/null +++ b/apps/server/src/migrations/0234__add_fts5_search_minimal.ts @@ -0,0 +1,216 @@ +/** + * Minimal FTS5 implementation for Trilium Notes + * + * Design principles: + * - Use only native SQLite FTS5 functionality + * - Single FTS table with porter tokenizer for word search + * - Prefix indexes for substring matching + * - Simple triggers for synchronization + * - No complex memory management or optimization + * - Let SQLite handle the scale + */ + +import sql from "../services/sql.js"; +import log from "../services/log.js"; + +export default function addMinimalFTS5Search() { + log.info("Setting up minimal FTS5 search for large-scale databases..."); + + // Step 1: Clean up any existing FTS tables + log.info("Cleaning up existing FTS tables..."); + sql.executeScript(` + -- Drop all existing FTS-related tables + DROP TABLE IF EXISTS notes_fts; + DROP TABLE IF EXISTS notes_fts_trigram; + DROP TABLE IF EXISTS notes_fts_aux; + DROP TABLE IF EXISTS notes_fts_config; + DROP TABLE IF EXISTS notes_fts_stats; + DROP VIEW IF EXISTS notes_content; + `); + + // Step 2: Create the single FTS5 virtual table + log.info("Creating minimal FTS5 table..."); + sql.executeScript(` + -- Single FTS5 table with porter tokenizer + -- Porter provides stemming for better word matching + -- Prefix indexes enable efficient substring search + CREATE VIRTUAL TABLE notes_fts USING fts5( + noteId UNINDEXED, -- Store noteId but don't index it + title, + content, + tokenize = 'porter unicode61', + prefix = '2 3 4' -- Index prefixes of 2, 3, and 4 chars for substring search + ); + + -- Create an index on notes table for efficient FTS joins + CREATE INDEX IF NOT EXISTS idx_notes_fts_lookup + ON notes(noteId, type, isDeleted, isProtected); + `); + + // Step 3: Set PRAGMA settings for large databases + log.info("Configuring SQLite for large database performance..."); + sql.executeScript(` + -- Increase cache size to 256MB for better performance + PRAGMA cache_size = -256000; + + -- Use memory for temp storage + PRAGMA temp_store = MEMORY; + + -- Increase page size for better I/O with large data + -- Note: This only affects new databases, existing ones keep their page size + PRAGMA page_size = 8192; + + -- Enable query planner optimizations + PRAGMA optimize; + `); + + // Step 4: Initial population of FTS index + log.info("Populating FTS index with existing notes..."); + + try { + // Get total count for progress reporting + const totalNotes = sql.getValue(` + SELECT COUNT(*) + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `) || 0; + + log.info(`Found ${totalNotes} notes to index`); + + if (totalNotes > 0) { + // Use a single INSERT...SELECT for maximum efficiency + // SQLite will handle the memory management internally + sql.transactional(() => { + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) + SELECT + n.noteId, + n.title, + -- Limit content to first 500KB to prevent memory issues + -- Most searches don't need the full content + SUBSTR(b.content, 1, 500000) as content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `); + }); + + log.info(`Indexed ${totalNotes} notes`); + + // Run initial optimization + log.info("Running initial FTS optimization..."); + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + } + } catch (error) { + log.error(`Failed to populate FTS index: ${error}`); + throw error; + } + + // Step 5: Create simple triggers for synchronization + log.info("Creating FTS synchronization triggers..."); + + sql.executeScript(` + -- Trigger for INSERT operations + CREATE TRIGGER notes_fts_insert + AFTER INSERT ON notes + FOR EACH ROW + WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND NEW.isDeleted = 0 + AND NEW.isProtected = 0 + BEGIN + INSERT INTO notes_fts (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + SUBSTR(b.content, 1, 500000) + FROM blobs b + WHERE b.blobId = NEW.blobId; + END; + + -- Trigger for UPDATE operations + CREATE TRIGGER notes_fts_update + AFTER UPDATE ON notes + FOR EACH ROW + BEGIN + -- Always delete the old entry + DELETE FROM notes_fts WHERE noteId = OLD.noteId; + + -- Insert new entry if eligible + INSERT INTO notes_fts (noteId, title, content) + SELECT + NEW.noteId, + NEW.title, + SUBSTR(b.content, 1, 500000) + FROM blobs b + WHERE b.blobId = NEW.blobId + AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND NEW.isDeleted = 0 + AND NEW.isProtected = 0; + END; + + -- Trigger for DELETE operations + CREATE TRIGGER notes_fts_delete + AFTER DELETE ON notes + FOR EACH ROW + BEGIN + DELETE FROM notes_fts WHERE noteId = OLD.noteId; + END; + + -- Trigger for blob updates + CREATE TRIGGER blobs_fts_update + AFTER UPDATE ON blobs + FOR EACH ROW + BEGIN + -- Update all notes that reference this blob + DELETE FROM notes_fts + WHERE noteId IN ( + SELECT noteId FROM notes WHERE blobId = NEW.blobId + ); + + INSERT INTO notes_fts (noteId, title, content) + SELECT + n.noteId, + n.title, + SUBSTR(NEW.content, 1, 500000) + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; + END; + + -- Trigger for blob inserts + CREATE TRIGGER blobs_fts_insert + AFTER INSERT ON blobs + FOR EACH ROW + BEGIN + INSERT INTO notes_fts (noteId, title, content) + SELECT + n.noteId, + n.title, + SUBSTR(NEW.content, 1, 500000) + FROM notes n + WHERE n.blobId = NEW.blobId + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0; + END; + `); + + // Step 6: Analyze tables for query optimizer + log.info("Analyzing tables for query optimizer..."); + sql.executeScript(` + ANALYZE notes; + ANALYZE notes_fts; + ANALYZE blobs; + `); + + log.info("Minimal FTS5 setup completed successfully"); +} \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search.test.ts b/apps/server/src/services/search/fts_search.test.ts index 194aabe83e..c88bdd1cd3 100644 --- a/apps/server/src/services/search/fts_search.test.ts +++ b/apps/server/src/services/search/fts_search.test.ts @@ -1,12 +1,12 @@ /** - * Tests for FTS5 search service improvements + * Tests for minimal FTS5 search service * - * This test file validates the fixes implemented for: - * 1. Transaction rollback in migration - * 2. Protected notes handling - * 3. Error recovery and communication - * 4. Input validation for token sanitization - * 5. dbstat fallback for index monitoring + * This test file validates the core FTS5 functionality: + * 1. FTS5 availability checking + * 2. Basic search operations + * 3. Protected notes handling + * 4. Error handling + * 5. Index statistics */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; @@ -17,7 +17,7 @@ vi.mock('../sql.js'); vi.mock('../log.js'); vi.mock('../protected_session.js'); -describe('FTS5 Search Service Improvements', () => { +describe('FTS5 Search Service', () => { let ftsSearchService: any; let mockSql: any; let mockLog: any; @@ -30,9 +30,11 @@ describe('FTS5 Search Service Improvements', () => { // Setup mocks mockSql = { getValue: vi.fn(), + getRow: vi.fn(), getRows: vi.fn(), getColumn: vi.fn(), execute: vi.fn(), + iterateRows: vi.fn(), transactional: vi.fn((fn: Function) => fn()) }; @@ -56,16 +58,169 @@ describe('FTS5 Search Service Improvements', () => { // Import the service after mocking const module = await import('./fts_search.js'); - ftsSearchService = module.ftsSearchService; + ftsSearchService = module.default; }); afterEach(() => { vi.clearAllMocks(); }); + describe('FTS5 Availability', () => { + it('should detect when FTS5 is available', () => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); + + const result = ftsSearchService.checkFTS5Availability(); + + expect(result).toBe(true); + expect(mockSql.getRow).toHaveBeenCalledWith(expect.stringContaining('pragma_compile_options')); + expect(mockSql.getValue).toHaveBeenCalledWith(expect.stringContaining('notes_fts')); + }); + + it('should detect when FTS5 is not available', () => { + mockSql.getRow.mockReturnValue(null); + + const result = ftsSearchService.checkFTS5Availability(); + + expect(result).toBe(false); + }); + + it('should cache FTS5 availability check', () => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); + + // First call + ftsSearchService.checkFTS5Availability(); + // Second call should use cached value + ftsSearchService.checkFTS5Availability(); + + // Should only be called once + expect(mockSql.getRow).toHaveBeenCalledTimes(1); + }); + }); + + describe('Basic Search', () => { + beforeEach(() => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); + }); + + it('should perform basic word search', () => { + const mockResults = [ + { noteId: 'note1', title: 'Test Note', score: 1.0 } + ]; + mockSql.getRows.mockReturnValue(mockResults); + + const results = ftsSearchService.searchSync(['test'], '*=*'); + + expect(results).toEqual(mockResults); + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.stringContaining('MATCH'), + expect.arrayContaining([expect.stringContaining('test')]) + ); + }); + + it('should handle phrase search', () => { + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchSync(['hello', 'world'], '='); + + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.stringContaining('MATCH'), + expect.arrayContaining(['"hello world"']) + ); + }); + + it('should apply limit and offset', () => { + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchSync(['test'], '=', undefined, { + limit: 50, + offset: 10 + }); + + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.stringContaining('LIMIT'), + expect.arrayContaining([expect.any(String), 50, 10]) + ); + }); + + it('should filter by noteIds when provided', () => { + mockSql.getRows.mockReturnValue([]); + const noteIds = new Set(['note1', 'note2']); + + ftsSearchService.searchSync(['test'], '=', noteIds); + + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.stringContaining("IN ('note1','note2')"), + expect.any(Array) + ); + }); + }); + + describe('Protected Notes', () => { + beforeEach(() => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); + }); + + it('should not return protected notes in regular search', () => { + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchSync(['test'], '='); + + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.stringContaining('isProtected = 0'), + expect.any(Array) + ); + }); + + it('should search protected notes separately when session available', () => { + mockProtectedSession.isProtectedSessionAvailable.mockReturnValue(true); + mockProtectedSession.decryptString.mockReturnValue('decrypted content test'); + + const mockIterator = function*() { + yield { + noteId: 'protected1', + title: 'Protected Note', + content: 'encrypted', + type: 'text', + mime: 'text/html' + }; + }; + mockSql.iterateRows.mockReturnValue(mockIterator()); + + const results = ftsSearchService.searchProtectedNotesSync(['test'], '*=*'); + + expect(results).toHaveLength(1); + expect(results[0].noteId).toBe('protected1'); + expect(mockProtectedSession.decryptString).toHaveBeenCalledWith('encrypted'); + }); + + it('should skip protected notes that cannot be decrypted', () => { + mockProtectedSession.isProtectedSessionAvailable.mockReturnValue(true); + mockProtectedSession.decryptString.mockReturnValue(null); + + const mockIterator = function*() { + yield { + noteId: 'protected1', + title: 'Protected Note', + content: 'encrypted', + type: 'text', + mime: 'text/html' + }; + }; + mockSql.iterateRows.mockReturnValue(mockIterator()); + + const results = ftsSearchService.searchProtectedNotesSync(['test'], '*=*'); + + expect(results).toHaveLength(0); + }); + }); + describe('Error Handling', () => { it('should throw FTSNotAvailableError when FTS5 is not available', () => { - mockSql.getValue.mockReturnValue(0); + mockSql.getRow.mockReturnValue(null); expect(() => { ftsSearchService.searchSync(['test'], '='); @@ -73,197 +228,106 @@ describe('FTS5 Search Service Improvements', () => { }); it('should throw FTSQueryError for invalid queries', () => { - mockSql.getValue.mockReturnValue(1); // FTS5 available + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); mockSql.getRows.mockImplementation(() => { throw new Error('syntax error in FTS5 query'); }); expect(() => { ftsSearchService.searchSync(['test'], '='); - }).toThrow(/FTS5 search failed.*Falling back to standard search/); + }).toThrow('Invalid FTS5 query'); + }); + }); + + describe('Index Management', () => { + beforeEach(() => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); }); - it('should provide structured error information', () => { - mockSql.getValue.mockReturnValue(1); - mockSql.getRows.mockImplementation(() => { - throw new Error('malformed MATCH expression'); + it('should sync missing notes to index', () => { + const missingNotes = [ + { noteId: 'note1', title: 'Note 1', content: 'Content 1' }, + { noteId: 'note2', title: 'Note 2', content: 'Content 2' } + ]; + mockSql.getRows.mockReturnValue(missingNotes); + + const count = ftsSearchService.syncMissingNotes(); + + expect(count).toBe(2); + expect(mockSql.execute).toHaveBeenCalledTimes(2); + }); + + it('should optimize index', () => { + ftsSearchService.optimizeIndex(); + + expect(mockSql.execute).toHaveBeenCalledWith( + expect.stringContaining('optimize') + ); + }); + + it('should get index statistics', () => { + mockSql.getValue + .mockReturnValueOnce(1) // FTS5 availability check + .mockReturnValueOnce(100) // document count + .mockReturnValueOnce(5000); // index size + + const stats = ftsSearchService.getStatistics(); + + expect(stats.documentCount).toBe(100); + expect(stats.indexSize).toBe(5000); + }); + + it('should handle errors in statistics gracefully', () => { + mockSql.getValue.mockImplementation(() => { + throw new Error('Database error'); }); - try { - ftsSearchService.searchSync(['test'], '='); - } catch (error: any) { - expect(error.name).toBe('FTSQueryError'); - expect(error.code).toBe('FTS_QUERY_ERROR'); - expect(error.recoverable).toBe(true); + const stats = ftsSearchService.getStatistics(); + + expect(stats.documentCount).toBe(0); + expect(stats.indexSize).toBe(0); + }); + }); + + describe('Query Building', () => { + beforeEach(() => { + mockSql.getRow.mockReturnValue({ 1: 1 }); + mockSql.getValue.mockReturnValue(1); + mockSql.getRows.mockReturnValue([]); + }); + + it('should build correct FTS5 query for different operators', () => { + const testCases = [ + { tokens: ['test'], operator: '=', expected: '"test"' }, + { tokens: ['hello', 'world'], operator: '=', expected: '"hello world"' }, + { tokens: ['test'], operator: '*=*', expected: '"test"' }, + { tokens: ['test', 'word'], operator: '*=*', expected: '"test" AND "word"' }, + { tokens: ['test'], operator: '!=', expected: 'NOT "test"' }, + { tokens: ['test'], operator: '*=', expected: '*test' }, + { tokens: ['test'], operator: '=*', expected: 'test*' }, + { tokens: ['test', 'word'], operator: '~=', expected: '"test" OR "word"' }, + ]; + + for (const { tokens, operator, expected } of testCases) { + mockSql.getRows.mockClear(); + ftsSearchService.searchSync(tokens, operator); + + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.any(String), + expect.arrayContaining([expected, expect.any(Number), expect.any(Number)]) + ); } }); - }); - describe('Protected Notes Handling', () => { - it('should not search protected notes in FTS index', () => { - mockSql.getValue.mockReturnValue(1); // FTS5 available - mockProtectedSession.isProtectedSessionAvailable.mockReturnValue(true); + it('should escape special characters in tokens', () => { + ftsSearchService.searchSync(['test"quote'], '='); - // Should return empty results when searching protected notes - const results = ftsSearchService.searchSync(['test'], '=', undefined, { - searchProtected: true - }); - - expect(results).toEqual([]); - expect(mockLog.info).toHaveBeenCalledWith( - 'Protected session available - will search protected notes separately' + expect(mockSql.getRows).toHaveBeenCalledWith( + expect.any(String), + expect.arrayContaining(['"test""quote"', expect.any(Number), expect.any(Number)]) ); }); - - it('should filter out protected notes from noteIds', () => { - mockSql.getValue.mockReturnValue(1); - mockSql.getColumn.mockReturnValue(['note1', 'note2']); // Non-protected notes - mockSql.getRows.mockReturnValue([]); - - const noteIds = new Set(['note1', 'note2', 'note3']); - ftsSearchService.searchSync(['test'], '=', noteIds); - - expect(mockSql.getColumn).toHaveBeenCalled(); - }); - - it('should search protected notes separately with decryption', () => { - mockProtectedSession.isProtectedSessionAvailable.mockReturnValue(true); - mockProtectedSession.decryptString.mockReturnValue('decrypted content with test'); - - mockSql.getRows.mockReturnValue([ - { noteId: 'protected1', title: 'Protected Note', content: 'encrypted_content' } - ]); - - const results = ftsSearchService.searchProtectedNotesSync(['test'], '*=*'); - - expect(mockProtectedSession.decryptString).toHaveBeenCalledWith('encrypted_content'); - expect(results).toHaveLength(1); - expect(results[0].noteId).toBe('protected1'); - }); - }); - - describe('Token Sanitization', () => { - it('should handle empty tokens after sanitization', () => { - mockSql.getValue.mockReturnValue(1); - mockSql.getRows.mockReturnValue([]); - - // Token with only special characters that get removed - const query = ftsSearchService.convertToFTS5Query(['()""'], '='); - - expect(query).toContain('__empty_token__'); - expect(mockLog.info).toHaveBeenCalledWith( - expect.stringContaining('Token became empty after sanitization') - ); - }); - - it('should detect potential SQL injection attempts', () => { - mockSql.getValue.mockReturnValue(1); - - const query = ftsSearchService.convertToFTS5Query(['test; DROP TABLE'], '='); - - expect(query).toContain('__invalid_token__'); - expect(mockLog.error).toHaveBeenCalledWith( - expect.stringContaining('Potential SQL injection attempt detected') - ); - }); - - it('should properly sanitize valid tokens', () => { - mockSql.getValue.mockReturnValue(1); - - const query = ftsSearchService.convertToFTS5Query(['hello (world)'], '='); - - expect(query).toBe('"hello world"'); - expect(query).not.toContain('('); - expect(query).not.toContain(')'); - }); - }); - - describe('Index Statistics with dbstat Fallback', () => { - it('should use dbstat when available', () => { - mockSql.getValue - .mockReturnValueOnce(1) // FTS5 available - .mockReturnValueOnce(100) // document count - .mockReturnValueOnce(50000); // index size from dbstat - - const stats = ftsSearchService.getIndexStats(); - - expect(stats).toEqual({ - totalDocuments: 100, - indexSize: 50000, - isOptimized: true, - dbstatAvailable: true - }); - }); - - it('should fallback when dbstat is not available', () => { - mockSql.getValue - .mockReturnValueOnce(1) // FTS5 available - .mockReturnValueOnce(100) // document count - .mockImplementationOnce(() => { - throw new Error('no such table: dbstat'); - }) - .mockReturnValueOnce(500); // average content size - - const stats = ftsSearchService.getIndexStats(); - - expect(stats.dbstatAvailable).toBe(false); - expect(stats.indexSize).toBe(75000); // 500 * 100 * 1.5 - expect(mockLog.info).toHaveBeenCalledWith( - 'dbstat virtual table not available, using fallback for index size estimation' - ); - }); - - it('should handle fallback errors gracefully', () => { - mockSql.getValue - .mockReturnValueOnce(1) // FTS5 available - .mockReturnValueOnce(100) // document count - .mockImplementationOnce(() => { - throw new Error('no such table: dbstat'); - }) - .mockImplementationOnce(() => { - throw new Error('Cannot estimate size'); - }); - - const stats = ftsSearchService.getIndexStats(); - - expect(stats.indexSize).toBe(0); - expect(stats.dbstatAvailable).toBe(false); - }); - }); - - describe('Migration Transaction Handling', () => { - // Note: This would be tested in the migration test file - // Including a placeholder test here for documentation - it('migration should rollback on failure (tested in migration tests)', () => { - // The migration file now wraps the entire population in a transaction - // If any error occurs, all changes are rolled back - // This prevents partial indexing - expect(true).toBe(true); - }); - }); - - describe('Blob Update Trigger Optimization', () => { - // Note: This is tested via SQL trigger behavior - it('trigger should limit batch size (tested via SQL)', () => { - // The trigger now processes maximum 50 notes at a time - // This prevents performance issues with widely-shared blobs - expect(true).toBe(true); - }); - }); -}); - -describe('Integration with NoteContentFulltextExp', () => { - it('should handle FTS errors with proper fallback', () => { - // This tests the integration between FTS service and the expression handler - // The expression handler now properly catches FTSError types - // and provides appropriate user feedback - expect(true).toBe(true); - }); - - it('should search protected and non-protected notes separately', () => { - // The expression handler now calls both searchSync (for non-protected) - // and searchProtectedNotesSync (for protected notes) - // Results are combined for the user - expect(true).toBe(true); }); }); \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index 96474a93d1..d5b1558049 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -1,12 +1,11 @@ /** - * FTS5 Search Service + * Minimal FTS5 Search Service * - * Encapsulates all FTS5-specific operations for full-text searching. - * Provides efficient text search using SQLite's FTS5 extension with: - * - Porter stemming for better matching - * - Snippet extraction for context - * - Highlighting of matched terms - * - Query syntax conversion from Trilium to FTS5 + * Provides basic full-text search using SQLite's FTS5 extension with: + * - Single FTS table with porter tokenizer + * - Basic word and substring search + * - Protected notes handled separately + * - Simple error handling */ import sql from "../sql.js"; @@ -15,6 +14,24 @@ import protectedSessionService from "../protected_session.js"; import striptags from "striptags"; import { normalize } from "../utils.js"; +/** + * Search result interface + */ +export interface FTSSearchResult { + noteId: string; + title: string; + score: number; +} + +/** + * Search options interface + */ +export interface FTSSearchOptions { + limit?: number; + offset?: number; + searchProtected?: boolean; +} + /** * Custom error classes for FTS operations */ @@ -39,52 +56,23 @@ export class FTSQueryError extends FTSError { } } -export interface FTSSearchResult { - noteId: string; - title: string; - score: number; - snippet?: string; - highlights?: string[]; -} - -export interface FTSSearchOptions { - limit?: number; - offset?: number; - includeSnippets?: boolean; - snippetLength?: number; - highlightTag?: string; - searchProtected?: boolean; -} - -export interface FTSErrorInfo { - error: FTSError; - fallbackUsed: boolean; - message: string; -} - /** - * Configuration for FTS5 search operations + * Configuration for FTS5 search */ const FTS_CONFIG = { - /** Maximum number of results to return by default */ DEFAULT_LIMIT: 100, - /** Default snippet length in tokens */ - DEFAULT_SNIPPET_LENGTH: 30, - /** Default highlight tags */ - DEFAULT_HIGHLIGHT_START: '', - DEFAULT_HIGHLIGHT_END: '', - /** Maximum query length to prevent DoS */ - MAX_QUERY_LENGTH: 1000, - /** Snippet column indices */ - SNIPPET_COLUMN_TITLE: 1, - SNIPPET_COLUMN_CONTENT: 2, + MAX_RESULTS: 10000, + BATCH_SIZE: 1000 }; +/** + * FTS5 Search Service + */ class FTSSearchService { private isFTS5Available: boolean | null = null; /** - * Checks if FTS5 is available in the current SQLite instance + * Check if FTS5 is available and properly configured */ checkFTS5Availability(): boolean { if (this.isFTS5Available !== null) { @@ -92,122 +80,42 @@ class FTSSearchService { } try { - // Check if both FTS5 tables are available - const porterTableExists = sql.getValue(` - SELECT COUNT(*) - FROM sqlite_master - WHERE type = 'table' - AND name = 'notes_fts' + // Check if FTS5 extension is available + const result = sql.getRow(` + SELECT 1 FROM pragma_compile_options + WHERE compile_options LIKE '%ENABLE_FTS5%' `); - const trigramTableExists = sql.getValue(` - SELECT COUNT(*) - FROM sqlite_master - WHERE type = 'table' - AND name = 'notes_fts_trigram' + if (!result) { + this.isFTS5Available = false; + return false; + } + + // Check if notes_fts table exists + const tableExists = sql.getValue(` + SELECT COUNT(*) FROM sqlite_master + WHERE type = 'table' AND name = 'notes_fts' `); - - this.isFTS5Available = porterTableExists > 0 && trigramTableExists > 0; + + this.isFTS5Available = tableExists > 0; if (!this.isFTS5Available) { - log.info("FTS5 tables not found. Full-text search will use fallback implementation."); + log.info("FTS5 table not found, full-text search not available"); } + + return this.isFTS5Available; } catch (error) { log.error(`Error checking FTS5 availability: ${error}`); this.isFTS5Available = false; - } - - return this.isFTS5Available; - } - - /** - * Converts Trilium search syntax to FTS5 MATCH syntax - * - * @param tokens - Array of search tokens - * @param operator - Trilium search operator - * @returns FTS5 MATCH query string - */ - convertToFTS5Query(tokens: string[], operator: string): string { - if (!tokens || tokens.length === 0) { - throw new Error("No search tokens provided"); - } - - // Sanitize tokens to prevent FTS5 syntax injection - const sanitizedTokens = tokens.map(token => - this.sanitizeFTS5Token(token) - ); - - switch (operator) { - case "=": // Exact match (phrase search) - return `"${sanitizedTokens.join(" ")}"`; - - case "*=*": // Contains all tokens (AND) - // For substring matching, we'll use the trigram table - // which is designed for substring searches - // The trigram tokenizer will handle the substring matching - return sanitizedTokens.join(" AND "); - - case "*=": // Ends with - return sanitizedTokens.map(t => `*${t}`).join(" AND "); - - case "=*": // Starts with - return sanitizedTokens.map(t => `${t}*`).join(" AND "); - - case "!=": // Does not contain (NOT) - return `NOT (${sanitizedTokens.join(" OR ")})`; - - case "~=": // Fuzzy match (use OR for more flexible matching) - case "~*": // Fuzzy contains - return sanitizedTokens.join(" OR "); - - case "%=": // Regex match - fallback to OR search - log.error(`Regex search operator ${operator} not fully supported in FTS5, using OR search`); - return sanitizedTokens.join(" OR "); - - default: - // Default to AND search - return sanitizedTokens.join(" AND "); + return false; } } /** - * Sanitizes a token for safe use in FTS5 queries - * Validates that the token is not empty after sanitization - */ - private sanitizeFTS5Token(token: string): string { - // Remove special FTS5 characters that could break syntax - const sanitized = token - .replace(/["\(\)\*]/g, '') // Remove quotes, parens, wildcards - .replace(/\s+/g, ' ') // Normalize whitespace - .trim(); - - // Validate that token is not empty after sanitization - if (!sanitized || sanitized.length === 0) { - log.info(`Token became empty after sanitization: "${token}"`); - // Return a safe placeholder that won't match anything - return "__empty_token__"; - } - - // Additional validation: ensure token doesn't contain SQL injection attempts - if (sanitized.includes(';') || sanitized.includes('--')) { - log.error(`Potential SQL injection attempt detected in token: "${token}"`); - return "__invalid_token__"; - } - - return sanitized; - } - - /** - * Performs a synchronous full-text search using FTS5 - * - * @param tokens - Search tokens - * @param operator - Search operator - * @param noteIds - Optional set of note IDs to search within - * @param options - Search options - * @returns Array of search results + * Perform synchronous FTS5 search */ searchSync( - tokens: string[], + tokens: string[], operator: string, noteIds?: Set, options: FTSSearchOptions = {} @@ -216,190 +124,66 @@ class FTSSearchService { throw new FTSNotAvailableError(); } - let { - limit = FTS_CONFIG.DEFAULT_LIMIT, - offset = 0, - includeSnippets = true, - snippetLength = FTS_CONFIG.DEFAULT_SNIPPET_LENGTH, - highlightTag = FTS_CONFIG.DEFAULT_HIGHLIGHT_START, - searchProtected = false - } = options; - - // Track if we need post-filtering - let needsPostFiltering = false; + const limit = Math.min(options.limit || FTS_CONFIG.DEFAULT_LIMIT, FTS_CONFIG.MAX_RESULTS); + const offset = options.offset || 0; try { - const ftsQuery = this.convertToFTS5Query(tokens, operator); + // Build FTS5 query based on operator + let ftsQuery = this.buildFTSQuery(tokens, operator); - // Validate query length - if (ftsQuery.length > FTS_CONFIG.MAX_QUERY_LENGTH) { - throw new FTSQueryError( - `Query too long: ${ftsQuery.length} characters (max: ${FTS_CONFIG.MAX_QUERY_LENGTH})`, - ftsQuery - ); - } - - // Check if we're searching for protected notes - // Protected notes are NOT in the FTS index, so we need to handle them separately - if (searchProtected && protectedSessionService.isProtectedSessionAvailable()) { - log.info("Protected session available - will search protected notes separately"); - // Return empty results from FTS and let the caller handle protected notes - // The caller should use a fallback search method for protected notes - return []; - } - - // Determine which FTS table to use based on operator - // Use trigram table for substring searches (*=* operator) - const ftsTable = operator === '*=*' ? 'notes_fts_trigram' : 'notes_fts'; - - // Build the SQL query - let whereConditions = [`${ftsTable} MATCH ?`]; - const params: any[] = [ftsQuery]; - - // Filter by noteIds if provided - if (noteIds && noteIds.size > 0) { - // First filter out any protected notes from the noteIds - const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds); - if (nonProtectedNoteIds.length === 0) { - // All provided notes are protected, return empty results - return []; - } - - // SQLite has a limit on the number of parameters (usually 999 or 32766) - // If we have too many noteIds, we need to handle this differently - const SQLITE_MAX_PARAMS = 900; // Conservative limit to be safe - - if (nonProtectedNoteIds.length > SQLITE_MAX_PARAMS) { - // Too many noteIds to filter in SQL - we'll filter in post-processing - // This is less efficient but avoids the SQL variable limit - log.info(`Too many noteIds for SQL filter (${nonProtectedNoteIds.length}), will filter in post-processing`); - // Don't add the noteId filter to the query - // But we need to get ALL results since we'll filter them - needsPostFiltering = true; - // Set limit to -1 to remove limit entirely - limit = -1; // No limit - } else { - whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); - params.push(...nonProtectedNoteIds); - } - } - - // Build snippet extraction if requested - // Note: snippet function uses the table name from the query - const snippetSelect = includeSnippets - ? `, snippet(${ftsTable}, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', ' 0) { + // Filter by specific noteIds + const noteIdList = Array.from(noteIds).join("','"); query = ` SELECT - noteId, - title, - rank as score - ${snippetSelect} - FROM ${ftsTable} - WHERE ${whereConditions.join(' AND ')} - ORDER BY rank - `; - } else { - query = ` - SELECT - noteId, - title, - rank as score - ${snippetSelect} - FROM ${ftsTable} - WHERE ${whereConditions.join(' AND ')} + f.noteId, + n.title, + -rank as score + FROM notes_fts f + JOIN notes n ON n.noteId = f.noteId + WHERE notes_fts MATCH ? + AND f.noteId IN ('${noteIdList}') + AND n.isDeleted = 0 + AND n.isProtected = 0 ORDER BY rank LIMIT ? OFFSET ? `; - params.push(limit, offset); + params = [ftsQuery, limit, offset]; + } else { + // Search all eligible notes + query = ` + SELECT + f.noteId, + n.title, + -rank as score + FROM notes_fts f + JOIN notes n ON n.noteId = f.noteId + WHERE notes_fts MATCH ? + AND n.isDeleted = 0 + AND n.isProtected = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + params = [ftsQuery, limit, offset]; } - let results = sql.getRows<{ - noteId: string; - title: string; - score: number; - snippet?: string; - }>(query, params); - - // Post-process filtering if we had too many noteIds for SQL - if (needsPostFiltering && noteIds && noteIds.size > 0) { - const noteIdSet = new Set(this.filterNonProtectedNoteIds(noteIds)); - results = results.filter(result => noteIdSet.has(result.noteId)); - log.info(`Post-filtered FTS results: ${results.length} results after filtering from ${noteIdSet.size} allowed noteIds`); - } - - return results; - + const results = sql.getRows(query, params); + return results || []; } catch (error: any) { - // Provide structured error information - if (error instanceof FTSError) { - throw error; + // Handle FTS5 query syntax errors + if (error.message?.includes('syntax error') || error.message?.includes('fts5')) { + throw new FTSQueryError(`Invalid FTS5 query: ${error.message}`, tokens.join(' ')); } - - log.error(`FTS5 search error: ${error}`); - - // Determine if this is a recoverable error - const isRecoverable = - error.message?.includes('syntax error') || - error.message?.includes('malformed MATCH') || - error.message?.includes('no such table'); - - throw new FTSQueryError( - `FTS5 search failed: ${error.message}. ${isRecoverable ? 'Falling back to standard search.' : ''}`, - undefined - ); + throw new FTSError(`FTS5 search failed: ${error.message}`, 'FTS_SEARCH_ERROR'); } } /** - * Filters out protected note IDs from the given set - */ - private filterNonProtectedNoteIds(noteIds: Set): string[] { - const noteIdList = Array.from(noteIds); - const BATCH_SIZE = 900; // Conservative limit for SQL parameters - - if (noteIdList.length <= BATCH_SIZE) { - // Small enough to do in one query - const placeholders = noteIdList.map(() => '?').join(','); - - const nonProtectedNotes = sql.getColumn(` - SELECT noteId - FROM notes - WHERE noteId IN (${placeholders}) - AND isProtected = 0 - `, noteIdList); - - return nonProtectedNotes; - } else { - // Process in batches to avoid SQL parameter limit - const nonProtectedNotes: string[] = []; - - for (let i = 0; i < noteIdList.length; i += BATCH_SIZE) { - const batch = noteIdList.slice(i, i + BATCH_SIZE); - const placeholders = batch.map(() => '?').join(','); - - const batchResults = sql.getColumn(` - SELECT noteId - FROM notes - WHERE noteId IN (${placeholders}) - AND isProtected = 0 - `, batch); - - nonProtectedNotes.push(...batchResults); - } - - return nonProtectedNotes; - } - } - - /** - * Searches protected notes separately (not in FTS index) - * This is a fallback method for protected notes + * Search protected notes separately (not indexed in FTS) */ searchProtectedNotesSync( tokens: string[], @@ -411,445 +195,274 @@ class FTSSearchService { return []; } - const { - limit = FTS_CONFIG.DEFAULT_LIMIT, - offset = 0 - } = options; + const results: FTSSearchResult[] = []; + const searchTerms = tokens.map(t => normalize(t.toLowerCase())); + + // Query protected notes directly + let query = ` + SELECT n.noteId, n.title, b.content, n.type, n.mime + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.isProtected = 1 + AND n.isDeleted = 0 + AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + `; + + if (noteIds && noteIds.size > 0) { + const noteIdList = Array.from(noteIds).join("','"); + query += ` AND n.noteId IN ('${noteIdList}')`; + } + + for (const row of sql.iterateRows(query)) { + try { + // Decrypt content + let content = row.content; + if (content) { + content = protectedSessionService.decryptString(content); + if (!content) continue; + + // Process content based on type + content = this.preprocessContent(content, row.type, row.mime); + + // Check if content matches search terms + if (this.matchesSearch(content, row.title, searchTerms, operator)) { + results.push({ + noteId: row.noteId, + title: row.title, + score: 1.0 // Basic scoring for protected notes + }); + } + } + } catch (e) { + log.debug(`Cannot decrypt protected note ${row.noteId}`); + } + } + + return results; + } + + /** + * Sync missing notes to FTS index + */ + syncMissingNotes(): number { + if (!this.checkFTS5Availability()) { + return 0; + } try { - // Build query for protected notes only - let whereConditions = [`n.isProtected = 1`, `n.isDeleted = 0`]; - const params: any[] = []; - let needPostFilter = false; - let postFilterNoteIds: Set | null = null; - - if (noteIds && noteIds.size > 0) { - const noteIdList = Array.from(noteIds); - const BATCH_SIZE = 900; // Conservative SQL parameter limit - - if (noteIdList.length > BATCH_SIZE) { - // Too many noteIds, we'll filter in post-processing - needPostFilter = true; - postFilterNoteIds = noteIds; - log.info(`Too many noteIds for protected notes SQL filter (${noteIdList.length}), will filter in post-processing`); - } else { - whereConditions.push(`n.noteId IN (${noteIdList.map(() => '?').join(',')})`); - params.push(...noteIdList); - } - } - - // Get protected notes - let protectedNotes = sql.getRows<{ - noteId: string; - title: string; - content: string | null; - }>(` + // Find notes that should be indexed but aren't + const missingNotes = sql.getRows<{noteId: string, title: string, content: string}>(` SELECT n.noteId, n.title, b.content FROM notes n LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE ${whereConditions.join(' AND ')} - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - LIMIT ? OFFSET ? - `, [...params, limit, offset]); - - // Post-filter if needed - if (needPostFilter && postFilterNoteIds) { - protectedNotes = protectedNotes.filter(note => postFilterNoteIds!.has(note.noteId)); + LEFT JOIN notes_fts f ON f.noteId = n.noteId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND f.noteId IS NULL + LIMIT 1000 + `); + + if (!missingNotes || missingNotes.length === 0) { + return 0; } - const results: FTSSearchResult[] = []; - - for (const note of protectedNotes) { - if (!note.content) continue; - - try { - // Decrypt content - const decryptedContent = protectedSessionService.decryptString(note.content); - if (!decryptedContent) continue; - - // Simple token matching for protected notes - const contentLower = decryptedContent.toLowerCase(); - const titleLower = note.title.toLowerCase(); - let matches = false; - - switch (operator) { - case "=": // Exact match - const phrase = tokens.join(' ').toLowerCase(); - matches = contentLower.includes(phrase) || titleLower.includes(phrase); - break; - case "*=*": // Contains all tokens - matches = tokens.every(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - break; - case "~=": // Contains any token - case "~*": - matches = tokens.some(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - break; - default: - matches = tokens.every(token => - contentLower.includes(token.toLowerCase()) || - titleLower.includes(token.toLowerCase()) - ); - } - - if (matches) { - results.push({ - noteId: note.noteId, - title: note.title, - score: 1.0, // Simple scoring for protected notes - snippet: this.generateSnippet(decryptedContent) - }); - } - } catch (error) { - log.info(`Could not decrypt protected note ${note.noteId}`); - } - } - - return results; - } catch (error: any) { - log.error(`Protected notes search error: ${error}`); - return []; - } - } - - /** - * Generates a snippet from content - */ - private generateSnippet(content: string, maxLength: number = 30): string { - // Strip HTML tags for snippet - const plainText = striptags(content); - const normalized = normalize(plainText); - - if (normalized.length <= maxLength * 10) { - return normalized; - } - - // Extract snippet around first occurrence - return normalized.substring(0, maxLength * 10) + '...'; - } - - /** - * Updates the FTS index for a specific note (synchronous) - * - * @param noteId - The note ID to update - * @param title - The note title - * @param content - The note content - */ - updateNoteIndex(noteId: string, title: string, content: string): void { - if (!this.checkFTS5Availability()) { - return; - } - - try { + // Insert missing notes in batches sql.transactional(() => { - // Delete existing entries from both FTS tables - sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); - sql.execute(`DELETE FROM notes_fts_trigram WHERE noteId = ?`, [noteId]); - - // Insert new entries into both FTS tables - sql.execute(` - INSERT INTO notes_fts (noteId, title, content) - VALUES (?, ?, ?) - `, [noteId, title, content]); - - sql.execute(` - INSERT INTO notes_fts_trigram (noteId, title, content) - VALUES (?, ?, ?) - `, [noteId, title, content]); - }); - } catch (error) { - log.error(`Failed to update FTS index for note ${noteId}: ${error}`); - } - } - - /** - * Removes a note from the FTS index (synchronous) - * - * @param noteId - The note ID to remove - */ - removeNoteFromIndex(noteId: string): void { - if (!this.checkFTS5Availability()) { - return; - } - - try { - sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); - sql.execute(`DELETE FROM notes_fts_trigram WHERE noteId = ?`, [noteId]); - } catch (error) { - log.error(`Failed to remove note ${noteId} from FTS index: ${error}`); - } - } - - /** - * Syncs missing notes to the FTS index (synchronous) - * This is useful after bulk operations like imports where triggers might not fire - * - * @param noteIds - Optional array of specific note IDs to sync. If not provided, syncs all missing notes. - * @returns The number of notes that were synced - */ - syncMissingNotes(noteIds?: string[]): number { - if (!this.checkFTS5Availability()) { - log.error("Cannot sync FTS index - FTS5 not available"); - return 0; - } - - try { - let syncedCount = 0; - - sql.transactional(() => { - const BATCH_SIZE = 900; // Conservative SQL parameter limit - - if (noteIds && noteIds.length > 0) { - // Process in batches if too many noteIds - for (let i = 0; i < noteIds.length; i += BATCH_SIZE) { - const batch = noteIds.slice(i, i + BATCH_SIZE); - const placeholders = batch.map(() => '?').join(','); - - // Sync to porter FTS table - const queryPorter = ` - WITH missing_notes AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId IN (${placeholders}) - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `; - - const resultPorter = sql.execute(queryPorter, batch); - - // Sync to trigram FTS table - const queryTrigram = ` - WITH missing_notes_trigram AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.noteId IN (${placeholders}) - AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts_trigram (noteId, title, content) - SELECT noteId, title, content FROM missing_notes_trigram - `; - - const resultTrigram = sql.execute(queryTrigram, batch); - syncedCount += Math.max(resultPorter.changes, resultTrigram.changes); - } - } else { - // Sync all missing notes to porter FTS table - const queryPorter = ` - WITH missing_notes AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) + for (const note of missingNotes) { + sql.execute(` INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `; - - const resultPorter = sql.execute(queryPorter, []); - - // Sync all missing notes to trigram FTS table - const queryTrigram = ` - WITH missing_notes_trigram AS ( - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts_trigram WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts_trigram (noteId, title, content) - SELECT noteId, title, content FROM missing_notes_trigram - `; - - const resultTrigram = sql.execute(queryTrigram, []); - syncedCount = Math.max(resultPorter.changes, resultTrigram.changes); - } - - if (syncedCount > 0) { - log.info(`Synced ${syncedCount} missing notes to FTS index`); - // Optimize both FTS tables if we synced a significant number of notes - if (syncedCount > 100) { - sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - sql.execute(`INSERT INTO notes_fts_trigram(notes_fts_trigram) VALUES('optimize')`); - } + VALUES (?, ?, ?) + `, [note.noteId, note.title, note.content]); } }); - - return syncedCount; + + log.info(`Synced ${missingNotes.length} missing notes to FTS index`); + return missingNotes.length; } catch (error) { - log.error(`Failed to sync missing notes to FTS index: ${error}`); + log.error(`Error syncing missing notes: ${error}`); return 0; } } /** - * Rebuilds the entire FTS index (synchronous) - * This is useful for maintenance or after bulk operations + * Build FTS5 query string from tokens and operator */ - rebuildIndex(): void { - if (!this.checkFTS5Availability()) { - log.error("Cannot rebuild FTS index - FTS5 not available"); - return; - } + private buildFTSQuery(tokens: string[], operator: string): string { + // Escape special characters in tokens + const escapedTokens = tokens.map(token => { + // Escape double quotes in the token + return token.replace(/"/g, '""'); + }); - log.info("Rebuilding FTS5 index..."); - - try { - sql.transactional(() => { - // Clear existing indexes - sql.execute(`DELETE FROM notes_fts`); - sql.execute(`DELETE FROM notes_fts_trigram`); - - // Rebuild both FTS tables from notes - sql.execute(` - INSERT INTO notes_fts (noteId, title, content) - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - `); + switch (operator) { + case '=': // Exact match (phrase search) + return `"${escapedTokens.join(' ')}"`; - sql.execute(` - INSERT INTO notes_fts_trigram (noteId, title, content) - SELECT - n.noteId, - n.title, - b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - `); - - // Optimize both FTS tables - sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); - sql.execute(`INSERT INTO notes_fts_trigram(notes_fts_trigram) VALUES('optimize')`); - }); - - log.info("FTS5 index rebuild completed"); - } catch (error) { - log.error(`Failed to rebuild FTS index: ${error}`); - throw error; + case '*=*': // Contains all tokens (AND) + return escapedTokens.map(t => `"${t}"`).join(' AND '); + + case '!=': // Does not contain (use NOT) + return escapedTokens.map(t => `NOT "${t}"`).join(' AND '); + + case '*=': // Ends with (use wildcard prefix) + return escapedTokens.map(t => `*${t}`).join(' AND '); + + case '=*': // Starts with (use wildcard suffix) + return escapedTokens.map(t => `${t}*`).join(' AND '); + + case '~=': // Fuzzy match (use OR for flexibility) + case '~*': + return escapedTokens.map(t => `"${t}"`).join(' OR '); + + default: // Default to AND search + return escapedTokens.map(t => `"${t}"`).join(' AND '); } } /** - * Gets statistics about the FTS index (synchronous) - * Includes fallback when dbstat is not available + * Preprocess content based on note type */ - getIndexStats(): { - totalDocuments: number; - indexSize: number; - isOptimized: boolean; - dbstatAvailable: boolean; - } { - if (!this.checkFTS5Availability()) { - return { - totalDocuments: 0, - indexSize: 0, - isOptimized: false, - dbstatAvailable: false - }; + private preprocessContent(content: string, type: string, mime: string): string { + content = normalize(content.toString()); + + if (type === "text" && mime === "text/html") { + // Strip HTML tags but preserve link URLs + content = striptags(content, ['a'], ' '); + content = content.replace(/<\/a>/gi, ''); + content = content.replace(/ /g, ' '); + } else if (type === "mindMap" && mime === "application/json") { + try { + const mindMapData = JSON.parse(content); + const topics = this.extractMindMapTopics(mindMapData); + content = topics.join(' '); + } catch (e) { + // Invalid JSON, use original content + } + } else if (type === "canvas" && mime === "application/json") { + try { + const canvasData = JSON.parse(content); + if (canvasData.elements) { + const texts = canvasData.elements + .filter((el: any) => el.type === 'text' && el.text) + .map((el: any) => el.text); + content = texts.join(' '); + } + } catch (e) { + // Invalid JSON, use original content + } } - const totalDocuments = sql.getValue(` - SELECT COUNT(DISTINCT noteId) - FROM ( - SELECT noteId FROM notes_fts - UNION - SELECT noteId FROM notes_fts_trigram - ) - `) || 0; + return content.trim(); + } - let indexSize = 0; - let dbstatAvailable = false; + /** + * Extract topics from mind map data + */ + private extractMindMapTopics(data: any): string[] { + const topics: string[] = []; + + function collectTopics(node: any) { + if (node?.topic) { + topics.push(node.topic); + } + if (node?.children && Array.isArray(node.children)) { + for (const child of node.children) { + collectTopics(child); + } + } + } + + if (data?.nodedata) { + collectTopics(data.nodedata); + } + + return topics; + } + + /** + * Check if content matches search terms + */ + private matchesSearch(content: string, title: string, searchTerms: string[], operator: string): boolean { + const fullText = normalize(`${title} ${content}`).toLowerCase(); + + switch (operator) { + case '=': // Exact match + const phrase = searchTerms.join(' '); + return fullText.includes(phrase); + + case '*=*': // Contains all + return searchTerms.every(term => fullText.includes(term)); + + case '!=': // Does not contain + return !searchTerms.some(term => fullText.includes(term)); + + case '*=': // Ends with + return searchTerms.every(term => { + const words = fullText.split(/\s+/); + return words.some(word => word.endsWith(term)); + }); + + case '=*': // Starts with + return searchTerms.every(term => { + const words = fullText.split(/\s+/); + return words.some(word => word.startsWith(term)); + }); + + case '~=': // Fuzzy match (at least one term) + case '~*': + return searchTerms.some(term => fullText.includes(term)); + + default: + return searchTerms.every(term => fullText.includes(term)); + } + } + + /** + * Optimize FTS index (run during maintenance) + */ + optimizeIndex(): void { + if (!this.checkFTS5Availability()) { + return; + } try { - // Try to get index size from dbstat - // dbstat is a virtual table that may not be available in all SQLite builds - // Get size for both FTS tables - indexSize = sql.getValue(` + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + log.info("FTS5 index optimized"); + } catch (error) { + log.error(`Error optimizing FTS5 index: ${error}`); + } + } + + /** + * Get FTS index statistics + */ + getStatistics(): { documentCount: number; indexSize: number } { + if (!this.checkFTS5Availability()) { + return { documentCount: 0, indexSize: 0 }; + } + + try { + const documentCount = sql.getValue(` + SELECT COUNT(*) FROM notes_fts + `) || 0; + + // Estimate index size from SQLite internal tables + const indexSize = sql.getValue(` SELECT SUM(pgsize) FROM dbstat - WHERE name LIKE 'notes_fts%' - OR name LIKE 'notes_fts_trigram%' + WHERE name LIKE 'notes_fts%' `) || 0; - dbstatAvailable = true; - } catch (error: any) { - // dbstat not available, use fallback - if (error.message?.includes('no such table: dbstat')) { - log.info("dbstat virtual table not available, using fallback for index size estimation"); - - // Fallback: Estimate based on number of documents and average content size - try { - const avgContentSize = sql.getValue(` - SELECT AVG(LENGTH(content) + LENGTH(title)) - FROM notes_fts - LIMIT 1000 - `) || 0; - - // Rough estimate: avg size * document count * overhead factor - indexSize = Math.round(avgContentSize * totalDocuments * 1.5); - } catch (fallbackError) { - log.info(`Could not estimate index size: ${fallbackError}`); - indexSize = 0; - } - } else { - log.error(`Error accessing dbstat: ${error}`); - } - } - return { - totalDocuments, - indexSize, - isOptimized: true, // FTS5 manages optimization internally - dbstatAvailable - }; + return { documentCount, indexSize }; + } catch (error) { + log.error(`Error getting FTS statistics: ${error}`); + return { documentCount: 0, indexSize: 0 }; + } } } // Export singleton instance -export const ftsSearchService = new FTSSearchService(); - +const ftsSearchService = new FTSSearchService(); export default ftsSearchService; \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search_minimal.ts b/apps/server/src/services/search/fts_search_minimal.ts new file mode 100644 index 0000000000..75867db15c --- /dev/null +++ b/apps/server/src/services/search/fts_search_minimal.ts @@ -0,0 +1,461 @@ +/** + * Minimal FTS5 Search Service + * + * Design principles: + * - Direct SQLite FTS5 queries only + * - No memory management or query governors + * - No temporary tables or complex batching + * - Let SQLite handle the scale + * - Simple, maintainable code + */ + +import sql from "../sql.js"; +import log from "../log.js"; + +export interface MinimalFTSSearchResult { + noteId: string; + title: string; + score: number; + snippet?: string; +} + +export interface MinimalFTSSearchOptions { + limit?: number; + offset?: number; + includeSnippets?: boolean; +} + +class MinimalFTSSearchService { + private isFTS5Available: boolean | null = null; + + /** + * Check if FTS5 table exists + */ + checkFTS5Availability(): boolean { + if (this.isFTS5Available !== null) { + return this.isFTS5Available; + } + + try { + const tableExists = sql.getValue(` + SELECT COUNT(*) + FROM sqlite_master + WHERE type = 'table' + AND name = 'notes_fts' + `); + + this.isFTS5Available = tableExists > 0; + + if (!this.isFTS5Available) { + log.info("FTS5 table not found"); + } + } catch (error) { + log.error(`Error checking FTS5 availability: ${error}`); + this.isFTS5Available = false; + } + + return this.isFTS5Available; + } + + /** + * Convert search tokens to FTS5 query + * Keep it simple - let SQLite do the work + */ + convertToFTS5Query(tokens: string[], operator: string): string { + if (!tokens || tokens.length === 0) { + throw new Error("No search tokens provided"); + } + + // Basic sanitization - remove FTS5 special characters + const sanitizedTokens = tokens.map(token => + token.replace(/["()]/g, '').trim() + ).filter(t => t.length > 0); + + if (sanitizedTokens.length === 0) { + throw new Error("No valid tokens after sanitization"); + } + + switch (operator) { + case "=": // Exact phrase + return `"${sanitizedTokens.join(" ")}"`; + + case "*=*": // Contains (substring) + // Use prefix search for each token + return sanitizedTokens.map(t => `${t}*`).join(" AND "); + + case "*=": // Ends with (not well supported in FTS5) + // Fallback to contains + return sanitizedTokens.map(t => `${t}*`).join(" AND "); + + case "=*": // Starts with + return sanitizedTokens.map(t => `${t}*`).join(" AND "); + + case "!=": // Does not contain + return `NOT (${sanitizedTokens.join(" OR ")})`; + + case "~=": // Fuzzy match (use OR for flexibility) + case "~*": + return sanitizedTokens.join(" OR "); + + default: + // Default to AND search + return sanitizedTokens.join(" AND "); + } + } + + /** + * Perform word-based search using FTS5 + */ + searchWords( + tokens: string[], + operator: string, + noteIds?: Set, + options: MinimalFTSSearchOptions = {} + ): MinimalFTSSearchResult[] { + if (!this.checkFTS5Availability()) { + throw new Error("FTS5 not available"); + } + + const { + limit = 100, + offset = 0, + includeSnippets = false + } = options; + + try { + const ftsQuery = this.convertToFTS5Query(tokens, operator); + + // Build the query + let query: string; + const params: any[] = [ftsQuery]; + + if (noteIds && noteIds.size > 0) { + // Filter by specific noteIds + const noteIdArray = Array.from(noteIds); + const placeholders = noteIdArray.map(() => '?').join(','); + + if (includeSnippets) { + query = ` + SELECT + f.noteId, + n.title, + -rank as score, + snippet(notes_fts, 2, '', '', '...', 30) as snippet + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND f.noteId IN (${placeholders}) + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + } else { + query = ` + SELECT + f.noteId, + n.title, + -rank as score + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND f.noteId IN (${placeholders}) + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + } + params.push(...noteIdArray, limit, offset); + } else { + // Search all notes + if (includeSnippets) { + query = ` + SELECT + f.noteId, + n.title, + -rank as score, + snippet(notes_fts, 2, '', '', '...', 30) as snippet + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + } else { + query = ` + SELECT + f.noteId, + n.title, + -rank as score + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + } + params.push(limit, offset); + } + + const results = sql.getRows(query, params); + return results; + + } catch (error: any) { + log.error(`FTS5 search error: ${error}`); + throw new Error(`FTS5 search failed: ${error.message}`); + } + } + + /** + * Perform substring search using FTS5 prefix indexes + * This is slower than word search but still uses FTS5 + */ + searchSubstring( + tokens: string[], + noteIds?: Set, + options: MinimalFTSSearchOptions = {} + ): MinimalFTSSearchResult[] { + if (!this.checkFTS5Availability()) { + throw new Error("FTS5 not available"); + } + + const { + limit = 100, + offset = 0, + includeSnippets = false + } = options; + + try { + // For substring search, use prefix matching + // Split each token into smaller parts for better matching + const substringTokens: string[] = []; + + for (const token of tokens) { + if (token.length <= 2) { + // Short tokens - just add with wildcard + substringTokens.push(`${token}*`); + } else { + // Longer tokens - create multiple prefix searches + // This leverages the prefix indexes we created (2, 3, 4 chars) + for (let i = 2; i <= Math.min(4, token.length); i++) { + substringTokens.push(`${token.substring(0, i)}*`); + } + // Also add the full token with wildcard + if (token.length > 4) { + substringTokens.push(`${token}*`); + } + } + } + + // Create FTS query with OR to find any matching substring + const ftsQuery = substringTokens.join(" OR "); + + // Build the query + let query: string; + const params: any[] = [ftsQuery]; + + if (noteIds && noteIds.size > 0) { + const noteIdArray = Array.from(noteIds); + const placeholders = noteIdArray.map(() => '?').join(','); + + query = ` + SELECT DISTINCT + f.noteId, + n.title, + -rank as score + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND f.noteId IN (${placeholders}) + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + params.push(...noteIdArray, limit, offset); + } else { + query = ` + SELECT DISTINCT + f.noteId, + n.title, + -rank as score + FROM notes_fts f + INNER JOIN notes n ON f.noteId = n.noteId + WHERE notes_fts MATCH ? + AND n.isDeleted = 0 + ORDER BY rank + LIMIT ? OFFSET ? + `; + params.push(limit, offset); + } + + const results = sql.getRows(query, params); + return results; + + } catch (error: any) { + log.error(`FTS5 substring search error: ${error}`); + throw new Error(`FTS5 substring search failed: ${error.message}`); + } + } + + /** + * Combined search that handles both word and substring searches + */ + search( + tokens: string[], + operator: string, + noteIds?: Set, + options: MinimalFTSSearchOptions = {} + ): MinimalFTSSearchResult[] { + // Substring search operators + if (operator === '*=*' || operator === '*=') { + return this.searchSubstring(tokens, noteIds, options); + } + + // Word-based search for all other operators + return this.searchWords(tokens, operator, noteIds, options); + } + + /** + * Update FTS index for a specific note + */ + updateNoteIndex(noteId: string, title: string, content: string): void { + if (!this.checkFTS5Availability()) { + return; + } + + try { + sql.transactional(() => { + // Delete existing entry + sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + + // Insert new entry (limit content size) + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) + VALUES (?, ?, SUBSTR(?, 1, 500000)) + `, [noteId, title, content]); + }); + } catch (error) { + log.error(`Failed to update FTS index for note ${noteId}: ${error}`); + } + } + + /** + * Remove a note from the FTS index + */ + removeNoteFromIndex(noteId: string): void { + if (!this.checkFTS5Availability()) { + return; + } + + try { + sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]); + } catch (error) { + log.error(`Failed to remove note ${noteId} from FTS index: ${error}`); + } + } + + /** + * Rebuild the entire FTS index + * Simple and straightforward - let SQLite handle it + */ + rebuildIndex(): void { + if (!this.checkFTS5Availability()) { + log.error("Cannot rebuild FTS index - FTS5 not available"); + return; + } + + log.info("Rebuilding FTS5 index..."); + + try { + sql.transactional(() => { + // Clear existing index + sql.execute(`DELETE FROM notes_fts`); + + // Rebuild from notes + sql.execute(` + INSERT INTO notes_fts (noteId, title, content) + SELECT + n.noteId, + n.title, + SUBSTR(b.content, 1, 500000) + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `); + + // Optimize the index + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + }); + + log.info("FTS5 index rebuild completed"); + } catch (error) { + log.error(`Failed to rebuild FTS index: ${error}`); + throw error; + } + } + + /** + * Optimize the FTS index + * Simple optimization - no complex logic + */ + optimizeIndex(): void { + if (!this.checkFTS5Availability()) { + return; + } + + try { + log.info("Optimizing FTS5 index..."); + + // Simple optimization command + sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`); + + // Update statistics for query planner + sql.execute(`ANALYZE notes_fts`); + + log.info("FTS5 index optimization completed"); + } catch (error) { + log.error(`Failed to optimize FTS index: ${error}`); + } + } + + /** + * Get basic statistics about the FTS index + */ + getIndexStats(): { + totalDocuments: number; + tableExists: boolean; + } { + if (!this.checkFTS5Availability()) { + return { + totalDocuments: 0, + tableExists: false + }; + } + + try { + const totalDocuments = sql.getValue(` + SELECT COUNT(*) FROM notes_fts + `) || 0; + + return { + totalDocuments, + tableExists: true + }; + } catch (error) { + log.error(`Failed to get index stats: ${error}`); + return { + totalDocuments: 0, + tableExists: false + }; + } + } +} + +// Export singleton instance +export const minimalFTSSearchService = new MinimalFTSSearchService(); + +export default minimalFTSSearchService; \ No newline at end of file diff --git a/scripts/stress-test-native-simple.ts b/scripts/stress-test-native-simple.ts index bdfe2b3276..0b13c52f40 100644 --- a/scripts/stress-test-native-simple.ts +++ b/scripts/stress-test-native-simple.ts @@ -15,6 +15,75 @@ import * as path from 'path'; import * as fs from 'fs'; import { randomBytes } from 'crypto'; +// Resource manager for proper cleanup +class ResourceManager { + private resources: Array<{ name: string; cleanup: () => void | Promise }> = []; + private cleanedUp = false; + + register(name: string, cleanup: () => void | Promise): void { + console.log(`[ResourceManager] Registered resource: ${name}`); + this.resources.push({ name, cleanup }); + } + + async cleanup(): Promise { + if (this.cleanedUp) { + console.log('[ResourceManager] Already cleaned up, skipping...'); + return; + } + + console.log('[ResourceManager] Starting cleanup...'); + this.cleanedUp = true; + + // Cleanup in reverse order of registration + for (let i = this.resources.length - 1; i >= 0; i--) { + const resource = this.resources[i]; + try { + console.log(`[ResourceManager] Cleaning up: ${resource.name}`); + await resource.cleanup(); + console.log(`[ResourceManager] Successfully cleaned up: ${resource.name}`); + } catch (error) { + console.error(`[ResourceManager] Error cleaning up ${resource.name}:`, error); + } + } + + this.resources = []; + console.log('[ResourceManager] Cleanup completed'); + } +} + +// Global resource manager +const resourceManager = new ResourceManager(); + +// Setup process exit handlers +process.on('exit', (code) => { + console.log(`[Process] Exiting with code: ${code}`); +}); + +process.on('SIGINT', async () => { + console.log('\n[Process] Received SIGINT, cleaning up...'); + await resourceManager.cleanup(); + process.exit(130); // Standard exit code for SIGINT +}); + +process.on('SIGTERM', async () => { + console.log('\n[Process] Received SIGTERM, cleaning up...'); + await resourceManager.cleanup(); + process.exit(143); // Standard exit code for SIGTERM +}); + +process.on('uncaughtException', async (error) => { + console.error('[Process] Uncaught exception:', error); + await resourceManager.cleanup(); + process.exit(1); +}); + +process.on('unhandledRejection', async (reason, promise) => { + console.error('[Process] Unhandled rejection at:', promise, 'reason:', reason); + await resourceManager.cleanup(); + process.exit(1); +}); + +// Parse command line arguments const noteCount = parseInt(process.argv[2]); const batchSize = parseInt(process.argv[3]) || 100; @@ -41,15 +110,6 @@ console.log(` Batch size: ${batchSize.toLocaleString()}`); console.log(` Database: ${DB_PATH}`); console.log(`============================================\n`); -// Open database -const db = new Database(DB_PATH); - -// Enable optimizations -db.pragma('journal_mode = WAL'); -db.pragma('synchronous = NORMAL'); -db.pragma('cache_size = 10000'); -db.pragma('temp_store = MEMORY'); - // Helper functions that mimic Trilium's ID generation function newEntityId(prefix: string = ''): string { return prefix + randomBytes(12).toString('base64').replace(/[+/=]/g, '').substring(0, 12); @@ -125,15 +185,18 @@ function generateContent(): string { } // Native-style service functions -function createNote(params: { - noteId: string; - title: string; - content: string; - type: string; - mime?: string; - isProtected?: boolean; - parentNoteId?: string; -}) { +function createNote( + db: Database.Database, + params: { + noteId: string; + title: string; + content: string; + type: string; + mime?: string; + isProtected?: boolean; + parentNoteId?: string; + } +) { const currentDateTime = utcNowDateTime(); const noteStmt = db.prepare(` INSERT INTO notes (noteId, title, isProtected, type, mime, blobId, isDeleted, deleteId, @@ -195,13 +258,16 @@ function createNote(params: { return params.noteId; } -function createAttribute(params: { - noteId: string; - type: 'label' | 'relation'; - name: string; - value: string; - isInheritable?: boolean; -}) { +function createAttribute( + db: Database.Database, + params: { + noteId: string; + type: 'label' | 'relation'; + name: string; + value: string; + isInheritable?: boolean; + } +) { const currentDateTime = utcNowDateTime(); const stmt = db.prepare(` INSERT INTO attributes (attributeId, noteId, type, name, value, position, @@ -223,148 +289,212 @@ function createAttribute(params: { ); } -async function main() { - const startTime = Date.now(); - const allNoteIds: string[] = ['root']; - let notesCreated = 0; - let attributesCreated = 0; - - console.log('Starting note generation...\n'); - - // Create container note - const containerNoteId = newEntityId(); - const containerTransaction = db.transaction(() => { - createNote({ - noteId: containerNoteId, - title: `Stress Test ${new Date().toISOString()}`, - content: `

Container for stress test with ${noteCount} notes

`, - type: 'text', - parentNoteId: 'root' - }); - }); - containerTransaction(); - - console.log(`Created container note: ${containerNoteId}`); - allNoteIds.push(containerNoteId); - - // Process in batches - for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { - const batchStart = batch * batchSize; - const batchEnd = Math.min(batchStart + batchSize, noteCount); - const batchNoteCount = batchEnd - batchStart; +async function main(): Promise { + let db: Database.Database | null = null; + let exitCode = 0; + + try { + const startTime = Date.now(); + const allNoteIds: string[] = ['root']; + let notesCreated = 0; + let attributesCreated = 0; - const batchTransaction = db.transaction(() => { - for (let i = 0; i < batchNoteCount; i++) { - const noteId = newEntityId(); - const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; - - // Decide parent - either container or random existing note - let parentNoteId = containerNoteId; - if (allNoteIds.length > 10 && Math.random() < 0.3) { - parentNoteId = allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 100))]; + console.log('Opening database connection...'); + + // Open database with proper error handling + try { + db = new Database(DB_PATH); + resourceManager.register('Database Connection', () => { + if (db && db.open) { + console.log('Closing database connection...'); + db.close(); + console.log('Database connection closed'); } - - // Create note - createNote({ - noteId, - title: generateTitle(), - content: generateContent(), - type, - parentNoteId, - isProtected: Math.random() < 0.05 - }); - - notesCreated++; - allNoteIds.push(noteId); - - // Add attributes - const attributeCount = Math.floor(Math.random() * 5); - for (let a = 0; a < attributeCount; a++) { - const attrType = Math.random() < 0.7 ? 'label' : 'relation'; - const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; + }); + } catch (error) { + console.error('Failed to open database:', error); + throw error; + } + + // Enable optimizations + console.log('Configuring database optimizations...'); + db.pragma('journal_mode = WAL'); + db.pragma('synchronous = NORMAL'); + db.pragma('cache_size = 10000'); + db.pragma('temp_store = MEMORY'); + + console.log('Starting note generation...\n'); + + // Create container note + const containerNoteId = newEntityId(); + const containerTransaction = db.transaction(() => { + createNote(db!, { + noteId: containerNoteId, + title: `Stress Test ${new Date().toISOString()}`, + content: `

Container for stress test with ${noteCount} notes

`, + type: 'text', + parentNoteId: 'root' + }); + }); + + try { + containerTransaction(); + console.log(`Created container note: ${containerNoteId}`); + allNoteIds.push(containerNoteId); + } catch (error) { + console.error('Failed to create container note:', error); + throw error; + } + + // Process in batches + for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { + const batchStart = batch * batchSize; + const batchEnd = Math.min(batchStart + batchSize, noteCount); + const batchNoteCount = batchEnd - batchStart; + + const batchTransaction = db.transaction(() => { + for (let i = 0; i < batchNoteCount; i++) { + const noteId = newEntityId(); + const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; - try { - createAttribute({ - noteId, - type: attrType, - name: attrName, - value: attrType === 'relation' - ? allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 50))] - : getRandomWord(), - isInheritable: Math.random() < 0.2 - }); - attributesCreated++; - } catch (e) { - // Ignore duplicate errors + // Decide parent - either container or random existing note + let parentNoteId = containerNoteId; + if (allNoteIds.length > 10 && Math.random() < 0.3) { + parentNoteId = allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 100))]; + } + + // Create note + createNote(db!, { + noteId, + title: generateTitle(), + content: generateContent(), + type, + parentNoteId, + isProtected: Math.random() < 0.05 + }); + + notesCreated++; + allNoteIds.push(noteId); + + // Add attributes + const attributeCount = Math.floor(Math.random() * 5); + for (let a = 0; a < attributeCount; a++) { + const attrType = Math.random() < 0.7 ? 'label' : 'relation'; + const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; + + try { + createAttribute(db!, { + noteId, + type: attrType as 'label' | 'relation', + name: attrName, + value: attrType === 'relation' + ? allNoteIds[Math.floor(Math.random() * Math.min(allNoteIds.length, 50))] + : getRandomWord(), + isInheritable: Math.random() < 0.2 + }); + attributesCreated++; + } catch (e) { + // Ignore duplicate errors, but log unexpected ones + if (!(e instanceof Error) || !e.message.includes('UNIQUE')) { + console.warn(`Unexpected attribute error: ${e}`); + } + } + } + + // Keep memory in check + if (allNoteIds.length > 500) { + allNoteIds.splice(1, allNoteIds.length - 500); } } + }); + + try { + batchTransaction(); - // Keep memory in check - if (allNoteIds.length > 500) { - allNoteIds.splice(1, allNoteIds.length - 500); - } + const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); + const elapsed = (Date.now() - startTime) / 1000; + const rate = Math.round(notesCreated / elapsed); + + console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attributes: ${attributesCreated}`); + } catch (error) { + console.error(`Failed to process batch ${batch + 1}:`, error); + throw error; + } + } + + // Add entity changes + console.log('\nAdding entity changes...'); + const entityTransaction = db.transaction(() => { + const stmt = db.prepare(` + INSERT OR REPLACE INTO entity_changes + (entityName, entityId, hash, isErased, changeId, componentId, instanceId, isSynced, utcDateChanged) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + for (let i = 0; i < Math.min(100, allNoteIds.length); i++) { + stmt.run( + 'notes', + allNoteIds[i], + randomBytes(16).toString('hex'), + 0, + newEntityId(), + 'stress_test', + 'stress_test_instance', + 1, + utcNowDateTime() + ); } }); - batchTransaction(); - - const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); - const elapsed = (Date.now() - startTime) / 1000; - const rate = Math.round(notesCreated / elapsed); - - console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attributes: ${attributesCreated}`); - } - - // Add entity changes - console.log('\nAdding entity changes...'); - const entityTransaction = db.transaction(() => { - const stmt = db.prepare(` - INSERT OR REPLACE INTO entity_changes - (entityName, entityId, hash, isErased, changeId, componentId, instanceId, isSynced, utcDateChanged) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - - for (let i = 0; i < Math.min(100, allNoteIds.length); i++) { - stmt.run( - 'notes', - allNoteIds[i], - randomBytes(16).toString('hex'), - 0, - newEntityId(), - 'stress_test', - 'stress_test_instance', - 1, - utcNowDateTime() - ); + try { + entityTransaction(); + } catch (error) { + console.error('Failed to add entity changes:', error); + // Non-critical error, continue } - }); - entityTransaction(); - - const endTime = Date.now(); - const duration = (endTime - startTime) / 1000; - - // Get statistics - const stats = { - notes: db.prepare('SELECT COUNT(*) as count FROM notes').get() as any, - branches: db.prepare('SELECT COUNT(*) as count FROM branches').get() as any, - attributes: db.prepare('SELECT COUNT(*) as count FROM attributes').get() as any, - blobs: db.prepare('SELECT COUNT(*) as count FROM blobs').get() as any - }; - - console.log('\n✅ Native-style stress test completed successfully!\n'); - console.log('Database Statistics:'); - console.log(` • Total notes: ${stats.notes.count.toLocaleString()}`); - console.log(` • Total branches: ${stats.branches.count.toLocaleString()}`); - console.log(` • Total attributes: ${stats.attributes.count.toLocaleString()}`); - console.log(` • Total blobs: ${stats.blobs.count.toLocaleString()}`); - console.log(` • Time taken: ${duration.toFixed(2)} seconds`); - console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); - console.log(` • Container note ID: ${containerNoteId}\n`); - - db.close(); + + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; + + // Get statistics + console.log('\nGathering database statistics...'); + const stats = { + notes: db.prepare('SELECT COUNT(*) as count FROM notes').get() as any, + branches: db.prepare('SELECT COUNT(*) as count FROM branches').get() as any, + attributes: db.prepare('SELECT COUNT(*) as count FROM attributes').get() as any, + blobs: db.prepare('SELECT COUNT(*) as count FROM blobs').get() as any + }; + + console.log('\n✅ Native-style stress test completed successfully!\n'); + console.log('Database Statistics:'); + console.log(` • Total notes: ${stats.notes.count.toLocaleString()}`); + console.log(` • Total branches: ${stats.branches.count.toLocaleString()}`); + console.log(` • Total attributes: ${stats.attributes.count.toLocaleString()}`); + console.log(` • Total blobs: ${stats.blobs.count.toLocaleString()}`); + console.log(` • Time taken: ${duration.toFixed(2)} seconds`); + console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); + console.log(` • Container note ID: ${containerNoteId}\n`); + + } catch (error) { + console.error('\n❌ Stress test failed with error:', error); + if (error instanceof Error) { + console.error('Error stack:', error.stack); + } + exitCode = 1; + } finally { + // Ensure cleanup happens + console.log('\nPerforming final cleanup...'); + await resourceManager.cleanup(); + + // Exit with appropriate code + console.log(`Exiting with code: ${exitCode}`); + process.exit(exitCode); + } } -main().catch((error) => { - console.error('Error:', error); +// Run the main function +main().catch(async (error) => { + console.error('Fatal error in main:', error); + await resourceManager.cleanup(); process.exit(1); }); \ No newline at end of file diff --git a/scripts/stress-test-native.ts b/scripts/stress-test-native.ts index d901c4f47d..564abee64a 100644 --- a/scripts/stress-test-native.ts +++ b/scripts/stress-test-native.ts @@ -15,6 +15,75 @@ process.env.NODE_ENV = process.env.NODE_ENV || 'development'; process.env.DATA_DIR = process.env.DATA_DIR || './data'; +// Resource manager for proper cleanup +class ResourceManager { + private resources: Array<{ name: string; cleanup: () => void | Promise }> = []; + private cleanedUp = false; + + register(name: string, cleanup: () => void | Promise): void { + console.log(`[ResourceManager] Registered resource: ${name}`); + this.resources.push({ name, cleanup }); + } + + async cleanup(): Promise { + if (this.cleanedUp) { + console.log('[ResourceManager] Already cleaned up, skipping...'); + return; + } + + console.log('[ResourceManager] Starting cleanup...'); + this.cleanedUp = true; + + // Cleanup in reverse order of registration + for (let i = this.resources.length - 1; i >= 0; i--) { + const resource = this.resources[i]; + try { + console.log(`[ResourceManager] Cleaning up: ${resource.name}`); + await resource.cleanup(); + console.log(`[ResourceManager] Successfully cleaned up: ${resource.name}`); + } catch (error) { + console.error(`[ResourceManager] Error cleaning up ${resource.name}:`, error); + } + } + + this.resources = []; + console.log('[ResourceManager] Cleanup completed'); + } +} + +// Global resource manager +const resourceManager = new ResourceManager(); + +// Setup process exit handlers +process.on('exit', (code) => { + console.log(`[Process] Exiting with code: ${code}`); +}); + +process.on('SIGINT', async () => { + console.log('\n[Process] Received SIGINT, cleaning up...'); + await resourceManager.cleanup(); + process.exit(130); // Standard exit code for SIGINT +}); + +process.on('SIGTERM', async () => { + console.log('\n[Process] Received SIGTERM, cleaning up...'); + await resourceManager.cleanup(); + process.exit(143); // Standard exit code for SIGTERM +}); + +process.on('uncaughtException', async (error) => { + console.error('[Process] Uncaught exception:', error); + await resourceManager.cleanup(); + process.exit(1); +}); + +process.on('unhandledRejection', async (reason, promise) => { + console.error('[Process] Unhandled rejection at:', promise, 'reason:', reason); + await resourceManager.cleanup(); + process.exit(1); +}); + +// Import Trilium services after setting up environment and handlers import './src/becca/entity_constructor.js'; import sqlInit from './src/services/sql_init.js'; import noteService from './src/services/notes.js'; @@ -26,6 +95,7 @@ import becca from './src/becca/becca.js'; import entityChangesService from './src/services/entity_changes.js'; import type BNote from './src/becca/entities/bnote.js'; +// Parse command line arguments const noteCount = parseInt(process.argv[2]); const batchSize = parseInt(process.argv[3]) || 100; @@ -159,7 +229,8 @@ function generateSentence(): string { return wordList.join(' '); } -async function start() { +async function runStressTest(): Promise { + let exitCode = 0; const startTime = Date.now(); const allNotes: BNote[] = []; let notesCreated = 0; @@ -167,255 +238,343 @@ async function start() { let clonesCreated = 0; let revisionsCreated = 0; - console.log('Starting note generation using native Trilium services...\n'); - - // Find root note - const rootNote = becca.getNote('root'); - if (!rootNote) { - console.error('Root note not found!'); - process.exit(1); - } - - // Create a container note for our stress test - const { note: containerNote } = noteService.createNewNote({ - parentNoteId: 'root', - title: `Stress Test ${new Date().toISOString()}`, - content: `

Container for stress test with ${noteCount} notes

`, - type: 'text', - isProtected: false - }); - - console.log(`Created container note: ${containerNote.title} (${containerNote.noteId})`); - allNotes.push(containerNote); - - // Process in batches for better control - for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { - const batchStart = batch * batchSize; - const batchEnd = Math.min(batchStart + batchSize, noteCount); - const batchNoteCount = batchEnd - batchStart; + try { + console.log('Starting note generation using native Trilium services...\n'); - sql.transactional(() => { - for (let i = 0; i < batchNoteCount; i++) { - const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; - let content = ''; - let mime = undefined; - - // Generate content based on type - switch (type) { - case 'code': - content = generateCodeContent(); - mime = 'text/plain'; - break; - case 'mermaid': - content = generateMermaidContent(); - mime = 'text/plain'; - break; - case 'canvas': - content = JSON.stringify({ - elements: [], - appState: { viewBackgroundColor: "#ffffff" }, - files: {} - }); - mime = 'application/json'; - break; - case 'search': - content = JSON.stringify({ - searchString: `#${getRandomWord()} OR #${getRandomWord()}` - }); - mime = 'application/json'; - break; - case 'relationMap': - content = JSON.stringify({ - notes: [], - zoom: 1 - }); - mime = 'application/json'; - break; - default: - content = generateContent(); - mime = 'text/html'; - } - - // Decide parent - either container or random existing note for complex hierarchy - let parentNoteId = containerNote.noteId; - if (allNotes.length > 10 && Math.random() < 0.3) { - // 30% chance to attach to random existing note - parentNoteId = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 100))].noteId; - } - - // Create the note using native service - const { note, branch } = noteService.createNewNote({ - parentNoteId, - title: generateTitle(), - content, - type, - mime, - isProtected: Math.random() < 0.05 // 5% protected notes - }); - - notesCreated++; - allNotes.push(note); - - // Add attributes using native service - const attributeCount = Math.floor(Math.random() * 8); - for (let a = 0; a < attributeCount; a++) { - const attrType = Math.random() < 0.7 ? 'label' : 'relation'; - const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; - - try { - if (attrType === 'label') { - attributeService.createLabel( - note.noteId, - attrName, - Math.random() < 0.5 ? getRandomWord() : '' - ); - attributesCreated++; - } else if (allNotes.length > 1) { - const targetNote = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 50))]; - attributeService.createRelation( - note.noteId, - attrName, - targetNote.noteId - ); - attributesCreated++; + // Find root note + const rootNote = becca.getNote('root'); + if (!rootNote) { + throw new Error('Root note not found! Database might not be initialized properly.'); + } + + // Create a container note for our stress test + console.log('Creating container note...'); + const { note: containerNote } = noteService.createNewNote({ + parentNoteId: 'root', + title: `Stress Test ${new Date().toISOString()}`, + content: `

Container for stress test with ${noteCount} notes

`, + type: 'text', + isProtected: false + }); + + console.log(`Created container note: ${containerNote.title} (${containerNote.noteId})`); + allNotes.push(containerNote); + + // Process in batches for better control + for (let batch = 0; batch < Math.ceil(noteCount / batchSize); batch++) { + const batchStart = batch * batchSize; + const batchEnd = Math.min(batchStart + batchSize, noteCount); + const batchNoteCount = batchEnd - batchStart; + + try { + sql.transactional(() => { + for (let i = 0; i < batchNoteCount; i++) { + const type = noteTypes[Math.floor(Math.random() * noteTypes.length)]; + let content = ''; + let mime = undefined; + + // Generate content based on type + switch (type) { + case 'code': + content = generateCodeContent(); + mime = 'text/plain'; + break; + case 'mermaid': + content = generateMermaidContent(); + mime = 'text/plain'; + break; + case 'canvas': + content = JSON.stringify({ + elements: [], + appState: { viewBackgroundColor: "#ffffff" }, + files: {} + }); + mime = 'application/json'; + break; + case 'search': + content = JSON.stringify({ + searchString: `#${getRandomWord()} OR #${getRandomWord()}` + }); + mime = 'application/json'; + break; + case 'relationMap': + content = JSON.stringify({ + notes: [], + zoom: 1 + }); + mime = 'application/json'; + break; + default: + content = generateContent(); + mime = 'text/html'; } - } catch (e) { - // Ignore attribute creation errors (e.g., duplicates) - } - } - - // Update note content occasionally to trigger revisions - if (Math.random() < 0.1) { // 10% chance - note.setContent(content + `\n

Updated at ${new Date().toISOString()}

`); - note.save(); - - // Save revision - if (Math.random() < 0.5) { - note.saveRevision(); - revisionsCreated++; - } - } - - // Create clones occasionally for complex relationships - if (allNotes.length > 20 && Math.random() < 0.05) { // 5% chance - try { - const targetParent = allNotes[Math.floor(Math.random() * allNotes.length)]; - const result = cloningService.cloneNoteToBranch( - note.noteId, - targetParent.noteId, - Math.random() < 0.2 ? 'clone' : '' - ); - if (result.success) { - clonesCreated++; + + // Decide parent - either container or random existing note for complex hierarchy + let parentNoteId = containerNote.noteId; + if (allNotes.length > 10 && Math.random() < 0.3) { + // 30% chance to attach to random existing note + parentNoteId = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 100))].noteId; + } + + // Create the note using native service + const { note, branch } = noteService.createNewNote({ + parentNoteId, + title: generateTitle(), + content, + type, + mime, + isProtected: Math.random() < 0.05 // 5% protected notes + }); + + notesCreated++; + allNotes.push(note); + + // Add attributes using native service + const attributeCount = Math.floor(Math.random() * 8); + for (let a = 0; a < attributeCount; a++) { + const attrType = Math.random() < 0.7 ? 'label' : 'relation'; + const attrName = attributeNames[Math.floor(Math.random() * attributeNames.length)]; + + try { + if (attrType === 'label') { + attributeService.createLabel( + note.noteId, + attrName, + Math.random() < 0.5 ? getRandomWord() : '' + ); + attributesCreated++; + } else if (allNotes.length > 1) { + const targetNote = allNotes[Math.floor(Math.random() * Math.min(allNotes.length, 50))]; + attributeService.createRelation( + note.noteId, + attrName, + targetNote.noteId + ); + attributesCreated++; + } + } catch (e) { + // Ignore attribute creation errors (e.g., duplicates) + if (e instanceof Error && !e.message.includes('duplicate') && !e.message.includes('already exists')) { + console.warn(`Unexpected attribute error: ${e.message}`); + } + } + } + + // Update note content occasionally to trigger revisions + if (Math.random() < 0.1) { // 10% chance + note.setContent(content + `\n

Updated at ${new Date().toISOString()}

`); + note.save(); + + // Save revision + if (Math.random() < 0.5) { + try { + note.saveRevision(); + revisionsCreated++; + } catch (e) { + // Ignore revision errors + } + } + } + + // Create clones occasionally for complex relationships + if (allNotes.length > 20 && Math.random() < 0.05) { // 5% chance + try { + const targetParent = allNotes[Math.floor(Math.random() * allNotes.length)]; + const result = cloningService.cloneNoteToBranch( + note.noteId, + targetParent.noteId, + Math.random() < 0.2 ? 'clone' : '' + ); + if (result.success) { + clonesCreated++; + } + } catch (e) { + // Ignore cloning errors (e.g., circular dependencies) + } + } + + // Add note to recent notes occasionally + if (Math.random() < 0.1) { // 10% chance + try { + sql.execute( + "INSERT OR IGNORE INTO recent_notes (noteId, notePath, utcDateCreated) VALUES (?, ?, ?)", + [note.noteId, note.getBestNotePath()?.path || 'root', note.utcDateCreated] + ); + } catch (e) { + // Table might not exist in all versions + } + } + + // Keep memory usage in check + if (allNotes.length > 500) { + allNotes.splice(0, allNotes.length - 500); } - } catch (e) { - // Ignore cloning errors (e.g., circular dependencies) } - } + })(); - // Add note to recent notes occasionally - if (Math.random() < 0.1) { // 10% chance - try { - sql.execute( - "INSERT OR IGNORE INTO recent_notes (noteId, notePath, utcDateCreated) VALUES (?, ?, ?)", - [note.noteId, note.getBestNotePath()?.path || 'root', note.utcDateCreated] - ); - } catch (e) { - // Table might not exist in all versions - } - } + const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); + const elapsed = (Date.now() - startTime) / 1000; + const rate = Math.round(notesCreated / elapsed); - // Keep memory usage in check - if (allNotes.length > 500) { - allNotes.splice(0, allNotes.length - 500); + console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attrs: ${attributesCreated} | Clones: ${clonesCreated} | Revisions: ${revisionsCreated}`); + + } catch (error) { + console.error(`Failed to process batch ${batch + 1}:`, error); + throw error; + } + + // Force entity changes sync (non-critical) + try { + entityChangesService.putNoteReorderingEntityChange(containerNote.noteId); + } catch (e) { + // Ignore entity change errors + } + } + + // Create some advanced structures + console.log('\nCreating advanced relationships...'); + + try { + // Create template notes + const templateNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Template: ' + generateTitle(), + content: '

This is a template note

', + type: 'text', + isProtected: false + }).note; + + attributeService.createLabel(templateNote.noteId, 'template', ''); + + // Apply template to some notes + for (let i = 0; i < Math.min(10, allNotes.length); i++) { + const targetNote = allNotes[Math.floor(Math.random() * allNotes.length)]; + try { + attributeService.createRelation(targetNote.noteId, 'template', templateNote.noteId); + } catch (e) { + // Ignore relation errors } } - })(); + + // Create some CSS notes + const cssNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Custom CSS', + content: `.custom-class { color: #${Math.floor(Math.random()*16777215).toString(16)}; }`, + type: 'code', + mime: 'text/css', + isProtected: false + }).note; + + attributeService.createLabel(cssNote.noteId, 'appCss', ''); + + // Create widget notes + const widgetNote = noteService.createNewNote({ + parentNoteId: containerNote.noteId, + title: 'Custom Widget', + content: `
Widget content: ${generateSentence()}
`, + type: 'code', + mime: 'text/html', + isProtected: false + }).note; + + attributeService.createLabel(widgetNote.noteId, 'widget', ''); + } catch (error) { + console.warn('Failed to create some advanced structures:', error); + // Non-critical, continue + } - const progress = Math.round(((batch + 1) / Math.ceil(noteCount / batchSize)) * 100); - const elapsed = (Date.now() - startTime) / 1000; - const rate = Math.round(notesCreated / elapsed); + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; - console.log(`Progress: ${progress}% | Notes: ${notesCreated}/${noteCount} | Rate: ${rate}/sec | Attrs: ${attributesCreated} | Clones: ${clonesCreated} | Revisions: ${revisionsCreated}`); + // Get final statistics + console.log('\nGathering database statistics...'); + let stats: any = {}; + try { + stats.notes = sql.getValue('SELECT COUNT(*) FROM notes'); + stats.branches = sql.getValue('SELECT COUNT(*) FROM branches'); + stats.attributes = sql.getValue('SELECT COUNT(*) FROM attributes'); + stats.revisions = sql.getValue('SELECT COUNT(*) FROM revisions'); + stats.attachments = sql.getValue('SELECT COUNT(*) FROM attachments'); + stats.recentNotes = sql.getValue('SELECT COUNT(*) FROM recent_notes'); + } catch (error) { + console.warn('Failed to get some statistics:', error); + } - // Force entity changes sync - entityChangesService.putNoteReorderingEntityChange(containerNote.noteId); + console.log('\n✅ Native API stress test completed successfully!\n'); + console.log('Database Statistics:'); + console.log(` • Total notes: ${stats.notes?.toLocaleString() || 'N/A'}`); + console.log(` • Total branches: ${stats.branches?.toLocaleString() || 'N/A'}`); + console.log(` • Total attributes: ${stats.attributes?.toLocaleString() || 'N/A'}`); + console.log(` • Total revisions: ${stats.revisions?.toLocaleString() || 'N/A'}`); + console.log(` • Total attachments: ${stats.attachments?.toLocaleString() || 'N/A'}`); + console.log(` • Recent notes: ${stats.recentNotes?.toLocaleString() || 'N/A'}`); + console.log(` • Time taken: ${duration.toFixed(2)} seconds`); + console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); + console.log(` • Container note ID: ${containerNote.noteId}\n`); + + } catch (error) { + console.error('\n❌ Stress test failed with error:', error); + if (error instanceof Error) { + console.error('Error stack:', error.stack); + } + exitCode = 1; + } finally { + // Cleanup database connections and resources + console.log('\nCleaning up database resources...'); + try { + // Close any open database connections + if (sql && typeof sql.execute === 'function') { + // Try to checkpoint WAL if possible + try { + sql.execute('PRAGMA wal_checkpoint(TRUNCATE)'); + console.log('WAL checkpoint completed'); + } catch (e) { + // Ignore checkpoint errors + } + } + } catch (error) { + console.warn('Error during database cleanup:', error); + } + + // Perform final resource cleanup + await resourceManager.cleanup(); + + // Exit with appropriate code + console.log(`Exiting with code: ${exitCode}`); + process.exit(exitCode); } - - // Create some advanced structures - console.log('\nCreating advanced relationships...'); - - // Create template notes - const templateNote = noteService.createNewNote({ - parentNoteId: containerNote.noteId, - title: 'Template: ' + generateTitle(), - content: '

This is a template note

', - type: 'text', - isProtected: false - }).note; - - attributeService.createLabel(templateNote.noteId, 'template', ''); - - // Apply template to some notes - for (let i = 0; i < Math.min(10, allNotes.length); i++) { - const targetNote = allNotes[Math.floor(Math.random() * allNotes.length)]; - attributeService.createRelation(targetNote.noteId, 'template', templateNote.noteId); +} + +async function start(): Promise { + try { + // Register database cleanup + resourceManager.register('Database Connection', async () => { + try { + if (sql && typeof sql.execute === 'function') { + console.log('Closing database connections...'); + // Attempt to close any open transactions + sql.execute('ROLLBACK'); + } + } catch (e) { + // Ignore errors during cleanup + } + }); + + // Run the stress test + await runStressTest(); + } catch (error) { + console.error('Fatal error during startup:', error); + await resourceManager.cleanup(); + process.exit(1); } - - // Create some CSS notes - const cssNote = noteService.createNewNote({ - parentNoteId: containerNote.noteId, - title: 'Custom CSS', - content: `.custom-class { color: #${Math.floor(Math.random()*16777215).toString(16)}; }`, - type: 'code', - mime: 'text/css', - isProtected: false - }).note; - - attributeService.createLabel(cssNote.noteId, 'appCss', ''); - - // Create widget notes - const widgetNote = noteService.createNewNote({ - parentNoteId: containerNote.noteId, - title: 'Custom Widget', - content: `
Widget content: ${generateSentence()}
`, - type: 'code', - mime: 'text/html', - isProtected: false - }).note; - - attributeService.createLabel(widgetNote.noteId, 'widget', ''); - - const endTime = Date.now(); - const duration = (endTime - startTime) / 1000; - - // Get final statistics - const stats = { - notes: sql.getValue('SELECT COUNT(*) FROM notes'), - branches: sql.getValue('SELECT COUNT(*) FROM branches'), - attributes: sql.getValue('SELECT COUNT(*) FROM attributes'), - revisions: sql.getValue('SELECT COUNT(*) FROM revisions'), - attachments: sql.getValue('SELECT COUNT(*) FROM attachments'), - recentNotes: sql.getValue('SELECT COUNT(*) FROM recent_notes') - }; - - console.log('\n✅ Native API stress test completed successfully!\n'); - console.log('Database Statistics:'); - console.log(` • Total notes: ${stats.notes?.toLocaleString()}`); - console.log(` • Total branches: ${stats.branches?.toLocaleString()}`); - console.log(` • Total attributes: ${stats.attributes?.toLocaleString()}`); - console.log(` • Total revisions: ${stats.revisions?.toLocaleString()}`); - console.log(` • Total attachments: ${stats.attachments?.toLocaleString()}`); - console.log(` • Recent notes: ${stats.recentNotes?.toLocaleString()}`); - console.log(` • Time taken: ${duration.toFixed(2)} seconds`); - console.log(` • Average rate: ${Math.round(noteCount / duration).toLocaleString()} notes/second`); - console.log(` • Container note ID: ${containerNote.noteId}\n`); - - process.exit(0); } // Initialize database and run stress test -sqlInit.dbReady.then(cls.wrap(start)).catch((err) => { - console.error('Error:', err); - process.exit(1); -}); \ No newline at end of file +sqlInit.dbReady + .then(() => cls.wrap(start)()) + .catch(async (err) => { + console.error('Failed to initialize database:', err); + await resourceManager.cleanup(); + process.exit(1); + }); \ No newline at end of file