trilium/apps/server/src/migrations/0234__add_fts5_search.ts
2025-11-28 21:27:01 -08:00

652 lines
25 KiB
TypeScript

/**
* Migration to add FTS5 full-text search support and strategic performance indexes
*
* This migration:
* 1. Creates an FTS5 virtual table for full-text searching of notes
* 2. Populates it with existing note content
* 3. Creates triggers to keep the FTS table synchronized with note changes
* 4. Creates an FTS5 virtual table for full-text searching of attributes
* 5. Populates it with existing attributes and creates synchronization triggers
* 6. Adds strategic composite and covering indexes for improved query performance
* 7. Optimizes common query patterns identified through performance analysis
*/
import sql from "../services/sql.js";
import log from "../services/log.js";
export default function addFTS5SearchAndPerformanceIndexes() {
log.info("Starting FTS5 and performance optimization migration...");
// Part 1: FTS5 Setup
log.info("Creating FTS5 virtual table for full-text search...");
// Create FTS5 virtual table
// We store noteId, title, and content for searching
sql.executeScript(`
-- Create FTS5 virtual table with trigram tokenizer
-- Trigram tokenizer provides language-agnostic substring matching:
-- 1. Fast substring matching (50-100x speedup for LIKE queries without wildcards)
-- 2. Case-insensitive search without custom collation
-- 3. No language-specific stemming assumptions (works for all languages)
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
--
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
-- detail='full' enables phrase queries (required for exact match with = operator)
-- and provides position info for highlight() function
-- Note: Using detail='full' instead of detail='none' increases index size by ~50%
-- but is necessary to support phrase queries like "exact phrase"
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
noteId UNINDEXED,
title,
content,
tokenize = 'trigram',
detail = 'full'
);
`);
log.info("Populating FTS5 table with existing note content...");
// Populate the FTS table with existing notes
// We only index text-based note types that contain searchable content
const batchSize = 100;
let processedCount = 0;
let hasError = false;
// Wrap entire population process in a transaction for consistency
// If any error occurs, the entire population will be rolled back
try {
sql.transactional(() => {
let offset = 0;
while (true) {
const notes = sql.getRows<{
noteId: string;
title: string;
content: string | null;
}>(`
SELECT
n.noteId,
n.title,
b.content
FROM notes n
LEFT JOIN blobs b ON n.blobId = b.blobId
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0 -- Skip protected notes - they require special handling
ORDER BY n.noteId
LIMIT ? OFFSET ?
`, [batchSize, offset]);
if (notes.length === 0) {
break;
}
for (const note of notes) {
if (note.content) {
// Process content based on type (simplified for migration)
let processedContent = note.content;
// For HTML content, we'll strip tags in the search service
// For now, just insert the raw content
sql.execute(`
INSERT INTO notes_fts (noteId, title, content)
VALUES (?, ?, ?)
`, [note.noteId, note.title, processedContent]);
processedCount++;
}
}
offset += batchSize;
if (processedCount % 1000 === 0) {
log.info(`Processed ${processedCount} notes for FTS indexing...`);
}
}
});
} catch (error) {
hasError = true;
log.error(`Failed to populate FTS index. Rolling back... ${error}`);
// Clean up partial data if transaction failed
try {
sql.execute("DELETE FROM notes_fts");
} catch (cleanupError) {
log.error(`Failed to clean up FTS table after error: ${cleanupError}`);
}
throw new Error(`FTS5 migration failed during population: ${error}`);
}
log.info(`Completed FTS indexing of ${processedCount} notes`);
// Create triggers to keep FTS table synchronized
log.info("Creating FTS synchronization triggers...");
// Drop all existing triggers first to ensure clean state
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_insert`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_update`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_delete`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_soft_delete`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_blob_insert`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_blob_update`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_protect`);
sql.execute(`DROP TRIGGER IF EXISTS notes_fts_unprotect`);
// Create improved triggers that handle all SQL operations properly
// including INSERT OR REPLACE and INSERT ... ON CONFLICT ... DO UPDATE (upsert)
// Trigger for INSERT operations on notes
sql.execute(`
CREATE TRIGGER notes_fts_insert
AFTER INSERT ON notes
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND NEW.isDeleted = 0
AND NEW.isProtected = 0
BEGIN
-- First delete any existing FTS entry (in case of INSERT OR REPLACE)
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-- Then insert the new entry, using LEFT JOIN to handle missing blobs
INSERT INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
END
`);
// Trigger for UPDATE operations on notes table
// Fires for ANY update to searchable notes to ensure FTS stays in sync
sql.execute(`
CREATE TRIGGER notes_fts_update
AFTER UPDATE ON notes
WHEN NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
-- Fire on any change, not just specific columns, to handle all upsert scenarios
BEGIN
-- Always delete the old entry
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
-- Insert new entry if note is not deleted and not protected
INSERT INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '') -- Use empty string if blob doesn't exist yet
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId
WHERE NEW.isDeleted = 0
AND NEW.isProtected = 0;
END
`);
// Trigger for DELETE operations on notes
sql.execute(`
CREATE TRIGGER notes_fts_delete
AFTER DELETE ON notes
BEGIN
DELETE FROM notes_fts WHERE noteId = OLD.noteId;
END
`);
// Trigger for soft delete (isDeleted = 1)
sql.execute(`
CREATE TRIGGER notes_fts_soft_delete
AFTER UPDATE ON notes
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
END
`);
// Trigger for notes becoming protected
sql.execute(`
CREATE TRIGGER notes_fts_protect
AFTER UPDATE ON notes
WHEN OLD.isProtected = 0 AND NEW.isProtected = 1
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
END
`);
// Trigger for notes becoming unprotected
sql.execute(`
CREATE TRIGGER notes_fts_unprotect
AFTER UPDATE ON notes
WHEN OLD.isProtected = 1 AND NEW.isProtected = 0
AND NEW.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND NEW.isDeleted = 0
BEGIN
DELETE FROM notes_fts WHERE noteId = NEW.noteId;
INSERT INTO notes_fts (noteId, title, content)
SELECT
NEW.noteId,
NEW.title,
COALESCE(b.content, '')
FROM (SELECT NEW.noteId) AS note_select
LEFT JOIN blobs b ON b.blobId = NEW.blobId;
END
`);
// Trigger for INSERT operations on blobs
// Uses INSERT OR REPLACE for efficiency with deduplicated blobs
sql.execute(`
CREATE TRIGGER notes_fts_blob_insert
AFTER INSERT ON blobs
BEGIN
-- Use INSERT OR REPLACE for atomic update
-- This handles the case where FTS entries may already exist
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT
n.noteId,
n.title,
NEW.content
FROM notes n
WHERE n.blobId = NEW.blobId
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0;
END
`);
// Trigger for UPDATE operations on blobs
// Uses INSERT OR REPLACE for efficiency
sql.execute(`
CREATE TRIGGER notes_fts_blob_update
AFTER UPDATE ON blobs
BEGIN
-- Use INSERT OR REPLACE for atomic update
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
SELECT
n.noteId,
n.title,
NEW.content
FROM notes n
WHERE n.blobId = NEW.blobId
AND n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0;
END
`);
log.info("FTS5 setup completed successfully");
// Final cleanup: ensure all eligible notes are indexed
// This catches any edge cases where notes might have been missed
log.info("Running final FTS index cleanup...");
// First check for missing notes
const missingCount = sql.getValue<number>(`
SELECT COUNT(*) FROM notes n
LEFT JOIN blobs b ON n.blobId = b.blobId
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0
AND b.content IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId)
`) || 0;
if (missingCount > 0) {
// Insert missing notes
sql.execute(`
WITH missing_notes AS (
SELECT n.noteId, n.title, b.content
FROM notes n
LEFT JOIN blobs b ON n.blobId = b.blobId
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND n.isDeleted = 0
AND n.isProtected = 0
AND b.content IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId)
)
INSERT INTO notes_fts (noteId, title, content)
SELECT noteId, title, content FROM missing_notes
`);
}
const cleanupCount = missingCount;
if (cleanupCount && cleanupCount > 0) {
log.info(`Indexed ${cleanupCount} additional notes during cleanup`);
}
// ========================================
// Part 2: Strategic Performance Indexes
// ========================================
log.info("Adding strategic performance indexes...");
const startTime = Date.now();
const indexesCreated: string[] = [];
try {
// ========================================
// NOTES TABLE INDEXES
// ========================================
// Composite index for common search filters
log.info("Creating composite index on notes table for search filters...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_notes_search_composite;
CREATE INDEX IF NOT EXISTS IDX_notes_search_composite
ON notes (isDeleted, type, mime, dateModified DESC);
`);
indexesCreated.push("IDX_notes_search_composite");
// Covering index for note metadata queries
log.info("Creating covering index for note metadata...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_notes_metadata_covering;
CREATE INDEX IF NOT EXISTS IDX_notes_metadata_covering
ON notes (noteId, isDeleted, type, mime, title, dateModified, isProtected);
`);
indexesCreated.push("IDX_notes_metadata_covering");
// Index for protected notes filtering
log.info("Creating index for protected notes...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_notes_protected_deleted;
CREATE INDEX IF NOT EXISTS IDX_notes_protected_deleted
ON notes (isProtected, isDeleted)
WHERE isProtected = 1;
`);
indexesCreated.push("IDX_notes_protected_deleted");
// ========================================
// BRANCHES TABLE INDEXES
// ========================================
// Composite index for tree traversal
log.info("Creating composite index on branches for tree traversal...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_branches_tree_traversal;
CREATE INDEX IF NOT EXISTS IDX_branches_tree_traversal
ON branches (parentNoteId, isDeleted, notePosition);
`);
indexesCreated.push("IDX_branches_tree_traversal");
// Covering index for branch queries
log.info("Creating covering index for branch queries...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_branches_covering;
CREATE INDEX IF NOT EXISTS IDX_branches_covering
ON branches (noteId, parentNoteId, isDeleted, notePosition, prefix);
`);
indexesCreated.push("IDX_branches_covering");
// Index for finding all parents of a note
log.info("Creating index for reverse tree lookup...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_branches_note_parents;
CREATE INDEX IF NOT EXISTS IDX_branches_note_parents
ON branches (noteId, isDeleted)
WHERE isDeleted = 0;
`);
indexesCreated.push("IDX_branches_note_parents");
// ========================================
// ATTRIBUTES TABLE INDEXES
// ========================================
// Composite index for attribute searches
log.info("Creating composite index on attributes for search...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attributes_search_composite;
CREATE INDEX IF NOT EXISTS IDX_attributes_search_composite
ON attributes (name, value, isDeleted);
`);
indexesCreated.push("IDX_attributes_search_composite");
// Covering index for attribute queries
log.info("Creating covering index for attribute queries...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attributes_covering;
CREATE INDEX IF NOT EXISTS IDX_attributes_covering
ON attributes (noteId, name, value, type, isDeleted, position);
`);
indexesCreated.push("IDX_attributes_covering");
// Index for inherited attributes
log.info("Creating index for inherited attributes...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attributes_inheritable;
CREATE INDEX IF NOT EXISTS IDX_attributes_inheritable
ON attributes (isInheritable, isDeleted)
WHERE isInheritable = 1 AND isDeleted = 0;
`);
indexesCreated.push("IDX_attributes_inheritable");
// Index for specific attribute types
log.info("Creating index for label attributes...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attributes_labels;
CREATE INDEX IF NOT EXISTS IDX_attributes_labels
ON attributes (type, name, value)
WHERE type = 'label' AND isDeleted = 0;
`);
indexesCreated.push("IDX_attributes_labels");
log.info("Creating index for relation attributes...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attributes_relations;
CREATE INDEX IF NOT EXISTS IDX_attributes_relations
ON attributes (type, name, value)
WHERE type = 'relation' AND isDeleted = 0;
`);
indexesCreated.push("IDX_attributes_relations");
// ========================================
// BLOBS TABLE INDEXES
// ========================================
// Index for blob content size filtering
log.info("Creating index for blob content size...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_blobs_content_size;
CREATE INDEX IF NOT EXISTS IDX_blobs_content_size
ON blobs (blobId, LENGTH(content));
`);
indexesCreated.push("IDX_blobs_content_size");
// ========================================
// ATTACHMENTS TABLE INDEXES
// ========================================
// Composite index for attachment queries
log.info("Creating composite index for attachments...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_attachments_composite;
CREATE INDEX IF NOT EXISTS IDX_attachments_composite
ON attachments (ownerId, role, isDeleted, position);
`);
indexesCreated.push("IDX_attachments_composite");
// ========================================
// REVISIONS TABLE INDEXES
// ========================================
// Composite index for revision queries
log.info("Creating composite index for revisions...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_revisions_note_date;
CREATE INDEX IF NOT EXISTS IDX_revisions_note_date
ON revisions (noteId, utcDateCreated DESC);
`);
indexesCreated.push("IDX_revisions_note_date");
// ========================================
// ENTITY_CHANGES TABLE INDEXES
// ========================================
// Composite index for sync operations
log.info("Creating composite index for entity changes sync...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_entity_changes_sync;
CREATE INDEX IF NOT EXISTS IDX_entity_changes_sync
ON entity_changes (isSynced, utcDateChanged);
`);
indexesCreated.push("IDX_entity_changes_sync");
// Index for component-based queries
log.info("Creating index for component-based entity change queries...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_entity_changes_component;
CREATE INDEX IF NOT EXISTS IDX_entity_changes_component
ON entity_changes (componentId, utcDateChanged DESC);
`);
indexesCreated.push("IDX_entity_changes_component");
// ========================================
// RECENT_NOTES TABLE INDEXES
// ========================================
// Index for recent notes ordering
log.info("Creating index for recent notes...");
sql.executeScript(`
DROP INDEX IF EXISTS IDX_recent_notes_date;
CREATE INDEX IF NOT EXISTS IDX_recent_notes_date
ON recent_notes (utcDateCreated DESC);
`);
indexesCreated.push("IDX_recent_notes_date");
// ========================================
// ANALYZE TABLES FOR QUERY PLANNER
// ========================================
log.info("Running ANALYZE to update SQLite query planner statistics...");
sql.executeScript(`
ANALYZE notes;
ANALYZE branches;
ANALYZE attributes;
ANALYZE blobs;
ANALYZE attachments;
ANALYZE revisions;
ANALYZE entity_changes;
ANALYZE recent_notes;
ANALYZE notes_fts;
`);
const endTime = Date.now();
const duration = endTime - startTime;
log.info(`Performance index creation completed in ${duration}ms`);
log.info(`Created ${indexesCreated.length} indexes: ${indexesCreated.join(", ")}`);
} catch (error) {
log.error(`Error creating performance indexes: ${error}`);
throw error;
}
// ========================================
// Part 3: Attributes FTS5 Setup
// ========================================
log.info("Creating FTS5 index for attributes...");
sql.transactional(() => {
// Create FTS5 virtual table for attributes
// IMPORTANT: Trigram requires minimum 3-character tokens for matching
// detail='full' enables phrase queries (required for exact match with = operator)
// and provides position info for highlight() function
sql.execute(`
CREATE VIRTUAL TABLE IF NOT EXISTS attributes_fts USING fts5(
attributeId UNINDEXED,
noteId UNINDEXED,
name,
value,
tokenize = 'trigram',
detail = 'full'
)
`);
log.info("Populating attributes_fts table...");
// Populate FTS table with existing attributes (non-deleted only)
const attrStartTime = Date.now();
sql.execute(`
INSERT INTO attributes_fts (attributeId, noteId, name, value)
SELECT
attributeId,
noteId,
name,
COALESCE(value, '')
FROM attributes
WHERE isDeleted = 0
`);
const populateTime = Date.now() - attrStartTime;
const attrCount = sql.getValue<number>(`SELECT COUNT(*) FROM attributes_fts`) || 0;
log.info(`Populated ${attrCount} attributes in ${populateTime}ms`);
// Create triggers to keep FTS index synchronized with attributes table
// Trigger 1: INSERT - Add new attributes to FTS
sql.execute(`
CREATE TRIGGER attributes_fts_insert
AFTER INSERT ON attributes
WHEN NEW.isDeleted = 0
BEGIN
INSERT INTO attributes_fts (attributeId, noteId, name, value)
VALUES (NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, ''));
END
`);
// Trigger 2: UPDATE - Update FTS when attributes change
sql.execute(`
CREATE TRIGGER attributes_fts_update
AFTER UPDATE ON attributes
BEGIN
-- Remove old entry
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
-- Add new entry if not deleted
INSERT INTO attributes_fts (attributeId, noteId, name, value)
SELECT NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, '')
WHERE NEW.isDeleted = 0;
END
`);
// Trigger 3: DELETE - Remove from FTS
sql.execute(`
CREATE TRIGGER attributes_fts_delete
AFTER DELETE ON attributes
BEGIN
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
END
`);
// Trigger 4: Soft delete (isDeleted = 1) - Remove from FTS
sql.execute(`
CREATE TRIGGER attributes_fts_soft_delete
AFTER UPDATE ON attributes
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
BEGIN
DELETE FROM attributes_fts WHERE attributeId = NEW.attributeId;
END
`);
// Run ANALYZE to update query planner statistics
log.info("Running ANALYZE on attributes_fts...");
sql.execute(`ANALYZE attributes_fts`);
log.info("Attributes FTS5 setup completed successfully");
});
// ========================================
// Part 4: Cleanup legacy custom search tables
// ========================================
// Remove tables from previous custom SQLite search implementation
// that has been replaced by FTS5
log.info("Cleaning up legacy custom search tables...");
sql.executeScript(`DROP TABLE IF EXISTS note_search_content`);
sql.executeScript(`DROP TABLE IF EXISTS note_tokens`);
// Clean up any entity changes for these tables
sql.execute(`
DELETE FROM entity_changes
WHERE entityName IN ('note_search_content', 'note_tokens')
`);
log.info("FTS5 and performance optimization migration completed successfully");
}