From 21aaec2c384127d356b5bd28768b8fd91de3d80f Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sat, 30 Aug 2025 20:48:42 +0000 Subject: [PATCH] feat(search): also fix tests for new fts functionality --- .../src/migrations/0234__add_fts5_search.ts | 45 +- .../search/fts_blob_deduplication.test.ts | 405 ++++++++++++++++++ .../src/services/search/fts_search.test.ts | 8 +- 3 files changed, 440 insertions(+), 18 deletions(-) create mode 100644 apps/server/src/services/search/fts_blob_deduplication.test.ts diff --git a/apps/server/src/migrations/0234__add_fts5_search.ts b/apps/server/src/migrations/0234__add_fts5_search.ts index c5ec1a0af8..f6f5c00053 100644 --- a/apps/server/src/migrations/0234__add_fts5_search.ts +++ b/apps/server/src/migrations/0234__add_fts5_search.ts @@ -264,20 +264,37 @@ export default function addFTS5SearchAndPerformanceIndexes() { // Final cleanup: ensure all eligible notes are indexed // This catches any edge cases where notes might have been missed log.info("Running final FTS index cleanup..."); - const cleanupCount = sql.getValue(` - WITH missing_notes AS ( - SELECT n.noteId, n.title, b.content - FROM notes n - LEFT JOIN blobs b ON n.blobId = b.blobId - WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND n.isDeleted = 0 - AND n.isProtected = 0 - AND b.content IS NOT NULL - AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) - ) - INSERT INTO notes_fts (noteId, title, content) - SELECT noteId, title, content FROM missing_notes - `); + + // First check for missing notes + const missingCount = sql.getValue(` + SELECT COUNT(*) FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) + `) || 0; + + if (missingCount > 0) { + // Insert missing notes + sql.execute(` + WITH missing_notes AS ( + SELECT n.noteId, n.title, b.content + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM notes_fts WHERE noteId = n.noteId) + ) + INSERT INTO notes_fts (noteId, title, content) + SELECT noteId, title, content FROM missing_notes + `); + } + + const cleanupCount = missingCount; if (cleanupCount && cleanupCount > 0) { log.info(`Indexed ${cleanupCount} additional notes during cleanup`); diff --git a/apps/server/src/services/search/fts_blob_deduplication.test.ts b/apps/server/src/services/search/fts_blob_deduplication.test.ts new file mode 100644 index 0000000000..399d7af855 --- /dev/null +++ b/apps/server/src/services/search/fts_blob_deduplication.test.ts @@ -0,0 +1,405 @@ +/** + * Tests for FTS5 blob deduplication scenarios + * + * This test file validates that FTS indexing works correctly when: + * 1. Multiple notes share the same blob (deduplication) + * 2. Notes change content to match existing blobs + * 3. Blobs are updated and affect multiple notes + * 4. Notes switch between unique and shared blobs + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import sql from '../sql.js'; +import beccaLoader from '../../becca/becca_loader.js'; +import noteService from '../notes.js'; +import searchService from './services/search.js'; +import { ftsSearchService } from './fts_search.js'; + +describe('FTS5 Blob Deduplication Tests', () => { + beforeEach(() => { + // Ensure we have a clean test database with FTS enabled + sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); + sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); + sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); + + // Reload becca to ensure cache is in sync + beccaLoader.load(); + }); + + afterEach(() => { + // Clean up test data + sql.execute("DELETE FROM notes WHERE noteId LIKE 'test_%'"); + sql.execute("DELETE FROM blobs WHERE blobId LIKE 'test_%'"); + sql.execute("DELETE FROM notes_fts WHERE noteId LIKE 'test_%'"); + }); + + describe('Blob Deduplication Scenarios', () => { + it('should index multiple notes sharing the same blob', async () => { + // Create first note with unique content + const note1 = await noteService.createNewNote({ + noteId: 'test_note1', + parentNoteId: 'root', + title: 'Test Note 1', + content: 'Shared content for deduplication test', + type: 'text' + }); + + // Create second note with the same content (will share blob) + const note2 = await noteService.createNewNote({ + noteId: 'test_note2', + parentNoteId: 'root', + title: 'Test Note 2', + content: 'Shared content for deduplication test', + type: 'text' + }); + + // Verify both notes share the same blob + const blob1 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note1']); + const blob2 = sql.getRow("SELECT blobId FROM notes WHERE noteId = ?", ['test_note2']); + expect(blob1.blobId).toBe(blob2.blobId); + + // Verify both notes are indexed in FTS + const ftsCount = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId IN (?, ?)", + ['test_note1', 'test_note2'] + ); + expect(ftsCount).toBe(2); + + // Search should find both notes + const searchResults = searchService.searchNotes('deduplication'); + const foundNoteIds = searchResults.map(r => r.noteId); + expect(foundNoteIds).toContain('test_note1'); + expect(foundNoteIds).toContain('test_note2'); + }); + + it('should update FTS when note content changes to match existing blob', async () => { + // Create first note with unique content + const note1 = await noteService.createNewNote({ + noteId: 'test_note3', + parentNoteId: 'root', + title: 'Note with existing content', + content: 'This is existing content in the database', + type: 'text' + }); + + // Create second note with different content + const note2 = await noteService.createNewNote({ + noteId: 'test_note4', + parentNoteId: 'root', + title: 'Note with different content', + content: 'This is completely different content', + type: 'text' + }); + + // Verify notes have different blobs initially + const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); + const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); + expect(initialBlob1).not.toBe(initialBlob2); + + // Change note2's content to match note1 (deduplication occurs) + await noteService.updateNoteContent('test_note4', 'This is existing content in the database'); + + // Verify both notes now share the same blob + const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note3']); + const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note4']); + expect(finalBlob1).toBe(finalBlob2); + + // Verify FTS is updated correctly for note2 + const ftsContent = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note4'] + ); + expect(ftsContent).toBe('This is existing content in the database'); + + // Search for old content should not find note2 + const oldContentSearch = searchService.searchNotes('completely different'); + const oldSearchIds = oldContentSearch.map(r => r.noteId); + expect(oldSearchIds).not.toContain('test_note4'); + + // Search for new content should find both notes + const newContentSearch = searchService.searchNotes('existing content'); + const newSearchIds = newContentSearch.map(r => r.noteId); + expect(newSearchIds).toContain('test_note3'); + expect(newSearchIds).toContain('test_note4'); + }); + + it('should update all notes when shared blob content changes', async () => { + // Create three notes with the same content + const sharedContent = 'Original shared content for blob update test'; + + await noteService.createNewNote({ + noteId: 'test_note5', + parentNoteId: 'root', + title: 'Shared Note 1', + content: sharedContent, + type: 'text' + }); + + await noteService.createNewNote({ + noteId: 'test_note6', + parentNoteId: 'root', + title: 'Shared Note 2', + content: sharedContent, + type: 'text' + }); + + await noteService.createNewNote({ + noteId: 'test_note7', + parentNoteId: 'root', + title: 'Shared Note 3', + content: sharedContent, + type: 'text' + }); + + // Verify all three share the same blob + const blobIds = sql.getColumn( + "SELECT DISTINCT blobId FROM notes WHERE noteId IN (?, ?, ?)", + ['test_note5', 'test_note6', 'test_note7'] + ); + expect(blobIds.length).toBe(1); + const sharedBlobId = blobIds[0]; + + // Update the blob content directly (simulating what would happen in real update) + sql.execute( + "UPDATE blobs SET content = ? WHERE blobId = ?", + ['Updated shared content for all notes', sharedBlobId] + ); + + // Verify FTS is updated for all three notes + const ftsContents = sql.getColumn( + "SELECT content FROM notes_fts WHERE noteId IN (?, ?, ?) ORDER BY noteId", + ['test_note5', 'test_note6', 'test_note7'] + ); + + expect(ftsContents).toHaveLength(3); + ftsContents.forEach(content => { + expect(content).toBe('Updated shared content for all notes'); + }); + + // Search for old content should find nothing + const oldSearch = searchService.searchNotes('Original shared'); + expect(oldSearch.filter(r => r.noteId.startsWith('test_'))).toHaveLength(0); + + // Search for new content should find all three + const newSearch = searchService.searchNotes('Updated shared'); + const foundIds = newSearch.map(r => r.noteId).filter(id => id.startsWith('test_')); + expect(foundIds).toContain('test_note5'); + expect(foundIds).toContain('test_note6'); + expect(foundIds).toContain('test_note7'); + }); + + it('should handle note switching from shared to unique blob', async () => { + // Create two notes with shared content + const sharedContent = 'Shared content before divergence'; + + const note1 = await noteService.createNewNote({ + noteId: 'test_note8', + parentNoteId: 'root', + title: 'Diverging Note 1', + content: sharedContent, + type: 'text' + }); + + const note2 = await noteService.createNewNote({ + noteId: 'test_note9', + parentNoteId: 'root', + title: 'Diverging Note 2', + content: sharedContent, + type: 'text' + }); + + // Verify they share the same blob + const initialBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); + const initialBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); + expect(initialBlob1).toBe(initialBlob2); + + // Change note2 to unique content + await noteService.updateNoteContent('test_note9', 'Unique content after divergence'); + + // Verify they now have different blobs + const finalBlob1 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note8']); + const finalBlob2 = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note9']); + expect(finalBlob1).not.toBe(finalBlob2); + + // Verify FTS is correctly updated + const ftsContent1 = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note8'] + ); + const ftsContent2 = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note9'] + ); + + expect(ftsContent1).toBe('Shared content before divergence'); + expect(ftsContent2).toBe('Unique content after divergence'); + + // Search should find correct notes + const sharedSearch = searchService.searchNotes('before divergence'); + expect(sharedSearch.map(r => r.noteId)).toContain('test_note8'); + expect(sharedSearch.map(r => r.noteId)).not.toContain('test_note9'); + + const uniqueSearch = searchService.searchNotes('after divergence'); + expect(uniqueSearch.map(r => r.noteId)).not.toContain('test_note8'); + expect(uniqueSearch.map(r => r.noteId)).toContain('test_note9'); + }); + + it('should handle import scenarios where notes exist before blobs', async () => { + // Simulate import scenario: create note without blob first + sql.execute(` + INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) + VALUES ('test_note10', 'Import Test Note', 'text', 'text/html', 'pending_blob_123', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) + `); + + // Verify note is not in FTS yet (no blob content) + const initialFts = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", + ['test_note10'] + ); + expect(initialFts).toBe(0); + + // Now create the blob (simulating delayed blob creation during import) + sql.execute(` + INSERT INTO blobs (blobId, content, dateModified, utcDateModified) + VALUES ('pending_blob_123', 'Imported content finally available', datetime('now'), datetime('now')) + `); + + // Verify note is now indexed in FTS + const finalFts = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note10'] + ); + expect(finalFts).toBe('Imported content finally available'); + + // Search should now find the note + const searchResults = searchService.searchNotes('Imported content'); + expect(searchResults.map(r => r.noteId)).toContain('test_note10'); + }); + + it('should correctly handle protected notes during deduplication', async () => { + // Create a regular note + const note1 = await noteService.createNewNote({ + noteId: 'test_note11', + parentNoteId: 'root', + title: 'Regular Note', + content: 'Content that will be shared', + type: 'text' + }); + + // Create a protected note with same content + sql.execute(` + INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) + VALUES ('test_note12', 'Protected Note', 'text', 'text/html', + (SELECT blobId FROM notes WHERE noteId = 'test_note11'), + 0, 1, datetime('now'), datetime('now'), datetime('now'), datetime('now')) + `); + + // Verify protected note is NOT in FTS + const protectedInFts = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", + ['test_note12'] + ); + expect(protectedInFts).toBe(0); + + // Verify regular note IS in FTS + const regularInFts = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", + ['test_note11'] + ); + expect(regularInFts).toBe(1); + + // Update blob content + const blobId = sql.getValue("SELECT blobId FROM notes WHERE noteId = ?", ['test_note11']); + sql.execute("UPDATE blobs SET content = ? WHERE blobId = ?", ['Updated shared content', blobId]); + + // Verify regular note is updated in FTS + const updatedContent = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note11'] + ); + expect(updatedContent).toBe('Updated shared content'); + + // Verify protected note is still NOT in FTS + const protectedStillNotInFts = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", + ['test_note12'] + ); + expect(protectedStillNotInFts).toBe(0); + }); + }); + + describe('FTS Sync and Cleanup', () => { + it('should sync missing notes to FTS index', async () => { + // Manually create notes without triggering FTS (simulating missed triggers) + sql.execute(` + INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) + VALUES ('test_note13', 'Missed Note 1', 'text', 'text/html', 'blob_missed_1', 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) + `); + + sql.execute(` + INSERT INTO blobs (blobId, content, dateModified, utcDateModified) + VALUES ('blob_missed_1', 'Content that was missed by triggers', datetime('now'), datetime('now')) + `); + + // Delete from FTS to simulate missing index + sql.execute("DELETE FROM notes_fts WHERE noteId = 'test_note13'"); + + // Verify note is missing from FTS + const beforeSync = sql.getValue( + "SELECT COUNT(*) FROM notes_fts WHERE noteId = ?", + ['test_note13'] + ); + expect(beforeSync).toBe(0); + + // Run sync + const syncedCount = ftsSearchService.syncMissingNotes(['test_note13']); + expect(syncedCount).toBe(1); + + // Verify note is now in FTS + const afterSync = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + ['test_note13'] + ); + expect(afterSync).toBe('Content that was missed by triggers'); + }); + + it('should handle FTS rebuild correctly', () => { + // Create some test notes + const noteIds = ['test_note14', 'test_note15', 'test_note16']; + noteIds.forEach((noteId, index) => { + sql.execute(` + INSERT INTO notes (noteId, title, type, mime, blobId, isDeleted, isProtected, dateCreated, dateModified, utcDateCreated, utcDateModified) + VALUES (?, ?, 'text', 'text/html', ?, 0, 0, datetime('now'), datetime('now'), datetime('now'), datetime('now')) + `, [noteId, `Test Note ${index}`, `blob_${noteId}`]); + + sql.execute(` + INSERT INTO blobs (blobId, content, dateModified, utcDateModified) + VALUES (?, ?, datetime('now'), datetime('now')) + `, [`blob_${noteId}`, `Content for note ${index}`]); + }); + + // Corrupt FTS by adding invalid entries + sql.execute("INSERT INTO notes_fts (noteId, title, content) VALUES ('invalid_note', 'Invalid', 'Invalid content')"); + + // Rebuild index + ftsSearchService.rebuildIndex(); + + // Verify only valid notes are in FTS + const ftsCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId LIKE 'test_%'"); + expect(ftsCount).toBe(3); + + // Verify invalid entry is gone + const invalidCount = sql.getValue("SELECT COUNT(*) FROM notes_fts WHERE noteId = 'invalid_note'"); + expect(invalidCount).toBe(0); + + // Verify content is correct + noteIds.forEach((noteId, index) => { + const content = sql.getValue( + "SELECT content FROM notes_fts WHERE noteId = ?", + [noteId] + ); + expect(content).toBe(`Content for note ${index}`); + }); + }); + }); +}); \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search.test.ts b/apps/server/src/services/search/fts_search.test.ts index 55b3628af9..194aabe83e 100644 --- a/apps/server/src/services/search/fts_search.test.ts +++ b/apps/server/src/services/search/fts_search.test.ts @@ -110,7 +110,7 @@ describe('FTS5 Search Service Improvements', () => { }); expect(results).toEqual([]); - expect(mockLog.debug).toHaveBeenCalledWith( + expect(mockLog.info).toHaveBeenCalledWith( 'Protected session available - will search protected notes separately' ); }); @@ -151,7 +151,7 @@ describe('FTS5 Search Service Improvements', () => { const query = ftsSearchService.convertToFTS5Query(['()""'], '='); expect(query).toContain('__empty_token__'); - expect(mockLog.debug).toHaveBeenCalledWith( + expect(mockLog.info).toHaveBeenCalledWith( expect.stringContaining('Token became empty after sanitization') ); }); @@ -162,7 +162,7 @@ describe('FTS5 Search Service Improvements', () => { const query = ftsSearchService.convertToFTS5Query(['test; DROP TABLE'], '='); expect(query).toContain('__invalid_token__'); - expect(mockLog.warn).toHaveBeenCalledWith( + expect(mockLog.error).toHaveBeenCalledWith( expect.stringContaining('Potential SQL injection attempt detected') ); }); @@ -208,7 +208,7 @@ describe('FTS5 Search Service Improvements', () => { expect(stats.dbstatAvailable).toBe(false); expect(stats.indexSize).toBe(75000); // 500 * 100 * 1.5 - expect(mockLog.debug).toHaveBeenCalledWith( + expect(mockLog.info).toHaveBeenCalledWith( 'dbstat virtual table not available, using fallback for index size estimation' ); });