diff --git a/apps/server/src/routes/route_api.ts b/apps/server/src/routes/route_api.ts index fc0f0e7a3..fe7033fe7 100644 --- a/apps/server/src/routes/route_api.ts +++ b/apps/server/src/routes/route_api.ts @@ -11,7 +11,7 @@ import auth from "../services/auth.js"; import { doubleCsrfProtection as csrfMiddleware } from "./csrf_protection.js"; import { safeExtractMessageAndStackFromError } from "../services/utils.js"; -const MAX_ALLOWED_FILE_SIZE_MB = 250; +const MAX_ALLOWED_FILE_SIZE_MB = 2500; export const router = express.Router(); // TODO: Deduplicate with etapi_utils.ts afterwards. diff --git a/apps/server/src/services/app_info.ts b/apps/server/src/services/app_info.ts index 8582eac79..904afcf51 100644 --- a/apps/server/src/services/app_info.ts +++ b/apps/server/src/services/app_info.ts @@ -4,7 +4,7 @@ import packageJson from "../../package.json" with { type: "json" }; import dataDir from "./data_dir.js"; import { AppInfo } from "@triliumnext/commons"; -const APP_DB_VERSION = 235; +const APP_DB_VERSION = 236; const SYNC_VERSION = 36; const CLIPPER_PROTOCOL_VERSION = "1.0"; diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts index 85ede0c54..5d95c3538 100644 --- a/apps/server/src/services/search/expressions/note_content_fulltext.ts +++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts @@ -81,30 +81,40 @@ class NoteContentFulltextExp extends Expression { // Try to use FTS5 if available for better performance if (ftsSearchService.checkFTS5Availability() && this.canUseFTS5()) { try { - // Performance comparison logging for FTS5 vs traditional search - const searchQuery = this.tokens.join(" "); - const isQuickSearch = searchContext.fastSearch === false; // quick-search sets fastSearch to false - if (isQuickSearch) { - log.info(`[QUICK-SEARCH-COMPARISON] Starting comparison for query: "${searchQuery}" with operator: ${this.operator}`); - } - // Check if we need to search protected notes const searchProtected = protectedSessionService.isProtectedSessionAvailable(); - - // Time FTS5 search - const ftsStartTime = Date.now(); + const noteIdSet = inputNoteSet.getNoteIds(); - const ftsResults = ftsSearchService.searchSync( - this.tokens, - this.operator, - noteIdSet.size > 0 ? noteIdSet : undefined, - { - includeSnippets: false, - searchProtected: false // FTS5 doesn't index protected notes - } - ); - const ftsEndTime = Date.now(); - const ftsTime = ftsEndTime - ftsStartTime; + + // Determine which FTS5 method to use based on operator + let ftsResults; + if (this.operator === "*=*" || this.operator === "*=" || this.operator === "=*") { + // Substring operators use LIKE queries (optimized by trigram index) + // Do NOT pass a limit - we want all results to match traditional search behavior + ftsResults = ftsSearchService.searchWithLike( + this.tokens, + this.operator, + noteIdSet.size > 0 ? noteIdSet : undefined, + { + includeSnippets: false, + searchProtected: false + // No limit specified - return all results + }, + searchContext // Pass context to track internal timing + ); + } else { + // Other operators use MATCH syntax + ftsResults = ftsSearchService.searchSync( + this.tokens, + this.operator, + noteIdSet.size > 0 ? noteIdSet : undefined, + { + includeSnippets: false, + searchProtected: false // FTS5 doesn't index protected notes + }, + searchContext // Pass context to track internal timing + ); + } // Add FTS results to note set for (const result of ftsResults) { @@ -112,50 +122,7 @@ class NoteContentFulltextExp extends Expression { resultNoteSet.add(becca.notes[result.noteId]); } } - - // For quick-search, also run traditional search for comparison - if (isQuickSearch) { - const traditionalStartTime = Date.now(); - const traditionalNoteSet = new NoteSet(); - - // Run traditional search (use the fallback method) - const traditionalResults = this.executeWithFallback(inputNoteSet, traditionalNoteSet, searchContext); - - const traditionalEndTime = Date.now(); - const traditionalTime = traditionalEndTime - traditionalStartTime; - - // Log performance comparison - const speedup = traditionalTime > 0 ? (traditionalTime / ftsTime).toFixed(2) : "N/A"; - log.info(`[QUICK-SEARCH-COMPARISON] ===== Results for query: "${searchQuery}" =====`); - log.info(`[QUICK-SEARCH-COMPARISON] FTS5 search: ${ftsTime}ms, found ${ftsResults.length} results`); - log.info(`[QUICK-SEARCH-COMPARISON] Traditional search: ${traditionalTime}ms, found ${traditionalResults.notes.length} results`); - log.info(`[QUICK-SEARCH-COMPARISON] FTS5 is ${speedup}x faster (saved ${traditionalTime - ftsTime}ms)`); - - // Check if results match - const ftsNoteIds = new Set(ftsResults.map(r => r.noteId)); - const traditionalNoteIds = new Set(traditionalResults.notes.map(n => n.noteId)); - const matchingResults = ftsNoteIds.size === traditionalNoteIds.size && - Array.from(ftsNoteIds).every(id => traditionalNoteIds.has(id)); - - if (!matchingResults) { - log.info(`[QUICK-SEARCH-COMPARISON] Results differ! FTS5: ${ftsNoteIds.size} notes, Traditional: ${traditionalNoteIds.size} notes`); - - // Find differences - const onlyInFTS = Array.from(ftsNoteIds).filter(id => !traditionalNoteIds.has(id)); - const onlyInTraditional = Array.from(traditionalNoteIds).filter(id => !ftsNoteIds.has(id)); - - if (onlyInFTS.length > 0) { - log.info(`[QUICK-SEARCH-COMPARISON] Only in FTS5: ${onlyInFTS.slice(0, 5).join(", ")}${onlyInFTS.length > 5 ? "..." : ""}`); - } - if (onlyInTraditional.length > 0) { - log.info(`[QUICK-SEARCH-COMPARISON] Only in Traditional: ${onlyInTraditional.slice(0, 5).join(", ")}${onlyInTraditional.length > 5 ? "..." : ""}`); - } - } else { - log.info(`[QUICK-SEARCH-COMPARISON] Results match perfectly! ✓`); - } - log.info(`[QUICK-SEARCH-COMPARISON] ========================================`); - } - + // If we need to search protected notes, use the separate method if (searchProtected) { const protectedResults = ftsSearchService.searchProtectedNotesSync( @@ -166,7 +133,7 @@ class NoteContentFulltextExp extends Expression { includeSnippets: false } ); - + // Add protected note results for (const result of protectedResults) { if (becca.notes[result.noteId]) { @@ -193,7 +160,7 @@ class NoteContentFulltextExp extends Expression { } else { log.error(`FTS5 error: ${error}`); } - + // Use fallback for recoverable errors if (error.recoverable) { log.info("Using fallback search implementation"); @@ -213,8 +180,8 @@ class NoteContentFulltextExp extends Expression { for (const row of sql.iterateRows(` SELECT noteId, type, mime, content, isProtected FROM notes JOIN blobs USING (blobId) - WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND isDeleted = 0 + WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND isDeleted = 0 AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) { this.findInText(row, inputNoteSet, resultNoteSet); } diff --git a/apps/server/src/services/search/fts_search.test.ts b/apps/server/src/services/search/fts_search.test.ts index 194aabe83..6657d40c1 100644 --- a/apps/server/src/services/search/fts_search.test.ts +++ b/apps/server/src/services/search/fts_search.test.ts @@ -266,4 +266,1051 @@ describe('Integration with NoteContentFulltextExp', () => { // Results are combined for the user expect(true).toBe(true); }); +}); + +describe('searchWithLike - Substring Search with LIKE Queries', () => { + let ftsSearchService: any; + let mockSql: any; + let mockLog: any; + let mockProtectedSession: any; + + beforeEach(async () => { + // Reset mocks + vi.resetModules(); + + // Setup mocks + mockSql = { + getValue: vi.fn(), + getRows: vi.fn(), + getColumn: vi.fn(), + execute: vi.fn(), + transactional: vi.fn((fn: Function) => fn()) + }; + + mockLog = { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + request: vi.fn() + }; + + mockProtectedSession = { + isProtectedSessionAvailable: vi.fn().mockReturnValue(false), + decryptString: vi.fn() + }; + + // Mock the modules + vi.doMock('../sql.js', () => ({ default: mockSql })); + vi.doMock('../log.js', () => ({ default: mockLog })); + vi.doMock('../protected_session.js', () => ({ default: mockProtectedSession })); + + // Import the service after mocking + const module = await import('./fts_search.js'); + ftsSearchService = module.ftsSearchService; + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + describe('substring search (*=*)', () => { + it('should search with LIKE pattern for contains operator', () => { + // Setup - FTS5 is available + mockSql.getValue + .mockReturnValueOnce(1) // FTS5 available + .mockReturnValueOnce(100) // totalInFts + .mockReturnValueOnce(100); // totalNotes + mockSql.getColumn.mockReturnValue([]); // No noteIds filtering + + const mockResults = [ + { noteId: 'note1', title: 'Kubernetes Guide' }, + { noteId: 'note2', title: 'Docker and Kubernetes' } + ]; + mockSql.getRows.mockReturnValue(mockResults); + + // Execute - no limit specified, should return all results + const results = ftsSearchService.searchWithLike( + ['kubernetes'], + '*=*', + undefined, + {} + ); + + // Verify - tokens are normalized to lowercase, searches both title and content + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(query).toContain('title LIKE ? ESCAPE'); + expect(query).toContain('content LIKE ? ESCAPE'); + expect(params).toContain('%kubernetes%'); // Normalized to lowercase + expect(results).toHaveLength(2); + expect(results[0].noteId).toBe('note1'); + expect(results[0].score).toBe(1.0); + expect(results[1].noteId).toBe('note2'); + }); + + it('should combine multiple tokens with AND', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note' } + ]); + + ftsSearchService.searchWithLike( + ['kubernetes', 'docker'], + '*=*', + undefined, + {} + ); + + // Verify query contains both LIKE conditions for title and content + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(query).toContain('title LIKE ? ESCAPE'); + expect(query).toContain('content LIKE ? ESCAPE'); + expect(query).toContain('AND'); + expect(params).toContain('%kubernetes%'); + expect(params).toContain('%docker%'); + }); + + it('should handle empty results gracefully', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + const results = ftsSearchService.searchWithLike( + ['nonexistent'], + '*=*', + undefined, + {} + ); + + expect(results).toHaveLength(0); + }); + }); + + describe('suffix search (*=)', () => { + it('should search with LIKE pattern for ends-with operator', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const mockResults = [ + { noteId: 'note1', title: 'Installing Docker' } + ]; + mockSql.getRows.mockReturnValue(mockResults); + + const results = ftsSearchService.searchWithLike( + ['docker'], + '*=', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(query).toContain('title LIKE ? ESCAPE'); + expect(query).toContain('content LIKE ? ESCAPE'); + expect(params).toContain('%docker'); + expect(results).toHaveLength(1); + expect(results[0].noteId).toBe('note1'); + }); + + it('should handle multiple tokens for suffix search', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['test', 'suffix'], + '*=', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params).toContain('%test'); + expect(params).toContain('%suffix'); + }); + }); + + describe('prefix search (=*)', () => { + it('should search with LIKE pattern for starts-with operator', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const mockResults = [ + { noteId: 'note1', title: 'Kubernetes Basics' } + ]; + mockSql.getRows.mockReturnValue(mockResults); + + const results = ftsSearchService.searchWithLike( + ['kube'], + '=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(query).toContain('title LIKE ? ESCAPE'); + expect(query).toContain('content LIKE ? ESCAPE'); + expect(params).toContain('kube%'); + expect(results).toHaveLength(1); + expect(results[0].noteId).toBe('note1'); + }); + + it('should handle multiple tokens for prefix search', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['pre', 'fix'], + '=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params).toContain('pre%'); + expect(params).toContain('fix%'); + }); + }); + + describe('protected notes filtering', () => { + it('should exclude protected notes from results', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue(['note1', 'note2']); // Non-protected notes + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Non-protected Note' }, + { noteId: 'note2', title: 'Another Note' } + ]); + + const noteIds = new Set(['note1', 'note2', 'note3']); + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + {} + ); + + // Verify that filterNonProtectedNoteIds was called + expect(mockSql.getColumn).toHaveBeenCalledWith( + expect.stringContaining('isProtected = 0'), + expect.arrayContaining(['note1', 'note2', 'note3']) + ); + + expect(results).toHaveLength(2); + }); + + it('should handle case when all notes are protected', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); // All protected + mockSql.getRows.mockReturnValue([]); + + const noteIds = new Set(['protected1', 'protected2']); + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + {} + ); + + expect(mockSql.getColumn).toHaveBeenCalled(); + expect(results).toHaveLength(0); + }); + }); + + describe('note ID filtering', () => { + it('should filter results by provided noteIds set', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue(['note1', 'note2']); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note 1' } + ]); + + const noteIds = new Set(['note1', 'note2', 'note3']); + ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + // Should have noteId IN clause + expect(query).toContain('noteId IN'); + expect(params).toContain('note1'); + expect(params).toContain('note2'); + }); + + it('should only return notes in the provided set', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue(['note1']); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note' } + ]); + + const noteIds = new Set(['note1']); + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + {} + ); + + expect(results).toHaveLength(1); + expect(results[0].noteId).toBe('note1'); + }); + }); + + describe('limit and offset', () => { + it('should respect limit parameter when specified', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test 1' }, + { noteId: 'note2', title: 'Test 2' } + ]); + + ftsSearchService.searchWithLike( + ['test'], + '*=*', + undefined, + { limit: 2 } + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + // Query should contain LIMIT + expect(query).toContain('LIMIT ?'); + // Last param should be the limit + expect(params[params.length - 1]).toBe(2); + }); + + it('should respect offset parameter', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['test'], + '*=*', + undefined, + { limit: 10, offset: 20 } + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(query).toContain('LIMIT ?'); + expect(query).toContain('OFFSET ?'); + expect(params[params.length - 2]).toBe(10); + expect(params[params.length - 1]).toBe(20); + }); + + it('should not apply limit when not specified', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['test'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + + // Query should NOT contain LIMIT when not specified + expect(query).not.toContain('LIMIT'); + expect(query).not.toContain('OFFSET'); + }); + }); + + describe('FTS5 availability', () => { + it('should throw FTSNotAvailableError when FTS5 is not available', () => { + mockSql.getValue.mockReturnValue(0); // FTS5 not available + + expect(() => { + ftsSearchService.searchWithLike(['test'], '*=*'); + }).toThrow('FTS5 is not available'); + }); + }); + + describe('unsupported operator', () => { + it('should throw FTSQueryError for unsupported operator', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + expect(() => { + ftsSearchService.searchWithLike(['test'], '='); + }).toThrow(/Unsupported LIKE operator/); + }); + + it('should throw FTSQueryError for fuzzy operator', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + expect(() => { + ftsSearchService.searchWithLike(['test'], '~='); + }).toThrow(/Unsupported LIKE operator/); + }); + }); + + describe('empty tokens', () => { + it('should throw error when no tokens and no noteIds provided (Bug #1)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); // No noteIds + + expect(() => { + ftsSearchService.searchWithLike( + [], // Empty tokens + '*=*', + undefined, // No noteIds + {} + ); + }).toThrow(/No search criteria provided/); + }); + + it('should allow empty tokens if noteIds are provided', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue(['note1', 'note2']); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note' } + ]); + + const noteIds = new Set(['note1', 'note2']); + const results = ftsSearchService.searchWithLike( + [], // Empty tokens but noteIds provided + '*=*', + noteIds, + {} + ); + + expect(results).toHaveLength(1); + }); + }); + + describe('SQL error handling', () => { + it('should throw FTSQueryError on SQL execution error', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockImplementation(() => { + throw new Error('Database error'); + }); + + expect(() => { + ftsSearchService.searchWithLike(['test'], '*=*'); + }).toThrow(/FTS5 LIKE search failed.*Database error/); + }); + + it('should log error with helpful message', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockImplementation(() => { + throw new Error('Table locked'); + }); + + try { + ftsSearchService.searchWithLike(['test'], '*=*'); + } catch (error: any) { + expect(error.name).toBe('FTSQueryError'); + expect(error.message).toContain('Table locked'); + expect(mockLog.error).toHaveBeenCalledWith( + expect.stringContaining('FTS5 LIKE search error') + ); + } + }); + }); + + describe('large noteIds set (Bug #2 - SQLite parameter limit)', () => { + it('should handle noteIds sets larger than 999 items', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + + // Create a large set of note IDs (1500 notes) + const largeNoteIds = Array.from({ length: 1500 }, (_, i) => `note${i}`); + mockSql.getColumn.mockReturnValue(largeNoteIds); + + // Mock multiple query executions for chunks + mockSql.getRows + .mockReturnValueOnce( + Array.from({ length: 50 }, (_, i) => ({ + noteId: `note${i}`, + title: `Test Note ${i}` + })) + ) + .mockReturnValueOnce( + Array.from({ length: 50 }, (_, i) => ({ + noteId: `note${i + 50}`, + title: `Test Note ${i + 50}` + })) + ); + + const noteIds = new Set(largeNoteIds); + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + { limit: 100 } + ); + + // Should execute multiple queries and combine results + expect(mockSql.getRows).toHaveBeenCalledTimes(2); // 2 chunks + expect(results.length).toBeLessThanOrEqual(100); + expect(mockLog.info).toHaveBeenCalledWith( + expect.stringContaining('Large noteIds set detected') + ); + }); + + it('should apply offset only to first chunk', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + + const largeNoteIds = Array.from({ length: 1500 }, (_, i) => `note${i}`); + mockSql.getColumn.mockReturnValue(largeNoteIds); + + mockSql.getRows + .mockReturnValueOnce([{ noteId: 'note1', title: 'Test 1' }]) + .mockReturnValueOnce([{ noteId: 'note2', title: 'Test 2' }]); + + const noteIds = new Set(largeNoteIds); + ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + { limit: 100, offset: 20 } + ); + + // First query should have OFFSET, subsequent queries should not + const firstCallQuery = mockSql.getRows.mock.calls[0][0]; + const secondCallQuery = mockSql.getRows.mock.calls[1][0]; + + expect(firstCallQuery).toContain('OFFSET'); + expect(secondCallQuery).not.toContain('OFFSET'); + }); + + it('should respect limit across chunks', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + + const largeNoteIds = Array.from({ length: 1500 }, (_, i) => `note${i}`); + mockSql.getColumn.mockReturnValue(largeNoteIds); + + // First chunk returns 30 results + mockSql.getRows + .mockReturnValueOnce( + Array.from({ length: 30 }, (_, i) => ({ + noteId: `note${i}`, + title: `Test ${i}` + })) + ) + .mockReturnValueOnce( + Array.from({ length: 20 }, (_, i) => ({ + noteId: `note${i + 30}`, + title: `Test ${i + 30}` + })) + ); + + const noteIds = new Set(largeNoteIds); + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + { limit: 50 } + ); + + // Total should respect the limit + expect(results).toHaveLength(50); + }); + + it('should handle normal sized noteIds without chunking', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + + // Small set that fits in one query + const smallNoteIds = Array.from({ length: 50 }, (_, i) => `note${i}`); + mockSql.getColumn.mockReturnValue(smallNoteIds); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note' } + ]); + + const noteIds = new Set(smallNoteIds); + ftsSearchService.searchWithLike( + ['test'], + '*=*', + noteIds, + {} + ); + + // Should only execute one query + expect(mockSql.getRows).toHaveBeenCalledTimes(1); + expect(mockLog.info).not.toHaveBeenCalledWith( + expect.stringContaining('Large noteIds set detected') + ); + }); + }); + + describe('special characters in tokens', () => { + it('should handle tokens with apostrophes', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: "John's Guide" } + ]); + + const results = ftsSearchService.searchWithLike( + ["john's"], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params).toContain("%john's%"); + expect(results).toHaveLength(1); + }); + + it('should handle tokens with quotes', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['"quoted"'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params[0]).toContain('"quoted"'); + }); + + it('should escape percentage signs to prevent wildcard injection (Bug #3)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['100%'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + // Should escape % as \% and use ESCAPE '\' clause + expect(params[0]).toBe('%100\\%%'); + expect(params[1]).toBe('%100\\%%'); + expect(query).toContain("ESCAPE '\\'"); + }); + + it('should escape underscores to prevent wildcard injection (Bug #3)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['my_var'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + // Should escape _ as \_ and use ESCAPE '\' clause + expect(params[0]).toBe('%my\\_var%'); + expect(params[1]).toBe('%my\\_var%'); + expect(query).toContain("ESCAPE '\\'"); + }); + + it('should escape both % and _ in same token (Bug #3)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['test_%_100%'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + // Both wildcards should be escaped + expect(params[0]).toBe('%test\\_\\%\\_100\\%%'); + expect(params[1]).toBe('%test\\_\\%\\_100\\%%'); + }); + + it('should apply ESCAPE clause for starts-with operator (Bug #3)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['100%'], + '=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(params[0]).toBe('100\\%%'); + expect(params[1]).toBe('100\\%%'); + expect(query).toContain("ESCAPE '\\'"); + }); + + it('should apply ESCAPE clause for ends-with operator (Bug #3)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['%100'], + '*=', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const query = callArgs[0]; + const params = callArgs[1]; + + expect(params[0]).toBe('%\\%100'); + expect(params[1]).toBe('%\\%100'); + expect(query).toContain("ESCAPE '\\'"); + }); + }); + + describe('Unicode characters', () => { + it('should handle Unicode tokens', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: '中文测试' } + ]); + + const results = ftsSearchService.searchWithLike( + ['中文'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params).toContain('%中文%'); + expect(results).toHaveLength(1); + }); + + it('should handle emojis in tokens', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + ftsSearchService.searchWithLike( + ['test 🚀'], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params[0]).toContain('🚀'); + }); + }); + + describe('case sensitivity', () => { + it('should perform case-insensitive search (LIKE default)', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test Note' }, + { noteId: 'note2', title: 'TEST NOTE' }, + { noteId: 'note3', title: 'test note' } + ]); + + const results = ftsSearchService.searchWithLike( + ['TEST'], + '*=*', + undefined, + {} + ); + + // All three notes should match due to case-insensitive LIKE + expect(results).toHaveLength(3); + }); + }); + + describe('large result sets', () => { + it('should handle large number of results', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const mockResults = Array.from({ length: 1000 }, (_, i) => ({ + noteId: `note${i}`, + title: `Test Note ${i}` + })); + mockSql.getRows.mockReturnValue(mockResults); + + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + undefined, + { limit: 1000 } + ); + + expect(results).toHaveLength(1000); + }); + }); + + describe('very long tokens', () => { + it('should reject tokens longer than 1000 characters', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const tooLongToken = 'a'.repeat(1001); + + expect(() => { + ftsSearchService.searchWithLike( + [tooLongToken], + '*=*', + undefined, + {} + ); + }).toThrow(/Search tokens too long.*max 1000 characters/); + }); + + it('should accept tokens at exactly 1000 characters', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([]); + + const maxLengthToken = 'a'.repeat(1000); + + ftsSearchService.searchWithLike( + [maxLengthToken], + '*=*', + undefined, + {} + ); + + const callArgs = mockSql.getRows.mock.calls[0]; + const params = callArgs[1]; + + expect(params[0]).toBe(`%${maxLengthToken}%`); + }); + + it('should show truncated token in error message', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const tooLongToken = 'x'.repeat(1500); + + try { + ftsSearchService.searchWithLike( + [tooLongToken], + '*=*', + undefined, + {} + ); + fail('Should have thrown error'); + } catch (error: any) { + expect(error.message).toContain('xxx...'); // Truncated to 50 chars + expect(error.message).not.toContain('x'.repeat(1500)); // Not full token + } + }); + + it('should check multiple tokens for length', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + + const shortToken = 'short'; + const longToken1 = 'a'.repeat(1001); + const longToken2 = 'b'.repeat(1002); + + expect(() => { + ftsSearchService.searchWithLike( + [shortToken, longToken1, longToken2], + '*=*', + undefined, + {} + ); + }).toThrow(/Search tokens too long.*max 1000 characters/); + }); + }); + + describe('score calculation', () => { + it('should always return score of 1.0 for LIKE queries', () => { + mockSql.getValue + .mockReturnValueOnce(1) + .mockReturnValueOnce(100) + .mockReturnValueOnce(100); + mockSql.getColumn.mockReturnValue([]); + mockSql.getRows.mockReturnValue([ + { noteId: 'note1', title: 'Test' }, + { noteId: 'note2', title: 'Another Test' } + ]); + + const results = ftsSearchService.searchWithLike( + ['test'], + '*=*', + undefined, + {} + ); + + expect(results[0].score).toBe(1.0); + expect(results[1].score).toBe(1.0); + }); + }); }); \ No newline at end of file diff --git a/apps/server/src/services/search/fts_search.ts b/apps/server/src/services/search/fts_search.ts index 6f65347fb..c2d11251b 100644 --- a/apps/server/src/services/search/fts_search.ts +++ b/apps/server/src/services/search/fts_search.ts @@ -54,6 +54,7 @@ export interface FTSSearchOptions { snippetLength?: number; highlightTag?: string; searchProtected?: boolean; + skipDiagnostics?: boolean; // Skip diagnostic queries for performance measurements } export interface FTSErrorInfo { @@ -125,6 +126,11 @@ class FTSSearchService { throw new Error("No search tokens provided"); } + // Substring operators (*=*, *=, =*) use LIKE queries now, not MATCH + if (operator === "*=*" || operator === "*=" || operator === "=*") { + throw new Error("Substring operators should use searchWithLike(), not MATCH queries"); + } + // Trigram tokenizer requires minimum 3 characters const shortTokens = tokens.filter(token => token.length < 3); if (shortTokens.length > 0) { @@ -140,33 +146,24 @@ class FTSSearchService { this.sanitizeFTS5Token(token) ); + // Only handle operators that work with MATCH switch (operator) { - case "=": // Exact match (phrase search) + case "=": // Exact phrase match return `"${sanitizedTokens.join(" ")}"`; - - case "*=*": // Contains all tokens (AND) - return sanitizedTokens.join(" AND "); - - case "*=": // Ends with - return sanitizedTokens.map(t => `*${t}`).join(" AND "); - - case "=*": // Starts with - return sanitizedTokens.map(t => `${t}*`).join(" AND "); - - case "!=": // Does not contain (NOT) + + case "!=": // Does not contain return `NOT (${sanitizedTokens.join(" OR ")})`; - - case "~=": // Fuzzy match (use OR for more flexible matching) - case "~*": // Fuzzy contains + + case "~=": // Fuzzy match (use OR) + case "~*": return sanitizedTokens.join(" OR "); - - case "%=": // Regex match - fallback to OR search - log.error(`Regex search operator ${operator} not fully supported in FTS5, using OR search`); - return sanitizedTokens.join(" OR "); - + + case "%=": // Regex - fallback to custom function + log.error(`Regex search operator ${operator} not supported in FTS5`); + throw new FTSNotAvailableError("Regex search not supported in FTS5"); + default: - // Default to AND search - return sanitizedTokens.join(" AND "); + throw new FTSQueryError(`Unsupported MATCH operator: ${operator}`); } } @@ -180,37 +177,282 @@ class FTSSearchService { .replace(/["\(\)\*]/g, '') // Remove quotes, parens, wildcards .replace(/\s+/g, ' ') // Normalize whitespace .trim(); - + // Validate that token is not empty after sanitization if (!sanitized || sanitized.length === 0) { log.info(`Token became empty after sanitization: "${token}"`); // Return a safe placeholder that won't match anything return "__empty_token__"; } - + // Additional validation: ensure token doesn't contain SQL injection attempts if (sanitized.includes(';') || sanitized.includes('--')) { log.error(`Potential SQL injection attempt detected in token: "${token}"`); return "__invalid_token__"; } - + return sanitized; } + /** + * Escapes LIKE wildcards (% and _) in user input to treat them as literals + * @param str - User input string + * @returns String with LIKE wildcards escaped + */ + private escapeLikeWildcards(str: string): string { + return str.replace(/[%_]/g, '\\$&'); + } + + /** + * Performs substring search using LIKE queries optimized by trigram index + * This is used for *=*, *=, and =* operators with detail='none' + * + * @param tokens - Search tokens + * @param operator - Search operator (*=*, *=, =*) + * @param noteIds - Optional set of note IDs to filter + * @param options - Search options + * @param searchContext - Optional search context to track internal timing + * @returns Array of search results (noteIds only, no scoring) + */ + searchWithLike( + tokens: string[], + operator: string, + noteIds?: Set, + options: FTSSearchOptions = {}, + searchContext?: any + ): FTSSearchResult[] { + if (!this.checkFTS5Availability()) { + throw new FTSNotAvailableError(); + } + + // Normalize tokens to lowercase for case-insensitive search + const normalizedTokens = tokens.map(t => t.toLowerCase()); + + // Validate token lengths to prevent memory issues + const MAX_TOKEN_LENGTH = 1000; + const longTokens = normalizedTokens.filter(t => t.length > MAX_TOKEN_LENGTH); + if (longTokens.length > 0) { + throw new FTSQueryError( + `Search tokens too long (max ${MAX_TOKEN_LENGTH} characters). ` + + `Long tokens: ${longTokens.map(t => t.substring(0, 50) + '...').join(', ')}` + ); + } + + const { + limit, // No default limit - return all results + offset = 0, + skipDiagnostics = false + } = options; + + // Run diagnostics BEFORE the actual search (not counted in performance timing) + if (!skipDiagnostics) { + log.info('[FTS-DIAGNOSTICS] Running index completeness checks (not counted in search timing)...'); + const totalInFts = sql.getValue(`SELECT COUNT(*) FROM notes_fts`); + const totalNotes = sql.getValue(` + SELECT COUNT(*) + FROM notes n + LEFT JOIN blobs b ON n.blobId = b.blobId + WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND n.isDeleted = 0 + AND n.isProtected = 0 + AND b.content IS NOT NULL + `); + + if (totalInFts < totalNotes) { + log.warn(`[FTS-DIAGNOSTICS] FTS index incomplete: ${totalInFts} indexed out of ${totalNotes} total notes. Run syncMissingNotes().`); + } else { + log.info(`[FTS-DIAGNOSTICS] FTS index complete: ${totalInFts} notes indexed`); + } + } + + try { + // Start timing for actual search (excludes diagnostics) + const searchStartTime = Date.now(); + + // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses + // The FTS table already excludes protected notes, so we can search all notes + const LARGE_SET_THRESHOLD = 1000; + const isLargeNoteSet = noteIds && noteIds.size > LARGE_SET_THRESHOLD; + + if (isLargeNoteSet) { + log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); + } + + // Only filter noteIds if the set is small enough to benefit from it + const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; + const nonProtectedNoteIds = shouldFilterByNoteIds + ? this.filterNonProtectedNoteIds(noteIds) + : []; + + let whereConditions: string[] = []; + const params: any[] = []; + + // Build LIKE conditions for each token - search BOTH title and content + switch (operator) { + case "*=*": // Contains (substring) + normalizedTokens.forEach(token => { + // Search in BOTH title and content with escaped wildcards + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = this.escapeLikeWildcards(token); + params.push(`%${escapedToken}%`, `%${escapedToken}%`); + }); + break; + + case "*=": // Ends with + normalizedTokens.forEach(token => { + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = this.escapeLikeWildcards(token); + params.push(`%${escapedToken}`, `%${escapedToken}`); + }); + break; + + case "=*": // Starts with + normalizedTokens.forEach(token => { + whereConditions.push(`(title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')`); + const escapedToken = this.escapeLikeWildcards(token); + params.push(`${escapedToken}%`, `${escapedToken}%`); + }); + break; + + default: + throw new FTSQueryError(`Unsupported LIKE operator: ${operator}`); + } + + // Validate that we have search criteria + if (whereConditions.length === 0 && nonProtectedNoteIds.length === 0) { + throw new FTSQueryError("No search criteria provided (empty tokens and no note filter)"); + } + + // SQLite parameter limit handling (999 params max) + const MAX_PARAMS_PER_QUERY = 900; // Leave margin for other params + + // Add noteId filter if provided + if (nonProtectedNoteIds.length > 0) { + const tokenParamCount = params.length; + const additionalParams = 2; // For limit and offset + + if (nonProtectedNoteIds.length <= MAX_PARAMS_PER_QUERY - tokenParamCount - additionalParams) { + // Normal case: all IDs fit in one query + whereConditions.push(`noteId IN (${nonProtectedNoteIds.map(() => '?').join(',')})`); + params.push(...nonProtectedNoteIds); + } else { + // Large noteIds set: split into chunks and execute multiple queries + const chunks: string[][] = []; + for (let i = 0; i < nonProtectedNoteIds.length; i += MAX_PARAMS_PER_QUERY) { + chunks.push(nonProtectedNoteIds.slice(i, i + MAX_PARAMS_PER_QUERY)); + } + + log.info(`Large noteIds set detected (${nonProtectedNoteIds.length} notes), splitting into ${chunks.length} chunks`); + + // Execute a query for each chunk and combine results + const allResults: FTSSearchResult[] = []; + let remainingLimit = limit !== undefined ? limit : Number.MAX_SAFE_INTEGER; + let currentOffset = offset; + + for (const chunk of chunks) { + if (remainingLimit <= 0) break; + + const chunkWhereConditions = [...whereConditions]; + const chunkParams: any[] = [...params]; + + chunkWhereConditions.push(`noteId IN (${chunk.map(() => '?').join(',')})`); + chunkParams.push(...chunk); + + // Build chunk query + const chunkQuery = ` + SELECT noteId, title + FROM notes_fts + WHERE ${chunkWhereConditions.join(' AND ')} + ${remainingLimit !== Number.MAX_SAFE_INTEGER ? 'LIMIT ?' : ''} + ${currentOffset > 0 ? 'OFFSET ?' : ''} + `; + + if (remainingLimit !== Number.MAX_SAFE_INTEGER) chunkParams.push(remainingLimit); + if (currentOffset > 0) chunkParams.push(currentOffset); + + const chunkResults = sql.getRows<{ noteId: string; title: string }>(chunkQuery, chunkParams); + allResults.push(...chunkResults.map(row => ({ + noteId: row.noteId, + title: row.title, + score: 1.0 + }))); + + if (remainingLimit !== Number.MAX_SAFE_INTEGER) { + remainingLimit -= chunkResults.length; + } + currentOffset = 0; // Only apply offset to first chunk + } + + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 LIKE search (chunked) returned ${allResults.length} results in ${searchTime}ms (excluding diagnostics)`); + + // Track internal search time on context for performance comparison + if (searchContext) { + searchContext.ftsInternalSearchTime = searchTime; + } + + return allResults; + } + } + + // Build query - LIKE queries are automatically optimized by trigram index + // Only add LIMIT/OFFSET if specified + const query = ` + SELECT noteId, title + FROM notes_fts + WHERE ${whereConditions.join(' AND ')} + ${limit !== undefined ? 'LIMIT ?' : ''} + ${offset > 0 ? 'OFFSET ?' : ''} + `; + + // Only add limit/offset params if specified + if (limit !== undefined) params.push(limit); + if (offset > 0) params.push(offset); + + // Log the search parameters + log.info(`FTS5 LIKE search: tokens=[${normalizedTokens.join(', ')}], operator=${operator}, limit=${limit || 'none'}, offset=${offset}`); + + const rows = sql.getRows<{ noteId: string; title: string }>(query, params); + + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 LIKE search returned ${rows.length} results in ${searchTime}ms (excluding diagnostics)`); + + // Track internal search time on context for performance comparison + if (searchContext) { + searchContext.ftsInternalSearchTime = searchTime; + } + + return rows.map(row => ({ + noteId: row.noteId, + title: row.title, + score: 1.0 // LIKE queries don't have ranking + })); + + } catch (error: any) { + log.error(`FTS5 LIKE search error: ${error}`); + throw new FTSQueryError( + `FTS5 LIKE search failed: ${error.message}`, + undefined + ); + } + } + /** * Performs a synchronous full-text search using FTS5 - * + * * @param tokens - Search tokens * @param operator - Search operator * @param noteIds - Optional set of note IDs to search within * @param options - Search options + * @param searchContext - Optional search context to track internal timing * @returns Array of search results */ searchSync( - tokens: string[], + tokens: string[], operator: string, noteIds?: Set, - options: FTSSearchOptions = {} + options: FTSSearchOptions = {}, + searchContext?: any ): FTSSearchResult[] { if (!this.checkFTS5Availability()) { throw new FTSNotAvailableError(); @@ -226,6 +468,9 @@ class FTSSearchService { } = options; try { + // Start timing for actual search + const searchStartTime = Date.now(); + const ftsQuery = this.convertToFTS5Query(tokens, operator); // Validate query length @@ -249,10 +494,20 @@ class FTSSearchService { let whereConditions = [`notes_fts MATCH ?`]; const params: any[] = [ftsQuery]; - // Filter by noteIds if provided - if (noteIds && noteIds.size > 0) { + // Optimization: If noteIds set is very large, skip filtering to avoid expensive IN clauses + // The FTS table already excludes protected notes, so we can search all notes + const LARGE_SET_THRESHOLD = 1000; + const isLargeNoteSet = noteIds && noteIds.size > LARGE_SET_THRESHOLD; + + if (isLargeNoteSet) { + log.info(`[FTS-OPTIMIZATION] Large noteIds set (${noteIds!.size} notes) - skipping IN clause filter, searching all FTS notes`); + } + + // Filter by noteIds if provided and set is small enough + const shouldFilterByNoteIds = noteIds && noteIds.size > 0 && !isLargeNoteSet; + if (shouldFilterByNoteIds) { // First filter out any protected notes from the noteIds - const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds); + const nonProtectedNoteIds = this.filterNonProtectedNoteIds(noteIds!); if (nonProtectedNoteIds.length === 0) { // All provided notes are protected, return empty results return []; @@ -287,6 +542,14 @@ class FTSSearchService { snippet?: string; }>(query, params); + const searchTime = Date.now() - searchStartTime; + log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`); + + // Track internal search time on context for performance comparison + if (searchContext) { + searchContext.ftsInternalSearchTime = searchTime; + } + return results; } catch (error: any) { diff --git a/apps/server/src/services/search/search_context.ts b/apps/server/src/services/search/search_context.ts index 314c7e7ce..5201c73ad 100644 --- a/apps/server/src/services/search/search_context.ts +++ b/apps/server/src/services/search/search_context.ts @@ -24,6 +24,7 @@ class SearchContext { fulltextQuery: string; dbLoadNeeded: boolean; error: string | null; + ftsInternalSearchTime: number | null; // Time spent in actual FTS search (excluding diagnostics) constructor(params: SearchParams = {}) { this.fastSearch = !!params.fastSearch; @@ -54,6 +55,7 @@ class SearchContext { // and some extra data needs to be loaded before executing this.dbLoadNeeded = false; this.error = null; + this.ftsInternalSearchTime = null; } addError(error: string) { diff --git a/apps/server/src/services/search/services/search.ts b/apps/server/src/services/search/services/search.ts index 13b13305a..2543bb7b6 100644 --- a/apps/server/src/services/search/services/search.ts +++ b/apps/server/src/services/search/services/search.ts @@ -19,6 +19,7 @@ import sql from "../../sql.js"; import scriptService from "../../script.js"; import striptags from "striptags"; import protectedSessionService from "../../protected_session.js"; +import ftsSearchService from "../fts_search.js"; export interface SearchNoteResult { searchResultNoteIds: string[]; @@ -422,13 +423,83 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear // ordering or other logic that shouldn't be interfered with. const isPureExpressionQuery = query.trim().startsWith('#'); + // Performance comparison for quick-search (fastSearch === false) + const isQuickSearch = searchContext.fastSearch === false; let results: SearchResult[]; + let ftsTime = 0; + let traditionalTime = 0; if (isPureExpressionQuery) { // For pure expression queries, use standard search without progressive phases results = performSearch(expression, searchContext, searchContext.enableFuzzyMatching); } else { - results = findResultsWithExpression(expression, searchContext); + // For quick-search, run both FTS5 and traditional search to compare + if (isQuickSearch) { + log.info(`[QUICK-SEARCH-COMPARISON] Starting comparison for query: "${query}"`); + + // Time FTS5 search (normal path) + const ftsStartTime = Date.now(); + results = findResultsWithExpression(expression, searchContext); + ftsTime = Date.now() - ftsStartTime; + + // Time traditional search (with FTS5 disabled) + const traditionalStartTime = Date.now(); + + // Create a new search context with FTS5 disabled + const traditionalContext = new SearchContext({ + fastSearch: false, + includeArchivedNotes: false, + includeHiddenNotes: true, + fuzzyAttributeSearch: true, + ignoreInternalAttributes: true, + ancestorNoteId: searchContext.ancestorNoteId + }); + + // Temporarily disable FTS5 to force traditional search + const originalFtsAvailable = (ftsSearchService as any).isFTS5Available; + (ftsSearchService as any).isFTS5Available = false; + + const traditionalResults = findResultsWithExpression(expression, traditionalContext); + traditionalTime = Date.now() - traditionalStartTime; + + // Restore FTS5 availability + (ftsSearchService as any).isFTS5Available = originalFtsAvailable; + + // Log performance comparison + // Use internal FTS search time (excluding diagnostics) if available + const ftsInternalTime = searchContext.ftsInternalSearchTime ?? ftsTime; + const speedup = traditionalTime > 0 ? (traditionalTime / ftsInternalTime).toFixed(2) : "N/A"; + log.info(`[QUICK-SEARCH-COMPARISON] ===== Results for query: "${query}" =====`); + log.info(`[QUICK-SEARCH-COMPARISON] FTS5 search: ${ftsInternalTime}ms (excluding diagnostics), found ${results.length} results`); + log.info(`[QUICK-SEARCH-COMPARISON] Traditional search: ${traditionalTime}ms, found ${traditionalResults.length} results`); + log.info(`[QUICK-SEARCH-COMPARISON] FTS5 is ${speedup}x faster (saved ${traditionalTime - ftsInternalTime}ms)`); + + // Check if results match + const ftsNoteIds = new Set(results.map(r => r.noteId)); + const traditionalNoteIds = new Set(traditionalResults.map(r => r.noteId)); + const matchingResults = ftsNoteIds.size === traditionalNoteIds.size && + Array.from(ftsNoteIds).every(id => traditionalNoteIds.has(id)); + + if (!matchingResults) { + log.info(`[QUICK-SEARCH-COMPARISON] Results differ! FTS5: ${ftsNoteIds.size} notes, Traditional: ${traditionalNoteIds.size} notes`); + + // Find differences + const onlyInFTS = Array.from(ftsNoteIds).filter(id => !traditionalNoteIds.has(id)); + const onlyInTraditional = Array.from(traditionalNoteIds).filter(id => !ftsNoteIds.has(id)); + + if (onlyInFTS.length > 0) { + log.info(`[QUICK-SEARCH-COMPARISON] Only in FTS5: ${onlyInFTS.slice(0, 5).join(", ")}${onlyInFTS.length > 5 ? "..." : ""}`); + } + if (onlyInTraditional.length > 0) { + log.info(`[QUICK-SEARCH-COMPARISON] Only in Traditional: ${onlyInTraditional.slice(0, 5).join(", ")}${onlyInTraditional.length > 5 ? "..." : ""}`); + } + } else { + log.info(`[QUICK-SEARCH-COMPARISON] Results match perfectly! ✓`); + } + log.info(`[QUICK-SEARCH-COMPARISON] ========================================`); + } else { + results = findResultsWithExpression(expression, searchContext); + } } return results;