mirror of
https://github.com/zadam/trilium.git
synced 2025-12-04 22:44:25 +01:00
feat(fts5): also create an fts5 index for attributes, and allow them to be searchable using fts5 indexes
Some checks failed
Checks / main (push) Has been cancelled
Some checks failed
Checks / main (push) Has been cancelled
This commit is contained in:
parent
15719a1ee9
commit
3957d789da
@ -227,14 +227,16 @@ CREATE TABLE IF NOT EXISTS sessions (
|
||||
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
||||
--
|
||||
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||
-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
|
||||
-- (loses position info for highlight() function, but snippet() still works)
|
||||
-- detail='full' enables phrase queries (required for exact match with = operator)
|
||||
-- and provides position info for highlight() function
|
||||
-- Note: Using detail='full' instead of detail='none' increases index size by ~50%
|
||||
-- but is necessary to support phrase queries like "exact phrase"
|
||||
CREATE VIRTUAL TABLE notes_fts USING fts5(
|
||||
noteId UNINDEXED,
|
||||
title,
|
||||
content,
|
||||
tokenize = 'trigram',
|
||||
detail = 'none'
|
||||
detail = 'full'
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS table synchronized with notes
|
||||
@ -354,14 +356,14 @@ END;
|
||||
-- Trigger for INSERT operations on blobs
|
||||
-- Handles: INSERT, INSERT OR REPLACE, and the INSERT part of upsert
|
||||
-- Updates all notes that reference this blob (common during import and deduplication)
|
||||
CREATE TRIGGER notes_fts_blob_insert
|
||||
CREATE TRIGGER notes_fts_blob_insert
|
||||
AFTER INSERT ON blobs
|
||||
BEGIN
|
||||
-- Use INSERT OR REPLACE to handle both new and existing FTS entries
|
||||
-- This is crucial for blob deduplication where multiple notes may already
|
||||
-- exist that reference this blob before the blob itself is created
|
||||
INSERT OR REPLACE INTO notes_fts (noteId, title, content)
|
||||
SELECT
|
||||
SELECT
|
||||
n.noteId,
|
||||
n.title,
|
||||
NEW.content
|
||||
@ -371,3 +373,65 @@ BEGIN
|
||||
AND n.isDeleted = 0
|
||||
AND n.isProtected = 0;
|
||||
END;
|
||||
|
||||
-- =====================================================
|
||||
-- FTS5 Full-Text Search Index for Attributes
|
||||
-- =====================================================
|
||||
-- This FTS5 table enables fast full-text searching of attribute names and values
|
||||
-- Benefits:
|
||||
-- - Fast free-text searches like ="somevalue" (10-50ms vs 1-2 seconds)
|
||||
-- - Scales well with large attribute counts (650K+ attributes)
|
||||
-- - Consistent performance with notes_fts
|
||||
--
|
||||
-- Uses trigram tokenizer with detail='full' for:
|
||||
-- 1. Substring matching (3+ characters)
|
||||
-- 2. Phrase query support (exact matches with word boundaries)
|
||||
-- 3. Multi-language support without stemming assumptions
|
||||
|
||||
CREATE VIRTUAL TABLE attributes_fts USING fts5(
|
||||
attributeId UNINDEXED,
|
||||
noteId UNINDEXED,
|
||||
name,
|
||||
value,
|
||||
tokenize = 'trigram',
|
||||
detail = 'full'
|
||||
);
|
||||
|
||||
-- Triggers to keep attributes_fts synchronized with attributes table
|
||||
|
||||
-- Trigger for INSERT operations
|
||||
CREATE TRIGGER attributes_fts_insert
|
||||
AFTER INSERT ON attributes
|
||||
WHEN NEW.isDeleted = 0
|
||||
BEGIN
|
||||
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||
VALUES (NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, ''));
|
||||
END;
|
||||
|
||||
-- Trigger for UPDATE operations
|
||||
CREATE TRIGGER attributes_fts_update
|
||||
AFTER UPDATE ON attributes
|
||||
BEGIN
|
||||
-- Remove old entry
|
||||
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||
|
||||
-- Add new entry if not deleted
|
||||
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||
SELECT NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, '')
|
||||
WHERE NEW.isDeleted = 0;
|
||||
END;
|
||||
|
||||
-- Trigger for DELETE operations
|
||||
CREATE TRIGGER attributes_fts_delete
|
||||
AFTER DELETE ON attributes
|
||||
BEGIN
|
||||
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||
END;
|
||||
|
||||
-- Trigger for soft delete (isDeleted = 1)
|
||||
CREATE TRIGGER attributes_fts_soft_delete
|
||||
AFTER UPDATE ON attributes
|
||||
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
|
||||
BEGIN
|
||||
DELETE FROM attributes_fts WHERE attributeId = NEW.attributeId;
|
||||
END;
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
/**
|
||||
* Migration to add FTS5 full-text search support and strategic performance indexes
|
||||
*
|
||||
*
|
||||
* This migration:
|
||||
* 1. Creates an FTS5 virtual table for full-text searching
|
||||
* 1. Creates an FTS5 virtual table for full-text searching of notes
|
||||
* 2. Populates it with existing note content
|
||||
* 3. Creates triggers to keep the FTS table synchronized with note changes
|
||||
* 4. Adds strategic composite and covering indexes for improved query performance
|
||||
* 5. Optimizes common query patterns identified through performance analysis
|
||||
* 4. Creates an FTS5 virtual table for full-text searching of attributes
|
||||
* 5. Populates it with existing attributes and creates synchronization triggers
|
||||
* 6. Adds strategic composite and covering indexes for improved query performance
|
||||
* 7. Optimizes common query patterns identified through performance analysis
|
||||
*/
|
||||
|
||||
import sql from "../services/sql.js";
|
||||
@ -46,14 +48,16 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
||||
--
|
||||
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||
-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
|
||||
-- (loses position info for highlight() function, but snippet() still works)
|
||||
-- detail='full' enables phrase queries (required for exact match with = operator)
|
||||
-- and provides position info for highlight() function
|
||||
-- Note: Using detail='full' instead of detail='none' increases index size by ~50%
|
||||
-- but is necessary to support phrase queries like "exact phrase"
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
|
||||
noteId UNINDEXED,
|
||||
title,
|
||||
content,
|
||||
tokenize = 'trigram',
|
||||
detail = 'none'
|
||||
detail = 'full'
|
||||
);
|
||||
`);
|
||||
|
||||
@ -549,5 +553,100 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
||||
throw error;
|
||||
}
|
||||
|
||||
// ========================================
|
||||
// Part 3: Attributes FTS5 Setup
|
||||
// ========================================
|
||||
|
||||
log.info("Creating FTS5 index for attributes...");
|
||||
|
||||
sql.transactional(() => {
|
||||
// Create FTS5 virtual table for attributes
|
||||
// IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||
// detail='full' enables phrase queries (required for exact match with = operator)
|
||||
// and provides position info for highlight() function
|
||||
sql.execute(`
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS attributes_fts USING fts5(
|
||||
attributeId UNINDEXED,
|
||||
noteId UNINDEXED,
|
||||
name,
|
||||
value,
|
||||
tokenize = 'trigram',
|
||||
detail = 'full'
|
||||
)
|
||||
`);
|
||||
|
||||
log.info("Populating attributes_fts table...");
|
||||
|
||||
// Populate FTS table with existing attributes (non-deleted only)
|
||||
const attrStartTime = Date.now();
|
||||
sql.execute(`
|
||||
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||
SELECT
|
||||
attributeId,
|
||||
noteId,
|
||||
name,
|
||||
COALESCE(value, '')
|
||||
FROM attributes
|
||||
WHERE isDeleted = 0
|
||||
`);
|
||||
|
||||
const populateTime = Date.now() - attrStartTime;
|
||||
const attrCount = sql.getValue<number>(`SELECT COUNT(*) FROM attributes_fts`) || 0;
|
||||
log.info(`Populated ${attrCount} attributes in ${populateTime}ms`);
|
||||
|
||||
// Create triggers to keep FTS index synchronized with attributes table
|
||||
|
||||
// Trigger 1: INSERT - Add new attributes to FTS
|
||||
sql.execute(`
|
||||
CREATE TRIGGER attributes_fts_insert
|
||||
AFTER INSERT ON attributes
|
||||
WHEN NEW.isDeleted = 0
|
||||
BEGIN
|
||||
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||
VALUES (NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, ''));
|
||||
END
|
||||
`);
|
||||
|
||||
// Trigger 2: UPDATE - Update FTS when attributes change
|
||||
sql.execute(`
|
||||
CREATE TRIGGER attributes_fts_update
|
||||
AFTER UPDATE ON attributes
|
||||
BEGIN
|
||||
-- Remove old entry
|
||||
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||
|
||||
-- Add new entry if not deleted
|
||||
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||
SELECT NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, '')
|
||||
WHERE NEW.isDeleted = 0;
|
||||
END
|
||||
`);
|
||||
|
||||
// Trigger 3: DELETE - Remove from FTS
|
||||
sql.execute(`
|
||||
CREATE TRIGGER attributes_fts_delete
|
||||
AFTER DELETE ON attributes
|
||||
BEGIN
|
||||
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||
END
|
||||
`);
|
||||
|
||||
// Trigger 4: Soft delete (isDeleted = 1) - Remove from FTS
|
||||
sql.execute(`
|
||||
CREATE TRIGGER attributes_fts_soft_delete
|
||||
AFTER UPDATE ON attributes
|
||||
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
|
||||
BEGIN
|
||||
DELETE FROM attributes_fts WHERE attributeId = NEW.attributeId;
|
||||
END
|
||||
`);
|
||||
|
||||
// Run ANALYZE to update query planner statistics
|
||||
log.info("Running ANALYZE on attributes_fts...");
|
||||
sql.execute(`ANALYZE attributes_fts`);
|
||||
|
||||
log.info("Attributes FTS5 setup completed successfully");
|
||||
});
|
||||
|
||||
log.info("FTS5 and performance optimization migration completed successfully");
|
||||
}
|
||||
@ -1319,4 +1319,185 @@ describe('searchWithLike - Substring Search with LIKE Queries', () => {
|
||||
expect(results[1].score).toBe(1.0);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Exact Match with Word Boundaries (= operator)', () => {
|
||||
let ftsSearchService: any;
|
||||
let mockSql: any;
|
||||
let mockLog: any;
|
||||
let mockProtectedSession: any;
|
||||
|
||||
beforeEach(async () => {
|
||||
// Reset mocks
|
||||
vi.resetModules();
|
||||
|
||||
// Setup mocks
|
||||
mockSql = {
|
||||
getValue: vi.fn(),
|
||||
getRows: vi.fn(),
|
||||
getColumn: vi.fn(),
|
||||
execute: vi.fn(),
|
||||
transactional: vi.fn((fn: Function) => fn()),
|
||||
iterateRows: vi.fn()
|
||||
};
|
||||
|
||||
mockLog = {
|
||||
info: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
debug: vi.fn(),
|
||||
request: vi.fn()
|
||||
};
|
||||
|
||||
mockProtectedSession = {
|
||||
isProtectedSessionAvailable: vi.fn().mockReturnValue(false),
|
||||
decryptString: vi.fn()
|
||||
};
|
||||
|
||||
// Mock the modules
|
||||
vi.doMock('../sql.js', () => ({ default: mockSql }));
|
||||
vi.doMock('../log.js', () => ({ default: mockLog }));
|
||||
vi.doMock('../protected_session.js', () => ({ default: mockProtectedSession }));
|
||||
|
||||
// Import the service after mocking
|
||||
const module = await import('./fts_search.js');
|
||||
ftsSearchService = module.ftsSearchService;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('Word boundary matching with trigram tokenizer', () => {
|
||||
it('should NOT match "test123" when searching for "test1234" (exact match only)', () => {
|
||||
// This test SHOULD FAIL initially because trigram FTS5 phrase queries
|
||||
// don't respect word boundaries - "test123" matches "test1234" via shared trigrams
|
||||
mockSql.getValue.mockReturnValue(1); // FTS5 available
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
// Mock FTS5 returning BOTH notes (this is the bug)
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'Test', score: 1.0, content: '<p>test123</p>' },
|
||||
{ noteId: 'note2', title: 'Test 2', score: 1.0, content: '<p>test1234</p>' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
// After the fix, we should post-filter and only return note1
|
||||
// Currently this test will FAIL because we get 2 results
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
expect(results[0].content).toContain('test123');
|
||||
expect(results[0].content).not.toContain('test1234');
|
||||
});
|
||||
|
||||
it('should NOT match "abc" when searching for "abcd" (exact word boundary)', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
// FTS5 returns both due to trigram overlap
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'ABC', score: 1.0, content: 'abc' },
|
||||
{ noteId: 'note2', title: 'ABCD', score: 1.0, content: 'abcd' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['abc'], '=');
|
||||
|
||||
// Should only match exact word "abc", not "abcd"
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should match "test123" in "test123 test1234" but still filter out "test1234" match', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'Both', score: 1.0, content: 'test123 test1234' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
// Should match because content contains "test123" as a complete word
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should handle multi-word exact phrases with word boundaries', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'Match', score: 1.0, content: 'hello world' },
|
||||
{ noteId: 'note2', title: 'No Match', score: 1.0, content: 'hello world2' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['hello', 'world'], '=');
|
||||
|
||||
// Should only match exact phrase "hello world", not "hello world2"
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should match word at start of content', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'Start', score: 1.0, content: 'test123 other words' },
|
||||
{ noteId: 'note2', title: 'Not Start', score: 1.0, content: 'test1234 other words' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should match word at end of content', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'End', score: 1.0, content: 'other words test123' },
|
||||
{ noteId: 'note2', title: 'Not End', score: 1.0, content: 'other words test1234' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should match word as entire content', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'Exact', score: 1.0, content: 'test123' },
|
||||
{ noteId: 'note2', title: 'Not Exact', score: 1.0, content: 'test1234' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
|
||||
it('should also check title for exact matches with word boundaries', () => {
|
||||
mockSql.getValue.mockReturnValue(1);
|
||||
mockSql.getColumn.mockReturnValue([]);
|
||||
|
||||
mockSql.getRows.mockReturnValue([
|
||||
{ noteId: 'note1', title: 'test123', score: 1.0, content: 'other content' },
|
||||
{ noteId: 'note2', title: 'test1234', score: 1.0, content: 'other content' }
|
||||
]);
|
||||
|
||||
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||
|
||||
// Should match based on title
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].noteId).toBe('note1');
|
||||
});
|
||||
});
|
||||
});
|
||||
@ -585,16 +585,20 @@ class FTSSearchService {
|
||||
}
|
||||
|
||||
// Build snippet extraction if requested
|
||||
const snippetSelect = includeSnippets
|
||||
const snippetSelect = includeSnippets
|
||||
? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '</')}', '...', ${snippetLength}) as snippet`
|
||||
: '';
|
||||
|
||||
// For exact match (=), include content for post-filtering word boundaries
|
||||
const contentSelect = operator === "=" ? ', content' : '';
|
||||
|
||||
const query = `
|
||||
SELECT
|
||||
SELECT
|
||||
noteId,
|
||||
title,
|
||||
rank as score
|
||||
${snippetSelect}
|
||||
${contentSelect}
|
||||
FROM notes_fts
|
||||
WHERE ${whereConditions.join(' AND ')}
|
||||
ORDER BY rank
|
||||
@ -603,13 +607,42 @@ class FTSSearchService {
|
||||
|
||||
params.push(limit, offset);
|
||||
|
||||
const results = sql.getRows<{
|
||||
let results = sql.getRows<{
|
||||
noteId: string;
|
||||
title: string;
|
||||
score: number;
|
||||
snippet?: string;
|
||||
content?: string;
|
||||
}>(query, params);
|
||||
|
||||
// Post-filter for exact match operator (=) to handle word boundaries
|
||||
// Trigram FTS5 doesn't respect word boundaries in phrase queries,
|
||||
// so "test123" matches "test1234" due to shared trigrams.
|
||||
// We need to post-filter results to only include exact word matches.
|
||||
if (operator === "=") {
|
||||
const phrase = tokens.join(" ");
|
||||
results = results.filter(result => {
|
||||
// Use content from result if available, otherwise fetch it
|
||||
let noteContent = result.content;
|
||||
if (!noteContent) {
|
||||
noteContent = sql.getValue<string>(`
|
||||
SELECT b.content
|
||||
FROM notes n
|
||||
LEFT JOIN blobs b ON n.blobId = b.blobId
|
||||
WHERE n.noteId = ?
|
||||
`, [result.noteId]);
|
||||
}
|
||||
|
||||
if (!noteContent) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if phrase appears as exact words in content or title
|
||||
return this.containsExactPhrase(phrase, result.title) ||
|
||||
this.containsExactPhrase(phrase, noteContent);
|
||||
});
|
||||
}
|
||||
|
||||
const searchTime = Date.now() - searchStartTime;
|
||||
log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`);
|
||||
|
||||
@ -647,17 +680,166 @@ class FTSSearchService {
|
||||
private filterNonProtectedNoteIds(noteIds: Set<string>): string[] {
|
||||
const noteIdList = Array.from(noteIds);
|
||||
const placeholders = noteIdList.map(() => '?').join(',');
|
||||
|
||||
|
||||
const nonProtectedNotes = sql.getColumn<string>(`
|
||||
SELECT noteId
|
||||
FROM notes
|
||||
SELECT noteId
|
||||
FROM notes
|
||||
WHERE noteId IN (${placeholders})
|
||||
AND isProtected = 0
|
||||
`, noteIdList);
|
||||
|
||||
|
||||
return nonProtectedNotes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a phrase appears as exact words in text (respecting word boundaries)
|
||||
* @param phrase - The phrase to search for (case-insensitive)
|
||||
* @param text - The text to search in
|
||||
* @returns true if the phrase appears as complete words, false otherwise
|
||||
*/
|
||||
private containsExactPhrase(phrase: string, text: string | null | undefined): boolean {
|
||||
if (!text || !phrase || typeof text !== 'string') {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Normalize both to lowercase for case-insensitive comparison
|
||||
const normalizedPhrase = phrase.toLowerCase().trim();
|
||||
const normalizedText = text.toLowerCase();
|
||||
|
||||
// Strip HTML tags for content matching
|
||||
const plainText = striptags(normalizedText);
|
||||
|
||||
// For single words, use word-boundary matching
|
||||
if (!normalizedPhrase.includes(' ')) {
|
||||
// Split text into words and check for exact match
|
||||
const words = plainText.split(/\s+/);
|
||||
return words.some(word => word === normalizedPhrase);
|
||||
}
|
||||
|
||||
// For multi-word phrases, check if the phrase appears as consecutive words
|
||||
// Split text into words, then check if the phrase appears in the word sequence
|
||||
const textWords = plainText.split(/\s+/);
|
||||
const phraseWords = normalizedPhrase.split(/\s+/);
|
||||
|
||||
// Sliding window to find exact phrase match
|
||||
for (let i = 0; i <= textWords.length - phraseWords.length; i++) {
|
||||
let match = true;
|
||||
for (let j = 0; j < phraseWords.length; j++) {
|
||||
if (textWords[i + j] !== phraseWords[j]) {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches attributes using FTS5
|
||||
* Returns noteIds of notes that have matching attributes
|
||||
*/
|
||||
searchAttributesSync(
|
||||
tokens: string[],
|
||||
operator: string,
|
||||
noteIds?: Set<string>
|
||||
): Set<string> {
|
||||
const startTime = Date.now();
|
||||
|
||||
if (!this.checkFTS5Availability()) {
|
||||
return new Set();
|
||||
}
|
||||
|
||||
// Check if attributes_fts table exists
|
||||
const tableExists = sql.getValue<number>(`
|
||||
SELECT COUNT(*)
|
||||
FROM sqlite_master
|
||||
WHERE type='table' AND name='attributes_fts'
|
||||
`);
|
||||
|
||||
if (!tableExists) {
|
||||
log.info("attributes_fts table does not exist - skipping FTS attribute search");
|
||||
return new Set();
|
||||
}
|
||||
|
||||
try {
|
||||
// Sanitize tokens to prevent FTS5 syntax injection
|
||||
const sanitizedTokens = tokens.map(token => this.sanitizeFTS5Token(token));
|
||||
|
||||
// Check if any tokens became invalid after sanitization
|
||||
if (sanitizedTokens.some(t => t === '__empty_token__' || t === '__invalid_token__')) {
|
||||
return new Set();
|
||||
}
|
||||
|
||||
const phrase = sanitizedTokens.join(" ");
|
||||
|
||||
// Build FTS5 query for exact match
|
||||
const ftsQuery = operator === "=" ? `"${phrase}"` : phrase;
|
||||
|
||||
// Search both name and value columns
|
||||
const whereConditions: string[] = [
|
||||
`attributes_fts MATCH '${ftsQuery.replace(/'/g, "''")}'`
|
||||
];
|
||||
|
||||
const params: any[] = [];
|
||||
|
||||
// Filter by noteIds if provided
|
||||
if (noteIds && noteIds.size > 0 && noteIds.size < 1000) {
|
||||
const noteIdList = Array.from(noteIds);
|
||||
whereConditions.push(`noteId IN (${noteIdList.map(() => '?').join(',')})`);
|
||||
params.push(...noteIdList);
|
||||
}
|
||||
|
||||
const query = `
|
||||
SELECT DISTINCT noteId, name, value
|
||||
FROM attributes_fts
|
||||
WHERE ${whereConditions.join(' AND ')}
|
||||
`;
|
||||
|
||||
const results = sql.getRows<{
|
||||
noteId: string;
|
||||
name: string;
|
||||
value: string;
|
||||
}>(query, params);
|
||||
|
||||
log.info(`[FTS5-ATTRIBUTES-RAW] FTS5 query returned ${results.length} raw attribute matches`);
|
||||
|
||||
// Post-filter for exact word matches when operator is "="
|
||||
if (operator === "=") {
|
||||
const matchingNoteIds = new Set<string>();
|
||||
for (const result of results) {
|
||||
// Check if phrase matches attribute name or value with word boundaries
|
||||
// For attribute names, check exact match (attribute name "test125" matches search "test125")
|
||||
// For attribute values, check if phrase appears as exact words
|
||||
const nameMatch = result.name.toLowerCase() === phrase.toLowerCase();
|
||||
const valueMatch = result.value ? this.containsExactPhrase(phrase, result.value) : false;
|
||||
|
||||
log.info(`[FTS5-ATTRIBUTES-FILTER] Checking attribute: name="${result.name}", value="${result.value}", phrase="${phrase}", nameMatch=${nameMatch}, valueMatch=${valueMatch}`);
|
||||
|
||||
if (nameMatch || valueMatch) {
|
||||
matchingNoteIds.add(result.noteId);
|
||||
}
|
||||
}
|
||||
const filterTime = Date.now() - startTime;
|
||||
log.info(`[FTS5-ATTRIBUTES-FILTERED] After post-filtering: ${matchingNoteIds.size} notes match (total time: ${filterTime}ms)`);
|
||||
return matchingNoteIds;
|
||||
}
|
||||
|
||||
// For other operators, return all matching noteIds
|
||||
const searchTime = Date.now() - startTime;
|
||||
const matchingNoteIds = new Set(results.map(r => r.noteId));
|
||||
log.info(`[FTS5-ATTRIBUTES-TIME] Attribute search completed in ${searchTime}ms, found ${matchingNoteIds.size} notes`);
|
||||
return matchingNoteIds;
|
||||
|
||||
} catch (error: any) {
|
||||
log.error(`FTS5 attribute search error: ${error}`);
|
||||
return new Set();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches protected notes separately (not in FTS index)
|
||||
* This is a fallback method for protected notes
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user