mirror of
https://github.com/zadam/trilium.git
synced 2025-12-05 06:54:23 +01:00
feat(fts5): also create an fts5 index for attributes, and allow them to be searchable using fts5 indexes
Some checks failed
Checks / main (push) Has been cancelled
Some checks failed
Checks / main (push) Has been cancelled
This commit is contained in:
parent
15719a1ee9
commit
3957d789da
@ -227,14 +227,16 @@ CREATE TABLE IF NOT EXISTS sessions (
|
|||||||
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
||||||
--
|
--
|
||||||
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||||
-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
|
-- detail='full' enables phrase queries (required for exact match with = operator)
|
||||||
-- (loses position info for highlight() function, but snippet() still works)
|
-- and provides position info for highlight() function
|
||||||
|
-- Note: Using detail='full' instead of detail='none' increases index size by ~50%
|
||||||
|
-- but is necessary to support phrase queries like "exact phrase"
|
||||||
CREATE VIRTUAL TABLE notes_fts USING fts5(
|
CREATE VIRTUAL TABLE notes_fts USING fts5(
|
||||||
noteId UNINDEXED,
|
noteId UNINDEXED,
|
||||||
title,
|
title,
|
||||||
content,
|
content,
|
||||||
tokenize = 'trigram',
|
tokenize = 'trigram',
|
||||||
detail = 'none'
|
detail = 'full'
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Triggers to keep FTS table synchronized with notes
|
-- Triggers to keep FTS table synchronized with notes
|
||||||
@ -371,3 +373,65 @@ BEGIN
|
|||||||
AND n.isDeleted = 0
|
AND n.isDeleted = 0
|
||||||
AND n.isProtected = 0;
|
AND n.isProtected = 0;
|
||||||
END;
|
END;
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- FTS5 Full-Text Search Index for Attributes
|
||||||
|
-- =====================================================
|
||||||
|
-- This FTS5 table enables fast full-text searching of attribute names and values
|
||||||
|
-- Benefits:
|
||||||
|
-- - Fast free-text searches like ="somevalue" (10-50ms vs 1-2 seconds)
|
||||||
|
-- - Scales well with large attribute counts (650K+ attributes)
|
||||||
|
-- - Consistent performance with notes_fts
|
||||||
|
--
|
||||||
|
-- Uses trigram tokenizer with detail='full' for:
|
||||||
|
-- 1. Substring matching (3+ characters)
|
||||||
|
-- 2. Phrase query support (exact matches with word boundaries)
|
||||||
|
-- 3. Multi-language support without stemming assumptions
|
||||||
|
|
||||||
|
CREATE VIRTUAL TABLE attributes_fts USING fts5(
|
||||||
|
attributeId UNINDEXED,
|
||||||
|
noteId UNINDEXED,
|
||||||
|
name,
|
||||||
|
value,
|
||||||
|
tokenize = 'trigram',
|
||||||
|
detail = 'full'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Triggers to keep attributes_fts synchronized with attributes table
|
||||||
|
|
||||||
|
-- Trigger for INSERT operations
|
||||||
|
CREATE TRIGGER attributes_fts_insert
|
||||||
|
AFTER INSERT ON attributes
|
||||||
|
WHEN NEW.isDeleted = 0
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||||
|
VALUES (NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, ''));
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- Trigger for UPDATE operations
|
||||||
|
CREATE TRIGGER attributes_fts_update
|
||||||
|
AFTER UPDATE ON attributes
|
||||||
|
BEGIN
|
||||||
|
-- Remove old entry
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||||
|
|
||||||
|
-- Add new entry if not deleted
|
||||||
|
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||||
|
SELECT NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, '')
|
||||||
|
WHERE NEW.isDeleted = 0;
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- Trigger for DELETE operations
|
||||||
|
CREATE TRIGGER attributes_fts_delete
|
||||||
|
AFTER DELETE ON attributes
|
||||||
|
BEGIN
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- Trigger for soft delete (isDeleted = 1)
|
||||||
|
CREATE TRIGGER attributes_fts_soft_delete
|
||||||
|
AFTER UPDATE ON attributes
|
||||||
|
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
|
||||||
|
BEGIN
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = NEW.attributeId;
|
||||||
|
END;
|
||||||
|
|||||||
@ -2,11 +2,13 @@
|
|||||||
* Migration to add FTS5 full-text search support and strategic performance indexes
|
* Migration to add FTS5 full-text search support and strategic performance indexes
|
||||||
*
|
*
|
||||||
* This migration:
|
* This migration:
|
||||||
* 1. Creates an FTS5 virtual table for full-text searching
|
* 1. Creates an FTS5 virtual table for full-text searching of notes
|
||||||
* 2. Populates it with existing note content
|
* 2. Populates it with existing note content
|
||||||
* 3. Creates triggers to keep the FTS table synchronized with note changes
|
* 3. Creates triggers to keep the FTS table synchronized with note changes
|
||||||
* 4. Adds strategic composite and covering indexes for improved query performance
|
* 4. Creates an FTS5 virtual table for full-text searching of attributes
|
||||||
* 5. Optimizes common query patterns identified through performance analysis
|
* 5. Populates it with existing attributes and creates synchronization triggers
|
||||||
|
* 6. Adds strategic composite and covering indexes for improved query performance
|
||||||
|
* 7. Optimizes common query patterns identified through performance analysis
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import sql from "../services/sql.js";
|
import sql from "../services/sql.js";
|
||||||
@ -46,14 +48,16 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
|||||||
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
-- 4. Boolean operators (AND, OR, NOT) and phrase matching with quotes
|
||||||
--
|
--
|
||||||
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
-- IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||||
-- detail='none' reduces index size by ~50% while maintaining MATCH/rank performance
|
-- detail='full' enables phrase queries (required for exact match with = operator)
|
||||||
-- (loses position info for highlight() function, but snippet() still works)
|
-- and provides position info for highlight() function
|
||||||
|
-- Note: Using detail='full' instead of detail='none' increases index size by ~50%
|
||||||
|
-- but is necessary to support phrase queries like "exact phrase"
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
|
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
|
||||||
noteId UNINDEXED,
|
noteId UNINDEXED,
|
||||||
title,
|
title,
|
||||||
content,
|
content,
|
||||||
tokenize = 'trigram',
|
tokenize = 'trigram',
|
||||||
detail = 'none'
|
detail = 'full'
|
||||||
);
|
);
|
||||||
`);
|
`);
|
||||||
|
|
||||||
@ -549,5 +553,100 @@ export default function addFTS5SearchAndPerformanceIndexes() {
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ========================================
|
||||||
|
// Part 3: Attributes FTS5 Setup
|
||||||
|
// ========================================
|
||||||
|
|
||||||
|
log.info("Creating FTS5 index for attributes...");
|
||||||
|
|
||||||
|
sql.transactional(() => {
|
||||||
|
// Create FTS5 virtual table for attributes
|
||||||
|
// IMPORTANT: Trigram requires minimum 3-character tokens for matching
|
||||||
|
// detail='full' enables phrase queries (required for exact match with = operator)
|
||||||
|
// and provides position info for highlight() function
|
||||||
|
sql.execute(`
|
||||||
|
CREATE VIRTUAL TABLE IF NOT EXISTS attributes_fts USING fts5(
|
||||||
|
attributeId UNINDEXED,
|
||||||
|
noteId UNINDEXED,
|
||||||
|
name,
|
||||||
|
value,
|
||||||
|
tokenize = 'trigram',
|
||||||
|
detail = 'full'
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
|
||||||
|
log.info("Populating attributes_fts table...");
|
||||||
|
|
||||||
|
// Populate FTS table with existing attributes (non-deleted only)
|
||||||
|
const attrStartTime = Date.now();
|
||||||
|
sql.execute(`
|
||||||
|
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||||
|
SELECT
|
||||||
|
attributeId,
|
||||||
|
noteId,
|
||||||
|
name,
|
||||||
|
COALESCE(value, '')
|
||||||
|
FROM attributes
|
||||||
|
WHERE isDeleted = 0
|
||||||
|
`);
|
||||||
|
|
||||||
|
const populateTime = Date.now() - attrStartTime;
|
||||||
|
const attrCount = sql.getValue<number>(`SELECT COUNT(*) FROM attributes_fts`) || 0;
|
||||||
|
log.info(`Populated ${attrCount} attributes in ${populateTime}ms`);
|
||||||
|
|
||||||
|
// Create triggers to keep FTS index synchronized with attributes table
|
||||||
|
|
||||||
|
// Trigger 1: INSERT - Add new attributes to FTS
|
||||||
|
sql.execute(`
|
||||||
|
CREATE TRIGGER attributes_fts_insert
|
||||||
|
AFTER INSERT ON attributes
|
||||||
|
WHEN NEW.isDeleted = 0
|
||||||
|
BEGIN
|
||||||
|
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||||
|
VALUES (NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, ''));
|
||||||
|
END
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Trigger 2: UPDATE - Update FTS when attributes change
|
||||||
|
sql.execute(`
|
||||||
|
CREATE TRIGGER attributes_fts_update
|
||||||
|
AFTER UPDATE ON attributes
|
||||||
|
BEGIN
|
||||||
|
-- Remove old entry
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||||
|
|
||||||
|
-- Add new entry if not deleted
|
||||||
|
INSERT INTO attributes_fts (attributeId, noteId, name, value)
|
||||||
|
SELECT NEW.attributeId, NEW.noteId, NEW.name, COALESCE(NEW.value, '')
|
||||||
|
WHERE NEW.isDeleted = 0;
|
||||||
|
END
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Trigger 3: DELETE - Remove from FTS
|
||||||
|
sql.execute(`
|
||||||
|
CREATE TRIGGER attributes_fts_delete
|
||||||
|
AFTER DELETE ON attributes
|
||||||
|
BEGIN
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = OLD.attributeId;
|
||||||
|
END
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Trigger 4: Soft delete (isDeleted = 1) - Remove from FTS
|
||||||
|
sql.execute(`
|
||||||
|
CREATE TRIGGER attributes_fts_soft_delete
|
||||||
|
AFTER UPDATE ON attributes
|
||||||
|
WHEN OLD.isDeleted = 0 AND NEW.isDeleted = 1
|
||||||
|
BEGIN
|
||||||
|
DELETE FROM attributes_fts WHERE attributeId = NEW.attributeId;
|
||||||
|
END
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Run ANALYZE to update query planner statistics
|
||||||
|
log.info("Running ANALYZE on attributes_fts...");
|
||||||
|
sql.execute(`ANALYZE attributes_fts`);
|
||||||
|
|
||||||
|
log.info("Attributes FTS5 setup completed successfully");
|
||||||
|
});
|
||||||
|
|
||||||
log.info("FTS5 and performance optimization migration completed successfully");
|
log.info("FTS5 and performance optimization migration completed successfully");
|
||||||
}
|
}
|
||||||
@ -1320,3 +1320,184 @@ describe('searchWithLike - Substring Search with LIKE Queries', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('Exact Match with Word Boundaries (= operator)', () => {
|
||||||
|
let ftsSearchService: any;
|
||||||
|
let mockSql: any;
|
||||||
|
let mockLog: any;
|
||||||
|
let mockProtectedSession: any;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
// Reset mocks
|
||||||
|
vi.resetModules();
|
||||||
|
|
||||||
|
// Setup mocks
|
||||||
|
mockSql = {
|
||||||
|
getValue: vi.fn(),
|
||||||
|
getRows: vi.fn(),
|
||||||
|
getColumn: vi.fn(),
|
||||||
|
execute: vi.fn(),
|
||||||
|
transactional: vi.fn((fn: Function) => fn()),
|
||||||
|
iterateRows: vi.fn()
|
||||||
|
};
|
||||||
|
|
||||||
|
mockLog = {
|
||||||
|
info: vi.fn(),
|
||||||
|
warn: vi.fn(),
|
||||||
|
error: vi.fn(),
|
||||||
|
debug: vi.fn(),
|
||||||
|
request: vi.fn()
|
||||||
|
};
|
||||||
|
|
||||||
|
mockProtectedSession = {
|
||||||
|
isProtectedSessionAvailable: vi.fn().mockReturnValue(false),
|
||||||
|
decryptString: vi.fn()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Mock the modules
|
||||||
|
vi.doMock('../sql.js', () => ({ default: mockSql }));
|
||||||
|
vi.doMock('../log.js', () => ({ default: mockLog }));
|
||||||
|
vi.doMock('../protected_session.js', () => ({ default: mockProtectedSession }));
|
||||||
|
|
||||||
|
// Import the service after mocking
|
||||||
|
const module = await import('./fts_search.js');
|
||||||
|
ftsSearchService = module.ftsSearchService;
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('Word boundary matching with trigram tokenizer', () => {
|
||||||
|
it('should NOT match "test123" when searching for "test1234" (exact match only)', () => {
|
||||||
|
// This test SHOULD FAIL initially because trigram FTS5 phrase queries
|
||||||
|
// don't respect word boundaries - "test123" matches "test1234" via shared trigrams
|
||||||
|
mockSql.getValue.mockReturnValue(1); // FTS5 available
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
// Mock FTS5 returning BOTH notes (this is the bug)
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'Test', score: 1.0, content: '<p>test123</p>' },
|
||||||
|
{ noteId: 'note2', title: 'Test 2', score: 1.0, content: '<p>test1234</p>' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
// After the fix, we should post-filter and only return note1
|
||||||
|
// Currently this test will FAIL because we get 2 results
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
expect(results[0].content).toContain('test123');
|
||||||
|
expect(results[0].content).not.toContain('test1234');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should NOT match "abc" when searching for "abcd" (exact word boundary)', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
// FTS5 returns both due to trigram overlap
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'ABC', score: 1.0, content: 'abc' },
|
||||||
|
{ noteId: 'note2', title: 'ABCD', score: 1.0, content: 'abcd' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['abc'], '=');
|
||||||
|
|
||||||
|
// Should only match exact word "abc", not "abcd"
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should match "test123" in "test123 test1234" but still filter out "test1234" match', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'Both', score: 1.0, content: 'test123 test1234' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
// Should match because content contains "test123" as a complete word
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle multi-word exact phrases with word boundaries', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'Match', score: 1.0, content: 'hello world' },
|
||||||
|
{ noteId: 'note2', title: 'No Match', score: 1.0, content: 'hello world2' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['hello', 'world'], '=');
|
||||||
|
|
||||||
|
// Should only match exact phrase "hello world", not "hello world2"
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should match word at start of content', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'Start', score: 1.0, content: 'test123 other words' },
|
||||||
|
{ noteId: 'note2', title: 'Not Start', score: 1.0, content: 'test1234 other words' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should match word at end of content', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'End', score: 1.0, content: 'other words test123' },
|
||||||
|
{ noteId: 'note2', title: 'Not End', score: 1.0, content: 'other words test1234' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should match word as entire content', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'Exact', score: 1.0, content: 'test123' },
|
||||||
|
{ noteId: 'note2', title: 'Not Exact', score: 1.0, content: 'test1234' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should also check title for exact matches with word boundaries', () => {
|
||||||
|
mockSql.getValue.mockReturnValue(1);
|
||||||
|
mockSql.getColumn.mockReturnValue([]);
|
||||||
|
|
||||||
|
mockSql.getRows.mockReturnValue([
|
||||||
|
{ noteId: 'note1', title: 'test123', score: 1.0, content: 'other content' },
|
||||||
|
{ noteId: 'note2', title: 'test1234', score: 1.0, content: 'other content' }
|
||||||
|
]);
|
||||||
|
|
||||||
|
const results = ftsSearchService.searchSync(['test123'], '=');
|
||||||
|
|
||||||
|
// Should match based on title
|
||||||
|
expect(results).toHaveLength(1);
|
||||||
|
expect(results[0].noteId).toBe('note1');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -589,12 +589,16 @@ class FTSSearchService {
|
|||||||
? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '</')}', '...', ${snippetLength}) as snippet`
|
? `, snippet(notes_fts, ${FTS_CONFIG.SNIPPET_COLUMN_CONTENT}, '${highlightTag}', '${highlightTag.replace('<', '</')}', '...', ${snippetLength}) as snippet`
|
||||||
: '';
|
: '';
|
||||||
|
|
||||||
|
// For exact match (=), include content for post-filtering word boundaries
|
||||||
|
const contentSelect = operator === "=" ? ', content' : '';
|
||||||
|
|
||||||
const query = `
|
const query = `
|
||||||
SELECT
|
SELECT
|
||||||
noteId,
|
noteId,
|
||||||
title,
|
title,
|
||||||
rank as score
|
rank as score
|
||||||
${snippetSelect}
|
${snippetSelect}
|
||||||
|
${contentSelect}
|
||||||
FROM notes_fts
|
FROM notes_fts
|
||||||
WHERE ${whereConditions.join(' AND ')}
|
WHERE ${whereConditions.join(' AND ')}
|
||||||
ORDER BY rank
|
ORDER BY rank
|
||||||
@ -603,13 +607,42 @@ class FTSSearchService {
|
|||||||
|
|
||||||
params.push(limit, offset);
|
params.push(limit, offset);
|
||||||
|
|
||||||
const results = sql.getRows<{
|
let results = sql.getRows<{
|
||||||
noteId: string;
|
noteId: string;
|
||||||
title: string;
|
title: string;
|
||||||
score: number;
|
score: number;
|
||||||
snippet?: string;
|
snippet?: string;
|
||||||
|
content?: string;
|
||||||
}>(query, params);
|
}>(query, params);
|
||||||
|
|
||||||
|
// Post-filter for exact match operator (=) to handle word boundaries
|
||||||
|
// Trigram FTS5 doesn't respect word boundaries in phrase queries,
|
||||||
|
// so "test123" matches "test1234" due to shared trigrams.
|
||||||
|
// We need to post-filter results to only include exact word matches.
|
||||||
|
if (operator === "=") {
|
||||||
|
const phrase = tokens.join(" ");
|
||||||
|
results = results.filter(result => {
|
||||||
|
// Use content from result if available, otherwise fetch it
|
||||||
|
let noteContent = result.content;
|
||||||
|
if (!noteContent) {
|
||||||
|
noteContent = sql.getValue<string>(`
|
||||||
|
SELECT b.content
|
||||||
|
FROM notes n
|
||||||
|
LEFT JOIN blobs b ON n.blobId = b.blobId
|
||||||
|
WHERE n.noteId = ?
|
||||||
|
`, [result.noteId]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!noteContent) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if phrase appears as exact words in content or title
|
||||||
|
return this.containsExactPhrase(phrase, result.title) ||
|
||||||
|
this.containsExactPhrase(phrase, noteContent);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const searchTime = Date.now() - searchStartTime;
|
const searchTime = Date.now() - searchStartTime;
|
||||||
log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`);
|
log.info(`FTS5 MATCH search returned ${results.length} results in ${searchTime}ms`);
|
||||||
|
|
||||||
@ -658,6 +691,155 @@ class FTSSearchService {
|
|||||||
return nonProtectedNotes;
|
return nonProtectedNotes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a phrase appears as exact words in text (respecting word boundaries)
|
||||||
|
* @param phrase - The phrase to search for (case-insensitive)
|
||||||
|
* @param text - The text to search in
|
||||||
|
* @returns true if the phrase appears as complete words, false otherwise
|
||||||
|
*/
|
||||||
|
private containsExactPhrase(phrase: string, text: string | null | undefined): boolean {
|
||||||
|
if (!text || !phrase || typeof text !== 'string') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize both to lowercase for case-insensitive comparison
|
||||||
|
const normalizedPhrase = phrase.toLowerCase().trim();
|
||||||
|
const normalizedText = text.toLowerCase();
|
||||||
|
|
||||||
|
// Strip HTML tags for content matching
|
||||||
|
const plainText = striptags(normalizedText);
|
||||||
|
|
||||||
|
// For single words, use word-boundary matching
|
||||||
|
if (!normalizedPhrase.includes(' ')) {
|
||||||
|
// Split text into words and check for exact match
|
||||||
|
const words = plainText.split(/\s+/);
|
||||||
|
return words.some(word => word === normalizedPhrase);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For multi-word phrases, check if the phrase appears as consecutive words
|
||||||
|
// Split text into words, then check if the phrase appears in the word sequence
|
||||||
|
const textWords = plainText.split(/\s+/);
|
||||||
|
const phraseWords = normalizedPhrase.split(/\s+/);
|
||||||
|
|
||||||
|
// Sliding window to find exact phrase match
|
||||||
|
for (let i = 0; i <= textWords.length - phraseWords.length; i++) {
|
||||||
|
let match = true;
|
||||||
|
for (let j = 0; j < phraseWords.length; j++) {
|
||||||
|
if (textWords[i + j] !== phraseWords[j]) {
|
||||||
|
match = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (match) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Searches attributes using FTS5
|
||||||
|
* Returns noteIds of notes that have matching attributes
|
||||||
|
*/
|
||||||
|
searchAttributesSync(
|
||||||
|
tokens: string[],
|
||||||
|
operator: string,
|
||||||
|
noteIds?: Set<string>
|
||||||
|
): Set<string> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
if (!this.checkFTS5Availability()) {
|
||||||
|
return new Set();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if attributes_fts table exists
|
||||||
|
const tableExists = sql.getValue<number>(`
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM sqlite_master
|
||||||
|
WHERE type='table' AND name='attributes_fts'
|
||||||
|
`);
|
||||||
|
|
||||||
|
if (!tableExists) {
|
||||||
|
log.info("attributes_fts table does not exist - skipping FTS attribute search");
|
||||||
|
return new Set();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Sanitize tokens to prevent FTS5 syntax injection
|
||||||
|
const sanitizedTokens = tokens.map(token => this.sanitizeFTS5Token(token));
|
||||||
|
|
||||||
|
// Check if any tokens became invalid after sanitization
|
||||||
|
if (sanitizedTokens.some(t => t === '__empty_token__' || t === '__invalid_token__')) {
|
||||||
|
return new Set();
|
||||||
|
}
|
||||||
|
|
||||||
|
const phrase = sanitizedTokens.join(" ");
|
||||||
|
|
||||||
|
// Build FTS5 query for exact match
|
||||||
|
const ftsQuery = operator === "=" ? `"${phrase}"` : phrase;
|
||||||
|
|
||||||
|
// Search both name and value columns
|
||||||
|
const whereConditions: string[] = [
|
||||||
|
`attributes_fts MATCH '${ftsQuery.replace(/'/g, "''")}'`
|
||||||
|
];
|
||||||
|
|
||||||
|
const params: any[] = [];
|
||||||
|
|
||||||
|
// Filter by noteIds if provided
|
||||||
|
if (noteIds && noteIds.size > 0 && noteIds.size < 1000) {
|
||||||
|
const noteIdList = Array.from(noteIds);
|
||||||
|
whereConditions.push(`noteId IN (${noteIdList.map(() => '?').join(',')})`);
|
||||||
|
params.push(...noteIdList);
|
||||||
|
}
|
||||||
|
|
||||||
|
const query = `
|
||||||
|
SELECT DISTINCT noteId, name, value
|
||||||
|
FROM attributes_fts
|
||||||
|
WHERE ${whereConditions.join(' AND ')}
|
||||||
|
`;
|
||||||
|
|
||||||
|
const results = sql.getRows<{
|
||||||
|
noteId: string;
|
||||||
|
name: string;
|
||||||
|
value: string;
|
||||||
|
}>(query, params);
|
||||||
|
|
||||||
|
log.info(`[FTS5-ATTRIBUTES-RAW] FTS5 query returned ${results.length} raw attribute matches`);
|
||||||
|
|
||||||
|
// Post-filter for exact word matches when operator is "="
|
||||||
|
if (operator === "=") {
|
||||||
|
const matchingNoteIds = new Set<string>();
|
||||||
|
for (const result of results) {
|
||||||
|
// Check if phrase matches attribute name or value with word boundaries
|
||||||
|
// For attribute names, check exact match (attribute name "test125" matches search "test125")
|
||||||
|
// For attribute values, check if phrase appears as exact words
|
||||||
|
const nameMatch = result.name.toLowerCase() === phrase.toLowerCase();
|
||||||
|
const valueMatch = result.value ? this.containsExactPhrase(phrase, result.value) : false;
|
||||||
|
|
||||||
|
log.info(`[FTS5-ATTRIBUTES-FILTER] Checking attribute: name="${result.name}", value="${result.value}", phrase="${phrase}", nameMatch=${nameMatch}, valueMatch=${valueMatch}`);
|
||||||
|
|
||||||
|
if (nameMatch || valueMatch) {
|
||||||
|
matchingNoteIds.add(result.noteId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const filterTime = Date.now() - startTime;
|
||||||
|
log.info(`[FTS5-ATTRIBUTES-FILTERED] After post-filtering: ${matchingNoteIds.size} notes match (total time: ${filterTime}ms)`);
|
||||||
|
return matchingNoteIds;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For other operators, return all matching noteIds
|
||||||
|
const searchTime = Date.now() - startTime;
|
||||||
|
const matchingNoteIds = new Set(results.map(r => r.noteId));
|
||||||
|
log.info(`[FTS5-ATTRIBUTES-TIME] Attribute search completed in ${searchTime}ms, found ${matchingNoteIds.size} notes`);
|
||||||
|
return matchingNoteIds;
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
log.error(`FTS5 attribute search error: ${error}`);
|
||||||
|
return new Set();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Searches protected notes separately (not in FTS index)
|
* Searches protected notes separately (not in FTS index)
|
||||||
* This is a fallback method for protected notes
|
* This is a fallback method for protected notes
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user