mirror of
https://github.com/zadam/trilium.git
synced 2025-12-05 15:04:24 +01:00
461 lines
15 KiB
TypeScript
461 lines
15 KiB
TypeScript
/**
|
|
* Minimal FTS5 Search Service
|
|
*
|
|
* Design principles:
|
|
* - Direct SQLite FTS5 queries only
|
|
* - No memory management or query governors
|
|
* - No temporary tables or complex batching
|
|
* - Let SQLite handle the scale
|
|
* - Simple, maintainable code
|
|
*/
|
|
|
|
import sql from "../sql.js";
|
|
import log from "../log.js";
|
|
|
|
export interface MinimalFTSSearchResult {
|
|
noteId: string;
|
|
title: string;
|
|
score: number;
|
|
snippet?: string;
|
|
}
|
|
|
|
export interface MinimalFTSSearchOptions {
|
|
limit?: number;
|
|
offset?: number;
|
|
includeSnippets?: boolean;
|
|
}
|
|
|
|
class MinimalFTSSearchService {
|
|
private isFTS5Available: boolean | null = null;
|
|
|
|
/**
|
|
* Check if FTS5 table exists
|
|
*/
|
|
checkFTS5Availability(): boolean {
|
|
if (this.isFTS5Available !== null) {
|
|
return this.isFTS5Available;
|
|
}
|
|
|
|
try {
|
|
const tableExists = sql.getValue<number>(`
|
|
SELECT COUNT(*)
|
|
FROM sqlite_master
|
|
WHERE type = 'table'
|
|
AND name = 'notes_fts'
|
|
`);
|
|
|
|
this.isFTS5Available = tableExists > 0;
|
|
|
|
if (!this.isFTS5Available) {
|
|
log.info("FTS5 table not found");
|
|
}
|
|
} catch (error) {
|
|
log.error(`Error checking FTS5 availability: ${error}`);
|
|
this.isFTS5Available = false;
|
|
}
|
|
|
|
return this.isFTS5Available;
|
|
}
|
|
|
|
/**
|
|
* Convert search tokens to FTS5 query
|
|
* Keep it simple - let SQLite do the work
|
|
*/
|
|
convertToFTS5Query(tokens: string[], operator: string): string {
|
|
if (!tokens || tokens.length === 0) {
|
|
throw new Error("No search tokens provided");
|
|
}
|
|
|
|
// Basic sanitization - remove FTS5 special characters
|
|
const sanitizedTokens = tokens.map(token =>
|
|
token.replace(/["()]/g, '').trim()
|
|
).filter(t => t.length > 0);
|
|
|
|
if (sanitizedTokens.length === 0) {
|
|
throw new Error("No valid tokens after sanitization");
|
|
}
|
|
|
|
switch (operator) {
|
|
case "=": // Exact phrase
|
|
return `"${sanitizedTokens.join(" ")}"`;
|
|
|
|
case "*=*": // Contains (substring)
|
|
// Use prefix search for each token
|
|
return sanitizedTokens.map(t => `${t}*`).join(" AND ");
|
|
|
|
case "*=": // Ends with (not well supported in FTS5)
|
|
// Fallback to contains
|
|
return sanitizedTokens.map(t => `${t}*`).join(" AND ");
|
|
|
|
case "=*": // Starts with
|
|
return sanitizedTokens.map(t => `${t}*`).join(" AND ");
|
|
|
|
case "!=": // Does not contain
|
|
return `NOT (${sanitizedTokens.join(" OR ")})`;
|
|
|
|
case "~=": // Fuzzy match (use OR for flexibility)
|
|
case "~*":
|
|
return sanitizedTokens.join(" OR ");
|
|
|
|
default:
|
|
// Default to AND search
|
|
return sanitizedTokens.join(" AND ");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Perform word-based search using FTS5
|
|
*/
|
|
searchWords(
|
|
tokens: string[],
|
|
operator: string,
|
|
noteIds?: Set<string>,
|
|
options: MinimalFTSSearchOptions = {}
|
|
): MinimalFTSSearchResult[] {
|
|
if (!this.checkFTS5Availability()) {
|
|
throw new Error("FTS5 not available");
|
|
}
|
|
|
|
const {
|
|
limit = 100,
|
|
offset = 0,
|
|
includeSnippets = false
|
|
} = options;
|
|
|
|
try {
|
|
const ftsQuery = this.convertToFTS5Query(tokens, operator);
|
|
|
|
// Build the query
|
|
let query: string;
|
|
const params: any[] = [ftsQuery];
|
|
|
|
if (noteIds && noteIds.size > 0) {
|
|
// Filter by specific noteIds
|
|
const noteIdArray = Array.from(noteIds);
|
|
const placeholders = noteIdArray.map(() => '?').join(',');
|
|
|
|
if (includeSnippets) {
|
|
query = `
|
|
SELECT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score,
|
|
snippet(notes_fts, 2, '<mark>', '</mark>', '...', 30) as snippet
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND f.noteId IN (${placeholders})
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
} else {
|
|
query = `
|
|
SELECT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND f.noteId IN (${placeholders})
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
}
|
|
params.push(...noteIdArray, limit, offset);
|
|
} else {
|
|
// Search all notes
|
|
if (includeSnippets) {
|
|
query = `
|
|
SELECT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score,
|
|
snippet(notes_fts, 2, '<mark>', '</mark>', '...', 30) as snippet
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
} else {
|
|
query = `
|
|
SELECT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
}
|
|
params.push(limit, offset);
|
|
}
|
|
|
|
const results = sql.getRows<MinimalFTSSearchResult>(query, params);
|
|
return results;
|
|
|
|
} catch (error: any) {
|
|
log.error(`FTS5 search error: ${error}`);
|
|
throw new Error(`FTS5 search failed: ${error.message}`);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Perform substring search using FTS5 prefix indexes
|
|
* This is slower than word search but still uses FTS5
|
|
*/
|
|
searchSubstring(
|
|
tokens: string[],
|
|
noteIds?: Set<string>,
|
|
options: MinimalFTSSearchOptions = {}
|
|
): MinimalFTSSearchResult[] {
|
|
if (!this.checkFTS5Availability()) {
|
|
throw new Error("FTS5 not available");
|
|
}
|
|
|
|
const {
|
|
limit = 100,
|
|
offset = 0,
|
|
includeSnippets = false
|
|
} = options;
|
|
|
|
try {
|
|
// For substring search, use prefix matching
|
|
// Split each token into smaller parts for better matching
|
|
const substringTokens: string[] = [];
|
|
|
|
for (const token of tokens) {
|
|
if (token.length <= 2) {
|
|
// Short tokens - just add with wildcard
|
|
substringTokens.push(`${token}*`);
|
|
} else {
|
|
// Longer tokens - create multiple prefix searches
|
|
// This leverages the prefix indexes we created (2, 3, 4 chars)
|
|
for (let i = 2; i <= Math.min(4, token.length); i++) {
|
|
substringTokens.push(`${token.substring(0, i)}*`);
|
|
}
|
|
// Also add the full token with wildcard
|
|
if (token.length > 4) {
|
|
substringTokens.push(`${token}*`);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create FTS query with OR to find any matching substring
|
|
const ftsQuery = substringTokens.join(" OR ");
|
|
|
|
// Build the query
|
|
let query: string;
|
|
const params: any[] = [ftsQuery];
|
|
|
|
if (noteIds && noteIds.size > 0) {
|
|
const noteIdArray = Array.from(noteIds);
|
|
const placeholders = noteIdArray.map(() => '?').join(',');
|
|
|
|
query = `
|
|
SELECT DISTINCT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND f.noteId IN (${placeholders})
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
params.push(...noteIdArray, limit, offset);
|
|
} else {
|
|
query = `
|
|
SELECT DISTINCT
|
|
f.noteId,
|
|
n.title,
|
|
-rank as score
|
|
FROM notes_fts f
|
|
INNER JOIN notes n ON f.noteId = n.noteId
|
|
WHERE notes_fts MATCH ?
|
|
AND n.isDeleted = 0
|
|
ORDER BY rank
|
|
LIMIT ? OFFSET ?
|
|
`;
|
|
params.push(limit, offset);
|
|
}
|
|
|
|
const results = sql.getRows<MinimalFTSSearchResult>(query, params);
|
|
return results;
|
|
|
|
} catch (error: any) {
|
|
log.error(`FTS5 substring search error: ${error}`);
|
|
throw new Error(`FTS5 substring search failed: ${error.message}`);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Combined search that handles both word and substring searches
|
|
*/
|
|
search(
|
|
tokens: string[],
|
|
operator: string,
|
|
noteIds?: Set<string>,
|
|
options: MinimalFTSSearchOptions = {}
|
|
): MinimalFTSSearchResult[] {
|
|
// Substring search operators
|
|
if (operator === '*=*' || operator === '*=') {
|
|
return this.searchSubstring(tokens, noteIds, options);
|
|
}
|
|
|
|
// Word-based search for all other operators
|
|
return this.searchWords(tokens, operator, noteIds, options);
|
|
}
|
|
|
|
/**
|
|
* Update FTS index for a specific note
|
|
*/
|
|
updateNoteIndex(noteId: string, title: string, content: string): void {
|
|
if (!this.checkFTS5Availability()) {
|
|
return;
|
|
}
|
|
|
|
try {
|
|
sql.transactional(() => {
|
|
// Delete existing entry
|
|
sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]);
|
|
|
|
// Insert new entry (limit content size)
|
|
sql.execute(`
|
|
INSERT INTO notes_fts (noteId, title, content)
|
|
VALUES (?, ?, SUBSTR(?, 1, 500000))
|
|
`, [noteId, title, content]);
|
|
});
|
|
} catch (error) {
|
|
log.error(`Failed to update FTS index for note ${noteId}: ${error}`);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Remove a note from the FTS index
|
|
*/
|
|
removeNoteFromIndex(noteId: string): void {
|
|
if (!this.checkFTS5Availability()) {
|
|
return;
|
|
}
|
|
|
|
try {
|
|
sql.execute(`DELETE FROM notes_fts WHERE noteId = ?`, [noteId]);
|
|
} catch (error) {
|
|
log.error(`Failed to remove note ${noteId} from FTS index: ${error}`);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Rebuild the entire FTS index
|
|
* Simple and straightforward - let SQLite handle it
|
|
*/
|
|
rebuildIndex(): void {
|
|
if (!this.checkFTS5Availability()) {
|
|
log.error("Cannot rebuild FTS index - FTS5 not available");
|
|
return;
|
|
}
|
|
|
|
log.info("Rebuilding FTS5 index...");
|
|
|
|
try {
|
|
sql.transactional(() => {
|
|
// Clear existing index
|
|
sql.execute(`DELETE FROM notes_fts`);
|
|
|
|
// Rebuild from notes
|
|
sql.execute(`
|
|
INSERT INTO notes_fts (noteId, title, content)
|
|
SELECT
|
|
n.noteId,
|
|
n.title,
|
|
SUBSTR(b.content, 1, 500000)
|
|
FROM notes n
|
|
LEFT JOIN blobs b ON n.blobId = b.blobId
|
|
WHERE n.type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
|
|
AND n.isDeleted = 0
|
|
AND n.isProtected = 0
|
|
AND b.content IS NOT NULL
|
|
`);
|
|
|
|
// Optimize the index
|
|
sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`);
|
|
});
|
|
|
|
log.info("FTS5 index rebuild completed");
|
|
} catch (error) {
|
|
log.error(`Failed to rebuild FTS index: ${error}`);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Optimize the FTS index
|
|
* Simple optimization - no complex logic
|
|
*/
|
|
optimizeIndex(): void {
|
|
if (!this.checkFTS5Availability()) {
|
|
return;
|
|
}
|
|
|
|
try {
|
|
log.info("Optimizing FTS5 index...");
|
|
|
|
// Simple optimization command
|
|
sql.execute(`INSERT INTO notes_fts(notes_fts) VALUES('optimize')`);
|
|
|
|
// Update statistics for query planner
|
|
sql.execute(`ANALYZE notes_fts`);
|
|
|
|
log.info("FTS5 index optimization completed");
|
|
} catch (error) {
|
|
log.error(`Failed to optimize FTS index: ${error}`);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get basic statistics about the FTS index
|
|
*/
|
|
getIndexStats(): {
|
|
totalDocuments: number;
|
|
tableExists: boolean;
|
|
} {
|
|
if (!this.checkFTS5Availability()) {
|
|
return {
|
|
totalDocuments: 0,
|
|
tableExists: false
|
|
};
|
|
}
|
|
|
|
try {
|
|
const totalDocuments = sql.getValue<number>(`
|
|
SELECT COUNT(*) FROM notes_fts
|
|
`) || 0;
|
|
|
|
return {
|
|
totalDocuments,
|
|
tableExists: true
|
|
};
|
|
} catch (error) {
|
|
log.error(`Failed to get index stats: ${error}`);
|
|
return {
|
|
totalDocuments: 0,
|
|
tableExists: false
|
|
};
|
|
}
|
|
}
|
|
}
|
|
|
|
// Export singleton instance
|
|
export const minimalFTSSearchService = new MinimalFTSSearchService();
|
|
|
|
export default minimalFTSSearchService; |