mirror of
https://github.com/zadam/trilium.git
synced 2025-10-29 10:39:00 +01:00
feat(quick_search): only fall back to fuzzy search if the user's query yields too few search results
This commit is contained in:
parent
9cef8c8e70
commit
e9409577db
@ -183,12 +183,12 @@ class NoteFlatTextExp extends Expression {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fuzzy fallback only for tokens >= 4 characters
|
||||
if (token.length >= 4) {
|
||||
// Fuzzy fallback only if enabled and for tokens >= 4 characters
|
||||
if (searchContext?.enableFuzzyMatching && token.length >= 4) {
|
||||
const matchedWord = fuzzyMatchWordWithResult(token, text);
|
||||
if (matchedWord) {
|
||||
// Track the fuzzy matched word for highlighting
|
||||
if (searchContext && !searchContext.highlightedTokens.includes(matchedWord)) {
|
||||
if (!searchContext.highlightedTokens.includes(matchedWord)) {
|
||||
searchContext.highlightedTokens.push(matchedWord);
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -18,6 +18,7 @@ class SearchContext {
|
||||
debug?: boolean;
|
||||
debugInfo: {} | null;
|
||||
fuzzyAttributeSearch: boolean;
|
||||
enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase
|
||||
highlightedTokens: string[];
|
||||
originalQuery: string;
|
||||
fulltextQuery: string;
|
||||
@ -45,6 +46,7 @@ class SearchContext {
|
||||
this.debug = params.debug;
|
||||
this.debugInfo = null;
|
||||
this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch;
|
||||
this.enableFuzzyMatching = true; // Default to true for backward compatibility
|
||||
this.highlightedTokens = [];
|
||||
this.originalQuery = "";
|
||||
this.fulltextQuery = ""; // complete fulltext part
|
||||
|
||||
@ -20,7 +20,11 @@ const SCORE_WEIGHTS = {
|
||||
TOKEN_FUZZY_MATCH: 0.5,
|
||||
TITLE_FACTOR: 2.0,
|
||||
PATH_FACTOR: 0.3,
|
||||
HIDDEN_NOTE_PENALTY: 3
|
||||
HIDDEN_NOTE_PENALTY: 3,
|
||||
// Score caps to prevent fuzzy matches from outranking exact matches
|
||||
MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches
|
||||
MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches
|
||||
MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search
|
||||
} as const;
|
||||
|
||||
|
||||
@ -31,11 +35,13 @@ class SearchResult {
|
||||
highlightedNotePathTitle?: string;
|
||||
contentSnippet?: string;
|
||||
highlightedContentSnippet?: string;
|
||||
private fuzzyScore: number; // Track fuzzy score separately
|
||||
|
||||
constructor(notePathArray: string[]) {
|
||||
this.notePathArray = notePathArray;
|
||||
this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray);
|
||||
this.score = 0;
|
||||
this.fuzzyScore = 0;
|
||||
}
|
||||
|
||||
get notePath() {
|
||||
@ -46,8 +52,9 @@ class SearchResult {
|
||||
return this.notePathArray[this.notePathArray.length - 1];
|
||||
}
|
||||
|
||||
computeScore(fulltextQuery: string, tokens: string[]) {
|
||||
computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) {
|
||||
this.score = 0;
|
||||
this.fuzzyScore = 0; // Reset fuzzy score tracking
|
||||
|
||||
const note = becca.notes[this.noteId];
|
||||
const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase());
|
||||
@ -65,22 +72,23 @@ class SearchResult {
|
||||
this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH;
|
||||
} else if (this.isWordMatch(normalizedTitle, normalizedQuery)) {
|
||||
this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH;
|
||||
} else {
|
||||
// Try fuzzy matching for typos
|
||||
} else if (enableFuzzyMatching) {
|
||||
// Try fuzzy matching for typos only if enabled
|
||||
const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery);
|
||||
this.score += fuzzyScore;
|
||||
this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions
|
||||
}
|
||||
|
||||
// Add scores for token matches
|
||||
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR);
|
||||
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR);
|
||||
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
|
||||
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);
|
||||
|
||||
if (note.isInHiddenSubtree()) {
|
||||
this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
|
||||
}
|
||||
}
|
||||
|
||||
addScoreForStrings(tokens: string[], str: string, factor: number) {
|
||||
addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) {
|
||||
const normalizedStr = normalizeSearchText(str.toLowerCase());
|
||||
const chunks = normalizedStr.split(" ");
|
||||
|
||||
@ -96,11 +104,22 @@ class SearchResult {
|
||||
} else if (chunk.includes(normalizedToken)) {
|
||||
tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor;
|
||||
} else {
|
||||
// Try fuzzy matching for individual tokens
|
||||
// Try fuzzy matching for individual tokens with caps applied
|
||||
const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) {
|
||||
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
|
||||
this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
|
||||
|
||||
const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
tokenScore += fuzzyWeight * token.length * factor;
|
||||
// Apply caps: limit token length multiplier and per-token contribution
|
||||
const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
|
||||
const fuzzyTokenScore = Math.min(
|
||||
fuzzyWeight * cappedTokenLength * factor,
|
||||
SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
|
||||
);
|
||||
|
||||
tokenScore += fuzzyTokenScore;
|
||||
this.fuzzyScore += fuzzyTokenScore;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -119,9 +138,14 @@ class SearchResult {
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates fuzzy matching score for title matches
|
||||
* Calculates fuzzy matching score for title matches with caps applied
|
||||
*/
|
||||
private calculateFuzzyTitleScore(title: string, query: string): number {
|
||||
// Check if we've already hit the fuzzy scoring cap
|
||||
if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
|
||||
const maxLen = Math.max(title.length, query.length);
|
||||
|
||||
@ -130,7 +154,10 @@ class SearchResult {
|
||||
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
|
||||
editDistance / maxLen <= 0.3) {
|
||||
const similarity = 1 - (editDistance / maxLen);
|
||||
return SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
|
||||
const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
|
||||
|
||||
// Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
|
||||
return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@ -237,6 +237,28 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
|
||||
loadNeededInfoFromDatabase();
|
||||
}
|
||||
|
||||
// Phase 1: Try exact matches first (without fuzzy matching)
|
||||
const exactResults = performSearch(expression, searchContext, false);
|
||||
|
||||
// Check if we have sufficient high-quality results
|
||||
const minResultThreshold = 5;
|
||||
const minScoreForQuality = 10; // Minimum score to consider a result "high quality"
|
||||
|
||||
const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);
|
||||
|
||||
// If we have enough high-quality exact matches, return them
|
||||
if (highQualityResults.length >= minResultThreshold) {
|
||||
return exactResults;
|
||||
}
|
||||
|
||||
// Phase 2: Add fuzzy matching as fallback
|
||||
const fuzzyResults = performSearch(expression, searchContext, true);
|
||||
|
||||
// Merge results, ensuring exact matches always rank higher than fuzzy matches
|
||||
return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
|
||||
}
|
||||
|
||||
function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] {
|
||||
const allNoteSet = becca.getAllNoteSet();
|
||||
|
||||
const noteIdToNotePath: Record<string, string[]> = {};
|
||||
@ -244,6 +266,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
|
||||
noteIdToNotePath
|
||||
};
|
||||
|
||||
// Store original fuzzy setting and temporarily override it
|
||||
const originalFuzzyMatching = searchContext.enableFuzzyMatching;
|
||||
searchContext.enableFuzzyMatching = enableFuzzyMatching;
|
||||
|
||||
const noteSet = expression.execute(allNoteSet, executionContext, searchContext);
|
||||
|
||||
const searchResults = noteSet.notes.map((note) => {
|
||||
@ -257,9 +283,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
|
||||
});
|
||||
|
||||
for (const res of searchResults) {
|
||||
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens);
|
||||
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching);
|
||||
}
|
||||
|
||||
// Restore original fuzzy setting
|
||||
searchContext.enableFuzzyMatching = originalFuzzyMatching;
|
||||
|
||||
if (!noteSet.sorted) {
|
||||
searchResults.sort((a, b) => {
|
||||
if (a.score > b.score) {
|
||||
@ -281,6 +310,35 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
|
||||
return searchResults;
|
||||
}
|
||||
|
||||
/**
 * Merges the results of two search passes into a single ranked list.
 *
 * Every entry of `exactResults` is kept. An entry of `fuzzyResults` is added only
 * when its `noteId` does not already occur in `exactResults`, so each note appears
 * at most once in the output.
 *
 * The merged list is ordered by:
 *  1. `score`, highest first;
 *  2. note path depth (`notePathArray.length`), shallowest first;
 *  3. `notePathTitle`, ascending.
 *
 * Results that tie on all three keys keep their insertion order (exact results
 * before fuzzy-only results), because the comparator reports them as equal and
 * `Array.prototype.sort` is stable. Note that ordering is otherwise purely
 * score-based: a fuzzy-only result with a higher score sorts ahead of an exact one.
 *
 * Neither input array is mutated.
 *
 * @param exactResults results of the pass without fuzzy matching
 * @param fuzzyResults results of the pass with fuzzy matching enabled
 * @returns a new, sorted array containing the de-duplicated union of both inputs
 */
function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] {
    // Note IDs already covered by the exact pass; used to drop duplicates from the fuzzy pass.
    const exactNoteIds = new Set(exactResults.map((result) => result.noteId));

    const additionalFuzzyResults = fuzzyResults.filter((result) => !exactNoteIds.has(result.noteId));

    // Exact results go first so that they win full ties under the stable sort below.
    const combinedResults = [...exactResults, ...additionalFuzzyResults];

    combinedResults.sort((a, b) => {
        if (a.score > b.score) {
            return -1;
        }
        if (a.score < b.score) {
            return 1;
        }

        // If the score does not decide, prefer the shallower note path.
        if (a.notePathArray.length !== b.notePathArray.length) {
            return a.notePathArray.length < b.notePathArray.length ? -1 : 1;
        }

        if (a.notePathTitle < b.notePathTitle) {
            return -1;
        }
        if (a.notePathTitle > b.notePathTitle) {
            return 1;
        }

        // Full tie: report equality so the comparator stays consistent
        // (compare(a, b) and compare(b, a) must not both claim "a after b").
        return 0;
    });

    return combinedResults;
}
|
||||
|
||||
function parseQueryToExpression(query: string, searchContext: SearchContext) {
|
||||
const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query);
|
||||
searchContext.fulltextQuery = fulltextQuery;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user