feat(quick_search): fall back to fuzzy search only when the user's query returns too few exact results

This commit is contained in:
perf3ct 2025-08-03 20:43:16 +00:00
parent 9cef8c8e70
commit e9409577db
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 103 additions and 16 deletions

View File

@ -183,12 +183,12 @@ class NoteFlatTextExp extends Expression {
return true; return true;
} }
// Fuzzy fallback only for tokens >= 4 characters // Fuzzy fallback only if enabled and for tokens >= 4 characters
if (token.length >= 4) { if (searchContext?.enableFuzzyMatching && token.length >= 4) {
const matchedWord = fuzzyMatchWordWithResult(token, text); const matchedWord = fuzzyMatchWordWithResult(token, text);
if (matchedWord) { if (matchedWord) {
// Track the fuzzy matched word for highlighting // Track the fuzzy matched word for highlighting
if (searchContext && !searchContext.highlightedTokens.includes(matchedWord)) { if (!searchContext.highlightedTokens.includes(matchedWord)) {
searchContext.highlightedTokens.push(matchedWord); searchContext.highlightedTokens.push(matchedWord);
} }
return true; return true;

View File

@ -18,6 +18,7 @@ class SearchContext {
debug?: boolean; debug?: boolean;
debugInfo: {} | null; debugInfo: {} | null;
fuzzyAttributeSearch: boolean; fuzzyAttributeSearch: boolean;
enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase
highlightedTokens: string[]; highlightedTokens: string[];
originalQuery: string; originalQuery: string;
fulltextQuery: string; fulltextQuery: string;
@ -45,6 +46,7 @@ class SearchContext {
this.debug = params.debug; this.debug = params.debug;
this.debugInfo = null; this.debugInfo = null;
this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch; this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch;
this.enableFuzzyMatching = true; // Default to true for backward compatibility
this.highlightedTokens = []; this.highlightedTokens = [];
this.originalQuery = ""; this.originalQuery = "";
this.fulltextQuery = ""; // complete fulltext part this.fulltextQuery = ""; // complete fulltext part

View File

@ -20,7 +20,11 @@ const SCORE_WEIGHTS = {
TOKEN_FUZZY_MATCH: 0.5, TOKEN_FUZZY_MATCH: 0.5,
TITLE_FACTOR: 2.0, TITLE_FACTOR: 2.0,
PATH_FACTOR: 0.3, PATH_FACTOR: 0.3,
HIDDEN_NOTE_PENALTY: 3 HIDDEN_NOTE_PENALTY: 3,
// Score caps to prevent fuzzy matches from outranking exact matches
MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches
MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches
MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search
} as const; } as const;
@ -31,11 +35,13 @@ class SearchResult {
highlightedNotePathTitle?: string; highlightedNotePathTitle?: string;
contentSnippet?: string; contentSnippet?: string;
highlightedContentSnippet?: string; highlightedContentSnippet?: string;
private fuzzyScore: number; // Track fuzzy score separately
constructor(notePathArray: string[]) { constructor(notePathArray: string[]) {
this.notePathArray = notePathArray; this.notePathArray = notePathArray;
this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray); this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray);
this.score = 0; this.score = 0;
this.fuzzyScore = 0;
} }
get notePath() { get notePath() {
@ -46,8 +52,9 @@ class SearchResult {
return this.notePathArray[this.notePathArray.length - 1]; return this.notePathArray[this.notePathArray.length - 1];
} }
computeScore(fulltextQuery: string, tokens: string[]) { computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) {
this.score = 0; this.score = 0;
this.fuzzyScore = 0; // Reset fuzzy score tracking
const note = becca.notes[this.noteId]; const note = becca.notes[this.noteId];
const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase()); const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase());
@ -65,22 +72,23 @@ class SearchResult {
this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH; this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH;
} else if (this.isWordMatch(normalizedTitle, normalizedQuery)) { } else if (this.isWordMatch(normalizedTitle, normalizedQuery)) {
this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH; this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH;
} else { } else if (enableFuzzyMatching) {
// Try fuzzy matching for typos // Try fuzzy matching for typos only if enabled
const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery); const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery);
this.score += fuzzyScore; this.score += fuzzyScore;
this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions
} }
// Add scores for token matches // Add scores for token matches
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR); this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR); this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);
if (note.isInHiddenSubtree()) { if (note.isInHiddenSubtree()) {
this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY; this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
} }
} }
addScoreForStrings(tokens: string[], str: string, factor: number) { addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) {
const normalizedStr = normalizeSearchText(str.toLowerCase()); const normalizedStr = normalizeSearchText(str.toLowerCase());
const chunks = normalizedStr.split(" "); const chunks = normalizedStr.split(" ");
@ -96,11 +104,22 @@ class SearchResult {
} else if (chunk.includes(normalizedToken)) { } else if (chunk.includes(normalizedToken)) {
tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor; tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor;
} else { } else {
// Try fuzzy matching for individual tokens // Try fuzzy matching for individual tokens with caps applied
const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) { if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
tokenScore += fuzzyWeight * token.length * factor; // Apply caps: limit token length multiplier and per-token contribution
const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
const fuzzyTokenScore = Math.min(
fuzzyWeight * cappedTokenLength * factor,
SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
);
tokenScore += fuzzyTokenScore;
this.fuzzyScore += fuzzyTokenScore;
} }
} }
} }
@ -119,9 +138,14 @@ class SearchResult {
} }
/** /**
* Calculates fuzzy matching score for title matches * Calculates fuzzy matching score for title matches with caps applied
*/ */
private calculateFuzzyTitleScore(title: string, query: string): number { private calculateFuzzyTitleScore(title: string, query: string): number {
// Check if we've already hit the fuzzy scoring cap
if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
return 0;
}
const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
const maxLen = Math.max(title.length, query.length); const maxLen = Math.max(title.length, query.length);
@ -130,7 +154,10 @@ class SearchResult {
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
editDistance / maxLen <= 0.3) { editDistance / maxLen <= 0.3) {
const similarity = 1 - (editDistance / maxLen); const similarity = 1 - (editDistance / maxLen);
return SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
// Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
} }
return 0; return 0;

View File

@ -237,6 +237,28 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
loadNeededInfoFromDatabase(); loadNeededInfoFromDatabase();
} }
// Phase 1: Try exact matches first (without fuzzy matching)
const exactResults = performSearch(expression, searchContext, false);
// Check if we have sufficient high-quality results
const minResultThreshold = 5;
const minScoreForQuality = 10; // Minimum score to consider a result "high quality"
const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);
// If we have enough high-quality exact matches, return them
if (highQualityResults.length >= minResultThreshold) {
return exactResults;
}
// Phase 2: Add fuzzy matching as fallback
const fuzzyResults = performSearch(expression, searchContext, true);
// Merge results, ensuring exact matches always rank higher than fuzzy matches
return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
}
function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] {
const allNoteSet = becca.getAllNoteSet(); const allNoteSet = becca.getAllNoteSet();
const noteIdToNotePath: Record<string, string[]> = {}; const noteIdToNotePath: Record<string, string[]> = {};
@ -244,6 +266,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
noteIdToNotePath noteIdToNotePath
}; };
// Store original fuzzy setting and temporarily override it
const originalFuzzyMatching = searchContext.enableFuzzyMatching;
searchContext.enableFuzzyMatching = enableFuzzyMatching;
const noteSet = expression.execute(allNoteSet, executionContext, searchContext); const noteSet = expression.execute(allNoteSet, executionContext, searchContext);
const searchResults = noteSet.notes.map((note) => { const searchResults = noteSet.notes.map((note) => {
@ -257,9 +283,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
}); });
for (const res of searchResults) { for (const res of searchResults) {
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens); res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching);
} }
// Restore original fuzzy setting
searchContext.enableFuzzyMatching = originalFuzzyMatching;
if (!noteSet.sorted) { if (!noteSet.sorted) {
searchResults.sort((a, b) => { searchResults.sort((a, b) => {
if (a.score > b.score) { if (a.score > b.score) {
@ -281,6 +310,35 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
return searchResults; return searchResults;
} }
/**
 * Combines the results of an exact-only search pass with those of a fuzzy-enabled pass.
 *
 * Every exact result is kept. A fuzzy result is appended only when no exact result
 * shares its `noteId`, so a note found by both passes is represented by its
 * exact-pass entry.
 *
 * Exact results always come before fuzzy-only results, whatever their scores.
 * A single global sort by score would let a high-scoring fuzzy hit outrank a
 * genuine exact match, which is what this merge exists to prevent.
 *
 * Each group keeps the order in which it was passed in. The lists are expected to
 * arrive already ordered by the caller (by score, or by the query's own ordering),
 * and re-sorting here would discard an explicit ordering.
 *
 * @param exactResults results of the pass run without fuzzy matching
 * @param fuzzyResults results of the pass run with fuzzy matching
 * @returns a new array; neither input array is mutated
 */
function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] {
    // Note IDs already covered by the exact pass, used for deduplication.
    const exactNoteIds = new Set(exactResults.map((result) => result.noteId));

    // Keep only what fuzzy matching contributed on top of the exact pass.
    const fuzzyOnlyResults = fuzzyResults.filter((result) => !exactNoteIds.has(result.noteId));

    // Exact matches first, fuzzy-only matches after; no cross-group re-sorting.
    return [...exactResults, ...fuzzyOnlyResults];
}
function parseQueryToExpression(query: string, searchContext: SearchContext) { function parseQueryToExpression(query: string, searchContext: SearchContext) {
const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query); const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query);
searchContext.fulltextQuery = fulltextQuery; searchContext.fulltextQuery = fulltextQuery;