feat(quick_search): only "fallback" to fuzzy search, if there aren't that many search results found from user's query

This commit is contained in:
perf3ct 2025-08-03 20:43:16 +00:00
parent 9cef8c8e70
commit e9409577db
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 103 additions and 16 deletions

View File

@ -183,12 +183,12 @@ class NoteFlatTextExp extends Expression {
return true;
}
// Fuzzy fallback only for tokens >= 4 characters
if (token.length >= 4) {
// Fuzzy fallback only if enabled and for tokens >= 4 characters
if (searchContext?.enableFuzzyMatching && token.length >= 4) {
const matchedWord = fuzzyMatchWordWithResult(token, text);
if (matchedWord) {
// Track the fuzzy matched word for highlighting
if (searchContext && !searchContext.highlightedTokens.includes(matchedWord)) {
if (!searchContext.highlightedTokens.includes(matchedWord)) {
searchContext.highlightedTokens.push(matchedWord);
}
return true;

View File

@ -18,6 +18,7 @@ class SearchContext {
debug?: boolean;
debugInfo: {} | null;
fuzzyAttributeSearch: boolean;
enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase
highlightedTokens: string[];
originalQuery: string;
fulltextQuery: string;
@ -45,6 +46,7 @@ class SearchContext {
this.debug = params.debug;
this.debugInfo = null;
this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch;
this.enableFuzzyMatching = true; // Default to true for backward compatibility
this.highlightedTokens = [];
this.originalQuery = "";
this.fulltextQuery = ""; // complete fulltext part

View File

@ -20,7 +20,11 @@ const SCORE_WEIGHTS = {
TOKEN_FUZZY_MATCH: 0.5,
TITLE_FACTOR: 2.0,
PATH_FACTOR: 0.3,
HIDDEN_NOTE_PENALTY: 3
HIDDEN_NOTE_PENALTY: 3,
// Score caps to prevent fuzzy matches from outranking exact matches
MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches
MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches
MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search
} as const;
@ -31,11 +35,13 @@ class SearchResult {
highlightedNotePathTitle?: string;
contentSnippet?: string;
highlightedContentSnippet?: string;
private fuzzyScore: number; // Track fuzzy score separately
constructor(notePathArray: string[]) {
this.notePathArray = notePathArray;
this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray);
this.score = 0;
this.fuzzyScore = 0;
}
get notePath() {
@ -46,8 +52,9 @@ class SearchResult {
return this.notePathArray[this.notePathArray.length - 1];
}
computeScore(fulltextQuery: string, tokens: string[]) {
computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) {
this.score = 0;
this.fuzzyScore = 0; // Reset fuzzy score tracking
const note = becca.notes[this.noteId];
const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase());
@ -65,22 +72,23 @@ class SearchResult {
this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH;
} else if (this.isWordMatch(normalizedTitle, normalizedQuery)) {
this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH;
} else {
// Try fuzzy matching for typos
} else if (enableFuzzyMatching) {
// Try fuzzy matching for typos only if enabled
const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery);
this.score += fuzzyScore;
this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions
}
// Add scores for token matches
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR);
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR);
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);
if (note.isInHiddenSubtree()) {
this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
}
}
addScoreForStrings(tokens: string[], str: string, factor: number) {
addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) {
const normalizedStr = normalizeSearchText(str.toLowerCase());
const chunks = normalizedStr.split(" ");
@ -96,11 +104,22 @@ class SearchResult {
} else if (chunk.includes(normalizedToken)) {
tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor;
} else {
// Try fuzzy matching for individual tokens
// Try fuzzy matching for individual tokens with caps applied
const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) {
if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
tokenScore += fuzzyWeight * token.length * factor;
// Apply caps: limit token length multiplier and per-token contribution
const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
const fuzzyTokenScore = Math.min(
fuzzyWeight * cappedTokenLength * factor,
SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
);
tokenScore += fuzzyTokenScore;
this.fuzzyScore += fuzzyTokenScore;
}
}
}
@ -119,9 +138,14 @@ class SearchResult {
}
/**
* Calculates fuzzy matching score for title matches
* Calculates fuzzy matching score for title matches with caps applied
*/
private calculateFuzzyTitleScore(title: string, query: string): number {
// Check if we've already hit the fuzzy scoring cap
if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
return 0;
}
const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
const maxLen = Math.max(title.length, query.length);
@ -130,7 +154,10 @@ class SearchResult {
editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE &&
editDistance / maxLen <= 0.3) {
const similarity = 1 - (editDistance / maxLen);
return SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
// Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
}
return 0;

View File

@ -237,6 +237,28 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
loadNeededInfoFromDatabase();
}
// Phase 1: Try exact matches first (without fuzzy matching)
const exactResults = performSearch(expression, searchContext, false);
// Check if we have sufficient high-quality results
const minResultThreshold = 5;
const minScoreForQuality = 10; // Minimum score to consider a result "high quality"
const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);
// If we have enough high-quality exact matches, return them
if (highQualityResults.length >= minResultThreshold) {
return exactResults;
}
// Phase 2: Add fuzzy matching as fallback
const fuzzyResults = performSearch(expression, searchContext, true);
// Merge results, ensuring exact matches always rank higher than fuzzy matches
return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
}
function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] {
const allNoteSet = becca.getAllNoteSet();
const noteIdToNotePath: Record<string, string[]> = {};
@ -244,6 +266,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
noteIdToNotePath
};
// Store original fuzzy setting and temporarily override it
const originalFuzzyMatching = searchContext.enableFuzzyMatching;
searchContext.enableFuzzyMatching = enableFuzzyMatching;
const noteSet = expression.execute(allNoteSet, executionContext, searchContext);
const searchResults = noteSet.notes.map((note) => {
@ -257,9 +283,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
});
for (const res of searchResults) {
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens);
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching);
}
// Restore original fuzzy setting
searchContext.enableFuzzyMatching = originalFuzzyMatching;
if (!noteSet.sorted) {
searchResults.sort((a, b) => {
if (a.score > b.score) {
@ -281,6 +310,35 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
return searchResults;
}
function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] {
// Create a map of exact result note IDs for deduplication
const exactNoteIds = new Set(exactResults.map(result => result.noteId));
// Add fuzzy results that aren't already in exact results
const additionalFuzzyResults = fuzzyResults.filter(result => !exactNoteIds.has(result.noteId));
// Combine results with exact matches first, then fuzzy matches
const combinedResults = [...exactResults, ...additionalFuzzyResults];
// Sort combined results by score
combinedResults.sort((a, b) => {
if (a.score > b.score) {
return -1;
} else if (a.score < b.score) {
return 1;
}
// if score does not decide then sort results by depth of the note.
if (a.notePathArray.length === b.notePathArray.length) {
return a.notePathTitle < b.notePathTitle ? -1 : 1;
}
return a.notePathArray.length < b.notePathArray.length ? -1 : 1;
});
return combinedResults;
}
function parseQueryToExpression(query: string, searchContext: SearchContext) {
const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query);
searchContext.fulltextQuery = fulltextQuery;