diff --git a/apps/server/src/services/search/expressions/note_flat_text.ts b/apps/server/src/services/search/expressions/note_flat_text.ts index d77ac06a9..5bd48a0ef 100644 --- a/apps/server/src/services/search/expressions/note_flat_text.ts +++ b/apps/server/src/services/search/expressions/note_flat_text.ts @@ -183,12 +183,12 @@ class NoteFlatTextExp extends Expression { return true; } - // Fuzzy fallback only for tokens >= 4 characters - if (token.length >= 4) { + // Fuzzy fallback only if enabled and for tokens >= 4 characters + if (searchContext?.enableFuzzyMatching && token.length >= 4) { const matchedWord = fuzzyMatchWordWithResult(token, text); if (matchedWord) { // Track the fuzzy matched word for highlighting - if (searchContext && !searchContext.highlightedTokens.includes(matchedWord)) { + if (!searchContext.highlightedTokens.includes(matchedWord)) { searchContext.highlightedTokens.push(matchedWord); } return true; diff --git a/apps/server/src/services/search/search_context.ts b/apps/server/src/services/search/search_context.ts index 29fb7dbda..314c7e7ce 100644 --- a/apps/server/src/services/search/search_context.ts +++ b/apps/server/src/services/search/search_context.ts @@ -18,6 +18,7 @@ class SearchContext { debug?: boolean; debugInfo: {} | null; fuzzyAttributeSearch: boolean; + enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase highlightedTokens: string[]; originalQuery: string; fulltextQuery: string; @@ -45,6 +46,7 @@ class SearchContext { this.debug = params.debug; this.debugInfo = null; this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch; + this.enableFuzzyMatching = true; // Default to true for backward compatibility this.highlightedTokens = []; this.originalQuery = ""; this.fulltextQuery = ""; // complete fulltext part diff --git a/apps/server/src/services/search/search_result.ts b/apps/server/src/services/search/search_result.ts index 88773584d..9d7aa247c 100644 --- a/apps/server/src/services/search/search_result.ts +++ b/apps/server/src/services/search/search_result.ts @@ -20,7 +20,11 @@ const SCORE_WEIGHTS = { TOKEN_FUZZY_MATCH: 0.5, TITLE_FACTOR: 2.0, PATH_FACTOR: 0.3, - HIDDEN_NOTE_PENALTY: 3 + HIDDEN_NOTE_PENALTY: 3, + // Score caps to prevent fuzzy matches from outranking exact matches + MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches + MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches + MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search } as const; @@ -31,11 +35,13 @@ class SearchResult { highlightedNotePathTitle?: string; contentSnippet?: string; highlightedContentSnippet?: string; + private fuzzyScore: number; // Track fuzzy score separately constructor(notePathArray: string[]) { this.notePathArray = notePathArray; this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray); this.score = 0; + this.fuzzyScore = 0; } get notePath() { @@ -46,8 +52,9 @@ class SearchResult { return this.notePathArray[this.notePathArray.length - 1]; } - computeScore(fulltextQuery: string, tokens: string[]) { + computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) { this.score = 0; + this.fuzzyScore = 0; // Reset fuzzy score tracking const note = becca.notes[this.noteId]; const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase()); @@ -65,22 +72,23 @@ class SearchResult { this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH; } else if (this.isWordMatch(normalizedTitle, normalizedQuery)) { this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH; - } else { - // Try fuzzy matching for typos + } else if (enableFuzzyMatching) { + // Try fuzzy matching for typos only if enabled const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery); this.score += fuzzyScore; + this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions } // Add scores for token matches - this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR); - this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR); + this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching); + this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching); if (note.isInHiddenSubtree()) { this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY; } } - addScoreForStrings(tokens: string[], str: string, factor: number) { + addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) { const normalizedStr = normalizeSearchText(str.toLowerCase()); const chunks = normalizedStr.split(" "); @@ -96,11 +104,22 @@ class SearchResult { } else if (chunk.includes(normalizedToken)) { tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor; } else { - // Try fuzzy matching for individual tokens + // Try fuzzy matching for individual tokens with caps applied const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); - if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) { + if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && + normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH && + this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) { + const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); - tokenScore += fuzzyWeight * token.length * factor; + // Apply caps: limit token length multiplier and per-token contribution + const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER); + const fuzzyTokenScore = Math.min( + fuzzyWeight * cappedTokenLength * factor, + SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN + ); + + tokenScore += fuzzyTokenScore; + this.fuzzyScore += fuzzyTokenScore; } } } @@ -119,9 +138,14 @@ class SearchResult { } /** - * Calculates fuzzy matching score for title matches + * Calculates fuzzy matching score for title matches with caps applied */ private calculateFuzzyTitleScore(title: string, query: string): number { + // Check if we've already hit the fuzzy scoring cap + if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) { + return 0; + } + const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE); const maxLen = Math.max(title.length, query.length); @@ -130,7 +154,10 @@ class SearchResult { editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && editDistance / maxLen <= 0.3) { const similarity = 1 - (editDistance / maxLen); - return SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches + const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches + + // Apply cap to ensure fuzzy title matches don't exceed reasonable bounds + return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3); } return 0; diff --git a/apps/server/src/services/search/services/search.ts b/apps/server/src/services/search/services/search.ts index de3415744..fbc4c3ae4 100644 --- a/apps/server/src/services/search/services/search.ts +++ b/apps/server/src/services/search/services/search.ts @@ -237,6 +237,28 @@ function findResultsWithExpression(expression: Expression, searchContext: Search loadNeededInfoFromDatabase(); } + // Phase 1: Try exact matches first (without fuzzy matching) + const exactResults = performSearch(expression, searchContext, false); + + // Check if we have sufficient high-quality results + const minResultThreshold = 5; + const minScoreForQuality = 10; // Minimum score to consider a result "high quality" + + const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality); + + // If we have enough high-quality exact matches, return them + if (highQualityResults.length >= minResultThreshold) { + return exactResults; + } + + // Phase 2: Add fuzzy matching as fallback + const fuzzyResults = performSearch(expression, searchContext, true); + + // Merge results, ensuring exact matches always rank higher than fuzzy matches + return mergeExactAndFuzzyResults(exactResults, fuzzyResults); +} + +function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] { const allNoteSet = becca.getAllNoteSet(); const noteIdToNotePath: Record = {}; @@ -244,6 +266,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search noteIdToNotePath }; + // Store original fuzzy setting and temporarily override it + const originalFuzzyMatching = searchContext.enableFuzzyMatching; + searchContext.enableFuzzyMatching = enableFuzzyMatching; + const noteSet = expression.execute(allNoteSet, executionContext, searchContext); const searchResults = noteSet.notes.map((note) => { @@ -257,9 +283,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search }); for (const res of searchResults) { - res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens); + res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching); } + // Restore original fuzzy setting + searchContext.enableFuzzyMatching = originalFuzzyMatching; + if (!noteSet.sorted) { searchResults.sort((a, b) => { if (a.score > b.score) { @@ -281,6 +310,35 @@ function findResultsWithExpression(expression: Expression, searchContext: Search return searchResults; } +function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] { + // Create a map of exact result note IDs for deduplication + const exactNoteIds = new Set(exactResults.map(result => result.noteId)); + + // Add fuzzy results that aren't already in exact results + const additionalFuzzyResults = fuzzyResults.filter(result => !exactNoteIds.has(result.noteId)); + + // Combine results with exact matches first, then fuzzy matches + const combinedResults = [...exactResults, ...additionalFuzzyResults]; + + // Sort combined results by score + combinedResults.sort((a, b) => { + if (a.score > b.score) { + return -1; + } else if (a.score < b.score) { + return 1; + } + + // if score does not decide then sort results by depth of the note. + if (a.notePathArray.length === b.notePathArray.length) { + return a.notePathTitle < b.notePathTitle ? -1 : 1; + } + + return a.notePathArray.length < b.notePathArray.length ? -1 : 1; + }); + + return combinedResults; +} + function parseQueryToExpression(query: string, searchContext: SearchContext) { const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query); searchContext.fulltextQuery = fulltextQuery;