feat(quick_search): only fall back to fuzzy search if the user's query does not return enough exact search results

2025-12-14 03:14:24 +01:00 · 2025-08-03 20:43:16 +00:00 · 2025-08-03 20:43:16 +00:00 · e9409577db
commit e9409577db
parent 9cef8c8e70
4 changed files with 103 additions and 16 deletions
--- a/apps/server/src/services/search/expressions/note_flat_text.ts
+++ b/apps/server/src/services/search/expressions/note_flat_text.ts
@ -183,12 +183,12 @@ class NoteFlatTextExp extends Expression {
            return true;
        }
        
-        // Fuzzy fallback only for tokens >= 4 characters
-        if (token.length >= 4) {
+        // Fuzzy fallback only if enabled and for tokens >= 4 characters
+        if (searchContext?.enableFuzzyMatching && token.length >= 4) {
            const matchedWord = fuzzyMatchWordWithResult(token, text);
            if (matchedWord) {
                // Track the fuzzy matched word for highlighting
-                if (searchContext && !searchContext.highlightedTokens.includes(matchedWord)) {
+                if (!searchContext.highlightedTokens.includes(matchedWord)) {
                    searchContext.highlightedTokens.push(matchedWord);
                }
                return true;
--- a/apps/server/src/services/search/search_context.ts
+++ b/apps/server/src/services/search/search_context.ts
@ -18,6 +18,7 @@ class SearchContext {
    debug?: boolean;
    debugInfo: {} | null;
    fuzzyAttributeSearch: boolean;
+    enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase
    highlightedTokens: string[];
    originalQuery: string;
    fulltextQuery: string;
@ -45,6 +46,7 @@ class SearchContext {
        this.debug = params.debug;
        this.debugInfo = null;
        this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch;
+        this.enableFuzzyMatching = true; // Default to true for backward compatibility
        this.highlightedTokens = [];
        this.originalQuery = "";
        this.fulltextQuery = ""; // complete fulltext part
--- a/apps/server/src/services/search/search_result.ts
+++ b/apps/server/src/services/search/search_result.ts
@ -20,7 +20,11 @@ const SCORE_WEIGHTS = {
    TOKEN_FUZZY_MATCH: 0.5,
    TITLE_FACTOR: 2.0,
    PATH_FACTOR: 0.3,
-    HIDDEN_NOTE_PENALTY: 3
+    HIDDEN_NOTE_PENALTY: 3,
+    // Score caps to prevent fuzzy matches from outranking exact matches
+    MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches
+    MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches
+    MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search
 } as const;


@ -31,11 +35,13 @@ class SearchResult {
    highlightedNotePathTitle?: string;
    contentSnippet?: string;
    highlightedContentSnippet?: string;
+    private fuzzyScore: number; // Track fuzzy score separately

    constructor(notePathArray: string[]) {
        this.notePathArray = notePathArray;
        this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray);
        this.score = 0;
+        this.fuzzyScore = 0;
    }

    get notePath() {
@ -46,8 +52,9 @@ class SearchResult {
        return this.notePathArray[this.notePathArray.length - 1];
    }

-    computeScore(fulltextQuery: string, tokens: string[]) {
+    computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) {
        this.score = 0;
+        this.fuzzyScore = 0; // Reset fuzzy score tracking

        const note = becca.notes[this.noteId];
        const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase());
@ -65,22 +72,23 @@ class SearchResult {
            this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH;
        } else if (this.isWordMatch(normalizedTitle, normalizedQuery)) {
            this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH;
-        } else {
-            // Try fuzzy matching for typos
+        } else if (enableFuzzyMatching) {
+            // Try fuzzy matching for typos only if enabled
            const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery);
            this.score += fuzzyScore;
+            this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions
        }

        // Add scores for token matches
-        this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR);
-        this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR);
+        this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
+        this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);

        if (note.isInHiddenSubtree()) {
            this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
        }
    }

-    addScoreForStrings(tokens: string[], str: string, factor: number) {
+    addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) {
        const normalizedStr = normalizeSearchText(str.toLowerCase());
        const chunks = normalizedStr.split(" ");

@ -96,11 +104,22 @@ class SearchResult {
                } else if (chunk.includes(normalizedToken)) {
                    tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor;
                } else {
-                    // Try fuzzy matching for individual tokens
+                    // Try fuzzy matching for individual tokens with caps applied
                    const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
-                    if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) {
+                    if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && 
+                        normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
+                        this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
+                        
                        const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
-                        tokenScore += fuzzyWeight * token.length * factor;
+                        // Apply caps: limit token length multiplier and per-token contribution
+                        const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
+                        const fuzzyTokenScore = Math.min(
+                            fuzzyWeight * cappedTokenLength * factor,
+                            SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
+                        );
+                        
+                        tokenScore += fuzzyTokenScore;
+                        this.fuzzyScore += fuzzyTokenScore;
                    }
                }
            }
@ -119,9 +138,14 @@ class SearchResult {
    }

    /**
-     * Calculates fuzzy matching score for title matches
+     * Calculates fuzzy matching score for title matches with caps applied
     */
    private calculateFuzzyTitleScore(title: string, query: string): number {
+        // Check if we've already hit the fuzzy scoring cap
+        if (this.fuzzyScore >= SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE) {
+            return 0;
+        }
+        
        const editDistance = calculateOptimizedEditDistance(title, query, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);
        const maxLen = Math.max(title.length, query.length);
        
@ -130,7 +154,10 @@ class SearchResult {
            editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE && 
            editDistance / maxLen <= 0.3) {
            const similarity = 1 - (editDistance / maxLen);
-            return SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
+            const baseFuzzyScore = SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7; // Reduced weight for fuzzy matches
+            
+            // Apply cap to ensure fuzzy title matches don't exceed reasonable bounds
+            return Math.min(baseFuzzyScore, SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE * 0.3);
        }
        
        return 0;
--- a/apps/server/src/services/search/services/search.ts
+++ b/apps/server/src/services/search/services/search.ts
@ -237,6 +237,28 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
        loadNeededInfoFromDatabase();
    }

+    // Phase 1: Try exact matches first (without fuzzy matching)
+    const exactResults = performSearch(expression, searchContext, false);
+    
+    // Check if we have sufficient high-quality results
+    const minResultThreshold = 5;
+    const minScoreForQuality = 10; // Minimum score to consider a result "high quality"
+    
+    const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);
+    
+    // If we have enough high-quality exact matches, return them
+    if (highQualityResults.length >= minResultThreshold) {
+        return exactResults;
+    }
+    
+    // Phase 2: Add fuzzy matching as fallback
+    const fuzzyResults = performSearch(expression, searchContext, true);
+    
+    // Merge results, ensuring exact matches always rank higher than fuzzy matches
+    return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
+}
+
+function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] {
    const allNoteSet = becca.getAllNoteSet();

    const noteIdToNotePath: Record<string, string[]> = {};
@ -244,6 +266,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
        noteIdToNotePath
    };

+    // Store original fuzzy setting and temporarily override it
+    const originalFuzzyMatching = searchContext.enableFuzzyMatching;
+    searchContext.enableFuzzyMatching = enableFuzzyMatching;
+
    const noteSet = expression.execute(allNoteSet, executionContext, searchContext);

    const searchResults = noteSet.notes.map((note) => {
@ -257,9 +283,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
    });

    for (const res of searchResults) {
-        res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens);
+        res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching);
    }

+    // Restore original fuzzy setting
+    searchContext.enableFuzzyMatching = originalFuzzyMatching;
+
    if (!noteSet.sorted) {
        searchResults.sort((a, b) => {
            if (a.score > b.score) {
@ -281,6 +310,35 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
    return searchResults;
 }

+/**
+ * Merges the two search phases into one list in which every exact match
+ * precedes every fuzzy-only match.
+ *
+ * The relative order inside each input is preserved: both lists come from
+ * performSearch, which orders its results by score unless the note set is
+ * already sorted. Re-sorting the combined list by score would let a fuzzy
+ * match outrank a low-scoring exact match and would discard a pre-sorted
+ * order.
+ *
+ * @param exactResults results of the phase run without fuzzy matching
+ * @param fuzzyResults results of the phase run with fuzzy matching enabled
+ * @returns a new array; neither input array is modified
+ */
+function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] {
+    if (fuzzyResults.length === 0) {
+        return [...exactResults];
+    }
+
+    // A note found by both phases keeps its exact-phase result (and score).
+    const exactNoteIds = new Set(exactResults.map((result) => result.noteId));
+
+    // filter() keeps the incoming order of the fuzzy-only results.
+    const additionalFuzzyResults = fuzzyResults.filter((result) => !exactNoteIds.has(result.noteId));
+
+    // Exact matches first, fuzzy-only matches appended after them.
+    return [...exactResults, ...additionalFuzzyResults];
+}
+
 function parseQueryToExpression(query: string, searchContext: SearchContext) {
    const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query);
    searchContext.fulltextQuery = fulltextQuery;