feat(quick_search): add tests for the updated progressive fuzzy search functionality

2025-12-05 15:04:24 +01:00 · 2025-08-03 21:02:56 +00:00 · 2025-08-03 21:02:56 +00:00 · 22740a6c8d
commit 22740a6c8d
parent e9409577db
2 changed files with 278 additions and 0 deletions
--- a/apps/server/src/services/search/services/progressive_search.spec.ts
+++ b/apps/server/src/services/search/services/progressive_search.spec.ts
@ -0,0 +1,241 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import searchService from "./search.js";
+import BNote from "../../../becca/entities/bnote.js";
+import BBranch from "../../../becca/entities/bbranch.js";
+import SearchContext from "../search_context.js";
+import becca from "../../../becca/becca.js";
+import { findNoteByTitle, note, NoteBuilder } from "../../../test/becca_mocking.js";
+
+describe("Progressive Search Strategy", () => {
+    // Builder around the root note. It is reassigned in `beforeEach`, so every test
+    // starts from a fresh instance. Typed as NoteBuilder (imported above) instead of
+    // `any` so that fixture calls such as `.child(...)` are type-checked.
+    let rootNote: NoteBuilder;
+
+    beforeEach(() => {
+        // Reset becca before building the fixtures for the next test.
+        becca.reset();
+
+        rootNote = new NoteBuilder(new BNote({ noteId: "root", title: "root", type: "text" }));
+        // The branch links "root" to the parent id "none". The instance is not kept;
+        // presumably the constructor registers it with becca (confirm in BBranch).
+        new BBranch({
+            branchId: "none_root",
+            noteId: "root",
+            parentNoteId: "none",
+            notePosition: 10
+        });
+    });
+
+    describe("Phase 1: Exact Matches Only", () => {
+        it("should complete search with exact matches when sufficient results found", () => {
+            // Five titles contain the literal token "test". The sixth only resembles it
+            // ("Tset") and deliberately does NOT contain "test" as a substring; otherwise
+            // it would itself be an exact match and the final assertion could never hold.
+            rootNote
+                .child(note("Test Document One"))
+                .child(note("Test Report Two"))
+                .child(note("Test Analysis Three"))
+                .child(note("Test Summary Four"))
+                .child(note("Test Review Five"))
+                .child(note("Tset Documnt Six")); // typo-only title, reachable only via fuzzy matching
+
+            const searchContext = new SearchContext();
+            const searchResults = searchService.findResultsWithQuery("test", searchContext);
+
+            // Exactly the five exact matches are expected; the typo note must not be processed.
+            expect(searchResults.length).toBe(5);
+
+            // Verify all results have high scores (exact matches)
+            const highQualityResults = searchResults.filter(result => result.score >= 10);
+            expect(highQualityResults.length).toBe(5);
+
+            // The typo document should not be in results since we have enough exact matches
+            expect(findNoteByTitle(searchResults, "Tset Documnt Six")).toBeFalsy();
+        });
+
+        it("should use exact match scoring only in Phase 1", () => {
+            // All three titles contain the literal substring "test".
+            rootNote
+                .child(note("Testing Exact Match"))
+                .child(note("Test Document"))
+                .child(note("Another Test"));
+
+            const searchContext = new SearchContext();
+            const searchResults = searchService.findResultsWithQuery("test", searchContext);
+
+            // Guard against a vacuous pass: with an empty result list the loop below
+            // would never run and the test would succeed without checking anything.
+            expect(searchResults.length).toBe(3);
+
+            // All results should have scores from exact matching only
+            for (const result of searchResults) {
+                expect(result.score).toBeGreaterThan(0);
+                // Scores should be from exact/prefix/contains matches, not fuzzy.
+                // The test treats "multiple of 0.5" as the signature of a fuzzy score.
+                // NOTE(review): an exact-match score that happens to be a multiple of 0.5
+                // (e.g. any integer) would fail this check — confirm the scorer rules that out.
+                expect(result.score % 0.5).not.toBe(0);
+            }
+        });
+    });
+
+    describe("Phase 2: Fuzzy Fallback", () => {
+        it("should trigger fuzzy matching when insufficient exact matches", () => {
+            // A handful of notes; none contains "analysis" verbatim, two are misspelled on purpose.
+            const titles = [
+                "Document One",
+                "Report Two",
+                "Anaylsis Three", // misspelling of "Analysis"
+                "Sumary Four" // misspelling of "Summary"
+            ];
+            for (const title of titles) {
+                rootNote.child(note(title));
+            }
+
+            const hits = searchService.findResultsWithQuery("analysis", new SearchContext());
+
+            // The misspelled note is expected to come back through fuzzy matching.
+            expect(hits.length).toBeGreaterThan(0);
+            expect(findNoteByTitle(hits, "Anaylsis Three")).toBeTruthy();
+        });
+
+        it("should merge exact and fuzzy results with exact matches ranked higher", () => {
+            const exactTitles = ["Analysis Report", "Data Analysis"];
+            const fuzzyTitles = ["Anaylsis Doc", "Statistical Anlaysis"];
+
+            // Register the exact titles first, then the misspelled ones.
+            for (const title of [...exactTitles, ...fuzzyTitles]) {
+                rootNote.child(note(title));
+            }
+
+            const ctx = new SearchContext();
+            const hits = searchService.findResultsWithQuery("analysis", ctx);
+
+            expect(hits.length).toBe(4);
+
+            // Partition the hits by the fixture group their note title belongs to.
+            const exactHits = hits.filter(hit => exactTitles.includes(becca.notes[hit.noteId].title));
+            const fuzzyHits = hits.filter(hit => fuzzyTitles.includes(becca.notes[hit.noteId].title));
+
+            expect(exactHits.length).toBe(2);
+            expect(fuzzyHits.length).toBe(2);
+
+            // Even the weakest exact match must outscore the strongest fuzzy match.
+            const exactScores = exactHits.map(hit => hit.score);
+            const fuzzyScores = fuzzyHits.map(hit => hit.score);
+
+            expect(Math.min(...exactScores)).toBeGreaterThan(Math.max(...fuzzyScores));
+        });
+
+        it("should not duplicate results between phases", () => {
+            rootNote.child(note("Test Document")); // candidate for both the exact and the fuzzy phase
+            rootNote.child(note("Tset Report")); // reachable only through fuzzy matching
+
+            const hits = searchService.findResultsWithQuery("test", new SearchContext());
+
+            // Each note id may appear at most once in the merged result list.
+            const seenIds = hits.map(hit => hit.noteId);
+            const distinctIds = Array.from(new Set(seenIds));
+
+            expect(seenIds.length).toBe(distinctIds.length);
+            for (const title of ["Test Document", "Tset Report"]) {
+                expect(findNoteByTitle(hits, title)).toBeTruthy();
+            }
+        });
+    });
+
+    describe("Result Sufficiency Thresholds", () => {
+        it("should respect minimum result count threshold", () => {
+            // Four exact matches — one short of the threshold of 5 — plus one misspelled title.
+            const exactTitles = ["Test One", "Test Two", "Test Three", "Test Four"];
+            exactTitles.forEach(title => rootNote.child(note(title)));
+            rootNote.child(note("Tset Five")); // should only be found via fuzzy matching
+
+            const ctx = new SearchContext();
+            const hits = searchService.findResultsWithQuery("test", ctx);
+
+            // Phase 2 is expected to run and contribute the misspelled note.
+            expect(hits.length).toBe(5);
+            expect(findNoteByTitle(hits, "Tset Five")).toBeTruthy();
+        });
+
+        it("should respect minimum quality score threshold", () => {
+            // Exact matches whose scores might be low, followed by one fuzzy-only title.
+            const titles = [
+                "Testing Document", // expected to score decently
+                "Document with test inside", // expected to score lower because of the token position
+                "Another test case",
+                "Test case example",
+                "Tset with typo" // fuzzy match
+            ];
+            for (const title of titles) {
+                rootNote.child(note(title));
+            }
+
+            const hits = searchService.findResultsWithQuery("test", new SearchContext());
+
+            // More than four hits means fuzzy results were included on top of the exact ones.
+            expect(hits.length).toBeGreaterThan(4);
+        });
+    });
+
+    describe("Fuzzy Score Management", () => {
+        it("should cap fuzzy token scores to prevent outranking exact matches", () => {
+            const exactTitle = "Test Document";
+            // Several misspelled tokens, so fuzzy scores could pile up on this note.
+            // Note that "testng" still contains the exact substring "test".
+            const typoTitle = "Tset Documnt with many fuzzy tockens for testng";
+
+            rootNote.child(note(exactTitle));
+            rootNote.child(note(typoTitle));
+
+            const hits = searchService.findResultsWithQuery("test document", new SearchContext());
+
+            expect(hits.length).toBe(2);
+
+            // Locate each fixture among the hits by its note title.
+            const exactHit = hits.find(hit => becca.notes[hit.noteId].title === "Test Document");
+            const fuzzyHit = hits.find(hit => becca.notes[hit.noteId].title.includes("Tset"));
+
+            expect(exactHit).toBeTruthy();
+            expect(fuzzyHit).toBeTruthy();
+
+            // The exact match has to win even though the other note matches many tokens fuzzily.
+            expect(exactHit!.score).toBeGreaterThan(fuzzyHit!.score);
+        });
+
+        it("should enforce maximum total fuzzy score per search", () => {
+            // A single note in which every word is a misspelling of one query token.
+            rootNote.child(note("Tset Documnt Anaylsis Sumary Reportng"));
+
+            const query = "test document analysis summary reporting";
+            const hits = searchService.findResultsWithQuery(query, new SearchContext());
+
+            expect(hits.length).toBe(1);
+
+            // The accumulated score has to stay under the bound of 500 used by this test.
+            const [onlyHit] = hits;
+            expect(onlyHit.score).toBeLessThan(500);
+        });
+    });
+
+    describe("SearchContext Integration", () => {
+        it("should respect enableFuzzyMatching flag", () => {
+            rootNote.child(note("Test Document"));
+            rootNote.child(note("Tset Report")); // misspelled on purpose
+
+            // Fuzzy matching switched off: only the literal match may come back.
+            const strictCtx = new SearchContext();
+            strictCtx.enableFuzzyMatching = false;
+            const strictHits = searchService.findResultsWithQuery("test", strictCtx);
+
+            expect(strictHits.length).toBe(1);
+            expect(findNoteByTitle(strictHits, "Test Document")).toBeTruthy();
+            expect(findNoteByTitle(strictHits, "Tset Report")).toBeFalsy();
+
+            // Untouched context: this test treats fuzzy matching as enabled by default.
+            const lenientHits = searchService.findResultsWithQuery("test", new SearchContext());
+
+            expect(lenientHits.length).toBe(2);
+            expect(findNoteByTitle(lenientHits, "Tset Report")).toBeTruthy();
+        });
+    });
+
+    describe("Edge Cases", () => {
+        it("should handle empty search results gracefully", () => {
+            rootNote.child(note("Unrelated Content"));
+
+            // The query is expected to match the fixture neither exactly nor fuzzily.
+            const hits = searchService.findResultsWithQuery("nonexistent", new SearchContext());
+
+            expect(hits.length).toBe(0);
+        });
+    });
+});
--- a/apps/server/src/services/search/services/search.spec.ts
+++ b/apps/server/src/services/search/services/search.spec.ts
@ -578,6 +578,43 @@ describe("Search", () => {
        expect(searchResults.length).toEqual(10);
    });

+    it("progressive search prioritizes exact matches over fuzzy matches", () => {
+        // Five titles contain "Analysis" verbatim; the last two are misspellings of it.
+        rootNote
+            .child(note("Analysis Report")) // Exact match
+            .child(note("Data Analysis")) // Exact match
+            .child(note("Test Analysis")) // Exact match
+            .child(note("Statistical Analysis")) // Exact match
+            .child(note("Business Analysis")) // Exact match
+            .child(note("Advanced Anaylsis")) // Fuzzy match (typo)
+            .child(note("Quick Anlaysis")); // Fuzzy match (typo)
+
+        const searchContext = new SearchContext();
+        const searchResults = searchService.findResultsWithQuery("analysis", searchContext);
+
+        // Should find all matches but exact ones should rank higher
+        expect(searchResults.length).toEqual(7);
+
+        // First 5 results should be exact matches with higher scores
+        const topResults = searchResults.slice(0, 5);
+        const bottomResults = searchResults.slice(5);
+
+        const topTitles = topResults.map(r => becca.notes[r.noteId].title);
+        const bottomTitles = bottomResults.map(r => becca.notes[r.noteId].title);
+
+        // All top results should be exact matches
+        expect(topTitles.every(title => title.toLowerCase().includes("analysis"))).toBeTruthy();
+
+        // Every bottom result must be one of the misspelled titles. `some` would pass
+        // with a single fuzzy hit and let any other note sit in the tail unnoticed.
+        expect(bottomTitles.every(title => title.includes("Anaylsis") || title.includes("Anlaysis"))).toBeTruthy();
+
+        // Verify score ordering: the weakest exact match still beats the strongest fuzzy match.
+        const lowestExactScore = Math.min(...topResults.map(r => r.score));
+        const highestFuzzyScore = Math.max(...bottomResults.map(r => r.score));
+
+        expect(lowestExactScore).toBeGreaterThan(highestFuzzyScore);
+    });
+
+
    // FIXME: test what happens when we order without any filter criteria

    // it("comparison between labels", () => {