feat(quick_search): add tests for the updated progressive fuzzy search functionality

This commit is contained in:
perf3ct 2025-08-03 21:02:56 +00:00
parent e9409577db
commit 22740a6c8d
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
2 changed files with 278 additions and 0 deletions

View File

@ -0,0 +1,241 @@
import { describe, it, expect, beforeEach } from "vitest";
import searchService from "./search.js";
import BNote from "../../../becca/entities/bnote.js";
import BBranch from "../../../becca/entities/bbranch.js";
import SearchContext from "../search_context.js";
import becca from "../../../becca/becca.js";
import { findNoteByTitle, note, NoteBuilder } from "../../../test/becca_mocking.js";
describe("Progressive Search Strategy", () => {
// Builder around the root note. It is reassigned in `beforeEach`, so every test
// attaches its fixture notes to a freshly created root.
let rootNote: NoteBuilder;

beforeEach(() => {
    // Reset becca before rebuilding the fixture tree for the next test.
    becca.reset();

    rootNote = new NoteBuilder(new BNote({ noteId: "root", title: "root", type: "text" }));

    // The instance is intentionally not kept; presumably the constructor registers
    // the branch with becca, linking "root" under the "none" parent — TODO confirm.
    new BBranch({
        branchId: "none_root",
        noteId: "root",
        parentNoteId: "none",
        notePosition: 10
    });
});
describe("Phase 1: Exact Matches Only", () => {
    it("should complete search with exact matches when sufficient results found", () => {
        // Five titles contain the query word verbatim. The sixth contains only
        // misspellings ("Tset", "Documnt"), so it has no literal "test" substring
        // and could only be reached through fuzzy matching.
        rootNote
            .child(note("Test Document One"))
            .child(note("Test Report Two"))
            .child(note("Test Analysis Three"))
            .child(note("Test Summary Four"))
            .child(note("Test Review Five"))
            .child(note("Tset Documnt")); // typos only, no exact "test" token

        const searchContext = new SearchContext();
        const searchResults = searchService.findResultsWithQuery("test", searchContext);

        // Should find 5+ exact matches and not process the typo
        expect(searchResults.length).toBeGreaterThanOrEqual(5);

        // Verify all results have high scores (exact matches)
        const highQualityResults = searchResults.filter(result => result.score >= 10);
        expect(highQualityResults.length).toBeGreaterThanOrEqual(5);

        // The typo document should not be in results since we have enough exact matches
        expect(findNoteByTitle(searchResults, "Tset Documnt")).toBeFalsy();
    });

    it("should use exact match scoring only in Phase 1", () => {
        rootNote
            .child(note("Testing Exact Match"))
            .child(note("Test Document"))
            .child(note("Another Test"));

        const searchContext = new SearchContext();
        const searchResults = searchService.findResultsWithQuery("test", searchContext);

        for (const result of searchResults) {
            expect(result.score).toBeGreaterThan(0);

            // Every hit must be a literal (case-insensitive) match on the query.
            // This replaces an earlier `score % 0.5 !== 0` check, which would reject
            // any integer or half-integer exact score and therefore could not tell
            // exact hits from fuzzy ones.
            expect(becca.notes[result.noteId].title.toLowerCase()).toContain("test");
        }
    });
});
describe("Phase 2: Fuzzy Fallback", () => {
    // Maps a search hit back to the title of the note it points at.
    const titleOf = (hit: { noteId: string }) => becca.notes[hit.noteId].title;

    it("should trigger fuzzy matching when insufficient exact matches", () => {
        // No title spells the query correctly; two of them are misspelled words.
        rootNote
            .child(note("Document One"))
            .child(note("Report Two"))
            .child(note("Anaylsis Three")) // misspelling of "Analysis"
            .child(note("Sumary Four")); // misspelling of "Summary"

        const hits = searchService.findResultsWithQuery("analysis", new SearchContext());

        // The misspelled title is expected to surface through the fuzzy fallback.
        expect(hits.length).toBeGreaterThan(0);
        expect(findNoteByTitle(hits, "Anaylsis Three")).toBeTruthy();
    });

    it("should merge exact and fuzzy results with exact matches ranked higher", () => {
        const exactTitles = ["Analysis Report", "Data Analysis"];
        const fuzzyTitles = ["Anaylsis Doc", "Statistical Anlaysis"];

        rootNote
            .child(note("Analysis Report"))
            .child(note("Data Analysis"))
            .child(note("Anaylsis Doc"))
            .child(note("Statistical Anlaysis"));

        const hits = searchService.findResultsWithQuery("analysis", new SearchContext());
        expect(hits.length).toBe(4);

        // Split the hits by which fixture group their note title belongs to.
        const exactHits = hits.filter(hit => exactTitles.includes(titleOf(hit)));
        const fuzzyHits = hits.filter(hit => fuzzyTitles.includes(titleOf(hit)));
        expect(exactHits.length).toBe(2);
        expect(fuzzyHits.length).toBe(2);

        // The weakest exact hit must still outrank the strongest fuzzy hit.
        const exactScores = exactHits.map(hit => hit.score);
        const fuzzyScores = fuzzyHits.map(hit => hit.score);
        expect(Math.min(...exactScores)).toBeGreaterThan(Math.max(...fuzzyScores));
    });

    it("should not duplicate results between phases", () => {
        rootNote
            .child(note("Test Document")) // a candidate for both phases
            .child(note("Tset Report")); // reachable by fuzzy matching only

        const hits = searchService.findResultsWithQuery("test", new SearchContext());

        // A note id may appear at most once across the merged result list.
        const ids = hits.map(hit => hit.noteId);
        expect(ids.length).toBe(new Set(ids).size);

        expect(findNoteByTitle(hits, "Test Document")).toBeTruthy();
        expect(findNoteByTitle(hits, "Tset Report")).toBeTruthy();
    });
});
describe("Result Sufficiency Thresholds", () => {
    it("should respect minimum result count threshold", () => {
        // Four titles spell the query correctly (the suite treats five as the
        // sufficiency threshold); the fifth is a transposition typo.
        rootNote
            .child(note("Test One"))
            .child(note("Test Two"))
            .child(note("Test Three"))
            .child(note("Test Four"))
            .child(note("Tset Five"));

        const hits = searchService.findResultsWithQuery("test", new SearchContext());

        // With too few exact hits, the fuzzy phase is expected to add the typo note.
        expect(hits.length).toBe(5);
        expect(findNoteByTitle(hits, "Tset Five")).toBeTruthy();
    });

    it("should respect minimum quality score threshold", () => {
        // The query word sits at different positions in the titles; the last
        // title only contains a misspelling of it.
        rootNote
            .child(note("Testing Document"))
            .child(note("Document with test inside"))
            .child(note("Another test case"))
            .child(note("Test case example"))
            .child(note("Tset with typo"));

        const hits = searchService.findResultsWithQuery("test", new SearchContext());

        // More than the four literal matches must come back, i.e. fuzzy results
        // were merged in.
        expect(hits.length).toBeGreaterThan(4);
    });
});
describe("Fuzzy Score Management", () => {
    it("should cap fuzzy token scores to prevent outranking exact matches", () => {
        // One cleanly spelled title and one long title made of misspelled words.
        rootNote.child(note("Test Document"));
        rootNote.child(note("Tset Documnt with many fuzzy tockens for testng"));

        const hits = searchService.findResultsWithQuery("test document", new SearchContext());
        expect(hits.length).toBe(2);

        // Identify each hit by the title of the note it refers to.
        const exactHit = hits.find(hit => becca.notes[hit.noteId].title === "Test Document");
        const fuzzyHit = hits.find(hit => becca.notes[hit.noteId].title.includes("Tset"));
        expect(exactHit).toBeTruthy();
        expect(fuzzyHit).toBeTruthy();

        // Both hits were asserted present above, so the non-null assertions are safe.
        // The exact hit has to win regardless of how many fuzzy tokens matched.
        expect(exactHit!.score).toBeGreaterThan(fuzzyHit!.score);
    });

    it("should enforce maximum total fuzzy score per search", () => {
        // A single title in which every word is a misspelling of a query word.
        rootNote.child(note("Tset Documnt Anaylsis Sumary Reportng"));

        const hits = searchService.findResultsWithQuery(
            "test document analysis summary reporting",
            new SearchContext()
        );
        expect(hits.length).toBe(1);

        // 500 is the upper bound this test accepts for the accumulated score.
        const [onlyHit] = hits;
        expect(onlyHit.score).toBeLessThan(500);
    });
});
describe("SearchContext Integration", () => {
    it("should respect enableFuzzyMatching flag", () => {
        rootNote
            .child(note("Test Document"))
            .child(note("Tset Report")); // misspelled title

        // First pass: fuzzy matching switched off on the context.
        const strictContext = new SearchContext();
        strictContext.enableFuzzyMatching = false;
        const strictHits = searchService.findResultsWithQuery("test", strictContext);

        expect(strictHits.length).toBe(1);
        expect(findNoteByTitle(strictHits, "Test Document")).toBeTruthy();
        expect(findNoteByTitle(strictHits, "Tset Report")).toBeFalsy();

        // Second pass: an untouched context, which this test expects to allow fuzzy matches.
        const defaultHits = searchService.findResultsWithQuery("test", new SearchContext());

        expect(defaultHits.length).toBe(2);
        expect(findNoteByTitle(defaultHits, "Tset Report")).toBeTruthy();
    });
});
describe("Edge Cases", () => {
    it("should handle empty search results gracefully", () => {
        // The only fixture note has nothing in common with the query.
        rootNote.child(note("Unrelated Content"));

        const hits = searchService.findResultsWithQuery("nonexistent", new SearchContext());

        // The search must return an empty list.
        expect(hits).toHaveLength(0);
    });
});
});

View File

@ -578,6 +578,43 @@ describe("Search", () => {
expect(searchResults.length).toEqual(10);
});
it("progressive search prioritizes exact matches over fuzzy matches", () => {
    // Five titles spell "Analysis" correctly; two contain a transposition typo.
    rootNote
        .child(note("Analysis Report")) // Exact match
        .child(note("Data Analysis")) // Exact match
        .child(note("Test Analysis")) // Exact match
        .child(note("Statistical Analysis")) // Exact match
        .child(note("Business Analysis")) // Exact match
        .child(note("Advanced Anaylsis")) // Fuzzy match (typo)
        .child(note("Quick Anlaysis")); // Fuzzy match (typo)

    const searchContext = new SearchContext();
    const searchResults = searchService.findResultsWithQuery("analysis", searchContext);

    // Should find all matches but exact ones should rank higher
    expect(searchResults.length).toEqual(7);

    // Split the ranked list at the number of exact fixtures.
    const topResults = searchResults.slice(0, 5);
    const bottomResults = searchResults.slice(5);
    const topTitles = topResults.map(r => becca.notes[r.noteId].title);
    const bottomTitles = bottomResults.map(r => becca.notes[r.noteId].title);

    // All top results should be exact matches
    expect(topTitles.every(title => title.toLowerCase().includes("analysis"))).toBeTruthy();

    // All bottom results should be fuzzy matches. `every` (not `some`) is required:
    // with `some`, one exact match leaking into the tail would go unnoticed.
    expect(bottomTitles.every(title => title.includes("Anaylsis") || title.includes("Anlaysis"))).toBeTruthy();

    // Verify score ordering: the weakest exact hit still beats the strongest fuzzy hit.
    const lowestExactScore = Math.min(...topResults.map(r => r.score));
    const highestFuzzyScore = Math.max(...bottomResults.map(r => r.score));
    expect(lowestExactScore).toBeGreaterThan(highestFuzzyScore);
});
// FIXME: test what happens when we order without any filter criteria
// it("comparison between labels", () => {