From 1928356ad5126ad22790529bbcab7e790b62f560 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sun, 3 Aug 2025 18:18:38 +0000 Subject: [PATCH] feat(quick_search): edit distance searching in quick search works --- .../search/expressions/note_flat_text.ts | 28 ++++++++++-- .../src/services/search/services/parse.ts | 2 +- .../src/services/search/utils/text_utils.ts | 44 ++++++++++++------- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/apps/server/src/services/search/expressions/note_flat_text.ts b/apps/server/src/services/search/expressions/note_flat_text.ts index 90472c5c5..1fb6487d2 100644 --- a/apps/server/src/services/search/expressions/note_flat_text.ts +++ b/apps/server/src/services/search/expressions/note_flat_text.ts @@ -7,7 +7,7 @@ import Expression from "./expression.js"; import NoteSet from "../note_set.js"; import becca from "../../../becca/becca.js"; import { normalize } from "../../utils.js"; -import { normalizeSearchText } from "../utils/text_utils.js"; +import { normalizeSearchText, fuzzyMatchWord } from "../utils/text_utils.js"; import beccaService from "../../../becca/becca_service.js"; class NoteFlatTextExp extends Expression { @@ -78,7 +78,7 @@ class NoteFlatTextExp extends Expression { const foundTokens: string[] = foundAttrTokens.slice(); for (const token of remainingTokens) { - if (title.includes(token)) { + if (this.smartMatch(title, token)) { foundTokens.push(token); } } @@ -121,7 +121,7 @@ class NoteFlatTextExp extends Expression { const foundTokens = foundAttrTokens.slice(); for (const token of this.tokens) { - if (title.includes(token)) { + if (this.smartMatch(title, token)) { foundTokens.push(token); } } @@ -160,7 +160,7 @@ class NoteFlatTextExp extends Expression { for (const note of noteSet.notes) { const normalizedFlatText = normalizeSearchText(note.getFlatText()); for (const token of this.tokens) { - if (normalizedFlatText.includes(token)) { + if (this.smartMatch(normalizedFlatText, token)) { candidateNotes.push(note); 
break; } @@ -169,6 +169,26 @@ class NoteFlatTextExp extends Expression { return candidateNotes; } + + /** + * Matches a token against text: exact substring match first, then a fuzzy fallback via fuzzyMatchWord for tokens of at least 4 characters + * @param text The text to search in (call sites in this patch pass a note title or normalized flat text) + * @param token The token to search for; tokens shorter than 4 characters can only match exactly + * @returns True if text contains token or the fuzzy fallback reports a match, false otherwise + */ + private smartMatch(text: string, token: string): boolean { + // Exact match has priority + if (text.includes(token)) { + return true; + } + + // Fuzzy fallback only for tokens >= 4 characters + if (token.length >= 4) { + return fuzzyMatchWord(token, text); + } + + return false; + } } export default NoteFlatTextExp; diff --git a/apps/server/src/services/search/services/parse.ts b/apps/server/src/services/search/services/parse.ts index 6cfaad6e6..e96ca3896 100644 --- a/apps/server/src/services/search/services/parse.ts +++ b/apps/server/src/services/search/services/parse.ts @@ -40,7 +40,7 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext) { } } -const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%="]); +const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%=", "~=", "~*"]); function isOperator(token: TokenData) { if (Array.isArray(token)) { diff --git a/apps/server/src/services/search/utils/text_utils.ts b/apps/server/src/services/search/utils/text_utils.ts index 7a850e71b..bbaadb58e 100644 --- a/apps/server/src/services/search/utils/text_utils.ts +++ b/apps/server/src/services/search/utils/text_utils.ts @@ -257,35 +257,49 @@ export function validateAndPreprocessContent(content: string, noteId?: string): * @param maxDistance Maximum allowed edit distance * @returns True if the word matches the token within the distance threshold */ -export function fuzzyMatchWord(token: string, word: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): boolean { +export function fuzzyMatchWord(token: string, text: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): 
boolean { // Input validation - if (typeof token !== 'string' || typeof word !== 'string') { + if (typeof token !== 'string' || typeof text !== 'string') { return false; } - if (token.length === 0 || word.length === 0) { + if (token.length === 0 || text.length === 0) { return false; } try { + // Normalize both strings for comparison + const normalizedToken = token.toLowerCase(); + const normalizedText = text.toLowerCase(); + // Exact match check first (most common case) - if (word.includes(token)) { + if (normalizedText.includes(normalizedToken)) { return true; } - // Length difference check for early exit - if (Math.abs(word.length - token.length) > maxDistance) { - return false; + // For fuzzy matching, we need to check individual words in the text + // Split the text into words and check each word against the token + const words = normalizedText.split(/\s+/).filter(word => word.length > 0); + + for (const word of words) { + // Skip if word is too different in length for fuzzy matching + if (Math.abs(word.length - normalizedToken.length) > maxDistance) { + continue; + } + + // For very short tokens or very different lengths, be more strict + if (normalizedToken.length < 4 || Math.abs(word.length - normalizedToken.length) > 2) { + continue; + } + + // Use optimized edit distance calculation + const distance = calculateOptimizedEditDistance(normalizedToken, word, maxDistance); + if (distance <= maxDistance) { + return true; + } } - // For very short tokens or very different lengths, be more strict - if (token.length < 4 || Math.abs(word.length - token.length) > 2) { - return false; - } - - // Use optimized edit distance calculation - const distance = calculateOptimizedEditDistance(token, word, maxDistance); - return distance <= maxDistance; + return false; } catch (error) { // Log error and return false for safety console.warn('Error in fuzzy word matching:', error);