feat(quick_search): edit distance searching in quick search works

This commit is contained in:
perf3ct 2025-08-03 18:18:38 +00:00
parent 2d358342c5
commit 1928356ad5
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
3 changed files with 54 additions and 20 deletions

View File

@ -7,7 +7,7 @@ import Expression from "./expression.js";
import NoteSet from "../note_set.js";
import becca from "../../../becca/becca.js";
import { normalize } from "../../utils.js";
import { normalizeSearchText } from "../utils/text_utils.js";
import { normalizeSearchText, fuzzyMatchWord } from "../utils/text_utils.js";
import beccaService from "../../../becca/becca_service.js";
class NoteFlatTextExp extends Expression {
@ -78,7 +78,7 @@ class NoteFlatTextExp extends Expression {
const foundTokens: string[] = foundAttrTokens.slice();
for (const token of remainingTokens) {
if (title.includes(token)) {
if (this.smartMatch(title, token)) {
foundTokens.push(token);
}
}
@ -121,7 +121,7 @@ class NoteFlatTextExp extends Expression {
const foundTokens = foundAttrTokens.slice();
for (const token of this.tokens) {
if (title.includes(token)) {
if (this.smartMatch(title, token)) {
foundTokens.push(token);
}
}
@ -160,7 +160,7 @@ class NoteFlatTextExp extends Expression {
for (const note of noteSet.notes) {
const normalizedFlatText = normalizeSearchText(note.getFlatText());
for (const token of this.tokens) {
if (normalizedFlatText.includes(token)) {
if (this.smartMatch(normalizedFlatText, token)) {
candidateNotes.push(note);
break;
}
@ -169,6 +169,26 @@ class NoteFlatTextExp extends Expression {
return candidateNotes;
}
/**
 * Decides whether `token` can be found in `text`: a literal substring hit
 * wins outright, otherwise sufficiently long tokens are handed to
 * `fuzzyMatchWord` as a fallback.
 * @param text The text to search in
 * @param token The token to search for
 * @returns True when the token is found verbatim or accepted by the fuzzy fallback
 */
private smartMatch(text: string, token: string): boolean {
    // A plain substring hit short-circuits the fuzzy path entirely.
    const hasExactHit = text.includes(token);
    // Tokens shorter than 4 characters are never passed to the fuzzy matcher.
    const fuzzyEligible = token.length >= 4;
    return hasExactHit || (fuzzyEligible && fuzzyMatchWord(token, text));
}
}
export default NoteFlatTextExp;

View File

@ -40,7 +40,7 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext) {
}
}
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%="]);
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%=", "~=", "~*"]);
function isOperator(token: TokenData) {
if (Array.isArray(token)) {

View File

@ -257,35 +257,49 @@ export function validateAndPreprocessContent(content: string, noteId?: string):
* @param maxDistance Maximum allowed edit distance
* @returns True if the word matches the token within the distance threshold
*/
export function fuzzyMatchWord(token: string, word: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): boolean {
export function fuzzyMatchWord(token: string, text: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): boolean {
// Input validation
if (typeof token !== 'string' || typeof word !== 'string') {
if (typeof token !== 'string' || typeof text !== 'string') {
return false;
}
if (token.length === 0 || word.length === 0) {
if (token.length === 0 || text.length === 0) {
return false;
}
try {
// Normalize both strings for comparison
const normalizedToken = token.toLowerCase();
const normalizedText = text.toLowerCase();
// Exact match check first (most common case)
if (word.includes(token)) {
if (normalizedText.includes(normalizedToken)) {
return true;
}
// Length difference check for early exit
if (Math.abs(word.length - token.length) > maxDistance) {
return false;
// For fuzzy matching, we need to check individual words in the text
// Split the text into words and check each word against the token
const words = normalizedText.split(/\s+/).filter(word => word.length > 0);
for (const word of words) {
// Skip if word is too different in length for fuzzy matching
if (Math.abs(word.length - normalizedToken.length) > maxDistance) {
continue;
}
// For very short tokens or very different lengths, be more strict
if (normalizedToken.length < 4 || Math.abs(word.length - normalizedToken.length) > 2) {
continue;
}
// Use optimized edit distance calculation
const distance = calculateOptimizedEditDistance(normalizedToken, word, maxDistance);
if (distance <= maxDistance) {
return true;
}
}
// For very short tokens or very different lengths, be more strict
if (token.length < 4 || Math.abs(word.length - token.length) > 2) {
return false;
}
// Use optimized edit distance calculation
const distance = calculateOptimizedEditDistance(token, word, maxDistance);
return distance <= maxDistance;
return false;
} catch (error) {
// Log error and return false for safety
console.warn('Error in fuzzy word matching:', error);