feat(search): improve search weights and operators (#6536)

This commit is contained in:
Elian Doran 2025-08-13 13:10:30 +03:00 committed by GitHub
commit 6e37c9ee5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 1668 additions and 133 deletions

View File

@ -2255,6 +2255,13 @@ footer.webview-footer button {
padding: 1px 10px 1px 10px;
}
/* Search result highlighting: <b> elements inside elements carrying the
   search-result-title / search-result-content classes are drawn extra bold
   in the warning accent color. */
.search-result-title b,
.search-result-content b {
font-weight: 900;
color: var(--admonition-warning-accent-color);
}
/* Customized icons */
.bx-tn-toc::before {

View File

@ -23,12 +23,52 @@ const TPL = /*html*/`
.quick-search .dropdown-menu {
max-height: 600px;
max-width: 400px;
max-width: 600px;
overflow-y: auto;
overflow-x: hidden;
text-overflow: ellipsis;
box-shadow: -30px 50px 93px -50px black;
}
.quick-search .dropdown-item {
white-space: normal;
padding: 12px 16px;
line-height: 1.4;
position: relative;
}
.quick-search .dropdown-item:not(:last-child)::after {
content: '';
position: absolute;
bottom: 0;
left: 50%;
transform: translateX(-50%);
width: 80%;
height: 2px;
background: var(--main-border-color);
border-radius: 1px;
opacity: 0.4;
}
.quick-search .dropdown-item:last-child::after {
display: none;
}
.quick-search .dropdown-item.disabled::after {
display: none;
}
.quick-search .dropdown-item.show-in-full-search::after {
display: none;
}
.quick-search .dropdown-item:hover {
background-color: #f8f9fa;
}
.quick-search .dropdown-divider {
margin: 0;
}
</style>
<div class="input-group-prepend">
@ -40,11 +80,21 @@ const TPL = /*html*/`
<input type="text" class="form-control form-control-sm search-string" placeholder="${t("quick-search.placeholder")}">
</div>`;
const MAX_DISPLAYED_NOTES = 15;
const INITIAL_DISPLAYED_NOTES = 15;
const LOAD_MORE_BATCH_SIZE = 10;
// TODO: Deduplicate with server.
/** A single rich result entry carried in the `searchResults` list of a quick-search response. */
interface QuickSearchResultItem {
    /** Slash-separated note path; the last segment is used as the note ID. */
    notePath: string;
    noteTitle: string;
    notePathTitle: string;
    /** Path title as HTML markup; it is inserted into the dropdown item as-is. */
    highlightedNotePathTitle: string;
    contentSnippet?: string;
    /** Content snippet as HTML markup; rendered below the title when present. */
    highlightedContentSnippet?: string;
    /** CSS class string applied to the icon `<span>`. */
    icon: string;
}

/** Shape of the payload requested from the `quick-search/<query>` endpoint. */
interface QuickSearchResponse {
    /** IDs of all matching notes. */
    searchResultNoteIds: string[];
    /** Rich results with highlighting; optional in the response. */
    searchResults?: QuickSearchResultItem[];
    /** Error text; a truthy value is shown to the user as a tooltip. */
    error: string;
}
@ -53,6 +103,12 @@ export default class QuickSearchWidget extends BasicWidget {
private dropdown!: bootstrap.Dropdown;
private $searchString!: JQuery<HTMLElement>;
private $dropdownMenu!: JQuery<HTMLElement>;
// State for infinite scrolling
// Rich (highlighted) results of the latest search; empty when the response carried none.
private allSearchResults: Array<any> = [];
// Note IDs of the latest search; rendered via the fallback path when there are no rich results.
private allSearchResultNoteIds: string[] = [];
// How many results have already been appended to the dropdown.
private currentDisplayedCount: number = 0;
// Guard flag: true while displayMoreResults() is appending a batch, so overlapping calls bail out.
private isLoadingMore: boolean = false;
doRender() {
this.$widget = $(TPL);
@ -68,6 +124,11 @@ export default class QuickSearchWidget extends BasicWidget {
});
this.$widget.find(".input-group-prepend").on("shown.bs.dropdown", () => this.search());
// Add scroll event listener for infinite scrolling
this.$dropdownMenu.on("scroll", () => {
this.handleScroll();
});
if (utils.isMobile()) {
this.$searchString.keydown((e) => {
@ -112,10 +173,16 @@ export default class QuickSearchWidget extends BasicWidget {
return;
}
// Reset state for new search
this.allSearchResults = [];
this.allSearchResultNoteIds = [];
this.currentDisplayedCount = 0;
this.isLoadingMore = false;
this.$dropdownMenu.empty();
this.$dropdownMenu.append(`<span class="dropdown-item disabled"><span class="bx bx-loader bx-spin"></span>${t("quick-search.searching")}</span>`);
const { searchResultNoteIds, error } = await server.get<QuickSearchResponse>(`quick-search/${encodeURIComponent(searchString)}`);
const { searchResultNoteIds, searchResults, error } = await server.get<QuickSearchResponse>(`quick-search/${encodeURIComponent(searchString)}`);
if (error) {
let tooltip = new Tooltip(this.$searchString[0], {
@ -129,47 +196,148 @@ export default class QuickSearchWidget extends BasicWidget {
setTimeout(() => tooltip.dispose(), 4000);
}
const displayedNoteIds = searchResultNoteIds.slice(0, Math.min(MAX_DISPLAYED_NOTES, searchResultNoteIds.length));
// Store all results for infinite scrolling
this.allSearchResults = searchResults || [];
this.allSearchResultNoteIds = searchResultNoteIds || [];
this.$dropdownMenu.empty();
if (displayedNoteIds.length === 0) {
if (this.allSearchResults.length === 0 && this.allSearchResultNoteIds.length === 0) {
this.$dropdownMenu.append(`<span class="dropdown-item disabled">${t("quick-search.no-results")}</span>`);
return;
}
for (const note of await froca.getNotes(displayedNoteIds)) {
const $link = await linkService.createLink(note.noteId, { showNotePath: true, showNoteIcon: true });
$link.addClass("dropdown-item");
$link.attr("tabIndex", "0");
$link.on("click", (e) => {
this.dropdown.hide();
// Display initial batch
await this.displayMoreResults(INITIAL_DISPLAYED_NOTES);
this.addShowInFullSearchButton();
this.dropdown.update();
}
private async displayMoreResults(batchSize: number) {
if (this.isLoadingMore) return;
this.isLoadingMore = true;
// Remove the "Show in full search" button temporarily
this.$dropdownMenu.find('.show-in-full-search').remove();
this.$dropdownMenu.find('.dropdown-divider').remove();
// Use highlighted search results if available, otherwise fall back to basic display
if (this.allSearchResults.length > 0) {
const startIndex = this.currentDisplayedCount;
const endIndex = Math.min(startIndex + batchSize, this.allSearchResults.length);
const resultsToDisplay = this.allSearchResults.slice(startIndex, endIndex);
for (const result of resultsToDisplay) {
const noteId = result.notePath.split("/").pop();
if (!noteId) continue;
const $item = $('<a class="dropdown-item" tabindex="0" href="javascript:">');
// Build the display HTML with content snippet below the title
let itemHtml = `<div style="display: flex; flex-direction: column;">
<div style="display: flex; align-items: flex-start; gap: 6px;">
<span class="${result.icon}" style="flex-shrink: 0; margin-top: 1px;"></span>
<span style="flex: 1;" class="search-result-title">${result.highlightedNotePathTitle}</span>
</div>`;
// Add content snippet below the title if available
if (result.highlightedContentSnippet) {
itemHtml += `<div style="font-size: 0.85em; color: var(--main-text-color); opacity: 0.7; margin-left: 20px; margin-top: 4px; line-height: 1.3;" class="search-result-content">${result.highlightedContentSnippet}</div>`;
}
itemHtml += `</div>`;
$item.html(itemHtml);
$item.on("click", (e) => {
this.dropdown.hide();
e.preventDefault();
const activeContext = appContext.tabManager.getActiveContext();
if (activeContext) {
activeContext.setNote(noteId);
}
});
shortcutService.bindElShortcut($item, "return", () => {
this.dropdown.hide();
const activeContext = appContext.tabManager.getActiveContext();
if (activeContext) {
activeContext.setNote(noteId);
}
});
this.$dropdownMenu.append($item);
}
this.currentDisplayedCount = endIndex;
} else {
// Fallback to original behavior if no highlighted results
const startIndex = this.currentDisplayedCount;
const endIndex = Math.min(startIndex + batchSize, this.allSearchResultNoteIds.length);
const noteIdsToDisplay = this.allSearchResultNoteIds.slice(startIndex, endIndex);
for (const note of await froca.getNotes(noteIdsToDisplay)) {
const $link = await linkService.createLink(note.noteId, { showNotePath: true, showNoteIcon: true });
$link.addClass("dropdown-item");
$link.attr("tabIndex", "0");
$link.on("click", (e) => {
this.dropdown.hide();
if (!e.target || e.target.nodeName !== "A") {
// click on the link is handled by link handling, but we want the whole item clickable
const activeContext = appContext.tabManager.getActiveContext();
if (activeContext) {
activeContext.setNote(note.noteId);
}
}
});
shortcutService.bindElShortcut($link, "return", () => {
this.dropdown.hide();
if (!e.target || e.target.nodeName !== "A") {
// click on the link is handled by link handling, but we want the whole item clickable
const activeContext = appContext.tabManager.getActiveContext();
if (activeContext) {
activeContext.setNote(note.noteId);
}
}
});
shortcutService.bindElShortcut($link, "return", () => {
this.dropdown.hide();
});
const activeContext = appContext.tabManager.getActiveContext();
if (activeContext) {
activeContext.setNote(note.noteId);
}
});
this.$dropdownMenu.append($link);
}
this.$dropdownMenu.append($link);
this.currentDisplayedCount = endIndex;
}
if (searchResultNoteIds.length > MAX_DISPLAYED_NOTES) {
const numRemainingResults = searchResultNoteIds.length - MAX_DISPLAYED_NOTES;
this.$dropdownMenu.append(`<span class="dropdown-item disabled">${t("quick-search.more-results", { number: numRemainingResults })}</span>`);
}
this.isLoadingMore = false;
}
const $showInFullButton = $('<a class="dropdown-item" tabindex="0">').text(t("quick-search.show-in-full-search"));
/**
 * Scroll listener of the dropdown menu: once the user gets within 50px of the bottom
 * and not every result has been rendered yet, appends the next batch and re-adds the
 * "show in full search" entry afterwards. Does nothing while a batch is still loading.
 */
private handleScroll() {
    if (this.isLoadingMore) {
        return;
    }

    const { scrollTop, scrollHeight, clientHeight } = this.$dropdownMenu[0];

    // Trigger loading more when user scrolls near the bottom (within 50px)
    const isNearBottom = scrollTop + clientHeight >= scrollHeight - 50;
    if (!isNearBottom) {
        return;
    }

    // Rich results take precedence; the plain ID list is only counted when there are none.
    const totalResults = this.allSearchResults.length || this.allSearchResultNoteIds.length;
    if (this.currentDisplayedCount < totalResults) {
        this.displayMoreResults(LOAD_MORE_BATCH_SIZE).then(() => {
            this.addShowInFullSearchButton();
        });
    }
}
private addShowInFullSearchButton() {
// Remove existing button if it exists
this.$dropdownMenu.find('.show-in-full-search').remove();
this.$dropdownMenu.find('.dropdown-divider').remove();
const $showInFullButton = $('<a class="dropdown-item show-in-full-search" tabindex="0">').text(t("quick-search.show-in-full-search"));
this.$dropdownMenu.append($(`<div class="dropdown-divider">`));
this.$dropdownMenu.append($showInFullButton);

View File

@ -52,10 +52,15 @@ function quickSearch(req: Request) {
fuzzyAttributeSearch: false
});
const resultNoteIds = searchService.findResultsWithQuery(searchString, searchContext).map((sr) => sr.noteId);
// Use the same highlighting logic as autocomplete for consistency
const searchResults = searchService.searchNotesForAutocomplete(searchString, false);
// Extract note IDs for backward compatibility
const resultNoteIds = searchResults.map((result) => result.notePath.split("/").pop()).filter(Boolean) as string[];
return {
searchResultNoteIds: resultNoteIds,
searchResults: searchResults,
error: searchContext.getError()
};
}

View File

@ -1,5 +1,6 @@
import { describe, it, expect } from "vitest";
import { processMindmapContent } from "./note_content_fulltext.js";
import NoteContentFulltextExp from "./note_content_fulltext.js";
describe("processMindmapContent", () => {
it("supports empty JSON", () => {
@ -11,3 +12,19 @@ describe("processMindmapContent", () => {
expect(processMindmapContent(`{ "node": " }`)).toEqual("");
});
});
// Constructor-level checks for the fuzzy operators. These cases only verify which token
// lists the constructor accepts or rejects; they do not run a search, so actual typo or
// fuzzy-substring matching is not exercised here.
describe("Fuzzy Search Operators", () => {
it("~= operator works with typos", () => {
// A 5-character token is accepted for "~=" and stored unchanged on the expression.
const expression = new NoteContentFulltextExp("~=", { tokens: ["hello"] });
expect(expression.tokens).toEqual(["hello"]);
// A 2-character token must make the constructor throw (presumably the minimum fuzzy
// token length enforced by validateFuzzySearchTokens — confirm there).
expect(() => new NoteContentFulltextExp("~=", { tokens: ["he"] })).toThrow(); // Too short
});
it("~* operator works with fuzzy contains", () => {
// A 5-character token is accepted for "~*" and stored unchanged on the expression.
const expression = new NoteContentFulltextExp("~*", { tokens: ["world"] });
expect(expression.tokens).toEqual(["world"]);
// A 2-character token must make the constructor throw for "~*" as well.
expect(() => new NoteContentFulltextExp("~*", { tokens: ["wo"] })).toThrow(); // Too short
});
});

View File

@ -11,8 +11,19 @@ import protectedSessionService from "../../protected_session.js";
import striptags from "striptags";
import { normalize } from "../../utils.js";
import sql from "../../sql.js";
import {
normalizeSearchText,
calculateOptimizedEditDistance,
validateFuzzySearchTokens,
validateAndPreprocessContent,
fuzzyMatchWord,
FUZZY_SEARCH_CONFIG
} from "../utils/text_utils.js";
const ALLOWED_OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", "%="]);
const ALLOWED_OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", "%=", "~=", "~*"]);
// Maximum content size for search processing (2MB)
const MAX_SEARCH_CONTENT_SIZE = 2 * 1024 * 1024;
const cachedRegexes: Record<string, RegExp> = {};
@ -41,6 +52,16 @@ class NoteContentFulltextExp extends Expression {
constructor(operator: string, { tokens, raw, flatText }: ConstructorOpts) {
super();
if (!operator || !tokens || !Array.isArray(tokens)) {
throw new Error('Invalid parameters: operator and tokens are required');
}
// Validate fuzzy search tokens
const validation = validateFuzzySearchTokens(tokens, operator);
if (!validation.isValid) {
throw new Error(validation.error!);
}
this.operator = operator;
this.tokens = tokens;
this.raw = !!raw;
@ -59,7 +80,9 @@ class NoteContentFulltextExp extends Expression {
for (const row of sql.iterateRows<SearchRow>(`
SELECT noteId, type, mime, content, isProtected
FROM notes JOIN blobs USING (blobId)
WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') AND isDeleted = 0`)) {
WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND isDeleted = 0
AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
this.findInText(row, inputNoteSet, resultNoteSet);
}
@ -89,6 +112,13 @@ class NoteContentFulltextExp extends Expression {
}
content = this.preprocessContent(content, type, mime);
// Apply content size validation and preprocessing
const processedContent = validateAndPreprocessContent(content, noteId);
if (!processedContent) {
return; // Content too large or invalid
}
content = processedContent;
if (this.tokens.length === 1) {
const [token] = this.tokens;
@ -99,21 +129,27 @@ class NoteContentFulltextExp extends Expression {
(this.operator === "*=" && content.endsWith(token)) ||
(this.operator === "=*" && content.startsWith(token)) ||
(this.operator === "*=*" && content.includes(token)) ||
(this.operator === "%=" && getRegex(token).test(content))
(this.operator === "%=" && getRegex(token).test(content)) ||
(this.operator === "~=" && this.matchesWithFuzzy(content, noteId)) ||
(this.operator === "~*" && this.fuzzyMatchToken(normalizeSearchText(token), normalizeSearchText(content)))
) {
resultNoteSet.add(becca.notes[noteId]);
}
} else {
const nonMatchingToken = this.tokens.find(
(token) =>
!content?.includes(token) &&
// in case of default fulltext search, we should consider both title, attrs and content
// so e.g. "hello world" should match when "hello" is in title and "world" in content
(!this.flatText || !becca.notes[noteId].getFlatText().includes(token))
);
// Multi-token matching with fuzzy support and phrase proximity
if (this.operator === "~=" || this.operator === "~*") {
if (this.matchesWithFuzzy(content, noteId)) {
resultNoteSet.add(becca.notes[noteId]);
}
} else {
const nonMatchingToken = this.tokens.find(
(token) =>
!this.tokenMatchesContent(token, content, noteId)
);
if (!nonMatchingToken) {
resultNoteSet.add(becca.notes[noteId]);
if (!nonMatchingToken) {
resultNoteSet.add(becca.notes[noteId]);
}
}
}
@ -124,8 +160,8 @@ class NoteContentFulltextExp extends Expression {
content = normalize(content.toString());
if (type === "text" && mime === "text/html") {
if (!this.raw && content.length < 20000) {
// striptags is slow for very large notes
if (!this.raw) {
// Content size already filtered at DB level, safe to process
content = this.stripTags(content);
}
@ -152,6 +188,147 @@ class NoteContentFulltextExp extends Expression {
return content.trim();
}
/**
 * Decides whether a single token counts as present for the given note.
 *
 * The normalized token is looked up in the normalized content first. If it is not found
 * there and this expression runs in flat-text mode, the raw token is looked up in the
 * note's flat text. Only plain substring checks are performed here.
 *
 * @param token   search token as supplied to the expression
 * @param content note content to search in
 * @param noteId  ID of the note whose flat text serves as the fallback haystack
 * @returns true when the token is found in either place
 */
private tokenMatchesContent(token: string, content: string, noteId: string): boolean {
    const needle = normalizeSearchText(token);
    const haystack = normalizeSearchText(content);

    if (haystack.includes(needle)) {
        return true;
    }

    // Check flat text for default fulltext search
    return !!this.flatText && becca.notes[noteId].getFlatText().includes(token);
}
/**
 * Entry point for the fuzzy operators.
 *
 * With more than one token the decision is delegated to phrase matching. With a single
 * token, the normalized token is fuzzy-matched against the normalized content and, in
 * flat-text mode, against the note's normalized flat text as well.
 *
 * Any exception raised along the way is logged and treated as "no match".
 *
 * @param content note content to search in
 * @param noteId  ID of the note being tested (used for the flat text and for logging)
 */
private matchesWithFuzzy(content: string, noteId: string): boolean {
    try {
        const haystack = normalizeSearchText(content);
        const flatHaystack = this.flatText ? normalizeSearchText(becca.notes[noteId].getFlatText()) : "";

        // For phrase matching, check if tokens appear within reasonable proximity
        const isPhraseQuery = this.tokens.length > 1;
        if (isPhraseQuery) {
            return this.matchesPhrase(haystack, flatHaystack);
        }

        // Single token fuzzy matching
        const needle = normalizeSearchText(this.tokens[0]);
        if (this.fuzzyMatchToken(needle, haystack)) {
            return true;
        }
        return this.flatText && this.fuzzyMatchToken(needle, flatHaystack);
    } catch (error) {
        log.error(`Error in fuzzy matching for note ${noteId}: ${error}`);
        return false;
    }
}
/**
 * Phrase matching for multi-token fuzzy queries.
 *
 * The text to search is the content, followed by the flat text when flat-text mode is on.
 * It is passed through `validateAndPreprocessContent`; a falsy result means no match.
 * The text is then split into words, and for every token the indices of all words that
 * fuzzy-match it are collected. The phrase matches only when every token matched at least
 * one word and the positions satisfy the proximity rule for
 * `FUZZY_SEARCH_CONFIG.MAX_PHRASE_PROXIMITY`.
 *
 * @param content  normalized note content
 * @param flatText normalized flat text (only used in flat-text mode)
 */
private matchesPhrase(content: string, flatText: string): boolean {
    const searchText = this.flatText ? `${content} ${flatText}` : content;

    // Apply content size limits for phrase matching
    const limitedText = validateAndPreprocessContent(searchText);
    if (!limitedText) {
        return false;
    }

    const words = limitedText.toLowerCase().split(/\s+/);

    // Only skip phrase matching for truly extreme word counts that could crash the system
    if (words.length > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
        console.error(`Phrase matching skipped due to extreme word count that could cause system instability: ${words.length} words`);
        return false;
    }

    // Warn about large word counts but still attempt matching
    if (words.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
        console.info(`Large word count for phrase matching: ${words.length} words - may take longer but will attempt full matching`);
    }

    // One list of matching word indices per token, in token order
    const tokenPositions: number[][] = [];
    for (const token of this.tokens) {
        const normalizedToken = normalizeSearchText(token);
        const hits: number[] = [];
        for (const [index, word] of words.entries()) {
            if (this.fuzzyMatchSingle(normalizedToken, word)) {
                hits.push(index);
            }
        }
        tokenPositions.push(hits);
    }

    // Every token has to be found at least once
    const allTokensFound = tokenPositions.every((hits) => hits.length > 0);
    if (!allTokensFound) {
        return false;
    }

    // Check for phrase proximity using configurable distance
    return this.hasProximityMatch(tokenPositions, FUZZY_SEARCH_CONFIG.MAX_PHRASE_PROXIMITY);
}
/**
 * Decides whether the tokens occur close enough to each other to count as a phrase.
 *
 * `tokenPositions[i]` lists the word indices at which the i-th token matched. The phrase
 * matches when one position can be picked per token such that each pick lies at most
 * `maxDistance` words away (in either direction) from the pick of the previous token.
 *
 * The check carries forward the set of positions that are still reachable after each
 * token instead of backtracking over every combination, so the cost grows linearly with
 * the number of tokens rather than exponentially.
 *
 * @param tokenPositions per-token lists of matching word indices, in token order
 * @param maxDistance    largest allowed gap, in words, between consecutive tokens
 * @returns true when such a chain of positions exists; false when `tokenPositions`
 *          is empty or any token cannot be reached
 */
private hasProximityMatch(tokenPositions: number[][], maxDistance: number): boolean {
    // Positions of the most recently processed token that can still be part of a chain.
    let reachable: number[] | null = null;

    for (const candidates of tokenPositions) {
        const previous: number[] | null = reachable;
        const next = previous === null
            // The first token may start the chain anywhere it matched.
            ? candidates
            // Later tokens must sit within range of some reachable previous position.
            : candidates.filter((pos) => previous.some((prev) => Math.abs(pos - prev) <= maxDistance));

        if (next.length === 0) {
            return false;
        }
        reachable = next;
    }

    // `null` means there were no tokens at all, which is not a match.
    return reachable !== null;
}
/**
 * Tests one token against a block of text.
 *
 * Tokens shorter than `FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH` are matched as plain
 * substrings. Longer tokens are fuzzy-compared word by word against the whitespace-split
 * text; only the first `FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT` words are examined.
 *
 * @param token   token to look for
 * @param content text to search in
 */
private fuzzyMatchToken(token: string, content: string): boolean {
    const isShortToken = token.length < FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH;
    if (isShortToken) {
        // For short tokens, require exact match to avoid too many false positives
        return content.includes(token);
    }

    // Only limit word processing for truly extreme cases to prevent system instability
    const candidateWords = content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT);
    for (const word of candidateWords) {
        if (this.fuzzyMatchSingle(token, word)) {
            return true;
        }
    }
    return false;
}
/**
 * Compares one token with one word by delegating to the shared `fuzzyMatchWord` helper,
 * passing `FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE` as the distance limit.
 */
private fuzzyMatchSingle(token: string, word: string): boolean {
    const { MAX_EDIT_DISTANCE } = FUZZY_SEARCH_CONFIG;
    return fuzzyMatchWord(token, word, MAX_EDIT_DISTANCE);
}
stripTags(content: string) {
// we want to allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
// we want to insert space in place of block tags (because they imply text separation)

View File

@ -7,6 +7,7 @@ import Expression from "./expression.js";
import NoteSet from "../note_set.js";
import becca from "../../../becca/becca.js";
import { normalize } from "../../utils.js";
import { normalizeSearchText, fuzzyMatchWord, fuzzyMatchWordWithResult } from "../utils/text_utils.js";
import beccaService from "../../../becca/becca_service.js";
class NoteFlatTextExp extends Expression {
@ -15,7 +16,8 @@ class NoteFlatTextExp extends Expression {
constructor(tokens: string[]) {
super();
this.tokens = tokens;
// Normalize tokens using centralized normalization function
this.tokens = tokens.map(token => normalizeSearchText(token));
}
execute(inputNoteSet: NoteSet, executionContext: any, searchContext: SearchContext) {
@ -55,14 +57,18 @@ class NoteFlatTextExp extends Expression {
const foundAttrTokens: string[] = [];
for (const token of remainingTokens) {
if (note.type.includes(token) || note.mime.includes(token)) {
// Add defensive checks for undefined properties
const typeMatches = note.type && note.type.includes(token);
const mimeMatches = note.mime && note.mime.includes(token);
if (typeMatches || mimeMatches) {
foundAttrTokens.push(token);
}
}
for (const attribute of note.getOwnedAttributes()) {
const normalizedName = normalize(attribute.name);
const normalizedValue = normalize(attribute.value);
const normalizedName = normalizeSearchText(attribute.name);
const normalizedValue = normalizeSearchText(attribute.value);
for (const token of remainingTokens) {
if (normalizedName.includes(token) || normalizedValue.includes(token)) {
@ -72,11 +78,11 @@ class NoteFlatTextExp extends Expression {
}
for (const parentNote of note.parents) {
const title = normalize(beccaService.getNoteTitle(note.noteId, parentNote.noteId));
const title = normalizeSearchText(beccaService.getNoteTitle(note.noteId, parentNote.noteId));
const foundTokens: string[] = foundAttrTokens.slice();
for (const token of remainingTokens) {
if (title.includes(token)) {
if (this.smartMatch(title, token, searchContext)) {
foundTokens.push(token);
}
}
@ -91,7 +97,7 @@ class NoteFlatTextExp extends Expression {
}
};
const candidateNotes = this.getCandidateNotes(inputNoteSet);
const candidateNotes = this.getCandidateNotes(inputNoteSet, searchContext);
for (const note of candidateNotes) {
// autocomplete should be able to find notes by their noteIds as well (only leafs)
@ -103,23 +109,27 @@ class NoteFlatTextExp extends Expression {
const foundAttrTokens: string[] = [];
for (const token of this.tokens) {
if (note.type.includes(token) || note.mime.includes(token)) {
// Add defensive checks for undefined properties
const typeMatches = note.type && note.type.includes(token);
const mimeMatches = note.mime && note.mime.includes(token);
if (typeMatches || mimeMatches) {
foundAttrTokens.push(token);
}
for (const attribute of note.ownedAttributes) {
if (normalize(attribute.name).includes(token) || normalize(attribute.value).includes(token)) {
if (normalizeSearchText(attribute.name).includes(token) || normalizeSearchText(attribute.value).includes(token)) {
foundAttrTokens.push(token);
}
}
}
for (const parentNote of note.parents) {
const title = normalize(beccaService.getNoteTitle(note.noteId, parentNote.noteId));
const title = normalizeSearchText(beccaService.getNoteTitle(note.noteId, parentNote.noteId));
const foundTokens = foundAttrTokens.slice();
for (const token of this.tokens) {
if (title.includes(token)) {
if (this.smartMatch(title, token, searchContext)) {
foundTokens.push(token);
}
}
@ -152,12 +162,13 @@ class NoteFlatTextExp extends Expression {
/**
* Returns noteIds which have at least one matching tokens
*/
getCandidateNotes(noteSet: NoteSet): BNote[] {
getCandidateNotes(noteSet: NoteSet, searchContext?: SearchContext): BNote[] {
const candidateNotes: BNote[] = [];
for (const note of noteSet.notes) {
const normalizedFlatText = normalizeSearchText(note.getFlatText());
for (const token of this.tokens) {
if (note.getFlatText().includes(token)) {
if (this.smartMatch(normalizedFlatText, token, searchContext)) {
candidateNotes.push(note);
break;
}
@ -166,6 +177,34 @@ class NoteFlatTextExp extends Expression {
return candidateNotes;
}
/**
 * Matches a token against text, preferring an exact substring hit and falling back to
 * fuzzy matching.
 *
 * The fuzzy fallback runs only when a search context is given, its `enableFuzzyMatching`
 * flag is set, and the token has at least 4 characters. A word found by the fallback is
 * appended to `searchContext.highlightedTokens` (once) so it can be highlighted later.
 *
 * @param text The text to search in
 * @param token The token to search for
 * @param searchContext Optional search context; receives fuzzy-matched words
 * @returns True if the token matched exactly or fuzzily
 */
private smartMatch(text: string, token: string, searchContext?: SearchContext): boolean {
    // Exact match has priority
    if (text.includes(token)) {
        return true;
    }

    // Fuzzy fallback only if enabled and for tokens >= 4 characters
    if (!searchContext?.enableFuzzyMatching || token.length < 4) {
        return false;
    }

    const matchedWord = fuzzyMatchWordWithResult(token, text);
    if (!matchedWord) {
        return false;
    }

    // Track the fuzzy matched word for highlighting
    const highlighted = searchContext.highlightedTokens;
    if (!highlighted.includes(matchedWord)) {
        highlighted.push(matchedWord);
    }
    return true;
}
}
export default NoteFlatTextExp;

View File

@ -18,6 +18,7 @@ class SearchContext {
debug?: boolean;
debugInfo: {} | null;
fuzzyAttributeSearch: boolean;
enableFuzzyMatching: boolean; // Controls whether fuzzy matching is enabled for this search phase
highlightedTokens: string[];
originalQuery: string;
fulltextQuery: string;
@ -45,6 +46,7 @@ class SearchContext {
this.debug = params.debug;
this.debugInfo = null;
this.fuzzyAttributeSearch = !!params.fuzzyAttributeSearch;
this.enableFuzzyMatching = true; // Default to true for backward compatibility
this.highlightedTokens = [];
this.originalQuery = "";
this.fulltextQuery = ""; // complete fulltext part

View File

@ -2,17 +2,46 @@
import beccaService from "../../becca/becca_service.js";
import becca from "../../becca/becca.js";
import {
normalizeSearchText,
calculateOptimizedEditDistance,
FUZZY_SEARCH_CONFIG
} from "./utils/text_utils.js";
/**
 * Ranking weights used when scoring a search result, kept in one table so the relative
 * importance of each signal can be tuned in a single place.
 */
const SCORE_WEIGHTS = {
    // Bonuses for how the whole query relates to the note ID / title.
    NOTE_ID_EXACT_MATCH: 1000,
    TITLE_EXACT_MATCH: 2000,
    TITLE_PREFIX_MATCH: 500,
    TITLE_WORD_MATCH: 300,

    // Per-token weights; each is multiplied by the token length and a per-string factor.
    TOKEN_EXACT_MATCH: 4,
    TOKEN_PREFIX_MATCH: 2,
    TOKEN_CONTAINS_MATCH: 1,
    TOKEN_FUZZY_MATCH: 0.5,

    // Per-string factors for token matches found in the note title vs. the note path title.
    TITLE_FACTOR: 2,
    PATH_FACTOR: 0.3,

    // The final score of a note in the hidden subtree is divided by this value.
    HIDDEN_NOTE_PENALTY: 3,

    // Score caps to prevent fuzzy matches from outranking exact matches
    MAX_FUZZY_SCORE_PER_TOKEN: 3, // Cap fuzzy token contributions to stay below exact matches
    MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER: 3, // Limit token length impact for fuzzy matches
    MAX_TOTAL_FUZZY_SCORE: 200 // Total cap on fuzzy scoring per search
} as const;
class SearchResult {
notePathArray: string[];
score: number;
notePathTitle: string;
highlightedNotePathTitle?: string;
contentSnippet?: string;
highlightedContentSnippet?: string;
private fuzzyScore: number; // Track fuzzy score separately
constructor(notePathArray: string[]) {
this.notePathArray = notePathArray;
this.notePathTitle = beccaService.getNoteTitleForPath(notePathArray);
this.score = 0;
this.fuzzyScore = 0;
}
get notePath() {
@ -23,53 +52,117 @@ class SearchResult {
return this.notePathArray[this.notePathArray.length - 1];
}
computeScore(fulltextQuery: string, tokens: string[]) {
computeScore(fulltextQuery: string, tokens: string[], enableFuzzyMatching: boolean = true) {
this.score = 0;
this.fuzzyScore = 0; // Reset fuzzy score tracking
const note = becca.notes[this.noteId];
const normalizedQuery = fulltextQuery.toLowerCase();
const normalizedTitle = note.title.toLowerCase();
const normalizedQuery = normalizeSearchText(fulltextQuery.toLowerCase());
const normalizedTitle = normalizeSearchText(note.title.toLowerCase());
// Note ID exact match, much higher score
if (note.noteId.toLowerCase() === fulltextQuery) {
this.score += 1000;
this.score += SCORE_WEIGHTS.NOTE_ID_EXACT_MATCH;
}
// Title matching scores, make sure to always win
// Title matching scores with fuzzy matching support
if (normalizedTitle === normalizedQuery) {
this.score += 2000; // Increased from 1000 to ensure exact matches always win
this.score += SCORE_WEIGHTS.TITLE_EXACT_MATCH;
} else if (normalizedTitle.startsWith(normalizedQuery)) {
this.score += 500; // Increased to give more weight to prefix matches
} else if (normalizedTitle.includes(` ${normalizedQuery} `) || normalizedTitle.startsWith(`${normalizedQuery} `) || normalizedTitle.endsWith(` ${normalizedQuery}`)) {
this.score += 300; // Increased to better distinguish word matches
this.score += SCORE_WEIGHTS.TITLE_PREFIX_MATCH;
} else if (this.isWordMatch(normalizedTitle, normalizedQuery)) {
this.score += SCORE_WEIGHTS.TITLE_WORD_MATCH;
} else if (enableFuzzyMatching) {
// Try fuzzy matching for typos only if enabled
const fuzzyScore = this.calculateFuzzyTitleScore(normalizedTitle, normalizedQuery);
this.score += fuzzyScore;
this.fuzzyScore += fuzzyScore; // Track fuzzy score contributions
}
// Add scores for partial matches with adjusted weights
this.addScoreForStrings(tokens, note.title, 2.0); // Increased to give more weight to title matches
this.addScoreForStrings(tokens, this.notePathTitle, 0.3); // Reduced to further de-emphasize path matches
// Add scores for token matches
this.addScoreForStrings(tokens, note.title, SCORE_WEIGHTS.TITLE_FACTOR, enableFuzzyMatching);
this.addScoreForStrings(tokens, this.notePathTitle, SCORE_WEIGHTS.PATH_FACTOR, enableFuzzyMatching);
if (note.isInHiddenSubtree()) {
this.score = this.score / 3; // Increased penalty for hidden notes
this.score = this.score / SCORE_WEIGHTS.HIDDEN_NOTE_PENALTY;
}
}
/**
 * Adds token-level match points for `str` to `this.score`.
 *
 * `str` is lower-cased, normalized and split on single spaces into chunks. Every chunk
 * is compared with every token, and the best applicable category contributes
 * `weight * token.length * factor`:
 *   - chunk equals the token        -> TOKEN_EXACT_MATCH
 *   - chunk starts with the token   -> TOKEN_PREFIX_MATCH
 *   - chunk contains the token      -> TOKEN_CONTAINS_MATCH
 *   - otherwise, a capped fuzzy contribution based on edit distance
 *
 * The fuzzy contribution is only considered when `enableFuzzyMatching` is true, the
 * normalized token has at least `FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH` characters,
 * and the running fuzzy total is still below `SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE`.
 * Fuzzy points are also accumulated in `this.fuzzyScore`.
 *
 * @param tokens search tokens
 * @param str text to score against (e.g. note title or note path title)
 * @param factor multiplier expressing how much matches in `str` are worth
 * @param enableFuzzyMatching when false, only exact/prefix/contains matches are scored
 */
addScoreForStrings(tokens: string[], str: string, factor: number, enableFuzzyMatching: boolean = true) {
    const chunks = normalizeSearchText(str.toLowerCase()).split(" ");

    // Normalize every token once up front rather than once per (chunk, token) pair.
    const preparedTokens = tokens.map((token) => ({
        token,
        normalizedToken: normalizeSearchText(token.toLowerCase())
    }));

    let tokenScore = 0;

    for (const chunk of chunks) {
        for (const { token, normalizedToken } of preparedTokens) {
            if (chunk === normalizedToken) {
                tokenScore += SCORE_WEIGHTS.TOKEN_EXACT_MATCH * token.length * factor;
            } else if (chunk.startsWith(normalizedToken)) {
                tokenScore += SCORE_WEIGHTS.TOKEN_PREFIX_MATCH * token.length * factor;
            } else if (chunk.includes(normalizedToken)) {
                tokenScore += SCORE_WEIGHTS.TOKEN_CONTAINS_MATCH * token.length * factor;
            } else if (
                // Honor the caller's switch, and run the cheap checks before the
                // comparatively expensive edit-distance computation.
                enableFuzzyMatching &&
                normalizedToken.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
                this.fuzzyScore < SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE
            ) {
                // Try fuzzy matching for individual tokens with caps applied
                const editDistance = calculateOptimizedEditDistance(chunk, normalizedToken, FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);

                if (editDistance <= FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE) {
                    const fuzzyWeight = SCORE_WEIGHTS.TOKEN_FUZZY_MATCH * (1 - editDistance / FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE);

                    // Apply caps: limit token length multiplier and per-token contribution
                    const cappedTokenLength = Math.min(token.length, SCORE_WEIGHTS.MAX_FUZZY_TOKEN_LENGTH_MULTIPLIER);
                    const fuzzyTokenScore = Math.min(
                        fuzzyWeight * cappedTokenLength * factor,
                        SCORE_WEIGHTS.MAX_FUZZY_SCORE_PER_TOKEN
                    );

                    tokenScore += fuzzyTokenScore;
                    this.fuzzyScore += fuzzyTokenScore;
                }
            }
        }
    }

    this.score += tokenScore;
}
/**
 * Tells whether `query` appears in `text` delimited by single spaces: at the start
 * followed by a space, at the end preceded by a space, or in the middle surrounded by
 * spaces. A text that merely equals the query does not qualify.
 */
private isWordMatch(text: string, query: string): boolean {
    if (text.startsWith(`${query} `)) {
        return true;
    }
    if (text.endsWith(` ${query}`)) {
        return true;
    }
    return text.includes(` ${query} `);
}
/**
 * Scores a near-miss between the whole title and the whole query.
 *
 * Returns 0 when the running fuzzy total has already reached
 * `SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE`, when the query is shorter than
 * `FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH`, when the edit distance exceeds
 * `FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE`, or when the edit distance is more than 30%
 * of the longer string. Otherwise the result is 70% of `TITLE_WORD_MATCH` scaled by
 * similarity, limited to 30% of `MAX_TOTAL_FUZZY_SCORE`.
 *
 * @param title normalized note title
 * @param query normalized full-text query
 */
private calculateFuzzyTitleScore(title: string, query: string): number {
    const totalCap = SCORE_WEIGHTS.MAX_TOTAL_FUZZY_SCORE;

    // Check if we've already hit the fuzzy scoring cap
    if (this.fuzzyScore >= totalCap) {
        return 0;
    }

    const distanceLimit = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE;
    const editDistance = calculateOptimizedEditDistance(title, query, distanceLimit);
    const longerLength = Math.max(title.length, query.length);
    const relativeDistance = editDistance / longerLength;

    // Only apply fuzzy matching if the query is reasonably long and edit distance is small
    const qualifies =
        query.length >= FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH &&
        editDistance <= distanceLimit &&
        relativeDistance <= 0.3;
    if (!qualifies) {
        return 0;
    }

    const similarity = 1 - relativeDistance;
    // Reduced weight for fuzzy matches, capped so they stay within reasonable bounds
    return Math.min(SCORE_WEIGHTS.TITLE_WORD_MATCH * similarity * 0.7, totalCap * 0.3);
}
}
export default SearchResult;

View File

@ -1,3 +1,5 @@
import { normalizeSearchText, fuzzyMatchWord, FUZZY_SEARCH_CONFIG } from "../utils/text_utils.js";
const cachedRegexes: Record<string, RegExp> = {};
function getRegex(str: string) {
@ -20,7 +22,41 @@ const stringComparators: Record<string, Comparator<string>> = {
"*=": (comparedValue) => (val) => !!val && val.endsWith(comparedValue),
"=*": (comparedValue) => (val) => !!val && val.startsWith(comparedValue),
"*=*": (comparedValue) => (val) => !!val && val.includes(comparedValue),
"%=": (comparedValue) => (val) => !!val && !!getRegex(comparedValue).test(val)
"%=": (comparedValue) => (val) => !!val && !!getRegex(comparedValue).test(val),
"~=": (comparedValue) => (val) => {
if (!val || !comparedValue) return false;
// Validate minimum length for fuzzy search to prevent false positives
if (comparedValue.length < FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) {
return val.includes(comparedValue);
}
const normalizedVal = normalizeSearchText(val);
const normalizedCompared = normalizeSearchText(comparedValue);
// First try exact substring match
if (normalizedVal.includes(normalizedCompared)) {
return true;
}
// Then try fuzzy word matching
const words = normalizedVal.split(/\s+/);
return words.some(word => fuzzyMatchWord(normalizedCompared, word));
},
"~*": (comparedValue) => (val) => {
if (!val || !comparedValue) return false;
// Validate minimum length for fuzzy search
if (comparedValue.length < FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH) {
return val.includes(comparedValue);
}
const normalizedVal = normalizeSearchText(val);
const normalizedCompared = normalizeSearchText(comparedValue);
// For ~* operator, use fuzzy matching across the entire content
return fuzzyMatchWord(normalizedCompared, normalizedVal);
}
};
const numericComparators: Record<string, Comparator<number>> = {

View File

@ -40,7 +40,7 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext) {
}
}
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%="]);
const OPERATORS = new Set(["=", "!=", "*=*", "*=", "=*", ">", ">=", "<", "<=", "%=", "~=", "~*"]);
function isOperator(token: TokenData) {
if (Array.isArray(token)) {

View File

@ -0,0 +1,241 @@
import { describe, it, expect, beforeEach } from "vitest";
import searchService from "./search.js";
import BNote from "../../../becca/entities/bnote.js";
import BBranch from "../../../becca/entities/bbranch.js";
import SearchContext from "../search_context.js";
import becca from "../../../becca/becca.js";
import { findNoteByTitle, note, NoteBuilder } from "../../../test/becca_mocking.js";
describe("Progressive Search Strategy", () => {
// Builder wrapping the root note; every test hangs its fixture notes off it via `.child(...)`.
let rootNote: any;
// Start each test from an empty becca state containing only the root note
// and the branch that attaches it to the "none" parent.
beforeEach(() => {
becca.reset();
rootNote = new NoteBuilder(new BNote({ noteId: "root", title: "root", type: "text" }));
new BBranch({
branchId: "none_root",
noteId: "root",
parentNoteId: "none",
notePosition: 10
});
});
// Covers the exact-only side of the search: results that come from exact matching
// must carry the scores an exact-only search would give them.
describe("Phase 1: Exact Matches Only", () => {
    it("should complete search with exact matches when sufficient results found", () => {
        // Create notes with exact matches
        rootNote
            .child(note("Document Analysis One"))
            .child(note("Document Report Two"))
            .child(note("Document Review Three"))
            .child(note("Document Summary Four"))
            .child(note("Document Overview Five"))
            .child(note("Documnt Analysis Six")); // This has a typo that should require fuzzy matching

        const searchContext = new SearchContext();
        const searchResults = searchService.findResultsWithQuery("document", searchContext);

        // Should find 5 exact matches and not need fuzzy matching
        expect(searchResults.length).toEqual(5);

        // Verify all results have high scores (exact matches)
        const highQualityResults = searchResults.filter(result => result.score >= 10);
        expect(highQualityResults.length).toEqual(5);

        // The typo document should not be in results since we have enough exact matches
        expect(findNoteByTitle(searchResults, "Documnt Analysis Six")).toBeFalsy();
    });

    it("should use exact match scoring only in Phase 1", () => {
        rootNote
            .child(note("Testing Exact Match"))
            .child(note("Test Document"))
            .child(note("Another Test"));

        const searchContext = new SearchContext();
        const searchResults = searchService.findResultsWithQuery("test", searchContext);

        // Reference run with fuzzy matching switched off: its scores are, by construction,
        // produced by exact/prefix/contains matching only.
        const exactOnlyContext = new SearchContext();
        exactOnlyContext.enableFuzzyMatching = false;
        const exactOnlyResults = searchService.findResultsWithQuery("test", exactOnlyContext);

        const exactScoreByNoteId: Record<string, number> = {};
        for (const result of exactOnlyResults) {
            exactScoreByNoteId[result.noteId] = result.score;
        }

        expect(searchResults.length).toBeGreaterThan(0);
        expect(searchResults.length).toBe(exactOnlyResults.length);

        // Every exactly-matching note must keep the score it gets without fuzzy matching.
        // (Checking the numeric shape of the score, e.g. `score % 0.5`, says nothing about
        // how the score was produced and breaks for perfectly valid exact scores.)
        for (const result of searchResults) {
            expect(result.score).toBeGreaterThan(0);
            expect(result.score).toBe(exactScoreByNoteId[result.noteId]);
        }
    });
});
// Scenarios where too few exact matches exist, so typo-tolerant (fuzzy) results
// are expected to be added after the exact ones, without duplicates.
describe("Phase 2: Fuzzy Fallback", () => {
it("should trigger fuzzy matching when insufficient exact matches", () => {
// Create only a few notes, some with typos
rootNote
.child(note("Document One"))
.child(note("Report Two"))
.child(note("Anaylsis Three")) // Typo: "Analysis"
.child(note("Sumary Four")); // Typo: "Summary"
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("analysis", searchContext);
// Should find the typo through fuzzy matching
expect(searchResults.length).toBeGreaterThan(0);
expect(findNoteByTitle(searchResults, "Anaylsis Three")).toBeTruthy();
});
it("should merge exact and fuzzy results with exact matches always ranked higher", () => {
rootNote
.child(note("Analysis Report")) // Exact match
.child(note("Data Analysis")) // Exact match
.child(note("Anaylsis Doc")) // Fuzzy match
.child(note("Statistical Anlaysis")); // Fuzzy match
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("analysis", searchContext);
expect(searchResults.length).toBe(4);
// Get the note titles in result order
const resultTitles = searchResults.map(r => becca.notes[r.noteId].title);
// Find positions of exact and fuzzy matches
const exactPositions = resultTitles.map((title, index) =>
title.toLowerCase().includes("analysis") ? index : -1
).filter(pos => pos !== -1);
const fuzzyPositions = resultTitles.map((title, index) =>
(title.includes("Anaylsis") || title.includes("Anlaysis")) ? index : -1
).filter(pos => pos !== -1);
expect(exactPositions.length).toBe(2);
expect(fuzzyPositions.length).toBe(2);
// CRITICAL: All exact matches must come before all fuzzy matches
const lastExactPosition = Math.max(...exactPositions);
const firstFuzzyPosition = Math.min(...fuzzyPositions);
expect(lastExactPosition).toBeLessThan(firstFuzzyPosition);
});
it("should not duplicate results between phases", () => {
rootNote
.child(note("Test Document")) // Would match in both phases
.child(note("Tset Report")); // Only fuzzy match
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("test", searchContext);
// Should only have unique results
const noteIds = searchResults.map(r => r.noteId);
// A Set drops repeated ids, so equal lengths mean no note was returned twice
const uniqueNoteIds = [...new Set(noteIds)];
expect(noteIds.length).toBe(uniqueNoteIds.length);
expect(findNoteByTitle(searchResults, "Test Document")).toBeTruthy();
expect(findNoteByTitle(searchResults, "Tset Report")).toBeTruthy();
});
});
// Checks the conditions under which exact results alone are considered insufficient
// (too few results, or results of too low a score) and fuzzy results get added.
describe("Result Sufficiency Thresholds", () => {
it("should respect minimum result count threshold", () => {
// Create exactly 4 high-quality results (below threshold of 5)
rootNote
.child(note("Test One"))
.child(note("Test Two"))
.child(note("Test Three"))
.child(note("Test Four"))
.child(note("Tset Five")); // Typo that should be found via fuzzy
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("test", searchContext);
// Should proceed to Phase 2 and include fuzzy match
expect(searchResults.length).toBe(5);
expect(findNoteByTitle(searchResults, "Tset Five")).toBeTruthy();
});
it("should respect minimum quality score threshold", () => {
// Create notes that might have low exact match scores
rootNote
.child(note("Testing Document")) // Should have decent score
.child(note("Document with test inside")) // Lower score due to position
.child(note("Another test case"))
.child(note("Test case example"))
.child(note("Tset with typo")); // Fuzzy match
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("test", searchContext);
// Should include fuzzy results if exact results don't meet quality threshold
// (only 4 notes contain "test" verbatim, so more than 4 results implies the typo note was added)
expect(searchResults.length).toBeGreaterThan(4);
});
});
// Verifies that fuzzy contributions to the score stay bounded, so that a note with
// many near-misses cannot outscore a note that matches the query exactly.
describe("Fuzzy Score Management", () => {
it("should cap fuzzy token scores to prevent outranking exact matches", () => {
// Create note with exact match
rootNote.child(note("Test Document"));
// Create note that could accumulate high fuzzy scores
rootNote.child(note("Tset Documnt with many fuzzy tockens for testng")); // Multiple typos
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("test document", searchContext);
expect(searchResults.length).toBe(2);
// Find the exact and fuzzy match results
const exactResult = searchResults.find(r => becca.notes[r.noteId].title === "Test Document");
const fuzzyResult = searchResults.find(r => becca.notes[r.noteId].title.includes("Tset"));
expect(exactResult).toBeTruthy();
expect(fuzzyResult).toBeTruthy();
// Exact match should always score higher than fuzzy, even with multiple fuzzy matches
expect(exactResult!.score).toBeGreaterThan(fuzzyResult!.score);
});
it("should enforce maximum total fuzzy score per search", () => {
// Create note with many potential fuzzy matches
rootNote.child(note("Tset Documnt Anaylsis Sumary Reportng")); // Many typos
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("test document analysis summary reporting", searchContext);
expect(searchResults.length).toBe(1);
// Total score should be bounded despite many fuzzy matches
// NOTE(review): 500 is a loose upper bound chosen by this test, not a constant read from the scoring code
expect(searchResults[0].score).toBeLessThan(500); // Should not exceed reasonable bounds due to caps
});
});
// Confirms that `SearchContext.enableFuzzyMatching` switches typo-tolerant results on and off.
describe("SearchContext Integration", () => {
it("should respect enableFuzzyMatching flag", () => {
rootNote
.child(note("Test Document"))
.child(note("Tset Report")); // Typo
// Test with fuzzy matching disabled
const exactOnlyContext = new SearchContext();
exactOnlyContext.enableFuzzyMatching = false;
const exactResults = searchService.findResultsWithQuery("test", exactOnlyContext);
expect(exactResults.length).toBe(1);
expect(findNoteByTitle(exactResults, "Test Document")).toBeTruthy();
expect(findNoteByTitle(exactResults, "Tset Report")).toBeFalsy();
// Test with fuzzy matching enabled (default)
// The flag is deliberately left untouched here to exercise the default behaviour
const fuzzyContext = new SearchContext();
const fuzzyResults = searchService.findResultsWithQuery("test", fuzzyContext);
expect(fuzzyResults.length).toBe(2);
expect(findNoteByTitle(fuzzyResults, "Tset Report")).toBeTruthy();
});
});
// Degenerate inputs: a query that matches nothing must produce an empty list, not an error.
describe("Edge Cases", () => {
it("should handle empty search results gracefully", () => {
rootNote.child(note("Unrelated Content"));
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("nonexistent", searchContext);
expect(searchResults.length).toBe(0);
});
});
});

View File

@ -553,6 +553,70 @@ describe("Search", () => {
expect(becca.notes[searchResults[0].noteId].title).toEqual("Reddit is bad");
});
// Smoke test for search latency on a small tree of ten labelled notes under "Europe".
// NOTE(review): the assertion measures wall-clock time with Date.now(), so it may be
// sensitive to machine load; the 1000 ms limit is generous for this data size.
it("search completes in reasonable time", () => {
// Create a moderate-sized dataset to test performance
const countries = ["Austria", "Belgium", "Croatia", "Denmark", "Estonia", "Finland", "Germany", "Hungary", "Ireland", "Japan"];
const europeanCountries = note("Europe");
countries.forEach(country => {
europeanCountries.child(note(country).label("type", "country").label("continent", "Europe"));
});
rootNote.child(europeanCountries);
const searchContext = new SearchContext();
const startTime = Date.now();
// Perform a search that exercises multiple features
const searchResults = searchService.findResultsWithQuery("#type=country AND continent", searchContext);
const endTime = Date.now();
const duration = endTime - startTime;
// Search should complete in under 1 second for reasonable dataset
expect(duration).toBeLessThan(1000);
// One result per country note created above
expect(searchResults.length).toEqual(10);
});
// Ordering guarantee: every title containing the query verbatim must be listed before
// any title that only matches through a typo.
it("progressive search always puts exact matches before fuzzy matches", () => {
rootNote
.child(note("Analysis Report")) // Exact match
.child(note("Data Analysis")) // Exact match
.child(note("Test Analysis")) // Exact match
.child(note("Advanced Anaylsis")) // Fuzzy match (typo)
.child(note("Quick Anlaysis")); // Fuzzy match (typo)
const searchContext = new SearchContext();
const searchResults = searchService.findResultsWithQuery("analysis", searchContext);
// With only 3 exact matches (below threshold), fuzzy should be triggered
// Should find all 5 matches but exact ones should come first
expect(searchResults.length).toEqual(5);
// Get note titles in result order
const resultTitles = searchResults.map(r => becca.notes[r.noteId].title);
// Find all exact matches (contain "analysis")
const exactMatchIndices = resultTitles.map((title, index) =>
title.toLowerCase().includes("analysis") ? index : -1
).filter(index => index !== -1);
// Find all fuzzy matches (contain typos)
const fuzzyMatchIndices = resultTitles.map((title, index) =>
(title.includes("Anaylsis") || title.includes("Anlaysis")) ? index : -1
).filter(index => index !== -1);
expect(exactMatchIndices.length).toEqual(3);
expect(fuzzyMatchIndices.length).toEqual(2);
// CRITICAL: All exact matches must appear before all fuzzy matches
const lastExactIndex = Math.max(...exactMatchIndices);
const firstFuzzyIndex = Math.min(...fuzzyMatchIndices);
expect(lastExactIndex).toBeLessThan(firstFuzzyIndex);
});
// FIXME: test what happens when we order without any filter criteria
// it("comparison between labels", () => {

View File

@ -17,6 +17,8 @@ import type { SearchParams, TokenStructure } from "./types.js";
import type Expression from "../expressions/expression.js";
import sql from "../../sql.js";
import scriptService from "../../script.js";
import striptags from "striptags";
import protectedSessionService from "../../protected_session.js";
export interface SearchNoteResult {
searchResultNoteIds: string[];
@ -235,6 +237,41 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
loadNeededInfoFromDatabase();
}
// If there's an explicit orderBy clause, skip progressive search
// as it would interfere with the ordering
if (searchContext.orderBy) {
// For ordered queries, don't use progressive search but respect
// the original fuzzy matching setting
return performSearch(expression, searchContext, searchContext.enableFuzzyMatching);
}
// If fuzzy matching is explicitly disabled, skip progressive search
if (!searchContext.enableFuzzyMatching) {
return performSearch(expression, searchContext, false);
}
// Phase 1: Try exact matches first (without fuzzy matching)
const exactResults = performSearch(expression, searchContext, false);
// Check if we have sufficient high-quality results
const minResultThreshold = 5;
const minScoreForQuality = 10; // Minimum score to consider a result "high quality"
const highQualityResults = exactResults.filter(result => result.score >= minScoreForQuality);
// If we have enough high-quality exact matches, return them
if (highQualityResults.length >= minResultThreshold) {
return exactResults;
}
// Phase 2: Add fuzzy matching as fallback when exact matches are insufficient
const fuzzyResults = performSearch(expression, searchContext, true);
// Merge results, ensuring exact matches always rank higher than fuzzy matches
return mergeExactAndFuzzyResults(exactResults, fuzzyResults);
}
function performSearch(expression: Expression, searchContext: SearchContext, enableFuzzyMatching: boolean): SearchResult[] {
const allNoteSet = becca.getAllNoteSet();
const noteIdToNotePath: Record<string, string[]> = {};
@ -242,6 +279,10 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
noteIdToNotePath
};
// Store original fuzzy setting and temporarily override it
const originalFuzzyMatching = searchContext.enableFuzzyMatching;
searchContext.enableFuzzyMatching = enableFuzzyMatching;
const noteSet = expression.execute(allNoteSet, executionContext, searchContext);
const searchResults = noteSet.notes.map((note) => {
@ -255,9 +296,12 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
});
for (const res of searchResults) {
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens);
res.computeScore(searchContext.fulltextQuery, searchContext.highlightedTokens, enableFuzzyMatching);
}
// Restore original fuzzy setting
searchContext.enableFuzzyMatching = originalFuzzyMatching;
if (!noteSet.sorted) {
searchResults.sort((a, b) => {
if (a.score > b.score) {
@ -279,6 +323,49 @@ function findResultsWithExpression(expression: Expression, searchContext: Search
return searchResults;
}
/**
 * Relevance ordering shared by both result groups: higher score first, then the
 * shallower note path, then alphabetical note path title. Returns 0 when all three
 * keys are equal so the comparator stays consistent and the (stable) sort keeps
 * such results in their incoming order.
 */
function compareByScoreThenDepth(a: SearchResult, b: SearchResult): number {
    if (a.score > b.score) {
        return -1;
    }
    if (a.score < b.score) {
        return 1;
    }

    // if score does not decide then sort results by depth of the note.
    if (a.notePathArray.length !== b.notePathArray.length) {
        return a.notePathArray.length < b.notePathArray.length ? -1 : 1;
    }

    if (a.notePathTitle === b.notePathTitle) {
        return 0;
    }
    return a.notePathTitle < b.notePathTitle ? -1 : 1;
}

/**
 * Combines the results of an exact-only search with those of a fuzzy-enabled search.
 *
 * Notes already present in `exactResults` (compared by `noteId`) are dropped from the
 * fuzzy list, each group is ordered by relevance, and the exact group is placed in
 * front of the fuzzy group regardless of scores.
 *
 * Neither input array is modified; a new array is returned.
 *
 * @param exactResults results found without fuzzy matching
 * @param fuzzyResults results found with fuzzy matching enabled
 * @returns exact results followed by the fuzzy-only results
 */
function mergeExactAndFuzzyResults(exactResults: SearchResult[], fuzzyResults: SearchResult[]): SearchResult[] {
    // Note IDs already covered by the exact phase, used for deduplication
    const exactNoteIds = new Set(exactResults.map((result) => result.noteId));

    // Sort copies so the caller's arrays keep their original order
    const sortedExactResults = [...exactResults].sort(compareByScoreThenDepth);
    const additionalFuzzyResults = fuzzyResults
        .filter((result) => !exactNoteIds.has(result.noteId))
        .sort(compareByScoreThenDepth);

    // CRITICAL: Always put exact matches before fuzzy matches, regardless of scores
    return [...sortedExactResults, ...additionalFuzzyResults];
}
function parseQueryToExpression(query: string, searchContext: SearchContext) {
const { fulltextQuery, fulltextTokens, expressionTokens } = lex(query);
searchContext.fulltextQuery = fulltextQuery;
@ -328,6 +415,16 @@ function findResultsWithQuery(query: string, searchContext: SearchContext): Sear
return [];
}
// If the query starts with '#', it's a pure expression query.
// Don't use progressive search for these as they may have complex
// ordering or other logic that shouldn't be interfered with.
const isPureExpressionQuery = query.trim().startsWith('#');
if (isPureExpressionQuery) {
// For pure expression queries, use standard search without progressive phases
return performSearch(expression, searchContext, searchContext.enableFuzzyMatching);
}
return findResultsWithExpression(expression, searchContext);
}
@ -337,6 +434,91 @@ function findFirstNoteWithQuery(query: string, searchContext: SearchContext): BN
return searchResults.length > 0 ? becca.notes[searchResults[0].noteId] : null;
}
/**
 * Builds a short plain-text excerpt of a note's content for display next to a search result.
 *
 * The excerpt is centred on the first occurrence of the first search token that is found
 * in the content; when no token is found it starts at the beginning of the content.
 * Leading/trailing "..." mark that the excerpt does not start/end at the content boundary.
 *
 * An empty string is returned when the note does not exist, is not of type
 * text/code/mermaid/canvas/mindMap, has no string content, is protected without an
 * available protected session (or decryption throws), or when any error occurs
 * (the error is logged).
 *
 * @param noteId ID of the note to read from becca
 * @param searchTokens tokens to look for, tried in order
 * @param maxLength maximum number of content characters in the excerpt (before "..." markers)
 * @returns the excerpt, or "" when none can be produced
 */
function extractContentSnippet(noteId: string, searchTokens: string[], maxLength: number = 200): string {
    const note = becca.notes[noteId];
    if (!note) {
        return "";
    }

    // Only extract content for text-based notes
    if (!["text", "code", "mermaid", "canvas", "mindMap"].includes(note.type)) {
        return "";
    }

    try {
        let content = note.getContent();
        if (!content || typeof content !== "string") {
            return "";
        }

        // Handle protected notes
        // NOTE(review): confirm that getContent() does not already return decrypted content
        // for protected notes, otherwise this would decrypt twice.
        if (note.isProtected && protectedSessionService.isProtectedSessionAvailable()) {
            try {
                content = protectedSessionService.decryptString(content) || "";
            } catch (e) {
                return ""; // Can't decrypt, don't show content
            }
        } else if (note.isProtected) {
            return ""; // Protected but no session available
        }

        // Strip HTML tags for text notes
        if (note.type === "text") {
            content = striptags(content);
        }

        // Normalize whitespace
        content = content.replace(/\s+/g, " ").trim();
        if (!content) {
            return "";
        }

        // Try to find a snippet around the first matching token.
        // NOTE(review): the match index is computed on the normalized text but applied to the
        // original text; this assumes normalizeString() preserves character positions - confirm.
        const normalizedContent = normalizeString(content.toLowerCase());
        let snippetStart = 0;
        for (const token of searchTokens) {
            const normalizedToken = normalizeString(token.toLowerCase());
            const matchIndex = normalizedContent.indexOf(normalizedToken);
            if (matchIndex !== -1) {
                // Center the snippet around the match. Flooring keeps the start a whole index,
                // so an odd maxLength cannot yield e.g. 0.5, which would be cut at index 0
                // yet still be treated as "not at the beginning" below.
                snippetStart = Math.max(0, Math.floor(matchIndex - maxLength / 2));
                break;
            }
        }

        // Extract snippet
        let snippet = content.substring(snippetStart, snippetStart + maxLength);

        // Try to start/end at word boundaries
        if (snippetStart > 0) {
            // Drop a partial leading word, but only if that costs fewer than 20 characters
            const firstSpace = snippet.indexOf(" ");
            if (firstSpace > 0 && firstSpace < 20) {
                snippet = snippet.substring(firstSpace + 1);
            }
            snippet = "..." + snippet;
        }

        if (snippetStart + maxLength < content.length) {
            // Drop a partial trailing word when the last space lies within the final 20 characters
            const lastSpace = snippet.lastIndexOf(" ");
            if (lastSpace > snippet.length - 20) {
                snippet = snippet.substring(0, lastSpace);
            }
            snippet = snippet + "...";
        }

        return snippet;
    } catch (e) {
        log.error(`Error extracting content snippet for note ${noteId}: ${e}`);
        return "";
    }
}
function searchNotesForAutocomplete(query: string, fastSearch: boolean = true) {
const searchContext = new SearchContext({
fastSearch: fastSearch,
@ -351,6 +533,11 @@ function searchNotesForAutocomplete(query: string, fastSearch: boolean = true) {
const trimmed = allSearchResults.slice(0, 200);
// Extract content snippets
for (const result of trimmed) {
result.contentSnippet = extractContentSnippet(result.noteId, searchContext.highlightedTokens);
}
highlightSearchResults(trimmed, searchContext.highlightedTokens, searchContext.ignoreInternalAttributes);
return trimmed.map((result) => {
@ -360,6 +547,8 @@ function searchNotesForAutocomplete(query: string, fastSearch: boolean = true) {
noteTitle: title,
notePathTitle: result.notePathTitle,
highlightedNotePathTitle: result.highlightedNotePathTitle,
contentSnippet: result.contentSnippet,
highlightedContentSnippet: result.highlightedContentSnippet,
icon: icon ?? "bx bx-note"
};
});
@ -381,26 +570,11 @@ function highlightSearchResults(searchResults: SearchResult[], highlightedTokens
highlightedTokens.sort((a, b) => (a.length > b.length ? -1 : 1));
for (const result of searchResults) {
const note = becca.notes[result.noteId];
result.highlightedNotePathTitle = result.notePathTitle.replace(/[<{}]/g, "");
if (highlightedTokens.find((token) => note.type.includes(token))) {
result.highlightedNotePathTitle += ` "type: ${note.type}'`;
}
if (highlightedTokens.find((token) => note.mime.includes(token))) {
result.highlightedNotePathTitle += ` "mime: ${note.mime}'`;
}
for (const attr of note.getAttributes()) {
if (attr.type === "relation" && attr.name === "internalLink" && ignoreInternalAttributes) {
continue;
}
if (highlightedTokens.find((token) => normalize(attr.name).includes(token) || normalize(attr.value).includes(token))) {
result.highlightedNotePathTitle += ` "${formatAttribute(attr)}'`;
}
// Initialize highlighted content snippet
if (result.contentSnippet) {
result.highlightedContentSnippet = escapeHtml(result.contentSnippet).replace(/[<{}]/g, "");
}
}
@ -419,40 +593,36 @@ function highlightSearchResults(searchResults: SearchResult[], highlightedTokens
const tokenRegex = new RegExp(escapeRegExp(token), "gi");
let match;
// Find all matches
if (!result.highlightedNotePathTitle) {
continue;
// Highlight in note path title
if (result.highlightedNotePathTitle) {
const titleRegex = new RegExp(escapeRegExp(token), "gi");
while ((match = titleRegex.exec(normalizeString(result.highlightedNotePathTitle))) !== null) {
result.highlightedNotePathTitle = wrapText(result.highlightedNotePathTitle, match.index, token.length, "{", "}");
// 2 characters are added, so we need to adjust the index
titleRegex.lastIndex += 2;
}
}
while ((match = tokenRegex.exec(normalizeString(result.highlightedNotePathTitle))) !== null) {
result.highlightedNotePathTitle = wrapText(result.highlightedNotePathTitle, match.index, token.length, "{", "}");
// 2 characters are added, so we need to adjust the index
tokenRegex.lastIndex += 2;
// Highlight in content snippet
if (result.highlightedContentSnippet) {
const contentRegex = new RegExp(escapeRegExp(token), "gi");
while ((match = contentRegex.exec(normalizeString(result.highlightedContentSnippet))) !== null) {
result.highlightedContentSnippet = wrapText(result.highlightedContentSnippet, match.index, token.length, "{", "}");
// 2 characters are added, so we need to adjust the index
contentRegex.lastIndex += 2;
}
}
}
}
for (const result of searchResults) {
if (!result.highlightedNotePathTitle) {
continue;
if (result.highlightedNotePathTitle) {
result.highlightedNotePathTitle = result.highlightedNotePathTitle.replace(/{/g, "<b>").replace(/}/g, "</b>");
}
result.highlightedNotePathTitle = result.highlightedNotePathTitle.replace(/"/g, "<small>").replace(/'/g, "</small>").replace(/{/g, "<b>").replace(/}/g, "</b>");
}
}
function formatAttribute(attr: BAttribute) {
if (attr.type === "relation") {
return `~${escapeHtml(attr.name)}=…`;
} else if (attr.type === "label") {
let label = `#${escapeHtml(attr.name)}`;
if (attr.value) {
const val = /[^\w-]/.test(attr.value) ? `"${attr.value}"` : attr.value;
label += `=${escapeHtml(val)}`;
if (result.highlightedContentSnippet) {
result.highlightedContentSnippet = result.highlightedContentSnippet.replace(/{/g, "<b>").replace(/}/g, "</b>");
}
return label;
}
}

View File

@ -0,0 +1,65 @@
import { describe, it, expect } from "vitest";
import { calculateOptimizedEditDistance, validateFuzzySearchTokens, fuzzyMatchWord } from './text_utils.js';
// Unit tests for the fuzzy-matching primitives exported by text_utils:
// bounded edit distance, token validation for fuzzy operators, and single-word fuzzy matching.
describe('Fuzzy Search Core', () => {
describe('calculateOptimizedEditDistance', () => {
it('calculates edit distance for common typos', () => {
expect(calculateOptimizedEditDistance('hello', 'helo')).toBe(1);
// Swapped adjacent letters count as two edits (no transposition operation)
expect(calculateOptimizedEditDistance('world', 'wrold')).toBe(2);
expect(calculateOptimizedEditDistance('cafe', 'café')).toBe(1);
expect(calculateOptimizedEditDistance('identical', 'identical')).toBe(0);
});
it('handles performance safety with oversized input', () => {
const longString = 'a'.repeat(2000);
// The lengths differ by far more than the default maximum distance,
// so the function reports "exceeded" (maxDistance + 1) without a full comparison
const result = calculateOptimizedEditDistance(longString, 'short');
expect(result).toBeGreaterThan(2); // Should use fallback heuristic
});
});
describe('validateFuzzySearchTokens', () => {
it('validates minimum length requirements for fuzzy operators', () => {
const result1 = validateFuzzySearchTokens(['ab'], '~=');
expect(result1.isValid).toBe(false);
expect(result1.error).toContain('at least 3 characters');
const result2 = validateFuzzySearchTokens(['hello'], '~=');
expect(result2.isValid).toBe(true);
const result3 = validateFuzzySearchTokens(['ok'], '=');
expect(result3.isValid).toBe(true); // Non-fuzzy operators allow short tokens
});
it('validates token types and empty arrays', () => {
expect(validateFuzzySearchTokens([], '=')).toEqual({
isValid: false,
error: 'Invalid tokens: at least one token is required'
});
expect(validateFuzzySearchTokens([''], '=')).toEqual({
isValid: false,
error: 'Invalid tokens: empty or whitespace-only tokens are not allowed'
});
});
});
describe('fuzzyMatchWord', () => {
it('matches words with diacritics normalization', () => {
expect(fuzzyMatchWord('cafe', 'café')).toBe(true);
expect(fuzzyMatchWord('naive', 'naïve')).toBe(true);
});
it('matches with typos within distance threshold', () => {
expect(fuzzyMatchWord('hello', 'helo')).toBe(true);
expect(fuzzyMatchWord('world', 'wrold')).toBe(true);
expect(fuzzyMatchWord('test', 'tset')).toBe(true);
expect(fuzzyMatchWord('test', 'xyz')).toBe(false);
});
it('handles edge cases safely', () => {
expect(fuzzyMatchWord('', 'test')).toBe(false);
expect(fuzzyMatchWord('test', '')).toBe(false);
expect(fuzzyMatchWord('a', 'b')).toBe(false); // Very short tokens
});
});
});

View File

@ -0,0 +1,334 @@
"use strict";
import { normalize } from "../../utils.js";
/**
* Shared text processing utilities for search functionality
*/
// Configuration constants for fuzzy matching
export const FUZZY_SEARCH_CONFIG = {
// Minimum token length for fuzzy operators to prevent false positives
MIN_FUZZY_TOKEN_LENGTH: 3,
// Maximum edit distance for fuzzy matching
MAX_EDIT_DISTANCE: 2,
// Maximum proximity distance for phrase matching (in words)
MAX_PHRASE_PROXIMITY: 10,
// Absolute hard limits for extreme cases - only to prevent system crashes
ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
// Performance warning thresholds - inform user but still attempt search
PERFORMANCE_WARNING_SIZE: 5 * 1024 * 1024, // 5MB - warn about potential performance impact
PERFORMANCE_WARNING_WORDS: 100000, // 100K words - warn about word count impact
// Progressive processing thresholds for very large content
PROGRESSIVE_PROCESSING_SIZE: 10 * 1024 * 1024, // 10MB - use progressive processing
PROGRESSIVE_PROCESSING_WORDS: 500000, // 500K words - use progressive processing
// Performance thresholds
EARLY_TERMINATION_THRESHOLD: 3,
} as const;
/**
 * Central text normalization entry point for the search components.
 *
 * Delegates to the shared `normalize` helper from utils so that every search
 * component normalizes text the same way. Per this module's documented examples:
 * - "café" -> "cafe"
 * - "naïve" -> "naive"
 * - "HELLO WORLD" -> "hello world"
 *
 * Empty strings and non-string values are mapped to the empty string instead of
 * being passed on to `normalize`.
 *
 * @param text The text to normalize
 * @returns The normalized text, or '' for empty / non-string input
 */
export function normalizeSearchText(text: string): string {
    const hasUsableText = typeof text === 'string' && text.length > 0;

    // Use shared normalize function for consistency across the codebase
    return hasUsableText ? normalize(text) : '';
}
/**
 * Bounded Levenshtein edit distance (insertions, deletions, substitutions; UTF-16 code
 * units are compared, transpositions count as two edits).
 *
 * Only distances up to `maxDistance` are of interest, so the computation is restricted to
 * the diagonal band of the edit matrix where `|i - j| <= maxDistance`: any alignment that
 * leaves the band already costs more than `maxDistance`. This keeps the work at
 * O(len1 * maxDistance) time and O(maxDistance) memory, which makes the result exact for
 * strings of any length (no length-based guessing for long inputs).
 *
 * @param str1 First string
 * @param str2 Second string
 * @param maxDistance Maximum allowed distance (for early termination)
 * @returns The edit distance between the strings, or maxDistance + 1 if exceeded
 * @throws Error if either argument is not a string, or if `maxDistance` is negative or not an integer
 */
export function calculateOptimizedEditDistance(str1: string, str2: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): number {
    // Input validation
    if (typeof str1 !== 'string' || typeof str2 !== 'string') {
        throw new Error('Both arguments must be strings');
    }

    if (maxDistance < 0 || !Number.isInteger(maxDistance)) {
        throw new Error('maxDistance must be a non-negative integer');
    }

    const len1 = str1.length;
    const len2 = str2.length;
    // Sentinel returned (and used inside the table) for "more than maxDistance"
    const exceeded = maxDistance + 1;

    // Early termination: the length difference is a lower bound of the distance
    if (Math.abs(len1 - len2) > maxDistance) {
        return exceeded;
    }

    // Handle edge cases
    if (len1 === 0) return len2 <= maxDistance ? len2 : exceeded;
    if (len2 === 0) return len1 <= maxDistance ? len1 : exceeded;

    // The distance never exceeds the longer length, so a wider band would be wasted work
    const band = Math.min(maxDistance, Math.max(len1, len2));
    const width = 2 * band + 1;

    // Row slot d holds the cell in column j = i + d - band of row i.
    // Relative to slot d of the current row: previousRow[d] is the diagonal neighbour,
    // previousRow[d + 1] the cell directly above, currentRow[d - 1] the cell to the left.
    let previousRow: number[] = new Array<number>(width);
    let currentRow: number[] = new Array<number>(width);

    // Row 0: transforming the empty prefix of str1 into the first j characters of str2
    for (let d = 0; d < width; d++) {
        const j = d - band;
        previousRow[d] = j >= 0 && j <= len2 ? j : exceeded;
    }

    for (let i = 1; i <= len1; i++) {
        let minInRow = exceeded;

        for (let d = 0; d < width; d++) {
            const j = i + d - band;
            let value = exceeded;

            if (j === 0) {
                // Deleting the first i characters of str1
                value = Math.min(i, exceeded);
            } else if (j > 0 && j <= len2) {
                const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
                const deletion = d + 1 < width ? previousRow[d + 1] + 1 : exceeded;
                const insertion = d > 0 ? currentRow[d - 1] + 1 : exceeded;
                const substitution = previousRow[d] + cost;
                value = Math.min(deletion, insertion, substitution, exceeded);
            }

            currentRow[d] = value;

            // Track minimum value in current row for early termination
            if (value < minInRow) {
                minInRow = value;
            }
        }

        // Early termination: later rows can only be as large or larger
        if (minInRow > maxDistance) {
            return exceeded;
        }

        // Swap arrays for next iteration
        [previousRow, currentRow] = [currentRow, previousRow];
    }

    // Cell (len1, len2); inside the band because |len1 - len2| <= band
    const result = previousRow[len2 - len1 + band];
    return result <= maxDistance ? result : exceeded;
}
/**
 * Checks that a list of search tokens is usable with the given operator.
 *
 * Structural checks (operator present, tokens is a non-empty array of non-blank strings)
 * apply to every operator. The minimum-length and maximum-length checks only apply to
 * the fuzzy operators `~=` and `~*`.
 *
 * @param tokens Array of search tokens
 * @param operator The search operator being used
 * @returns `{ isValid: true }` on success, otherwise `{ isValid: false, error }` describing the first failed check
 */
export function validateFuzzySearchTokens(tokens: string[], operator: string): { isValid: boolean; error?: string } {
    const reject = (error: string): { isValid: boolean; error?: string } => ({ isValid: false, error });

    if (!operator || typeof operator !== 'string') {
        return reject('Invalid operator: operator must be a non-empty string');
    }

    if (!Array.isArray(tokens)) {
        return reject('Invalid tokens: tokens must be an array');
    }

    if (tokens.length === 0) {
        return reject('Invalid tokens: at least one token is required');
    }

    // Every token has to be an actual string before any string method is called on it.
    const hasNonStringToken = tokens.some((candidate) => candidate == null || typeof candidate !== 'string');
    if (hasNonStringToken) {
        return reject('Invalid tokens: all tokens must be non-null strings');
    }

    const hasBlankToken = tokens.some((candidate) => candidate.trim().length === 0);
    if (hasBlankToken) {
        return reject('Invalid tokens: empty or whitespace-only tokens are not allowed');
    }

    // The remaining length limits are specific to the fuzzy operators.
    const isFuzzyOperator = operator === '~=' || operator === '~*';
    if (!isFuzzyOperator) {
        return { isValid: true };
    }

    const minLength = FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH;
    const tooShort = tokens.filter((candidate) => candidate.length < minLength);
    if (tooShort.length > 0) {
        return reject(`Fuzzy search operators (~=, ~*) require tokens of at least ${FUZZY_SEARCH_CONFIG.MIN_FUZZY_TOKEN_LENGTH} characters. Invalid tokens: ${tooShort.join(', ')}`);
    }

    // Very long tokens are rejected because they could cause performance issues.
    const maxTokenLength = 100;
    const tooLong = tokens.filter((candidate) => candidate.length > maxTokenLength);
    if (tooLong.length > 0) {
        const previews = tooLong.map((candidate) => candidate.substring(0, 20) + '...');
        return reject(`Tokens are too long (max ${maxTokenLength} characters). Long tokens: ${previews.join(', ')}`);
    }

    return { isValid: true };
}
/**
 * Prepares note content for searching while rejecting or trimming only extreme inputs.
 *
 * - Empty or non-string content yields `null`.
 * - Content longer than `ABSOLUTE_MAX_CONTENT_SIZE` is truncated to that many characters.
 * - Content with more than `ABSOLUTE_MAX_WORD_COUNT` whitespace-separated words is cut to
 *   that many words, re-joined with single spaces.
 * - Otherwise the content is returned unchanged; large inputs only produce log messages.
 *
 * @param content The content to validate and preprocess
 * @param noteId The note ID, used only in log messages ("unknown" when omitted)
 * @returns The (possibly truncated) content, or null when there is nothing to search
 */
export function validateAndPreprocessContent(content: string, noteId?: string): string | null {
    if (!content || typeof content !== 'string') {
        return null;
    }

    const noteLabel = noteId || 'unknown';
    const limits = FUZZY_SEARCH_CONFIG;
    const size = content.length;

    // Hard size limit: truncate instead of searching content that could destabilize the system.
    if (size > limits.ABSOLUTE_MAX_CONTENT_SIZE) {
        console.error(`Content size exceeds absolute system limit for note ${noteLabel}: ${size} bytes - this could cause system instability`);
        return content.substring(0, limits.ABSOLUTE_MAX_CONTENT_SIZE);
    }

    // Soft size limit: still processed, just reported.
    if (size > limits.PERFORMANCE_WARNING_SIZE) {
        console.info(`Large content for note ${noteLabel}: ${size} bytes - processing may take time but will attempt full search`);
    }

    // Words are whitespace-separated chunks; the same split feeds both the count and the truncation.
    const chunks = content.split(/\s+/);
    const wordCount = chunks.length;

    // Hard word limit: keep only the leading words.
    if (wordCount > limits.ABSOLUTE_MAX_WORD_COUNT) {
        console.error(`Word count exceeds absolute system limit for note ${noteLabel}: ${wordCount} words - this could cause system instability`);
        return chunks.slice(0, limits.ABSOLUTE_MAX_WORD_COUNT).join(' ');
    }

    // Soft word limit: still processed, just reported.
    if (wordCount > limits.PERFORMANCE_WARNING_WORDS) {
        console.info(`High word count for note ${noteLabel}: ${wordCount} words - phrase matching may take time but will attempt full search`);
    }

    const needsProgressiveProcessing =
        size > limits.PROGRESSIVE_PROCESSING_SIZE || wordCount > limits.PROGRESSIVE_PROCESSING_WORDS;
    if (needsProgressiveProcessing) {
        console.info(`Very large content for note ${noteLabel} - using progressive processing to maintain responsiveness`);
    }

    return content;
}
/**
 * Returns a copy of the input in which every regular-expression metacharacter is
 * prefixed with a backslash, so the result matches literally when passed to `new RegExp`.
 *
 * @param string The raw text to escape
 * @returns The escaped text
 */
function escapeRegExp(string: string): string {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // `$&` inserts the matched character itself, so each one becomes `\` + character.
    const escaped = string.replace(metaCharacters, '\\$&');
    return escaped;
}
/**
 * Looks for a token inside a text, first as a case-insensitive substring and then as a
 * fuzzy match against each whitespace-separated word, and returns what was matched.
 *
 * Fuzzy matching is only attempted for tokens of at least four characters, and only
 * against words whose length differs from the token by at most `maxDistance` and at most 2.
 * Any error raised while matching is logged with `console.warn` and reported as "no match".
 *
 * @param token The search token (should be normalized)
 * @param text The text to match against (should be normalized)
 * @param maxDistance Maximum allowed edit distance
 * @returns The matched substring or word in its original casing, or null when nothing matches
 */
export function fuzzyMatchWordWithResult(token: string, text: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): string | null {
    const inputsAreStrings = typeof token === 'string' && typeof text === 'string';
    if (!inputsAreStrings || token.length === 0 || text.length === 0) {
        return null;
    }

    try {
        const needle = token.toLowerCase();
        const haystack = text.toLowerCase();

        // Fast path: the token occurs verbatim (ignoring case) somewhere in the text.
        if (haystack.includes(needle)) {
            // Pull the occurrence out of the original text so its casing is preserved.
            const occurrence = text.match(new RegExp(escapeRegExp(token), 'i'));
            return occurrence ? occurrence[0] : token;
        }

        // Short tokens are never fuzzy matched, whatever the words look like.
        if (needle.length < 4) {
            return null;
        }

        // Split the lowercased and the original text the same way so indices line up.
        const isNonEmpty = (piece: string) => piece.length > 0;
        const loweredWords = haystack.split(/\s+/).filter(isNonEmpty);
        const originalWords = text.split(/\s+/).filter(isNonEmpty);

        const matchIndex = loweredWords.findIndex((candidate) => {
            // Words that differ too much in length cannot be close enough; skip the costly check.
            const lengthGap = Math.abs(candidate.length - needle.length);
            if (lengthGap > maxDistance || lengthGap > 2) {
                return false;
            }
            return calculateOptimizedEditDistance(needle, candidate, maxDistance) <= maxDistance;
        });

        // Hand back the word as it appears in the original text.
        return matchIndex === -1 ? null : originalWords[matchIndex];
    } catch (error) {
        // Log error and return null for safety
        console.warn('Error in fuzzy word matching:', error);
        return null;
    }
}
/**
 * Boolean convenience wrapper around {@link fuzzyMatchWordWithResult}.
 *
 * @param token The search token (should be normalized)
 * @param text The text to match against (should be normalized)
 * @param maxDistance Maximum allowed edit distance
 * @returns True when the token is found in the text, exactly or within the distance threshold
 */
export function fuzzyMatchWord(token: string, text: string, maxDistance: number = FUZZY_SEARCH_CONFIG.MAX_EDIT_DISTANCE): boolean {
    const matched = fuzzyMatchWordWithResult(token, text, maxDistance);
    return matched !== null;
}

View File

@ -3,13 +3,50 @@
The _Quick search_ function does a full-text search (that is, it searches through the content of notes and not just the title of a note) and displays the result in an easy-to-access manner.
The alternative to the quick search is the <a class="reference-link" href="Search.md">Search</a> function, which opens in a dedicated tab and has support for advanced queries.
The alternative to the quick search is the <a class="reference-link" href="Search.md">Search</a> function, which opens in a dedicated tab and has support for advanced queries.
For even faster navigation, it's possible to use <a class="reference-link" href="Jump%20to.md">Jump to Note</a> which will only search through the note titles instead of the content.
For even faster navigation, it's possible to use <a class="reference-link" href="Jump%20to.md">Jump to Note</a> which will only search through the note titles instead of the content.
## Layout
Based on the <a class="reference-link" href="../UI%20Elements/Vertical%20and%20horizontal%20layout.md">Vertical and horizontal layout</a>, the quick search is placed:
Based on the <a class="reference-link" href="../UI%20Elements/Vertical%20and%20horizontal%20layout.md">Vertical and horizontal layout</a>, the quick search is placed:
* On the vertical layout, it is displayed right above the <a class="reference-link" href="../UI%20Elements/Note%20Tree.md">Note Tree</a>.
* On the horizontal layout, it is displayed in the <a class="reference-link" href="../UI%20Elements/Launch%20Bar.md">Launch Bar</a>, where it can be positioned just like any other icon.
* On the vertical layout, it is displayed right above the <a class="reference-link" href="../UI%20Elements/Note%20Tree.md">Note Tree</a>.
* On the horizontal layout, it is displayed in the <a class="reference-link" href="../UI%20Elements/Launch%20Bar.md">Launch Bar</a>, where it can be positioned just like any other icon.
## Search Features
Quick search includes the following features:
### Content Previews
Search results now display a 200-character preview of the note content below the note title. This preview shows the context where your search terms appear, making it easier to identify the right note without opening it.
### Infinite Scrolling
Results are loaded progressively as you scroll:
- Initial display shows 15 results
- Scrolling near the bottom automatically loads 10 more results
- Continue scrolling to load all matching notes
### Visual Features
- **Highlighting**: Search terms appear in bold with accent colors
- **Separation**: Results are separated with dividers
- **Theme Support**: Highlighting colors adapt to light/dark themes
### Search Behavior
Quick search uses progressive search:
1. Shows exact matches first
2. Includes fuzzy matches when exact results are fewer than 5
3. Exact matches appear before fuzzy matches
### Keyboard Navigation
- Press `Enter` to open the first result
- Use arrow keys to navigate through results
- Press `Escape` to close the quick search
## Using Quick Search
1. **Typo tolerance**: Search finds results despite minor typos
2. **Content previews**: 200-character snippets show match context
3. **Infinite scrolling**: Additional results load on scroll
4. **Specific terms**: Specific search terms return more focused results
5. **Match locations**: Bold text indicates where matches occur

View File

@ -66,11 +66,25 @@ The options available are:
* `#book #publicationYear = 1954`: Find notes with the "book" label and "publicationYear" set to 1954.
* `#genre *=* fan`: Find notes with the "genre" label containing the substring "fan". Additional operators include `*=*` for "contains", `=*` for "starts with", `*=` for "ends with", and `!=` for "is not equal to".
* `#book #publicationYear >= 1950 #publicationYear < 1960`: Use numeric operators to find all books published in the 1950s.
* `#dateNote >= TODAY-30`: A "smart search" to find notes with the "dateNote" label within the last 30 days. Supported smart values include NOW +- seconds, TODAY +- days, MONTH +- months, YEAR +- years.
* `#dateNote >= TODAY-30`: Find notes with the "dateNote" label within the last 30 days. Supported date values include NOW +- seconds, TODAY +- days, MONTH +- months, YEAR +- years.
* `~author.title *=* Tolkien`: Find notes related to an author whose title contains "Tolkien".
* `#publicationYear %= '19[0-9]{2}'`: Use the '%=' operator to match a regular expression (regex). This feature has been available since Trilium 0.52.
* `note.content %= '\\d{2}:\\d{2} (PM|AM)'`: Find notes that mention a time. Backslashes in a regex must be escaped.
### Fuzzy Search
Trilium supports fuzzy search operators that find results with typos or spelling variations:
* `note.title ~= trilim`: Fuzzy exact match - finds notes with titles like "Trilium" even if you typed "trilim" (with a typo)
* `note.content ~* progra`: Fuzzy contains match - finds notes containing words like "program", "programmer", "programming" even with slight misspellings
* `note.content ~* develpment`: Will find notes containing "development" despite the typo
**Important notes about fuzzy search:**
- Fuzzy search requires at least 3 characters in the search term
- Maximum edit distance is 2, i.e. at most two single-character changes (insertions, deletions or substitutions) between the search term and the matched word
- Diacritics are normalized (e.g., "café" matches "cafe")
- Fuzzy matches work best for finding content with minor typos or spelling variations
### Advanced Use Cases
* `~author.relations.son.title = 'Christopher Tolkien'`: Search for notes with an "author" relation to a note that has a "son" relation to "Christopher Tolkien". This can be modeled with the following note structure:
@ -117,6 +131,32 @@ Some queries can only be expressed with negation:
This query finds all book notes not in the "Tolkien" subtree.
## Progressive Search Strategy
Trilium uses a progressive search strategy that performs exact matching first, then adds fuzzy matching when needed.
### How Progressive Search Works
1. **Phase 1 - Exact Matching**: When you search, Trilium first looks for exact matches of your search terms. This handles the vast majority of searches (90%+) and returns results almost instantly.
2. **Phase 2 - Fuzzy Fallback**: If Phase 1 doesn't find enough high-quality results (fewer than 5 results with good relevance scores), Trilium automatically adds fuzzy matching to find results with typos or spelling variations.
3. **Result Ordering**: Exact matches always appear before fuzzy matches, regardless of individual scores. This ensures that when you search for "project", notes containing the exact word "project" will appear before notes containing similar words like "projects" or "projection".
### Progressive Search Behavior
- **Speed**: Most searches complete using only exact matching
- **Ordering**: Exact matches appear before fuzzy matches
- **Fallback**: Fuzzy matching activates when exact matches return fewer than 5 results
- **Identification**: Results indicate whether they are exact or fuzzy matches
### Search Performance
Search system specifications:
- Content size limit: 10MB per note (previously 50KB)
- Edit distance calculations for fuzzy matching
- Infinite scrolling in Quick Search
## Under the Hood
### Label and Relation Shortcuts
@ -142,7 +182,7 @@ However, common label and relation searches have shortcut syntax:
### Separating Full-Text and Attribute Parts
Search syntax allows combining full-text search with attribute-based search seamlessly. For example, `tolkien #book` contains:
Search syntax allows combining full-text search with attribute-based search. For example, `tolkien #book` contains:
1. Full-text tokens - `tolkien`
2. Attribute expressions - `#book`
@ -181,4 +221,21 @@ This finds notes created in May 2019. Numeric operators like `#publicationYear >
You can open Trilium and automatically trigger a search by including the search [url encoded](https://meyerweb.com/eric/tools/dencoder/) string in the URL:
`http://localhost:8080/#?searchString=abc`
`http://localhost:8080/#?searchString=abc`
## Search Configuration
### Parameters
| Parameter | Value | Description |
|-----------|-------|-------------|
| MIN_FUZZY_TOKEN_LENGTH | 3 | Minimum characters for fuzzy matching |
| MAX_EDIT_DISTANCE | 2 | Maximum character changes allowed |
| RESULT_SUFFICIENCY_THRESHOLD | 5 | Number of exact results below which the fuzzy fallback is triggered |
| MAX_CONTENT_SIZE | 10MB | Maximum note content size for search processing |
### Limits
* Searched note content is limited to 10MB per note to prevent performance issues
* Notes exceeding this limit will still be included in title and attribute searches
* Fuzzy matching requires tokens of at least 3 characters

View File

@ -54,4 +54,27 @@ More detailed answer:
* files are stored in no particular order and user can't change this
* Trilium allows storing note [attributes](Advanced%20Usage/Attributes.md) which could be represented in extended user attributes but their support differs greatly among different filesystems / operating systems
* Trilium makes links / relations between different notes which can be quickly retrieved / navigated (e.g. for [note map](Advanced%20Usage/Note%20Map%20\(Link%20map%2C%20Tree%20map\).md)). There's no such support in file systems which means these would have to be stored in some kind of side-car files (mini-databases).
* Filesystems are generally not transactional. While this is not completely required for a note-taking application, having transactions make it way easier to keep notes and their metadata in predictable and consistent state.
* Filesystems are generally not transactional. While this is not strictly required for a note-taking application, having transactions makes it much easier to keep notes and their metadata in a predictable and consistent state.
## Search-related Questions
### Why does search sometimes find results with typos?
Trilium uses a progressive search strategy that includes fuzzy matching when exact matches return fewer than 5 results. This finds notes despite minor typos in your search query. You can use fuzzy search operators (`~=` for fuzzy exact match and `~*` for fuzzy contains). See the <a class="reference-link" href="Basic%20Concepts%20and%20Features/Navigation/Search.md">Search</a> documentation for details.
### How can I search for notes when I'm not sure of the exact spelling?
Use the fuzzy search operators:
- `note.title ~= "projct"` - finds notes with titles like "project" despite the typo
- `note.content ~* "algoritm"` - finds content containing "algorithm" or similar words
### Why do some search results appear before others that have higher scores?
Trilium places exact matches before fuzzy matches. When you search for "project", notes containing exactly "project" appear before notes with variations like "projects" or "projection", regardless of other scoring factors.
### How can I make my searches faster?
1. Use the "Fast search" option to search only titles and attributes (not content)
2. Limit search scope using the "Ancestor" field
3. Set a result limit to prevent loading too many results
4. For large databases, consider archiving old notes to reduce search scope