import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import App from "./support/app";

const BASE_URL = "http://127.0.0.1:8082";

/** Pause after creating notes, giving the server time to pick them up before searching. */
const INDEXING_DELAY_MS = 500;

/** The fields of a quick-search result row that these tests inspect. */
interface QuickSearchResult {
    notePath: string;
    noteTitle: string;
}

/** Returns the last "/"-separated segment of a note path (used here as the note's ID), or "" if there is none. */
function noteIdFromPath(notePath: string): string {
    return notePath.split("/").pop() || "";
}

/** Keeps only the results whose note ID is one of `noteIds`, so pre-existing notes never influence an assertion. */
function onlyNotes(results: QuickSearchResult[], noteIds: string[]): QuickSearchResult[] {
    return results.filter((result) => noteIds.includes(noteIdFromPath(result.notePath)));
}

/**
 * E2E tests for exact search functionality using the leading "=" operator.
 *
 * These tests validate the GitHub issue:
 * - Searching for "pagio" returns many false positives (e.g., "page", "pages")
 * - Searching for "=pagio" should return ONLY exact matches for "pagio"
 *
 * The tests talk to a server expected at BASE_URL; every note they create is
 * recorded in `createdNoteIds` and deleted again in `afterEach`.
 */
test.describe("Exact Search with Leading = Operator", () => {
    let csrfToken: string;
    let createdNoteIds: string[] = [];

    /**
     * Creates a text note directly under the root note and registers it for cleanup.
     *
     * @param page Playwright page whose request context is used for the API call.
     * @param title title of the new note.
     * @param content content of the new note.
     * @returns the ID of the created note.
     */
    async function createNote(page: Page, title: string, content: string): Promise<string> {
        const response = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, {
            headers: { "x-csrf-token": csrfToken },
            data: { title, content, type: "text" }
        });
        expect(response.ok()).toBeTruthy();

        const { note } = await response.json();
        createdNoteIds.push(note.noteId);
        return note.noteId;
    }

    /**
     * Runs a search through the given API endpoint and returns the parsed JSON body.
     * The query is URI-encoded so that spaces and quotes survive as part of the path.
     *
     * @param page Playwright page whose request context is used for the API call.
     * @param endpoint "quick-search" (object with `searchResults`) or "search" (array of note IDs).
     * @param query raw search string, e.g. `=pagio` or `='exact phrase'`.
     */
    async function searchApi(page: Page, endpoint: "quick-search" | "search", query: string) {
        const response = await page.request.get(`${BASE_URL}/api/${endpoint}/${encodeURIComponent(query)}`, {
            headers: { "x-csrf-token": csrfToken }
        });
        expect(response.ok()).toBeTruthy();
        return response.json();
    }

    test.beforeEach(async ({ page, context }) => {
        const app = new App(page, context);
        await app.goto();

        // The CSRF token is exposed by the client on the global `glob` object.
        csrfToken = await page.evaluate(() => {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any -- `glob` is an untyped page global
            return (window as any).glob.csrfToken;
        });
        expect(csrfToken).toBeTruthy();

        // Three notes containing the exact word "pagio" ...
        await createNote(page, "Test Note with pagio", "This note contains the word pagio in the content.");
        // ... and three near misses that only share the "pag" prefix / "page" substring.
        await createNote(page, "Test Note with page", "This note contains the word page in the content.");
        await createNote(page, "Test Note with pages", "This note contains the word pages in the content.");
        await createNote(page, "Homepage Note", "This note is about homepage content.");
        await createNote(page, "Another pagio Note", "This is another note with pagio content for testing exact matches.");
        await createNote(page, "pagio", "This note has pagio as the title.");

        // Wait a bit for indexing
        await page.waitForTimeout(INDEXING_DELAY_MS);
    });

    test.afterEach(async ({ page }) => {
        // Best-effort cleanup: a failed deletion is logged but must not fail the test.
        for (const noteId of createdNoteIds) {
            try {
                const taskId = `cleanup-${Math.random().toString(36).slice(2, 11)}`;
                await page.request.delete(`${BASE_URL}/api/notes/${noteId}?taskId=${taskId}&last=true`, {
                    headers: { "x-csrf-token": csrfToken }
                });
            } catch (e) {
                console.error(`Failed to delete note ${noteId}:`, e);
            }
        }
        createdNoteIds = [];
    });

    test("Quick search without = operator returns all partial matches", async ({ page }) => {
        const data = await searchApi(page, "quick-search", "pag");

        expect(data.searchResultNoteIds).toBeDefined();
        expect(data.searchResults).toBeDefined();

        // Filter to only our test notes
        const testResults = onlyNotes(data.searchResults, createdNoteIds);

        console.log("Quick search 'pag' found:", testResults.length, "matching notes");
        console.log("Note titles:", testResults.map((r) => r.noteTitle));

        // Should find at least "page", "pages", "homepage", and "pagio" notes
        expect(testResults.length).toBeGreaterThanOrEqual(4);
    });

    test("Quick search with = operator returns only exact matches", async ({ page }) => {
        const data = await searchApi(page, "quick-search", "=pagio");

        expect(data.searchResultNoteIds).toBeDefined();
        expect(data.searchResults).toBeDefined();

        // Filter to only our test notes
        const testResults = onlyNotes(data.searchResults, createdNoteIds);

        console.log("Quick search '=pagio' found:", testResults.length, "matching notes");
        console.log("Note titles:", testResults.map((r) => r.noteTitle));

        // Should find exactly 3 notes: "Test Note with pagio", "Another pagio Note", "pagio"
        expect(testResults.length).toBe(3);

        // Verify that none of the results contain "page" or "pages" (only "pagio")
        for (const result of testResults) {
            const title = result.noteTitle.toLowerCase();
            const hasPageNotPagio = title.includes("page") && !title.includes("pagio");
            expect(hasPageNotPagio).toBe(false);
        }
    });

    test("Full search API without = operator returns partial matches", async ({ page }) => {
        const data = await searchApi(page, "search", "pag");

        // Should return an array of note IDs
        expect(Array.isArray(data)).toBe(true);

        // Filter to only our test notes
        const testNoteIds = data.filter((id: string) => createdNoteIds.includes(id));

        console.log("Full search 'pag' found:", testNoteIds.length, "matching notes from our test set");

        // Should find at least 4 notes
        expect(testNoteIds.length).toBeGreaterThanOrEqual(4);
    });

    test("Full search API with = operator returns only exact matches", async ({ page }) => {
        const data = await searchApi(page, "search", "=pagio");

        // Should return an array of note IDs
        expect(Array.isArray(data)).toBe(true);

        // Filter to only our test notes
        const testNoteIds = data.filter((id: string) => createdNoteIds.includes(id));

        console.log("Full search '=pagio' found:", testNoteIds.length, "matching notes from our test set");

        // Should find exactly 3 notes with exact "pagio" match
        expect(testNoteIds.length).toBe(3);
    });

    test("Exact search operator works with content search", async ({ page }) => {
        // One note has the exact word "test" in its content, the other only "testing".
        const exactNoteId = await createNote(page, "Testing Content", "This note contains the exact word test in content.");
        const nearMissNoteId = await createNote(page, "Testing More", "This note has testing in the content.");

        await page.waitForTimeout(INDEXING_DELAY_MS);

        const data = await searchApi(page, "quick-search", "=test");
        const ourTestNotes = onlyNotes(data.searchResults, [exactNoteId, nearMissNoteId]);

        console.log("Exact search '=test' found our test notes:", ourTestNotes.length);
        console.log("Note titles:", ourTestNotes.map((r) => r.noteTitle));

        // The note with the exact word in its content must be found.
        // The "testing" note is deliberately not asserted absent: whether it is excluded
        // depends on how strictly the server matches titles, and both titles contain "Testing".
        const foundIds = ourTestNotes.map((r) => noteIdFromPath(r.notePath));
        expect(foundIds).toContain(exactNoteId);
    });

    test("Exact search is case-insensitive", async ({ page }) => {
        // Create notes with different case variations
        const upperNoteId = await createNote(page, "EXACT MATCH", "This note has EXACT in uppercase.");
        const lowerNoteId = await createNote(page, "exact match", "This note has exact in lowercase.");

        await page.waitForTimeout(INDEXING_DELAY_MS);

        // Search with exact operator in lowercase
        const data = await searchApi(page, "quick-search", "=exact");
        const ourTestNotes = onlyNotes(data.searchResults, [upperNoteId, lowerNoteId]);

        console.log("Case-insensitive exact search found:", ourTestNotes.length, "notes");

        // Should find both uppercase and lowercase versions
        expect(ourTestNotes.length).toBe(2);
    });

    test("Exact phrase matching with multi-word searches", async ({ page }) => {
        // Create notes with various phrase patterns
        const phraseNoteIds = [
            await createNote(page, "exact phrase", "This note contains the exact phrase."),
            await createNote(page, "exact phrase match", "This note has exact phrase followed by more words."),
            await createNote(page, "phrase exact", "This note has the words in reverse order."),
            await createNote(page, "this exact and that phrase", "Words are separated but both present.")
        ];

        await page.waitForTimeout(INDEXING_DELAY_MS);

        // Search for the quoted phrase "exact phrase"
        const query = "='exact phrase'";
        const data = await searchApi(page, "quick-search", query);
        const ourTestNotes = onlyNotes(data.searchResults, phraseNoteIds);

        console.log(`Exact phrase search "${query}" found:`, ourTestNotes.length, "notes");
        console.log("Note titles:", ourTestNotes.map((r) => r.noteTitle));

        // Should find only the first two notes (consecutive "exact phrase")
        // Should NOT find the reversed-order note or the one with the words separated
        expect(ourTestNotes.length).toBe(2);

        const foundTitles = ourTestNotes.map((r) => r.noteTitle);
        expect(foundTitles).toContain("exact phrase");
        expect(foundTitles).toContain("exact phrase match");
        expect(foundTitles).not.toContain("phrase exact");
        expect(foundTitles).not.toContain("this exact and that phrase");
    });

    test("Exact phrase matching respects word order", async ({ page }) => {
        // Create notes to test word order sensitivity
        const forwardNoteId = await createNote(page, "Testing Order", "This is a test sentence for verification.");
        const reverseNoteId = await createNote(page, "Order Testing", "A sentence test is this for verification.");

        await page.waitForTimeout(INDEXING_DELAY_MS);

        // Search for the quoted phrase "test sentence"
        const query = "='test sentence'";
        const data = await searchApi(page, "quick-search", query);
        const ourTestNotes = onlyNotes(data.searchResults, [forwardNoteId, reverseNoteId]);

        console.log(`Exact phrase search "${query}" found:`, ourTestNotes.length, "notes");
        console.log("Note titles:", ourTestNotes.map((r) => r.noteTitle));

        // Should find only the forward order note
        expect(ourTestNotes.length).toBe(1);
        expect(ourTestNotes[0].noteTitle).toBe("Testing Order");
    });

    test("Multi-word exact search without quotes", async ({ page }) => {
        // Test that multi-word search with = but without quotes also does exact phrase matching
        const phraseNoteId = await createNote(page, "Quick Test Note", "A simple note for multi word testing.");
        const scatteredNoteId = await createNote(page, "Word Multi Testing", "Words are multi scattered in this testing example.");

        await page.waitForTimeout(INDEXING_DELAY_MS);

        // Search for "=multi word" without quotes (parser tokenizes as two words)
        const data = await searchApi(page, "quick-search", "=multi word");
        const ourTestNotes = onlyNotes(data.searchResults, [phraseNoteId, scatteredNoteId]);

        console.log("Multi-word exact search '=multi word' found:", ourTestNotes.length, "notes");
        console.log("Note titles:", ourTestNotes.map((r) => r.noteTitle));

        // Should find only the note with consecutive "multi word" phrase
        expect(ourTestNotes.length).toBe(1);
        expect(ourTestNotes[0].noteTitle).toBe("Quick Test Note");
    });
});
"root" : hoistedNoteService.getHoistedNoteId() + }); + + // Execute search with our context + const allSearchResults = searchService.findResultsWithQuery(searchString, searchContext); + const trimmed = allSearchResults.slice(0, 200); + + // Extract snippets using highlightedTokens from our context + for (const result of trimmed) { + result.contentSnippet = searchService.extractContentSnippet(result.noteId, searchContext.highlightedTokens); + result.attributeSnippet = searchService.extractAttributeSnippet(result.noteId, searchContext.highlightedTokens); + } + + // Highlight the results + searchService.highlightSearchResults(trimmed, searchContext.highlightedTokens, searchContext.ignoreInternalAttributes); + + // Map to API format + const searchResults = trimmed.map((result) => { + const { title, icon } = beccaService.getNoteTitleAndIcon(result.noteId); + return { + notePath: result.notePath, + noteTitle: title, + notePathTitle: result.notePathTitle, + highlightedNotePathTitle: result.highlightedNotePathTitle, + contentSnippet: result.contentSnippet, + highlightedContentSnippet: result.highlightedContentSnippet, + attributeSnippet: result.attributeSnippet, + highlightedAttributeSnippet: result.highlightedAttributeSnippet, + icon: icon + }; }); - // Use the same highlighting logic as autocomplete for consistency - const searchResults = searchService.searchNotesForAutocomplete(searchString, false); - - // Extract note IDs for backward compatibility const resultNoteIds = searchResults.map((result) => result.notePath.split("/").pop()).filter(Boolean) as string[]; return { diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts index f1e1bf95f..c36dddd74 100644 --- a/apps/server/src/services/search/expressions/note_content_fulltext.ts +++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts @@ -75,20 +75,101 @@ class NoteContentFulltextExp extends Expression 
/**
 * Checks whether `content` contains `token` as an exact word, or as an exact
 * phrase when the token itself contains spaces (a quoted multi-word token).
 *
 * Both arguments are passed through `normalizeSearchText` here, so callers may
 * pass raw text (presumably this is what makes the match case-insensitive;
 * confirm against `normalizeSearchText`).
 *
 * @param token the search token; may contain spaces when it came from a quoted phrase
 * @param content the text to search in
 * @returns true for a single-word token if one of the whitespace-separated words
 *          of the content equals it; true for a multi-word token if the content
 *          contains it as a substring
 */
private containsExactWord(token: string, content: string): boolean {
    const needle = normalizeSearchText(token);
    const haystack = normalizeSearchText(content);

    // A token with spaces is a phrase: look for it as one consecutive run.
    if (needle.includes(' ')) {
        return haystack.includes(needle);
    }

    // A single word has to equal one whole whitespace-delimited word.
    return haystack.split(/\s+/).includes(needle);
}

/**
 * Checks whether `content` contains the given tokens as one consecutive phrase
 * (tokens joined by single spaces, in order) that starts and ends on a word edge.
 *
 * A plain substring test is not enough for an "exact" match: it would let
 * "test" match inside "testing". The phrase therefore has to be preceded by the
 * start of the text or whitespace, and followed by whitespace or the end of the
 * text. Both tokens and content are normalized with `normalizeSearchText` here.
 *
 * @param tokens words of the phrase, in order
 * @param content the text to search in
 * @param checkFlatTextAttributes when true, a "=" directly before the phrase also
 *        counts as a valid start, so that attribute values match (per the original
 *        author's note, attributes appear in flatText as "#name=value" or
 *        "~name=value"; verify against `getFlatText`)
 * @returns true if the phrase occurs at such a position; an empty phrase is
 *          considered to be contained in any text
 */
private containsExactPhrase(tokens: string[], content: string, checkFlatTextAttributes: boolean = false): boolean {
    const phrase = tokens.map((token) => normalizeSearchText(token)).join(" ");
    const haystack = normalizeSearchText(content);

    // Same answer a substring test gives for "", and it keeps the scan below from looping forever.
    if (phrase.length === 0) {
        return true;
    }

    const whitespace = /\s/;

    // Inspect every occurrence; an early one may sit inside a longer word while a later one is a real match.
    let start = haystack.indexOf(phrase);
    while (start !== -1) {
        const end = start + phrase.length;
        const previous = haystack.charAt(start - 1); // "" when start === 0

        const startsOnEdge = start === 0
            || whitespace.test(previous)
            || (checkFlatTextAttributes && previous === "=");
        const endsOnEdge = end === haystack.length || whitespace.test(haystack.charAt(end));

        if (startsOnEdge && endsOnEdge) {
            return true;
        }

        start = haystack.indexOf(phrase, start + 1);
    }

    return false;
}
preprocessing const processedContent = validateAndPreprocessContent(content, noteId); if (!processedContent) { @@ -123,9 +204,25 @@ class NoteContentFulltextExp extends Expression { if (this.tokens.length === 1) { const [token] = this.tokens; + let matches = false; + if (this.operator === "=") { + matches = this.containsExactWord(token, content); + // Also check flatText if enabled (includes attributes) + if (!matches && this.flatText) { + const flatText = becca.notes[noteId].getFlatText(); + matches = this.containsExactPhrase([token], flatText, true); + } + } else if (this.operator === "!=") { + matches = !this.containsExactWord(token, content); + // For negation, check flatText too + if (matches && this.flatText) { + const flatText = becca.notes[noteId].getFlatText(); + matches = !this.containsExactPhrase([token], flatText, true); + } + } + if ( - (this.operator === "=" && token === content) || - (this.operator === "!=" && token !== content) || + matches || (this.operator === "*=" && content.endsWith(token)) || (this.operator === "=*" && content.startsWith(token)) || (this.operator === "*=*" && content.includes(token)) || @@ -138,10 +235,26 @@ class NoteContentFulltextExp extends Expression { } else { // Multi-token matching with fuzzy support and phrase proximity if (this.operator === "~=" || this.operator === "~*") { + // Fuzzy phrase matching if (this.matchesWithFuzzy(content, noteId)) { resultNoteSet.add(becca.notes[noteId]); } + } else if (this.operator === "=" || this.operator === "!=") { + // Exact phrase matching for = and != + let matches = this.containsExactPhrase(this.tokens, content, false); + + // Also check flatText if enabled (includes attributes) + if (!matches && this.flatText) { + const flatText = becca.notes[noteId].getFlatText(); + matches = this.containsExactPhrase(this.tokens, flatText, true); + } + + if ((this.operator === "=" && matches) || + (this.operator === "!=" && !matches)) { + resultNoteSet.add(becca.notes[noteId]); + } } else { + 
// Other operators: check all tokens present (any order) const nonMatchingToken = this.tokens.find( (token) => !this.tokenMatchesContent(token, content, noteId) diff --git a/apps/server/src/services/search/services/build_comparator.ts b/apps/server/src/services/search/services/build_comparator.ts index 3aebe1adb..c090b458f 100644 --- a/apps/server/src/services/search/services/build_comparator.ts +++ b/apps/server/src/services/search/services/build_comparator.ts @@ -13,8 +13,41 @@ function getRegex(str: string) { type Comparator = (comparedValue: T) => (val: string) => boolean; const stringComparators: Record> = { - "=": (comparedValue) => (val) => val === comparedValue, - "!=": (comparedValue) => (val) => val !== comparedValue, + "=": (comparedValue) => (val) => { + // For the = operator, check if the value contains the exact word or phrase + // This is case-insensitive + if (!val) return false; + + const normalizedVal = normalizeSearchText(val); + const normalizedCompared = normalizeSearchText(comparedValue); + + // If comparedValue has spaces, it's a multi-word phrase + // Check for substring match (consecutive phrase) + if (normalizedCompared.includes(" ")) { + return normalizedVal.includes(normalizedCompared); + } + + // For single word, split into words and check for exact word match + const words = normalizedVal.split(/\s+/); + return words.some(word => word === normalizedCompared); + }, + "!=": (comparedValue) => (val) => { + // Negation of exact word/phrase match + if (!val) return true; + + const normalizedVal = normalizeSearchText(val); + const normalizedCompared = normalizeSearchText(comparedValue); + + // If comparedValue has spaces, it's a multi-word phrase + // Check for substring match (consecutive phrase) and negate + if (normalizedCompared.includes(" ")) { + return !normalizedVal.includes(normalizedCompared); + } + + // For single word, split into words and check for exact word match, then negate + const words = normalizedVal.split(/\s+/); + return 
!words.some(word => word === normalizedCompared); + }, ">": (comparedValue) => (val) => val > comparedValue, ">=": (comparedValue) => (val) => val >= comparedValue, "<": (comparedValue) => (val) => val < comparedValue, diff --git a/apps/server/src/services/search/services/parse.ts b/apps/server/src/services/search/services/parse.ts index b537ee562..03986b9ac 100644 --- a/apps/server/src/services/search/services/parse.ts +++ b/apps/server/src/services/search/services/parse.ts @@ -38,11 +38,14 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading if (!searchContext.fastSearch) { // For exact match with "=", we need different behavior - if (leadingOperator === "=" && tokens.length === 1) { - // Exact match on title OR exact match on content + if (leadingOperator === "=" && tokens.length >= 1) { + // Exact match on title OR exact match on content OR exact match in flat text (includes attributes) + // For multi-word, join tokens with space to form exact phrase + const titleSearchValue = tokens.join(" "); return new OrExp([ - new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), - new NoteContentFulltextExp("=", { tokens, flatText: false }) + new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue), + new NoteContentFulltextExp("=", { tokens, flatText: false }), + new NoteContentFulltextExp("=", { tokens, flatText: true }) ]); } return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]); diff --git a/apps/server/src/services/search/services/search.spec.ts b/apps/server/src/services/search/services/search.spec.ts index d448a04b0..fc36d7d7c 100644 --- a/apps/server/src/services/search/services/search.spec.ts +++ b/apps/server/src/services/search/services/search.spec.ts @@ -242,18 +242,149 @@ describe("Search", () => { const searchContext = new SearchContext(); - // Using leading = for exact title match - let searchResults = 
searchService.findResultsWithQuery("=Example Note", searchContext); - expect(searchResults.length).toEqual(1); + // Using leading = for exact word match - should find notes containing the exact word "example" + let searchResults = searchService.findResultsWithQuery("=example", searchContext); + expect(searchResults.length).toEqual(2); // "Example Note" and "Sample" (has label "example") expect(findNoteByTitle(searchResults, "Example Note")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "Sample")).toBeTruthy(); - // Without =, it should find all notes containing "example" + // Without =, it should find all notes containing "example" (substring match) searchResults = searchService.findResultsWithQuery("example", searchContext); - expect(searchResults.length).toEqual(3); + expect(searchResults.length).toEqual(3); // All notes // = operator should not match partial words - searchResults = searchService.findResultsWithQuery("=Example", searchContext); - expect(searchResults.length).toEqual(0); + searchResults = searchService.findResultsWithQuery("=examples", searchContext); + expect(searchResults.length).toEqual(1); // Only "Examples of Usage" + expect(findNoteByTitle(searchResults, "Examples of Usage")).toBeTruthy(); + }); + + it("leading = operator for exact match - comprehensive title tests", () => { + // Create notes with varying titles to test exact vs contains matching + rootNote + .child(note("testing")) + .child(note("testing123")) + .child(note("My testing notes")) + .child(note("123testing")) + .child(note("test")); + + const searchContext = new SearchContext(); + + // Test 1: Exact word match with leading = should find notes containing the exact word "testing" + let searchResults = searchService.findResultsWithQuery("=testing", searchContext); + expect(searchResults.length).toEqual(2); // "testing" and "My testing notes" (word boundary) + expect(findNoteByTitle(searchResults, "testing")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "My 
testing notes")).toBeTruthy(); + + // Test 2: Without =, it should find all notes containing "testing" (substring contains behavior) + searchResults = searchService.findResultsWithQuery("testing", searchContext); + expect(searchResults.length).toEqual(4); // All notes with "testing" substring + + // Test 3: Exact match should only find the exact composite word + searchResults = searchService.findResultsWithQuery("=testing123", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "testing123")).toBeTruthy(); + + // Test 4: Exact match should only find the exact composite word + searchResults = searchService.findResultsWithQuery("=123testing", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "123testing")).toBeTruthy(); + + // Test 5: Verify that "test" doesn't match "testing" with exact search + searchResults = searchService.findResultsWithQuery("=test", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "test")).toBeTruthy(); + }); + + it("leading = operator with quoted phrases", () => { + rootNote + .child(note("exact phrase")) + .child(note("exact phrase match")) + .child(note("this exact phrase here")) + .child(note("phrase exact")); + + const searchContext = new SearchContext(); + + // Test 1: With = and quotes, treat as exact phrase match (consecutive words in order) + let searchResults = searchService.findResultsWithQuery("='exact phrase'", searchContext); + // Should match only notes containing the exact phrase "exact phrase" + expect(searchResults.length).toEqual(3); // Only notes with consecutive "exact phrase" + expect(findNoteByTitle(searchResults, "exact phrase")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy(); + + // Test 2: Without =, quoted phrase should find substring/contains matches + 
searchResults = searchService.findResultsWithQuery("'exact phrase'", searchContext); + expect(searchResults.length).toEqual(3); // All notes containing the phrase substring + expect(findNoteByTitle(searchResults, "exact phrase")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy(); + + // Test 3: Verify word order matters with exact phrase matching + searchResults = searchService.findResultsWithQuery("='phrase exact'", searchContext); + expect(searchResults.length).toEqual(1); // Only "phrase exact" matches + expect(findNoteByTitle(searchResults, "phrase exact")).toBeTruthy(); + }); + + it("leading = operator case sensitivity", () => { + rootNote + .child(note("TESTING")) + .child(note("testing")) + .child(note("Testing")) + .child(note("TeStiNg")); + + const searchContext = new SearchContext(); + + // Exact match should be case-insensitive (based on lex.ts line 4: str.toLowerCase()) + let searchResults = searchService.findResultsWithQuery("=testing", searchContext); + expect(searchResults.length).toEqual(4); // All variants of "testing" + + searchResults = searchService.findResultsWithQuery("=TESTING", searchContext); + expect(searchResults.length).toEqual(4); // All variants + + searchResults = searchService.findResultsWithQuery("=Testing", searchContext); + expect(searchResults.length).toEqual(4); // All variants + + searchResults = searchService.findResultsWithQuery("=TeStiNg", searchContext); + expect(searchResults.length).toEqual(4); // All variants + }); + + it("leading = operator with special characters", () => { + rootNote + .child(note("test-note")) + .child(note("test_note")) + .child(note("test.note")) + .child(note("test note")) + .child(note("testnote")); + + const searchContext = new SearchContext(); + + // Each exact match should only find its specific variant (compound words are treated as single words) + let searchResults = 
searchService.findResultsWithQuery("=test-note", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "test-note")).toBeTruthy(); + + searchResults = searchService.findResultsWithQuery("=test_note", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "test_note")).toBeTruthy(); + + searchResults = searchService.findResultsWithQuery("=test.note", searchContext); + expect(searchResults.length).toEqual(1); + expect(findNoteByTitle(searchResults, "test.note")).toBeTruthy(); + + // For phrases with spaces, use quotes to keep them together + // With exact phrase matching, this finds notes with the consecutive phrase + searchResults = searchService.findResultsWithQuery("='test note'", searchContext); + expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase + expect(findNoteByTitle(searchResults, "test note")).toBeTruthy(); + + // Without quotes, "test note" is tokenized as two separate tokens + // and will be treated as an exact phrase search with = operator + searchResults = searchService.findResultsWithQuery("=test note", searchContext); + expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase + + // Without =, should find all matches containing "test" substring + searchResults = searchService.findResultsWithQuery("test", searchContext); + expect(searchResults.length).toEqual(5); }); it("fuzzy attribute search", () => { diff --git a/apps/server/src/services/search/services/search.ts b/apps/server/src/services/search/services/search.ts index 22dbe6d9f..5ca4bda4a 100644 --- a/apps/server/src/services/search/services/search.ts +++ b/apps/server/src/services/search/services/search.ts @@ -500,19 +500,38 @@ function extractContentSnippet(noteId: string, searchTokens: string[], maxLength // Extract snippet let snippet = content.substring(snippetStart, snippetStart + maxLength); - + // If snippet contains linebreaks, limit to max 4 
lines and override character limit const lines = snippet.split('\n'); if (lines.length > 4) { - snippet = lines.slice(0, 4).join('\n'); + // Find which lines contain the search tokens to ensure they're included + const normalizedLines = lines.map(line => normalizeString(line.toLowerCase())); + const normalizedTokens = searchTokens.map(token => normalizeString(token.toLowerCase())); + + // Find the first line that contains a search token + let firstMatchLine = -1; + for (let i = 0; i < normalizedLines.length; i++) { + if (normalizedTokens.some(token => normalizedLines[i].includes(token))) { + firstMatchLine = i; + break; + } + } + + if (firstMatchLine !== -1) { + // Center the 4-line window around the first match + // Try to show 1 line before and 2 lines after the match + const startLine = Math.max(0, firstMatchLine - 1); + const endLine = Math.min(lines.length, startLine + 4); + snippet = lines.slice(startLine, endLine).join('\n'); + } else { + // No match found in lines (shouldn't happen), just take first 4 + snippet = lines.slice(0, 4).join('\n'); + } // Add ellipsis if we truncated lines snippet = snippet + "..."; } else if (lines.length > 1) { - // For multi-line snippets, just limit to 4 lines (keep existing snippet) - snippet = lines.slice(0, 4).join('\n'); - if (lines.length > 4) { - snippet = snippet + "..."; - } + // For multi-line snippets that are 4 or fewer lines, keep them as-is + // No need to truncate } else { // Single line content - apply original word boundary logic // Try to start/end at word boundaries @@ -770,5 +789,8 @@ export default { searchNotesForAutocomplete, findResultsWithQuery, findFirstNoteWithQuery, - searchNotes + searchNotes, + extractContentSnippet, + extractAttributeSnippet, + highlightSearchResults };