diff --git a/apps/server-e2e/src/exact_search.spec.ts b/apps/server-e2e/src/exact_search.spec.ts index 3d2ebbe69..1e4660e41 100644 --- a/apps/server-e2e/src/exact_search.spec.ts +++ b/apps/server-e2e/src/exact_search.spec.ts @@ -322,4 +322,181 @@ test.describe("Exact Search with Leading = Operator", () => { // Should find both uppercase and lowercase versions expect(ourTestNotes.length).toBe(2); }); + + test("Exact phrase matching with multi-word searches", async ({ page }) => { + // Create notes with various phrase patterns + const note1 = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "exact phrase", + content: "This note contains the exact phrase.", + type: "text" + } + }); + expect(note1.ok()).toBeTruthy(); + const note1Data = await note1.json(); + createdNoteIds.push(note1Data.note.noteId); + + const note2 = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "exact phrase match", + content: "This note has exact phrase followed by more words.", + type: "text" + } + }); + expect(note2.ok()).toBeTruthy(); + const note2Data = await note2.json(); + createdNoteIds.push(note2Data.note.noteId); + + const note3 = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "phrase exact", + content: "This note has the words in reverse order.", + type: "text" + } + }); + expect(note3.ok()).toBeTruthy(); + const note3Data = await note3.json(); + createdNoteIds.push(note3Data.note.noteId); + + const note4 = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "this exact and that phrase", + content: "Words are separated but both present.", + type: "text" + } + }); + 
expect(note4.ok()).toBeTruthy(); + const note4Data = await note4.json(); + createdNoteIds.push(note4Data.note.noteId); + + await page.waitForTimeout(500); + + // Search for exact phrase "exact phrase" + const response = await page.request.get(`${BASE_URL}/api/quick-search/='exact phrase'`, { + headers: { "x-csrf-token": csrfToken } + }); + + expect(response.ok()).toBeTruthy(); + const data = await response.json(); + + const ourTestNotes = data.searchResults.filter((result: any) => { + const noteId = result.notePath.split("/").pop(); + return [note1Data.note.noteId, note2Data.note.noteId, note3Data.note.noteId, note4Data.note.noteId].includes(noteId || ""); + }); + + console.log("Exact phrase search '=\"exact phrase\"' found:", ourTestNotes.length, "notes"); + console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle)); + + // Should find only notes 1 and 2 (consecutive "exact phrase") + // Should NOT find note 3 (reversed order) or note 4 (words separated) + expect(ourTestNotes.length).toBe(2); + + const foundTitles = ourTestNotes.map((r: any) => r.noteTitle); + expect(foundTitles).toContain("exact phrase"); + expect(foundTitles).toContain("exact phrase match"); + expect(foundTitles).not.toContain("phrase exact"); + expect(foundTitles).not.toContain("this exact and that phrase"); + }); + + test("Exact phrase matching respects word order", async ({ page }) => { + // Create notes to test word order sensitivity + const noteForward = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "Testing Order", + content: "This is a test sentence for verification.", + type: "text" + } + }); + expect(noteForward.ok()).toBeTruthy(); + const noteForwardData = await noteForward.json(); + createdNoteIds.push(noteForwardData.note.noteId); + + const noteReverse = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { 
"x-csrf-token": csrfToken }, + data: { + title: "Order Testing", + content: "A sentence test is this for verification.", + type: "text" + } + }); + expect(noteReverse.ok()).toBeTruthy(); + const noteReverseData = await noteReverse.json(); + createdNoteIds.push(noteReverseData.note.noteId); + + await page.waitForTimeout(500); + + // Search for exact phrase "test sentence" + const response = await page.request.get(`${BASE_URL}/api/quick-search/='test sentence'`, { + headers: { "x-csrf-token": csrfToken } + }); + + expect(response.ok()).toBeTruthy(); + const data = await response.json(); + + const ourTestNotes = data.searchResults.filter((result: any) => { + const noteId = result.notePath.split("/").pop(); + return noteId === noteForwardData.note.noteId || noteId === noteReverseData.note.noteId; + }); + + console.log("Exact phrase search '=\"test sentence\"' found:", ourTestNotes.length, "notes"); + console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle)); + + // Should find only the forward order note + expect(ourTestNotes.length).toBe(1); + expect(ourTestNotes[0].noteTitle).toBe("Testing Order"); + }); + + test("Multi-word exact search without quotes", async ({ page }) => { + // Test that multi-word search with = but without quotes also does exact phrase matching + const notePhrase = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "Quick Test Note", + content: "A simple note for multi word testing.", + type: "text" + } + }); + expect(notePhrase.ok()).toBeTruthy(); + const notePhraseData = await notePhrase.json(); + createdNoteIds.push(notePhraseData.note.noteId); + + const noteScattered = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, { + headers: { "x-csrf-token": csrfToken }, + data: { + title: "Word Multi Testing", + content: "Words are multi scattered in this testing example.", + type: "text" + } + 
}); + expect(noteScattered.ok()).toBeTruthy(); + const noteScatteredData = await noteScattered.json(); + createdNoteIds.push(noteScatteredData.note.noteId); + + await page.waitForTimeout(500); + + // Search for "=multi word" without quotes (parser tokenizes as two words) + const response = await page.request.get(`${BASE_URL}/api/quick-search/=multi word`, { + headers: { "x-csrf-token": csrfToken } + }); + + expect(response.ok()).toBeTruthy(); + const data = await response.json(); + + const ourTestNotes = data.searchResults.filter((result: any) => { + const noteId = result.notePath.split("/").pop(); + return noteId === notePhraseData.note.noteId || noteId === noteScatteredData.note.noteId; + }); + + console.log("Multi-word exact search '=multi word' found:", ourTestNotes.length, "notes"); + console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle)); + + // Should find only the note with consecutive "multi word" phrase + expect(ourTestNotes.length).toBe(1); + expect(ourTestNotes[0].noteTitle).toBe("Quick Test Note"); + }); }); diff --git a/apps/server/src/services/search/expressions/note_content_fulltext.ts b/apps/server/src/services/search/expressions/note_content_fulltext.ts index 967190bd9..81250dda5 100644 --- a/apps/server/src/services/search/expressions/note_content_fulltext.ts +++ b/apps/server/src/services/search/expressions/note_content_fulltext.ts @@ -77,15 +77,43 @@ class NoteContentFulltextExp extends Expression { const resultNoteSet = new NoteSet(); + // Search through notes with content for (const row of sql.iterateRows(` SELECT noteId, type, mime, content, isProtected FROM notes JOIN blobs USING (blobId) - WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') - AND isDeleted = 0 + WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') + AND isDeleted = 0 AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) { this.findInText(row, inputNoteSet, resultNoteSet); } + // For exact match with flatText, also search notes WITHOUT 
content (they may have matching attributes) + if (this.flatText && (this.operator === "=" || this.operator === "!=")) { + for (const noteId of inputNoteSet.noteIdSet) { + // Skip if already found or doesn't exist + if (resultNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) { + continue; + } + + const note = becca.notes[noteId]; + const flatText = note.getFlatText(); + + // For flatText, only check attribute values (format: #name=value or ~name=value) + // Don't match against noteId, type, mime, or title which are also in flatText + let matches = false; + const phrase = this.tokens.join(" "); + const normalizedPhrase = normalizeSearchText(phrase); + const normalizedFlatText = normalizeSearchText(flatText); + + // Check if =phrase appears in flatText (indicates attribute value match) + matches = normalizedFlatText.includes(`=${normalizedPhrase}`); + + if ((this.operator === "=" && matches) || (this.operator === "!=" && !matches)) { + resultNoteSet.add(note); + } + } + } + + return resultNoteSet; } @@ -103,6 +131,32 @@ class NoteContentFulltextExp extends Expression { return words.some(word => word === normalizedToken); } + /** + * Checks if content contains the exact phrase (consecutive words in order). + * Case-insensitive: tokens and content are both normalized here before comparing. + * @param tokens words of the phrase, in order + * @param content note content, or the note's flat text when checkFlatTextAttributes is true + * @param checkFlatTextAttributes when true, match only attribute values ("=phrase") + * @returns true when the phrase is found + */ + private containsExactPhrase(tokens: string[], content: string, checkFlatTextAttributes: boolean = false): boolean { + const normalizedTokens = tokens.map(t => normalizeSearchText(t)); + const normalizedContent = normalizeSearchText(content); + + // Join tokens with single space to form the phrase + const phrase = normalizedTokens.join(" "); + + // flatText also holds noteId, type, mime and title, so a plain substring check + // would match those too. Attributes appear as "#name=value" or "~name=value", + // so for flatText only a phrase directly after "=" counts as a match. + if
(checkFlatTextAttributes) { + return normalizedContent.includes(`=${phrase}`); + } + + // Note content: the phrase must appear as a substring (consecutive words) + return normalizedContent.includes(phrase); + } + findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) { if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) { return; @@ -137,9 +191,25 @@ class NoteContentFulltextExp extends Expression { if (this.tokens.length === 1) { const [token] = this.tokens; + let matches = false; + if (this.operator === "=") { + matches = this.containsExactWord(token, content); + // Also check flatText if enabled (includes attributes) + if (!matches && this.flatText) { + const flatText = becca.notes[noteId].getFlatText(); + matches = this.containsExactPhrase([token], flatText, true); + } + } else if (this.operator === "!=") { + matches = !this.containsExactWord(token, content); + // For negation, check flatText too + if (matches && this.flatText) { + const flatText = becca.notes[noteId].getFlatText(); + matches = !this.containsExactPhrase([token], flatText, true); + } + } + if ( - (this.operator === "=" && this.containsExactWord(token, content)) || - (this.operator === "!=" && !this.containsExactWord(token, content)) || + matches || (this.operator === "*=" && content.endsWith(token)) || (this.operator === "=*" && content.startsWith(token)) || (this.operator === "*=*" && content.includes(token)) || @@ -152,10 +222,26 @@ class NoteContentFulltextExp extends Expression { } else { // Multi-token matching with fuzzy support and phrase proximity if (this.operator === "~=" || this.operator === "~*") { + // Fuzzy phrase matching if (this.matchesWithFuzzy(content, noteId)) { resultNoteSet.add(becca.notes[noteId]); } + } else if (this.operator === "=" || this.operator === "!=") { + // Exact phrase matching for = and != + let matches = this.containsExactPhrase(this.tokens, content, false); + + // Also check flatText if enabled (includes attributes) + if (!matches && this.flatText) { + const flatText =
becca.notes[noteId].getFlatText(); + matches = this.containsExactPhrase(this.tokens, flatText, true); + } + + if ((this.operator === "=" && matches) || + (this.operator === "!=" && !matches)) { + resultNoteSet.add(becca.notes[noteId]); + } } else { + // Other operators: check all tokens present (any order) const nonMatchingToken = this.tokens.find( (token) => !this.tokenMatchesContent(token, content, noteId) diff --git a/apps/server/src/services/search/services/build_comparator.ts b/apps/server/src/services/search/services/build_comparator.ts index 1f08a36d5..0f8020de3 100644 --- a/apps/server/src/services/search/services/build_comparator.ts +++ b/apps/server/src/services/search/services/build_comparator.ts @@ -14,24 +14,35 @@ type Comparator = (comparedValue: T) => (val: string) => boolean; const stringComparators: Record> = { "=": (comparedValue) => (val) => { - // For the = operator, check if the value contains the exact word (word-boundary matching) + // For the = operator, check if the value contains the exact word or phrase // This is case-insensitive since both values are already lowercased if (!val) return false; const normalizedVal = normalizeSearchText(val); const normalizedCompared = normalizeSearchText(comparedValue); - // Split into words and check for exact match + // If comparedValue has multiple words, check for exact phrase + if (normalizedCompared.includes(" ")) { + return normalizedVal.includes(normalizedCompared); + } + + // For single word, split into words and check for exact match const words = normalizedVal.split(/\s+/); return words.some(word => word === normalizedCompared); }, "!=": (comparedValue) => (val) => { - // Negation of exact word match + // Negation of exact word/phrase match if (!val) return true; const normalizedVal = normalizeSearchText(val); const normalizedCompared = normalizeSearchText(comparedValue); + // If comparedValue has multiple words, check for exact phrase + if (normalizedCompared.includes(" ")) { + return 
!normalizedVal.includes(normalizedCompared); + } + + // For single word, split into words and check for exact match const words = normalizedVal.split(/\s+/); return !words.some(word => word === normalizedCompared); }, diff --git a/apps/server/src/services/search/services/parse.ts b/apps/server/src/services/search/services/parse.ts index b537ee562..03986b9ac 100644 --- a/apps/server/src/services/search/services/parse.ts +++ b/apps/server/src/services/search/services/parse.ts @@ -38,11 +38,14 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading if (!searchContext.fastSearch) { // For exact match with "=", we need different behavior - if (leadingOperator === "=" && tokens.length === 1) { - // Exact match on title OR exact match on content + if (leadingOperator === "=" && tokens.length >= 1) { + // Exact match on title OR exact match on content OR exact match in flat text (includes attributes) + // For multi-word, join tokens with space to form exact phrase + const titleSearchValue = tokens.join(" "); return new OrExp([ - new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), - new NoteContentFulltextExp("=", { tokens, flatText: false }) + new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue), + new NoteContentFulltextExp("=", { tokens, flatText: false }), + new NoteContentFulltextExp("=", { tokens, flatText: true }) ]); } return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]); diff --git a/apps/server/src/services/search/services/search.spec.ts b/apps/server/src/services/search/services/search.spec.ts index c6703660b..fc36d7d7c 100644 --- a/apps/server/src/services/search/services/search.spec.ts +++ b/apps/server/src/services/search/services/search.spec.ts @@ -304,10 +304,13 @@ describe("Search", () => { const searchContext = new SearchContext(); - // Test 1: With = and quotes, treat as multi-word exact match (both words must match) + // Test 1: 
With = and quotes, treat as exact phrase match (consecutive words in order) let searchResults = searchService.findResultsWithQuery("='exact phrase'", searchContext); - // With current implementation, this searches for notes containing both "exact" and "phrase" words - expect(searchResults.length).toEqual(4); // All notes with both words + // Should match only notes containing the exact phrase "exact phrase" + expect(searchResults.length).toEqual(3); // Only notes with consecutive "exact phrase" + expect(findNoteByTitle(searchResults, "exact phrase")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy(); + expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy(); // Test 2: Without =, quoted phrase should find substring/contains matches searchResults = searchService.findResultsWithQuery("'exact phrase'", searchContext); @@ -316,9 +319,10 @@ describe("Search", () => { expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy(); expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy(); - // Test 3: Verify word order doesn't matter with exact word matching + // Test 3: Verify word order matters with exact phrase matching searchResults = searchService.findResultsWithQuery("='phrase exact'", searchContext); - expect(searchResults.length).toEqual(4); // All notes with both words + expect(searchResults.length).toEqual(1); // Only "phrase exact" matches + expect(findNoteByTitle(searchResults, "phrase exact")).toBeTruthy(); }); it("leading = operator case sensitivity", () => { @@ -368,15 +372,15 @@ describe("Search", () => { expect(findNoteByTitle(searchResults, "test.note")).toBeTruthy(); // For phrases with spaces, use quotes to keep them together - // With word-boundary matching, this finds all notes with both words + // With exact phrase matching, this finds notes with the consecutive phrase searchResults = searchService.findResultsWithQuery("='test note'", searchContext); - 
expect(searchResults.length).toEqual(1); // Only "test note" has both words as separate tokens + expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase expect(findNoteByTitle(searchResults, "test note")).toBeTruthy(); - // Without quotes, "test note" is tokenized as two separate words - // and will match all notes containing both "test" AND "note" words + // Without quotes, "test note" is tokenized as two separate tokens + // and will be treated as an exact phrase search with = operator searchResults = searchService.findResultsWithQuery("=test note", searchContext); - expect(searchResults.length).toEqual(1); // Only "test note" has both as separate words + expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase // Without =, should find all matches containing "test" substring searchResults = searchService.findResultsWithQuery("test", searchContext);