feat(search): also support the use of ="exact match search string"

This commit is contained in:
perf3ct 2025-10-10 12:23:57 -07:00
parent 50f0b88eff
commit 4fa4112840
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
5 changed files with 302 additions and 21 deletions

View File

@ -322,4 +322,181 @@ test.describe("Exact Search with Leading = Operator", () => {
// Should find both uppercase and lowercase versions // Should find both uppercase and lowercase versions
expect(ourTestNotes.length).toBe(2); expect(ourTestNotes.length).toBe(2);
}); });
test("Exact phrase matching with multi-word searches", async ({ page }) => {
    // Creates a text note under root, asserts the request succeeded, records the
    // new note id in createdNoteIds and returns it.
    // NOTE(review): createdNoteIds is presumably consumed by a cleanup hook outside this view.
    const createNote = async (title: string, content: string): Promise<string> => {
        const reply = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, {
            headers: { "x-csrf-token": csrfToken },
            data: { title, content, type: "text" }
        });
        expect(reply.ok()).toBeTruthy();

        const body = await reply.json();
        createdNoteIds.push(body.note.noteId);
        return body.note.noteId;
    };

    // Fixtures: phrase alone, phrase followed by more words, words reversed,
    // and words present but not adjacent.
    const candidateIds = [
        await createNote("exact phrase", "This note contains the exact phrase."),
        await createNote("exact phrase match", "This note has exact phrase followed by more words."),
        await createNote("phrase exact", "This note has the words in reverse order."),
        await createNote("this exact and that phrase", "Words are separated but both present.")
    ];

    // Fixed pause before querying (presumably lets the server pick up the new notes).
    await page.waitForTimeout(500);

    // Quick search for the quoted phrase with a leading "=" operator
    const response = await page.request.get(`${BASE_URL}/api/quick-search/='exact phrase'`, {
        headers: { "x-csrf-token": csrfToken }
    });
    expect(response.ok()).toBeTruthy();
    const { searchResults } = await response.json();

    // Keep only hits whose last notePath segment is one of the notes created above
    const ourTestNotes = searchResults.filter((result: any) =>
        candidateIds.includes(result.notePath.split("/").pop() || "")
    );
    const foundTitles = ourTestNotes.map((r: any) => r.noteTitle);

    console.log("Exact phrase search '=\"exact phrase\"' found:", ourTestNotes.length, "notes");
    console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle));

    // Only the two notes with the consecutive phrase may be returned;
    // the reversed-order and separated-words notes must be absent.
    expect(ourTestNotes.length).toBe(2);
    for (const expectedTitle of ["exact phrase", "exact phrase match"]) {
        expect(foundTitles).toContain(expectedTitle);
    }
    for (const unexpectedTitle of ["phrase exact", "this exact and that phrase"]) {
        expect(foundTitles).not.toContain(unexpectedTitle);
    }
});
test("Exact phrase matching respects word order", async ({ page }) => {
    // Creates a text note under root, asserts the request succeeded, records the
    // new note id in createdNoteIds and returns it.
    const createNote = async (title: string, content: string): Promise<string> => {
        const reply = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, {
            headers: { "x-csrf-token": csrfToken },
            data: { title, content, type: "text" }
        });
        expect(reply.ok()).toBeTruthy();

        const body = await reply.json();
        createdNoteIds.push(body.note.noteId);
        return body.note.noteId;
    };

    // Same words in both notes; only the first has "test sentence" in that order.
    const forwardId = await createNote("Testing Order", "This is a test sentence for verification.");
    const reverseId = await createNote("Order Testing", "A sentence test is this for verification.");

    // Fixed pause before querying (presumably lets the server pick up the new notes).
    await page.waitForTimeout(500);

    // Quick search for the quoted phrase with a leading "=" operator
    const response = await page.request.get(`${BASE_URL}/api/quick-search/='test sentence'`, {
        headers: { "x-csrf-token": csrfToken }
    });
    expect(response.ok()).toBeTruthy();
    const { searchResults } = await response.json();

    // Restrict the results to the two notes created by this test
    const ourTestNotes = searchResults.filter((result: any) => {
        const lastSegment = result.notePath.split("/").pop();
        return lastSegment === forwardId || lastSegment === reverseId;
    });

    console.log("Exact phrase search '=\"test sentence\"' found:", ourTestNotes.length, "notes");
    console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle));

    // Only the note with the words in forward order is expected
    expect(ourTestNotes.length).toBe(1);
    const [onlyHit] = ourTestNotes;
    expect(onlyHit.noteTitle).toBe("Testing Order");
});
test("Multi-word exact search without quotes", async ({ page }) => {
    // Verifies that "=" followed by several unquoted words is also treated as an exact phrase.

    // Creates a text note under root, asserts the request succeeded, records the
    // new note id in createdNoteIds and returns it.
    const createNote = async (title: string, content: string): Promise<string> => {
        const reply = await page.request.post(`${BASE_URL}/api/notes/root/children?target=into&targetBranchId=`, {
            headers: { "x-csrf-token": csrfToken },
            data: { title, content, type: "text" }
        });
        expect(reply.ok()).toBeTruthy();

        const body = await reply.json();
        createdNoteIds.push(body.note.noteId);
        return body.note.noteId;
    };

    // First note contains "multi word" consecutively; the second has both words scattered.
    const phraseId = await createNote("Quick Test Note", "A simple note for multi word testing.");
    const scatteredId = await createNote("Word Multi Testing", "Words are multi scattered in this testing example.");

    // Fixed pause before querying (presumably lets the server pick up the new notes).
    await page.waitForTimeout(500);

    // Unquoted query: the parser tokenizes "multi word" as two words
    const response = await page.request.get(`${BASE_URL}/api/quick-search/=multi word`, {
        headers: { "x-csrf-token": csrfToken }
    });
    expect(response.ok()).toBeTruthy();
    const { searchResults } = await response.json();

    // Restrict the results to the two notes created by this test
    const ourTestNotes = searchResults.filter((result: any) => {
        const lastSegment = result.notePath.split("/").pop();
        return lastSegment === phraseId || lastSegment === scatteredId;
    });

    console.log("Multi-word exact search '=multi word' found:", ourTestNotes.length, "notes");
    console.log("Note titles:", ourTestNotes.map((r: any) => r.noteTitle));

    // Only the note with the consecutive "multi word" phrase is expected
    expect(ourTestNotes.length).toBe(1);
    const [onlyHit] = ourTestNotes;
    expect(onlyHit.noteTitle).toBe("Quick Test Note");
});
}); });

View File

@ -77,15 +77,43 @@ class NoteContentFulltextExp extends Expression {
const resultNoteSet = new NoteSet(); const resultNoteSet = new NoteSet();
// Search through notes with content
for (const row of sql.iterateRows<SearchRow>(` for (const row of sql.iterateRows<SearchRow>(`
SELECT noteId, type, mime, content, isProtected SELECT noteId, type, mime, content, isProtected
FROM notes JOIN blobs USING (blobId) FROM notes JOIN blobs USING (blobId)
WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap') WHERE type IN ('text', 'code', 'mermaid', 'canvas', 'mindMap')
AND isDeleted = 0 AND isDeleted = 0
AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) { AND LENGTH(content) < ${MAX_SEARCH_CONTENT_SIZE}`)) {
this.findInText(row, inputNoteSet, resultNoteSet); this.findInText(row, inputNoteSet, resultNoteSet);
} }
// Second pass for exact match (= / !=) with flatText enabled: look at every input
// note that is not yet in the result set and test its flat text for an attribute
// value match. This covers notes the content query above never returned.
// NOTE(review): for "!=", a note that findInText examined and rejected is not in
// resultNoteSet either, so it is re-evaluated here and may be added back when its
// flat text has no matching attribute value - confirm this is intended.
if (this.flatText && (this.operator === "=" || this.operator === "!=")) {
    // Loop-invariant: attribute values appear in flat text as "#name=value" or
    // "~name=value", so a value match is detected by searching for "=<phrase>".
    // Only attribute values are checked; noteId, type, mime and title (also part
    // of flat text) are deliberately not matched here.
    const attributeNeedle = `=${normalizeSearchText(this.tokens.join(" "))}`;
    const wantMatch = this.operator === "=";

    for (const noteId of inputNoteSet.noteIdSet) {
        // Skip if already found or doesn't exist
        if (resultNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
            continue;
        }

        const note = becca.notes[noteId];
        const matches = normalizeSearchText(note.getFlatText()).includes(attributeNeedle);

        // "=" keeps notes that match, "!=" keeps notes that do not
        if (matches === wantMatch) {
            resultNoteSet.add(note);
        }
    }
}
return resultNoteSet; return resultNoteSet;
} }
@ -103,6 +131,32 @@ class NoteContentFulltextExp extends Expression {
return words.some(word => word === normalizedToken); return words.some(word => word === normalizedToken);
} }
/**
 * Checks whether `content` contains the tokens as one exact phrase, i.e. the
 * tokens joined by single spaces, in order. Both the tokens and the content are
 * passed through normalizeSearchText before comparing.
 *
 * @param tokens - search tokens forming the phrase
 * @param content - text to search in (note content or a note's flat text)
 * @param checkFlatTextAttributes - set when `content` is a note's flat text. In
 *   that mode only attribute values are considered: they appear in flat text as
 *   "#name=value" or "~name=value", so the phrase must directly follow an "=".
 *   This prevents matches against the noteId, type, mime and title, which are
 *   also part of flat text.
 * @returns true when the phrase (or "=phrase" in flat-text mode) occurs in the content
 */
private containsExactPhrase(tokens: string[], content: string, checkFlatTextAttributes: boolean = false): boolean {
    const phrase = tokens.map((token) => normalizeSearchText(token)).join(" ");
    const normalizedContent = normalizeSearchText(content);

    // Previously the plain substring test ran first in both modes. Any text that
    // contains "=phrase" also contains "phrase", so the attribute-specific check
    // could never change the outcome and flat text was matched as a whole.
    const needle = checkFlatTextAttributes ? `=${phrase}` : phrase;

    return normalizedContent.includes(needle);
}
findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) { findInText({ noteId, isProtected, content, type, mime }: SearchRow, inputNoteSet: NoteSet, resultNoteSet: NoteSet) {
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) { if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
return; return;
@ -137,9 +191,25 @@ class NoteContentFulltextExp extends Expression {
if (this.tokens.length === 1) { if (this.tokens.length === 1) {
const [token] = this.tokens; const [token] = this.tokens;
let matches = false;
if (this.operator === "=") {
matches = this.containsExactWord(token, content);
// Also check flatText if enabled (includes attributes)
if (!matches && this.flatText) {
const flatText = becca.notes[noteId].getFlatText();
matches = this.containsExactPhrase([token], flatText, true);
}
} else if (this.operator === "!=") {
matches = !this.containsExactWord(token, content);
// For negation, check flatText too
if (matches && this.flatText) {
const flatText = becca.notes[noteId].getFlatText();
matches = !this.containsExactPhrase([token], flatText, true);
}
}
if ( if (
(this.operator === "=" && this.containsExactWord(token, content)) || matches ||
(this.operator === "!=" && !this.containsExactWord(token, content)) ||
(this.operator === "*=" && content.endsWith(token)) || (this.operator === "*=" && content.endsWith(token)) ||
(this.operator === "=*" && content.startsWith(token)) || (this.operator === "=*" && content.startsWith(token)) ||
(this.operator === "*=*" && content.includes(token)) || (this.operator === "*=*" && content.includes(token)) ||
@ -152,10 +222,26 @@ class NoteContentFulltextExp extends Expression {
} else { } else {
// Multi-token matching with fuzzy support and phrase proximity // Multi-token matching with fuzzy support and phrase proximity
if (this.operator === "~=" || this.operator === "~*") { if (this.operator === "~=" || this.operator === "~*") {
// Fuzzy phrase matching
if (this.matchesWithFuzzy(content, noteId)) { if (this.matchesWithFuzzy(content, noteId)) {
resultNoteSet.add(becca.notes[noteId]); resultNoteSet.add(becca.notes[noteId]);
} }
} else if (this.operator === "=" || this.operator === "!=") {
// Exact phrase matching for = and !=
let matches = this.containsExactPhrase(this.tokens, content, false);
// Also check flatText if enabled (includes attributes)
if (!matches && this.flatText) {
const flatText = becca.notes[noteId].getFlatText();
matches = this.containsExactPhrase(this.tokens, flatText, true);
}
if ((this.operator === "=" && matches) ||
(this.operator === "!=" && !matches)) {
resultNoteSet.add(becca.notes[noteId]);
}
} else { } else {
// Other operators: check all tokens present (any order)
const nonMatchingToken = this.tokens.find( const nonMatchingToken = this.tokens.find(
(token) => (token) =>
!this.tokenMatchesContent(token, content, noteId) !this.tokenMatchesContent(token, content, noteId)

View File

@ -14,24 +14,35 @@ type Comparator<T> = (comparedValue: T) => (val: string) => boolean;
const stringComparators: Record<string, Comparator<string>> = { const stringComparators: Record<string, Comparator<string>> = {
"=": (comparedValue) => (val) => { "=": (comparedValue) => (val) => {
// For the = operator, check if the value contains the exact word (word-boundary matching) // For the = operator, check if the value contains the exact word or phrase
// This is case-insensitive since both values are already lowercased // This is case-insensitive since both values are already lowercased
if (!val) return false; if (!val) return false;
const normalizedVal = normalizeSearchText(val); const normalizedVal = normalizeSearchText(val);
const normalizedCompared = normalizeSearchText(comparedValue); const normalizedCompared = normalizeSearchText(comparedValue);
// Split into words and check for exact match // If comparedValue has multiple words, check for exact phrase
if (normalizedCompared.includes(" ")) {
return normalizedVal.includes(normalizedCompared);
}
// For single word, split into words and check for exact match
const words = normalizedVal.split(/\s+/); const words = normalizedVal.split(/\s+/);
return words.some(word => word === normalizedCompared); return words.some(word => word === normalizedCompared);
}, },
"!=": (comparedValue) => (val) => { "!=": (comparedValue) => (val) => {
// Negation of exact word match // Negation of exact word/phrase match
if (!val) return true; if (!val) return true;
const normalizedVal = normalizeSearchText(val); const normalizedVal = normalizeSearchText(val);
const normalizedCompared = normalizeSearchText(comparedValue); const normalizedCompared = normalizeSearchText(comparedValue);
// If comparedValue has multiple words, check for exact phrase
if (normalizedCompared.includes(" ")) {
return !normalizedVal.includes(normalizedCompared);
}
// For single word, split into words and check for exact match
const words = normalizedVal.split(/\s+/); const words = normalizedVal.split(/\s+/);
return !words.some(word => word === normalizedCompared); return !words.some(word => word === normalizedCompared);
}, },

View File

@ -38,11 +38,14 @@ function getFulltext(_tokens: TokenData[], searchContext: SearchContext, leading
if (!searchContext.fastSearch) { if (!searchContext.fastSearch) {
// For exact match with "=", we need different behavior // For exact match with "=", we need different behavior
if (leadingOperator === "=" && tokens.length === 1) { if (leadingOperator === "=" && tokens.length >= 1) {
// Exact match on title OR exact match on content // Exact match on title OR exact match on content OR exact match in flat text (includes attributes)
// For multi-word, join tokens with space to form exact phrase
const titleSearchValue = tokens.join(" ");
return new OrExp([ return new OrExp([
new PropertyComparisonExp(searchContext, "title", "=", tokens[0]), new PropertyComparisonExp(searchContext, "title", "=", titleSearchValue),
new NoteContentFulltextExp("=", { tokens, flatText: false }) new NoteContentFulltextExp("=", { tokens, flatText: false }),
new NoteContentFulltextExp("=", { tokens, flatText: true })
]); ]);
} }
return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]); return new OrExp([new NoteFlatTextExp(tokens), new NoteContentFulltextExp(operator, { tokens, flatText: true })]);

View File

@ -304,10 +304,13 @@ describe("Search", () => {
const searchContext = new SearchContext(); const searchContext = new SearchContext();
// Test 1: With = and quotes, treat as multi-word exact match (both words must match) // Test 1: With = and quotes, treat as exact phrase match (consecutive words in order)
let searchResults = searchService.findResultsWithQuery("='exact phrase'", searchContext); let searchResults = searchService.findResultsWithQuery("='exact phrase'", searchContext);
// With current implementation, this searches for notes containing both "exact" and "phrase" words // Should match only notes containing the exact phrase "exact phrase"
expect(searchResults.length).toEqual(4); // All notes with both words expect(searchResults.length).toEqual(3); // Only notes with consecutive "exact phrase"
expect(findNoteByTitle(searchResults, "exact phrase")).toBeTruthy();
expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy();
expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy();
// Test 2: Without =, quoted phrase should find substring/contains matches // Test 2: Without =, quoted phrase should find substring/contains matches
searchResults = searchService.findResultsWithQuery("'exact phrase'", searchContext); searchResults = searchService.findResultsWithQuery("'exact phrase'", searchContext);
@ -316,9 +319,10 @@ describe("Search", () => {
expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy(); expect(findNoteByTitle(searchResults, "exact phrase match")).toBeTruthy();
expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy(); expect(findNoteByTitle(searchResults, "this exact phrase here")).toBeTruthy();
// Test 3: Verify word order doesn't matter with exact word matching // Test 3: Verify word order matters with exact phrase matching
searchResults = searchService.findResultsWithQuery("='phrase exact'", searchContext); searchResults = searchService.findResultsWithQuery("='phrase exact'", searchContext);
expect(searchResults.length).toEqual(4); // All notes with both words expect(searchResults.length).toEqual(1); // Only "phrase exact" matches
expect(findNoteByTitle(searchResults, "phrase exact")).toBeTruthy();
}); });
it("leading = operator case sensitivity", () => { it("leading = operator case sensitivity", () => {
@ -368,15 +372,15 @@ describe("Search", () => {
expect(findNoteByTitle(searchResults, "test.note")).toBeTruthy(); expect(findNoteByTitle(searchResults, "test.note")).toBeTruthy();
// For phrases with spaces, use quotes to keep them together // For phrases with spaces, use quotes to keep them together
// With word-boundary matching, this finds all notes with both words // With exact phrase matching, this finds notes with the consecutive phrase
searchResults = searchService.findResultsWithQuery("='test note'", searchContext); searchResults = searchService.findResultsWithQuery("='test note'", searchContext);
expect(searchResults.length).toEqual(1); // Only "test note" has both words as separate tokens expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase
expect(findNoteByTitle(searchResults, "test note")).toBeTruthy(); expect(findNoteByTitle(searchResults, "test note")).toBeTruthy();
// Without quotes, "test note" is tokenized as two separate words // Without quotes, "test note" is tokenized as two separate tokens
// and will match all notes containing both "test" AND "note" words // and will be treated as an exact phrase search with = operator
searchResults = searchService.findResultsWithQuery("=test note", searchContext); searchResults = searchService.findResultsWithQuery("=test note", searchContext);
expect(searchResults.length).toEqual(1); // Only "test note" has both as separate words expect(searchResults.length).toEqual(1); // Only "test note" has the exact phrase
// Without =, should find all matches containing "test" substring // Without =, should find all matches containing "test" substring
searchResults = searchService.findResultsWithQuery("test", searchContext); searchResults = searchService.findResultsWithQuery("test", searchContext);