diff --git a/src/becca/entities/note.js b/src/becca/entities/note.js index 59e7fce16..5562f239c 100644 --- a/src/becca/entities/note.js +++ b/src/becca/entities/note.js @@ -649,7 +649,7 @@ class Note extends AbstractEntity { this.flatTextCache += ' '; } - this.flatTextCache = this.flatTextCache.toLowerCase(); + this.flatTextCache = utils.removeDiacritic(this.flatTextCache.toLowerCase()); } return this.flatTextCache; diff --git a/src/services/search/expressions/note_cache_flat_text.js b/src/services/search/expressions/note_cache_flat_text.js index 3cbd84c33..ca5f2c59d 100644 --- a/src/services/search/expressions/note_cache_flat_text.js +++ b/src/services/search/expressions/note_cache_flat_text.js @@ -3,8 +3,9 @@ const Expression = require('./expression'); const NoteSet = require('../note_set'); const becca = require('../../../becca/becca'); +const utils = require("../../utils"); -class BeccaFlatTextExp extends Expression { +class NoteFlatTextExp extends Expression { constructor(tokens) { super(); @@ -44,15 +45,15 @@ class BeccaFlatTextExp extends Expression { for (const attribute of note.ownedAttributes) { for (const token of tokens) { - if (attribute.name.toLowerCase().includes(token) - || attribute.value.toLowerCase().includes(token)) { + if (utils.normalize(attribute.name).includes(token) + || utils.normalize(attribute.value).includes(token)) { foundAttrTokens.push(token); } } } for (const parentNote of note.parents) { - const title = beccaService.getNoteTitle(note.noteId, parentNote.noteId).toLowerCase(); + const title = utils.normalize(beccaService.getNoteTitle(note.noteId, parentNote.noteId)); const foundTokens = foundAttrTokens.slice(); for (const token of tokens) { @@ -89,8 +90,8 @@ class BeccaFlatTextExp extends Expression { } for (const attribute of note.ownedAttributes) { - if (attribute.name.toLowerCase().includes(token) - || attribute.value.toLowerCase().includes(token)) { + if (utils.normalize(attribute.name).includes(token) + || 
utils.normalize(attribute.value).includes(token)) { foundAttrTokens.push(token); } @@ -98,7 +99,7 @@ class BeccaFlatTextExp extends Expression { } for (const parentNote of note.parents) { - const title = beccaService.getNoteTitle(note.noteId, parentNote.noteId).toLowerCase(); + const title = utils.normalize(beccaService.getNoteTitle(note.noteId, parentNote.noteId)); const foundTokens = foundAttrTokens.slice(); for (const token of this.tokens) { @@ -140,4 +141,4 @@ class BeccaFlatTextExp extends Expression { } } -module.exports = BeccaFlatTextExp; +module.exports = NoteFlatTextExp; diff --git a/src/services/search/expressions/note_content_protected_fulltext.js b/src/services/search/expressions/note_content_protected_fulltext.js index 197649765..6b171c3e7 100644 --- a/src/services/search/expressions/note_content_protected_fulltext.js +++ b/src/services/search/expressions/note_content_protected_fulltext.js @@ -6,6 +6,7 @@ const log = require('../../log'); const becca = require('../../../becca/becca'); const protectedSessionService = require('../../protected_session'); const striptags = require('striptags'); +const utils = require("../../utils"); class NoteContentProtectedFulltextExp extends Expression { constructor(operator, tokens, raw) { @@ -45,7 +46,7 @@ class NoteContentProtectedFulltextExp extends Expression { continue; } - content = content.toLowerCase(); + content = utils.normalize(content); if (type === 'text' && mime === 'text/html') { if (!this.raw && content.length < 20000) { // striptags is slow for very large notes diff --git a/src/services/search/expressions/note_content_unprotected_fulltext.js b/src/services/search/expressions/note_content_unprotected_fulltext.js index 208f6e415..ad43cef46 100644 --- a/src/services/search/expressions/note_content_unprotected_fulltext.js +++ b/src/services/search/expressions/note_content_unprotected_fulltext.js @@ -4,6 +4,7 @@ const Expression = require('./expression'); const NoteSet = require('../note_set'); const becca 
= require('../../../becca/becca'); const striptags = require('striptags'); +const utils = require("../../utils"); class NoteContentUnprotectedFulltextExp extends Expression { constructor(operator, tokens, raw) { @@ -31,7 +32,7 @@ class NoteContentUnprotectedFulltextExp extends Expression { continue; } - content = content.toString().toLowerCase(); + content = utils.normalize(content.toString()); if (type === 'text' && mime === 'text/html') { if (!this.raw && content.length < 20000) { // striptags is slow for very large notes diff --git a/src/services/search/services/parse.js b/src/services/search/services/parse.js index f42f56329..a635c7fcb 100644 --- a/src/services/search/services/parse.js +++ b/src/services/search/services/parse.js @@ -18,9 +18,10 @@ const OrderByAndLimitExp = require('../expressions/order_by_and_limit'); const AncestorExp = require("../expressions/ancestor"); const buildComparator = require('./build_comparator'); const ValueExtractor = require('../value_extractor'); +const utils = require("../../utils"); function getFulltext(tokens, searchContext) { - tokens = tokens.map(t => t.token); + tokens = tokens.map(t => utils.removeDiacritic(t.token)); searchContext.highlightedTokens.push(...tokens); diff --git a/src/services/search/services/search.js b/src/services/search/services/search.js index 500e09aa5..e98e54672 100644 --- a/src/services/search/services/search.js +++ b/src/services/search/services/search.js @@ -223,8 +223,8 @@ function highlightSearchResults(searchResults, highlightedTokens) { } for (const attr of note.getAttributes()) { - if (highlightedTokens.find(token => attr.name.toLowerCase().includes(token) - || attr.value.toLowerCase().includes(token))) { + if (highlightedTokens.find(token => utils.normalize(attr.name).includes(token) + || utils.normalize(attr.value).includes(token))) { result.highlightedNotePathTitle += ` "${formatAttribute(attr)}'`; } @@ -232,6 +232,7 @@ function highlightSearchResults(searchResults, highlightedTokens) { 
} for (const token of highlightedTokens) { + // FIXME: highlightedTokens have had their diacritics stripped (see getFulltext in parse.js), so this regex cannot match - and therefore cannot highlight - the same word where it is still written with diacritics const tokenRegex = new RegExp("(" + utils.escapeRegExp(token) + ")", "gi"); for (const result of searchResults) { diff --git a/src/services/utils.js b/src/services/utils.js index 818767600..0b923ff2c 100644 --- a/src/services/utils.js +++ b/src/services/utils.js @@ -290,6 +290,14 @@ function deferred() { })(); } +function removeDiacritic(str) { + return str.normalize("NFD").replace(/\p{Diacritic}/gu, ""); +} + +function normalize(str) { + return removeDiacritic(str).toLowerCase(); +} + module.exports = { randomSecureToken, randomString, @@ -321,5 +329,7 @@ module.exports = { removeTextFileExtension, formatDownloadTitle, timeLimit, - deferred + deferred, + removeDiacritic, + normalize };