"use strict";

const Expression = require('./expression.js');
const NoteSet = require('../note_set.js');
const log = require('../../log.js');
const becca = require('../../../becca/becca.js');
const protectedSessionService = require('../../protected_session.js');
const striptags = require('striptags');
const utils = require('../../utils.js');

/** Operators accepted by the note content search. */
const ALLOWED_OPERATORS = ['=', '!=', '*=*', '*=', '=*', '%='];

/**
 * Cache of compiled regular expressions keyed by their source pattern.
 *
 * A Map is used instead of a plain object so that patterns which happen to be
 * named like Object.prototype members ("constructor", "toString", "__proto__", ...)
 * are not mistaken for already cached entries.
 */
const cachedRegexes = new Map();

/**
 * Returns a compiled (and cached) regular expression for the given pattern.
 *
 * @param {string} str - regular expression source
 * @returns {RegExp} regex compiled with the "m" (multiline) and "s" (dot-all) flags
 * @throws {SyntaxError} when the pattern is not a valid regular expression
 */
function getRegex(str) {
    let regex = cachedRegexes.get(str);

    if (regex === undefined) {
        regex = new RegExp(str, 'ms'); // multiline, dot-all
        cachedRegexes.set(str, regex);
    }

    return regex;
}

/**
 * Search expression matching notes by their content.
 *
 * Content of text, code and mermaid notes is read from the database, preprocessed
 * (see preprocessContent) and compared against the search tokens.
 */
class NoteContentFulltextExp extends Expression {
    /**
     * @param {string} operator - one of ALLOWED_OPERATORS (validated in execute())
     * @param {object} options
     * @param {string[]} options.tokens - tokens to look for in the content
     * @param {boolean} [options.raw] - when truthy, HTML tags are not stripped from the content
     * @param {boolean} [options.flatText] - when truthy, a token of a multi-token search
     *        may also be matched by the note's flat text instead of its content
     */
    constructor(operator, {tokens, raw, flatText}) {
        super();

        this.operator = operator;
        this.tokens = tokens;
        this.raw = !!raw;
        this.flatText = !!flatText;
    }

    /**
     * Runs the search over all non-deleted text, code and mermaid notes.
     *
     * @param inputNoteSet - only notes contained in this set can end up in the result
     * @param executionContext - not used by this expression
     * @param searchContext - receives an error when the operator is not supported
     * @returns the set of matching notes; inputNoteSet itself when the operator is not supported
     */
    execute(inputNoteSet, executionContext, searchContext) {
        if (!ALLOWED_OPERATORS.includes(this.operator)) {
            searchContext.addError(`Note content can be searched only with operators: ${ALLOWED_OPERATORS.join(", ")}, operator ${this.operator} given.`);

            return inputNoteSet;
        }

        const resultNoteSet = new NoteSet();
        // required here rather than at the top of the file, as in the original
        const sql = require('../../sql.js');

        for (const row of sql.iterateRows(`
                SELECT noteId, type, mime, content, isProtected
                FROM notes JOIN blobs USING (blobId)
                WHERE type IN ('text', 'code', 'mermaid') AND isDeleted = 0`)) {

            this.findInText(row, inputNoteSet, resultNoteSet);
        }

        return resultNoteSet;
    }

    /**
     * Tests a single database row against the tokens and adds the corresponding
     * note to resultNoteSet when it matches.
     *
     * @param {object} row - row with noteId, isProtected, content, type and mime
     * @param inputNoteSet - the row is skipped unless this set contains its noteId
     * @param resultNoteSet - matching notes are added here
     * @returns {string|undefined} the preprocessed content, or undefined when the row was skipped
     */
    findInText({noteId, isProtected, content, type, mime}, inputNoteSet, resultNoteSet) {
        if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
            return;
        }

        if (isProtected) {
            if (!protectedSessionService.isProtectedSessionAvailable()) {
                return;
            }

            try {
                content = protectedSessionService.decryptString(content);
            } catch (e) {
                // an undecryptable note is skipped instead of failing the whole search
                log.info(`Cannot decrypt content of note ${noteId}`);
                return;
            }
        }

        if (!content) {
            return;
        }

        content = this.preprocessContent(content, type, mime);

        if (this.tokens.length === 1) {
            const [token] = this.tokens;

            if ((this.operator === '=' && token === content)
                || (this.operator === '!=' && token !== content)
                || (this.operator === '*=' && content.endsWith(token))
                || (this.operator === '=*' && content.startsWith(token))
                || (this.operator === '*=*' && content.includes(token))
                || (this.operator === '%=' && getRegex(token).test(content))) {

                resultNoteSet.add(becca.notes[noteId]);
            }
        } else {
            // with multiple tokens the operator is not consulted; every token has to be found
            const nonMatchingToken = this.tokens.find(token =>
                !content.includes(token) &&
                (
                    // in case of default fulltext search, we should consider both title, attrs and content
                    // so e.g. "hello world" should match when "hello" is in title and "world" in content
                    !this.flatText
                    || !becca.notes[noteId].getFlatText().includes(token)
                )
            );

            if (!nonMatchingToken) {
                resultNoteSet.add(becca.notes[noteId]);
            }
        }

        return content;
    }

    /**
     * Converts raw note content into the form the tokens are compared against.
     *
     * @param {string|Buffer} content - note content; converted with toString()
     * @param {string} type - note type
     * @param {string} mime - note MIME type
     * @returns {string} normalized and trimmed content; for HTML text notes the tags are
     *          additionally stripped (unless in raw mode or the note is too large)
     */
    preprocessContent(content, type, mime) {
        // NOTE(review): utils.normalize is defined elsewhere - presumably it lower-cases
        // the text and removes diacritics; confirm against utils.js
        content = utils.normalize(content.toString());

        if (type === 'text' && mime === 'text/html') {
            if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
                content = this.stripTags(content);
            }

            // Non-breaking spaces should behave like ordinary spaces when matching.
            // Tag stripping is not expected to decode entities (TODO confirm), so both
            // the "&nbsp;" entity and the literal U+00A0 character are replaced.
            content = content.replace(/&nbsp;|\u00a0/g, ' ');
        }

        return content.trim();
    }

    /**
     * Removes HTML tags from the content while keeping it searchable.
     *
     * @param {string} content - HTML content
     * @returns {string} content with all tags removed except opening link tags
     */
    stripTags(content) {
        // we want to allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
        // we want to insert space in place of block tags (because they imply text separation)
        // but we don't want to insert text for typical formatting inline tags which can occur within one word
        const linkTag = 'a';
        const inlineFormattingTags = ['b', 'strong', 'em', 'i', 'span', 'big', 'small', 'font', 'sub', 'sup'];

        // replace tags which imply text separation with a space
        content = striptags(content, [linkTag, ...inlineFormattingTags], ' ');

        // replace the inline formatting tags (but not links) without a space
        content = striptags(content, [linkTag], '');

        // at least the closing link tag can be easily stripped
        return content.replace(/<\/a>/ig, "");
    }
}

module.exports = NoteContentFulltextExp;