trilium/src/services/search/expressions/note_content_fulltext.js

134 lines
4.5 KiB
JavaScript

"use strict";
const Expression = require('./expression');
const NoteSet = require('../note_set');
const log = require('../../log');
const becca = require('../../../becca/becca');
const protectedSessionService = require('../../protected_session');
const striptags = require('striptags');
const utils = require("../../utils");
const ALLOWED_OPERATORS = ['=', '!=', '*=*', '*=', '=*', '%='];
const cachedRegexes = {};
function getRegex(str) {
if (!(str in cachedRegexes)) {
cachedRegexes[str] = new RegExp(str, 'ms'); // multiline, dot-all
}
return cachedRegexes[str];
}
class NoteContentFulltextExp extends Expression {
constructor(operator, {tokens, raw, flatText}) {
super();
this.operator = operator;
this.tokens = tokens;
this.raw = !!raw;
this.flatText = !!flatText;
}
execute(inputNoteSet, executionContext, searchContext) {
if (!ALLOWED_OPERATORS.includes(this.operator)) {
searchContext.addError(`Note content can be searched only with operators: ${ALLOWED_OPERATORS.join(", ")}, operator ${this.operator} given.`);
return inputNoteSet;
}
const resultNoteSet = new NoteSet();
const sql = require('../../sql');
for (const row of sql.iterateRows(`
SELECT noteId, type, mime, content, isProtected
FROM notes JOIN note_contents USING (noteId)
WHERE type IN ('text', 'code', 'mermaid') AND isDeleted = 0`)) {
this.findInText(row, inputNoteSet, resultNoteSet);
}
for (const row of sql.iterateRows(`
SELECT noteId, 'plainText' as type, mime, content, isProtected
FROM note_ancillaries JOIN note_ancillary_contents USING (noteAncillaryId)
WHERE name IN ('plainText') AND isDeleted = 0`)) {
if (!resultNoteSet.hasNoteId(row.noteId)) {
this.findInText(row, inputNoteSet, resultNoteSet);
}
}
return resultNoteSet;
}
findInText({noteId, isProtected, content, type, mime}, inputNoteSet, resultNoteSet) {
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
return;
}
if (isProtected) {
if (!protectedSessionService.isProtectedSessionAvailable()) {
return;
}
try {
content = protectedSessionService.decryptString(content);
} catch (e) {
log.info(`Cannot decrypt content of note ${noteId}`);
return;
}
}
content = this.preprocessContent(content, type, mime);
if (this.tokens.length === 1) {
const [token] = this.tokens;
if ((this.operator === '=' && token === content)
|| (this.operator === '!=' && token !== content)
|| (this.operator === '*=' && content.endsWith(token))
|| (this.operator === '=*' && content.startsWith(token))
|| (this.operator === '*=*' && content.includes(token))
|| (this.operator === '%=' && getRegex(token).test(content))) {
resultNoteSet.add(becca.notes[noteId]);
}
} else {
const nonMatchingToken = this.tokens.find(token =>
!content.includes(token) &&
(
// in case of default fulltext search we should consider both title, attrs and content
// so e.g. "hello world" should match when "hello" is in title and "world" in content
!this.flatText
|| !becca.notes[noteId].getFlatText().includes(token)
)
);
if (!nonMatchingToken) {
resultNoteSet.add(becca.notes[noteId]);
}
}
return content;
}
preprocessContent(content, type, mime) {
content = utils.normalize(content.toString());
if (type === 'text' && mime === 'text/html') {
if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
// allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
content = striptags(content, ['a'], ' ');
// at least the closing tag can be easily stripped
content = content.replace(/<\/a>/ig, "");
}
content = content.replace(/&nbsp;/g, ' ');
}
return content.trim();
}
}
module.exports = NoteContentFulltextExp;