From 0eb77e43e2653d01f86772533bb55936c748e651 Mon Sep 17 00:00:00 2001 From: zadam Date: Thu, 23 Jul 2020 23:38:38 +0200 Subject: [PATCH] full text search uses in-memory search even for unprotected notes and preprocesses content with tag stripping --- package-lock.json | 5 ++++ package.json | 3 +- src/public/app/services/tree_cache.js | 2 +- .../note_content_protected_fulltext.js | 11 ++++++- .../note_content_unprotected_fulltext.js | 30 +++++++++++-------- 5 files changed, 35 insertions(+), 16 deletions(-) diff --git a/package-lock.json b/package-lock.json index 02c1da738..0c0668bb2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7346,6 +7346,11 @@ "escape-string-regexp": "^1.0.2" } }, + "striptags": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/striptags/-/striptags-3.1.1.tgz", + "integrity": "sha1-yMPn/db7S7OjKjt1LltePjgJPr0=" + }, "strtok3": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-6.0.3.tgz", diff --git a/package.json b/package.json index c07311b82..53f30b7ca 100644 --- a/package.json +++ b/package.json @@ -66,6 +66,7 @@ "serve-favicon": "2.5.0", "session-file-store": "1.4.0", "string-similarity": "4.0.1", + "striptags": "^3.1.1", "turndown": "6.0.0", "turndown-plugin-gfm": "1.0.2", "unescape": "1.0.1", @@ -75,7 +76,7 @@ }, "devDependencies": { "cross-env": "^7.0.2", - "electron": "10.0.0-beta.13", + "electron": "10.0.0-beta.11", "electron-builder": "22.7.0", "electron-packager": "15.0.0", "electron-rebuild": "1.11.0", diff --git a/src/public/app/services/tree_cache.js b/src/public/app/services/tree_cache.js index 8461a1f58..6637c7c73 100644 --- a/src/public/app/services/tree_cache.js +++ b/src/public/app/services/tree_cache.js @@ -241,7 +241,7 @@ class TreeCache { return null; } else if (!noteId) { - console.log(`Falsy noteId ${noteId}, returning null.`); + console.trace(`Falsy noteId ${noteId}, returning null.`); return null; } diff --git a/src/services/search/expressions/note_content_protected_fulltext.js b/src/services/search/expressions/note_content_protected_fulltext.js index 74633d5b0..472fd62f0 100644 --- a/src/services/search/expressions/note_content_protected_fulltext.js +++ b/src/services/search/expressions/note_content_protected_fulltext.js @@ -5,6 +5,7 @@ const NoteSet = require('../note_set'); const log = require('../../log'); const noteCache = require('../../note_cache/note_cache'); const protectedSessionService = require('../../protected_session'); +const striptags = require('striptags'); class NoteContentProtectedFulltextExp extends Expression { constructor(operator, tokens) { @@ -26,7 +27,10 @@ class NoteContentProtectedFulltextExp extends Expression { const sql = require('../../sql'); - for (let {noteId, content} of sql.iterateRows(`SELECT noteId, content FROM notes JOIN note_contents USING (noteId) WHERE isDeleted = 0 AND isProtected = 1`)) { + for (let {noteId, type, mime, content} of sql.iterateRows(` + SELECT noteId, type, mime, content + FROM notes JOIN note_contents USING (noteId) + WHERE type IN ('text', 'code') AND isDeleted = 0 AND isProtected = 1`)) { try { content = protectedSessionService.decryptString(content); @@ -38,6 +42,11 @@ class NoteContentProtectedFulltextExp extends Expression { content = content.toLowerCase(); + if (type === 'text' && mime === 'text/html') { + content = striptags(content); + content = content.replace(/ /g, ' '); + } + if (this.tokens.find(token => !content.includes(token))) { continue; } diff --git a/src/services/search/expressions/note_content_unprotected_fulltext.js b/src/services/search/expressions/note_content_unprotected_fulltext.js index 9fa45cd7f..ad11a8985 100644 --- a/src/services/search/expressions/note_content_unprotected_fulltext.js +++ b/src/services/search/expressions/note_content_unprotected_fulltext.js @@ -3,7 +3,7 @@ const Expression = require('./expression'); const NoteSet = require('../note_set'); const noteCache = require('../../note_cache/note_cache'); -const utils = require('../../utils'); +const striptags = require('striptags'); class NoteContentUnprotectedFulltextExp extends Expression { constructor(operator, tokens) { @@ -18,21 +18,25 @@ class NoteContentUnprotectedFulltextExp extends Expression { execute(inputNoteSet) { const resultNoteSet = new NoteSet(); - const wheres = this.tokens.map(token => "note_contents.content LIKE " + utils.prepareSqlForLike('%', token, '%')); const sql = require('../../sql'); -console.log(` - SELECT notes.noteId - FROM notes - JOIN note_contents ON notes.noteId = note_contents.noteId - WHERE isDeleted = 0 AND isProtected = 0 AND ${wheres.join(' AND ')}`); - const noteIds = sql.getColumn(` - SELECT notes.noteId - FROM notes - JOIN note_contents ON notes.noteId = note_contents.noteId - WHERE isDeleted = 0 AND isProtected = 0 AND ${wheres.join(' AND ')}`); - for (const noteId of noteIds) { + for (let {noteId, type, mime, content} of sql.iterateRows(` + SELECT noteId, type, mime, content + FROM notes JOIN note_contents USING (noteId) + WHERE type IN ('text', 'code') AND isDeleted = 0 AND isProtected = 0`)) { + + content = content.toLowerCase(); + + if (type === 'text' && mime === 'text/html') { + content = striptags(content); + content = content.replace(/ /g, ' '); + } + + if (this.tokens.find(token => !content.includes(token))) { + continue; + } + if (inputNoteSet.hasNoteId(noteId) && noteId in noteCache.notes) { resultNoteSet.add(noteCache.notes[noteId]); }