diff --git a/db/migrations/0210__consistency_checks.js b/db/migrations/0210__consistency_checks.js
index f3809fa06..b29331dbe 100644
--- a/db/migrations/0210__consistency_checks.js
+++ b/db/migrations/0210__consistency_checks.js
@@ -3,11 +3,11 @@ module.exports = async () => {
     const beccaLoader = require("../../src/becca/becca_loader");
     const log = require("../../src/services/log");
     const consistencyChecks = require("../../src/services/consistency_checks");
-    const noteService = require("../../src/services/notes");
+    const eraseService = require("../../src/services/erase");
 
     await cls.init(async () => {
         // precaution for the 0211 migration
-        noteService.eraseDeletedNotesNow();
+        eraseService.eraseDeletedNotesNow();
 
         beccaLoader.load();
 
diff --git a/src/becca/entities/abstract_becca_entity.js b/src/becca/entities/abstract_becca_entity.js
index d694763a5..9fdc189b0 100644
--- a/src/becca/entities/abstract_becca_entity.js
+++ b/src/becca/entities/abstract_becca_entity.js
@@ -175,6 +175,8 @@ class AbstractBeccaEntity {
         }
 
         sql.execute("DELETE FROM blobs WHERE blobId = ?", [oldBlobId]);
+        // blobs are not marked as erased in entity_changes, their entity_changes rows are just purged completely
+        // this is because technically every keystroke can create a new blob and there would be just too many of them
         sql.execute("DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId = ?", [oldBlobId]);
     }
 
diff --git a/src/routes/api/branches.js b/src/routes/api/branches.js
index 8999ed7f6..f61b92147 100644
--- a/src/routes/api/branches.js
+++ b/src/routes/api/branches.js
@@ -4,7 +4,7 @@ const sql = require('../../services/sql');
 const utils = require('../../services/utils');
 const entityChangesService = require('../../services/entity_changes');
 const treeService = require('../../services/tree');
-const noteService = require('../../services/notes');
+const eraseService = require('../../services/erase');
 const becca = require('../../becca/becca');
 const TaskContext = require('../../services/task_context');
 const branchService = require("../../services/branches");
@@ -193,7 +193,7 @@ function deleteBranch(req) {
     if (eraseNotes) {
         // erase automatically means deleting all clones + note itself
         branch.getNote().deleteNote(deleteId, taskContext);
-        noteService.eraseNotesWithDeleteId(deleteId);
+        eraseService.eraseNotesWithDeleteId(deleteId);
         noteDeleted = true;
     } else {
         noteDeleted = branch.deleteBranch(deleteId, taskContext);
diff --git a/src/routes/api/notes.js b/src/routes/api/notes.js
index 74f455be5..1abbf2c7c 100644
--- a/src/routes/api/notes.js
+++ b/src/routes/api/notes.js
@@ -1,6 +1,7 @@
 "use strict";
 
 const noteService = require('../../services/notes');
+const eraseService = require('../../services/erase');
 const treeService = require('../../services/tree');
 const sql = require('../../services/sql');
 const utils = require('../../services/utils');
@@ -65,7 +66,7 @@ function deleteNote(req) {
     note.deleteNote(deleteId, taskContext);
 
     if (eraseNotes) {
-        noteService.eraseNotesWithDeleteId(deleteId);
+        eraseService.eraseNotesWithDeleteId(deleteId);
     }
 
     if (last) {
@@ -150,11 +151,11 @@ function duplicateSubtree(req) {
 }
 
 function eraseDeletedNotesNow() {
-    noteService.eraseDeletedNotesNow();
+    eraseService.eraseDeletedNotesNow();
 }
 
 function eraseUnusedAttachmentsNow() {
-    noteService.eraseUnusedAttachmentsNow();
+    eraseService.eraseUnusedAttachmentsNow();
 }
 
 function getDeleteNotesPreview(req) {
diff --git a/src/services/content_hash.js b/src/services/content_hash.js
index 90ff248b1..8eacc18b1 100644
--- a/src/services/content_hash.js
+++ b/src/services/content_hash.js
@@ -3,8 +3,12 @@
 const sql = require('./sql');
 const utils = require('./utils');
 const log = require('./log');
+const eraseService = require("./erase");
 
 function getEntityHashes() {
+    // blob erasure is not synced, so we should check before each sync (here: before computing the hashes) if there's some blob to erase
+    eraseService.eraseUnusedBlobs();
+
     const startTime = new Date();
 
     const hashRows = sql.getRawRows(`
diff --git a/src/services/entity_changes.js b/src/services/entity_changes.js
index e9c4a66a5..5b98ad9d9 100644
--- a/src/services/entity_changes.js
+++ b/src/services/entity_changes.js
@@ -72,7 +72,7 @@ function addEntityChangesForSector(entityName, sector) {
         }
     });
 
-    log.info(`Added sector ${sector} of '${entityName}' to sync queue in ${Date.now() - startTime}ms.`);
+    log.info(`Added sector ${sector} of '${entityName}' (${entityChanges.length} entities) to sync queue in ${Date.now() - startTime}ms.`);
 }
 
 function cleanupEntityChangesForMissingEntities(entityName, entityPrimaryKey) {
diff --git a/src/services/erase.js b/src/services/erase.js
new file mode 100644
index 000000000..ee0b21d23
--- /dev/null
+++ b/src/services/erase.js
@@ -0,0 +1,186 @@
+const sql = require("./sql.js");
+const revisionService = require("./revisions.js");
+const log = require("./log.js");
+const entityChangesService = require("./entity_changes.js");
+const optionService = require("./options.js");
+const dateUtils = require("./date_utils.js");
+const sqlInit = require("./sql_init.js");
+const cls = require("./cls.js");
+
+function eraseNotes(noteIdsToErase) {
+    if (noteIdsToErase.length === 0) {
+        return;
+    }
+
+    sql.executeMany(`DELETE FROM notes WHERE noteId IN (???)`, noteIdsToErase);
+    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'notes' AND entityId IN (???)`, noteIdsToErase));
+
+    // we also need to erase all "dependent" entities of the erased notes - their branches, attributes and revisions
+    const branchIdsToErase = sql.getManyRows(`SELECT branchId FROM branches WHERE noteId IN (???)`, noteIdsToErase)
+        .map(row => row.branchId);
+
+    eraseBranches(branchIdsToErase);
+
+    const attributeIdsToErase = sql.getManyRows(`SELECT attributeId FROM attributes WHERE noteId IN (???)`, noteIdsToErase)
+        .map(row => row.attributeId);
+
+    eraseAttributes(attributeIdsToErase);
+
+    const revisionIdsToErase = sql.getManyRows(`SELECT revisionId FROM revisions WHERE noteId IN (???)`, noteIdsToErase)
+        .map(row => row.revisionId);
+
+    revisionService.eraseRevisions(revisionIdsToErase);
+
+    log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
+}
+
+function setEntityChangesAsErased(entityChanges) {
+    for (const ec of entityChanges) {
+        ec.isErased = true;
+
+        entityChangesService.addEntityChange(ec);
+    }
+}
+
+function eraseBranches(branchIdsToErase) {
+    if (branchIdsToErase.length === 0) {
+        return;
+    }
+
+    sql.executeMany(`DELETE FROM branches WHERE branchId IN (???)`, branchIdsToErase);
+
+    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'branches' AND entityId IN (???)`, branchIdsToErase));
+
+    log.info(`Erased branches: ${JSON.stringify(branchIdsToErase)}`);
+}
+
+function eraseAttributes(attributeIdsToErase) {
+    if (attributeIdsToErase.length === 0) {
+        return;
+    }
+
+    sql.executeMany(`DELETE FROM attributes WHERE attributeId IN (???)`, attributeIdsToErase);
+
+    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attributes' AND entityId IN (???)`, attributeIdsToErase));
+
+    log.info(`Erased attributes: ${JSON.stringify(attributeIdsToErase)}`);
+}
+
+function eraseAttachments(attachmentIdsToErase) {
+    if (attachmentIdsToErase.length === 0) {
+        return;
+    }
+
+    sql.executeMany(`DELETE FROM attachments WHERE attachmentId IN (???)`, attachmentIdsToErase);
+
+    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attachments' AND entityId IN (???)`, attachmentIdsToErase));
+
+    log.info(`Erased attachments: ${JSON.stringify(attachmentIdsToErase)}`);
+}
+
+function eraseUnusedBlobs() {
+    const unusedBlobIds = sql.getColumn(`
+        SELECT blobs.blobId
+        FROM blobs
+        LEFT JOIN notes ON notes.blobId = blobs.blobId
+        LEFT JOIN attachments ON attachments.blobId = blobs.blobId
+        LEFT JOIN revisions ON revisions.blobId = blobs.blobId
+        WHERE notes.noteId IS NULL
+          AND attachments.attachmentId IS NULL
+          AND revisions.revisionId IS NULL`);
+
+    if (unusedBlobIds.length === 0) {
+        return;
+    }
+
+    sql.executeMany(`DELETE FROM blobs WHERE blobId IN (???)`, unusedBlobIds);
+    // blobs are not marked as erased in entity_changes, their entity_changes rows are just purged completely
+    // this is because technically every keystroke can create a new blob and there would be just too many of them
+    sql.executeMany(`DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId IN (???)`, unusedBlobIds);
+
+    log.info(`Erased unused blobs: ${JSON.stringify(unusedBlobIds)}`);
+}
+
+function eraseDeletedEntities(eraseEntitiesAfterTimeInSeconds = null) {
+    // the transaction is important also so that the erased entity changes are sent to the connected clients
+    sql.transactional(() => {
+        if (eraseEntitiesAfterTimeInSeconds === null) {
+            eraseEntitiesAfterTimeInSeconds = optionService.getOptionInt('eraseEntitiesAfterTimeInSeconds');
+        }
+
+        const cutoffDate = new Date(Date.now() - eraseEntitiesAfterTimeInSeconds * 1000);
+
+        const noteIdsToErase = sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
+
+        eraseNotes(noteIdsToErase);
+
+        const branchIdsToErase = sql.getColumn("SELECT branchId FROM branches WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
+
+        eraseBranches(branchIdsToErase);
+
+        const attributeIdsToErase = sql.getColumn("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
+
+        eraseAttributes(attributeIdsToErase);
+
+        const attachmentIdsToErase = sql.getColumn("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
+
+        eraseAttachments(attachmentIdsToErase);
+
+        eraseUnusedBlobs();
+    });
+}
+
+function eraseNotesWithDeleteId(deleteId) {
+    const noteIdsToErase = sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
+
+    eraseNotes(noteIdsToErase);
+
+    const branchIdsToErase = sql.getColumn("SELECT branchId FROM branches WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
+
+    eraseBranches(branchIdsToErase);
+
+    const attributeIdsToErase = sql.getColumn("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
+
+    eraseAttributes(attributeIdsToErase);
+
+    const attachmentIdsToErase = sql.getColumn("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
+
+    eraseAttachments(attachmentIdsToErase);
+
+    eraseUnusedBlobs();
+}
+
+function eraseDeletedNotesNow() {
+    eraseDeletedEntities(0);
+}
+
+function eraseUnusedAttachmentsNow() {
+    eraseScheduledAttachments(0);
+}
+
+function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds = null) {
+    if (eraseUnusedAttachmentsAfterSeconds === null) {
+        eraseUnusedAttachmentsAfterSeconds = optionService.getOptionInt('eraseUnusedAttachmentsAfterSeconds');
+    }
+
+    const cutOffDate = dateUtils.utcDateTimeStr(new Date(Date.now() - (eraseUnusedAttachmentsAfterSeconds * 1000)));
+    const attachmentIdsToErase = sql.getColumn('SELECT attachmentId FROM attachments WHERE utcDateScheduledForErasureSince < ?', [cutOffDate]);
+
+    eraseAttachments(attachmentIdsToErase);
+}
+
+sqlInit.dbReady.then(() => {
+    // first cleanup kicks off 5 minutes after startup (scheduled attachments a minute later), then repeats every 4 hours (attachments every hour)
+    setTimeout(cls.wrap(() => eraseDeletedEntities()), 5 * 60 * 1000);
+    setTimeout(cls.wrap(() => eraseScheduledAttachments()), 6 * 60 * 1000);
+
+    setInterval(cls.wrap(() => eraseDeletedEntities()), 4 * 3600 * 1000);
+    setInterval(cls.wrap(() => eraseScheduledAttachments()), 3600 * 1000);
+});
+
+module.exports = {
+    eraseDeletedNotesNow,
+    eraseUnusedAttachmentsNow,
+    eraseNotesWithDeleteId,
+    eraseUnusedBlobs
+};
diff --git a/src/services/notes.js b/src/services/notes.js
index 46a633df7..3272b4b71 100644
--- a/src/services/notes.js
+++ b/src/services/notes.js
@@ -855,158 +855,6 @@ async function asyncPostProcessContent(note, content) {
     scanForLinks(note, content);
 }
 
-function eraseNotes(noteIdsToErase) {
-    if (noteIdsToErase.length === 0) {
-        return;
-    }
-
-    sql.executeMany(`DELETE FROM notes WHERE noteId IN (???)`, noteIdsToErase);
-    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'notes' AND entityId IN (???)`, noteIdsToErase));
-
-    // we also need to erase all "dependent" entities of the erased notes
-    const branchIdsToErase = sql.getManyRows(`SELECT branchId FROM branches WHERE noteId IN (???)`, noteIdsToErase)
-        .map(row => row.branchId);
-
-    eraseBranches(branchIdsToErase);
-
-    const attributeIdsToErase = sql.getManyRows(`SELECT attributeId FROM attributes WHERE noteId IN (???)`, noteIdsToErase)
-        .map(row => row.attributeId);
-
-    eraseAttributes(attributeIdsToErase);
-
-    const revisionIdsToErase = sql.getManyRows(`SELECT revisionId FROM revisions WHERE noteId IN (???)`, noteIdsToErase)
-        .map(row => row.revisionId);
-
-    revisionService.eraseRevisions(revisionIdsToErase);
-
-    log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
-}
-
-function setEntityChangesAsErased(entityChanges) {
-    for (const ec of entityChanges) {
-        ec.isErased = true;
-
-        entityChangesService.addEntityChange(ec);
-    }
-}
-
-function eraseBranches(branchIdsToErase) {
-    if (branchIdsToErase.length === 0) {
-        return;
-    }
-
-    sql.executeMany(`DELETE FROM branches WHERE branchId IN (???)`, branchIdsToErase);
-
-    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'branches' AND entityId IN (???)`, branchIdsToErase));
-
-    log.info(`Erased branches: ${JSON.stringify(branchIdsToErase)}`);
-}
-
-function eraseAttributes(attributeIdsToErase) {
-    if (attributeIdsToErase.length === 0) {
-        return;
-    }
-
-    sql.executeMany(`DELETE FROM attributes WHERE attributeId IN (???)`, attributeIdsToErase);
-
-    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attributes' AND entityId IN (???)`, attributeIdsToErase));
-
-    log.info(`Erased attributes: ${JSON.stringify(attributeIdsToErase)}`);
-}
-
-function eraseAttachments(attachmentIdsToErase) {
-    if (attachmentIdsToErase.length === 0) {
-        return;
-    }
-
-    sql.executeMany(`DELETE FROM attachments WHERE attachmentId IN (???)`, attachmentIdsToErase);
-
-    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attachments' AND entityId IN (???)`, attachmentIdsToErase));
-
-    log.info(`Erased attachments: ${JSON.stringify(attachmentIdsToErase)}`);
-}
-
-function eraseUnusedBlobs() {
-    // this method is rather defense in depth - in normal operation, the unused blobs should be erased immediately
-    // after getting unused (handled in entity._setContent())
-    const unusedBlobIds = sql.getColumn(`
-        SELECT blobs.blobId
-        FROM blobs
-        LEFT JOIN notes ON notes.blobId = blobs.blobId
-        LEFT JOIN attachments ON attachments.blobId = blobs.blobId
-        LEFT JOIN revisions ON revisions.blobId = blobs.blobId
-        WHERE notes.noteId IS NULL
-          AND attachments.attachmentId IS NULL
-          AND revisions.revisionId IS NULL`);
-
-    if (unusedBlobIds.length === 0) {
-        return;
-    }
-
-    sql.executeMany(`DELETE FROM blobs WHERE blobId IN (???)`, unusedBlobIds);
-
-    setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'blobs' AND entityId IN (???)`, unusedBlobIds));
-
-    log.info(`Erased unused blobs: ${JSON.stringify(unusedBlobIds)}`);
-}
-
-function eraseDeletedEntities(eraseEntitiesAfterTimeInSeconds = null) {
-    // this is important also so that the erased entity changes are sent to the connected clients
-    sql.transactional(() => {
-        if (eraseEntitiesAfterTimeInSeconds === null) {
-            eraseEntitiesAfterTimeInSeconds = optionService.getOptionInt('eraseEntitiesAfterTimeInSeconds');
-        }
-
-        const cutoffDate = new Date(Date.now() - eraseEntitiesAfterTimeInSeconds * 1000);
-
-        const noteIdsToErase = sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
-
-        eraseNotes(noteIdsToErase);
-
-        const branchIdsToErase = sql.getColumn("SELECT branchId FROM branches WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
-
-        eraseBranches(branchIdsToErase);
-
-        const attributeIdsToErase = sql.getColumn("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
-
-        eraseAttributes(attributeIdsToErase);
-
-        const attachmentIdsToErase = sql.getColumn("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND utcDateModified <= ?", [dateUtils.utcDateTimeStr(cutoffDate)]);
-
-        eraseAttachments(attachmentIdsToErase);
-
-        eraseUnusedBlobs();
-    });
-}
-
-function eraseNotesWithDeleteId(deleteId) {
-    const noteIdsToErase = sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
-
-    eraseNotes(noteIdsToErase);
-
-    const branchIdsToErase = sql.getColumn("SELECT branchId FROM branches WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
-
-    eraseBranches(branchIdsToErase);
-
-    const attributeIdsToErase = sql.getColumn("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
-
-    eraseAttributes(attributeIdsToErase);
-
-    const attachmentIdsToErase = sql.getColumn("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
-
-    eraseAttachments(attachmentIdsToErase);
-
-    eraseUnusedBlobs();
-}
-
-function eraseDeletedNotesNow() {
-    eraseDeletedEntities(0);
-}
-
-function eraseUnusedAttachmentsNow() {
-    eraseScheduledAttachments(0);
-}
-
 // all keys should be replaced by the corresponding values
 function replaceByMap(str, mapObj) {
     const re = new RegExp(Object.keys(mapObj).join("|"),"g");
@@ -1138,26 +986,6 @@ function getNoteIdMapping(origNote) {
     return noteIdMapping;
 }
 
-function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds = null) {
-    if (eraseUnusedAttachmentsAfterSeconds === null) {
-        eraseUnusedAttachmentsAfterSeconds = optionService.getOptionInt('eraseUnusedAttachmentsAfterSeconds');
-    }
-
-    const cutOffDate = dateUtils.utcDateTimeStr(new Date(Date.now() - (eraseUnusedAttachmentsAfterSeconds * 1000)));
-    const attachmentIdsToErase = sql.getColumn('SELECT attachmentId FROM attachments WHERE utcDateScheduledForErasureSince < ?', [cutOffDate]);
-
-    eraseAttachments(attachmentIdsToErase);
-}
-
-sqlInit.dbReady.then(() => {
-    // first cleanup kickoff 5 minutes after startup
-    setTimeout(cls.wrap(() => eraseDeletedEntities()), 5 * 60 * 1000);
-    setTimeout(cls.wrap(() => eraseScheduledAttachments()), 6 * 60 * 1000);
-
-    setInterval(cls.wrap(() => eraseDeletedEntities()), 4 * 3600 * 1000);
-    setInterval(cls.wrap(() => eraseScheduledAttachments()), 3600 * 1000);
-});
-
 module.exports = {
     createNewNote,
     createNewNoteWithTarget,
@@ -1168,9 +996,6 @@ module.exports = {
     duplicateSubtreeWithoutRoot,
     getUndeletedParentBranchIds,
     triggerNoteTitleChanged,
-    eraseDeletedNotesNow,
-    eraseUnusedAttachmentsNow,
-    eraseNotesWithDeleteId,
     saveRevisionIfNeeded,
     downloadImages,
     asyncPostProcessContent