Blob erasure is not synced, so unused blobs need to be cleaned up before each content hash check

This commit is contained in:
zadam 2023-07-27 23:57:12 +02:00
parent 8edb5428e5
commit ce3834eb9e
8 changed files with 201 additions and 183 deletions

View File

@ -3,11 +3,11 @@ module.exports = async () => {
const beccaLoader = require("../../src/becca/becca_loader");
const log = require("../../src/services/log");
const consistencyChecks = require("../../src/services/consistency_checks");
const noteService = require("../../src/services/notes");
const eraseService = require("../../src/services/erase");
await cls.init(async () => {
// precaution for the 0211 migration
noteService.eraseDeletedNotesNow();
eraseService.eraseDeletedNotesNow();
beccaLoader.load();

View File

@ -175,6 +175,8 @@ class AbstractBeccaEntity {
}
sql.execute("DELETE FROM blobs WHERE blobId = ?", [oldBlobId]);
// blobs are not marked as erased in entity_changes, they are just purged completely
// this is because technically every keystroke can create a new blob and there would be just too many
sql.execute("DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId = ?", [oldBlobId]);
}

View File

@ -4,7 +4,7 @@ const sql = require('../../services/sql');
const utils = require('../../services/utils');
const entityChangesService = require('../../services/entity_changes');
const treeService = require('../../services/tree');
const noteService = require('../../services/notes');
const eraseService = require('../../services/erase');
const becca = require('../../becca/becca');
const TaskContext = require('../../services/task_context');
const branchService = require("../../services/branches");
@ -193,7 +193,7 @@ function deleteBranch(req) {
if (eraseNotes) {
// erase automatically means deleting all clones + note itself
branch.getNote().deleteNote(deleteId, taskContext);
noteService.eraseNotesWithDeleteId(deleteId);
eraseService.eraseNotesWithDeleteId(deleteId);
noteDeleted = true;
} else {
noteDeleted = branch.deleteBranch(deleteId, taskContext);

View File

@ -1,6 +1,7 @@
"use strict";
const noteService = require('../../services/notes');
const eraseService = require('../../services/erase');
const treeService = require('../../services/tree');
const sql = require('../../services/sql');
const utils = require('../../services/utils');
@ -65,7 +66,7 @@ function deleteNote(req) {
note.deleteNote(deleteId, taskContext);
if (eraseNotes) {
noteService.eraseNotesWithDeleteId(deleteId);
eraseService.eraseNotesWithDeleteId(deleteId);
}
if (last) {
@ -150,11 +151,11 @@ function duplicateSubtree(req) {
}
function eraseDeletedNotesNow() {
noteService.eraseDeletedNotesNow();
eraseService.eraseDeletedNotesNow();
}
function eraseUnusedAttachmentsNow() {
noteService.eraseUnusedAttachmentsNow();
eraseService.eraseUnusedAttachmentsNow();
}
function getDeleteNotesPreview(req) {

View File

@ -3,8 +3,12 @@
const sql = require('./sql');
const utils = require('./utils');
const log = require('./log');
const eraseService = require("./erase");
function getEntityHashes() {
// blob erasure is not synced, we should check before each sync if there's some blob to erase
eraseService.eraseUnusedBlobs();
const startTime = new Date();
const hashRows = sql.getRawRows(`

View File

@ -72,7 +72,7 @@ function addEntityChangesForSector(entityName, sector) {
}
});
log.info(`Added sector ${sector} of '${entityName}' to sync queue in ${Date.now() - startTime}ms.`);
log.info(`Added sector ${sector} of '${entityName}' (${entityChanges.length} entities) to sync queue in ${Date.now() - startTime}ms.`);
}
function cleanupEntityChangesForMissingEntities(entityName, entityPrimaryKey) {

186
src/services/erase.js Normal file
View File

@ -0,0 +1,186 @@
const sql = require("./sql.js");
const revisionService = require("./revisions.js");
const log = require("./log.js");
const entityChangesService = require("./entity_changes.js");
const optionService = require("./options.js");
const dateUtils = require("./date_utils.js");
const sqlInit = require("./sql_init.js");
const cls = require("./cls.js");
/**
 * Physically removes the given notes and everything which hangs off them.
 *
 * The note rows are deleted, their entity_changes rows are flagged as erased and then
 * the branches, attributes and revisions referencing those notes are erased as well.
 *
 * @param {Array} noteIdsToErase - IDs of the notes to erase; an empty list is a no-op
 */
function eraseNotes(noteIdsToErase) {
    if (noteIdsToErase.length === 0) {
        return;
    }

    // picks a single ID column out of the rows of a dependent table which point to the erased notes
    const collectDependentIds = (query, idColumn) =>
        sql.getManyRows(query, noteIdsToErase).map(row => row[idColumn]);

    sql.executeMany(`DELETE FROM notes WHERE noteId IN (???)`, noteIdsToErase);

    const noteEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'notes' AND entityId IN (???)`, noteIdsToErase);
    setEntityChangesAsErased(noteEntityChanges);

    // all "dependent" entities of the erased notes have to go as well
    eraseBranches(collectDependentIds(`SELECT branchId FROM branches WHERE noteId IN (???)`, 'branchId'));
    eraseAttributes(collectDependentIds(`SELECT attributeId FROM attributes WHERE noteId IN (???)`, 'attributeId'));
    revisionService.eraseRevisions(collectDependentIds(`SELECT revisionId FROM revisions WHERE noteId IN (???)`, 'revisionId'));

    log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
}
/**
 * Flags every passed entity change as erased and hands it over to the entity changes service.
 *
 * The passed objects are modified in place (their `isErased` property is set to `true`).
 *
 * @param {Iterable<Object>} entityChanges - entity change rows to mark as erased
 */
function setEntityChangesAsErased(entityChanges) {
    for (const entityChange of entityChanges) {
        entityChange.isErased = true;

        entityChangesService.addEntityChange(entityChange);
    }
}
/**
 * Physically removes the given branches and flags their entity changes as erased.
 *
 * @param {Array} branchIdsToErase - IDs of the branches to erase; an empty list is a no-op
 */
function eraseBranches(branchIdsToErase) {
    if (branchIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM branches WHERE branchId IN (???)`, branchIdsToErase);

    const branchEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'branches' AND entityId IN (???)`, branchIdsToErase);
    setEntityChangesAsErased(branchEntityChanges);

    const erasedIdsJson = JSON.stringify(branchIdsToErase);
    log.info(`Erased branches: ${erasedIdsJson}`);
}
/**
 * Physically removes the given attributes and flags their entity changes as erased.
 *
 * @param {Array} attributeIdsToErase - IDs of the attributes to erase; an empty list is a no-op
 */
function eraseAttributes(attributeIdsToErase) {
    if (attributeIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attributes WHERE attributeId IN (???)`, attributeIdsToErase);

    const attributeEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attributes' AND entityId IN (???)`, attributeIdsToErase);
    setEntityChangesAsErased(attributeEntityChanges);

    const erasedIdsJson = JSON.stringify(attributeIdsToErase);
    log.info(`Erased attributes: ${erasedIdsJson}`);
}
/**
 * Physically removes the given attachments and flags their entity changes as erased.
 *
 * @param {Array} attachmentIdsToErase - IDs of the attachments to erase; an empty list is a no-op
 */
function eraseAttachments(attachmentIdsToErase) {
    if (attachmentIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attachments WHERE attachmentId IN (???)`, attachmentIdsToErase);

    const attachmentEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attachments' AND entityId IN (???)`, attachmentIdsToErase);
    setEntityChangesAsErased(attachmentEntityChanges);

    const erasedIdsJson = JSON.stringify(attachmentIdsToErase);
    log.info(`Erased attachments: ${erasedIdsJson}`);
}
/**
 * Purges all blobs which are not referenced by any note, attachment or revision.
 *
 * Both the blob rows and their entity_changes rows are deleted outright.
 */
function eraseUnusedBlobs() {
    // a blob is unused when none of the three LEFT JOINs finds a row pointing to it
    const unusedBlobsQuery = `
SELECT blobs.blobId
FROM blobs
LEFT JOIN notes ON notes.blobId = blobs.blobId
LEFT JOIN attachments ON attachments.blobId = blobs.blobId
LEFT JOIN revisions ON revisions.blobId = blobs.blobId
WHERE notes.noteId IS NULL
AND attachments.attachmentId IS NULL
AND revisions.revisionId IS NULL`;

    const unusedBlobIds = sql.getColumn(unusedBlobsQuery);

    if (unusedBlobIds.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM blobs WHERE blobId IN (???)`, unusedBlobIds);

    // unlike other entities, blobs are not marked as erased in entity_changes - they are purged completely,
    // because technically every keystroke can create a new blob and there would be just too many of them
    sql.executeMany(`DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId IN (???)`, unusedBlobIds);

    const erasedIdsJson = JSON.stringify(unusedBlobIds);
    log.info(`Erased unused blobs: ${erasedIdsJson}`);
}
/**
 * Erases notes, branches, attributes and attachments which are flagged as deleted and whose
 * `utcDateModified` is at or before the cutoff, then purges the unused blobs.
 *
 * @param {number|null} [eraseEntitiesAfterTimeInSeconds=null] - how old (in seconds) a deleted entity
 *        has to be to get erased; when null, the value is read from the
 *        'eraseEntitiesAfterTimeInSeconds' option
 */
function eraseDeletedEntities(eraseEntitiesAfterTimeInSeconds = null) {
    // this is important also so that the erased entity changes are sent to the connected clients
    sql.transactional(() => {
        const retentionSeconds = eraseEntitiesAfterTimeInSeconds === null
            ? optionService.getOptionInt('eraseEntitiesAfterTimeInSeconds')
            : eraseEntitiesAfterTimeInSeconds;

        const cutoffDate = new Date(Date.now() - retentionSeconds * 1000);

        // runs the given ID query with the formatted cutoff as its only parameter
        const selectExpiredIds = query => sql.getColumn(query, [dateUtils.utcDateTimeStr(cutoffDate)]);

        // each query runs only after the previous erasure finished, since erasing notes also erases their dependents
        eraseNotes(selectExpiredIds("SELECT noteId FROM notes WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseBranches(selectExpiredIds("SELECT branchId FROM branches WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseAttributes(selectExpiredIds("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseAttachments(selectExpiredIds("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND utcDateModified <= ?"));

        eraseUnusedBlobs();
    });
}
/**
 * Erases all notes, branches, attributes and attachments which are flagged as deleted
 * and carry the given delete ID, then purges the unused blobs.
 *
 * @param deleteId - value matched against the `deleteId` column of the deleted entities
 */
function eraseNotesWithDeleteId(deleteId) {
    // runs the given ID query with the delete ID as its only parameter
    const selectIds = query => sql.getColumn(query, [deleteId]);

    eraseNotes(selectIds("SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?"));
    eraseBranches(selectIds("SELECT branchId FROM branches WHERE isDeleted = 1 AND deleteId = ?"));
    eraseAttributes(selectIds("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND deleteId = ?"));
    eraseAttachments(selectIds("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?"));

    eraseUnusedBlobs();
}
/**
 * Erases all deleted entities right away, i.e. with a zero-second age limit
 * instead of the one configured in the options.
 */
function eraseDeletedNotesNow() {
    const noWaitSeconds = 0;

    eraseDeletedEntities(noWaitSeconds);
}
/**
 * Erases the attachments scheduled for erasure right away, i.e. with a zero-second
 * grace period instead of the one configured in the options.
 */
function eraseUnusedAttachmentsNow() {
    const noWaitSeconds = 0;

    eraseScheduledAttachments(noWaitSeconds);
}
/**
 * Erases the attachments whose `utcDateScheduledForErasureSince` is older than the cutoff.
 *
 * @param {number|null} [eraseUnusedAttachmentsAfterSeconds=null] - how long (in seconds) an attachment
 *        has to be scheduled for erasure before it is really erased; when null, the value is read
 *        from the 'eraseUnusedAttachmentsAfterSeconds' option
 */
function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds = null) {
    const gracePeriodSeconds = eraseUnusedAttachmentsAfterSeconds === null
        ? optionService.getOptionInt('eraseUnusedAttachmentsAfterSeconds')
        : eraseUnusedAttachmentsAfterSeconds;

    const cutOffTimestamp = Date.now() - (gracePeriodSeconds * 1000);
    const cutOffDate = dateUtils.utcDateTimeStr(new Date(cutOffTimestamp));

    eraseAttachments(
        sql.getColumn('SELECT attachmentId FROM attachments WHERE utcDateScheduledForErasureSince < ?', [cutOffDate])
    );
}
sqlInit.dbReady.then(() => {
// first cleanup kickoff 5 minutes after startup
setTimeout(cls.wrap(() => eraseDeletedEntities()), 5 * 60 * 1000);
setTimeout(cls.wrap(() => eraseScheduledAttachments()), 6 * 60 * 1000);
setInterval(cls.wrap(() => eraseDeletedEntities()), 4 * 3600 * 1000);
setInterval(cls.wrap(() => eraseScheduledAttachments()), 3600 * 1000);
});
module.exports = {
eraseDeletedNotesNow,
eraseUnusedAttachmentsNow,
eraseNotesWithDeleteId,
eraseUnusedBlobs
};

View File

@ -855,158 +855,6 @@ async function asyncPostProcessContent(note, content) {
scanForLinks(note, content);
}
/**
 * Physically removes the given notes and everything which hangs off them.
 *
 * The note rows are deleted, their entity_changes rows are flagged as erased and then
 * the branches, attributes and revisions referencing those notes are erased as well.
 *
 * @param {Array} noteIdsToErase - IDs of the notes to erase; an empty list is a no-op
 */
function eraseNotes(noteIdsToErase) {
    if (noteIdsToErase.length === 0) {
        return;
    }

    // picks a single ID column out of the rows of a dependent table which point to the erased notes
    const collectDependentIds = (query, idColumn) =>
        sql.getManyRows(query, noteIdsToErase).map(row => row[idColumn]);

    sql.executeMany(`DELETE FROM notes WHERE noteId IN (???)`, noteIdsToErase);

    const noteEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'notes' AND entityId IN (???)`, noteIdsToErase);
    setEntityChangesAsErased(noteEntityChanges);

    // all "dependent" entities of the erased notes have to go as well
    eraseBranches(collectDependentIds(`SELECT branchId FROM branches WHERE noteId IN (???)`, 'branchId'));
    eraseAttributes(collectDependentIds(`SELECT attributeId FROM attributes WHERE noteId IN (???)`, 'attributeId'));
    revisionService.eraseRevisions(collectDependentIds(`SELECT revisionId FROM revisions WHERE noteId IN (???)`, 'revisionId'));

    log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
}
/**
 * Flags every passed entity change as erased and hands it over to the entity changes service.
 *
 * The passed objects are modified in place (their `isErased` property is set to `true`).
 *
 * @param {Iterable<Object>} entityChanges - entity change rows to mark as erased
 */
function setEntityChangesAsErased(entityChanges) {
    for (const entityChange of entityChanges) {
        entityChange.isErased = true;

        entityChangesService.addEntityChange(entityChange);
    }
}
/**
 * Physically removes the given branches and flags their entity changes as erased.
 *
 * @param {Array} branchIdsToErase - IDs of the branches to erase; an empty list is a no-op
 */
function eraseBranches(branchIdsToErase) {
    if (branchIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM branches WHERE branchId IN (???)`, branchIdsToErase);

    const branchEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'branches' AND entityId IN (???)`, branchIdsToErase);
    setEntityChangesAsErased(branchEntityChanges);

    const erasedIdsJson = JSON.stringify(branchIdsToErase);
    log.info(`Erased branches: ${erasedIdsJson}`);
}
/**
 * Physically removes the given attributes and flags their entity changes as erased.
 *
 * @param {Array} attributeIdsToErase - IDs of the attributes to erase; an empty list is a no-op
 */
function eraseAttributes(attributeIdsToErase) {
    if (attributeIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attributes WHERE attributeId IN (???)`, attributeIdsToErase);

    const attributeEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attributes' AND entityId IN (???)`, attributeIdsToErase);
    setEntityChangesAsErased(attributeEntityChanges);

    const erasedIdsJson = JSON.stringify(attributeIdsToErase);
    log.info(`Erased attributes: ${erasedIdsJson}`);
}
/**
 * Physically removes the given attachments and flags their entity changes as erased.
 *
 * @param {Array} attachmentIdsToErase - IDs of the attachments to erase; an empty list is a no-op
 */
function eraseAttachments(attachmentIdsToErase) {
    if (attachmentIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attachments WHERE attachmentId IN (???)`, attachmentIdsToErase);

    const attachmentEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'attachments' AND entityId IN (???)`, attachmentIdsToErase);
    setEntityChangesAsErased(attachmentEntityChanges);

    const erasedIdsJson = JSON.stringify(attachmentIdsToErase);
    log.info(`Erased attachments: ${erasedIdsJson}`);
}
/**
 * Erases all blobs which are not referenced by any note, attachment or revision
 * and flags their entity changes as erased.
 */
function eraseUnusedBlobs() {
    // this method is rather defense in depth - in normal operation, the unused blobs should be erased immediately
    // after getting unused (handled in entity._setContent())

    // a blob is unused when none of the three LEFT JOINs finds a row pointing to it
    const unusedBlobsQuery = `
SELECT blobs.blobId
FROM blobs
LEFT JOIN notes ON notes.blobId = blobs.blobId
LEFT JOIN attachments ON attachments.blobId = blobs.blobId
LEFT JOIN revisions ON revisions.blobId = blobs.blobId
WHERE notes.noteId IS NULL
AND attachments.attachmentId IS NULL
AND revisions.revisionId IS NULL`;

    const unusedBlobIds = sql.getColumn(unusedBlobsQuery);

    if (unusedBlobIds.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM blobs WHERE blobId IN (???)`, unusedBlobIds);

    const blobEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'blobs' AND entityId IN (???)`, unusedBlobIds);
    setEntityChangesAsErased(blobEntityChanges);

    const erasedIdsJson = JSON.stringify(unusedBlobIds);
    log.info(`Erased unused blobs: ${erasedIdsJson}`);
}
/**
 * Erases notes, branches, attributes and attachments which are flagged as deleted and whose
 * `utcDateModified` is at or before the cutoff, then erases the unused blobs.
 *
 * @param {number|null} [eraseEntitiesAfterTimeInSeconds=null] - how old (in seconds) a deleted entity
 *        has to be to get erased; when null, the value is read from the
 *        'eraseEntitiesAfterTimeInSeconds' option
 */
function eraseDeletedEntities(eraseEntitiesAfterTimeInSeconds = null) {
    // this is important also so that the erased entity changes are sent to the connected clients
    sql.transactional(() => {
        const retentionSeconds = eraseEntitiesAfterTimeInSeconds === null
            ? optionService.getOptionInt('eraseEntitiesAfterTimeInSeconds')
            : eraseEntitiesAfterTimeInSeconds;

        const cutoffDate = new Date(Date.now() - retentionSeconds * 1000);

        // runs the given ID query with the formatted cutoff as its only parameter
        const selectExpiredIds = query => sql.getColumn(query, [dateUtils.utcDateTimeStr(cutoffDate)]);

        // each query runs only after the previous erasure finished, since erasing notes also erases their dependents
        eraseNotes(selectExpiredIds("SELECT noteId FROM notes WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseBranches(selectExpiredIds("SELECT branchId FROM branches WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseAttributes(selectExpiredIds("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND utcDateModified <= ?"));
        eraseAttachments(selectExpiredIds("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND utcDateModified <= ?"));

        eraseUnusedBlobs();
    });
}
/**
 * Erases all notes, branches, attributes and attachments which are flagged as deleted
 * and carry the given delete ID, then erases the unused blobs.
 *
 * @param deleteId - value matched against the `deleteId` column of the deleted entities
 */
function eraseNotesWithDeleteId(deleteId) {
    // runs the given ID query with the delete ID as its only parameter
    const selectIds = query => sql.getColumn(query, [deleteId]);

    eraseNotes(selectIds("SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?"));
    eraseBranches(selectIds("SELECT branchId FROM branches WHERE isDeleted = 1 AND deleteId = ?"));
    eraseAttributes(selectIds("SELECT attributeId FROM attributes WHERE isDeleted = 1 AND deleteId = ?"));
    eraseAttachments(selectIds("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?"));

    eraseUnusedBlobs();
}
/**
 * Erases all deleted entities right away, i.e. with a zero-second age limit
 * instead of the one configured in the options.
 */
function eraseDeletedNotesNow() {
    const noWaitSeconds = 0;

    eraseDeletedEntities(noWaitSeconds);
}
/**
 * Erases the attachments scheduled for erasure right away, i.e. with a zero-second
 * grace period instead of the one configured in the options.
 */
function eraseUnusedAttachmentsNow() {
    const noWaitSeconds = 0;

    eraseScheduledAttachments(noWaitSeconds);
}
// all keys should be replaced by the corresponding values
function replaceByMap(str, mapObj) {
const re = new RegExp(Object.keys(mapObj).join("|"),"g");
@ -1138,26 +986,6 @@ function getNoteIdMapping(origNote) {
return noteIdMapping;
}
/**
 * Erases the attachments whose `utcDateScheduledForErasureSince` is older than the cutoff.
 *
 * @param {number|null} [eraseUnusedAttachmentsAfterSeconds=null] - how long (in seconds) an attachment
 *        has to be scheduled for erasure before it is really erased; when null, the value is read
 *        from the 'eraseUnusedAttachmentsAfterSeconds' option
 */
function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds = null) {
    const gracePeriodSeconds = eraseUnusedAttachmentsAfterSeconds === null
        ? optionService.getOptionInt('eraseUnusedAttachmentsAfterSeconds')
        : eraseUnusedAttachmentsAfterSeconds;

    const cutOffTimestamp = Date.now() - (gracePeriodSeconds * 1000);
    const cutOffDate = dateUtils.utcDateTimeStr(new Date(cutOffTimestamp));

    eraseAttachments(
        sql.getColumn('SELECT attachmentId FROM attachments WHERE utcDateScheduledForErasureSince < ?', [cutOffDate])
    );
}
// the periodic cleanups are scheduled only once the database is ready
sqlInit.dbReady.then(() => {
    const MINUTE_MS = 60 * 1000;
    const HOUR_MS = 3600 * 1000;

    // first cleanup kickoff 5 minutes after startup, the attachments follow one minute later
    setTimeout(cls.wrap(() => eraseDeletedEntities()), 5 * MINUTE_MS);
    setTimeout(cls.wrap(() => eraseScheduledAttachments()), 6 * MINUTE_MS);

    // afterwards deleted entities are erased every 4 hours and scheduled attachments every hour
    setInterval(cls.wrap(() => eraseDeletedEntities()), 4 * HOUR_MS);
    setInterval(cls.wrap(() => eraseScheduledAttachments()), HOUR_MS);
});
module.exports = {
createNewNote,
createNewNoteWithTarget,
@ -1168,9 +996,6 @@ module.exports = {
duplicateSubtreeWithoutRoot,
getUndeletedParentBranchIds,
triggerNoteTitleChanged,
eraseDeletedNotesNow,
eraseUnusedAttachmentsNow,
eraseNotesWithDeleteId,
saveRevisionIfNeeded,
downloadImages,
asyncPostProcessContent