From cb9feab7b235df8c0bde783b8fbcfebe261ed07d Mon Sep 17 00:00:00 2001 From: zadam Date: Sun, 4 Jun 2023 22:50:07 +0200 Subject: [PATCH] got rid of the hot/cold blob, all blobs are "cold" now --- src/becca/entities/abstract_becca_entity.js | 118 ++++++++++---------- src/becca/entities/battachment.js | 1 - src/becca/entities/bnote.js | 10 +- src/etapi/etapi.openapi.yaml | 2 +- src/services/notes.js | 2 + 5 files changed, 64 insertions(+), 69 deletions(-) diff --git a/src/becca/entities/abstract_becca_entity.js b/src/becca/entities/abstract_becca_entity.js index a7cce2375..d58746b00 100644 --- a/src/becca/entities/abstract_becca_entity.js +++ b/src/becca/entities/abstract_becca_entity.js @@ -119,23 +119,10 @@ class AbstractBeccaEntity { return this; } - /** - * Hot entities keep stable blobId which is continuously updated and is not shared with other entities. - * Cold entities can still update its blob, but the blobId will change (and new blob will be created). - * Functionally this is the same, it's an optimization to avoid creating a new blob every second with auto saved - * text notes. - * - * @protected - */ - _isHot() { - return false; - } - /** @protected */ _setContent(content, opts = {}) { // client code asks to save entity even if blobId didn't change (something else was changed) opts.forceSave = !!opts.forceSave; - opts.forceCold = !!opts.forceCold; opts.forceFrontendReload = !!opts.forceFrontendReload; if (content === null || content === undefined) { @@ -149,7 +136,7 @@ class AbstractBeccaEntity { content = Buffer.isBuffer(content) ? content : Buffer.from(content); } - const unencryptedContentForHashCalculation = this.getUnencryptedContentForHashCalculation(content); + const unencryptedContentForHashCalculation = this.#getUnencryptedContentForHashCalculation(content); if (this.isProtected) { if (protectedSessionService.isProtectedSessionAvailable()) { @@ -160,16 +147,38 @@ class AbstractBeccaEntity { } sql.transactional(() => { - let newBlobId = this._saveBlob(content, unencryptedContentForHashCalculation, opts); + const newBlobId = this.#saveBlob(content, unencryptedContentForHashCalculation, opts); + const oldBlobId = this.blobId; - if (newBlobId !== this.blobId || opts.forceSave) { + if (newBlobId !== oldBlobId || opts.forceSave) { this.blobId = newBlobId; this.save(); + + if (newBlobId !== oldBlobId) { + this.#deleteBlobIfNoteUsed(oldBlobId); + } } }); } - getUnencryptedContentForHashCalculation(unencryptedContent) { + #deleteBlobIfNoteUsed(blobId) { + if (sql.getValue("SELECT 1 FROM notes WHERE blobId = ? LIMIT 1", [blobId])) { + return; + } + + if (sql.getValue("SELECT 1 FROM attachments WHERE blobId = ? LIMIT 1", [blobId])) { + return; + } + + if (sql.getValue("SELECT 1 FROM note_revisions WHERE blobId = ? LIMIT 1", [blobId])) { + return; + } + + sql.execute("DELETE FROM blobs WHERE blobId = ?", [blobId]); + sql.execute("DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId = ?", [blobId]); + } + + #getUnencryptedContentForHashCalculation(unencryptedContent) { if (this.isProtected) { // a "random" prefix make sure that the calculated hash/blobId is different for an encrypted note and decrypted const encryptedPrefixSuffix = "t$[nvQg7q)&_ENCRYPTED_?M:Bf&j3jr_"; @@ -181,54 +190,47 @@ class AbstractBeccaEntity { } } - /** @protected */ - _saveBlob(content, unencryptedContentForHashCalculation, opts = {}) { - let newBlobId; - let blobNeedsInsert; + #saveBlob(content, unencryptedContentForHashCalculation, opts = {}) { + /* + * We're using the unencrypted blob for the hash calculation, because otherwise the random IV would + * cause every content blob to be unique which would balloon the database size (esp. with revisioning). + * This has minor security implications (it's easy to infer that given content is shared between different + * notes/attachments, but the trade-off comes out clearly positive). + */ + const newBlobId = utils.hashedBlobId(unencryptedContentForHashCalculation); + const blobNeedsInsert = !sql.getValue('SELECT 1 FROM blobs WHERE blobId = ?', [newBlobId]); - if (this._isHot() && !opts.forceCold) { - newBlobId = this.blobId || utils.randomBlobId(); - blobNeedsInsert = true; - } else { - /* - * We're using the unencrypted blob for the hash calculation, because otherwise the random IV would - * cause every content blob to be unique which would balloon the database size (esp. with revisioning). - * This has minor security implications (it's easy to infer that given content is shared between different - * notes/attachments, but the trade-off comes out clearly positive). - */ - newBlobId = utils.hashedBlobId(unencryptedContentForHashCalculation); - blobNeedsInsert = !sql.getValue('SELECT 1 FROM blobs WHERE blobId = ?', [newBlobId]); + if (!blobNeedsInsert) { + return newBlobId; } - if (blobNeedsInsert) { - const pojo = { - blobId: newBlobId, - content: content, - dateModified: dateUtils.localNowDateTime(), - utcDateModified: dateUtils.utcNowDateTime() - }; + const pojo = { + blobId: newBlobId, + content: content, + dateModified: dateUtils.localNowDateTime(), + utcDateModified: dateUtils.utcNowDateTime() + }; - sql.upsert("blobs", "blobId", pojo); + sql.upsert("blobs", "blobId", pojo); - const hash = utils.hash(`${newBlobId}|${pojo.content.toString()}`); + const hash = utils.hash(`${newBlobId}|${pojo.content.toString()}`); - entityChangesService.addEntityChange({ - entityName: 'blobs', - entityId: newBlobId, - hash: hash, - isErased: false, - utcDateChanged: pojo.utcDateModified, - isSynced: true, - // overriding componentId will cause frontend to think the change is coming from a different component - // and thus reload - componentId: opts.forceFrontendReload ? utils.randomString(10) : null - }); + entityChangesService.addEntityChange({ + entityName: 'blobs', + entityId: newBlobId, + hash: hash, + isErased: false, + utcDateChanged: pojo.utcDateModified, + isSynced: true, + // overriding componentId will cause frontend to think the change is coming from a different component + // and thus reload + componentId: opts.forceFrontendReload ? utils.randomString(10) : null + }); - eventService.emit(eventService.ENTITY_CHANGED, { - entityName: 'blobs', - entity: this - }); - } + eventService.emit(eventService.ENTITY_CHANGED, { + entityName: 'blobs', + entity: this + }); return newBlobId; } diff --git a/src/becca/entities/battachment.js b/src/becca/entities/battachment.js index be8ff6b5f..300a776de 100644 --- a/src/becca/entities/battachment.js +++ b/src/becca/entities/battachment.js @@ -118,7 +118,6 @@ class BAttachment extends AbstractBeccaEntity { * @param content * @param {object} [opts] * @param {object} [opts.forceSave=false] - will also save this BAttachment entity - * @param {object} [opts.forceCold=false] - blob has to be saved as cold * @param {object} [opts.forceFrontendReload=false] - override frontend heuristics on when to reload, instruct to reload */ setContent(content, opts) { diff --git a/src/becca/entities/bnote.js b/src/becca/entities/bnote.js index f9cbdb238..8502cea80 100644 --- a/src/becca/entities/bnote.js +++ b/src/becca/entities/bnote.js @@ -227,15 +227,10 @@ class BNote extends AbstractBeccaEntity { return JSON.parse(content); } - _isHot() { - return ['text', 'code', 'relationMap', 'canvas', 'mermaid'].includes(this.type); - } - /** * @param content * @param {object} [opts] * @param {object} [opts.forceSave=false] - will also save this BNote entity - * @param {object} [opts.forceCold=false] - blob has to be saved as cold * @param {object} [opts.forceFrontendReload=false] - override frontend heuristics on when to reload, instruct to reload */ setContent(content, opts) { @@ -1610,10 +1605,7 @@ class BNote extends AbstractBeccaEntity { const revisionAttachment = noteAttachment.copy(); revisionAttachment.parentId = noteRevision.noteRevisionId; - revisionAttachment.setContent(noteAttachment.getContent(), { - forceSave: true, - forceCold: true - }); + revisionAttachment.setContent(noteAttachment.getContent(), { forceSave: true }); // content is rewritten to point to the revision attachments noteContent = noteContent.replaceAll(`attachments/${noteAttachment.attachmentId}`, `attachments/${revisionAttachment.attachmentId}`); diff --git a/src/etapi/etapi.openapi.yaml b/src/etapi/etapi.openapi.yaml index 9371331ce..da3d1864e 100644 --- a/src/etapi/etapi.openapi.yaml +++ b/src/etapi/etapi.openapi.yaml @@ -784,7 +784,7 @@ components: readOnly: true blobId: type: string - description: ID of the blob object. For "cold" notes, this is effectively a content hash + description: ID of the blob object which effectively serves as a content hash attributes: $ref: '#/components/schemas/AttributeList' readOnly: true diff --git a/src/services/notes.js b/src/services/notes.js index 8f54aaaba..3a0fc2f0a 100644 --- a/src/services/notes.js +++ b/src/services/notes.js @@ -892,6 +892,8 @@ function eraseAttachments(attachmentIdsToErase) { } function eraseUnusedBlobs() { + // this method is rather defense in depth - in normal operation, the unused blobs should be erased immediately + // after getting unused (handled in entity._setContent()) const unusedBlobIds = sql.getColumn(` SELECT blobs.blobId FROM blobs