got rid of the hot/cold blob, all blobs are "cold" now

This commit is contained in:
zadam 2023-06-04 22:50:07 +02:00
parent b79631a35d
commit cb9feab7b2
5 changed files with 64 additions and 69 deletions

View File

@ -119,23 +119,10 @@ class AbstractBeccaEntity {
return this;
}
/**
* Hot entities keep stable blobId which is continuously updated and is not shared with other entities.
* Cold entities can still update its blob, but the blobId will change (and new blob will be created).
* Functionally this is the same, it's an optimization to avoid creating a new blob every second with auto saved
* text notes.
*
* @protected
*/
_isHot() {
return false;
}
/** @protected */
_setContent(content, opts = {}) {
// client code asks to save entity even if blobId didn't change (something else was changed)
opts.forceSave = !!opts.forceSave;
opts.forceCold = !!opts.forceCold;
opts.forceFrontendReload = !!opts.forceFrontendReload;
if (content === null || content === undefined) {
@ -149,7 +136,7 @@ class AbstractBeccaEntity {
content = Buffer.isBuffer(content) ? content : Buffer.from(content);
}
const unencryptedContentForHashCalculation = this.getUnencryptedContentForHashCalculation(content);
const unencryptedContentForHashCalculation = this.#getUnencryptedContentForHashCalculation(content);
if (this.isProtected) {
if (protectedSessionService.isProtectedSessionAvailable()) {
@ -160,16 +147,38 @@ class AbstractBeccaEntity {
}
sql.transactional(() => {
let newBlobId = this._saveBlob(content, unencryptedContentForHashCalculation, opts);
const newBlobId = this.#saveBlob(content, unencryptedContentForHashCalculation, opts);
const oldBlobId = this.blobId;
if (newBlobId !== this.blobId || opts.forceSave) {
if (newBlobId !== oldBlobId || opts.forceSave) {
this.blobId = newBlobId;
this.save();
if (newBlobId !== oldBlobId) {
this.#deleteBlobIfNoteUsed(oldBlobId);
}
}
});
}
getUnencryptedContentForHashCalculation(unencryptedContent) {
#deleteBlobIfNoteUsed(blobId) {
if (sql.getValue("SELECT 1 FROM notes WHERE blobId = ? LIMIT 1", [blobId])) {
return;
}
if (sql.getValue("SELECT 1 FROM attachments WHERE blobId = ? LIMIT 1", [blobId])) {
return;
}
if (sql.getValue("SELECT 1 FROM note_revisions WHERE blobId = ? LIMIT 1", [blobId])) {
return;
}
sql.execute("DELETE FROM blobs WHERE blobId = ?", [blobId]);
sql.execute("DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId = ?", [blobId]);
}
#getUnencryptedContentForHashCalculation(unencryptedContent) {
if (this.isProtected) {
// a "random" prefix make sure that the calculated hash/blobId is different for an encrypted note and decrypted
const encryptedPrefixSuffix = "t$[nvQg7q)&_ENCRYPTED_?M:Bf&j3jr_";
@ -181,26 +190,20 @@ class AbstractBeccaEntity {
}
}
/** @protected */
_saveBlob(content, unencryptedContentForHashCalculation, opts = {}) {
let newBlobId;
let blobNeedsInsert;
if (this._isHot() && !opts.forceCold) {
newBlobId = this.blobId || utils.randomBlobId();
blobNeedsInsert = true;
} else {
#saveBlob(content, unencryptedContentForHashCalculation, opts = {}) {
/*
* We're using the unencrypted blob for the hash calculation, because otherwise the random IV would
* cause every content blob to be unique which would balloon the database size (esp. with revisioning).
* This has minor security implications (it's easy to infer that given content is shared between different
* notes/attachments, but the trade-off comes out clearly positive).
*/
newBlobId = utils.hashedBlobId(unencryptedContentForHashCalculation);
blobNeedsInsert = !sql.getValue('SELECT 1 FROM blobs WHERE blobId = ?', [newBlobId]);
const newBlobId = utils.hashedBlobId(unencryptedContentForHashCalculation);
const blobNeedsInsert = !sql.getValue('SELECT 1 FROM blobs WHERE blobId = ?', [newBlobId]);
if (!blobNeedsInsert) {
return newBlobId;
}
if (blobNeedsInsert) {
const pojo = {
blobId: newBlobId,
content: content,
@ -228,7 +231,6 @@ class AbstractBeccaEntity {
entityName: 'blobs',
entity: this
});
}
return newBlobId;
}

View File

@ -118,7 +118,6 @@ class BAttachment extends AbstractBeccaEntity {
* @param content
* @param {object} [opts]
* @param {object} [opts.forceSave=false] - will also save this BAttachment entity
* @param {object} [opts.forceCold=false] - blob has to be saved as cold
* @param {object} [opts.forceFrontendReload=false] - override frontend heuristics on when to reload, instruct to reload
*/
setContent(content, opts) {

View File

@ -227,15 +227,10 @@ class BNote extends AbstractBeccaEntity {
return JSON.parse(content);
}
_isHot() {
return ['text', 'code', 'relationMap', 'canvas', 'mermaid'].includes(this.type);
}
/**
* @param content
* @param {object} [opts]
* @param {object} [opts.forceSave=false] - will also save this BNote entity
* @param {object} [opts.forceCold=false] - blob has to be saved as cold
* @param {object} [opts.forceFrontendReload=false] - override frontend heuristics on when to reload, instruct to reload
*/
setContent(content, opts) {
@ -1610,10 +1605,7 @@ class BNote extends AbstractBeccaEntity {
const revisionAttachment = noteAttachment.copy();
revisionAttachment.parentId = noteRevision.noteRevisionId;
revisionAttachment.setContent(noteAttachment.getContent(), {
forceSave: true,
forceCold: true
});
revisionAttachment.setContent(noteAttachment.getContent(), { forceSave: true });
// content is rewritten to point to the revision attachments
noteContent = noteContent.replaceAll(`attachments/${noteAttachment.attachmentId}`, `attachments/${revisionAttachment.attachmentId}`);

View File

@ -784,7 +784,7 @@ components:
readOnly: true
blobId:
type: string
description: ID of the blob object. For "cold" notes, this is effectively a content hash
description: ID of the blob object which effectively serves as a content hash
attributes:
$ref: '#/components/schemas/AttributeList'
readOnly: true

View File

@ -892,6 +892,8 @@ function eraseAttachments(attachmentIdsToErase) {
}
function eraseUnusedBlobs() {
// this method is rather defense in depth - in normal operation, the unused blobs should be erased immediately
// after getting unused (handled in entity._setContent())
const unusedBlobIds = sql.getColumn(`
SELECT blobs.blobId
FROM blobs