mirror of
https://github.com/zadam/trilium.git
synced 2025-03-01 14:22:32 +01:00
ocr
This commit is contained in:
parent
c68a67d148
commit
63c62df787
@ -90,33 +90,34 @@ class BNoteAttachment extends AbstractBeccaEntity {
|
||||
}
|
||||
|
||||
setContent(content) {
|
||||
this.contentCheckSum = this.calculateCheckSum(content);
|
||||
this.save(); // also explicitly save note_attachment to update contentCheckSum
|
||||
sql.transactional(() => {
|
||||
this.contentCheckSum = this.calculateCheckSum(content);
|
||||
this.save(); // also explicitly save note_attachment to update contentCheckSum
|
||||
|
||||
const pojo = {
|
||||
noteAttachmentId: this.noteAttachmentId,
|
||||
content: content,
|
||||
utcDateModified: dateUtils.utcNowDateTime()
|
||||
};
|
||||
const pojo = {
|
||||
noteAttachmentId: this.noteAttachmentId,
|
||||
content: content,
|
||||
utcDateModified: dateUtils.utcNowDateTime()
|
||||
};
|
||||
|
||||
if (this.isProtected) {
|
||||
if (protectedSessionService.isProtectedSessionAvailable()) {
|
||||
pojo.content = protectedSessionService.encrypt(pojo.content);
|
||||
if (this.isProtected) {
|
||||
if (protectedSessionService.isProtectedSessionAvailable()) {
|
||||
pojo.content = protectedSessionService.encrypt(pojo.content);
|
||||
} else {
|
||||
throw new Error(`Cannot update content of noteAttachmentId=${this.noteAttachmentId} since we're out of protected session.`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw new Error(`Cannot update content of noteAttachmentId=${this.noteAttachmentId} since we're out of protected session.`);
|
||||
}
|
||||
}
|
||||
|
||||
sql.upsert("note_attachment_contents", "noteAttachmentId", pojo);
|
||||
sql.upsert("note_attachment_contents", "noteAttachmentId", pojo);
|
||||
|
||||
entityChangesService.addEntityChange({
|
||||
entityName: 'note_attachment_contents',
|
||||
entityId: this.noteAttachmentId,
|
||||
hash: this.contentCheckSum,
|
||||
isErased: false,
|
||||
utcDateChanged: pojo.utcDateModified,
|
||||
isSynced: true
|
||||
entityChangesService.addEntityChange({
|
||||
entityName: 'note_attachment_contents',
|
||||
entityId: this.noteAttachmentId,
|
||||
hash: this.contentCheckSum,
|
||||
isErased: false,
|
||||
utcDateChanged: pojo.utcDateModified,
|
||||
isSynced: true
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,24 @@ function getImageMimeFromExtension(ext) {
|
||||
return `image/${ext === 'svg' ? 'svg+xml' : ext}`;
|
||||
}
|
||||
|
||||
/**
 * Runs OCR over an image and stores the recognized text as the note's
 * 'plainText' attachment (mime 'text/plain').
 *
 * Does nothing when the 'ocrImages' option is disabled.
 *
 * @param note - object providing saveNoteAttachment(name, mime, content)
 * @param buffer - image bytes; assigned to Canvas.Image#src and read for byteLength
 */
function runOcr(note, buffer) {
    if (!optionService.getOptionBool('ocrImages')) {
        return;
    }

    const start = Date.now();

    // draw the image onto a canvas of the same size, OCRAD is then run on that canvas
    const img = new Canvas.Image();
    img.src = buffer;

    // createCanvas is a factory function, so it is called directly rather than with `new`
    const canvas = Canvas.createCanvas(img.width, img.height);
    const ctx = canvas.getContext('2d');
    ctx.drawImage(img, 0, 0, img.width, img.height);

    const plainText = OCRAD(canvas);

    log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);

    note.saveNoteAttachment('plainText', 'text/plain', plainText);
}
|
||||
|
||||
function updateImage(noteId, uploadBuffer, originalName) {
|
||||
log.info(`Updating image ${noteId}: ${originalName}`);
|
||||
|
||||
@ -85,17 +103,7 @@ function updateImage(noteId, uploadBuffer, originalName) {
|
||||
note.setContent(buffer);
|
||||
});
|
||||
|
||||
const start = Date.now();
|
||||
const img = new Canvas.Image();
|
||||
img.src = buffer;
|
||||
const canvas = new Canvas.createCanvas(img.width, img.height);
|
||||
const ctx = canvas.getContext('2d');
|
||||
ctx.drawImage(img, 0, 0, img.width, img.height);
|
||||
const text = OCRAD(canvas);
|
||||
|
||||
console.log(text);
|
||||
|
||||
log.info(`OCR of ${buffer.byteLength} bytes took ${Date.now() - start}ms`);
|
||||
runOcr(note, buffer);
|
||||
});
|
||||
}
|
||||
|
||||
@ -136,7 +144,9 @@ function saveImage(parentNoteId, uploadBuffer, originalName, shrinkImageSwitch,
|
||||
note.save();
|
||||
|
||||
note.setContent(buffer);
|
||||
})
|
||||
});
|
||||
|
||||
runOcr(note, buffer);
|
||||
});
|
||||
|
||||
return {
|
||||
|
@ -90,6 +90,7 @@ const defaultOptions = [
|
||||
{ name: 'checkForUpdates', value: 'true', isSynced: true },
|
||||
{ name: 'disableTray', value: 'false', isSynced: false },
|
||||
{ name: 'userGuideSha256Hash', value: '', isSynced: true },
|
||||
{ name: 'ocrImages', value: 'true', isSynced: true },
|
||||
];
|
||||
|
||||
function initStartupOptions() {
|
||||
|
@ -40,63 +40,75 @@ class NoteContentFulltextExp extends Expression {
|
||||
const resultNoteSet = new NoteSet();
|
||||
const sql = require('../../sql');
|
||||
|
||||
for (let {noteId, type, mime, content, isProtected} of sql.iterateRows(`
|
||||
for (const row of sql.iterateRows(`
|
||||
SELECT noteId, type, mime, content, isProtected
|
||||
FROM notes JOIN note_contents USING (noteId)
|
||||
WHERE type IN ('text', 'code', 'mermaid') AND isDeleted = 0`)) {
|
||||
|
||||
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
|
||||
continue;
|
||||
}
|
||||
this.findInText(row, inputNoteSet, resultNoteSet);
|
||||
}
|
||||
|
||||
if (isProtected) {
|
||||
if (!protectedSessionService.isProtectedSessionAvailable()) {
|
||||
continue;
|
||||
}
|
||||
for (const row of sql.iterateRows(`
|
||||
SELECT noteId, 'plainText' as type, mime, content, isProtected
|
||||
FROM note_attachments JOIN note_attachment_contents USING (noteAttachmentId)
|
||||
WHERE name IN ('plainText') AND isDeleted = 0`)) {
|
||||
|
||||
try {
|
||||
content = protectedSessionService.decryptString(content);
|
||||
} catch (e) {
|
||||
log.info(`Cannot decrypt content of note ${noteId}`);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
content = this.preprocessContent(content, type, mime);
|
||||
|
||||
if (this.tokens.length === 1) {
|
||||
const [token] = this.tokens;
|
||||
|
||||
if ((this.operator === '=' && token === content)
|
||||
|| (this.operator === '!=' && token !== content)
|
||||
|| (this.operator === '*=' && content.endsWith(token))
|
||||
|| (this.operator === '=*' && content.startsWith(token))
|
||||
|| (this.operator === '*=*' && content.includes(token))
|
||||
|| (this.operator === '%=' && getRegex(token).test(content))) {
|
||||
|
||||
resultNoteSet.add(becca.notes[noteId]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
const nonMatchingToken = this.tokens.find(token =>
|
||||
!content.includes(token) &&
|
||||
(
|
||||
// in case of default fulltext search we should consider both title, attrs and content
|
||||
// so e.g. "hello world" should match when "hello" is in title and "world" in content
|
||||
!this.flatText
|
||||
|| !becca.notes[noteId].getFlatText().includes(token)
|
||||
)
|
||||
);
|
||||
|
||||
if (!nonMatchingToken) {
|
||||
resultNoteSet.add(becca.notes[noteId]);
|
||||
}
|
||||
}
|
||||
this.findInText(row, inputNoteSet, resultNoteSet);
|
||||
}
|
||||
|
||||
return resultNoteSet;
|
||||
}
|
||||
|
||||
findInText({noteId, isProtected, content, type, mime}, inputNoteSet, resultNoteSet) {
|
||||
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (isProtected) {
|
||||
if (!protectedSessionService.isProtectedSessionAvailable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
content = protectedSessionService.decryptString(content);
|
||||
} catch (e) {
|
||||
log.info(`Cannot decrypt content of note ${noteId}`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
content = this.preprocessContent(content, type, mime);
|
||||
|
||||
if (this.tokens.length === 1) {
|
||||
const [token] = this.tokens;
|
||||
|
||||
if ((this.operator === '=' && token === content)
|
||||
|| (this.operator === '!=' && token !== content)
|
||||
|| (this.operator === '*=' && content.endsWith(token))
|
||||
|| (this.operator === '=*' && content.startsWith(token))
|
||||
|| (this.operator === '*=*' && content.includes(token))
|
||||
|| (this.operator === '%=' && getRegex(token).test(content))) {
|
||||
|
||||
resultNoteSet.add(becca.notes[noteId]);
|
||||
}
|
||||
} else {
|
||||
const nonMatchingToken = this.tokens.find(token =>
|
||||
!content.includes(token) &&
|
||||
(
|
||||
// in case of default fulltext search we should consider both title, attrs and content
|
||||
// so e.g. "hello world" should match when "hello" is in title and "world" in content
|
||||
!this.flatText
|
||||
|| !becca.notes[noteId].getFlatText().includes(token)
|
||||
)
|
||||
);
|
||||
|
||||
if (!nonMatchingToken) {
|
||||
resultNoteSet.add(becca.notes[noteId]);
|
||||
}
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
preprocessContent(content, type, mime) {
|
||||
content = utils.normalize(content.toString());
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user