mirror of
https://github.com/zadam/trilium.git
synced 2025-03-01 14:22:32 +01:00
ocr
This commit is contained in:
parent
c68a67d148
commit
63c62df787
@ -90,33 +90,34 @@ class BNoteAttachment extends AbstractBeccaEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setContent(content) {
|
setContent(content) {
|
||||||
this.contentCheckSum = this.calculateCheckSum(content);
|
sql.transactional(() => {
|
||||||
this.save(); // also explicitly save note_attachment to update contentCheckSum
|
this.contentCheckSum = this.calculateCheckSum(content);
|
||||||
|
this.save(); // also explicitly save note_attachment to update contentCheckSum
|
||||||
|
|
||||||
const pojo = {
|
const pojo = {
|
||||||
noteAttachmentId: this.noteAttachmentId,
|
noteAttachmentId: this.noteAttachmentId,
|
||||||
content: content,
|
content: content,
|
||||||
utcDateModified: dateUtils.utcNowDateTime()
|
utcDateModified: dateUtils.utcNowDateTime()
|
||||||
};
|
};
|
||||||
|
|
||||||
if (this.isProtected) {
|
if (this.isProtected) {
|
||||||
if (protectedSessionService.isProtectedSessionAvailable()) {
|
if (protectedSessionService.isProtectedSessionAvailable()) {
|
||||||
pojo.content = protectedSessionService.encrypt(pojo.content);
|
pojo.content = protectedSessionService.encrypt(pojo.content);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Cannot update content of noteAttachmentId=${this.noteAttachmentId} since we're out of protected session.`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
throw new Error(`Cannot update content of noteAttachmentId=${this.noteAttachmentId} since we're out of protected session.`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sql.upsert("note_attachment_contents", "noteAttachmentId", pojo);
|
sql.upsert("note_attachment_contents", "noteAttachmentId", pojo);
|
||||||
|
|
||||||
entityChangesService.addEntityChange({
|
entityChangesService.addEntityChange({
|
||||||
entityName: 'note_attachment_contents',
|
entityName: 'note_attachment_contents',
|
||||||
entityId: this.noteAttachmentId,
|
entityId: this.noteAttachmentId,
|
||||||
hash: this.contentCheckSum,
|
hash: this.contentCheckSum,
|
||||||
isErased: false,
|
isErased: false,
|
||||||
utcDateChanged: pojo.utcDateModified,
|
utcDateChanged: pojo.utcDateModified,
|
||||||
isSynced: true
|
isSynced: true
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,6 +65,24 @@ function getImageMimeFromExtension(ext) {
|
|||||||
return `image/${ext === 'svg' ? 'svg+xml' : ext}`;
|
return `image/${ext === 'svg' ? 'svg+xml' : ext}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function runOcr(note, buffer) {
|
||||||
|
if (!optionService.getOptionBool('ocrImages')) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const start = Date.now();
|
||||||
|
const img = new Canvas.Image();
|
||||||
|
img.src = buffer;
|
||||||
|
const canvas = new Canvas.createCanvas(img.width, img.height);
|
||||||
|
const ctx = canvas.getContext('2d');
|
||||||
|
ctx.drawImage(img, 0, 0, img.width, img.height);
|
||||||
|
const plainText = OCRAD(canvas);
|
||||||
|
|
||||||
|
log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
|
||||||
|
|
||||||
|
note.saveNoteAttachment('plainText', 'text/plain', plainText);
|
||||||
|
}
|
||||||
|
|
||||||
function updateImage(noteId, uploadBuffer, originalName) {
|
function updateImage(noteId, uploadBuffer, originalName) {
|
||||||
log.info(`Updating image ${noteId}: ${originalName}`);
|
log.info(`Updating image ${noteId}: ${originalName}`);
|
||||||
|
|
||||||
@ -85,17 +103,7 @@ function updateImage(noteId, uploadBuffer, originalName) {
|
|||||||
note.setContent(buffer);
|
note.setContent(buffer);
|
||||||
});
|
});
|
||||||
|
|
||||||
const start = Date.now();
|
runOcr(note, buffer);
|
||||||
const img = new Canvas.Image();
|
|
||||||
img.src = buffer;
|
|
||||||
const canvas = new Canvas.createCanvas(img.width, img.height);
|
|
||||||
const ctx = canvas.getContext('2d');
|
|
||||||
ctx.drawImage(img, 0, 0, img.width, img.height);
|
|
||||||
const text = OCRAD(canvas);
|
|
||||||
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
log.info(`OCR of ${buffer.byteLength} bytes took ${Date.now() - start}ms`);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +144,9 @@ function saveImage(parentNoteId, uploadBuffer, originalName, shrinkImageSwitch,
|
|||||||
note.save();
|
note.save();
|
||||||
|
|
||||||
note.setContent(buffer);
|
note.setContent(buffer);
|
||||||
})
|
});
|
||||||
|
|
||||||
|
runOcr(note, buffer);
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -90,6 +90,7 @@ const defaultOptions = [
|
|||||||
{ name: 'checkForUpdates', value: 'true', isSynced: true },
|
{ name: 'checkForUpdates', value: 'true', isSynced: true },
|
||||||
{ name: 'disableTray', value: 'false', isSynced: false },
|
{ name: 'disableTray', value: 'false', isSynced: false },
|
||||||
{ name: 'userGuideSha256Hash', value: '', isSynced: true },
|
{ name: 'userGuideSha256Hash', value: '', isSynced: true },
|
||||||
|
{ name: 'ocrImages', value: 'true', isSynced: true },
|
||||||
];
|
];
|
||||||
|
|
||||||
function initStartupOptions() {
|
function initStartupOptions() {
|
||||||
|
@ -40,63 +40,75 @@ class NoteContentFulltextExp extends Expression {
|
|||||||
const resultNoteSet = new NoteSet();
|
const resultNoteSet = new NoteSet();
|
||||||
const sql = require('../../sql');
|
const sql = require('../../sql');
|
||||||
|
|
||||||
for (let {noteId, type, mime, content, isProtected} of sql.iterateRows(`
|
for (const row of sql.iterateRows(`
|
||||||
SELECT noteId, type, mime, content, isProtected
|
SELECT noteId, type, mime, content, isProtected
|
||||||
FROM notes JOIN note_contents USING (noteId)
|
FROM notes JOIN note_contents USING (noteId)
|
||||||
WHERE type IN ('text', 'code', 'mermaid') AND isDeleted = 0`)) {
|
WHERE type IN ('text', 'code', 'mermaid') AND isDeleted = 0`)) {
|
||||||
|
|
||||||
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
|
this.findInText(row, inputNoteSet, resultNoteSet);
|
||||||
continue;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (isProtected) {
|
for (const row of sql.iterateRows(`
|
||||||
if (!protectedSessionService.isProtectedSessionAvailable()) {
|
SELECT noteId, 'plainText' as type, mime, content, isProtected
|
||||||
continue;
|
FROM note_attachments JOIN note_attachment_contents USING (noteAttachmentId)
|
||||||
}
|
WHERE name IN ('plainText') AND isDeleted = 0`)) {
|
||||||
|
|
||||||
try {
|
this.findInText(row, inputNoteSet, resultNoteSet);
|
||||||
content = protectedSessionService.decryptString(content);
|
|
||||||
} catch (e) {
|
|
||||||
log.info(`Cannot decrypt content of note ${noteId}`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
content = this.preprocessContent(content, type, mime);
|
|
||||||
|
|
||||||
if (this.tokens.length === 1) {
|
|
||||||
const [token] = this.tokens;
|
|
||||||
|
|
||||||
if ((this.operator === '=' && token === content)
|
|
||||||
|| (this.operator === '!=' && token !== content)
|
|
||||||
|| (this.operator === '*=' && content.endsWith(token))
|
|
||||||
|| (this.operator === '=*' && content.startsWith(token))
|
|
||||||
|| (this.operator === '*=*' && content.includes(token))
|
|
||||||
|| (this.operator === '%=' && getRegex(token).test(content))) {
|
|
||||||
|
|
||||||
resultNoteSet.add(becca.notes[noteId]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
const nonMatchingToken = this.tokens.find(token =>
|
|
||||||
!content.includes(token) &&
|
|
||||||
(
|
|
||||||
// in case of default fulltext search we should consider both title, attrs and content
|
|
||||||
// so e.g. "hello world" should match when "hello" is in title and "world" in content
|
|
||||||
!this.flatText
|
|
||||||
|| !becca.notes[noteId].getFlatText().includes(token)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!nonMatchingToken) {
|
|
||||||
resultNoteSet.add(becca.notes[noteId]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return resultNoteSet;
|
return resultNoteSet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
findInText({noteId, isProtected, content, type, mime}, inputNoteSet, resultNoteSet) {
|
||||||
|
if (!inputNoteSet.hasNoteId(noteId) || !(noteId in becca.notes)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isProtected) {
|
||||||
|
if (!protectedSessionService.isProtectedSessionAvailable()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
content = protectedSessionService.decryptString(content);
|
||||||
|
} catch (e) {
|
||||||
|
log.info(`Cannot decrypt content of note ${noteId}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
content = this.preprocessContent(content, type, mime);
|
||||||
|
|
||||||
|
if (this.tokens.length === 1) {
|
||||||
|
const [token] = this.tokens;
|
||||||
|
|
||||||
|
if ((this.operator === '=' && token === content)
|
||||||
|
|| (this.operator === '!=' && token !== content)
|
||||||
|
|| (this.operator === '*=' && content.endsWith(token))
|
||||||
|
|| (this.operator === '=*' && content.startsWith(token))
|
||||||
|
|| (this.operator === '*=*' && content.includes(token))
|
||||||
|
|| (this.operator === '%=' && getRegex(token).test(content))) {
|
||||||
|
|
||||||
|
resultNoteSet.add(becca.notes[noteId]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const nonMatchingToken = this.tokens.find(token =>
|
||||||
|
!content.includes(token) &&
|
||||||
|
(
|
||||||
|
// in case of default fulltext search we should consider both title, attrs and content
|
||||||
|
// so e.g. "hello world" should match when "hello" is in title and "world" in content
|
||||||
|
!this.flatText
|
||||||
|
|| !becca.notes[noteId].getFlatText().includes(token)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!nonMatchingToken) {
|
||||||
|
resultNoteSet.add(becca.notes[noteId]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
preprocessContent(content, type, mime) {
|
preprocessContent(content, type, mime) {
|
||||||
content = utils.normalize(content.toString());
|
content = utils.normalize(content.toString());
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user