mirror of
https://github.com/zadam/trilium.git
synced 2025-06-06 09:58:32 +02:00
ocr wip
This commit is contained in:
parent
37ba76fdd8
commit
88e09eb279
1413
package-lock.json
generated
1413
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -33,7 +33,7 @@
|
||||
"archiver": "5.3.1",
|
||||
"async-mutex": "0.4.0",
|
||||
"axios": "1.2.5",
|
||||
"better-sqlite3": "7.4.5",
|
||||
"better-sqlite3": "8.0.1",
|
||||
"canvas": "2.11.0",
|
||||
"chokidar": "3.5.3",
|
||||
"cls-hooked": "4.2.2",
|
||||
@ -95,7 +95,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"cross-env": "7.0.3",
|
||||
"electron": "16.2.8",
|
||||
"electron": "23.0.0-beta.6",
|
||||
"electron-builder": "23.6.0",
|
||||
"electron-packager": "17.1.1",
|
||||
"electron-rebuild": "3.2.9",
|
||||
|
@ -48,6 +48,14 @@ function isEntityEventsDisabled() {
|
||||
return !!namespace.get('disableEntityEvents');
|
||||
}
|
||||
|
||||
/**
 * Tells whether the 'disableOcr' flag is currently set in `namespace`.
 *
 * @returns {boolean} true when the stored flag is truthy, false otherwise
 *                    (including when the flag was never set)
 */
function isOcrDisabled() {
    const ocrFlag = namespace.get('disableOcr');

    return Boolean(ocrFlag);
}
|
||||
|
||||
/**
 * Raises the 'disableOcr' flag in `namespace`, so that a later
 * isOcrDisabled() check made against the same namespace state reports true.
 */
function disableOcr() {
    const flagValue = true;

    namespace.set('disableOcr', flagValue);
}
|
||||
|
||||
function getAndClearEntityChangeIds() {
|
||||
const entityChangeIds = namespace.get('entityChangeIds') || [];
|
||||
|
||||
@ -92,5 +100,7 @@ module.exports = {
|
||||
reset,
|
||||
getAndClearEntityChangeIds,
|
||||
addEntityChange,
|
||||
ignoreEntityChangeIds
|
||||
ignoreEntityChangeIds,
|
||||
isOcrDisabled,
|
||||
disableOcr
|
||||
};
|
||||
|
@ -12,8 +12,7 @@ const sanitizeFilename = require('sanitize-filename');
|
||||
const isSvg = require('is-svg');
|
||||
const isAnimated = require('is-animated');
|
||||
const htmlSanitizer = require("./html_sanitizer");
|
||||
const OCRAD = require('ocrad.js');
|
||||
const Canvas = require('canvas');
|
||||
const textExtractingService = require("./text_extracting");
|
||||
|
||||
async function processImage(uploadBuffer, originalName, shrinkImageSwitch) {
|
||||
const compressImages = optionService.getOptionBool("compressImages");
|
||||
@ -128,7 +127,7 @@ function saveImage(parentNoteId, uploadBuffer, originalName, shrinkImageSwitch,
|
||||
note.setContent(buffer);
|
||||
});
|
||||
|
||||
runOcr(note, buffer);
|
||||
textExtractingService.runOcr(note, buffer);
|
||||
});
|
||||
|
||||
return {
|
||||
|
@ -729,27 +729,12 @@ function scanForLinks(note, content) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs OCR on the content of an image note and stores the recognized text
 * as the note's 'plainText' ancillary.
 *
 * Does nothing when the note is not an image or when the 'ocrImages' option
 * is switched off. OCR failures are logged and never propagated to the caller.
 *
 * @param note - note whose content is being processed; must provide isImage(),
 *               saveNoteAncillary() and noteId
 * @param buffer - raw image content handed over to the OCR routine
 * @returns {Promise<void>} settles once the text has been saved (or the failure logged)
 */
async function runOcr(note, buffer) {
    if (!note.isImage() || !optionService.getOptionBool('ocrImages')) {
        return;
    }

    try {
        // ocrTextFromBuffer is async - without the await a pending Promise would be
        // saved instead of the text, and a rejection would bypass the catch below
        const plainText = await textExtractingService.ocrTextFromBuffer(buffer);

        note.saveNoteAncillary('plainText', 'text/plain', plainText);
    }
    catch (e) {
        log.error(`OCR on note '${note.noteId}' failed with error '${e.message}', stack ${e.stack}`);
    }
}
|
||||
|
||||
/**
 * Things which have to be executed after updating content, but asynchronously (separate transaction)
 *
 * Steps, in order: scan the content for links, run OCR, extract text from PDF.
 *
 * @param note - note whose content has been updated
 * @param content - the updated content, passed unchanged to every step
 * @returns {Promise<void>} settles after the PDF text extraction step has finished
 */
async function asyncPostProcessContent(note, content) {
    scanForLinks(note, content);

    // OCR is triggered exactly once, through the text extracting service, and awaited;
    // an additional un-awaited runOcr() call here would process the same image twice
    await textExtractingService.runOcr(note, content);

    await textExtractingService.extractTextFromPdf(note, content);
}
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
const Canvas = require("canvas");
|
||||
const OCRAD = require("ocrad.js");
|
||||
const log = require("./log.js");
|
||||
const optionService = require("./options.js");
|
||||
const log = require("./log");
|
||||
const optionService = require("./options");
|
||||
const cls = require("./cls");
|
||||
|
||||
function ocrFromByteArray(img) {
|
||||
// byte array contains raw uncompressed pixel data
|
||||
@ -85,7 +86,7 @@ async function extractTextFromPdf(note, buffer) {
|
||||
content.items.forEach(({str}) => strings.push(str));
|
||||
|
||||
try {
|
||||
if (optionService.getOptionBool('ocrImages')) {
|
||||
if (optionService.getOptionBool('ocrImages') && !cls.isOcrDisabled()) {
|
||||
await ocrTextFromPdfImages(pdfjsLib, page, strings);
|
||||
}
|
||||
}
|
||||
@ -117,13 +118,37 @@ async function ocrTextFromBuffer(buffer) {
|
||||
const canvas = new Canvas.createCanvas(img.width, img.height);
|
||||
const ctx = canvas.getContext('2d');
|
||||
ctx.drawImage(img, 0, 0, img.width, img.height);
|
||||
|
||||
const plainText = OCRAD(canvas);
|
||||
|
||||
log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
|
||||
return plainText;
|
||||
}
|
||||
|
||||
/**
 * Runs OCR on the content of an image note and stores the recognized text
 * as the note's 'plainText' ancillary.
 *
 * Nothing happens when any of the following holds:
 *  - the note is not an image
 *  - the 'ocrImages' option is switched off
 *  - OCR has been disabled through cls
 *  - the buffer is missing or empty
 *
 * OCR failures are logged and never propagated to the caller.
 *
 * @param note - note being processed; must provide isImage(), saveNoteAncillary() and noteId
 * @param buffer - image content to recognize
 * @returns {Promise<void>} settles once the text has been saved (or the failure logged)
 */
async function runOcr(note, buffer) {
    // the buffer is inspected only here, after the cheaper checks, so a missing
    // buffer leads to a quiet skip instead of a TypeError
    if (!note.isImage()
        || !optionService.getOptionBool('ocrImages')
        || cls.isOcrDisabled()
        || !buffer
        || buffer.length === 0
    ) {
        return;
    }

    try {
        const plainText = await ocrTextFromBuffer(buffer);

        note.saveNoteAncillary('plainText', 'text/plain', plainText);
    }
    catch (e) {
        log.error(`OCR on note '${note.noteId}' failed with error '${e.message}', stack ${e.stack}`);
    }
}
|
||||
|
||||
// Functions exposed to the other services: OCR of a raw image buffer, OCR of a note
// (result stored on the note) and text extraction from PDF content.
module.exports = {ocrTextFromBuffer, runOcr, extractTextFromPdf};
|
||||
|
@ -13,6 +13,7 @@ const yauzl = require("yauzl");
|
||||
const htmlSanitizer = require('./html_sanitizer');
|
||||
const sql = require('./sql');
|
||||
const options = require('./options');
|
||||
const cls = require('./cls');
|
||||
const {USER_GUIDE_ZIP_DIR} = require('./resource_dir');
|
||||
|
||||
async function importUserGuideIfNeeded() {
|
||||
@ -33,6 +34,8 @@ async function importUserGuideIfNeeded() {
|
||||
const hiddenRoot = becca.getNote("_hidden");
|
||||
const data = await fs.readFile(USER_GUIDE_ZIP_DIR + "/user-guide.zip", "binary");
|
||||
|
||||
cls.disableOcr(); // no OCR needed for user guide images
|
||||
|
||||
await importZip(Buffer.from(data, 'binary'), hiddenRoot);
|
||||
|
||||
options.setOption('userGuideSha256Hash', userGuideSha256HashInFile);
|
||||
|
Loading…
x
Reference in New Issue
Block a user