mirror of
https://github.com/zadam/trilium.git
synced 2025-06-06 18:08:33 +02:00
ocr wip
This commit is contained in:
parent
37ba76fdd8
commit
88e09eb279
1413
package-lock.json
generated
1413
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -33,7 +33,7 @@
|
|||||||
"archiver": "5.3.1",
|
"archiver": "5.3.1",
|
||||||
"async-mutex": "0.4.0",
|
"async-mutex": "0.4.0",
|
||||||
"axios": "1.2.5",
|
"axios": "1.2.5",
|
||||||
"better-sqlite3": "7.4.5",
|
"better-sqlite3": "8.0.1",
|
||||||
"canvas": "2.11.0",
|
"canvas": "2.11.0",
|
||||||
"chokidar": "3.5.3",
|
"chokidar": "3.5.3",
|
||||||
"cls-hooked": "4.2.2",
|
"cls-hooked": "4.2.2",
|
||||||
@ -95,7 +95,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"cross-env": "7.0.3",
|
"cross-env": "7.0.3",
|
||||||
"electron": "16.2.8",
|
"electron": "23.0.0-beta.6",
|
||||||
"electron-builder": "23.6.0",
|
"electron-builder": "23.6.0",
|
||||||
"electron-packager": "17.1.1",
|
"electron-packager": "17.1.1",
|
||||||
"electron-rebuild": "3.2.9",
|
"electron-rebuild": "3.2.9",
|
||||||
|
@ -48,6 +48,14 @@ function isEntityEventsDisabled() {
|
|||||||
return !!namespace.get('disableEntityEvents');
|
return !!namespace.get('disableEntityEvents');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isOcrDisabled() {
|
||||||
|
return !!namespace.get('disableOcr');
|
||||||
|
}
|
||||||
|
|
||||||
|
function disableOcr() {
|
||||||
|
namespace.set('disableOcr', true);
|
||||||
|
}
|
||||||
|
|
||||||
function getAndClearEntityChangeIds() {
|
function getAndClearEntityChangeIds() {
|
||||||
const entityChangeIds = namespace.get('entityChangeIds') || [];
|
const entityChangeIds = namespace.get('entityChangeIds') || [];
|
||||||
|
|
||||||
@ -92,5 +100,7 @@ module.exports = {
|
|||||||
reset,
|
reset,
|
||||||
getAndClearEntityChangeIds,
|
getAndClearEntityChangeIds,
|
||||||
addEntityChange,
|
addEntityChange,
|
||||||
ignoreEntityChangeIds
|
ignoreEntityChangeIds,
|
||||||
|
isOcrDisabled,
|
||||||
|
disableOcr
|
||||||
};
|
};
|
||||||
|
@ -12,8 +12,7 @@ const sanitizeFilename = require('sanitize-filename');
|
|||||||
const isSvg = require('is-svg');
|
const isSvg = require('is-svg');
|
||||||
const isAnimated = require('is-animated');
|
const isAnimated = require('is-animated');
|
||||||
const htmlSanitizer = require("./html_sanitizer");
|
const htmlSanitizer = require("./html_sanitizer");
|
||||||
const OCRAD = require('ocrad.js');
|
const textExtractingService = require("./text_extracting");
|
||||||
const Canvas = require('canvas');
|
|
||||||
|
|
||||||
async function processImage(uploadBuffer, originalName, shrinkImageSwitch) {
|
async function processImage(uploadBuffer, originalName, shrinkImageSwitch) {
|
||||||
const compressImages = optionService.getOptionBool("compressImages");
|
const compressImages = optionService.getOptionBool("compressImages");
|
||||||
@ -128,7 +127,7 @@ function saveImage(parentNoteId, uploadBuffer, originalName, shrinkImageSwitch,
|
|||||||
note.setContent(buffer);
|
note.setContent(buffer);
|
||||||
});
|
});
|
||||||
|
|
||||||
runOcr(note, buffer);
|
textExtractingService.runOcr(note, buffer);
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -729,27 +729,12 @@ function scanForLinks(note, content) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function runOcr(note, buffer) {
|
|
||||||
if (!note.isImage() || !optionService.getOptionBool('ocrImages')) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const plainText = textExtractingService.ocrTextFromBuffer(buffer);
|
|
||||||
|
|
||||||
note.saveNoteAncillary('plainText', 'text/plain', plainText);
|
|
||||||
}
|
|
||||||
catch (e) {
|
|
||||||
log.error(`OCR on note '${note.noteId}' failed with error '${e.message}', stack ${e.stack}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Things which have to be executed after updating content, but asynchronously (separate transaction)
|
* Things which have to be executed after updating content, but asynchronously (separate transaction)
|
||||||
*/
|
*/
|
||||||
async function asyncPostProcessContent(note, content) {
|
async function asyncPostProcessContent(note, content) {
|
||||||
scanForLinks(note, content);
|
scanForLinks(note, content);
|
||||||
runOcr(note, content);
|
await textExtractingService.runOcr(note, content);
|
||||||
await textExtractingService.extractTextFromPdf(note, content);
|
await textExtractingService.extractTextFromPdf(note, content);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
const Canvas = require("canvas");
|
const Canvas = require("canvas");
|
||||||
const OCRAD = require("ocrad.js");
|
const OCRAD = require("ocrad.js");
|
||||||
const log = require("./log.js");
|
const log = require("./log");
|
||||||
const optionService = require("./options.js");
|
const optionService = require("./options");
|
||||||
|
const cls = require("./cls");
|
||||||
|
|
||||||
function ocrFromByteArray(img) {
|
function ocrFromByteArray(img) {
|
||||||
// byte array contains raw uncompressed pixel data
|
// byte array contains raw uncompressed pixel data
|
||||||
@ -85,7 +86,7 @@ async function extractTextFromPdf(note, buffer) {
|
|||||||
content.items.forEach(({str}) => strings.push(str));
|
content.items.forEach(({str}) => strings.push(str));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (optionService.getOptionBool('ocrImages')) {
|
if (optionService.getOptionBool('ocrImages') && !cls.isOcrDisabled()) {
|
||||||
await ocrTextFromPdfImages(pdfjsLib, page, strings);
|
await ocrTextFromPdfImages(pdfjsLib, page, strings);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -117,13 +118,37 @@ async function ocrTextFromBuffer(buffer) {
|
|||||||
const canvas = new Canvas.createCanvas(img.width, img.height);
|
const canvas = new Canvas.createCanvas(img.width, img.height);
|
||||||
const ctx = canvas.getContext('2d');
|
const ctx = canvas.getContext('2d');
|
||||||
ctx.drawImage(img, 0, 0, img.width, img.height);
|
ctx.drawImage(img, 0, 0, img.width, img.height);
|
||||||
|
|
||||||
const plainText = OCRAD(canvas);
|
const plainText = OCRAD(canvas);
|
||||||
|
|
||||||
log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
|
log.info(`OCR of ${buffer.byteLength} image bytes into ${plainText.length} chars of text took ${Date.now() - start}ms`);
|
||||||
return plainText;
|
return plainText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function runOcr(note, buffer) {
|
||||||
|
console.log("buffer length", buffer.length);
|
||||||
|
|
||||||
|
if (!note.isImage()
|
||||||
|
|| !optionService.getOptionBool('ocrImages')
|
||||||
|
|| cls.isOcrDisabled()
|
||||||
|
|| buffer.length === 0
|
||||||
|
) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const plainText = await ocrTextFromBuffer(buffer);
|
||||||
|
|
||||||
|
console.log("OCR", plainText);
|
||||||
|
|
||||||
|
note.saveNoteAncillary('plainText', 'text/plain', plainText);
|
||||||
|
}
|
||||||
|
catch (e) {
|
||||||
|
log.error(`OCR on note '${note.noteId}' failed with error '${e.message}', stack ${e.stack}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
ocrTextFromBuffer,
|
runOcr,
|
||||||
extractTextFromPdf
|
extractTextFromPdf
|
||||||
};
|
};
|
||||||
|
@ -13,6 +13,7 @@ const yauzl = require("yauzl");
|
|||||||
const htmlSanitizer = require('./html_sanitizer');
|
const htmlSanitizer = require('./html_sanitizer');
|
||||||
const sql = require('./sql');
|
const sql = require('./sql');
|
||||||
const options = require('./options');
|
const options = require('./options');
|
||||||
|
const cls = require('./cls');
|
||||||
const {USER_GUIDE_ZIP_DIR} = require('./resource_dir');
|
const {USER_GUIDE_ZIP_DIR} = require('./resource_dir');
|
||||||
|
|
||||||
async function importUserGuideIfNeeded() {
|
async function importUserGuideIfNeeded() {
|
||||||
@ -33,6 +34,8 @@ async function importUserGuideIfNeeded() {
|
|||||||
const hiddenRoot = becca.getNote("_hidden");
|
const hiddenRoot = becca.getNote("_hidden");
|
||||||
const data = await fs.readFile(USER_GUIDE_ZIP_DIR + "/user-guide.zip", "binary");
|
const data = await fs.readFile(USER_GUIDE_ZIP_DIR + "/user-guide.zip", "binary");
|
||||||
|
|
||||||
|
cls.disableOcr(); // no OCR needed for user guide images
|
||||||
|
|
||||||
await importZip(Buffer.from(data, 'binary'), hiddenRoot);
|
await importZip(Buffer.from(data, 'binary'), hiddenRoot);
|
||||||
|
|
||||||
options.setOption('userGuideSha256Hash', userGuideSha256HashInFile);
|
options.setOption('userGuideSha256Hash', userGuideSha256HashInFile);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user