feat(ocr): add an option to display OCR text

This commit is contained in:
Elian Doran 2025-07-26 12:08:04 +03:00
parent c55aa6ee88
commit 422d318dac
No known key found for this signature in database
7 changed files with 284 additions and 1 deletions

View File

@ -146,6 +146,19 @@ export default class RootCommandExecutor extends Component {
}
}
/**
 * Command handler: opens the note of the currently active context in a tab
 * whose view scope requests the "ocr" view mode.
 *
 * Does nothing when the active context has no note path.
 */
async showNoteOCRTextCommand() {
    const activeNotePath = appContext.tabManager.getActiveContextNotePath();
    if (!activeNotePath) {
        return;
    }

    await appContext.tabManager.openTabWithNoteWithHoisting(activeNotePath, {
        activate: true,
        viewScope: { viewMode: "ocr" }
    });
}
async showAttachmentsCommand() {
const notePath = appContext.tabManager.getActiveContextNotePath();

View File

@ -674,6 +674,7 @@
"search_in_note": "Search in note",
"note_source": "Note source",
"note_attachments": "Note attachments",
"view_ocr_text": "View OCR text",
"open_note_externally": "Open note externally",
"open_note_externally_title": "File will be opened in an external application and watched for changes. You'll then be able to upload the modified version back to Trilium.",
"open_note_custom": "Open note custom",
@ -2002,5 +2003,14 @@
"delete-column-confirmation": "Are you sure you want to delete this column? The corresponding attribute will be deleted in the notes under this column as well.",
"new-item": "New item",
"add-column": "Add Column"
},
"ocr": {
"extracted_text_title": "Extracted Text (OCR)",
"loading_text": "Loading OCR text...",
"no_text_available": "No OCR text available",
"no_text_explanation": "This note has not been processed for OCR text extraction or no text was found.",
"failed_to_load": "Failed to load OCR text",
"extracted_on": "Extracted on: {{date}}",
"unknown_date": "Unknown"
}
}

View File

@ -90,6 +90,10 @@ const TPL = /*html*/`
<span class="bx bx-code"></span> ${t("note_actions.note_source")}<kbd data-command="showNoteSource"></kbd>
</li>
<li data-trigger-command="showNoteOCRText" class="dropdown-item show-ocr-text-button">
<span class="bx bx-text"></span> ${t("note_actions.view_ocr_text")}<kbd data-command="showNoteOCRText"></kbd>
</li>
<div class="dropdown-divider"></div>
@ -117,6 +121,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
private $printActiveNoteButton!: JQuery<HTMLElement>;
private $exportAsPdfButton!: JQuery<HTMLElement>;
private $showSourceButton!: JQuery<HTMLElement>;
private $showOCRTextButton!: JQuery<HTMLElement>;
private $showAttachmentsButton!: JQuery<HTMLElement>;
private $renderNoteButton!: JQuery<HTMLElement>;
private $saveRevisionButton!: JQuery<HTMLElement>;
@ -143,6 +148,7 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
this.$printActiveNoteButton = this.$widget.find(".print-active-note-button");
this.$exportAsPdfButton = this.$widget.find(".export-as-pdf-button");
this.$showSourceButton = this.$widget.find(".show-source-button");
this.$showOCRTextButton = this.$widget.find(".show-ocr-text-button");
this.$showAttachmentsButton = this.$widget.find(".show-attachments-button");
this.$renderNoteButton = this.$widget.find(".render-note-button");
this.$saveRevisionButton = this.$widget.find(".save-revision-button");
@ -190,6 +196,9 @@ export default class NoteActionsWidget extends NoteContextAwareWidget {
this.toggleDisabled(this.$showAttachmentsButton, !isInOptions);
this.toggleDisabled(this.$showSourceButton, ["text", "code", "relationMap", "mermaid", "canvas", "mindMap"].includes(note.type));
// Show OCR text button for notes that could have OCR data (images and files)
this.toggleDisabled(this.$showOCRTextButton, ["image", "file"].includes(note.type));
const canPrint = ["text", "code"].includes(note.type);
this.toggleDisabled(this.$printActiveNoteButton, canPrint);

View File

@ -28,6 +28,7 @@ import ContentWidgetTypeWidget from "./type_widgets/content_widget.js";
import AttachmentListTypeWidget from "./type_widgets/attachment_list.js";
import AttachmentDetailTypeWidget from "./type_widgets/attachment_detail.js";
import MindMapWidget from "./type_widgets/mind_map.js";
import ReadOnlyOCRTextWidget from "./type_widgets/read_only_ocr_text.js";
import utils from "../services/utils.js";
import type { NoteType } from "../entities/fnote.js";
import type TypeWidget from "./type_widgets/type_widget.js";
@ -55,6 +56,7 @@ const typeWidgetClasses = {
readOnlyText: ReadOnlyTextTypeWidget,
editableCode: EditableCodeTypeWidget,
readOnlyCode: ReadOnlyCodeTypeWidget,
readOnlyOCRText: ReadOnlyOCRTextWidget,
file: FileTypeWidget,
image: ImageTypeWidget,
search: NoneTypeWidget,
@ -85,6 +87,7 @@ type ExtendedNoteType =
| "empty"
| "readOnlyCode"
| "readOnlyText"
| "readOnlyOCRText"
| "editableText"
| "editableCode"
| "attachmentDetail"
@ -223,6 +226,8 @@ export default class NoteDetailWidget extends NoteContextAwareWidget {
if (viewScope?.viewMode === "source") {
resultingType = "readOnlyCode";
} else if (viewScope?.viewMode === "ocr") {
resultingType = "readOnlyOCRText";
} else if (viewScope && viewScope.viewMode === "attachments") {
resultingType = viewScope.attachmentId ? "attachmentDetail" : "attachmentList";
} else if (type === "text" && (await this.noteContext?.isReadOnly())) {

View File

@ -0,0 +1,155 @@
import type { EventData } from "../../components/app_context.js";
import type FNote from "../../entities/fnote.js";
import server from "../../services/server.js";
import toastService from "../../services/toast.js";
import { t } from "../../services/i18n.js";
import TypeWidget from "./type_widget.js";
// Markup for the OCR text widget: a scoped <style> block followed by a header
// (title from the "ocr.extracted_text_title" translation), an initially empty
// ".ocr-text-content" container and an initially empty ".ocr-text-meta" line.
// The template is evaluated once at module load, so the title is translated then.
const TPL = /*html*/`
<div class="note-detail-ocr-text note-detail-printable">
<style>
.note-detail-ocr-text {
min-height: 50px;
position: relative;
padding: 10px;
}
.ocr-text-content {
white-space: pre-wrap;
font-family: var(--detail-text-font-family);
font-size: var(--detail-text-font-size);
line-height: 1.6;
border: 1px solid var(--main-border-color);
border-radius: 4px;
padding: 15px;
background-color: var(--accented-background-color);
min-height: 100px;
}
.ocr-text-header {
margin-bottom: 10px;
padding: 8px 12px;
background-color: var(--main-background-color);
border: 1px solid var(--main-border-color);
border-radius: 4px;
font-weight: 500;
color: var(--main-text-color);
}
.ocr-text-meta {
font-size: 0.9em;
color: var(--muted-text-color);
margin-top: 10px;
font-style: italic;
}
.ocr-text-empty {
color: var(--muted-text-color);
font-style: italic;
text-align: center;
padding: 30px;
}
.ocr-text-loading {
text-align: center;
padding: 30px;
color: var(--muted-text-color);
}
.ocr-text-error {
color: var(--error-color);
background-color: var(--error-background-color);
border: 1px solid var(--error-border-color);
padding: 10px;
border-radius: 4px;
margin-top: 10px;
}
</style>
<div class="ocr-text-header">
<span class="bx bx-text"></span> ${t("ocr.extracted_text_title")}
</div>
<div class="ocr-text-content"></div>
<div class="ocr-text-meta"></div>
</div>`;
/**
 * Payload returned by `GET ocr/notes/:noteId/text`.
 *
 * Modelled as a union discriminated on `success`, because the failure payload
 * carries only an error message while the success payload carries the OCR data.
 * Checking `success` first narrows the type, so the OCR fields cannot be read
 * from a failed response by accident.
 */
type OCRResponse =
    | {
          success: true;
          /** Extracted text; an empty string when nothing is stored. */
          text: string;
          /** Whether non-empty OCR text exists for the note. */
          hasOcr: boolean;
          /** Timestamp of the last OCR run as stored by the server, or `null` if unknown. */
          extractedAt: string | null;
      }
    | {
          success: false;
          /** Human-readable failure reason, when the server supplied one. */
          error?: string;
      };
/**
 * Read-only type widget that displays the OCR text stored for a note.
 *
 * On every refresh it requests `ocr/notes/:noteId/text` from the server and
 * renders one of four states into the content area: loading, error,
 * "no text available", or the extracted text plus its extraction date.
 */
export default class ReadOnlyOCRTextWidget extends TypeWidget {
    /** Container that receives the OCR text (or a status message). */
    private $content!: JQuery<HTMLElement>;
    /** Line below the content used for the extraction date or an explanation. */
    private $meta!: JQuery<HTMLElement>;

    static getType() {
        return "readOnlyOCRText";
    }

    /** Builds the widget DOM from the template and caches the content/meta elements. */
    doRender() {
        this.$widget = $(TPL);
        this.contentSized();
        this.$content = this.$widget.find(".ocr-text-content");
        this.$meta = this.$widget.find(".ocr-text-meta");
        super.doRender();
    }

    /**
     * Loads and renders the OCR text of the given note.
     *
     * Failures (an unsuccessful response or a rejected request) are rendered
     * inside the widget and never rethrown.
     *
     * @param note the note whose OCR text should be shown.
     */
    async doRefresh(note: FNote) {
        // Show loading state
        this.$content.html(`<div class="ocr-text-loading">
<span class="bx bx-loader-alt bx-spin"></span> ${t("ocr.loading_text")}
</div>`);
        this.$meta.empty();

        try {
            const response = await server.get<OCRResponse>(`ocr/notes/${note.noteId}/text`);

            if (!response.success) {
                this.showError(response.error || t("ocr.failed_to_load"));
                return;
            }

            if (!response.hasOcr || !response.text) {
                this.$content.html(`<div class="ocr-text-empty">
<span class="bx bx-info-circle"></span> ${t("ocr.no_text_available")}
</div>`);
                this.$meta.html(t("ocr.no_text_explanation"));
                return;
            }

            // The extracted text is arbitrary content, so insert it as text, never as markup.
            this.$content.text(response.text);

            const extractedAt = ReadOnlyOCRTextWidget.formatExtractionDate(response.extractedAt);
            this.$meta.html(t("ocr.extracted_on", { date: extractedAt }));
        } catch (error: unknown) {
            console.error("Error loading OCR text:", error);
            this.showError(ReadOnlyOCRTextWidget.describeError(error) || t("ocr.failed_to_load"));
        }
    }

    /**
     * Formats the extraction timestamp for display.
     *
     * @param extractedAt timestamp string from the server, or `null`.
     * @returns the localized date/time; the "unknown" label when no timestamp
     *          was provided; or the raw value when it cannot be parsed, which
     *          is more useful to the user than "Invalid Date".
     */
    private static formatExtractionDate(extractedAt: string | null): string {
        if (!extractedAt) {
            return t("ocr.unknown_date");
        }

        const parsed = new Date(extractedAt);
        return Number.isNaN(parsed.getTime()) ? extractedAt : parsed.toLocaleString();
    }

    /**
     * Extracts a message from an arbitrary thrown value without assuming its shape.
     *
     * @returns the message, or an empty string when none can be determined
     *          (including when `null`/`undefined` was thrown).
     */
    private static describeError(error: unknown): string {
        if (error instanceof Error) {
            return error.message;
        }
        if (typeof error === "string") {
            return error;
        }
        if (typeof error === "object" && error !== null) {
            const { message } = error as { message?: unknown };
            if (typeof message === "string") {
                return message;
            }
        }
        return "";
    }

    /**
     * Replaces the content area with an error box and clears the meta line.
     *
     * @param message text to show; treated strictly as plain text.
     */
    private showError(message: string) {
        // Build the nodes instead of interpolating into an HTML string: the message can
        // originate from the server response or from a thrown error, so it must not be
        // parsed as markup.
        const $icon = $("<span>").addClass("bx bx-error");
        const $error = $("<div>")
            .addClass("ocr-text-error")
            .append($icon, document.createTextNode(` ${message}`));

        this.$content.empty().append($error);
        this.$meta.empty();
    }

    /**
     * Resolves the event with this widget's content element, but only when the
     * event targets this widget's note context and after initialization finished.
     */
    async executeWithContentElementEvent({ resolve, ntxId }: EventData<"executeWithContentElement">) {
        if (!this.isNoteContext(ntxId)) {
            return;
        }

        await this.initialized;
        resolve(this.$content);
    }
}

View File

@ -2,6 +2,7 @@ import { Request, Response } from "express";
import ocrService from "../../services/ocr/ocr_service.js";
import log from "../../services/log.js";
import becca from "../../becca/becca.js";
import sql from "../../services/sql.js";
/**
* @swagger
@ -511,6 +512,94 @@ async function deleteOCRResults(req: Request, res: Response) {
}
}
/**
* @swagger
* /api/ocr/notes/{noteId}/text:
* get:
* summary: Get OCR text for a specific note
* operationId: ocr-get-note-text
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* description: Note ID to get OCR text for
* responses:
* 200:
* description: OCR text retrieved successfully
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* text:
* type: string
* description: The extracted OCR text
* hasOcr:
* type: boolean
* description: Whether OCR text exists for this note
* extractedAt:
* type: string
* format: date-time
* description: When the OCR was last processed
* 404:
* description: Note not found
* tags: ["ocr"]
*/
// Route handler for GET /api/ocr/notes/:noteId/text.
//
// Looks up the note, reads the OCR columns of its blob row (if the note has a
// blob) and writes the JSON response itself: 404 when the note does not exist,
// 500 when anything throws, 200 otherwise. After every response it sets
// `triliumResponseHandled` on the response object.
async function getNoteOCRText(req: Request, res: Response) {
    // NOTE(review): presumably this flag tells the surrounding route wrapper that the
    // response was already sent — confirm against the route registration helper.
    const markHandled = () => {
        (res as any).triliumResponseHandled = true;
    };

    try {
        const note = becca.getNote(req.params.noteId);

        if (!note) {
            res.status(404).json({
                success: false,
                error: 'Note not found'
            });
            markHandled();
            return;
        }

        // Fetch the stored OCR result; notes without a blob have nothing to look up.
        const row = note.blobId
            ? sql.getRow<{
                ocr_text: string | null;
                ocr_last_processed: string | null;
            }>(`
SELECT ocr_text, ocr_last_processed
FROM blobs
WHERE blobId = ?
`, [note.blobId])
            : null;

        const ocrText = row ? row.ocr_text : null;
        const extractedAt = row ? row.ocr_last_processed : null;

        res.json({
            success: true,
            text: ocrText || '',
            hasOcr: !!ocrText,
            extractedAt
        });
        markHandled();
    } catch (error: unknown) {
        const logDetail = error instanceof Error ? error.message : String(error);
        log.error(`Error getting OCR text for note: ${logDetail}`);

        const clientDetail = error instanceof Error ? error.message : 'Unknown error';
        res.status(500).json({
            success: false,
            error: clientDetail
        });
        markHandled();
    }
}
export default {
processNoteOCR,
processAttachmentOCR,
@ -518,5 +607,6 @@ export default {
batchProcessOCR,
getBatchProgress,
getOCRStats,
deleteOCRResults
deleteOCRResults,
getNoteOCRText
};

View File

@ -394,6 +394,7 @@ function register(app: express.Application) {
asyncApiRoute(GET, "/api/ocr/batch-progress", ocrRoute.getBatchProgress);
asyncApiRoute(GET, "/api/ocr/stats", ocrRoute.getOCRStats);
asyncApiRoute(DEL, "/api/ocr/delete/:blobId", ocrRoute.deleteOCRResults);
asyncApiRoute(GET, "/api/ocr/notes/:noteId/text", ocrRoute.getNoteOCRText);
// API Documentation
apiDocsRoute(app);