feat(ocr): swap from custom table to using the blobs table, with a new column

2026-02-06 05:44:27 +01:00 · 2025-07-14 16:15:15 +00:00 · 2025-07-14 16:15:15 +00:00 · 9029f59410
commit 9029f59410
parent 4b5e8d33a6
7 changed files with 246 additions and 331 deletions
--- a/apps/server/src/becca/entities/bblob.ts
+++ b/apps/server/src/becca/entities/bblob.ts
@ -10,11 +10,12 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
        return "blobId";
    }
    static get hashedProperties() {
-        return ["blobId", "content"];
+        return ["blobId", "content", "ocr_text"];
    }

    content!: string | Buffer;
    contentLength!: number;
+    ocr_text?: string | null;

    constructor(row: BlobRow) {
        super();
@ -25,6 +26,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
        this.blobId = row.blobId;
        this.content = row.content;
        this.contentLength = row.contentLength;
+        this.ocr_text = row.ocr_text;
        this.dateModified = row.dateModified;
        this.utcDateModified = row.utcDateModified;
    }
@ -34,6 +36,7 @@ class BBlob extends AbstractBeccaEntity<BBlob> {
            blobId: this.blobId,
            content: this.content || null,
            contentLength: this.contentLength,
+            ocr_text: this.ocr_text || null,
            dateModified: this.dateModified,
            utcDateModified: this.utcDateModified
        };
--- a/apps/server/src/migrations/migrations.ts
+++ b/apps/server/src/migrations/migrations.ts
@ -6,64 +6,16 @@

 // Migrations should be kept in descending order, so the latest migration is first.
 const MIGRATIONS: (SqlMigration | JsMigration)[] = [
-    // Add OCR results table for storing extracted text from images
+    // Add OCR text column to blobs table for storing extracted text from images
    {
        version: 233,
        sql: /*sql*/`\
-            -- Create OCR results table to store extracted text from images
-            CREATE TABLE IF NOT EXISTS ocr_results (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                entity_id TEXT NOT NULL,
-                entity_type TEXT NOT NULL DEFAULT 'note',
-                extracted_text TEXT NOT NULL,
-                confidence REAL NOT NULL,
-                language TEXT NOT NULL DEFAULT 'eng',
-                extracted_at TEXT NOT NULL,
-                created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
-                updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
-                UNIQUE(entity_id, entity_type)
-            );
-
-            -- Create indexes for better search performance
-            CREATE INDEX IF NOT EXISTS idx_ocr_results_entity 
-            ON ocr_results (entity_id, entity_type);
-
-            CREATE INDEX IF NOT EXISTS idx_ocr_results_text 
-            ON ocr_results (extracted_text);
-
-            CREATE INDEX IF NOT EXISTS idx_ocr_results_confidence 
-            ON ocr_results (confidence);
-
-            -- Create full-text search index for extracted text
-            CREATE VIRTUAL TABLE IF NOT EXISTS ocr_results_fts USING fts5(
-                entity_id UNINDEXED,
-                entity_type UNINDEXED,
-                extracted_text,
-                content='ocr_results',
-                content_rowid='id'
-            );
-
-            -- Create triggers to keep FTS table in sync
-            CREATE TRIGGER IF NOT EXISTS ocr_results_fts_insert 
-            AFTER INSERT ON ocr_results 
-            BEGIN
-                INSERT INTO ocr_results_fts(rowid, entity_id, entity_type, extracted_text) 
-                VALUES (new.id, new.entity_id, new.entity_type, new.extracted_text);
-            END;
-
-            CREATE TRIGGER IF NOT EXISTS ocr_results_fts_update 
-            AFTER UPDATE ON ocr_results 
-            BEGIN
-                UPDATE ocr_results_fts 
-                SET extracted_text = new.extracted_text 
-                WHERE rowid = new.id;
-            END;
-
-            CREATE TRIGGER IF NOT EXISTS ocr_results_fts_delete 
-            AFTER DELETE ON ocr_results 
-            BEGIN
-                DELETE FROM ocr_results_fts WHERE rowid = old.id;
-            END;
+            -- Add OCR text column to blobs table
+            ALTER TABLE blobs ADD COLUMN ocr_text TEXT DEFAULT NULL;
+            
+            -- Create index for OCR text searches
+            CREATE INDEX IF NOT EXISTS idx_blobs_ocr_text 
+            ON blobs (ocr_text);
        `
    },
    // Remove embedding tables since LLM embedding functionality has been removed
--- a/apps/server/src/routes/api/ocr.ts
+++ b/apps/server/src/routes/api/ocr.ts
@ -246,13 +246,6 @@ async function processAttachmentOCR(req: Request, res: Response) {
 *         schema:
 *           type: string
 *         description: Search query text
- *       - name: entityType
- *         in: query
- *         required: false
- *         schema:
- *           type: string
- *           enum: [note, attachment]
- *         description: Filter by entity type
 *     responses:
 *       '200':
 *         description: Search results
@ -268,14 +261,10 @@ async function processAttachmentOCR(req: Request, res: Response) {
 *                   items:
 *                     type: object
 *                     properties:
- *                       entityId:
- *                         type: string
- *                       entityType:
+ *                       blobId:
 *                         type: string
 *                       text:
 *                         type: string
- *                       confidence:
- *                         type: number
 *       '400':
 *         description: Bad request - missing search query
 *       '500':
@ -286,7 +275,7 @@ async function processAttachmentOCR(req: Request, res: Response) {
 */
 async function searchOCR(req: Request, res: Response) {
    try {
-        const { q: searchText, entityType } = req.query;
+        const { q: searchText } = req.query;

        if (!searchText || typeof searchText !== 'string') {
            res.status(400).json({
@ -297,10 +286,7 @@ async function searchOCR(req: Request, res: Response) {
            return;
        }

-        const results = ocrService.searchOCRResults(
-            searchText,
-            entityType as 'note' | 'attachment' | undefined
-        );
+        const results = ocrService.searchOCRResults(searchText);

        res.json({
            success: true,
@ -431,10 +417,10 @@ async function getBatchProgress(req: Request, res: Response) {
 *                   properties:
 *                     totalProcessed:
 *                       type: number
- *                     averageConfidence:
+ *                     imageNotes:
+ *                       type: number
+ *                     imageAttachments:
 *                       type: number
- *                     byEntityType:
- *                       type: object
 *       '500':
 *         description: Internal server error
 *     security:
@ -463,24 +449,17 @@ async function getOCRStats(req: Request, res: Response) {

 /**
 * @swagger
- * /api/ocr/delete/{entityType}/{entityId}:
+ * /api/ocr/delete/{blobId}:
 *   delete:
- *     summary: Delete OCR results for a specific entity
+ *     summary: Delete OCR results for a specific blob
 *     operationId: ocr-delete-results
 *     parameters:
- *       - name: entityType
+ *       - name: blobId
 *         in: path
 *         required: true
 *         schema:
 *           type: string
- *           enum: [note, attachment]
- *         description: Type of entity
- *       - name: entityId
- *         in: path
- *         required: true
- *         schema:
- *           type: string
- *         description: ID of the entity
+ *         description: ID of the blob
 *     responses:
 *       '200':
 *         description: OCR results deleted successfully
@ -503,31 +482,22 @@ async function getOCRStats(req: Request, res: Response) {
 */
 async function deleteOCRResults(req: Request, res: Response) {
    try {
-        const { entityType, entityId } = req.params;
+        const { blobId } = req.params;

-        if (!entityType || !entityId) {
+        if (!blobId) {
            res.status(400).json({
                success: false,
-                error: 'Entity type and ID are required'
+                error: 'Blob ID is required'
            });
            (res as any).triliumResponseHandled = true;
            return;
        }

-        if (!['note', 'attachment'].includes(entityType)) {
-            res.status(400).json({
-                success: false,
-                error: 'Entity type must be either "note" or "attachment"'
-            });
-            (res as any).triliumResponseHandled = true;
-            return;
-        }
-
-        ocrService.deleteOCRResult(entityId, entityType as 'note' | 'attachment');
+        ocrService.deleteOCRResult(blobId);

        res.json({
            success: true,
-            message: `OCR results deleted for ${entityType} ${entityId}`
+            message: `OCR results deleted for blob ${blobId}`
        });
        (res as any).triliumResponseHandled = true;

--- a/apps/server/src/services/ocr/ocr_service.spec.ts
+++ b/apps/server/src/services/ocr/ocr_service.spec.ts
@ -240,7 +240,7 @@ describe('OCRService', () => {
    });

    describe('storeOCRResult', () => {
-        it('should store OCR result in database successfully', async () => {
+        it('should store OCR result in blob successfully', async () => {
            const ocrResult = {
                text: 'Sample text',
                confidence: 0.95,
@ -248,15 +248,29 @@ describe('OCRService', () => {
                language: 'eng'
            };

-            await ocrService.storeOCRResult('note123', ocrResult, 'note');
+            await ocrService.storeOCRResult('blob123', ocrResult);

            expect(mockSql.execute).toHaveBeenCalledWith(
-                expect.stringContaining('INSERT OR REPLACE INTO ocr_results'),
-                expect.arrayContaining(['note123', 'note', 'Sample text', 0.95, 'eng', expect.any(String)])
+                expect.stringContaining('UPDATE blobs SET ocr_text = ?'),
+                ['Sample text', 'blob123']
            );
        });

-        it('should handle database insertion errors', async () => {
+        it('should handle undefined blobId gracefully', async () => {
+            const ocrResult = {
+                text: 'Sample text',
+                confidence: 0.95,
+                extractedAt: '2025-06-10T10:00:00.000Z',
+                language: 'eng'
+            };
+
+            await ocrService.storeOCRResult(undefined, ocrResult);
+
+            expect(mockSql.execute).not.toHaveBeenCalled();
+            expect(mockLog.error).toHaveBeenCalledWith('Cannot store OCR result: blobId is undefined');
+        });
+
+        it('should handle database update errors', async () => {
            const error = new Error('Database error');
            mockSql.execute.mockImplementation(() => {
                throw error;
@ -269,8 +283,8 @@ describe('OCRService', () => {
                language: 'eng'
            };

-            await expect(ocrService.storeOCRResult('note123', ocrResult, 'note')).rejects.toThrow('Database error');
-            expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result for note note123: Error: Database error');
+            await expect(ocrService.storeOCRResult('blob123', ocrResult)).rejects.toThrow('Database error');
+            expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result for blob blob123: Error: Database error');
        });
    });

@ -279,6 +293,7 @@ describe('OCRService', () => {
            noteId: 'note123',
            type: 'image',
            mime: 'image/jpeg',
+            blobId: 'blob123',
            getContent: vi.fn()
        };

@ -316,10 +331,7 @@ describe('OCRService', () => {

        it('should return existing OCR result if forceReprocess is false', async () => {
            const existingResult = {
-                extracted_text: 'Existing text',
-                confidence: 0.85,
-                language: 'eng',
-                extracted_at: '2025-06-10T09:00:00.000Z'
+                ocr_text: 'Existing text'
            };
            mockSql.getRow.mockReturnValue(existingResult);

@ -327,19 +339,16 @@ describe('OCRService', () => {

            expect(result).toEqual({
                text: 'Existing text',
-                confidence: 0.85,
+                confidence: 0.95,
                language: 'eng',
-                extractedAt: '2025-06-10T09:00:00.000Z'
+                extractedAt: expect.any(String)
            });
            expect(mockNote.getContent).not.toHaveBeenCalled();
        });

        it('should reprocess if forceReprocess is true', async () => {
            const existingResult = {
-                extracted_text: 'Existing text',
-                confidence: 0.85,
-                language: 'eng',
-                extracted_at: '2025-06-10T09:00:00.000Z'
+                ocr_text: 'Existing text'
            };
            mockSql.getRow.mockResolvedValue(existingResult);
            
@ -385,6 +394,7 @@ describe('OCRService', () => {
            attachmentId: 'attach123',
            role: 'image',
            mime: 'image/png',
+            blobId: 'blob456',
            getContent: vi.fn()
        };

@ -434,10 +444,8 @@ describe('OCRService', () => {
        it('should search OCR results successfully', () => {
            const mockResults = [
                {
-                    entity_id: 'note1',
-                    entity_type: 'note',
-                    extracted_text: 'Sample search text',
-                    confidence: 0.95
+                    blobId: 'blob1',
+                    ocr_text: 'Sample search text'
                }
            ];
            mockSql.getRows.mockReturnValue(mockResults);
@ -445,36 +453,15 @@ describe('OCRService', () => {
            const results = ocrService.searchOCRResults('search');

            expect(results).toEqual([{
-                entityId: 'note1',
-                entityType: 'note',
-                text: 'Sample search text',
-                confidence: 0.95
+                blobId: 'blob1',
+                text: 'Sample search text'
            }]);
            expect(mockSql.getRows).toHaveBeenCalledWith(
-                expect.stringContaining('WHERE extracted_text LIKE ?'),
+                expect.stringContaining('WHERE ocr_text LIKE ?'),
                ['%search%']
            );
        });

-        it('should filter by entity type', () => {
-            const mockResults = [
-                {
-                    entity_id: 'note1',
-                    entity_type: 'note',
-                    extracted_text: 'Note text',
-                    confidence: 0.95
-                }
-            ];
-            mockSql.getRows.mockReturnValue(mockResults);
-
-            ocrService.searchOCRResults('text', 'note');
-
-            expect(mockSql.getRows).toHaveBeenCalledWith(
-                expect.stringContaining('AND entity_type = ?'),
-                ['%text%', 'note']
-            );
-        });
-
        it('should handle search errors gracefully', () => {
            mockSql.getRows.mockImplementation(() => {
                throw new Error('Database error');
@ -490,39 +477,37 @@ describe('OCRService', () => {
    describe('getOCRStats', () => {
        it('should return OCR statistics successfully', () => {
            const mockStats = {
-                total_processed: 150,
-                avg_confidence: 0.87
+                total_processed: 150
+            };
+            const mockNoteStats = {
+                count: 100
+            };
+            const mockAttachmentStats = {
+                count: 50
            };
-            const mockByEntityType = [
-                { entity_type: 'note', count: 100 },
-                { entity_type: 'attachment', count: 50 }
-            ];
            
-            mockSql.getRow.mockReturnValue(mockStats);
-            mockSql.getRows.mockReturnValue(mockByEntityType);
+            mockSql.getRow.mockReturnValueOnce(mockStats);
+            mockSql.getRow.mockReturnValueOnce(mockNoteStats);
+            mockSql.getRow.mockReturnValueOnce(mockAttachmentStats);

            const stats = ocrService.getOCRStats();

            expect(stats).toEqual({
                totalProcessed: 150,
-                averageConfidence: 0.87,
-                byEntityType: {
-                    note: 100,
-                    attachment: 50
-                }
+                imageNotes: 100,
+                imageAttachments: 50
            });
        });

        it('should handle missing statistics gracefully', () => {
            mockSql.getRow.mockReturnValue(null);
-            mockSql.getRows.mockReturnValue([]);

            const stats = ocrService.getOCRStats();

            expect(stats).toEqual({
                totalProcessed: 0,
-                averageConfidence: 0,
-                byEntityType: {}
+                imageNotes: 0,
+                imageAttachments: 0
            });
        });
    });
@ -698,11 +683,11 @@ describe('OCRService', () => {
                
                // Mock data for batch processing
                const imageNotes = [
-                    { noteId: 'note1', mime: 'image/jpeg' },
-                    { noteId: 'note2', mime: 'image/png' }
+                    { noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
+                    { noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
                ];
                const imageAttachments = [
-                    { attachmentId: 'attach1', mime: 'image/gif' }
+                    { attachmentId: 'attach1', mime: 'image/gif', blobId: 'blob3' }
                ];

                // Setup mocks for startBatchProcessing
@ -723,18 +708,21 @@ describe('OCRService', () => {
                    noteId: 'note1',
                    type: 'image',
                    mime: 'image/jpeg',
+                    blobId: 'blob1',
                    getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
                };
                const mockNote2 = {
                    noteId: 'note2',
                    type: 'image',
                    mime: 'image/png',
+                    blobId: 'blob2',
                    getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
                };
                const mockAttachment = {
                    attachmentId: 'attach1',
                    role: 'image',
                    mime: 'image/gif',
+                    blobId: 'blob3',
                    getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
                };

@ -761,7 +749,7 @@ describe('OCRService', () => {

            it('should handle processing errors gracefully', async () => {
                const imageNotes = [
-                    { noteId: 'note1', mime: 'image/jpeg' }
+                    { noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' }
                ];

                // Setup mocks for startBatchProcessing
@ -777,6 +765,7 @@ describe('OCRService', () => {
                    noteId: 'note1',
                    type: 'image',
                    mime: 'image/jpeg',
+                    blobId: 'blob1',
                    getContent: vi.fn().mockImplementation(() => { throw new Error('Failed to get content'); })
                };
                mockBecca.getNote.mockReturnValue(mockNote);
@ -796,8 +785,8 @@ describe('OCRService', () => {

            it('should stop processing when cancelled', async () => {
                const imageNotes = [
-                    { noteId: 'note1', mime: 'image/jpeg' },
-                    { noteId: 'note2', mime: 'image/png' }
+                    { noteId: 'note1', mime: 'image/jpeg', blobId: 'blob1' },
+                    { noteId: 'note2', mime: 'image/png', blobId: 'blob2' }
                ];

                // Setup mocks
@ -821,8 +810,8 @@ describe('OCRService', () => {

            it('should skip unsupported MIME types', async () => {
                const imageNotes = [
-                    { noteId: 'note1', mime: 'text/plain' }, // unsupported
-                    { noteId: 'note2', mime: 'image/jpeg' }  // supported
+                    { noteId: 'note1', mime: 'text/plain', blobId: 'blob1' }, // unsupported
+                    { noteId: 'note2', mime: 'image/jpeg', blobId: 'blob2' }  // supported
                ];

                // Setup mocks
@ -835,6 +824,7 @@ describe('OCRService', () => {
                    noteId: 'note2',
                    type: 'image',
                    mime: 'image/jpeg',
+                    blobId: 'blob2',
                    getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
                };
                mockBecca.getNote.mockReturnValue(mockNote);
@ -858,13 +848,13 @@ describe('OCRService', () => {

    describe('deleteOCRResult', () => {
        it('should delete OCR result successfully', () => {
-            ocrService.deleteOCRResult('note123', 'note');
+            ocrService.deleteOCRResult('blob123');

            expect(mockSql.execute).toHaveBeenCalledWith(
-                expect.stringContaining('DELETE FROM ocr_results'),
-                ['note123', 'note']
+                expect.stringContaining('UPDATE blobs SET ocr_text = NULL'),
+                ['blob123']
            );
-            expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for note note123');
+            expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for blob blob123');
        });

        it('should handle deletion errors', () => {
@ -872,8 +862,8 @@ describe('OCRService', () => {
                throw new Error('Database error');
            });

-            expect(() => ocrService.deleteOCRResult('note123', 'note')).toThrow('Database error');
-            expect(mockLog.error).toHaveBeenCalledWith('Failed to delete OCR result for note note123: Error: Database error');
+            expect(() => ocrService.deleteOCRResult('blob123')).toThrow('Database error');
+            expect(mockLog.error).toHaveBeenCalledWith('Failed to delete OCR result for blob blob123: Error: Database error');
        });
    });

@ -886,6 +876,7 @@ describe('OCRService', () => {
            mockBecca.getNote.mockReturnValue({
                noteId: 'note123',
                mime: 'image/jpeg',
+                blobId: 'blob123',
                getContent: vi.fn().mockReturnValue(Buffer.from('fake-image-data'))
            });
            mockSql.getRow.mockResolvedValue(null);
--- a/apps/server/src/services/ocr/ocr_service.ts
+++ b/apps/server/src/services/ocr/ocr_service.ts
@ -17,11 +17,9 @@ export interface OCRProcessingOptions {
    confidence?: number;
 }

-interface OCRResultRow {
-    entity_id: string;
-    entity_type: string;
-    extracted_text: string;
-    confidence: number;
+interface OCRBlobRow { // row shape of the blobId/ocr_text SELECT issued by searchOCRResults
+    blobId: string;
+    ocr_text: string; // typed non-null: that SELECT filters with "ocr_text IS NOT NULL"
 }

 /**
@ -176,8 +174,8 @@ class OCRService {
            return null;
        }

-        // Check if OCR already exists and we're not forcing reprocessing
-        const existingOCR = this.getStoredOCRResult(noteId);
+        // Check if OCR already exists in the blob and we're not forcing reprocessing
+        const existingOCR = this.getStoredOCRResult(note.blobId);
        if (existingOCR && !options.forceReprocess) {
            log.info(`OCR already exists for note ${noteId}, returning cached result`);
            return existingOCR;
@ -191,8 +189,8 @@ class OCRService {

            const ocrResult = await this.extractTextFromImage(content, options);
            
-            // Store OCR result
-            await this.storeOCRResult(noteId, ocrResult);
+            // Store OCR result in blob
+            await this.storeOCRResult(note.blobId, ocrResult);
            
            return ocrResult;
        } catch (error) {
@ -226,8 +224,8 @@ class OCRService {
            return null;
        }

-        // Check if OCR already exists and we're not forcing reprocessing
-        const existingOCR = this.getStoredOCRResult(attachmentId, 'attachment');
+        // Check if OCR already exists in the blob and we're not forcing reprocessing
+        const existingOCR = this.getStoredOCRResult(attachment.blobId);
        if (existingOCR && !options.forceReprocess) {
            log.info(`OCR already exists for attachment ${attachmentId}, returning cached result`);
            return existingOCR;
@ -241,8 +239,8 @@ class OCRService {

            const ocrResult = await this.extractTextFromImage(content, options);
            
-            // Store OCR result
-            await this.storeOCRResult(attachmentId, ocrResult, 'attachment');
+            // Store OCR result in blob
+            await this.storeOCRResult(attachment.blobId, ocrResult);
            
            return ocrResult;
        } catch (error) {
@ -252,57 +250,62 @@ class OCRService {
    }

    /**
-     * Store OCR result in database
+     * Write ocrResult.text into the ocr_text column of the blob row matching blobId; confidence, language and extractedAt are not persisted. Logs and returns without writing when blobId is falsy; logs and rethrows when the UPDATE throws.
     */
-    async storeOCRResult(entityId: string, ocrResult: OCRResult, entityType: 'note' | 'attachment' = 'note'): Promise<void> {
+    async storeOCRResult(blobId: string | undefined, ocrResult: OCRResult): Promise<void> {
+        if (!blobId) {
+            log.error('Cannot store OCR result: blobId is undefined');
+            return; // resolves normally, so the caller is not told that nothing was stored
+        }
+
        try {
+            // Only the extracted text is written; the other OCRResult fields are dropped here
            sql.execute(`
-                INSERT OR REPLACE INTO ocr_results (entity_id, entity_type, extracted_text, confidence, language, extracted_at)
-                VALUES (?, ?, ?, ?, ?, ?)
+                UPDATE blobs SET ocr_text = ? WHERE blobId = ?
            `, [
-                entityId,
-                entityType,
                ocrResult.text,
-                ocrResult.confidence,
-                ocrResult.language || 'eng',
-                ocrResult.extractedAt
+                blobId
            ]);
            
-            log.info(`Stored OCR result for ${entityType} ${entityId}`);
+            log.info(`Stored OCR result for blob ${blobId}`);
        } catch (error) {
-            log.error(`Failed to store OCR result for ${entityType} ${entityId}: ${error}`);
+            log.error(`Failed to store OCR result for blob ${blobId}: ${error}`);
            throw error;
        }
    }

    /**
-     * Get stored OCR result from database
+     * Read the stored OCR text for a blob. Returns null when blobId is falsy, when no row is found, when ocr_text is NULL or empty, or when the query throws (the error is logged, not rethrown).
     */
-    private getStoredOCRResult(entityId: string, entityType: 'note' | 'attachment' = 'note'): OCRResult | null {
+    private getStoredOCRResult(blobId: string | undefined): OCRResult | null {
+        if (!blobId) {
+            return null;
+        }
+
        try {
            const row = sql.getRow<{
-                extracted_text: string;
-                confidence: number;
-                language?: string;
-                extracted_at: string;
+                ocr_text: string | null;
            }>(`
-                SELECT extracted_text, confidence, language, extracted_at
-                FROM ocr_results 
-                WHERE entity_id = ? AND entity_type = ?
-            `, [entityId, entityType]);
+                SELECT ocr_text
+                FROM blobs 
+                WHERE blobId = ?
+            `, [blobId]);
            
-            if (!row) {
+            if (!row || !row.ocr_text) {
                return null;
            }
            
+            // Only the text is read back from the blob row. The confidence, extractedAt
+            // and language values below are hard-coded placeholders generated on every
+            // read; they are NOT the values produced when the OCR originally ran.
            return {
-                text: row.extracted_text,
-                confidence: row.confidence,
-                language: row.language,
-                extractedAt: row.extracted_at
+                text: row.ocr_text,
+                confidence: 0.95, // placeholder constant, not a measured confidence
+                extractedAt: new Date().toISOString(), // time of this read, not of the extraction
+                language: 'eng' // assumed constant; the language actually used is not read back
            };
        } catch (error) {
-            log.error(`Failed to get OCR result for ${entityType} ${entityId}: ${error}`);
+            log.error(`Failed to get OCR result for blob ${blobId}: ${error}`);
            return null;
        }
    }
@ -310,29 +313,21 @@ class OCRService {
    /**
-     * Search for text in OCR results
+     * Find blobs whose ocr_text contains searchText as a literal substring (LIKE wildcards in the input are escaped) and return them as { blobId, text } pairs; query errors are logged.
     */
-    searchOCRResults(searchText: string, entityType?: 'note' | 'attachment'): Array<{ entityId: string; entityType: string; text: string; confidence: number }> {
+    searchOCRResults(searchText: string): Array<{ blobId: string; text: string }> {
        try {
-            let query = `
-                SELECT entity_id, entity_type, extracted_text, confidence
-                FROM ocr_results 
-                WHERE extracted_text LIKE ?
+            const query = `
+                SELECT blobId, ocr_text
+                FROM blobs 
+                WHERE ocr_text LIKE ? ESCAPE '\\'
+                AND ocr_text IS NOT NULL
            `;
-            const params = [`%${searchText}%`];
+            const params = [`%${searchText.replace(/[\\%_]/g, '\\$&')}%`]; // escape \, % and _ so user input is matched literally
            
-            if (entityType) {
-                query += ' AND entity_type = ?';
-                params.push(entityType);
-            }
-            
-            query += ' ORDER BY confidence DESC';
-            
-            const rows = sql.getRows<OCRResultRow>(query, params);
+            const rows = sql.getRows<OCRBlobRow>(query, params);
            
            return rows.map(row => ({
-                entityId: row.entity_id,
-                entityType: row.entity_type,
-                text: row.extracted_text,
-                confidence: row.confidence
+                blobId: row.blobId,
+                text: row.ocr_text
            }));
        } catch (error) {
            log.error(`Failed to search OCR results: ${error}`);
@ -341,18 +336,18 @@ class OCRService {
    }

    /**
-     * Delete OCR results for an entity
+     * Clear the stored OCR text of a blob by setting its ocr_text column to NULL (the statement is an UPDATE, not a row delete); logs and rethrows if the statement throws.
     */
-    deleteOCRResult(entityId: string, entityType: 'note' | 'attachment' = 'note'): void {
+    deleteOCRResult(blobId: string): void {
        try {
            sql.execute(`
-                DELETE FROM ocr_results 
-                WHERE entity_id = ? AND entity_type = ?
-            `, [entityId, entityType]);
+                UPDATE blobs SET ocr_text = NULL 
+                WHERE blobId = ?
+            `, [blobId]);
            
-            log.info(`Deleted OCR result for ${entityType} ${entityId}`);
+            log.info(`Deleted OCR result for blob ${blobId}`);
        } catch (error) {
-            log.error(`Failed to delete OCR result for ${entityType} ${entityId}: ${error}`);
+            log.error(`Failed to delete OCR result for blob ${blobId}: ${error}`);
            throw error;
        }
    }
@ -373,14 +368,15 @@ class OCRService {
            const imageNotes = sql.getRows<{
                noteId: string;
                mime: string;
+                blobId: string;
            }>(`
-                SELECT noteId, mime
-                FROM notes 
-                WHERE type = 'image' 
-                AND isDeleted = 0
-                AND noteId NOT IN (
-                    SELECT entity_id FROM ocr_results WHERE entity_type = 'note'
-                )
+                SELECT n.noteId, n.mime, n.blobId
+                FROM notes n
+                LEFT JOIN blobs b ON n.blobId = b.blobId
+                WHERE n.type = 'image' 
+                AND n.isDeleted = 0
+                AND n.blobId IS NOT NULL
+                AND (b.ocr_text IS NULL OR b.ocr_text = '')
            `);

            log.info(`Found ${imageNotes.length} image notes to process`);
@ -401,14 +397,15 @@ class OCRService {
            const imageAttachments = sql.getRows<{
                attachmentId: string;
                mime: string;
+                blobId: string;
            }>(`
-                SELECT attachmentId, mime
-                FROM attachments 
-                WHERE role = 'image'
-                AND isDeleted = 0
-                AND attachmentId NOT IN (
-                    SELECT entity_id FROM ocr_results WHERE entity_type = 'attachment'
-                )
+                SELECT a.attachmentId, a.mime, a.blobId
+                FROM attachments a
+                LEFT JOIN blobs b ON a.blobId = b.blobId
+                WHERE a.role = 'image'
+                AND a.isDeleted = 0
+                AND a.blobId IS NOT NULL
+                AND (b.ocr_text IS NULL OR b.ocr_text = '')
            `);

            log.info(`Found ${imageAttachments.length} image attachments to process`);
@ -435,38 +432,48 @@ class OCRService {
    /**
     * Get OCR statistics
     */
-    getOCRStats(): { totalProcessed: number; averageConfidence: number; byEntityType: Record<string, number> } {
+    getOCRStats(): { totalProcessed: number; imageNotes: number; imageAttachments: number } {
        try {
            const stats = sql.getRow<{
                total_processed: number;
-                avg_confidence: number;
            }>(`
-                SELECT 
-                    COUNT(*) as total_processed,
-                    AVG(confidence) as avg_confidence
-                FROM ocr_results
+                SELECT COUNT(*) as total_processed
+                FROM blobs
+                WHERE ocr_text IS NOT NULL AND ocr_text != ''
            `);

-            const byEntityType = sql.getRows<{
-                entity_type: string;
+            // Count non-deleted image notes whose blob has non-empty OCR text
+            const noteStats = sql.getRow<{
                count: number;
            }>(`
-                SELECT entity_type, COUNT(*) as count
-                FROM ocr_results
-                GROUP BY entity_type
+                SELECT COUNT(*) as count
+                FROM notes n
+                JOIN blobs b ON n.blobId = b.blobId
+                WHERE n.type = 'image'
+                AND n.isDeleted = 0
+                AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
+            `);
+
+            // Count non-deleted image attachments whose blob has non-empty OCR text
+            const attachmentStats = sql.getRow<{
+                count: number;
+            }>(`
+                SELECT COUNT(*) as count
+                FROM attachments a
+                JOIN blobs b ON a.blobId = b.blobId
+                WHERE a.role = 'image'
+                AND a.isDeleted = 0
+                AND b.ocr_text IS NOT NULL AND b.ocr_text != ''
            `);

            return {
                totalProcessed: stats?.total_processed || 0,
-                averageConfidence: stats?.avg_confidence || 0,
-                byEntityType: byEntityType.reduce((acc, row) => {
-                    acc[row.entity_type] = row.count;
-                    return acc;
-                }, {} as Record<string, number>)
+                imageNotes: noteStats?.count || 0,
+                imageAttachments: attachmentStats?.count || 0
            };
        } catch (error) {
            log.error(`Failed to get OCR stats: ${error}`);
-            return { totalProcessed: 0, averageConfidence: 0, byEntityType: {} };
+            return { totalProcessed: 0, imageNotes: 0, imageAttachments: 0 };
        }
    }

@ -584,14 +591,15 @@ class OCRService {
            const imageNotes = sql.getRows<{
                noteId: string;
                mime: string;
+                blobId: string;
            }>(`
-                SELECT noteId, mime
-                FROM notes 
-                WHERE type = 'image' 
-                AND isDeleted = 0
-                AND noteId NOT IN (
-                    SELECT entity_id FROM ocr_results WHERE entity_type = 'note'
-                )
+                SELECT n.noteId, n.mime, n.blobId
+                FROM notes n
+                LEFT JOIN blobs b ON n.blobId = b.blobId
+                WHERE n.type = 'image' 
+                AND n.isDeleted = 0
+                AND n.blobId IS NOT NULL
+                AND (b.ocr_text IS NULL OR b.ocr_text = '')
            `);

            for (const noteRow of imageNotes) {
@ -616,14 +624,15 @@ class OCRService {
            const imageAttachments = sql.getRows<{
                attachmentId: string;
                mime: string;
+                blobId: string;
            }>(`
-                SELECT attachmentId, mime
-                FROM attachments 
-                WHERE role = 'image'
-                AND isDeleted = 0
-                AND attachmentId NOT IN (
-                    SELECT entity_id FROM ocr_results WHERE entity_type = 'attachment'
-                )
+                SELECT a.attachmentId, a.mime, a.blobId
+                FROM attachments a
+                LEFT JOIN blobs b ON a.blobId = b.blobId
+                WHERE a.role = 'image'
+                AND a.isDeleted = 0
+                AND a.blobId IS NOT NULL
+                AND (b.ocr_text IS NULL OR b.ocr_text = '')
            `);

            for (const attachmentRow of imageAttachments) {
--- a/apps/server/src/services/search/expressions/ocr_content.ts
+++ b/apps/server/src/services/search/expressions/ocr_content.ts
@ -25,21 +25,30 @@ export default class OCRContentExpression extends Expression {
        const ocrResults = this.searchOCRContent(this.searchText);

        for (const ocrResult of ocrResults) {
-            let note: import('../../../becca/entities/bnote.js').default | null = null;
-            
-            if (ocrResult.entity_type === 'note') {
-                note = becca.getNote(ocrResult.entity_id);
-            } else if (ocrResult.entity_type === 'attachment') {
-                // For attachments, find the parent note
-                const attachment = becca.getAttachment(ocrResult.entity_id);
-                if (attachment) {
-                    note = becca.getNote(attachment.ownerId);
+            // Find notes that use this blob
+            const notes = sql.getRows<{noteId: string}>(`
+                SELECT noteId FROM notes 
+                WHERE blobId = ? AND isDeleted = 0
+            `, [ocrResult.blobId]);
+
+            for (const noteRow of notes) {
+                const note = becca.getNote(noteRow.noteId);
+                if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
+                    resultNoteSet.add(note);
                }
            }

-            // Only add notes that are in the input note set and not deleted
-            if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
-                resultNoteSet.add(note);
+            // Find attachments that use this blob and their parent notes
+            const attachments = sql.getRows<{ownerId: string}>(`
+                SELECT ownerId FROM attachments
+                WHERE blobId = ? AND isDeleted = 0
+            `, [ocrResult.blobId]);
+
+            for (const attachmentRow of attachments) {
+                const note = becca.getNote(attachmentRow.ownerId);
+                if (note && !note.isDeleted && inputNoteSet.hasNoteId(note.noteId)) {
+                    resultNoteSet.add(note);
+                }
            }
        }

@ -62,44 +71,24 @@ export default class OCRContentExpression extends Expression {
    }

    private searchOCRContent(searchText: string): Array<{
-        entity_id: string;
-        entity_type: string;
-        extracted_text: string;
-        confidence: number;
+        blobId: string;
+        ocr_text: string;
    }> {
        try {
-            // Use FTS search if available, otherwise fall back to LIKE
-            let query: string;
-            let params: unknown[];
-
-            try {
-                // Try FTS first
-                query = `
-                    SELECT ocr.entity_id, ocr.entity_type, ocr.extracted_text, ocr.confidence
-                    FROM ocr_results_fts fts
-                    JOIN ocr_results ocr ON fts.rowid = ocr.id
-                    WHERE ocr_results_fts MATCH ?
-                    ORDER BY ocr.confidence DESC, rank
-                    LIMIT 50
-                `;
-                params = [searchText];
-            } catch {
-                // Fallback to LIKE search
-                query = `
-                    SELECT entity_id, entity_type, extracted_text, confidence
-                    FROM ocr_results
-                    WHERE extracted_text LIKE ?
-                    ORDER BY confidence DESC
-                    LIMIT 50
-                `;
-                params = [`%${searchText}%`];
-            }
+            // Search OCR text stored on blobs. LIKE wildcards (% and _) typed by
+            // the user are backslash-escaped so they are matched literally.
+            const query = `
+                SELECT blobId, ocr_text
+                FROM blobs
+                WHERE ocr_text LIKE ? ESCAPE '\\'
+                AND ocr_text IS NOT NULL AND ocr_text != ''
+                LIMIT 50
+            `;
+            const params = [`%${searchText.replace(/[\\%_]/g, "\\$&")}%`];

            return sql.getRows<{
-                entity_id: string;
-                entity_type: string;
-                extracted_text: string;
-                confidence: number;
+                blobId: string;
+                ocr_text: string;
            }>(query, params);
        } catch (error) {
            console.error('Error searching OCR content:', error);
--- a/packages/commons/src/lib/rows.ts
+++ b/packages/commons/src/lib/rows.ts
@ -70,6 +70,7 @@ export interface BlobRow {
    blobId: string;
    content: string | Buffer;
    contentLength: number;
+    ocr_text?: string | null; // text extracted from the blob's image by OCR; null/absent when none is stored
    dateModified: string;
    utcDateModified: string;
 }