mirror of
https://github.com/zadam/trilium.git
synced 2025-12-05 15:04:24 +01:00
838 lines
30 KiB
TypeScript
838 lines
30 KiB
TypeScript
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
// Mock Tesseract.js.
//
// `vi.mock` calls are hoisted above every other statement in this file, so the
// objects handed out by the factory are created inside `vi.hoisted`, which is
// hoisted together with them. The factory therefore never closes over a
// `const` that has not been initialised yet, regardless of when the mocked
// module is first loaded.
const { mockWorker, mockTesseract } = vi.hoisted(() => {
    // Stand-in for the worker object that `createWorker` resolves to.
    const mockWorker = {
        recognize: vi.fn(),
        terminate: vi.fn(),
        reinitialize: vi.fn()
    };

    // Stand-in for the default export of `tesseract.js`.
    const mockTesseract = {
        createWorker: vi.fn().mockResolvedValue(mockWorker)
    };

    return { mockWorker, mockTesseract };
});

vi.mock('tesseract.js', () => ({
    default: mockTesseract
}));
|
|
|
|
// Mock dependencies.
//
// The mock objects are built with `vi.hoisted` because the `vi.mock` calls
// below are hoisted to the top of the file; creating the objects in the same
// hoisted phase keeps every factory valid no matter when the mocked module is
// first imported.
const { mockOptions, mockLog, mockSql, mockBecca } = vi.hoisted(() => ({
    // Replaces the default export of `../options.js`.
    mockOptions: {
        getOptionBool: vi.fn(),
        getOption: vi.fn()
    },
    // Replaces the default export of `../log.js`.
    mockLog: {
        info: vi.fn(),
        error: vi.fn()
    },
    // Replaces the default export of `../sql.js`.
    mockSql: {
        execute: vi.fn(),
        getRow: vi.fn(),
        getRows: vi.fn()
    },
    // Replaces the default export of `../../becca/becca.js`.
    mockBecca: {
        getNote: vi.fn(),
        getAttachment: vi.fn()
    }
}));

vi.mock('../options.js', () => ({
    default: mockOptions
}));

vi.mock('../log.js', () => ({
    default: mockLog
}));

vi.mock('../sql.js', () => ({
    default: mockSql
}));

vi.mock('../../becca/becca.js', () => ({
    default: mockBecca
}));
|
|
|
|
// The service under test. It is imported dynamically in `beforeEach`, i.e.
// only after the mocks above have been registered.
let ocrService: typeof import('./ocr_service.js').default;

/**
 * Global per-test setup: wipes all mock state, re-arms the default mock
 * behaviour and loads a fresh copy of the OCR service module.
 */
beforeEach(async () => {
    // Reset (not just clear) every mock: besides the recorded calls this also
    // drops implementations and queued `...Once` values left behind by the
    // previous test, so nothing leaks from one test into the next.
    vi.resetAllMocks();

    // Re-arm the default mock behaviour. `createWorker` has to be re-armed
    // here as well, otherwise it would keep whatever a previous test (or a
    // mock reset/restore) left it with.
    mockTesseract.createWorker.mockResolvedValue(mockWorker);
    mockOptions.getOptionBool.mockReturnValue(true);
    mockOptions.getOption.mockReturnValue('eng');
    mockSql.execute.mockResolvedValue({ lastInsertRowid: 1 });
    // NOTE(review): other tests in this file stub `getRow`/`getRows` with
    // synchronous `mockReturnValue`; confirm whether the service awaits these
    // calls before relying on the promise-returning defaults below.
    mockSql.getRow.mockResolvedValue(null);
    mockSql.getRows.mockResolvedValue([]);

    // The module's default export is an instance, not a class. Without
    // clearing the module cache the dynamic import would return the same
    // instance every time and state would carry over between tests.
    vi.resetModules();
    const module = await import('./ocr_service.js');
    ocrService = module.default;
});
|
|
|
|
// Global per-test teardown: let Vitest restore all mocks once a test is done.
afterEach(function restoreMocks() {
    vi.restoreAllMocks();
});
|
|
|
|
describe('OCRService', () => {
|
|
// `isOCREnabled` is checked against the `ocrEnabled` boolean option.
describe('isOCREnabled', () => {
    it('should return true when OCR is enabled in options', () => {
        mockOptions.getOptionBool.mockReturnValue(true);

        const enabled = ocrService.isOCREnabled();

        expect(enabled).toBe(true);
        expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
    });

    it('should return false when OCR is disabled in options', () => {
        mockOptions.getOptionBool.mockReturnValue(false);

        const enabled = ocrService.isOCREnabled();

        expect(enabled).toBe(false);
        expect(mockOptions.getOptionBool).toHaveBeenCalledWith('ocrEnabled');
    });

    it('should return false when options throws an error', () => {
        // Simulate the options lookup itself failing.
        const failingLookup = () => {
            throw new Error('Options not available');
        };
        mockOptions.getOptionBool.mockImplementation(failingLookup);

        const enabled = ocrService.isOCREnabled();

        expect(enabled).toBe(false);
    });
});
|
|
|
|
// Table-driven checks of which MIME types the service accepts for OCR.
describe('isSupportedMimeType', () => {
    it('should return true for supported image MIME types', () => {
        const supported = [
            'image/jpeg',
            'image/jpg',
            'image/png',
            'image/gif',
            'image/bmp',
            'image/tiff'
        ];

        for (const mime of supported) {
            expect(ocrService.isSupportedMimeType(mime)).toBe(true);
        }
    });

    it('should return false for unsupported MIME types', () => {
        const unsupported = ['text/plain', 'application/pdf', 'video/mp4', 'audio/mp3'];

        for (const mime of unsupported) {
            expect(ocrService.isSupportedMimeType(mime)).toBe(false);
        }
    });

    it('should handle null/undefined MIME types', () => {
        // Deliberately outside the declared parameter type, hence the cast.
        const missing = [null, undefined, ''];

        for (const mime of missing) {
            expect(ocrService.isSupportedMimeType(mime as any)).toBe(false);
        }
    });
});
|
|
|
|
// Tests for creating the Tesseract worker through `initialize()`.
describe('initialize', () => {
    it('should initialize Tesseract worker successfully', async () => {
        await ocrService.initialize();

        expect(mockTesseract.createWorker).toHaveBeenCalledWith('eng', 1, {
            logger: expect.any(Function)
        });
        expect(mockLog.info).toHaveBeenCalledWith('Initializing OCR service with Tesseract.js...');
        expect(mockLog.info).toHaveBeenCalledWith('OCR service initialized successfully');
    });

    it('should not reinitialize if already initialized', async () => {
        await ocrService.initialize();
        // Forget the first call so only the second `initialize()` is observed.
        mockTesseract.createWorker.mockClear();

        await ocrService.initialize();

        expect(mockTesseract.createWorker).not.toHaveBeenCalled();
    });

    it('should handle initialization errors', async () => {
        const error = new Error('Tesseract initialization failed');
        // Reject a single call only: a persistent rejection would stay on the
        // shared mock and make `createWorker` fail in every later test too.
        mockTesseract.createWorker.mockRejectedValueOnce(error);

        await expect(ocrService.initialize()).rejects.toThrow('Tesseract initialization failed');
        expect(mockLog.error).toHaveBeenCalledWith('Failed to initialize OCR service: Error: Tesseract initialization failed');
    });
});
|
|
|
|
// Tests for running recognition on an image buffer via `extractTextFromImage`.
describe('extractTextFromImage', () => {
    const mockImageBuffer = Buffer.from('fake-image-data');

    // Builds a value in the shape the mocked `recognize` resolves to.
    const recognized = (text: string, confidence: number) => ({
        data: { text, confidence }
    });

    beforeEach(async () => {
        await ocrService.initialize();
    });

    it('should extract text successfully with default options', async () => {
        mockWorker.recognize.mockResolvedValue(recognized('Extracted text from image', 95));

        const result = await ocrService.extractTextFromImage(mockImageBuffer);

        // The expected confidence is the worker's 95 expressed as 0.95.
        expect(result).toEqual({
            text: 'Extracted text from image',
            confidence: 0.95,
            extractedAt: expect.any(String),
            language: 'eng'
        });
        expect(mockWorker.recognize).toHaveBeenCalledWith(mockImageBuffer);
    });

    it('should extract text with custom language', async () => {
        mockWorker.recognize.mockResolvedValue(recognized('French text', 88));

        const options = { language: 'fra' };
        const result = await ocrService.extractTextFromImage(mockImageBuffer, options);

        expect(result.language).toBe('fra');
        // The assertions expect the existing worker to be terminated and a
        // worker for the requested language to be created.
        expect(mockWorker.terminate).toHaveBeenCalled();
        expect(mockTesseract.createWorker).toHaveBeenCalledWith('fra', 1, expect.any(Object));
    });

    it('should handle OCR recognition errors', async () => {
        const error = new Error('OCR recognition failed');
        mockWorker.recognize.mockRejectedValue(error);

        const extraction = ocrService.extractTextFromImage(mockImageBuffer);

        await expect(extraction).rejects.toThrow('OCR recognition failed');
        expect(mockLog.error).toHaveBeenCalledWith('OCR text extraction failed: Error: OCR recognition failed');
    });

    it('should handle empty or low-confidence results', async () => {
        // Whitespace-only text with a low confidence score.
        mockWorker.recognize.mockResolvedValue(recognized(' ', 15));

        const result = await ocrService.extractTextFromImage(mockImageBuffer);

        expect(result.text).toBe('');
        expect(result.confidence).toBe(0.15);
    });
});
|
|
|
|
// Tests for persisting an OCR result through the mocked `sql.execute`.
describe('storeOCRResult', () => {
    // Fresh fixture per call so tests never share one object.
    const makeResult = () => ({
        text: 'Sample text',
        confidence: 0.95,
        extractedAt: '2025-06-10T10:00:00.000Z',
        language: 'eng'
    });

    it('should store OCR result in database successfully', async () => {
        const ocrResult = makeResult();

        await ocrService.storeOCRResult('note123', ocrResult, 'note');

        const expectedSql = expect.stringContaining('INSERT OR REPLACE INTO ocr_results');
        const expectedParams = expect.arrayContaining([
            'note123',
            'note',
            'Sample text',
            0.95,
            'eng',
            expect.any(String)
        ]);
        expect(mockSql.execute).toHaveBeenCalledWith(expectedSql, expectedParams);
    });

    it('should handle database insertion errors', async () => {
        const error = new Error('Database error');
        mockSql.execute.mockRejectedValue(error);
        const ocrResult = makeResult();

        const storing = ocrService.storeOCRResult('note123', ocrResult, 'note');

        await expect(storing).rejects.toThrow('Database error');
        expect(mockLog.error).toHaveBeenCalledWith('Failed to store OCR result: Error: Database error');
    });
});
|
|
|
|
// Tests for OCR processing of a note looked up through the mocked becca.
describe('processNoteOCR', () => {
    // Shared note double returned by `becca.getNote` in this group.
    const mockNote = {
        noteId: 'note123',
        mime: 'image/jpeg',
        getBlob: vi.fn()
    };

    beforeEach(() => {
        // One test below overwrites `mime` on the shared object; restore the
        // default here so the tests do not depend on execution order.
        mockNote.mime = 'image/jpeg';
        mockBecca.getNote.mockReturnValue(mockNote);
        mockNote.getBlob.mockResolvedValue(Buffer.from('fake-image-data'));
    });

    it('should process note OCR successfully', async () => {
        mockSql.getRow.mockResolvedValue(null); // No existing OCR result

        const mockOCRResult = {
            data: {
                text: 'Note image text',
                confidence: 90
            }
        };
        await ocrService.initialize();
        mockWorker.recognize.mockResolvedValue(mockOCRResult);

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toEqual({
            text: 'Note image text',
            confidence: 0.9,
            extractedAt: expect.any(String),
            language: 'eng'
        });
        expect(mockBecca.getNote).toHaveBeenCalledWith('note123');
        expect(mockNote.getBlob).toHaveBeenCalled();
    });

    it('should return existing OCR result if forceReprocess is false', async () => {
        // Row as stored in the database (snake_case columns).
        const existingResult = {
            extracted_text: 'Existing text',
            confidence: 0.85,
            language: 'eng',
            extracted_at: '2025-06-10T09:00:00.000Z'
        };
        mockSql.getRow.mockResolvedValue(existingResult);

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toEqual({
            text: 'Existing text',
            confidence: 0.85,
            language: 'eng',
            extractedAt: '2025-06-10T09:00:00.000Z'
        });
        // The stored result is reused, so the image content must not be read.
        expect(mockNote.getBlob).not.toHaveBeenCalled();
    });

    it('should reprocess if forceReprocess is true', async () => {
        const existingResult = {
            extracted_text: 'Existing text',
            confidence: 0.85,
            language: 'eng',
            extracted_at: '2025-06-10T09:00:00.000Z'
        };
        mockSql.getRow.mockResolvedValue(existingResult);

        await ocrService.initialize();
        const mockOCRResult = {
            data: {
                text: 'New processed text',
                confidence: 95
            }
        };
        mockWorker.recognize.mockResolvedValue(mockOCRResult);

        const result = await ocrService.processNoteOCR('note123', { forceReprocess: true });

        expect(result?.text).toBe('New processed text');
        expect(mockNote.getBlob).toHaveBeenCalled();
    });

    it('should return null for non-existent note', async () => {
        mockBecca.getNote.mockReturnValue(null);

        const result = await ocrService.processNoteOCR('nonexistent');

        expect(result).toBe(null);
        expect(mockLog.error).toHaveBeenCalledWith('Note nonexistent not found');
    });

    it('should return null for unsupported MIME type', async () => {
        // Mutates the shared double; `beforeEach` restores it for other tests.
        mockNote.mime = 'text/plain';

        const result = await ocrService.processNoteOCR('note123');

        expect(result).toBe(null);
        expect(mockLog.info).toHaveBeenCalledWith('Note note123 has unsupported MIME type for OCR: text/plain');
    });
});
|
|
|
|
// Tests for OCR processing of an attachment looked up through the mocked becca.
describe('processAttachmentOCR', () => {
    // Shared attachment double returned by `becca.getAttachment` in this group.
    const mockAttachment = {
        attachmentId: 'attach123',
        mime: 'image/png',
        getBlob: vi.fn()
    };

    beforeEach(() => {
        mockBecca.getAttachment.mockReturnValue(mockAttachment);
        mockAttachment.getBlob.mockResolvedValue(Buffer.from('fake-image-data'));
    });

    it('should process attachment OCR successfully', async () => {
        // No stored OCR result for this attachment.
        mockSql.getRow.mockResolvedValue(null);

        await ocrService.initialize();
        mockWorker.recognize.mockResolvedValue({
            data: {
                text: 'Attachment image text',
                confidence: 92
            }
        });

        const result = await ocrService.processAttachmentOCR('attach123');

        const expected = {
            text: 'Attachment image text',
            confidence: 0.92,
            extractedAt: expect.any(String),
            language: 'eng'
        };
        expect(result).toEqual(expected);
        expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach123');
    });

    it('should return null for non-existent attachment', async () => {
        mockBecca.getAttachment.mockReturnValue(null);

        const result = await ocrService.processAttachmentOCR('nonexistent');

        expect(result).toBe(null);
        expect(mockLog.error).toHaveBeenCalledWith('Attachment nonexistent not found');
    });
});
|
|
|
|
// Tests for the text search over stored OCR results.
describe('searchOCRResults', () => {
    it('should search OCR results successfully', () => {
        // One database row (snake_case columns) returned by the mocked query.
        mockSql.getRows.mockReturnValue([
            {
                entity_id: 'note1',
                entity_type: 'note',
                extracted_text: 'Sample search text',
                confidence: 0.95
            }
        ]);

        const results = ocrService.searchOCRResults('search');

        // The row is expected back with camelCase keys.
        const expected = [
            {
                entityId: 'note1',
                entityType: 'note',
                text: 'Sample search text',
                confidence: 0.95
            }
        ];
        expect(results).toEqual(expected);
        expect(mockSql.getRows).toHaveBeenCalledWith(
            expect.stringContaining('WHERE extracted_text LIKE ?'),
            ['%search%']
        );
    });

    it('should filter by entity type', () => {
        mockSql.getRows.mockReturnValue([
            {
                entity_id: 'note1',
                entity_type: 'note',
                extracted_text: 'Note text',
                confidence: 0.95
            }
        ]);

        ocrService.searchOCRResults('text', 'note');

        const expectedSql = expect.stringContaining('AND entity_type = ?');
        const expectedParams = ['%text%', 'note'];
        expect(mockSql.getRows).toHaveBeenCalledWith(expectedSql, expectedParams);
    });

    it('should handle search errors gracefully', () => {
        const failingQuery = () => {
            throw new Error('Database error');
        };
        mockSql.getRows.mockImplementation(failingQuery);

        const results = ocrService.searchOCRResults('search');

        expect(results).toEqual([]);
        expect(mockLog.error).toHaveBeenCalledWith('Failed to search OCR results: Error: Database error');
    });
});
|
|
|
|
// Tests for the aggregate statistics reported by `getOCRStats`.
describe('getOCRStats', () => {
    it('should return OCR statistics successfully', () => {
        // Totals come from the `getRow` stub, per-type counts from `getRows`.
        mockSql.getRow.mockReturnValue({
            total_processed: 150,
            avg_confidence: 0.87
        });
        mockSql.getRows.mockReturnValue([
            { entity_type: 'note', count: 100 },
            { entity_type: 'attachment', count: 50 }
        ]);

        const stats = ocrService.getOCRStats();

        const expected = {
            totalProcessed: 150,
            averageConfidence: 0.87,
            byEntityType: {
                note: 100,
                attachment: 50
            }
        };
        expect(stats).toEqual(expected);
    });

    it('should handle missing statistics gracefully', () => {
        mockSql.getRow.mockReturnValue(null);
        mockSql.getRows.mockReturnValue([]);

        const stats = ocrService.getOCRStats();

        // With no data every figure is expected to fall back to zero/empty.
        const expected = {
            totalProcessed: 0,
            averageConfidence: 0,
            byEntityType: {}
        };
        expect(stats).toEqual(expected);
    });
});
|
|
|
|
describe('Batch Processing', () => {
|
|
// Tests for the preconditions and result of `startBatchProcessing`.
describe('startBatchProcessing', () => {
    // Queues the two one-shot `getRow` answers used by these tests: the
    // image-note count first, then the image-attachment count.
    const queueImageCounts = (notes: number, attachments: number) => {
        mockSql.getRow.mockReturnValueOnce({ count: notes });
        mockSql.getRow.mockReturnValueOnce({ count: attachments });
    };

    beforeEach(() => {
        // Reset batch processing state
        ocrService.cancelBatchProcessing();
    });

    it('should start batch processing when images are available', async () => {
        queueImageCounts(5, 3);

        const result = await ocrService.startBatchProcessing();

        expect(result).toEqual({ success: true });
        expect(mockSql.getRow).toHaveBeenCalledTimes(2);
    });

    it('should return error if batch processing already in progress', async () => {
        // First batch is started normally...
        queueImageCounts(5, 3);
        await ocrService.startBatchProcessing();

        // ...so the second attempt is expected to be refused.
        const result = await ocrService.startBatchProcessing();

        const expected = {
            success: false,
            message: 'Batch processing already in progress'
        };
        expect(result).toEqual(expected);
    });

    it('should return error if OCR is disabled', async () => {
        mockOptions.getOptionBool.mockReturnValue(false);

        const result = await ocrService.startBatchProcessing();

        const expected = {
            success: false,
            message: 'OCR is disabled'
        };
        expect(result).toEqual(expected);
    });

    it('should return error if no images need processing', async () => {
        queueImageCounts(0, 0);

        const result = await ocrService.startBatchProcessing();

        const expected = {
            success: false,
            message: 'No images found that need OCR processing'
        };
        expect(result).toEqual(expected);
    });

    it('should handle database errors gracefully', async () => {
        const error = new Error('Database connection failed');
        const failingQuery = () => {
            throw error;
        };
        mockSql.getRow.mockImplementation(failingQuery);

        const result = await ocrService.startBatchProcessing();

        const expected = {
            success: false,
            message: 'Database connection failed'
        };
        expect(result).toEqual(expected);
        expect(mockLog.error).toHaveBeenCalledWith(
            'Failed to start batch processing: Database connection failed'
        );
    });
});
|
|
|
|
// Tests for the progress snapshot returned by `getBatchProgress`.
describe('getBatchProgress', () => {
    afterEach(() => {
        // A test in this group starts a batch and never finishes it. Cancel it
        // so an in-progress batch cannot leak into the tests that follow
        // (cancelling is exercised elsewhere as a no-op when nothing runs).
        ocrService.cancelBatchProcessing();
    });

    it('should return initial progress state', () => {
        const progress = ocrService.getBatchProgress();

        expect(progress).toEqual({
            inProgress: false,
            total: 0,
            processed: 0
        });
    });

    it('should return progress with percentage when total > 0', async () => {
        // Start batch processing: 10 image notes, 0 image attachments.
        mockSql.getRow.mockReturnValueOnce({ count: 10 });
        mockSql.getRow.mockReturnValueOnce({ count: 0 });
        await ocrService.startBatchProcessing();

        const progress = ocrService.getBatchProgress();

        expect(progress.inProgress).toBe(true);
        expect(progress.total).toBe(10);
        expect(progress.processed).toBe(0);
        expect(progress.percentage).toBe(0);
        expect(progress.startTime).toBeInstanceOf(Date);
    });
});
|
|
|
|
// Tests for cancelling a batch, both while one is running and when idle.
describe('cancelBatchProcessing', () => {
    it('should cancel ongoing batch processing', async () => {
        // Start batch processing: 5 image notes, 0 image attachments.
        mockSql.getRow
            .mockReturnValueOnce({ count: 5 })
            .mockReturnValueOnce({ count: 0 });
        await ocrService.startBatchProcessing();

        const beforeCancel = ocrService.getBatchProgress();
        expect(beforeCancel.inProgress).toBe(true);

        ocrService.cancelBatchProcessing();

        const afterCancel = ocrService.getBatchProgress();
        expect(afterCancel.inProgress).toBe(false);
        expect(mockLog.info).toHaveBeenCalledWith('Batch OCR processing cancelled');
    });

    it('should do nothing if no batch processing is running', () => {
        ocrService.cancelBatchProcessing();

        // No cancellation message is expected when there was nothing to cancel.
        expect(mockLog.info).not.toHaveBeenCalledWith('Batch OCR processing cancelled');
    });
});
|
|
|
|
// Tests for the work that keeps running after `startBatchProcessing`
// has returned.
describe('processBatchInBackground', () => {
    beforeEach(async () => {
        await ocrService.initialize();
    });

    it('should process image notes and attachments in sequence', async () => {
        // Mock data for batch processing
        const imageNotes = [
            { noteId: 'note1', mime: 'image/jpeg' },
            { noteId: 'note2', mime: 'image/png' }
        ];
        const imageAttachments = [
            { attachmentId: 'attach1', mime: 'image/gif' }
        ];

        // Setup mocks for startBatchProcessing
        mockSql.getRow.mockReturnValueOnce({ count: 2 }); // image notes count
        mockSql.getRow.mockReturnValueOnce({ count: 1 }); // image attachments count

        // Setup mocks for background processing
        mockSql.getRows.mockReturnValueOnce(imageNotes); // image notes query
        mockSql.getRows.mockReturnValueOnce(imageAttachments); // image attachments query

        // Mock successful OCR processing
        mockWorker.recognize.mockResolvedValue({
            data: { text: 'Test text', confidence: 95 }
        });

        // Mock notes and attachments
        const mockNote = {
            noteId: 'note1',
            type: 'image',
            mime: 'image/jpeg',
            getContent: vi.fn().mockResolvedValue(Buffer.from('fake-image-data'))
        };
        const mockAttachment = {
            attachmentId: 'attach1',
            role: 'image',
            mime: 'image/gif',
            getContent: vi.fn().mockResolvedValue(Buffer.from('fake-image-data'))
        };

        mockBecca.getNote.mockReturnValue(mockNote);
        mockBecca.getAttachment.mockReturnValue(mockAttachment);
        mockSql.getRow.mockReturnValue(null); // No existing OCR results

        // Start batch processing
        await ocrService.startBatchProcessing();

        // Poll until the background work has touched every entity instead of
        // sleeping for a fixed time, which is both slower and timing-dependent.
        await vi.waitFor(() => {
            expect(mockBecca.getNote).toHaveBeenCalledWith('note1');
            expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
            expect(mockBecca.getAttachment).toHaveBeenCalledWith('attach1');
        });
    });

    it('should handle processing errors gracefully', async () => {
        const imageNotes = [
            { noteId: 'note1', mime: 'image/jpeg' }
        ];

        // Setup mocks for startBatchProcessing
        mockSql.getRow.mockReturnValueOnce({ count: 1 });
        mockSql.getRow.mockReturnValueOnce({ count: 0 });

        // Setup mocks for background processing
        mockSql.getRows.mockReturnValueOnce(imageNotes);
        mockSql.getRows.mockReturnValueOnce([]);

        // Mock note that will cause an error
        const mockNote = {
            noteId: 'note1',
            type: 'image',
            mime: 'image/jpeg',
            getContent: vi.fn().mockRejectedValue(new Error('Failed to get content'))
        };
        mockBecca.getNote.mockReturnValue(mockNote);
        mockSql.getRow.mockReturnValue(null);

        // Start batch processing
        await ocrService.startBatchProcessing();

        // Poll until the failure has been reported by the background work.
        await vi.waitFor(() => {
            expect(mockLog.error).toHaveBeenCalledWith(
                expect.stringContaining('Failed to process OCR for note note1')
            );
        });
    });

    it('should stop processing when cancelled', async () => {
        const imageNotes = [
            { noteId: 'note1', mime: 'image/jpeg' },
            { noteId: 'note2', mime: 'image/png' }
        ];

        // Setup mocks
        mockSql.getRow.mockReturnValueOnce({ count: 2 });
        mockSql.getRow.mockReturnValueOnce({ count: 0 });
        mockSql.getRows.mockReturnValueOnce(imageNotes);
        mockSql.getRows.mockReturnValueOnce([]);

        // Start batch processing
        await ocrService.startBatchProcessing();

        // Cancel immediately
        ocrService.cancelBatchProcessing();

        // There is no positive event to poll for here: the fixed grace period
        // gives the background work a chance to run before checking that the
        // batch is still reported as stopped.
        await new Promise(resolve => setTimeout(resolve, 100));

        // Verify processing was stopped early
        expect(ocrService.getBatchProgress().inProgress).toBe(false);
    });

    it('should skip unsupported MIME types', async () => {
        const imageNotes = [
            { noteId: 'note1', mime: 'text/plain' }, // unsupported
            { noteId: 'note2', mime: 'image/jpeg' } // supported
        ];

        // Setup mocks
        mockSql.getRow.mockReturnValueOnce({ count: 2 });
        mockSql.getRow.mockReturnValueOnce({ count: 0 });
        mockSql.getRows.mockReturnValueOnce(imageNotes);
        mockSql.getRows.mockReturnValueOnce([]);

        const mockNote = {
            noteId: 'note2',
            type: 'image',
            mime: 'image/jpeg',
            getContent: vi.fn().mockResolvedValue(Buffer.from('fake-image-data'))
        };
        mockBecca.getNote.mockReturnValue(mockNote);
        mockSql.getRow.mockReturnValue(null);
        mockWorker.recognize.mockResolvedValue({
            data: { text: 'Test text', confidence: 95 }
        });

        // Start batch processing
        await ocrService.startBatchProcessing();

        // Poll until the supported note has been picked up...
        await vi.waitFor(() => {
            expect(mockBecca.getNote).toHaveBeenCalledWith('note2');
        });

        // ...then verify the unsupported one was never looked up.
        // NOTE(review): assumes rows are handled in the order returned by the
        // query (note1 before note2) — confirm against the service.
        expect(mockBecca.getNote).not.toHaveBeenCalledWith('note1');
    });
});
|
|
});
|
|
|
|
// Tests for removing a stored OCR result.
describe('deleteOCRResult', () => {
    it('should delete OCR result successfully', () => {
        ocrService.deleteOCRResult('note123', 'note');

        const expectedSql = 'DELETE FROM ocr_results WHERE entity_id = ? AND entity_type = ?';
        const expectedParams = ['note123', 'note'];
        expect(mockSql.execute).toHaveBeenCalledWith(expectedSql, expectedParams);
        expect(mockLog.info).toHaveBeenCalledWith('Deleted OCR result for note note123');
    });

    it('should handle deletion errors', () => {
        const failingStatement = () => {
            throw new Error('Database error');
        };
        mockSql.execute.mockImplementation(failingStatement);

        // The error is expected to be both logged and propagated to the caller.
        const deleting = () => ocrService.deleteOCRResult('note123', 'note');

        expect(deleting).toThrow('Database error');
        expect(mockLog.error).toHaveBeenCalledWith('Failed to delete OCR result for note note123: Error: Database error');
    });
});
|
|
|
|
// Tests for the "currently processing" flag exposed by the service.
describe('isCurrentlyProcessing', () => {
    it('should return false initially', () => {
        expect(ocrService.isCurrentlyProcessing()).toBe(false);
    });

    it('should return true during processing', async () => {
        mockBecca.getNote.mockReturnValue({
            noteId: 'note123',
            mime: 'image/jpeg',
            getBlob: vi.fn().mockResolvedValue(Buffer.from('fake-image-data'))
        });
        mockSql.getRow.mockResolvedValue(null);

        await ocrService.initialize();

        // Record the flag while recognition is running and assert on it
        // afterwards. An `expect` placed inside the mock could be swallowed by
        // error handling in the code under test, and would pass vacuously if
        // `recognize` were never called at all.
        let flagDuringRecognition: boolean | undefined;
        mockWorker.recognize.mockImplementation(() => {
            flagDuringRecognition = ocrService.isCurrentlyProcessing();
            return Promise.resolve({
                data: { text: 'test', confidence: 90 }
            });
        });

        await ocrService.processNoteOCR('note123');

        // `undefined` here means recognition never ran; `true` is required.
        expect(flagDuringRecognition).toBe(true);
        expect(ocrService.isCurrentlyProcessing()).toBe(false);
    });
});
|
|
|
|
// Tests for releasing the worker through `cleanup()`.
describe('cleanup', () => {
    it('should terminate worker on cleanup', async () => {
        // A worker must exist before there is anything to terminate.
        await ocrService.initialize();

        await ocrService.cleanup();

        const { terminate } = mockWorker;
        expect(terminate).toHaveBeenCalled();
        expect(mockLog.info).toHaveBeenCalledWith('OCR service cleaned up');
    });

    it('should handle cleanup when worker is not initialized', async () => {
        await ocrService.cleanup();

        // Nothing to terminate, but the cleanup message is still expected.
        const { terminate } = mockWorker;
        expect(terminate).not.toHaveBeenCalled();
        expect(mockLog.info).toHaveBeenCalledWith('OCR service cleaned up');
    });
});
|
|
}); |