mirror of
https://github.com/zadam/trilium.git
synced 2025-10-20 15:19:01 +02:00
feat(llm): embeddings work and are created when launching for the first ever time
This commit is contained in:
parent
bb8a374ab8
commit
3a4bb47cc1
@ -65,6 +65,7 @@ export const {
|
|||||||
export const {
|
export const {
|
||||||
getEmbeddingStats,
|
getEmbeddingStats,
|
||||||
reprocessAllNotes,
|
reprocessAllNotes,
|
||||||
|
queueNotesForMissingEmbeddings,
|
||||||
cleanupEmbeddings
|
cleanupEmbeddings
|
||||||
} = stats;
|
} = stats;
|
||||||
|
|
||||||
@ -107,6 +108,7 @@ export default {
|
|||||||
// Stats and maintenance
|
// Stats and maintenance
|
||||||
getEmbeddingStats: stats.getEmbeddingStats,
|
getEmbeddingStats: stats.getEmbeddingStats,
|
||||||
reprocessAllNotes: stats.reprocessAllNotes,
|
reprocessAllNotes: stats.reprocessAllNotes,
|
||||||
|
queueNotesForMissingEmbeddings: stats.queueNotesForMissingEmbeddings,
|
||||||
cleanupEmbeddings: stats.cleanupEmbeddings,
|
cleanupEmbeddings: stats.cleanupEmbeddings,
|
||||||
|
|
||||||
// Index operations
|
// Index operations
|
||||||
|
@ -282,8 +282,6 @@ export async function processEmbeddingQueue() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log that we're starting to process this note
|
|
||||||
log.info(`Starting embedding generation for note ${noteId}`);
|
|
||||||
|
|
||||||
// Get note context for embedding
|
// Get note context for embedding
|
||||||
const context = await getNoteEmbeddingContext(noteId);
|
const context = await getNoteEmbeddingContext(noteId);
|
||||||
@ -334,7 +332,6 @@ export async function processEmbeddingQueue() {
|
|||||||
"DELETE FROM embedding_queue WHERE noteId = ?",
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
[noteId]
|
[noteId]
|
||||||
);
|
);
|
||||||
log.info(`Successfully completed embedding processing for note ${noteId}`);
|
|
||||||
|
|
||||||
// Count as successfully processed
|
// Count as successfully processed
|
||||||
processedCount++;
|
processedCount++;
|
||||||
|
@ -1,28 +1,13 @@
|
|||||||
import sql from "../../../services/sql.js";
|
import sql from "../../../services/sql.js";
|
||||||
import log from "../../../services/log.js";
|
import log from "../../../services/log.js";
|
||||||
import cls from "../../../services/cls.js";
|
import indexService from '../index_service.js';
|
||||||
import { queueNoteForEmbedding } from "./queue.js";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reprocess all notes to update embeddings
|
* Reprocess all notes to update embeddings
|
||||||
|
* @deprecated Use indexService.reprocessAllNotes() directly instead
|
||||||
*/
|
*/
|
||||||
export async function reprocessAllNotes() {
|
export async function reprocessAllNotes() {
|
||||||
log.info("Queueing all notes for embedding updates");
|
return indexService.reprocessAllNotes();
|
||||||
|
|
||||||
// Get all non-deleted note IDs
|
|
||||||
const noteIds = await sql.getColumn(
|
|
||||||
"SELECT noteId FROM notes WHERE isDeleted = 0"
|
|
||||||
);
|
|
||||||
|
|
||||||
log.info(`Adding ${noteIds.length} notes to embedding queue`);
|
|
||||||
|
|
||||||
// Process each note ID within a cls context
|
|
||||||
for (const noteId of noteIds) {
|
|
||||||
// Use cls.init to ensure proper context for each operation
|
|
||||||
await cls.init(async () => {
|
|
||||||
await queueNoteForEmbedding(noteId as string, 'UPDATE');
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -79,6 +64,14 @@ export async function getEmbeddingStats() {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Backwards-compatible entry point that simply forwards to
 * `indexService.queueNotesForMissingEmbeddings()`.
 *
 * NOTE(review): this module imports the index service, and the index service
 * imports the embeddings barrel — presumably the one re-exporting this
 * function; confirm the resulting import cycle is only exercised at call time.
 *
 * @deprecated Use indexService.queueNotesForMissingEmbeddings() directly instead
 * @returns the promise produced by the index service call
 */
export async function queueNotesForMissingEmbeddings() {
    const pending = indexService.queueNotesForMissingEmbeddings();
    return pending;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleanup function to remove stale or unused embeddings
|
* Cleanup function to remove stale or unused embeddings
|
||||||
*/
|
*/
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
import log from "../log.js";
|
import log from "../log.js";
|
||||||
import options from "../options.js";
|
import options from "../options.js";
|
||||||
import becca from "../../becca/becca.js";
|
import becca from "../../becca/becca.js";
|
||||||
|
import beccaLoader from "../../becca/becca_loader.js";
|
||||||
import vectorStore from "./embeddings/index.js";
|
import vectorStore from "./embeddings/index.js";
|
||||||
import providerManager from "./providers/providers.js";
|
import providerManager from "./providers/providers.js";
|
||||||
import { ContextExtractor } from "./context/index.js";
|
import { ContextExtractor } from "./context/index.js";
|
||||||
@ -378,11 +379,10 @@ export class IndexService {
|
|||||||
|
|
||||||
if (!shouldProcessEmbeddings) {
|
if (!shouldProcessEmbeddings) {
|
||||||
// This instance is not configured to process embeddings
|
// This instance is not configured to process embeddings
|
||||||
log.info("Skipping batch indexing as this instance is not configured to process embeddings");
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the embedding queue
|
// Process the embedding queue (batch size is controlled by embeddingBatchSize option)
|
||||||
await vectorStore.processEmbeddingQueue();
|
await vectorStore.processEmbeddingQueue();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -879,9 +879,16 @@ export class IndexService {
|
|||||||
log.info(`Automatic embedding indexing started ${isSyncServer ? 'as sync server' : 'as client'}`);
|
log.info(`Automatic embedding indexing started ${isSyncServer ? 'as sync server' : 'as client'}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start background processing of the embedding queue
|
||||||
|
const { setupEmbeddingBackgroundProcessing } = await import('./embeddings/events.js');
|
||||||
|
await setupEmbeddingBackgroundProcessing();
|
||||||
|
|
||||||
// Re-initialize event listeners
|
// Re-initialize event listeners
|
||||||
this.setupEventListeners();
|
this.setupEventListeners();
|
||||||
|
|
||||||
|
// Queue notes that don't have embeddings for current providers
|
||||||
|
await this.queueNotesForMissingEmbeddings();
|
||||||
|
|
||||||
// Start processing the queue immediately
|
// Start processing the queue immediately
|
||||||
await this.runBatchIndexing(20);
|
await this.runBatchIndexing(20);
|
||||||
|
|
||||||
@ -892,6 +899,95 @@ export class IndexService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Queue every non-deleted note that is missing an embedding for at least one
 * of the currently enabled embedding providers.
 *
 * Flow:
 *  1. wait until becca has finished loading,
 *  2. bail out quietly when no embedding provider is enabled,
 *  3. walk all non-deleted notes in slices of 100, skipping notes excluded
 *     from AI, and queue a note with operation 'UPDATE' as soon as one
 *     provider/model pair has no stored embedding for it.
 *
 * A failure while checking a single note is logged and the scan continues
 * with the next note. Any other failure is logged as well; this method does
 * not rethrow.
 *
 * @returns a promise that resolves (with no value) once the scan is finished
 */
async queueNotesForMissingEmbeddings() {
    // Produces the same "message or fallback" text the log lines have always
    // used, but does not blow up when something nullish was thrown.
    const describeError = (error: unknown): string => {
        const message = (error as { message?: string } | null | undefined)?.message;
        return message || 'Unknown error';
    };

    try {
        // Wait for becca to be fully loaded before accessing notes
        await beccaLoader.beccaLoaded;

        // Nothing to do without providers, so check before collecting notes
        const providers = await providerManager.getEnabledEmbeddingProviders();
        if (providers.length === 0) {
            return;
        }

        // Get all non-deleted notes
        const allNotes = Object.values(becca.notes).filter(note => !note.isDeleted);

        let queuedCount = 0;
        let excludedCount = 0;

        // Process notes in batches to avoid overwhelming the system
        const batchSize = 100;
        for (let start = 0; start < allNotes.length; start += batchSize) {
            if (start > 0) {
                // Give other pending work a turn on the event loop between
                // batches; without this pause the batching has no effect.
                await new Promise<void>(resolve => setImmediate(resolve));
            }

            for (const note of allNotes.slice(start, start + batchSize)) {
                try {
                    // Skip notes excluded from AI
                    if (isNoteExcludedFromAI(note)) {
                        excludedCount++;
                        continue;
                    }

                    // One missing provider/model embedding is enough to queue the note
                    let needsEmbedding = false;

                    for (const provider of providers) {
                        const config = provider.getConfig();
                        const existingEmbedding = await vectorStore.getEmbeddingForNote(
                            note.noteId,
                            provider.name,
                            config.model
                        );

                        if (!existingEmbedding) {
                            needsEmbedding = true;
                            break;
                        }
                    }

                    if (needsEmbedding) {
                        await vectorStore.queueNoteForEmbedding(note.noteId, 'UPDATE');
                        queuedCount++;
                    }
                } catch (error: unknown) {
                    log.error(`Error checking embeddings for note ${note.noteId}: ${describeError(error)}`);
                }
            }
        }

        // The counters were previously collected but never reported
        if (queuedCount > 0) {
            log.info(`Queued ${queuedCount} notes with missing embeddings (${excludedCount} notes excluded from AI)`);
        }
    } catch (error: unknown) {
        log.error(`Error queuing notes for missing embeddings: ${describeError(error)}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Put every non-deleted note back on the embedding queue with operation
 * 'UPDATE'.
 *
 * Initializes the service first when it has not been initialized yet. Any
 * failure while reading the note ids or queueing is logged and then rethrown
 * to the caller.
 *
 * NOTE(review): the implementation this replaced wrapped each queue call in
 * cls.init(); confirm queueNoteForEmbedding does not need that context here.
 */
async reprocessAllNotes() {
    const needsInit = !this.initialized;
    if (needsInit) {
        await this.initialize();
    }

    try {
        // Ids of all notes that are not marked as deleted
        const activeNoteIds = await sql.getColumn("SELECT noteId FROM notes WHERE isDeleted = 0");

        // Queue them one after another, in the order the query returned them
        for (const id of activeNoteIds) {
            await vectorStore.queueNoteForEmbedding(id as string, 'UPDATE');
        }
    } catch (error: any) {
        const reason = error.message || 'Unknown error';
        log.error(`Error reprocessing all notes: ${reason}`);
        throw error;
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop embedding generation (called when AI is disabled)
|
* Stop embedding generation (called when AI is disabled)
|
||||||
*/
|
*/
|
||||||
@ -907,7 +1003,8 @@ export class IndexService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Stop the background processing from embeddings/events.ts
|
// Stop the background processing from embeddings/events.ts
|
||||||
vectorStore.stopEmbeddingBackgroundProcessing();
|
const { stopEmbeddingBackgroundProcessing } = await import('./embeddings/events.js');
|
||||||
|
stopEmbeddingBackgroundProcessing();
|
||||||
|
|
||||||
// Clear all embedding providers to clean up resources
|
// Clear all embedding providers to clean up resources
|
||||||
providerManager.clearAllEmbeddingProviders();
|
providerManager.clearAllEmbeddingProviders();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user