diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index ac9f29015..7d4672128 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -14,99 +14,7 @@ import sql from "../../services/sql.js";
 // Import the index service for knowledge base management
 import indexService from "../../services/llm/index_service.js";
 import { CONTEXT_PROMPTS, ERROR_PROMPTS, FORMATTING_PROMPTS } from '../../services/llm/constants/llm_prompt_constants.js';
-
-// LLM service constants
-export const LLM_CONSTANTS = {
-    // Context window sizes (in characters)
-    CONTEXT_WINDOW: {
-        OLLAMA: 8000,
-        OPENAI: 12000,
-        ANTHROPIC: 15000,
-        VOYAGE: 12000,
-        DEFAULT: 6000
-    },
-
-    // Embedding dimensions (verify these with your actual models)
-    EMBEDDING_DIMENSIONS: {
-        OLLAMA: {
-            DEFAULT: 384,
-            NOMIC: 768,
-            MISTRAL: 1024
-        },
-        OPENAI: {
-            ADA: 1536,
-            DEFAULT: 1536
-        },
-        ANTHROPIC: {
-            CLAUDE: 1024,
-            DEFAULT: 1024
-        },
-        VOYAGE: {
-            DEFAULT: 1024
-        }
-    },
-
-    // Model-specific embedding dimensions for Ollama models
-    OLLAMA_MODEL_DIMENSIONS: {
-        "llama3": 4096,
-        "llama3.1": 4096,
-        "mistral": 4096,
-        "nomic": 768,
-        "mxbai": 1024,
-        "nomic-embed-text": 768,
-        "mxbai-embed-large": 1024,
-        "default": 384
-    },
-
-    // Model-specific context windows for Ollama models
-    OLLAMA_MODEL_CONTEXT_WINDOWS: {
-        "llama3": 8192,
-        "llama3.1": 8192,
-        "llama3.2": 8192,
-        "mistral": 8192,
-        "nomic": 32768,
-        "mxbai": 32768,
-        "nomic-embed-text": 32768,
-        "mxbai-embed-large": 32768,
-        "default": 4096
-    },
-
-    // Batch size configuration
-    BATCH_SIZE: {
-        OPENAI: 10,    // OpenAI can handle larger batches efficiently
-        ANTHROPIC: 5,  // More conservative for Anthropic
-        OLLAMA: 1,     // Ollama processes one at a time
-        DEFAULT: 5     // Conservative default
-    },
-
-    // Chunking parameters
-    CHUNKING: {
-        DEFAULT_SIZE: 1500,
-        OLLAMA_SIZE: 1000,
-        DEFAULT_OVERLAP: 100,
-        MAX_SIZE_FOR_SINGLE_EMBEDDING: 5000
-    },
-
-    // Search/similarity thresholds
-    SIMILARITY: {
-        DEFAULT_THRESHOLD: 0.65,
-        HIGH_THRESHOLD: 0.75,
-        LOW_THRESHOLD: 0.5
-    },
-
-    // Session management
-    SESSION: {
-        CLEANUP_INTERVAL_MS: 60 * 60 * 1000, // 1 hour
-        SESSION_EXPIRY_MS: 12 * 60 * 60 * 1000, // 12 hours
-        MAX_SESSION_MESSAGES: 10
-    },
-
-    // Content limits
-    CONTENT: {
-        MAX_NOTE_CONTENT_LENGTH: 1500,
-        MAX_TOTAL_CONTENT_LENGTH: 10000
-    }
-};
+import { LLM_CONSTANTS } from '../../services/llm/constants/provider_constants.js';
 
 // Define basic interfaces
 interface ChatMessage {
diff --git a/src/services/llm/constants/provider_constants.ts b/src/services/llm/constants/provider_constants.ts
index 1d43e8e46..d62a52bfb 100644
--- a/src/services/llm/constants/provider_constants.ts
+++ b/src/services/llm/constants/provider_constants.ts
@@ -92,3 +92,96 @@ export const PROVIDER_CONSTANTS = {
         }
     }
 } as const;
+
+// LLM service configuration constants
+export const LLM_CONSTANTS = {
+    // Context window sizes (in characters)
+    CONTEXT_WINDOW: {
+        OLLAMA: 8000,
+        OPENAI: 12000,
+        ANTHROPIC: 15000,
+        VOYAGE: 12000,
+        DEFAULT: 6000
+    },
+
+    // Embedding dimensions (verify these with your actual models)
+    EMBEDDING_DIMENSIONS: {
+        OLLAMA: {
+            DEFAULT: 384,
+            NOMIC: 768,
+            MISTRAL: 1024
+        },
+        OPENAI: {
+            ADA: 1536,
+            DEFAULT: 1536
+        },
+        ANTHROPIC: {
+            CLAUDE: 1024,
+            DEFAULT: 1024
+        },
+        VOYAGE: {
+            DEFAULT: 1024
+        }
+    },
+
+    // Model-specific embedding dimensions for Ollama models
+    OLLAMA_MODEL_DIMENSIONS: {
+        "llama3": 4096,
+        "llama3.1": 4096,
+        "mistral": 4096,
+        "nomic": 768,
+        "mxbai": 1024,
+        "nomic-embed-text": 768,
+        "mxbai-embed-large": 1024,
+        "default": 384
+    },
+
+    // Model-specific context windows for Ollama models
+    OLLAMA_MODEL_CONTEXT_WINDOWS: {
+        "llama3": 8192,
+        "llama3.1": 8192,
+        "llama3.2": 8192,
+        "mistral": 8192,
+        "nomic": 32768,
+        "mxbai": 32768,
+        "nomic-embed-text": 32768,
+        "mxbai-embed-large": 32768,
+        "default": 4096
+    },
+
+    // Batch size configuration
+    BATCH_SIZE: {
+        OPENAI: 10,    // OpenAI can handle larger batches efficiently
+        ANTHROPIC: 5,  // More conservative for Anthropic
+        OLLAMA: 1,     // Ollama processes one at a time
+        DEFAULT: 5     // Conservative default
+    },
+
+    // Chunking parameters
+    CHUNKING: {
+        DEFAULT_SIZE: 1500,
+        OLLAMA_SIZE: 1000,
+        DEFAULT_OVERLAP: 100,
+        MAX_SIZE_FOR_SINGLE_EMBEDDING: 5000
+    },
+
+    // Search/similarity thresholds
+    SIMILARITY: {
+        DEFAULT_THRESHOLD: 0.65,
+        HIGH_THRESHOLD: 0.75,
+        LOW_THRESHOLD: 0.5
+    },
+
+    // Session management
+    SESSION: {
+        CLEANUP_INTERVAL_MS: 60 * 60 * 1000, // 1 hour
+        SESSION_EXPIRY_MS: 12 * 60 * 60 * 1000, // 12 hours
+        MAX_SESSION_MESSAGES: 10
+    },
+
+    // Content limits
+    CONTENT: {
+        MAX_NOTE_CONTENT_LENGTH: 1500,
+        MAX_TOTAL_CONTENT_LENGTH: 10000
+    }
+};
diff --git a/src/services/llm/context/modules/context_formatter.ts b/src/services/llm/context/modules/context_formatter.ts
index 5126d7cb5..f5d5ef47f 100644
--- a/src/services/llm/context/modules/context_formatter.ts
+++ b/src/services/llm/context/modules/context_formatter.ts
@@ -1,15 +1,16 @@
 import sanitizeHtml from 'sanitize-html';
 import log from '../../../log.js';
 import { CONTEXT_PROMPTS, FORMATTING_PROMPTS } from '../../constants/llm_prompt_constants.js';
+import { LLM_CONSTANTS } from '../../constants/provider_constants.js';
 import type { IContextFormatter, NoteSearchResult } from '../../interfaces/context_interfaces.js';
 
-// Constants for context window sizes, defines in-module to avoid circular dependencies
-const CONTEXT_WINDOW = {
-    OPENAI: 16000,
-    ANTHROPIC: 100000,
-    OLLAMA: 4000, // Reduced to avoid issues
-    DEFAULT: 4000
-};
+// Use constants from the centralized file
+// const CONTEXT_WINDOW = {
+//     OPENAI: 16000,
+//     ANTHROPIC: 100000,
+//     OLLAMA: 4000, // Reduced to avoid issues
+//     DEFAULT: 4000
+// };
 
 /**
  * Formats context data for LLM consumption
@@ -35,10 +36,10 @@ export class ContextFormatter implements IContextFormatter {
         try {
             // Get appropriate context size based on provider
             const maxTotalLength =
-                providerId === 'openai' ? CONTEXT_WINDOW.OPENAI :
-                providerId === 'anthropic' ? CONTEXT_WINDOW.ANTHROPIC :
-                providerId === 'ollama' ? CONTEXT_WINDOW.OLLAMA :
-                CONTEXT_WINDOW.DEFAULT;
+                providerId === 'openai' ? LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI :
+                providerId === 'anthropic' ? LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC :
+                providerId === 'ollama' ? LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA :
+                LLM_CONSTANTS.CONTEXT_WINDOW.DEFAULT;
 
             // DEBUG: Log context window size
             log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`);
diff --git a/src/services/llm/embeddings/base_embeddings.ts b/src/services/llm/embeddings/base_embeddings.ts
index b8c17220f..7cdff4f6c 100644
--- a/src/services/llm/embeddings/base_embeddings.ts
+++ b/src/services/llm/embeddings/base_embeddings.ts
@@ -1,7 +1,7 @@
 import { NormalizationStatus } from './embeddings_interface.js';
 import type { NoteEmbeddingContext } from './embeddings_interface.js';
 import log from "../../log.js";
-import { LLM_CONSTANTS } from "../../../routes/api/llm.js";
+import { LLM_CONSTANTS } from "../constants/provider_constants.js";
 import options from "../../options.js";
 import { isBatchSizeError as checkBatchSizeError } from '../interfaces/error_interfaces.js';
 import type { EmbeddingModelInfo } from '../interfaces/embedding_interfaces.js';
diff --git a/src/services/llm/embeddings/providers/ollama.ts b/src/services/llm/embeddings/providers/ollama.ts
index ca2c73291..94da3d122 100644
--- a/src/services/llm/embeddings/providers/ollama.ts
+++ b/src/services/llm/embeddings/providers/ollama.ts
@@ -3,7 +3,7 @@ import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js";
 import { NormalizationStatus } from "../embeddings_interface.js";
-import { LLM_CONSTANTS } from "../../../../routes/api/llm.js";
+import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
 
 /**
  * Ollama embedding provider implementation
diff --git a/src/services/llm/embeddings/providers/openai.ts b/src/services/llm/embeddings/providers/openai.ts
index 6e99c297f..902ff474d 100644
--- a/src/services/llm/embeddings/providers/openai.ts
+++ b/src/services/llm/embeddings/providers/openai.ts
@@ -3,7 +3,7 @@ import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js";
 import { NormalizationStatus } from "../embeddings_interface.js";
-import { LLM_CONSTANTS } from "../../../../routes/api/llm.js";
+import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
 
 /**
  * OpenAI embedding provider implementation
diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts
index 9b22822d4..9d33c6673 100644
--- a/src/services/llm/embeddings/providers/voyage.ts
+++ b/src/services/llm/embeddings/providers/voyage.ts
@@ -3,7 +3,7 @@ import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js";
 import { NormalizationStatus } from "../embeddings_interface.js";
-import { LLM_CONSTANTS } from "../../../../routes/api/llm.js";
+import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
 
 // Voyage model context window sizes - as of current API version
 const VOYAGE_MODEL_CONTEXT_WINDOWS: Record<string, number> = {
diff --git a/src/services/llm/formatters/anthropic_formatter.ts b/src/services/llm/formatters/anthropic_formatter.ts
index 27008e863..39302d4bc 100644
--- a/src/services/llm/formatters/anthropic_formatter.ts
+++ b/src/services/llm/formatters/anthropic_formatter.ts
@@ -2,6 +2,7 @@ import sanitizeHtml from 'sanitize-html';
 import type { Message } from '../ai_interface.js';
 import { BaseMessageFormatter } from './base_formatter.js';
 import { PROVIDER_PROMPTS } from '../constants/llm_prompt_constants.js';
+import { LLM_CONSTANTS } from '../constants/provider_constants.js';
 
 /**
  * Anthropic-specific message formatter
@@ -12,7 +13,7 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
      * Maximum recommended context length for Anthropic models
      * Claude has a very large context window
      */
-    private static MAX_CONTEXT_LENGTH = 100000;
+    private static MAX_CONTEXT_LENGTH = LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC;
 
     /**
      * Format messages for the Anthropic API
diff --git a/src/services/llm/formatters/ollama_formatter.ts b/src/services/llm/formatters/ollama_formatter.ts
index c1878610d..1d29abcf7 100644
--- a/src/services/llm/formatters/ollama_formatter.ts
+++ b/src/services/llm/formatters/ollama_formatter.ts
@@ -2,6 +2,7 @@ import type { Message } from '../ai_interface.js';
 import { BaseMessageFormatter } from './base_formatter.js';
 import sanitizeHtml from 'sanitize-html';
 import { PROVIDER_PROMPTS, FORMATTING_PROMPTS } from '../constants/llm_prompt_constants.js';
+import { LLM_CONSTANTS } from '../constants/provider_constants.js';
 
 /**
  * Ollama-specific message formatter
@@ -12,7 +13,7 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
      * Maximum recommended context length for Ollama
      * Smaller than other providers due to Ollama's handling of context
      */
-    private static MAX_CONTEXT_LENGTH = 4000;
+    private static MAX_CONTEXT_LENGTH = LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA;
 
     /**
      * Format messages for the Ollama API
diff --git a/src/services/llm/formatters/openai_formatter.ts b/src/services/llm/formatters/openai_formatter.ts
index 3fa0a8328..5f2a66938 100644
--- a/src/services/llm/formatters/openai_formatter.ts
+++ b/src/services/llm/formatters/openai_formatter.ts
@@ -2,6 +2,7 @@ import sanitizeHtml from 'sanitize-html';
 import type { Message } from '../ai_interface.js';
 import { BaseMessageFormatter } from './base_formatter.js';
 import { PROVIDER_PROMPTS, FORMATTING_PROMPTS } from '../constants/llm_prompt_constants.js';
+import { LLM_CONSTANTS } from '../constants/provider_constants.js';
 
 /**
  * OpenAI-specific message formatter
@@ -12,7 +13,7 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
      * Maximum recommended context length for OpenAI
      * Based on GPT-4 context window size
      */
-    private static MAX_CONTEXT_LENGTH = 16000;
+    private static MAX_CONTEXT_LENGTH = LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI;
 
     /**
      * Format messages for the OpenAI API
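
Note: with LLM_CONSTANTS now exported from src/services/llm/constants/provider_constants.ts, service modules no longer import configuration from the routes/api/llm.ts route handler, removing the circular dependency the old in-module copies were working around (services importing LLM_CONSTANTS from a route that itself imports those services). A minimal consumer sketch for illustration only -- the pickContextWindow helper and its import path are not part of this diff; adjust the relative path to the importing file's location:

    import { LLM_CONSTANTS } from "../constants/provider_constants.js";

    // Resolve a provider's context window with a safe fallback; mirrors the
    // ternary chain in context_formatter.ts as a lookup table.
    function pickContextWindow(providerId: string): number {
        const windows: Record<string, number> = {
            openai: LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI,
            anthropic: LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC,
            ollama: LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA,
            voyage: LLM_CONSTANTS.CONTEXT_WINDOW.VOYAGE
        };
        return windows[providerId] ?? LLM_CONSTANTS.CONTEXT_WINDOW.DEFAULT;
    }

    pickContextWindow("ollama");  // 8000
    pickContextWindow("unknown"); // 6000 (DEFAULT)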