From 859170529048bfa0b2f330268cb3b9d88921f1d5 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Tue, 15 Apr 2025 17:41:28 +0000 Subject: [PATCH] yeet hardcoded values --- .../llm/constants/search_constants.ts | 136 ++++++++++++++++++ .../llm/embeddings/providers/voyage.ts | 26 ++-- src/services/llm/index_service.ts | 7 +- src/services/llm/rest_chat_service.ts | 9 +- 4 files changed, 156 insertions(+), 22 deletions(-) create mode 100644 src/services/llm/constants/search_constants.ts diff --git a/src/services/llm/constants/search_constants.ts b/src/services/llm/constants/search_constants.ts new file mode 100644 index 000000000..2292e09bb --- /dev/null +++ b/src/services/llm/constants/search_constants.ts @@ -0,0 +1,136 @@ +export const SEARCH_CONSTANTS = { + // Vector search parameters + VECTOR_SEARCH: { + DEFAULT_MAX_RESULTS: 10, + DEFAULT_THRESHOLD: 0.6, + SIMILARITY_THRESHOLD: { + COSINE: 0.6, + HYBRID: 0.3, + DIM_AWARE: 0.1 + }, + EXACT_MATCH_THRESHOLD: 0.65 + }, + + // Context extraction parameters + CONTEXT: { + CONTENT_LENGTH: { + MEDIUM_THRESHOLD: 5000, + HIGH_THRESHOLD: 10000 + }, + MAX_PARENT_DEPTH: 3, + MAX_CHILDREN: 10, + MAX_LINKS: 10, + MAX_SIMILAR_NOTES: 5, + MAX_CONTENT_LENGTH: 2000, + MAX_RELATIONS: 10, + MAX_POINTS: 5 + }, + + // Hierarchy parameters + HIERARCHY: { + DEFAULT_QUERY_DEPTH: 2, + MAX_NOTES_PER_QUERY: 10, + MAX_PATH_LENGTH: 20, + MAX_BREADTH: 100, + MAX_DEPTH: 5, + MAX_PATHS_TO_SHOW: 3 + }, + + // Temperature settings + TEMPERATURE: { + DEFAULT: 0.7, + RELATIONSHIP_TOOL: 0.4, + VECTOR_SEARCH: 0.3, + QUERY_PROCESSOR: 0.3 + }, + + // Token/char limits + LIMITS: { + DEFAULT_NOTE_SUMMARY_LENGTH: 500, + RELATIONSHIP_TOOL_MAX_TOKENS: 50, + VECTOR_SEARCH_MAX_TOKENS: 500, + QUERY_PROCESSOR_MAX_TOKENS: 300, + MIN_STRING_LENGTH: 3 + }, + + // Tool execution parameters + TOOL_EXECUTION: { + MAX_TOOL_CALL_ITERATIONS: 5, + MAX_FOLLOW_UP_ITERATIONS: 3 + } +}; + +// Model capabilities constants - moved from ./interfaces/model_capabilities.ts +export const 
MODEL_CAPABILITIES = { + 'gpt-3.5-turbo': { + contextWindowTokens: 8192, + contextWindowChars: 16000 + }, + 'gpt-4': { + contextWindowTokens: 8192 + }, + 'gpt-4-turbo': { + contextWindowTokens: 8192 + }, + 'claude-3-opus': { + contextWindowTokens: 200000 + }, + 'claude-3-sonnet': { + contextWindowTokens: 180000 + }, + 'claude-3.5-sonnet': { + contextWindowTokens: 200000 + }, + 'default': { + contextWindowTokens: 4096 + } +}; + +// Embedding processing constants +export const EMBEDDING_PROCESSING = { + MAX_TOTAL_PROCESSING_TIME: 5 * 60 * 1000, // 5 minutes + MAX_CHUNK_RETRY_ATTEMPTS: 2, + DEFAULT_MAX_CHUNK_PROCESSING_TIME: 60 * 1000, // 1 minute + OLLAMA_MAX_CHUNK_PROCESSING_TIME: 120 * 1000, // 2 minutes + DEFAULT_EMBEDDING_UPDATE_INTERVAL: 200 +}; + +// Provider-specific embedding capabilities +export const PROVIDER_EMBEDDING_CAPABILITIES = { + VOYAGE: { + MODELS: { + 'voyage-large-2': { + contextWidth: 8192, + dimension: 1536 + }, + 'voyage-2': { + contextWidth: 8192, + dimension: 1024 + }, + 'voyage-lite-02': { + contextWidth: 8192, + dimension: 768 + }, + 'default': { + contextWidth: 8192, + dimension: 1024 + } + } + }, + OPENAI: { + MODELS: { + 'text-embedding-3-small': { + dimension: 1536, + contextWindow: 8191 + }, + 'text-embedding-3-large': { + dimension: 3072, + contextWindow: 8191 + }, + 'default': { + dimension: 1536, + contextWindow: 8192 + } + } + } +}; diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts index 2db8254d0..b0bae45d7 100644 --- a/src/services/llm/embeddings/providers/voyage.ts +++ b/src/services/llm/embeddings/providers/voyage.ts @@ -3,21 +3,15 @@ import { BaseEmbeddingProvider } from "../base_embeddings.js"; import type { EmbeddingConfig } from "../embeddings_interface.js"; import { NormalizationStatus } from "../embeddings_interface.js"; import { LLM_CONSTANTS } from "../../constants/provider_constants.js"; +import { PROVIDER_EMBEDDING_CAPABILITIES } from 
"../../constants/search_constants.js"; import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js"; -// Voyage model context window sizes - as of current API version -const VOYAGE_MODEL_CONTEXT_WINDOWS: Record<string, number> = { - "voyage-large-2": 8192, - "voyage-2": 8192, - "default": 8192 -}; - -// Voyage embedding dimensions -const VOYAGE_MODEL_DIMENSIONS: Record<string, number> = { - "voyage-large-2": 1536, - "voyage-2": 1024, - "default": 1024 -}; +// Use constants from the central constants file +const VOYAGE_MODEL_CONTEXT_WINDOWS = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS; +const VOYAGE_MODEL_DIMENSIONS = Object.entries(PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS).reduce((acc, [key, value]) => { + acc[key] = value.dimension; + return acc; +}, {} as Record<string, number>); /** * Voyage AI embedding provider implementation @@ -62,10 +56,12 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider { model => modelName.startsWith(model) ) || "default"; - const contextWindow = VOYAGE_MODEL_CONTEXT_WINDOWS[modelBase]; + const modelInfo = VOYAGE_MODEL_CONTEXT_WINDOWS[modelBase as keyof typeof VOYAGE_MODEL_CONTEXT_WINDOWS]; + const contextWindow = modelInfo.contextWidth; // Get dimension from our registry of known models - const dimension = VOYAGE_MODEL_DIMENSIONS[modelBase] || VOYAGE_MODEL_DIMENSIONS.default; + const dimension = VOYAGE_MODEL_DIMENSIONS[modelBase as keyof typeof VOYAGE_MODEL_DIMENSIONS] || + VOYAGE_MODEL_DIMENSIONS["default"]; return { dimension, diff --git a/src/services/llm/index_service.ts b/src/services/llm/index_service.ts index 534141673..1fa1c9615 100644 --- a/src/services/llm/index_service.ts +++ b/src/services/llm/index_service.ts @@ -21,6 +21,7 @@ import type { OptionDefinitions } from "../options_interface.js"; import sql from "../sql.js"; import sqlInit from "../sql_init.js"; import { CONTEXT_PROMPTS } from './constants/llm_prompt_constants.js'; +import { SEARCH_CONSTANTS } from './constants/search_constants.js'; class IndexService { 
private initialized = false; @@ -35,9 +36,9 @@ class IndexService { private indexRebuildCurrent = 0; // Configuration - private defaultQueryDepth = 2; - private maxNotesPerQuery = 10; - private defaultSimilarityThreshold = 0.65; + private defaultQueryDepth = SEARCH_CONSTANTS.HIERARCHY.DEFAULT_QUERY_DEPTH; + private maxNotesPerQuery = SEARCH_CONSTANTS.HIERARCHY.MAX_NOTES_PER_QUERY; + private defaultSimilarityThreshold = SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD; private indexUpdateInterval = 3600000; // 1 hour in milliseconds /** diff --git a/src/services/llm/rest_chat_service.ts b/src/services/llm/rest_chat_service.ts index 6d6c2d772..1a6ea79a5 100644 --- a/src/services/llm/rest_chat_service.ts +++ b/src/services/llm/rest_chat_service.ts @@ -1,6 +1,7 @@ import log from "../log.js"; import type { Request, Response } from "express"; import type { Message, ChatCompletionOptions, ChatResponse, StreamChunk } from "./ai_interface.js"; +import { SEARCH_CONSTANTS } from './constants/search_constants.js'; /** * Interface for WebSocket LLM streaming messages @@ -239,7 +240,7 @@ class RestChatService { noteEmbedding.embedding ); - if (similarity > 0.65) { + if (similarity > SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD) { results.push({ noteId, similarity @@ -712,7 +713,7 @@ class RestChatService { // Configure chat options from session metadata const chatOptions: ChatCompletionOptions = { - temperature: session.metadata.temperature || 0.7, + temperature: session.metadata.temperature || SEARCH_CONSTANTS.TEMPERATURE.DEFAULT, maxTokens: session.metadata.maxTokens, model: session.metadata.model, stream: isStreamingRequest ? 
true : undefined, @@ -739,7 +740,7 @@ class RestChatService { let currentMessages = [...aiMessages]; let hasMoreToolCalls = true; let iterationCount = 0; - const MAX_ITERATIONS = 3; // Prevent infinite loops + const MAX_ITERATIONS = SEARCH_CONSTANTS.TOOL_EXECUTION.MAX_FOLLOW_UP_ITERATIONS; // Prevent infinite loops // Add initial assistant response with tool calls currentMessages.push({ @@ -863,7 +864,7 @@ class RestChatService { // Configure chat options from session metadata const chatOptions: ChatCompletionOptions = { - temperature: session.metadata.temperature || 0.7, + temperature: session.metadata.temperature || SEARCH_CONSTANTS.TEMPERATURE.DEFAULT, maxTokens: session.metadata.maxTokens, model: session.metadata.model, stream: isStreamingRequest ? true : undefined