// trilium/apps/server/src/services/llm/interfaces/model_capabilities.ts

import type { Message } from '../ai_interface.js';

/**
 * Interface for model capabilities information
 */
export interface ModelCapabilities {
    contextWindowTokens: number; // Context window size in tokens
    contextWindowChars: number; // Estimated context window size in characters (for planning)
    maxCompletionTokens: number; // Maximum completion length in tokens
    hasFunctionCalling: boolean; // Whether the model supports function calling
    hasVision: boolean; // Whether the model supports image input
    costPerInputToken: number; // Cost per input token (0 when unknown or free)
    costPerOutputToken: number; // Cost per output token (0 when unknown or free)
}

/**
 * Default model capabilities for unknown models
 */
export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
    contextWindowTokens: 8192,
    contextWindowChars: 16000, // conservative estimate (~2 chars per token)
    maxCompletionTokens: 1024,
    hasFunctionCalling: false,
    hasVision: false,
    costPerInputToken: 0,
    costPerOutputToken: 0
};

/**
 * Model capabilities for common models
 */
export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
    // OpenAI models
    'gpt-3.5-turbo': {
        contextWindowTokens: 8192,
        contextWindowChars: 16000,
        hasFunctionCalling: true
    },
    'gpt-3.5-turbo-16k': {
        contextWindowTokens: 16384,
        contextWindowChars: 65000,
        hasFunctionCalling: true
    },
    'gpt-4': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000,
        hasFunctionCalling: true
    },
    'gpt-4-32k': {
        contextWindowTokens: 32768,
        contextWindowChars: 130000,
        hasFunctionCalling: true
    },
    'gpt-4-turbo': {
        contextWindowTokens: 128000,
        contextWindowChars: 512000,
        hasFunctionCalling: true,
        hasVision: true
    },
    'gpt-4o': {
        contextWindowTokens: 128000,
        contextWindowChars: 512000,
        hasFunctionCalling: true,
        hasVision: true
    },

    // Anthropic models
    'claude-3-haiku': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-3-sonnet': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-3-opus': {
        contextWindowTokens: 200000,
        contextWindowChars: 800000,
        hasVision: true
    },
    'claude-2': {
        contextWindowTokens: 100000,
        contextWindowChars: 400000
    },

    // Ollama models (defaults, will be updated dynamically)
    'llama3': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000
    },
    'mistral': {
        contextWindowTokens: 8192,
        contextWindowChars: 32000
    },
    'llama2': {
        contextWindowTokens: 8192,
        contextWindowChars: 16000
    }
};
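
/*
 * A minimal sketch (not part of the original file) of how a caller might
 * resolve a model name to full capabilities: overlay the model's partial
 * entry onto DEFAULT_MODEL_CAPABILITIES. The prefix matching for versioned
 * names like 'gpt-4o-2024-05-13' is an assumption, not confirmed behavior.
 */
export function resolveModelCapabilities(modelName: string): ModelCapabilities {
    // Prefer an exact match, then the longest key the name starts with
    const partial = MODEL_CAPABILITIES[modelName]
        ?? Object.entries(MODEL_CAPABILITIES)
            .filter(([key]) => modelName.startsWith(key))
            .sort(([a], [b]) => b.length - a.length)[0]?.[1];

    // Spreading undefined is a no-op, so unknown models get pure defaults
    return { ...DEFAULT_MODEL_CAPABILITIES, ...partial };
}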

/**
 * Calculate the available context window size for context generation,
 * taking into account the current messages, expected future turns,
 * and other overhead.
 *
 * @param modelCapabilities Capabilities of the target model
 * @param messages Current conversation messages
 * @param expectedTurns Number of expected additional conversation turns
 * @returns Available context size in characters
 */
export function calculateAvailableContextSize(
    modelCapabilities: ModelCapabilities,
    messages: Message[],
    expectedTurns: number = 3
): number {
    // Estimate the characters already consumed by the conversation
    let currentMessageChars = 0;
    for (const message of messages) {
        currentMessageChars += message.content.length;
    }

    // Reserve space for the system prompt and other overhead
    const systemPromptReserve = 1000;

    // Reserve space for expected conversation turns
    // (~2000 chars per turn, covering both user and assistant messages)
    const turnReserve = expectedTurns * 2000;

    // Calculate the remaining space
    const totalReserved = currentMessageChars + systemPromptReserve + turnReserve;
    const availableContextSize = Math.max(0, modelCapabilities.contextWindowChars - totalReserved);

    // Cap at 70% of the total context window to be safe
    const maxSafeContextSize = Math.floor(modelCapabilities.contextWindowChars * 0.7);

    // Return the smaller of the available size and the safe maximum
    return Math.min(availableContextSize, maxSafeContextSize);
}
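
/*
 * Example usage (a sketch; assumes each Message exposes a string `content`
 * field, per the import above, and uses the hypothetical
 * resolveModelCapabilities() helper sketched earlier):
 *
 *   const caps = resolveModelCapabilities('gpt-4o');
 *   const budgetChars = calculateAvailableContextSize(caps, messages);
 *   // budgetChars is the character budget left for additional context
 *   // (e.g. retrieved note content) before the window is considered full
 */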