From 72c380b6f43658f0e5609926f2179031a4581694 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Fri, 28 Mar 2025 22:50:15 +0000
Subject: [PATCH] do a wayyy better job at building the messages with context

---
 src/routes/api/llm.ts                          |   6 +-
 src/services/llm/chat_service.ts               |   4 +-
 src/services/llm/context_service.ts            |  99 ++--
 .../llm/formatters/anthropic_formatter.ts      | 223 +++++++++
 src/services/llm/formatters/base_formatter.ts  | 161 ++++++
 .../llm/formatters/ollama_formatter.ts         | 120 +++++
 .../llm/formatters/openai_formatter.ts         | 152 ++++++
 .../llm/interfaces/message_formatter.ts        |  92 ++++
 src/services/llm/providers/ollama_service.ts   | 467 +++---------
 9 files changed, 856 insertions(+), 468 deletions(-)
 create mode 100644 src/services/llm/formatters/anthropic_formatter.ts
 create mode 100644 src/services/llm/formatters/base_formatter.ts
 create mode 100644 src/services/llm/formatters/ollama_formatter.ts
 create mode 100644 src/services/llm/formatters/openai_formatter.ts
 create mode 100644 src/services/llm/interfaces/message_formatter.ts

diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index a65c74fff..57a9c38f3 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -956,8 +956,8 @@ async function sendMessage(req: Request, res: Response) {
             log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
             log.info(`Number of notes included: ${sourceNotes.length}`);
 
-            // Get messages with context properly formatted for the specific LLM provider
-            const aiMessages = contextService.buildMessagesWithContext(
+            // Format messages for the LLM using the proper context
+            const aiMessages = await contextService.buildMessagesWithContext(
                 session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
@@ -1104,7 +1104,7 @@ async function sendMessage(req: Request, res: Response) {
             const context = buildContextFromNotes(relevantNotes, messageContent);
 
             // Get messages with context properly formatted for the specific LLM provider
-            const aiMessages = contextService.buildMessagesWithContext(
+            const aiMessages = await contextService.buildMessagesWithContext(
                 session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
diff --git a/src/services/llm/chat_service.ts b/src/services/llm/chat_service.ts
index f155248da..d6a12bf63 100644
--- a/src/services/llm/chat_service.ts
+++ b/src/services/llm/chat_service.ts
@@ -265,10 +265,10 @@ export class ChatService {
         );
 
         // Create messages array with context using the improved method
-        const messagesWithContext = contextService.buildMessagesWithContext(
+        const messagesWithContext = await contextService.buildMessagesWithContext(
             session.messages,
             enhancedContext,
-            aiServiceManager.getService() // Get the default service
+            aiServiceManager.getService()
         );
 
         // Generate AI response
diff --git a/src/services/llm/context_service.ts b/src/services/llm/context_service.ts
index bf6fa14ef..c8a619421 100644
--- a/src/services/llm/context_service.ts
+++ b/src/services/llm/context_service.ts
@@ -11,6 +11,7 @@ import { ContextExtractor } from './context/index.js';
 import type { NoteSearchResult } from './interfaces/context_interfaces.js';
 import type { Message } from './ai_interface.js';
 import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js';
+import { MessageFormatterFactory } from './interfaces/message_formatter.js';
 
 /**
  * Main Context Service for Trilium Notes
@@ -189,72 +190,62 @@ class TriliumContextService {
     }
     /**
-     * Build messages with proper context for an LLM-enhanced chat
+     * Builds messages with context for LLM service
+     * This takes a set of messages and adds context in the appropriate format for each LLM provider
+     *
+     * @param messages Array of messages to enhance with context
+     * @param context The context to add (built from relevant notes)
+     * @param llmService The LLM service to format messages for
+     * @returns Promise resolving to the messages array with context properly integrated
      */
-    buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] {
-        // For simple conversations just add context to the system message
+    async buildMessagesWithContext(
+        messages: Message[],
+        context: string,
+        llmService: LLMServiceInterface
+    ): Promise<Message[]> {
         try {
             if (!messages || messages.length === 0) {
-                return [{ role: 'system', content: context }];
+                log.info('No messages provided to buildMessagesWithContext');
+                return [];
             }
 
-            const result: Message[] = [];
-            let hasSystemMessage = false;
-
-            // First pass: identify if there's a system message
-            for (const msg of messages) {
-                if (msg.role === 'system') {
-                    hasSystemMessage = true;
-                    break;
-                }
+            if (!context || context.trim() === '') {
+                log.info('No context provided to buildMessagesWithContext, returning original messages');
+                return messages;
             }
 
-            // If we have a system message, prepend context to it
-            // Otherwise create a new system message with the context
-            if (hasSystemMessage) {
-                for (const msg of messages) {
-                    if (msg.role === 'system') {
-                        // For Ollama, use a cleaner approach with just one system message
-                        if (llmService.constructor.name === 'OllamaService') {
-                            // If this is the first system message we've seen,
-                            // add context to it, otherwise skip (Ollama handles multiple
-                            // system messages poorly)
-                            if (result.findIndex(m => m.role === 'system') === -1) {
-                                result.push({
-                                    role: 'system',
-                                    content: `${context}\n\n${msg.content}`
-                                });
-                            }
-                        } else {
-                            // For other providers, include all system messages
-                            result.push({
-                                role: 'system',
-                                content: msg.content.includes(context) ?
-                                    msg.content : // Avoid duplicate context
-                                    `${context}\n\n${msg.content}`
-                            });
-                        }
-                    } else {
-                        result.push(msg);
-                    }
-                }
+            // Get the provider name, handling service classes and raw provider names
+            let providerName: string;
+            if (typeof llmService === 'string') {
+                // If llmService is a string, assume it's the provider name
+                providerName = llmService;
+            } else if (llmService.constructor && llmService.constructor.name) {
+                // Extract provider name from service class name (e.g., OllamaService -> ollama)
+                providerName = llmService.constructor.name.replace('Service', '').toLowerCase();
             } else {
-                // No system message found, prepend one with the context
-                result.push({ role: 'system', content: context });
-                // Add all the original messages
-                result.push(...messages);
+                // Fallback to default
+                providerName = 'default';
             }
 
-            return result;
+            log.info(`Using formatter for provider: ${providerName}`);
+
+            // Get the appropriate formatter for this provider
+            const formatter = MessageFormatterFactory.getFormatter(providerName);
+
+            // Format messages with context using the provider-specific formatter
+            const formattedMessages = formatter.formatMessages(
+                messages,
+                undefined, // No system prompt override - use what's in the messages
+                context
+            );
+
+            log.info(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for ${providerName}`);
+
+            return formattedMessages;
         } catch (error) {
             log.error(`Error building messages with context: ${error}`);
-
-            // Fallback: prepend a system message with context
-            const safeMessages = Array.isArray(messages) ? messages : [];
-            return [
-                { role: 'system', content: context },
-                ...safeMessages.filter(msg => msg.role !== 'system')
-            ];
+            // Fallback to original messages in case of error
+            return messages;
         }
     }
 }
diff --git a/src/services/llm/formatters/anthropic_formatter.ts b/src/services/llm/formatters/anthropic_formatter.ts
new file mode 100644
index 000000000..3c1dfb624
--- /dev/null
+++ b/src/services/llm/formatters/anthropic_formatter.ts
@@ -0,0 +1,223 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+
+/**
+ * Anthropic-specific message formatter
+ * Optimized for Claude's API and preferences
+ */
+export class AnthropicMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for Anthropic models
+     * Claude has a very large context window
+     */
+    private static MAX_CONTEXT_LENGTH = 100000;
+
+    /**
+     * Format messages for the Anthropic API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // For Anthropic, system prompts work best as the first user message with XML tags
+        // First, collect all non-system messages
+        const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // For Anthropic, we need to handle context differently
+        // 1. If explicit context is provided, we format it with XML tags
+        if (context) {
+            // Build the system message with context
+            const baseInstructions = this.getDefaultSystemPrompt(systemPrompt);
+
+            const formattedContext =
+                `<instructions>\n${baseInstructions}\n\n` +
+                `Use the following information from the user's notes to answer their questions:\n\n` +
+                `<notes>\n${this.cleanContextContent(context)}\n</notes>\n\n` +
+                `When responding:\n` +
+                `- Focus on the most relevant information from the notes\n` +
+                `- Be concise and direct in your answers\n` +
+                `- If quoting from notes, mention which note it's from\n` +
+                `- If the notes don't contain relevant information, say so clearly\n` +
+                `</instructions>`;
+
+            // If there's at least one user message, add the context to the first one
+            if (userAssistantMessages.length > 0 && userAssistantMessages[0].role === 'user') {
+                // Add system as a new first message
+                formattedMessages.push({
+                    role: 'user',
+                    content: formattedContext
+                });
+
+                // Add system response acknowledgment
+                formattedMessages.push({
+                    role: 'assistant',
+                    content: "I'll help you with your notes based on the context provided."
+                });
+
+                // Add remaining messages
+                for (const msg of userAssistantMessages) {
+                    formattedMessages.push(msg);
+                }
+            }
+            // If no user messages, create a placeholder
+            else {
+                formattedMessages.push({
+                    role: 'user',
+                    content: formattedContext
+                });
+
+                formattedMessages.push({
+                    role: 'assistant',
+                    content: "I'll help you with your notes based on the context provided. What would you like to know?"
+                });
+
+                // Add any existing assistant messages if they exist
+                const assistantMsgs = userAssistantMessages.filter(msg => msg.role === 'assistant');
+                for (const msg of assistantMsgs) {
+                    formattedMessages.push(msg);
+                }
+            }
+        }
+        // 2. If no explicit context but we have system messages, convert them to Claude format
+        else if (messages.some(msg => msg.role === 'system')) {
+            // Get system messages
+            const systemMessages = messages.filter(msg => msg.role === 'system');
+
+            // Build system content with XML tags
+            const systemContent =
+                `<instructions>\n${systemMessages.map(msg => this.cleanContextContent(msg.content)).join('\n\n')}\n</instructions>`;
+
+            // Add as first user message
+            formattedMessages.push({
+                role: 'user',
+                content: systemContent
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add remaining user/assistant messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 3. Just a system prompt, no context
+        else if (systemPrompt) {
+            // Add as first user message with XML tags
+            formattedMessages.push({
+                role: 'user',
+                content: `<instructions>\n${systemPrompt}\n</instructions>`
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add all other messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 4. No system prompt, use default from constants
+        else if (userAssistantMessages.length > 0) {
+            // Add default system prompt with XML tags
+            formattedMessages.push({
+                role: 'user',
+                content: `<instructions>\n${this.getDefaultSystemPrompt()}\n</instructions>`
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add all user messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 5. No special handling needed
+        else {
+            // Just add all messages as-is
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+
+        console.log(`Anthropic formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
+        return formattedMessages;
+    }
+
+    /**
+     * Clean context content for Anthropic
+     * Claude works well with XML-structured content
+     */
+    cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // Convert HTML to a Claude-friendly format
+            const cleaned = sanitizeHtml(content, {
+                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
+                allowedAttributes: {
+                    'a': ['href']
+                }
+            });
+
+            // Convert to markdown but preserve some structure
+            let markdown = cleaned
+                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
+                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
+                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
+                .replace(/<br[^>]*>/gi, '\n')
+                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
+                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
+                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
+                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
+                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
+                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
+                // Process lists
+                .replace(/<ul[^>]*>(.*?)<\/ul>/gs, (match, content) => {
+                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
+                })
+                .replace(/<ol[^>]*>(.*?)<\/ol>/gs, (match, content) => {
+                    let index = 1;
+                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, (m: string, item: string) => {
+                        return `${index++}. ${item}\n`;
+                    });
+                })
+                // Clean up any remaining HTML tags
+                .replace(/<[^>]*>/g, '')
+                // Clean up excessive newlines
+                .replace(/\n{3,}/g, '\n\n')
+                // Fix common HTML entities
+                .replace(/&nbsp;/g, ' ')
+                .replace(/&lt;/g, '<')
+                .replace(/&gt;/g, '>')
+                .replace(/&amp;/g, '&')
+                .replace(/&quot;/g, '"');
+
+            return markdown.trim();
+        } catch (error) {
+            console.error("Error cleaning content for Anthropic:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Get the maximum recommended context length for Anthropic
+     */
+    getMaxContextLength(): number {
+        return AnthropicMessageFormatter.MAX_CONTEXT_LENGTH;
+    }
+}
diff --git a/src/services/llm/formatters/base_formatter.ts b/src/services/llm/formatters/base_formatter.ts
new file mode 100644
index 000000000..55f342b9a
--- /dev/null
+++ b/src/services/llm/formatters/base_formatter.ts
@@ -0,0 +1,161 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import type { MessageFormatter } from '../interfaces/message_formatter.js';
+import { DEFAULT_SYSTEM_PROMPT } from '../constants/llm_prompt_constants.js';
+
+/**
+ * Base formatter with common functionality for all providers
+ * Provider-specific formatters should extend this class
+ */
+export abstract class BaseMessageFormatter implements MessageFormatter {
+    /**
+     * Format messages for the LLM API
+     * Each provider should override this method with its specific formatting logic
+     */
+    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+
+    /**
+     * Get the maximum recommended context length for this provider
+     * Each provider should override this with appropriate value
+     */
+    abstract getMaxContextLength(): number;
+
+    /**
+     * Get the default system prompt
+     * Uses the default prompt from constants
+     */
+    protected getDefaultSystemPrompt(systemPrompt?: string): string {
+        return systemPrompt || DEFAULT_SYSTEM_PROMPT;
+    }
+
+    /**
+     * Clean context content - common method with standard HTML cleaning
+     * Provider-specific formatters can override for custom behavior
+     */
+    cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // First fix any encoding issues
+            const fixedContent = this.fixEncodingIssues(content);
+
+            // Convert HTML to markdown for better readability
+            const cleaned = sanitizeHtml(fixedContent, {
+                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
+                allowedAttributes: {
+                    'a': ['href']
+                },
+                transformTags: {
+                    'h1': 'h2',
+                    'h2': 'h3',
+                    'div': 'p',
+                    'span': 'span'
+                }
+            });
+
+            // Process inline elements to markdown
+            let markdown = cleaned
+                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
+                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
+                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
+                .replace(/<br[^>]*>/gi, '\n')
+                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
+                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
+                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
+                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
+                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
+                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
+                // Clean up any remaining HTML tags
+                .replace(/<[^>]*>/g, '')
+                // Clean up excessive newlines
+                .replace(/\n{3,}/g, '\n\n');
+
+            // Process list items
+            markdown = this.processListItems(markdown);
+
+            // Fix common HTML entities
+            markdown = markdown
+                .replace(/&nbsp;/g, ' ')
+                .replace(/&lt;/g, '<')
+                .replace(/&gt;/g, '>')
+                .replace(/&amp;/g, '&')
+                .replace(/&quot;/g, '"')
+                .replace(/&#39;/g, "'")
+                .replace(/&ldquo;/g, '"')
+                .replace(/&rdquo;/g, '"')
+                .replace(/&lsquo;/g, "'")
+                .replace(/&rsquo;/g, "'")
+                .replace(/&mdash;/g, '—')
+                .replace(/&ndash;/g, '–')
+                .replace(/&hellip;/g, '…');
+
+            return markdown.trim();
+        } catch (error) {
+            console.error("Error cleaning context content:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Process HTML list items in markdown conversion
+     * This is a helper method that safely processes HTML list items
+     */
+    protected processListItems(content: string): string {
+        // Process unordered lists
+        let result = content.replace(/<ul[^>]*>([\s\S]*?)<\/ul>/gi, (match: string, listContent: string) => {
+            return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
+        });
+
+        // Process ordered lists
+        result = result.replace(/<ol[^>]*>([\s\S]*?)<\/ol>/gi, (match: string, listContent: string) => {
+            let index = 1;
+            return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (itemMatch: string, item: string) => {
+                return `${index++}. ${item}\n`;
+            });
+        });
+
+        return result;
+    }
+
+    /**
+     * Fix common encoding issues in content
+     * This fixes issues like broken quote characters and other encoding problems
+     *
+     * @param content The content to fix encoding issues in
+     * @returns Content with encoding issues fixed
+     */
+    protected fixEncodingIssues(content: string): string {
+        if (!content) return '';
+
+        try {
+            // Fix common encoding issues
+            return content
+                // Fix broken quote characters
+                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')
+                // Fix other common broken unicode
+                .replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
+                    // Some common replacements
+                    const replacements: Record<string, string> = {
+                        '\u00A0': ' ', // Non-breaking space
+                        '\u2018': "'", // Left single quote
+                        '\u2019': "'", // Right single quote
+                        '\u201C': '"', // Left double quote
+                        '\u201D': '"', // Right double quote
+                        '\u2013': '-', // En dash
+                        '\u2014': '--', // Em dash
+                        '\u2022': '*', // Bullet
+                        '\u2026': '...' // Ellipsis
+                    };
+
+                    return replacements[match] || match;
+                });
+        } catch (error) {
+            console.error('Error fixing encoding issues:', error);
+            return content; // Return original if fixing fails
+        }
+    }
+}
diff --git a/src/services/llm/formatters/ollama_formatter.ts b/src/services/llm/formatters/ollama_formatter.ts
new file mode 100644
index 000000000..aee2a9025
--- /dev/null
+++ b/src/services/llm/formatters/ollama_formatter.ts
@@ -0,0 +1,120 @@
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+import sanitizeHtml from 'sanitize-html';
+
+/**
+ * Ollama-specific message formatter
+ * Handles the unique requirements of the Ollama API
+ */
+export class OllamaMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for Ollama
+     * Smaller than other providers due to Ollama's handling of context
+     */
+    private static MAX_CONTEXT_LENGTH = 4000;
+
+    /**
+     * Format messages for the Ollama API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // First identify user and system messages
+        const systemMessages = messages.filter(msg => msg.role === 'system');
+        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // Create base system message with instructions
+        const basePrompt = this.getDefaultSystemPrompt(systemPrompt);
+
+        // Always add a system message with the base prompt
+        formattedMessages.push({
+            role: 'system',
+            content: basePrompt
+        });
+
+        // If we have context, inject it into the first user message
+        if (context && userMessages.length > 0) {
+            let injectedContext = false;
+
+            for (let i = 0; i < userMessages.length; i++) {
+                const msg = userMessages[i];
+
+                if (msg.role === 'user' && !injectedContext) {
+                    // Simple context injection directly in the user's message
+                    const cleanedContext = this.cleanContextContent(context);
+                    const formattedContext =
+                        "Here's information from my notes to help answer the question:\n\n" +
+                        cleanedContext +
+                        "\n\nBased on this information, please answer: " + msg.content;
+
+                    formattedMessages.push({
+                        role: 'user',
+                        content: formattedContext
+                    });
+
+                    injectedContext = true;
+                } else {
+                    formattedMessages.push(msg);
+                }
+            }
+        } else {
+            // No context, just add all messages as-is
+            for (const msg of userMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+
+        console.log(`Ollama formatter processed ${messages.length} messages into ${formattedMessages.length} messages`);
+
+        return formattedMessages;
+    }
+
+    /**
+     * Clean up HTML and other problematic content before sending to Ollama
+     * Ollama needs a more aggressive cleaning than other models
+     */
+    override cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // First use the parent class to do standard cleaning
+            let sanitized = super.cleanContextContent(content);
+
+            // Then apply Ollama-specific aggressive cleaning
+            // Remove any remaining HTML using sanitizeHtml
+            let plaintext = sanitizeHtml(sanitized, {
+                allowedTags: [],
+                allowedAttributes: {},
+                textFilter: (text) => text
+            });
+
+            // Then aggressively sanitize to plain ASCII and simple formatting
+            plaintext = plaintext
+                // Replace common problematic quotes with simple ASCII quotes
+                .replace(/[“”]/g, '"')
+                .replace(/[‘’]/g, "'")
+                // Replace other common Unicode characters
+                .replace(/[–—]/g, '-')
+                .replace(/[•]/g, '*')
+                .replace(/[…]/g, '...')
+                // Strip all non-ASCII characters
+                .replace(/[^\x00-\x7F]/g, '')
+                // Normalize whitespace
+                .replace(/\s+/g, ' ')
+                .replace(/\n\s+/g, '\n')
+                .trim();
+
+            return plaintext;
+        } catch (error) {
+            console.error("Error cleaning context content for Ollama:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Get the maximum recommended context length for Ollama
+     */
+    getMaxContextLength(): number {
+        return OllamaMessageFormatter.MAX_CONTEXT_LENGTH;
+    }
+}
diff --git a/src/services/llm/formatters/openai_formatter.ts b/src/services/llm/formatters/openai_formatter.ts
new file mode 100644
index 000000000..249289751
--- /dev/null
+++ b/src/services/llm/formatters/openai_formatter.ts
@@ -0,0 +1,152 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+
+/**
+ * OpenAI-specific message formatter
+ * Optimized for OpenAI's API requirements and preferences
+ */
+export class OpenAIMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for OpenAI
+     * Based on GPT-4 context window size
+     */
+    private static MAX_CONTEXT_LENGTH = 16000;
+
+    /**
+     * Format messages for the OpenAI API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // Check if we already have a system message
+        const hasSystemMessage = messages.some(msg => msg.role === 'system');
+        const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // If we have explicit context, format it properly
+        if (context) {
+            // For OpenAI, it's best to put context in the system message
+            const formattedContext =
+                "You are an AI assistant integrated into TriliumNext Notes. " +
+                "Use the following information from the user's notes to answer their questions:\n\n" +
+                this.cleanContextContent(context) +
+                "\n\nFocus on relevant information from these notes when answering. " +
" + + "Be concise and informative in your responses."; + + // Add as system message + formattedMessages.push({ + role: 'system', + content: formattedContext + }); + } + // If we don't have explicit context but have a system prompt + else if (!hasSystemMessage && systemPrompt) { + formattedMessages.push({ + role: 'system', + content: systemPrompt + }); + } + // If neither context nor system prompt is provided, use default system prompt + else if (!hasSystemMessage) { + formattedMessages.push({ + role: 'system', + content: this.getDefaultSystemPrompt(systemPrompt) + }); + } + // Otherwise if there are existing system messages, keep them + else if (hasSystemMessage) { + // Keep any existing system messages + const systemMessages = messages.filter(msg => msg.role === 'system'); + for (const msg of systemMessages) { + formattedMessages.push({ + role: 'system', + content: this.cleanContextContent(msg.content) + }); + } + } + + // Add all user and assistant messages + for (const msg of userAssistantMessages) { + formattedMessages.push({ + role: msg.role, + content: msg.content + }); + } + + console.log(`OpenAI formatter: ${messages.length} messages → ${formattedMessages.length} messages`); + return formattedMessages; + } + + /** + * Clean context content for OpenAI + * OpenAI handles HTML better than Ollama but still benefits from some cleaning + */ + cleanContextContent(content: string): string { + if (!content) return ''; + + try { + // Convert HTML to Markdown for better readability + const cleaned = sanitizeHtml(content, { + allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'], + allowedAttributes: { + 'a': ['href'] + }, + transformTags: { + 'h1': 'h2', + 'h2': 'h3', + 'div': 'p', + 'span': 'span' + } + }); + + // Process inline elements to markdown with simpler approach + let markdown = cleaned + .replace(/]*>(.*?)<\/h1>/gi, '# $1\n') + .replace(/]*>(.*?)<\/h2>/gi, '## $1\n') + .replace(/]*>(.*?)<\/h3>/gi, '### $1\n') + .replace(/]*>(.*?)<\/h4>/gi, '#### $1\n') + .replace(/]*>(.*?)<\/h5>/gi, '##### $1\n') + .replace(/]*>(.*?)<\/p>/gi, '$1\n\n') + .replace(/]*>/gi, '\n') + .replace(/]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)') + .replace(/]*>(.*?)<\/strong>/gi, '**$1**') + .replace(/]*>(.*?)<\/b>/gi, '**$1**') + .replace(/]*>(.*?)<\/em>/gi, '*$1*') + .replace(/]*>(.*?)<\/i>/gi, '*$1*') + .replace(/]*>(.*?)<\/code>/gi, '`$1`') + .replace(/]*>(.*?)<\/pre>/gi, '```\n$1\n```') + // Clean up any remaining HTML tags + .replace(/<[^>]*>/g, '') + // Clean up excessive newlines + .replace(/\n{3,}/g, '\n\n'); + + // Fix common HTML entities + markdown = markdown + .replace(/ /g, ' ') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/“/g, '"') + .replace(/”/g, '"') + .replace(/‘/g, "'") + .replace(/’/g, "'") + .replace(/—/g, '—') + .replace(/–/g, '–') + .replace(/…/g, '…'); + + return markdown.trim(); + } catch (error) { + console.error("Error cleaning content for OpenAI:", error); + return content; // Return original if cleaning fails + } + } + + /** + * Get the maximum recommended context length for OpenAI + */ + getMaxContextLength(): number { + return OpenAIMessageFormatter.MAX_CONTEXT_LENGTH; + } +} diff --git a/src/services/llm/interfaces/message_formatter.ts b/src/services/llm/interfaces/message_formatter.ts new file mode 100644 index 000000000..634c70cd8 --- /dev/null +++ b/src/services/llm/interfaces/message_formatter.ts @@ -0,0 +1,92 @@ +import type 
+// These imports need to be added for the factory to work
+import { OpenAIMessageFormatter } from "../formatters/openai_formatter.js";
+import { AnthropicMessageFormatter } from "../formatters/anthropic_formatter.js";
+import { OllamaMessageFormatter } from "../formatters/ollama_formatter.js";
+
+/**
+ * Interface for provider-specific message formatters
+ * This allows each provider to have custom formatting logic while maintaining a consistent interface
+ */
+export interface MessageFormatter {
+    /**
+     * Format messages for a specific LLM provider
+     *
+     * @param messages Array of messages to format
+     * @param systemPrompt Optional system prompt to include
+     * @param context Optional context to incorporate into messages
+     * @returns Formatted messages ready to send to the provider
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+
+    /**
+     * Clean context content to prepare it for this specific provider
+     *
+     * @param content The raw context content
+     * @returns Cleaned and formatted context content
+     */
+    cleanContextContent(content: string): string;
+
+    /**
+     * Get the maximum recommended context length for this provider
+     *
+     * @returns Maximum context length in characters
+     */
+    getMaxContextLength(): number;
+}
+
+/**
+ * Factory to get the appropriate message formatter for a provider
+ */
+export class MessageFormatterFactory {
+    // Cache formatters for reuse
+    private static formatters: Record<string, MessageFormatter> = {};
+
+    /**
+     * Get the appropriate message formatter for a provider
+     *
+     * @param providerName Name of the LLM provider (e.g., 'openai', 'anthropic', 'ollama')
+     * @returns MessageFormatter instance for the specified provider
+     */
+    static getFormatter(providerName: string): MessageFormatter {
+        // Normalize provider name and handle variations
+        let providerKey: string;
+
+        // Normalize provider name from various forms (constructor.name, etc.)
+        if (providerName.toLowerCase().includes('openai')) {
+            providerKey = 'openai';
+        } else if (providerName.toLowerCase().includes('anthropic') ||
+                   providerName.toLowerCase().includes('claude')) {
+            providerKey = 'anthropic';
+        } else if (providerName.toLowerCase().includes('ollama')) {
+            providerKey = 'ollama';
+        } else {
+            // Default to lowercase of whatever name we got
+            providerKey = providerName.toLowerCase();
+        }
+
+        // Return cached formatter if available
+        if (this.formatters[providerKey]) {
+            return this.formatters[providerKey];
+        }
+
+        // Create and cache new formatter
+        switch (providerKey) {
+            case 'openai':
+                this.formatters[providerKey] = new OpenAIMessageFormatter();
+                break;
+            case 'anthropic':
+                this.formatters[providerKey] = new AnthropicMessageFormatter();
+                break;
+            case 'ollama':
+                this.formatters[providerKey] = new OllamaMessageFormatter();
+                break;
+            default:
+                // Default to OpenAI formatter for unknown providers
+                console.warn(`No specific formatter for provider: ${providerName}. Using OpenAI formatter as default.`);
+                this.formatters[providerKey] = new OpenAIMessageFormatter();
+        }
+
+        return this.formatters[providerKey];
+    }
+}
diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts
index ff54459fb..d8aab4598 100644
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@@ -1,447 +1,96 @@
 import options from '../../options.js';
 import { BaseAIService } from '../base_ai_service.js';
-import type { ChatCompletionOptions, ChatResponse, Message } from '../ai_interface.js';
-import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js';
+import type { Message, ChatCompletionOptions, ChatResponse } from '../ai_interface.js';
+import sanitizeHtml from 'sanitize-html';
+import { OllamaMessageFormatter } from '../formatters/ollama_formatter.js';
 
 interface OllamaMessage {
     role: string;
     content: string;
 }
 
+interface OllamaResponse {
+    model: string;
+    created_at: string;
+    message: OllamaMessage;
+    done: boolean;
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    prompt_eval_duration: number;
+    eval_count: number;
+    eval_duration: number;
+}
+
 export class OllamaService extends BaseAIService {
+    private formatter: OllamaMessageFormatter;
+
     constructor() {
         super('Ollama');
+        this.formatter = new OllamaMessageFormatter();
     }
 
     isAvailable(): boolean {
-        return super.isAvailable() &&
-            options.getOption('ollamaEnabled') === 'true' &&
-            !!options.getOption('ollamaBaseUrl');
+        return super.isAvailable() && !!options.getOption('ollamaBaseUrl');
     }
 
     async generateChatCompletion(messages: Message[], opts: ChatCompletionOptions = {}): Promise<ChatResponse> {
         if (!this.isAvailable()) {
-            throw new Error('Ollama service is not available. Check Ollama settings.');
+            throw new Error('Ollama service is not available. Check API URL in settings.');
        }
 
-        const baseUrl = options.getOption('ollamaBaseUrl') || PROVIDER_CONSTANTS.OLLAMA.BASE_URL;
-        const model = opts.model || options.getOption('ollamaDefaultModel') || PROVIDER_CONSTANTS.OLLAMA.DEFAULT_MODEL;
+        const apiBase = options.getOption('ollamaBaseUrl');
+        const model = opts.model || options.getOption('ollamaDefaultModel') || 'llama3';
         const temperature = opts.temperature !== undefined ? opts.temperature : parseFloat(options.getOption('aiTemperature') || '0.7');
         const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));
 
-        // Format messages for Ollama
-        const formattedMessages = this.formatMessages(messages, systemPrompt);
-
-        // Log the formatted messages for debugging
-        console.log('Input messages for formatting:', messages);
-        console.log('Formatted messages for Ollama:', formattedMessages);
-
-        try {
-            const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
+        try {
+            // Use the formatter to prepare messages
+            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
 
-            // Determine if we should stream the response
-            const shouldStream = opts.stream === true;
+            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
 
-            if (shouldStream) {
-                // Handle streaming response
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
+            const response = await fetch(`${apiBase}/api/chat`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                body: JSON.stringify({
+                    model,
+                    messages: formattedMessages,
+                    options: {
+                        temperature
                     },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: true,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                // For streaming, we return an object that has a callback for handling the stream
-                return {
-                    text: "", // Initial empty text that will be built up
-                    model: model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: 0,
-                        completionTokens: 0,
-                        totalTokens: 0
-                    },
-                    stream: async (callback) => {
-                        if (!response.body) {
-                            throw new Error("No response body from Ollama");
-                        }
-
-                        const reader = response.body.getReader();
-                        let fullText = "";
-                        let partialLine = "";
-                        let receivedAnyContent = false;
-
-                        try {
-                            while (true) {
-                                const { done, value } = await reader.read();
-                                if (done) break;
-
-                                // Convert the chunk to text
-                                const chunk = new TextDecoder().decode(value);
-                                partialLine += chunk;
-
-                                // Split by lines and process each complete JSON object
-                                const lines = partialLine.split('\n');
-
-                                // Process all complete lines except the last one (which might be incomplete)
-                                for (let i = 0; i < lines.length - 1; i++) {
-                                    const line = lines[i].trim();
-                                    if (!line) continue;
-
-                                    try {
-                                        const data = JSON.parse(line);
-                                        console.log("Streaming chunk received:", data);
-
-                                        if (data.message && data.message.content) {
-                                            // Extract just the new content
-                                            const newContent = data.message.content;
-                                            // Add to full text
-                                            fullText += newContent;
-                                            receivedAnyContent = true;
-                                            // Call the callback with the new content
-                                            await callback({
-                                                text: newContent,
-                                                done: false
-                                            });
-                                        }
-
-                                        if (data.done) {
-                                            // If we received an empty response with done=true,
-                                            // generate a fallback response
-                                            if (!receivedAnyContent && fullText.trim() === "") {
-                                                // Generate a fallback response
-                                                const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                                await callback({
-                                                    text: fallbackText,
-                                                    done: false
-                                                });
-                                                fullText = fallbackText;
-                                            }
-
-                                            // Final message in the stream
-                                            await callback({
-                                                text: "",
-                                                done: true,
-                                                usage: {
-                                                    promptTokens: data.prompt_eval_count || 0,
-                                                    completionTokens: data.eval_count || 0,
-                                                    totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                                }
-                                            });
-                                        }
-                                    } catch (err) {
-                                        console.error("Error parsing JSON from Ollama stream:", err, "Line:", line);
-                                    }
-                                }
-
-                                // Keep the potentially incomplete last line for the next iteration
-                                partialLine = lines[lines.length - 1];
-                            }
-
-                            // Handle any remaining content in partialLine
-                            if (partialLine.trim()) {
-                                try {
-                                    const data = JSON.parse(partialLine.trim());
-                                    if (data.message && data.message.content) {
-                                        fullText += data.message.content;
-                                        receivedAnyContent = true;
-                                        await callback({
-                                            text: data.message.content,
-                                            done: false
-                                        });
-                                    }
-
-                                    if (data.done) {
-                                        // Check for empty responses
-                                        if (!receivedAnyContent && fullText.trim() === "") {
-                                            // Generate a fallback response
-                                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                            await callback({
-                                                text: fallbackText,
-                                                done: false
-                                            });
-                                            fullText = fallbackText;
-                                        }
-
-                                        await callback({
-                                            text: "",
-                                            done: true,
-                                            usage: {
-                                                promptTokens: data.prompt_eval_count || 0,
-                                                completionTokens: data.eval_count || 0,
-                                                totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                            }
-                                        });
-                                    }
-                                } catch (err) {
-                                    console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
-                                }
-                            }
-
-                            // If we reached the end without a done message and without any content
-                            if (!receivedAnyContent && fullText.trim() === "") {
-                                // Generate a fallback response
-                                const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                await callback({
-                                    text: fallbackText,
-                                    done: false
-                                });
-
-                                // Final message
-                                await callback({
-                                    text: "",
-                                    done: true,
-                                    usage: {
-                                        promptTokens: 0,
-                                        completionTokens: 0,
-                                        totalTokens: 0
-                                    }
-                                });
-                            }
-
-                            return fullText;
-                        } catch (err) {
-                            console.error("Error processing Ollama stream:", err);
-                            throw err;
-                        }
-                    }
-                };
-            } else {
-                // Non-streaming response - explicitly request JSON format
-                console.log("Sending to Ollama with formatted messages:", JSON.stringify(formattedMessages, null, 2));
-
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
-                    },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: false,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                const rawResponseText = await response.text();
-                console.log("Raw response from Ollama:", rawResponseText);
-
-                let data;
-
-                try {
-                    data = JSON.parse(rawResponseText);
-                    console.log("Parsed Ollama response:", JSON.stringify(data, null, 2));
-                } catch (err: any) {
-                    console.error("Error parsing JSON response from Ollama:", err);
-                    console.error("Raw response:", rawResponseText);
-                    throw new Error(`Failed to parse Ollama response as JSON: ${err.message}`);
-                }
-
-                // Check for empty or JSON object responses
-                const content = data.message?.content || '';
-                let finalResponseText = content;
-
-                if (content === '{}' || content === '{ }' || content === '{ }') {
-                    finalResponseText = "I don't have information about that in my notes.";
-                } else if (!content.trim()) {
-                    finalResponseText = "No response was generated. Please try asking a different question.";
-                }
-
-                return {
-                    text: finalResponseText,
-                    model: data.model || model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: data.prompt_eval_count || 0,
-                        completionTokens: data.eval_count || 0,
-                        totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                    }
-                };
-            }
-        } catch (error: any) {
-            console.error("Ollama service error:", error);
-            throw new Error(`Ollama service error: ${error.message}`);
-        }
-    }
-
-    /**
-     * Clean up HTML and other problematic content before sending to Ollama
-     */
-    private cleanContextContent(content: string): string {
-        if (!content) return '';
-
-        try {
-            // First fix potential encoding issues
-            let sanitized = content
-                // Fix common encoding issues with quotes and special characters
-                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"') // Fix broken quote chars
-                .replace(/[\u00A0-\u9999]/g, match => {
-                    try {
-                        return encodeURIComponent(match).replace(/%/g, '');
-                    } catch (e) {
-                        return '';
-                    }
-                });
-
-            // Replace common HTML tags with markdown or plain text equivalents
-            sanitized = sanitized
-                // Remove HTML divs, spans, etc.
-                .replace(/<\/?div[^>]*>/g, '')
-                .replace(/<\/?span[^>]*>/g, '')
-                .replace(/<\/?p[^>]*>/g, '\n')
-                // Convert headers
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                // Convert lists
-                .replace(/<\/?ul[^>]*>/g, '')
-                .replace(/<\/?ol[^>]*>/g, '')
-                .replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
-                // Convert links
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                // Convert code blocks
-                .replace(/<pre[^>]*><code[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                // Convert emphasis
-                .replace(/<\/?strong[^>]*>/g, '**')
-                .replace(/<\/?em[^>]*>/g, '*')
-                // Remove figure tags
-                .replace(/<\/?figure[^>]*>/g, '')
-                // Remove all other HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Fix double line breaks
-                .replace(/\n\s*\n\s*\n/g, '\n\n')
-                // Fix HTML entities
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                // Final clean whitespace
-                .replace(/\s+/g, ' ')
-                .replace(/\n\s+/g, '\n')
-                .trim();
-
-            return sanitized;
-        } catch (error) {
-            console.error("Error cleaning context content:", error);
-            return content; // Return original if cleaning fails
-        }
-    }
-
-    /**
-     * Format messages for the Ollama API
-     */
-    private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] {
-        const formattedMessages: OllamaMessage[] = [];
-        const MAX_SYSTEM_CONTENT_LENGTH = 4000;
-
-        // First identify user and system messages
-        const systemMessages = messages.filter(msg => msg.role === 'system');
-        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
-
-        // In the case of Ollama, we need to ensure context is properly integrated
-        // The key insight is that simply including it in a system message doesn't work well
-
-        // Check if we have context (typically in the first system message)
-        let hasContext = false;
-        let contextContent = '';
-
-        if (systemMessages.length > 0) {
-            const potentialContext = systemMessages[0].content;
-            if (potentialContext && potentialContext.includes('# Context for your query')) {
-                hasContext = true;
-                contextContent = this.cleanContextContent(potentialContext);
-            }
-        }
-
-        // Create base system message with instructions
-        let basePrompt = systemPrompt ||
-            "You are an AI assistant integrated into TriliumNext Notes. " +
" + - "Focus on helping users find information in their notes and answering questions based on their knowledge base. " + - "Be concise, informative, and direct when responding to queries."; - - // If we have context, inject it differently - prepend it to the user's first question - if (hasContext && userMessages.length > 0) { - // Create initial system message with just the base prompt - formattedMessages.push({ - role: 'system', - content: basePrompt + stream: false + }) }); - // For user messages, inject context into the first user message - let injectedContext = false; + if (!response.ok) { + const errorBody = await response.text(); + console.error(`Ollama API error: ${response.status} ${response.statusText}`, errorBody); + throw new Error(`Ollama API error: ${response.status} ${response.statusText}`); + } - for (let i = 0; i < userMessages.length; i++) { - const msg = userMessages[i]; + const data: OllamaResponse = await response.json(); + console.log('Raw response from Ollama:', JSON.stringify(data, null, 2)); + console.log('Parsed Ollama response:', JSON.stringify(data, null, 2)); - if (msg.role === 'user' && !injectedContext) { - // Format the context in a way Ollama can't ignore - const formattedContext = - "I need you to answer based on the following information from my notes:\n\n" + - "-----BEGIN MY NOTES-----\n" + - contextContent + - "\n-----END MY NOTES-----\n\n" + - "Based on these notes, please answer: " + msg.content; - - formattedMessages.push({ - role: 'user', - content: formattedContext - }); - - injectedContext = true; - } else { - formattedMessages.push({ - role: msg.role, - content: msg.content - }); + return { + text: data.message.content, + model: data.model, + provider: this.getName(), + usage: { + promptTokens: data.prompt_eval_count, + completionTokens: data.eval_count, + totalTokens: data.prompt_eval_count + data.eval_count } - } - } else { - // No context or empty context case - // Add system message (with system prompt) - if (systemPrompt) { - formattedMessages.push({ - role: 'system', - content: systemPrompt - }); - } - - // Add all user and assistant messages as-is - for (const msg of userMessages) { - formattedMessages.push({ - role: msg.role, - content: msg.content - }); - } + }; + } catch (error) { + console.error('Ollama service error:', error); + throw error; } - - console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`); - console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`); - - return formattedMessages; } }