From 72c380b6f43658f0e5609926f2179031a4581694 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Fri, 28 Mar 2025 22:50:15 +0000
Subject: [PATCH] do a wayyy better job at building the messages with context

---
 src/routes/api/llm.ts                          |   6 +-
 src/services/llm/chat_service.ts               |   4 +-
 src/services/llm/context_service.ts            |  99 ++--
 .../llm/formatters/anthropic_formatter.ts      | 223 +++++++++
 src/services/llm/formatters/base_formatter.ts  | 161 ++++++
 .../llm/formatters/ollama_formatter.ts         | 120 +++++
 .../llm/formatters/openai_formatter.ts         | 152 ++++++
 .../llm/interfaces/message_formatter.ts        |  92 ++++
 src/services/llm/providers/ollama_service.ts   | 467 +++---------
 9 files changed, 856 insertions(+), 468 deletions(-)
 create mode 100644 src/services/llm/formatters/anthropic_formatter.ts
 create mode 100644 src/services/llm/formatters/base_formatter.ts
 create mode 100644 src/services/llm/formatters/ollama_formatter.ts
 create mode 100644 src/services/llm/formatters/openai_formatter.ts
 create mode 100644 src/services/llm/interfaces/message_formatter.ts

diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index a65c74fff..57a9c38f3 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -956,8 +956,8 @@ async function sendMessage(req: Request, res: Response) {
             log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
             log.info(`Number of notes included: ${sourceNotes.length}`);
 
-            // Get messages with context properly formatted for the specific LLM provider
-            const aiMessages = contextService.buildMessagesWithContext(
+            // Format messages for the LLM using the proper context
+            const aiMessages = await contextService.buildMessagesWithContext(
                 session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
@@ -1104,7 +1104,7 @@ async function sendMessage(req: Request, res: Response) {
             const context = buildContextFromNotes(relevantNotes, messageContent);
 
             // Get messages with context properly formatted for the specific LLM provider
-            const aiMessages = contextService.buildMessagesWithContext(
+            const aiMessages = await contextService.buildMessagesWithContext(
                 session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
diff --git a/src/services/llm/chat_service.ts b/src/services/llm/chat_service.ts
index f155248da..d6a12bf63 100644
--- a/src/services/llm/chat_service.ts
+++ b/src/services/llm/chat_service.ts
@@ -265,10 +265,10 @@ export class ChatService {
         );
 
         // Create messages array with context using the improved method
-        const messagesWithContext = contextService.buildMessagesWithContext(
+        const messagesWithContext = await contextService.buildMessagesWithContext(
             session.messages,
             enhancedContext,
-            aiServiceManager.getService() // Get the default service
+            aiServiceManager.getService()
         );
 
         // Generate AI response
diff --git a/src/services/llm/context_service.ts b/src/services/llm/context_service.ts
index bf6fa14ef..c8a619421 100644
--- a/src/services/llm/context_service.ts
+++ b/src/services/llm/context_service.ts
@@ -11,6 +11,7 @@ import { ContextExtractor } from './context/index.js';
 import type { NoteSearchResult } from './interfaces/context_interfaces.js';
 import type { Message } from './ai_interface.js';
 import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js';
+import { MessageFormatterFactory } from './interfaces/message_formatter.js';
 
 /**
  * Main Context Service for Trilium Notes
@@ -189,72 +190,62 @@ class TriliumContextService {
     }
     /**
-     * Build messages with proper context for an LLM-enhanced chat
+     * Builds messages with context for LLM service
+     * This takes a set of messages and adds context in the appropriate format for each LLM provider
+     *
+     * @param messages Array of messages to enhance with context
+     * @param context The context to add (built from relevant notes)
+     * @param llmService The LLM service to format messages for
+     * @returns Promise resolving to the messages array with context properly integrated
      */
-    buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] {
-        // For simple conversations just add context to the system message
+    async buildMessagesWithContext(
+        messages: Message[],
+        context: string,
+        llmService: LLMServiceInterface
+    ): Promise<Message[]> {
         try {
             if (!messages || messages.length === 0) {
-                return [{ role: 'system', content: context }];
+                log.info('No messages provided to buildMessagesWithContext');
+                return [];
             }
 
-            const result: Message[] = [];
-            let hasSystemMessage = false;
-
-            // First pass: identify if there's a system message
-            for (const msg of messages) {
-                if (msg.role === 'system') {
-                    hasSystemMessage = true;
-                    break;
-                }
+            if (!context || context.trim() === '') {
+                log.info('No context provided to buildMessagesWithContext, returning original messages');
+                return messages;
             }
 
-            // If we have a system message, prepend context to it
-            // Otherwise create a new system message with the context
-            if (hasSystemMessage) {
-                for (const msg of messages) {
-                    if (msg.role === 'system') {
-                        // For Ollama, use a cleaner approach with just one system message
-                        if (llmService.constructor.name === 'OllamaService') {
-                            // If this is the first system message we've seen,
-                            // add context to it, otherwise skip (Ollama handles multiple
-                            // system messages poorly)
-                            if (result.findIndex(m => m.role === 'system') === -1) {
-                                result.push({
-                                    role: 'system',
-                                    content: `${context}\n\n${msg.content}`
-                                });
-                            }
-                        } else {
-                            // For other providers, include all system messages
-                            result.push({
-                                role: 'system',
-                                content: msg.content.includes(context) ?
-                                    msg.content : // Avoid duplicate context
-                                    `${context}\n\n${msg.content}`
-                            });
-                        }
-                    } else {
-                        result.push(msg);
-                    }
-                }
+            // Get the provider name, handling service classes and raw provider names
+            let providerName: string;
+            if (typeof llmService === 'string') {
+                // If llmService is a string, assume it's the provider name
+                providerName = llmService;
+            } else if (llmService.constructor && llmService.constructor.name) {
+                // Extract provider name from service class name (e.g., OllamaService -> ollama)
+                providerName = llmService.constructor.name.replace('Service', '').toLowerCase();
             } else {
-                // No system message found, prepend one with the context
-                result.push({ role: 'system', content: context });
-                // Add all the original messages
-                result.push(...messages);
+                // Fallback to default
+                providerName = 'default';
             }
 
-            return result;
+            log.info(`Using formatter for provider: ${providerName}`);
+
+            // Get the appropriate formatter for this provider
+            const formatter = MessageFormatterFactory.getFormatter(providerName);
+
+            // Format messages with context using the provider-specific formatter
+            const formattedMessages = formatter.formatMessages(
+                messages,
+                undefined, // No system prompt override - use what's in the messages
+                context
+            );
+
+            log.info(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for ${providerName}`);
+
+            return formattedMessages;
         } catch (error) {
             log.error(`Error building messages with context: ${error}`);
-
-            // Fallback: prepend a system message with context
-            const safeMessages = Array.isArray(messages) ? messages : [];
-            return [
-                { role: 'system', content: context },
-                ...safeMessages.filter(msg => msg.role !== 'system')
-            ];
+            // Fallback to original messages in case of error
+            return messages;
         }
     }
 }
diff --git a/src/services/llm/formatters/anthropic_formatter.ts b/src/services/llm/formatters/anthropic_formatter.ts
new file mode 100644
index 000000000..3c1dfb624
--- /dev/null
+++ b/src/services/llm/formatters/anthropic_formatter.ts
@@ -0,0 +1,223 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+
+/**
+ * Anthropic-specific message formatter
+ * Optimized for Claude's API and preferences
+ */
+export class AnthropicMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for Anthropic models
+     * Claude has a very large context window
+     */
+    private static MAX_CONTEXT_LENGTH = 100000;
+
+    /**
+     * Format messages for the Anthropic API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // For Anthropic, system prompts work best as the first user message with XML tags
+        // First, collect all non-system messages
+        const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // For Anthropic, we need to handle context differently
+        // 1. If explicit context is provided, we format it with XML tags
+        if (context) {
+            // Build the system message with context
+            const baseInstructions = this.getDefaultSystemPrompt(systemPrompt);
+
+            const formattedContext =
+                `<instructions>\n${baseInstructions}\n\n` +
+                `Use the following information from the user's notes to answer their questions:\n\n` +
+                `<notes>\n${this.cleanContextContent(context)}\n</notes>\n\n` +
+                `When responding:\n` +
+                `- Focus on the most relevant information from the notes\n` +
+                `- Be concise and direct in your answers\n` +
+                `- If quoting from notes, mention which note it's from\n` +
+                `- If the notes don't contain relevant information, say so clearly\n` +
+                `</instructions>`;
+
+            // If there's at least one user message, add the context to the first one
+            if (userAssistantMessages.length > 0 && userAssistantMessages[0].role === 'user') {
+                // Add system as a new first message
+                formattedMessages.push({
+                    role: 'user',
+                    content: formattedContext
+                });
+
+                // Add system response acknowledgment
+                formattedMessages.push({
+                    role: 'assistant',
+                    content: "I'll help you with your notes based on the context provided."
+                });
+
+                // Add remaining messages
+                for (const msg of userAssistantMessages) {
+                    formattedMessages.push(msg);
+                }
+            }
+            // If no user messages, create a placeholder
+            else {
+                formattedMessages.push({
+                    role: 'user',
+                    content: formattedContext
+                });
+
+                formattedMessages.push({
+                    role: 'assistant',
+                    content: "I'll help you with your notes based on the context provided. What would you like to know?"
+                });
+
+                // Add any existing assistant messages if they exist
+                const assistantMsgs = userAssistantMessages.filter(msg => msg.role === 'assistant');
+                for (const msg of assistantMsgs) {
+                    formattedMessages.push(msg);
+                }
+            }
+        }
+        // 2. If no explicit context but we have system messages, convert them to Claude format
+        else if (messages.some(msg => msg.role === 'system')) {
+            // Get system messages
+            const systemMessages = messages.filter(msg => msg.role === 'system');
+
+            // Build system content with XML tags
+            const systemContent =
+                `<instructions>\n${systemMessages.map(msg => this.cleanContextContent(msg.content)).join('\n\n')}\n</instructions>`;
+
+            // Add as first user message
+            formattedMessages.push({
+                role: 'user',
+                content: systemContent
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add remaining user/assistant messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 3. Just a system prompt, no context
+        else if (systemPrompt) {
+            // Add as first user message with XML tags
+            formattedMessages.push({
+                role: 'user',
+                content: `<instructions>\n${systemPrompt}\n</instructions>`
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add all other messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 4. No system prompt, use default from constants
+        else if (userAssistantMessages.length > 0) {
+            // Add default system prompt with XML tags
+            formattedMessages.push({
+                role: 'user',
+                content: `<instructions>\n${this.getDefaultSystemPrompt()}\n</instructions>`
+            });
+
+            // Add assistant acknowledgment
+            formattedMessages.push({
+                role: 'assistant',
+                content: "I understand. I'll follow those instructions."
+            });
+
+            // Add all user messages
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+        // 5. No special handling needed
+        else {
+            // Just add all messages as-is
+            for (const msg of userAssistantMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+
+        console.log(`Anthropic formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
+        return formattedMessages;
+    }
+
+    /**
+     * Clean context content for Anthropic
+     * Claude works well with XML-structured content
+     */
+    cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // Convert HTML to a Claude-friendly format
+            const cleaned = sanitizeHtml(content, {
+                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
+                allowedAttributes: {
+                    'a': ['href']
+                }
+            });
+
+            // Convert to markdown but preserve some structure
+            let markdown = cleaned
+                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
+                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
+                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
+                .replace(/<br[^>]*>/gi, '\n')
+                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
+                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
+                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
+                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
+                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
+                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
+                // Process lists
+                .replace(/<ul[^>]*>(.*?)<\/ul>/gs, (match, content) => {
+                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
+                })
+                .replace(/<ol[^>]*>(.*?)<\/ol>/gs, (match, content) => {
+                    let index = 1;
+                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, (m: string, item: string) => {
+                        return `${index++}. ${item}\n`;
+                    });
+                })
+                // Clean up any remaining HTML tags
+                .replace(/<[^>]*>/g, '')
+                // Clean up excessive newlines
+                .replace(/\n{3,}/g, '\n\n')
+                // Fix common HTML entities
+                .replace(/&nbsp;/g, ' ')
+                .replace(/&lt;/g, '<')
+                .replace(/&gt;/g, '>')
+                .replace(/&amp;/g, '&')
+                .replace(/&quot;/g, '"');
+
+            return markdown.trim();
+        } catch (error) {
+            console.error("Error cleaning content for Anthropic:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Get the maximum recommended context length for Anthropic
+     */
+    getMaxContextLength(): number {
+        return AnthropicMessageFormatter.MAX_CONTEXT_LENGTH;
+    }
+}
diff --git a/src/services/llm/formatters/base_formatter.ts b/src/services/llm/formatters/base_formatter.ts
new file mode 100644
index 000000000..55f342b9a
--- /dev/null
+++ b/src/services/llm/formatters/base_formatter.ts
@@ -0,0 +1,161 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import type { MessageFormatter } from '../interfaces/message_formatter.js';
+import { DEFAULT_SYSTEM_PROMPT } from '../constants/llm_prompt_constants.js';
+
+/**
+ * Base formatter with common functionality for all providers
+ * Provider-specific formatters should extend this class
+ */
+export abstract class BaseMessageFormatter implements MessageFormatter {
+    /**
+     * Format messages for the LLM API
+     * Each provider should override this method with its specific formatting logic
+     */
+    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+
+    /**
+     * Get the maximum recommended context length for this provider
+     * Each provider should override this with appropriate value
+     */
+    abstract getMaxContextLength(): number;
+
+    /**
+     * Get the default system prompt
+     * Uses the default prompt from constants
+     */
+    protected getDefaultSystemPrompt(systemPrompt?: string): string {
+        return systemPrompt || DEFAULT_SYSTEM_PROMPT;
+    }
+
+    /**
+     * Clean context content - common method with standard HTML cleaning
+     * Provider-specific formatters can override for custom behavior
+     */
+    cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // First fix any encoding issues
+            const fixedContent = this.fixEncodingIssues(content);
+
+            // Convert HTML to markdown for better readability
+            const cleaned = sanitizeHtml(fixedContent, {
+                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
+                allowedAttributes: {
+                    'a': ['href']
+                },
+                transformTags: {
+                    'h1': 'h2',
+                    'h2': 'h3',
+                    'div': 'p',
+                    'span': 'span'
+                }
+            });
+
+            // Process inline elements to markdown
+            let markdown = cleaned
+                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
+                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
+                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
+                .replace(/<br[^>]*>/gi, '\n')
+                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
+                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
+                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
+                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
+                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
+                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
+                // Clean up any remaining HTML tags
+                .replace(/<[^>]*>/g, '')
+                // Clean up excessive newlines
+                .replace(/\n{3,}/g, '\n\n');
+
+            // Process list items
+            markdown = this.processListItems(markdown);
+
+            // Fix common HTML entities
+            markdown = markdown
+                .replace(/&nbsp;/g, ' ')
+                .replace(/&lt;/g, '<')
+                .replace(/&gt;/g, '>')
+                .replace(/&amp;/g, '&')
+                .replace(/&quot;/g, '"')
+                .replace(/&#39;/g, "'")
+                .replace(/&ldquo;/g, '"')
+                .replace(/&rdquo;/g, '"')
+                .replace(/&lsquo;/g, "'")
+                .replace(/&rsquo;/g, "'")
+                .replace(/&mdash;/g, '—')
+                .replace(/&ndash;/g, '–')
+                .replace(/&hellip;/g, '…');
+
+            return markdown.trim();
+        } catch (error) {
+            console.error("Error cleaning context content:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Process HTML list items in markdown conversion
+     * This is a helper method that safely processes HTML list items
+     */
+    protected processListItems(content: string): string {
+        // Process unordered lists
+        let result = content.replace(/<ul[^>]*>([\s\S]*?)<\/ul>/gi, (match: string, listContent: string) => {
+            return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
+        });
+
+        // Process ordered lists
+        result = result.replace(/<ol[^>]*>([\s\S]*?)<\/ol>/gi, (match: string, listContent: string) => {
+            let index = 1;
+            return listContent.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (itemMatch: string, item: string) => {
+                return `${index++}. ${item}\n`;
+            });
+        });
+
+        return result;
+    }
+
+    /**
+     * Fix common encoding issues in content
+     * This fixes issues like broken quote characters and other encoding problems
+     *
+     * @param content The content to fix encoding issues in
+     * @returns Content with encoding issues fixed
+     */
+    protected fixEncodingIssues(content: string): string {
+        if (!content) return '';
+
+        try {
+            // Fix common encoding issues
+            return content
+                // Fix broken quote characters
+                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')
+                // Fix other common broken unicode
+                .replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
+                    // Some common replacements
+                    const replacements: Record<string, string> = {
+                        '\u00A0': ' ', // Non-breaking space
+                        '\u2018': "'", // Left single quote
+                        '\u2019': "'", // Right single quote
+                        '\u201C': '"', // Left double quote
+                        '\u201D': '"', // Right double quote
+                        '\u2013': '-', // En dash
+                        '\u2014': '--', // Em dash
+                        '\u2022': '*', // Bullet
+                        '\u2026': '...' // Ellipsis
+                    };
+
+                    return replacements[match] || match;
+                });
+        } catch (error) {
+            console.error('Error fixing encoding issues:', error);
+            return content; // Return original if fixing fails
+        }
+    }
+}
diff --git a/src/services/llm/formatters/ollama_formatter.ts b/src/services/llm/formatters/ollama_formatter.ts
new file mode 100644
index 000000000..aee2a9025
--- /dev/null
+++ b/src/services/llm/formatters/ollama_formatter.ts
@@ -0,0 +1,120 @@
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+import sanitizeHtml from 'sanitize-html';
+
+/**
+ * Ollama-specific message formatter
+ * Handles the unique requirements of the Ollama API
+ */
+export class OllamaMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for Ollama
+     * Smaller than other providers due to Ollama's handling of context
+     */
+    private static MAX_CONTEXT_LENGTH = 4000;
+
+    /**
+     * Format messages for the Ollama API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // First identify user and system messages
+        const systemMessages = messages.filter(msg => msg.role === 'system');
+        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // Create base system message with instructions
+        const basePrompt = this.getDefaultSystemPrompt(systemPrompt);
+
+        // Always add a system message with the base prompt
+        formattedMessages.push({
+            role: 'system',
+            content: basePrompt
+        });
+
+        // If we have context, inject it into the first user message
+        if (context && userMessages.length > 0) {
+            let injectedContext = false;
+
+            for (let i = 0; i < userMessages.length; i++) {
+                const msg = userMessages[i];
+
+                if (msg.role === 'user' && !injectedContext) {
+                    // Simple context injection directly in the user's message
+                    const cleanedContext = this.cleanContextContent(context);
+                    const formattedContext =
+                        "Here's information from my notes to help answer the question:\n\n" +
+                        cleanedContext +
+                        "\n\nBased on this information, please answer: " + msg.content;
+
+                    formattedMessages.push({
+                        role: 'user',
+                        content: formattedContext
+                    });
+
+                    injectedContext = true;
+                } else {
+                    formattedMessages.push(msg);
+                }
+            }
+        } else {
+            // No context, just add all messages as-is
+            for (const msg of userMessages) {
+                formattedMessages.push(msg);
+            }
+        }
+
+        console.log(`Ollama formatter processed ${messages.length} messages into ${formattedMessages.length} messages`);
+
+        return formattedMessages;
+    }
+
+    /**
+     * Clean up HTML and other problematic content before sending to Ollama
+     * Ollama needs a more aggressive cleaning than other models
+     */
+    override cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // First use the parent class to do standard cleaning
+            let sanitized = super.cleanContextContent(content);
+
+            // Then apply Ollama-specific aggressive cleaning
+            // Remove any remaining HTML using sanitizeHtml
+            let plaintext = sanitizeHtml(sanitized, {
+                allowedTags: [],
+                allowedAttributes: {},
+                textFilter: (text) => text
+            });
+
+            // Then aggressively sanitize to plain ASCII and simple formatting
+            plaintext = plaintext
+                // Replace common problematic quotes with simple ASCII quotes
+                .replace(/[“”]/g, '"')
+                .replace(/[‘’]/g, "'")
+                // Replace other common Unicode characters
+                .replace(/[–—]/g, '-')
+                .replace(/[•]/g, '*')
+                .replace(/[…]/g, '...')
+                // Strip all non-ASCII characters
+                .replace(/[^\x00-\x7F]/g, '')
+                // Normalize whitespace
+                .replace(/\s+/g, ' ')
+                .replace(/\n\s+/g, '\n')
+                .trim();
+
+            return plaintext;
+        } catch (error) {
+            console.error("Error cleaning context content for Ollama:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
+    /**
+     * Get the maximum recommended context length for Ollama
+     */
+    getMaxContextLength(): number {
+        return OllamaMessageFormatter.MAX_CONTEXT_LENGTH;
+    }
+}
diff --git a/src/services/llm/formatters/openai_formatter.ts b/src/services/llm/formatters/openai_formatter.ts
new file mode 100644
index 000000000..249289751
--- /dev/null
+++ b/src/services/llm/formatters/openai_formatter.ts
@@ -0,0 +1,152 @@
+import sanitizeHtml from 'sanitize-html';
+import type { Message } from '../ai_interface.js';
+import { BaseMessageFormatter } from './base_formatter.js';
+
+/**
+ * OpenAI-specific message formatter
+ * Optimized for OpenAI's API requirements and preferences
+ */
+export class OpenAIMessageFormatter extends BaseMessageFormatter {
+    /**
+     * Maximum recommended context length for OpenAI
+     * Based on GPT-4 context window size
+     */
+    private static MAX_CONTEXT_LENGTH = 16000;
+
+    /**
+     * Format messages for the OpenAI API
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+        const formattedMessages: Message[] = [];
+
+        // Check if we already have a system message
+        const hasSystemMessage = messages.some(msg => msg.role === 'system');
+        const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // If we have explicit context, format it properly
+        if (context) {
+            // For OpenAI, it's best to put context in the system message
+            const formattedContext =
+                "You are an AI assistant integrated into TriliumNext Notes. " +
+                "Use the following information from the user's notes to answer their questions:\n\n" +
+                this.cleanContextContent(context) +
+                "\n\nFocus on relevant information from these notes when answering. " +
" + + "Be concise and informative in your responses."; + + // Add as system message + formattedMessages.push({ + role: 'system', + content: formattedContext + }); + } + // If we don't have explicit context but have a system prompt + else if (!hasSystemMessage && systemPrompt) { + formattedMessages.push({ + role: 'system', + content: systemPrompt + }); + } + // If neither context nor system prompt is provided, use default system prompt + else if (!hasSystemMessage) { + formattedMessages.push({ + role: 'system', + content: this.getDefaultSystemPrompt(systemPrompt) + }); + } + // Otherwise if there are existing system messages, keep them + else if (hasSystemMessage) { + // Keep any existing system messages + const systemMessages = messages.filter(msg => msg.role === 'system'); + for (const msg of systemMessages) { + formattedMessages.push({ + role: 'system', + content: this.cleanContextContent(msg.content) + }); + } + } + + // Add all user and assistant messages + for (const msg of userAssistantMessages) { + formattedMessages.push({ + role: msg.role, + content: msg.content + }); + } + + console.log(`OpenAI formatter: ${messages.length} messages → ${formattedMessages.length} messages`); + return formattedMessages; + } + + /** + * Clean context content for OpenAI + * OpenAI handles HTML better than Ollama but still benefits from some cleaning + */ + cleanContextContent(content: string): string { + if (!content) return ''; + + try { + // Convert HTML to Markdown for better readability + const cleaned = sanitizeHtml(content, { + allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'], + allowedAttributes: { + 'a': ['href'] + }, + transformTags: { + 'h1': 'h2', + 'h2': 'h3', + 'div': 'p', + 'span': 'span' + } + }); + + // Process inline elements to markdown with simpler approach + let markdown = cleaned + .replace(/]*>(.*?)<\/h1>/gi, '# $1\n') + .replace(/]*>(.*?)<\/h2>/gi, '## $1\n') + .replace(/]*>(.*?)<\/h3>/gi, '### $1\n') + .replace(/]*>(.*?)<\/h4>/gi, '#### $1\n') + .replace(/]*>(.*?)<\/h5>/gi, '##### $1\n') + .replace(/]*>(.*?)<\/p>/gi, '$1\n\n') + .replace(/]*>/gi, '\n') + .replace(/]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)') + .replace(/]*>(.*?)<\/strong>/gi, '**$1**') + .replace(/]*>(.*?)<\/b>/gi, '**$1**') + .replace(/]*>(.*?)<\/em>/gi, '*$1*') + .replace(/]*>(.*?)<\/i>/gi, '*$1*') + .replace(/]*>(.*?)<\/code>/gi, '`$1`') + .replace(/]*>(.*?)<\/pre>/gi, '```\n$1\n```') + // Clean up any remaining HTML tags + .replace(/<[^>]*>/g, '') + // Clean up excessive newlines + .replace(/\n{3,}/g, '\n\n'); + + // Fix common HTML entities + markdown = markdown + .replace(/ /g, ' ') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/“/g, '"') + .replace(/”/g, '"') + .replace(/‘/g, "'") + .replace(/’/g, "'") + .replace(/—/g, '—') + .replace(/–/g, '–') + .replace(/…/g, '…'); + + return markdown.trim(); + } catch (error) { + console.error("Error cleaning content for OpenAI:", error); + return content; // Return original if cleaning fails + } + } + + /** + * Get the maximum recommended context length for OpenAI + */ + getMaxContextLength(): number { + return OpenAIMessageFormatter.MAX_CONTEXT_LENGTH; + } +} diff --git a/src/services/llm/interfaces/message_formatter.ts b/src/services/llm/interfaces/message_formatter.ts new file mode 100644 index 000000000..634c70cd8 --- /dev/null +++ b/src/services/llm/interfaces/message_formatter.ts @@ -0,0 +1,92 @@ +import type 
+// These imports need to be added for the factory to work
+import { OpenAIMessageFormatter } from "../formatters/openai_formatter.js";
+import { AnthropicMessageFormatter } from "../formatters/anthropic_formatter.js";
+import { OllamaMessageFormatter } from "../formatters/ollama_formatter.js";
+
+/**
+ * Interface for provider-specific message formatters
+ * This allows each provider to have custom formatting logic while maintaining a consistent interface
+ */
+export interface MessageFormatter {
+    /**
+     * Format messages for a specific LLM provider
+     *
+     * @param messages Array of messages to format
+     * @param systemPrompt Optional system prompt to include
+     * @param context Optional context to incorporate into messages
+     * @returns Formatted messages ready to send to the provider
+     */
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+
+    /**
+     * Clean context content to prepare it for this specific provider
+     *
+     * @param content The raw context content
+     * @returns Cleaned and formatted context content
+     */
+    cleanContextContent(content: string): string;
+
+    /**
+     * Get the maximum recommended context length for this provider
+     *
+     * @returns Maximum context length in characters
+     */
+    getMaxContextLength(): number;
+}
+
+/**
+ * Factory to get the appropriate message formatter for a provider
+ */
+export class MessageFormatterFactory {
+    // Cache formatters for reuse
+    private static formatters: Record<string, MessageFormatter> = {};
+
+    /**
+     * Get the appropriate message formatter for a provider
+     *
+     * @param providerName Name of the LLM provider (e.g., 'openai', 'anthropic', 'ollama')
+     * @returns MessageFormatter instance for the specified provider
+     */
+    static getFormatter(providerName: string): MessageFormatter {
+        // Normalize provider name and handle variations
+        let providerKey: string;
+
+        // Normalize provider name from various forms (constructor.name, etc.)
+        if (providerName.toLowerCase().includes('openai')) {
+            providerKey = 'openai';
+        } else if (providerName.toLowerCase().includes('anthropic') ||
+                   providerName.toLowerCase().includes('claude')) {
+            providerKey = 'anthropic';
+        } else if (providerName.toLowerCase().includes('ollama')) {
+            providerKey = 'ollama';
+        } else {
+            // Default to lowercase of whatever name we got
+            providerKey = providerName.toLowerCase();
+        }
+
+        // Return cached formatter if available
+        if (this.formatters[providerKey]) {
+            return this.formatters[providerKey];
+        }
+
+        // Create and cache new formatter
+        switch (providerKey) {
+            case 'openai':
+                this.formatters[providerKey] = new OpenAIMessageFormatter();
+                break;
+            case 'anthropic':
+                this.formatters[providerKey] = new AnthropicMessageFormatter();
+                break;
+            case 'ollama':
+                this.formatters[providerKey] = new OllamaMessageFormatter();
+                break;
+            default:
+                // Default to OpenAI formatter for unknown providers
+                console.warn(`No specific formatter for provider: ${providerName}. Using OpenAI formatter as default.`);
+                this.formatters[providerKey] = new OpenAIMessageFormatter();
+        }
+
+        return this.formatters[providerKey];
+    }
+}
diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts
index ff54459fb..d8aab4598 100644
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@@ -1,447 +1,96 @@
 import options from '../../options.js';
 import { BaseAIService } from '../base_ai_service.js';
-import type { ChatCompletionOptions, ChatResponse, Message } from '../ai_interface.js';
-import { PROVIDER_CONSTANTS } from '../constants/provider_constants.js';
+import type { Message, ChatCompletionOptions, ChatResponse } from '../ai_interface.js';
+import sanitizeHtml from 'sanitize-html';
+import { OllamaMessageFormatter } from '../formatters/ollama_formatter.js';
 
 interface OllamaMessage {
     role: string;
     content: string;
 }
 
+interface OllamaResponse {
+    model: string;
+    created_at: string;
+    message: OllamaMessage;
+    done: boolean;
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    prompt_eval_duration: number;
+    eval_count: number;
+    eval_duration: number;
+}
+
 export class OllamaService extends BaseAIService {
+    private formatter: OllamaMessageFormatter;
+
     constructor() {
         super('Ollama');
+        this.formatter = new OllamaMessageFormatter();
     }
 
     isAvailable(): boolean {
-        return super.isAvailable() &&
-            options.getOption('ollamaEnabled') === 'true' &&
-            !!options.getOption('ollamaBaseUrl');
+        return super.isAvailable() && !!options.getOption('ollamaBaseUrl');
     }
 
     async generateChatCompletion(messages: Message[], opts: ChatCompletionOptions = {}): Promise<ChatResponse> {
         if (!this.isAvailable()) {
-            throw new Error('Ollama service is not available. Check Ollama settings.');
+            throw new Error('Ollama service is not available. Check API URL in settings.');
        }
 
-        const baseUrl = options.getOption('ollamaBaseUrl') || PROVIDER_CONSTANTS.OLLAMA.BASE_URL;
-        const model = opts.model || options.getOption('ollamaDefaultModel') || PROVIDER_CONSTANTS.OLLAMA.DEFAULT_MODEL;
+        const apiBase = options.getOption('ollamaBaseUrl');
+        const model = opts.model || options.getOption('ollamaDefaultModel') || 'llama3';
         const temperature = opts.temperature !== undefined ? opts.temperature : parseFloat(options.getOption('aiTemperature') || '0.7');
         const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));
 
-        // Format messages for Ollama
-        const formattedMessages = this.formatMessages(messages, systemPrompt);
-
-        // Log the formatted messages for debugging
-        console.log('Input messages for formatting:', messages);
-        console.log('Formatted messages for Ollama:', formattedMessages);
-
-        try {
-            const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
+        try {
+            // Use the formatter to prepare messages
+            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
 
-            // Determine if we should stream the response
-            const shouldStream = opts.stream === true;
+            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
 
-            if (shouldStream) {
-                // Handle streaming response
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
+            const response = await fetch(`${apiBase}/api/chat`, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                body: JSON.stringify({
+                    model,
+                    messages: formattedMessages,
+                    options: {
+                        temperature
                     },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: true,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                // For streaming, we return an object that has a callback for handling the stream
-                return {
-                    text: "", // Initial empty text that will be built up
-                    model: model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: 0,
-                        completionTokens: 0,
-                        totalTokens: 0
-                    },
-                    stream: async (callback) => {
-                        if (!response.body) {
-                            throw new Error("No response body from Ollama");
-                        }
-
-                        const reader = response.body.getReader();
-                        let fullText = "";
-                        let partialLine = "";
-                        let receivedAnyContent = false;
-
-                        try {
-                            while (true) {
-                                const { done, value } = await reader.read();
-                                if (done) break;
-
-                                // Convert the chunk to text
-                                const chunk = new TextDecoder().decode(value);
-                                partialLine += chunk;
-
-                                // Split by lines and process each complete JSON object
-                                const lines = partialLine.split('\n');
-
-                                // Process all complete lines except the last one (which might be incomplete)
-                                for (let i = 0; i < lines.length - 1; i++) {
-                                    const line = lines[i].trim();
-                                    if (!line) continue;
-
-                                    try {
-                                        const data = JSON.parse(line);
-                                        console.log("Streaming chunk received:", data);
-
-                                        if (data.message && data.message.content) {
-                                            // Extract just the new content
-                                            const newContent = data.message.content;
-                                            // Add to full text
-                                            fullText += newContent;
-                                            receivedAnyContent = true;
-                                            // Call the callback with the new content
-                                            await callback({
-                                                text: newContent,
-                                                done: false
-                                            });
-                                        }
-
-                                        if (data.done) {
-                                            // If we received an empty response with done=true,
-                                            // generate a fallback response
-                                            if (!receivedAnyContent && fullText.trim() === "") {
-                                                // Generate a fallback response
-                                                const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                                await callback({
-                                                    text: fallbackText,
-                                                    done: false
-                                                });
-                                                fullText = fallbackText;
-                                            }
-
-                                            // Final message in the stream
-                                            await callback({
-                                                text: "",
-                                                done: true,
-                                                usage: {
-                                                    promptTokens: data.prompt_eval_count || 0,
-                                                    completionTokens: data.eval_count || 0,
-                                                    totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                                }
-                                            });
-                                        }
-                                    } catch (err) {
-                                        console.error("Error parsing JSON from Ollama stream:", err, "Line:", line);
-                                    }
-                                }
-
-                                // Keep the potentially incomplete last line for the next iteration
-                                partialLine = lines[lines.length - 1];
-                            }
-
-                            // Handle any remaining content in partialLine
-                            if (partialLine.trim()) {
-                                try {
-                                    const data = JSON.parse(partialLine.trim());
-                                    if (data.message && data.message.content) {
-                                        fullText += data.message.content;
-                                        receivedAnyContent = true;
-                                        await callback({
-                                            text: data.message.content,
-                                            done: false
-                                        });
-                                    }
-
-                                    if (data.done) {
-                                        // Check for empty responses
-                                        if (!receivedAnyContent && fullText.trim() === "") {
-                                            // Generate a fallback response
-                                            const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                            await callback({
-                                                text: fallbackText,
-                                                done: false
-                                            });
-                                            fullText = fallbackText;
-                                        }
-
-                                        await callback({
-                                            text: "",
-                                            done: true,
-                                            usage: {
-                                                promptTokens: data.prompt_eval_count || 0,
-                                                completionTokens: data.eval_count || 0,
-                                                totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                                            }
-                                        });
-                                    }
-                                } catch (err) {
-                                    console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
-                                }
-                            }
-
-                            // If we reached the end without a done message and without any content
-                            if (!receivedAnyContent && fullText.trim() === "") {
-                                // Generate a fallback response
-                                const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
-                                await callback({
-                                    text: fallbackText,
-                                    done: false
-                                });
-
-                                // Final message
-                                await callback({
-                                    text: "",
-                                    done: true,
-                                    usage: {
-                                        promptTokens: 0,
-                                        completionTokens: 0,
-                                        totalTokens: 0
-                                    }
-                                });
-                            }
-
-                            return fullText;
-                        } catch (err) {
-                            console.error("Error processing Ollama stream:", err);
-                            throw err;
-                        }
-                    }
-                };
-            } else {
-                // Non-streaming response - explicitly request JSON format
-                console.log("Sending to Ollama with formatted messages:", JSON.stringify(formattedMessages, null, 2));
-
-                const response = await fetch(endpoint, {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
-                    },
-                    body: JSON.stringify({
-                        model,
-                        messages: formattedMessages,
-                        stream: false,
-                        options: {
-                            temperature,
-                        }
-                    })
-                });
-
-                if (!response.ok) {
-                    const errorBody = await response.text();
-                    throw new Error(`Ollama API error: ${response.status} ${response.statusText} - ${errorBody}`);
-                }
-
-                const rawResponseText = await response.text();
-                console.log("Raw response from Ollama:", rawResponseText);
-
-                let data;
-
-                try {
-                    data = JSON.parse(rawResponseText);
-                    console.log("Parsed Ollama response:", JSON.stringify(data, null, 2));
-                } catch (err: any) {
-                    console.error("Error parsing JSON response from Ollama:", err);
-                    console.error("Raw response:", rawResponseText);
-                    throw new Error(`Failed to parse Ollama response as JSON: ${err.message}`);
-                }
-
-                // Check for empty or JSON object responses
-                const content = data.message?.content || '';
-                let finalResponseText = content;
-
-                if (content === '{}' || content === '{ }' || content === '{ }') {
-                    finalResponseText = "I don't have information about that in my notes.";
-                } else if (!content.trim()) {
-                    finalResponseText = "No response was generated. Please try asking a different question.";
-                }
-
-                return {
-                    text: finalResponseText,
-                    model: data.model || model,
-                    provider: this.getName(),
-                    usage: {
-                        promptTokens: data.prompt_eval_count || 0,
-                        completionTokens: data.eval_count || 0,
-                        totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
-                    }
-                };
-            }
-        } catch (error: any) {
-            console.error("Ollama service error:", error);
-            throw new Error(`Ollama service error: ${error.message}`);
-        }
-    }
-
-    /**
-     * Clean up HTML and other problematic content before sending to Ollama
-     */
-    private cleanContextContent(content: string): string {
-        if (!content) return '';
-
-        try {
-            // First fix potential encoding issues
-            let sanitized = content
-                // Fix common encoding issues with quotes and special characters
-                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"') // Fix broken quote chars
-                .replace(/[\u00A0-\u9999]/g, match => {
-                    try {
-                        return encodeURIComponent(match).replace(/%/g, '');
-                    } catch (e) {
-                        return '';
-                    }
-                });
-
-            // Replace common HTML tags with markdown or plain text equivalents
-            sanitized = sanitized
-                // Remove HTML divs, spans, etc.
-                .replace(/<\/?div[^>]*>/g, '')
-                .replace(/<\/?span[^>]*>/g, '')
-                .replace(/<\/?p[^>]*>/g, '\n')
-                // Convert headers
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                // Convert lists
-                .replace(/<\/?ul[^>]*>/g, '')
-                .replace(/<\/?ol[^>]*>/g, '')
-                .replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
-                // Convert links
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                // Convert code blocks
-                .replace(/<pre[^>]*><code[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                // Convert emphasis
-                .replace(/<\/?strong[^>]*>/g, '**')
-                .replace(/<\/?em[^>]*>/g, '*')
-                // Remove figure tags
-                .replace(/<\/?figure[^>]*>/g, '')
-                // Remove all other HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Fix double line breaks
-                .replace(/\n\s*\n\s*\n/g, '\n\n')
-                // Fix HTML entities
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                // Final clean whitespace
-                .replace(/\s+/g, ' ')
-                .replace(/\n\s+/g, '\n')
-                .trim();
-
-            return sanitized;
-        } catch (error) {
-            console.error("Error cleaning context content:", error);
-            return content; // Return original if cleaning fails
-        }
-    }
-
-    /**
-     * Format messages for the Ollama API
-     */
-    private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] {
-        const formattedMessages: OllamaMessage[] = [];
-        const MAX_SYSTEM_CONTENT_LENGTH = 4000;
-
-        // First identify user and system messages
-        const systemMessages = messages.filter(msg => msg.role === 'system');
-        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
-
-        // In the case of Ollama, we need to ensure context is properly integrated
-        // The key insight is that simply including it in a system message doesn't work well
-
-        // Check if we have context (typically in the first system message)
-        let hasContext = false;
-        let contextContent = '';
-
-        if (systemMessages.length > 0) {
-            const potentialContext = systemMessages[0].content;
-            if (potentialContext && potentialContext.includes('# Context for your query')) {
-                hasContext = true;
-                contextContent = this.cleanContextContent(potentialContext);
-            }
-        }
-
-        // Create base system message with instructions
-        let basePrompt = systemPrompt ||
-            "You are an AI assistant integrated into TriliumNext Notes. " +
" + - "Focus on helping users find information in their notes and answering questions based on their knowledge base. " + - "Be concise, informative, and direct when responding to queries."; - - // If we have context, inject it differently - prepend it to the user's first question - if (hasContext && userMessages.length > 0) { - // Create initial system message with just the base prompt - formattedMessages.push({ - role: 'system', - content: basePrompt + stream: false + }) }); - // For user messages, inject context into the first user message - let injectedContext = false; + if (!response.ok) { + const errorBody = await response.text(); + console.error(`Ollama API error: ${response.status} ${response.statusText}`, errorBody); + throw new Error(`Ollama API error: ${response.status} ${response.statusText}`); + } - for (let i = 0; i < userMessages.length; i++) { - const msg = userMessages[i]; + const data: OllamaResponse = await response.json(); + console.log('Raw response from Ollama:', JSON.stringify(data, null, 2)); + console.log('Parsed Ollama response:', JSON.stringify(data, null, 2)); - if (msg.role === 'user' && !injectedContext) { - // Format the context in a way Ollama can't ignore - const formattedContext = - "I need you to answer based on the following information from my notes:\n\n" + - "-----BEGIN MY NOTES-----\n" + - contextContent + - "\n-----END MY NOTES-----\n\n" + - "Based on these notes, please answer: " + msg.content; - - formattedMessages.push({ - role: 'user', - content: formattedContext - }); - - injectedContext = true; - } else { - formattedMessages.push({ - role: msg.role, - content: msg.content - }); + return { + text: data.message.content, + model: data.model, + provider: this.getName(), + usage: { + promptTokens: data.prompt_eval_count, + completionTokens: data.eval_count, + totalTokens: data.prompt_eval_count + data.eval_count } - } - } else { - // No context or empty context case - // Add system message (with system prompt) - if (systemPrompt) { - formattedMessages.push({ - role: 'system', - content: systemPrompt - }); - } - - // Add all user and assistant messages as-is - for (const msg of userMessages) { - formattedMessages.push({ - role: msg.role, - content: msg.content - }); - } + }; + } catch (error) { + console.error('Ollama service error:', error); + throw error; } - - console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`); - console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`); - - return formattedMessages; } }