trilium/src/services/llm/formatters/anthropic_formatter.ts

204 lines
7.5 KiB
TypeScript

import sanitizeHtml from 'sanitize-html';
import type { Message } from '../ai_interface.js';
import { BaseMessageFormatter } from './base_formatter.js';
import { PROVIDER_PROMPTS } from '../constants/llm_prompt_constants.js';
import { LLM_CONSTANTS } from '../constants/provider_constants.js';
import {
HTML_ALLOWED_TAGS,
HTML_ALLOWED_ATTRIBUTES,
FORMATTER_LOGS,
HTML_TO_MARKDOWN_PATTERNS,
HTML_ENTITY_REPLACEMENTS
} from '../constants/formatter_constants.js';
/**
* Anthropic-specific message formatter
* Optimized for Claude's API and preferences
*/
export class AnthropicMessageFormatter extends BaseMessageFormatter {
/**
* Maximum recommended context length for Anthropic models
* Claude has a very large context window
*/
private static MAX_CONTEXT_LENGTH = LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC;
/**
* Format messages for the Anthropic API
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// For Anthropic, system prompts work best as the first user message with <instructions> XML tags
// First, collect all non-system messages
const userAssistantMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
// For Anthropic, we need to handle context differently
// 1. If explicit context is provided, we format it with XML tags
if (context) {
// Build the system message with context
const formattedContext = PROVIDER_PROMPTS.ANTHROPIC.SYSTEM_WITH_CONTEXT(
this.cleanContextContent(context)
);
// If there's at least one user message, add the context to the first one
if (userAssistantMessages.length > 0 && userAssistantMessages[0].role === 'user') {
// Add system as a new first message
formattedMessages.push({
role: 'user',
content: formattedContext
});
// Add system response acknowledgment
formattedMessages.push({
role: 'assistant',
content: PROVIDER_PROMPTS.ANTHROPIC.CONTEXT_ACKNOWLEDGMENT
});
// Add remaining messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// If no user messages, create a placeholder
else {
formattedMessages.push({
role: 'user',
content: formattedContext
});
formattedMessages.push({
role: 'assistant',
content: PROVIDER_PROMPTS.ANTHROPIC.CONTEXT_QUERY_ACKNOWLEDGMENT
});
// Add any existing assistant messages if they exist
const assistantMsgs = userAssistantMessages.filter(msg => msg.role === 'assistant');
for (const msg of assistantMsgs) {
formattedMessages.push(msg);
}
}
}
// 2. If no explicit context but we have system messages, convert them to Claude format
else if (messages.some(msg => msg.role === 'system')) {
// Get system messages
const systemMessages = messages.filter(msg => msg.role === 'system');
// Build system content with XML tags
const systemContent = PROVIDER_PROMPTS.ANTHROPIC.INSTRUCTIONS_WRAPPER(
systemMessages.map(msg => this.cleanContextContent(msg.content)).join('\n\n')
);
// Add as first user message
formattedMessages.push({
role: 'user',
content: systemContent
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: PROVIDER_PROMPTS.ANTHROPIC.ACKNOWLEDGMENT
});
// Add remaining user/assistant messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 3. Just a system prompt, no context
else if (systemPrompt) {
// Add as first user message with XML tags
formattedMessages.push({
role: 'user',
content: PROVIDER_PROMPTS.ANTHROPIC.INSTRUCTIONS_WRAPPER(systemPrompt)
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: PROVIDER_PROMPTS.ANTHROPIC.ACKNOWLEDGMENT
});
// Add all other messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 4. No system prompt, use default from constants
else if (userAssistantMessages.length > 0) {
// Add default system prompt with XML tags
formattedMessages.push({
role: 'user',
content: PROVIDER_PROMPTS.ANTHROPIC.INSTRUCTIONS_WRAPPER(this.getDefaultSystemPrompt())
});
// Add assistant acknowledgment
formattedMessages.push({
role: 'assistant',
content: PROVIDER_PROMPTS.ANTHROPIC.ACKNOWLEDGMENT
});
// Add all user messages
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
// 5. No special handling needed
else {
// Just add all messages as-is
for (const msg of userAssistantMessages) {
formattedMessages.push(msg);
}
}
console.log(FORMATTER_LOGS.ANTHROPIC.PROCESSED(messages.length, formattedMessages.length));
return formattedMessages;
}
/**
* Clean context content for Anthropic
* Claude works well with XML-structured content
*/
cleanContextContent(content: string): string {
if (!content) return '';
try {
// Convert HTML to a Claude-friendly format
const cleaned = sanitizeHtml(content, {
allowedTags: HTML_ALLOWED_TAGS.STANDARD,
allowedAttributes: HTML_ALLOWED_ATTRIBUTES.STANDARD
});
// Convert to markdown but preserve some structure
let markdown = cleaned;
// Apply all standard HTML to Markdown patterns
const patterns = HTML_TO_MARKDOWN_PATTERNS;
for (const pattern of Object.values(patterns)) {
markdown = markdown.replace(pattern.pattern, pattern.replacement);
}
// Process lists - use the parent class method
markdown = this.processListItems(markdown);
// Fix common HTML entities
const entityPatterns = HTML_ENTITY_REPLACEMENTS;
for (const pattern of Object.values(entityPatterns)) {
markdown = markdown.replace(pattern.pattern, pattern.replacement);
}
return markdown.trim();
} catch (error) {
console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("Anthropic"), error);
return content; // Return original if cleaning fails
}
}
/**
* Get the maximum recommended context length for Anthropic
*/
getMaxContextLength(): number {
return AnthropicMessageFormatter.MAX_CONTEXT_LENGTH;
}
}