]*>(.*?)<\/code>/gi, replacement: '`$1`' },
CODE_BLOCK: { pattern: /]*>(.*?)<\/pre>/gi, replacement: '```\n$1\n```' },
// Clean up
ANY_REMAINING_TAG: { pattern: /<[^>]*>/g, replacement: '' },
EXCESSIVE_NEWLINES: { pattern: /\n{3,}/g, replacement: '\n\n' }
};
/**
 * HTML entity replacements.
 *
 * Each entry pairs a global regular expression that matches one named HTML
 * entity with the plain character it decodes to.
 *
 * Ordering matters when the entries are applied in declaration order:
 * AMP is deliberately the last entry so that escaped markup such as
 * `&amp;lt;` decodes once to `&lt;` instead of twice to `<`.
 */
export const HTML_ENTITY_REPLACEMENTS = {
    // Whitespace: the entity and the literal U+00A0 both become a plain space.
    NBSP: { pattern: /&nbsp;|\u00A0/g, replacement: ' ' },

    // Markup-significant characters.
    LT: { pattern: /&lt;/g, replacement: '<' },
    GT: { pattern: /&gt;/g, replacement: '>' },
    QUOT: { pattern: /&quot;/g, replacement: '"' },
    // `&#39;` is the numeric spelling commonly emitted instead of `&apos;`.
    APOS: { pattern: /&(?:apos|#39);/g, replacement: "'" },

    // Typographic quotes are flattened to their ASCII counterparts. The
    // literal curly characters are accepted alongside the entities so text
    // that was already decoded is normalized the same way.
    LDQUO: { pattern: /&ldquo;|\u201C/g, replacement: '"' },
    RDQUO: { pattern: /&rdquo;|\u201D/g, replacement: '"' },
    LSQUO: { pattern: /&lsquo;|\u2018/g, replacement: "'" },
    RSQUO: { pattern: /&rsquo;|\u2019/g, replacement: "'" },

    // Punctuation entities decode to the matching Unicode character.
    MDASH: { pattern: /&mdash;/g, replacement: '—' },
    NDASH: { pattern: /&ndash;/g, replacement: '–' },
    HELLIP: { pattern: /&hellip;/g, replacement: '…' },

    // Must stay last: decoding `&amp;` earlier would let the entries above
    // decode text that was meant to remain escaped.
    AMP: { pattern: /&amp;/g, replacement: '&' }
};
/**
 * Encoding issue fixes.
 *
 * BROKEN_QUOTES repairs mojibake; UNICODE_REPLACEMENTS maps single Unicode
 * characters to ASCII stand-ins.
 */
export const ENCODING_FIXES = {
    // Mojibake of curly double quotes. The UTF-8 bytes of U+201C / U+201D are
    // E2 80 9C / E2 80 9D; read through code page 437 they show up as
    // "\u0393\u00c7\u00a3" / "\u0393\u00c7\u00a5" (gamma, C-cedilla, pound/yen).
    // The middle character also accepts U+00C2 so sequences matched by the
    // earlier form of this pattern are still repaired.
    BROKEN_QUOTES: { pattern: /\u0393[\u00c2\u00c7][\u00a3\u00a5]/g, replacement: '"' },

    // Character replacements for Unicode
    UNICODE_REPLACEMENTS: {
        '\u00A0': ' ', // Non-breaking space
        '\u2018': "'", // Left single quote
        '\u2019': "'", // Right single quote
        '\u201C': '"', // Left double quote
        '\u201D': '"', // Right double quote
        '\u2013': '-', // En dash
        '\u2014': '--', // Em dash
        '\u2022': '*', // Bullet
        '\u2026': '...' // Ellipsis
    }
};
/**
 * Ollama-specific cleaning patterns.
 *
 * Each entry is a global regular expression plus the ASCII text that
 * replaces every match.
 *
 * NOTE(review): the entries interact if applied one after another.
 * NON_ASCII deletes the very characters the rules above it convert, and
 * WHITESPACE (`\s+` includes newlines) leaves nothing for
 * NEWLINE_WHITESPACE to match — confirm the order used at the call site.
 */
export const OLLAMA_CLEANING = {
    // Replace fancy quotes: curly double quotes (U+201C, U+201D) and curly
    // single quotes (U+2018, U+2019) become their straight ASCII forms.
    QUOTES: { pattern: /[\u201C\u201D]/g, replacement: '"' },
    APOSTROPHES: { pattern: /[\u2018\u2019]/g, replacement: "'" },

    // Replace other Unicode characters
    DASHES: { pattern: /[\u2013\u2014]/g, replacement: '-' }, // en dash, em dash
    BULLETS: { pattern: /[\u2022]/g, replacement: '*' }, // bullet
    ELLIPSES: { pattern: /[\u2026]/g, replacement: '...' }, // horizontal ellipsis

    // Remove non-ASCII characters
    NON_ASCII: { pattern: /[^\x00-\x7F]/g, replacement: '' },

    // Normalize whitespace
    WHITESPACE: { pattern: /\s+/g, replacement: ' ' },
    NEWLINE_WHITESPACE: { pattern: /\n\s+/g, replacement: '\n' }
};
/**
 * Log message text used by the formatters.
 *
 * Each `PROCESSED` builder takes the message count before and after
 * formatting and returns the line to log. The `ERROR` group holds message
 * prefixes.
 */
export const FORMATTER_LOGS = {
    ANTHROPIC: {
        PROCESSED: (before: number, after: number): string =>
            'Anthropic formatter: ' + before + ' messages → ' + after + ' messages',
    },

    OPENAI: {
        PROCESSED: (before: number, after: number): string =>
            'OpenAI formatter: ' + before + ' messages → ' + after + ' messages',
    },

    // Unlike the two builders above, this one spells the transition out in words.
    OLLAMA: {
        PROCESSED: (before: number, after: number): string =>
            'Ollama formatter processed ' + before + ' messages into ' + after + ' messages',
    },

    ERROR: {
        // Prefix ends with a colon.
        CONTEXT_CLEANING: (provider: string): string =>
            'Error cleaning content for ' + provider + ':',
        ENCODING: 'Error fixing encoding issues:',
    },
};
/**
 * Text templates for the message formatters, grouped by provider key.
 */
export const MESSAGE_FORMATTER_TEMPLATES = {
    /** Templates for the OpenAI formatter. */
    OPENAI: {
        // Ends with a blank line.
        CONTEXT_INSTRUCTION: "Please use the following context to respond to the user's messages:\n\n",
    },

    /** Templates for the Anthropic formatter. */
    ANTHROPIC: {
        // Both delimiters consist solely of newline characters.
        CONTEXT_START: '\n\n\n',
        CONTEXT_END: '\n',
    },

    /** Templates for the Ollama formatter. */
    OLLAMA: {
        // Heading preceded by a blank line and followed by a line break.
        REFERENCE_INFORMATION: '\n\nReference information:\n',
    },

    /** Templates for the default formatter. */
    DEFAULT: {
        // Ends with a single trailing space rather than a newline.
        CONTEXT_INSTRUCTION: 'Here is context to help you answer my questions: ',
    },
};
/**
 * Lower-case string identifiers, one per provider key, plus a `default` entry.
 */
export const PROVIDER_IDENTIFIERS = {
    OPENAI: 'openai',
    ANTHROPIC: 'anthropic',
    OLLAMA: 'ollama',

    // Identifier for the non-provider-specific entry.
    DEFAULT: 'default',
};