diff --git a/src/services/llm/ai_service_manager.ts b/src/services/llm/ai_service_manager.ts
index 8d4331d1b..a5fd11c87 100644
--- a/src/services/llm/ai_service_manager.ts
+++ b/src/services/llm/ai_service_manager.ts
@@ -4,6 +4,8 @@
 import { OpenAIService } from './providers/openai_service.js';
 import { AnthropicService } from './providers/anthropic_service.js';
 import { OllamaService } from './providers/ollama_service.js';
 import log from '../log.js';
+import contextExtractor from './context_extractor.js';
+import semanticContextService from './semantic_context_service.js';

 type ServiceProviders = 'openai' | 'anthropic' | 'ollama';
@@ -159,6 +161,26 @@ export class AIServiceManager {
         // If we get here, all providers failed
         throw new Error(`All AI providers failed: ${lastError?.message || 'Unknown error'}`);
     }
+
+    setupEventListeners() {
+        // Setup event listeners for AI services
+    }
+
+    /**
+     * Get the context extractor service
+     * @returns The context extractor instance
+     */
+    getContextExtractor() {
+        return contextExtractor;
+    }
+
+    /**
+     * Get the semantic context service for advanced context handling
+     * @returns The semantic context service instance
+     */
+    getSemanticContextService() {
+        return semanticContextService;
+    }
 }

 // Don't create singleton immediately, use a lazy-loading pattern
@@ -185,5 +207,12 @@ export default {
     },
     async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> {
         return getInstance().generateChatCompletion(messages, options);
+    },
+    // Add our new methods
+    getContextExtractor() {
+        return getInstance().getContextExtractor();
+    },
+    getSemanticContextService() {
+        return getInstance().getSemanticContextService();
     }
 };
diff --git a/src/services/llm/chat_service.ts b/src/services/llm/chat_service.ts
index fbb9419e4..416ca4131 100644
--- a/src/services/llm/chat_service.ts
+++ b/src/services/llm/chat_service.ts
@@ -152,10 +152,28 @@ export class ChatService {

     /**
      * Add context from the current note to the chat
+     *
+     * @param sessionId - The ID of the chat session
+     * @param noteId - The ID of the note to add context from
+     * @param useSmartContext - Whether to use smart context extraction (default: true)
+     * @returns The updated chat session
      */
-    async addNoteContext(sessionId: string, noteId: string): Promise<ChatSession> {
+    async addNoteContext(sessionId: string, noteId: string, useSmartContext = true): Promise<ChatSession> {
         const session = await this.getOrCreateSession(sessionId);
-        const context = await contextExtractor.getFullContext(noteId);
+
+        // Get the last user message to use as context for semantic search
+        const lastUserMessage = [...session.messages].reverse()
+            .find(msg => msg.role === 'user' && msg.content.length > 10)?.content || '';
+
+        let context;
+
+        if (useSmartContext && lastUserMessage) {
+            // Use smart context that considers the query for better relevance
+            context = await contextExtractor.getSmartContext(noteId, lastUserMessage);
+        } else {
+            // Fall back to full context if smart context is disabled or no query available
+            context = await contextExtractor.getFullContext(noteId);
+        }

         const contextMessage: Message = {
             role: 'user',
@@ -168,6 +186,61 @@ export class ChatService {

         return session;
     }

+    /**
+     * Add semantically relevant context from a note based on a specific query
+     *
+     * @param sessionId - The ID of the chat session
+     * @param noteId - The ID of the note to add context from
+     * @param query - The specific query to find relevant information for
+     * @returns The updated chat session
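+     *
+     * @example
+     * // Illustrative usage only; assumes an existing ChatService instance named `chatService`
+     * // and real session/note IDs:
+     * // const session = await chatService.addSemanticNoteContext('sessionId', 'noteId', 'how is sync configured?');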
+     */
+    async addSemanticNoteContext(sessionId: string, noteId: string, query: string): Promise<ChatSession> {
+        const session = await this.getOrCreateSession(sessionId);
+
+        // Use semantic context that considers the query for better relevance
+        const context = await contextExtractor.getSemanticContext(noteId, query);
+
+        const contextMessage: Message = {
+            role: 'user',
+            content: `Here is the relevant information from my notes based on my query "${query}":\n\n${context}\n\nPlease help me understand this information in relation to my query.`
+        };
+
+        session.messages.push(contextMessage);
+        await chatStorageService.updateChat(session.id, session.messages);
+
+        return session;
+    }
+
+    /**
+     * Send a context-aware message with automatically included semantic context from a note
+     * This method combines the query with relevant note context before sending to the AI
+     *
+     * @param sessionId - The ID of the chat session
+     * @param content - The user's message content
+     * @param noteId - The ID of the note to add context from
+     * @param options - Optional completion options
+     * @param streamCallback - Optional streaming callback
+     * @returns The updated chat session
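+     *
+     * @example
+     * // Illustrative usage only; assumes an existing ChatService instance named `chatService`:
+     * // const session = await chatService.sendContextAwareMessage(
+     * //     'sessionId', 'What does this note say about backups?', 'noteId');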
+     */
+    async sendContextAwareMessage(
+        sessionId: string,
+        content: string,
+        noteId: string,
+        options?: ChatCompletionOptions,
+        streamCallback?: (content: string, isDone: boolean) => void
+    ): Promise<ChatSession> {
+        const session = await this.getOrCreateSession(sessionId);
+
+        // Get semantically relevant context based on the user's message
+        const context = await contextExtractor.getSmartContext(noteId, content);
+
+        // Combine the user's message with the relevant context
+        const enhancedContent = `${content}\n\nHere's relevant information from my notes that may help:\n\n${context}`;
+
+        // Send the enhanced message
+        return this.sendMessage(sessionId, enhancedContent, options, streamCallback);
+    }
+
     /**
      * Get all user's chat sessions
      */
diff --git a/src/services/llm/context_extractor.ts b/src/services/llm/context_extractor.ts
index 41517035f..3c9115cf7 100644
--- a/src/services/llm/context_extractor.ts
+++ b/src/services/llm/context_extractor.ts
@@ -3,6 +3,7 @@
 import sanitizeHtml from 'sanitize-html';

 /**
  * Utility class for extracting context from notes to provide to AI models
+ * Enhanced with advanced capabilities for handling large notes and specialized content
  */
 export class ContextExtractor {
     /**
@@ -24,6 +25,158 @@ export class ContextExtractor {
         return this.formatNoteContent(note.content, note.type, note.mime, note.title);
     }

+    /**
+     * Split a large note into smaller, semantically meaningful chunks
+     * This is useful for handling large notes that exceed the context window of LLMs
+     *
+     * @param noteId - The ID of the note to chunk
+     * @param maxChunkSize - Maximum size of each chunk in characters
+     * @returns Array of content chunks, or empty array if note not found
+     */
+    async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
+        const content = await this.getNoteContent(noteId);
+        if (!content) return [];
+
+        // Split into semantic chunks (paragraphs, sections, etc.)
+        return this.splitContentIntoChunks(content, maxChunkSize);
+    }
+
+    /**
+     * Split text content into semantically meaningful chunks based on natural boundaries
+     * like paragraphs, headings, and code blocks
+     *
+     * @param content - The text content to split
+     * @param maxChunkSize - Maximum size of each chunk in characters
+     * @returns Array of content chunks
+     */
+    private splitContentIntoChunks(content: string, maxChunkSize: number): string[] {
+        // Look for semantic boundaries (headings, blank lines, etc.)
+        const headingPattern = /^(#+)\s+(.+)$/gm;
+        const codeBlockPattern = /```[\s\S]+?```/gm;
+
+        // Replace code blocks with placeholders to avoid splitting inside them
+        const codeBlocks: string[] = [];
+        let contentWithPlaceholders = content.replace(codeBlockPattern, (match) => {
+            const placeholder = `__CODE_BLOCK_${codeBlocks.length}__`;
+            codeBlocks.push(match);
+            return placeholder;
+        });
+
+        // Split content at headings and paragraphs
+        const sections: string[] = [];
+        let currentSection = '';
+
+        // First split by headings
+        const lines = contentWithPlaceholders.split('\n');
+        for (const line of lines) {
+            const isHeading = headingPattern.test(line);
+            headingPattern.lastIndex = 0; // Reset regex
+
+            // If this is a heading and we already have content, start a new section
+            if (isHeading && currentSection.trim().length > 0) {
+                sections.push(currentSection.trim());
+                currentSection = line;
+            } else {
+                currentSection += (currentSection ? '\n' : '') + line;
+            }
+        }
+
+        // Add the last section if there's any content
+        if (currentSection.trim().length > 0) {
+            sections.push(currentSection.trim());
+        }
+
+        // Now combine smaller sections to respect maxChunkSize
+        const chunks: string[] = [];
+        let currentChunk = '';
+
+        for (const section of sections) {
+            // If adding this section exceeds maxChunkSize and we already have content,
+            // finalize the current chunk and start a new one
+            if ((currentChunk + section).length > maxChunkSize && currentChunk.length > 0) {
+                chunks.push(currentChunk);
+                currentChunk = section;
+            } else {
+                currentChunk += (currentChunk ? '\n\n' : '') + section;
+            }
+        }
+
+        // Add the last chunk if there's any content
+        if (currentChunk.length > 0) {
+            chunks.push(currentChunk);
+        }
+
+        // Restore code blocks in all chunks
+        return chunks.map(chunk => {
+            return chunk.replace(/__CODE_BLOCK_(\d+)__/g, (_, index) => {
+                return codeBlocks[parseInt(index)];
+            });
+        });
+    }
+
+    /**
+     * Generate a summary of a note's content
+     * Useful for providing a condensed version of very large notes
+     *
+     * @param noteId - The ID of the note to summarize
+     * @param maxLength - Cut-off length to trigger summarization
+     * @returns Summary of the note or the original content if small enough
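+     *
+     * @example
+     * // Illustrative: notes shorter than maxLength are returned unchanged; larger
+     * // ones are reduced to an outline plus key points.
+     * // const summary = await contextExtractor.getNoteSummary('noteId');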
+     */
+    async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
+        const content = await this.getNoteContent(noteId);
+        if (!content || content.length < maxLength) return content || '';
+
+        // For larger content, generate a summary
+        return this.summarizeContent(content);
+    }
+
+    /**
+     * Summarize content by extracting key information
+     * This uses a heuristic approach to find important sentences and paragraphs
+     *
+     * @param content - The content to summarize
+     * @returns A summarized version of the content
+     */
+    private summarizeContent(content: string): string {
+        // Extract title/heading if present
+        const titleMatch = content.match(/^# (.+)$/m);
+        const title = titleMatch ? titleMatch[1] : 'Untitled Note';
+
+        // Extract all headings for an outline
+        const headings: string[] = [];
+        const headingMatches = content.matchAll(/^(#+)\s+(.+)$/gm);
+        for (const match of headingMatches) {
+            const level = match[1].length;
+            const text = match[2];
+            headings.push(`${' '.repeat(level - 1)}- ${text}`);
+        }
+
+        // Extract first sentence of each paragraph for a summary
+        const paragraphs = content.split(/\n\s*\n/);
+        const firstSentences = paragraphs
+            .filter(p => p.trim().length > 0 && !p.trim().startsWith('#') && !p.trim().startsWith('```'))
+            .map(p => {
+                const sentenceMatch = p.match(/^[^.!?]+[.!?]/);
+                return sentenceMatch ? sentenceMatch[0].trim() : p.substring(0, Math.min(150, p.length)).trim() + '...';
+            })
+            .slice(0, 5); // Limit to 5 sentences
+
+        // Create the summary
+        let summary = `# Summary of: ${title}\n\n`;
+
+        if (headings.length > 0) {
+            summary += `## Document Outline\n${headings.join('\n')}\n\n`;
+        }
+
+        if (firstSentences.length > 0) {
+            summary += `## Key Points\n${firstSentences.map(s => `- ${s}`).join('\n')}\n\n`;
+        }
+
+        summary += `(Note: This is an automatically generated summary of a larger document with ${content.length} characters)`;
+
+        return summary;
+    }
+
     /**
      * Get a set of parent notes to provide hierarchical context
      */
@@ -89,6 +242,7 @@ export class ContextExtractor {

     /**
      * Format the content of a note based on its type
+     * Enhanced with better handling for large and specialized content types
      */
     private formatNoteContent(content: string, type: string, mime: string, title: string): string {
         let formattedContent = `# ${title}\n\n`;
@@ -98,10 +252,19 @@ export class ContextExtractor {
                 // Remove HTML formatting for text notes
                 formattedContent += this.sanitizeHtml(content);
                 break;
+
             case 'code':
-                // Format code notes with code blocks
-                formattedContent += '```\n' + content + '\n```';
+                // Improved code handling with language detection
+                const codeLanguage = this.detectCodeLanguage(content, mime);
+
+                // For large code files, extract structure rather than full content
+                if (content.length > 8000) {
+                    formattedContent += this.extractCodeStructure(content, codeLanguage);
+                } else {
+                    formattedContent += `\`\`\`${codeLanguage}\n${content}\n\`\`\``;
+                }
                 break;
+
             case 'canvas':
                 if (mime === 'application/json') {
                     try {
@@ -249,6 +412,230 @@ export class ContextExtractor {
         return formattedContent;
     }

+    /**
+     * Detect the programming language of code content
+     *
+     * @param content - The code content to analyze
+     * @param mime - MIME type (if available)
+     * @returns The detected language or empty string
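+     *
+     * @example
+     * // Illustrative: a note with mime 'text/x-python', or content starting with
+     * // 'def main():', is detected as 'python'; unrecognized content returns ''.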
+     */
+    private detectCodeLanguage(content: string, mime: string): string {
+        // First check if mime type provides a hint
+        if (mime) {
+            const mimeMap: Record<string, string> = {
+                'text/x-python': 'python',
+                'text/javascript': 'javascript',
+                'application/javascript': 'javascript',
+                'text/typescript': 'typescript',
+                'application/typescript': 'typescript',
+                'text/x-java': 'java',
+                'text/html': 'html',
+                'text/css': 'css',
+                'text/x-c': 'c',
+                'text/x-c++': 'cpp',
+                'text/x-csharp': 'csharp',
+                'text/x-go': 'go',
+                'text/x-ruby': 'ruby',
+                'text/x-php': 'php',
+                'text/x-swift': 'swift',
+                'text/x-rust': 'rust',
+                'text/markdown': 'markdown',
+                'text/x-sql': 'sql',
+                'text/x-yaml': 'yaml',
+                'application/json': 'json',
+                'text/x-shell': 'bash'
+            };
+
+            for (const [mimePattern, language] of Object.entries(mimeMap)) {
+                if (mime.includes(mimePattern)) {
+                    return language;
+                }
+            }
+        }
+
+        // Check for common language patterns in the content
+        const firstLines = content.split('\n', 20).join('\n');
+
+        const languagePatterns: Record<string, RegExp> = {
+            'python': /^(import\s+|from\s+\w+\s+import|def\s+\w+\s*\(|class\s+\w+\s*:)/m,
+            'javascript': /^(const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=|function\s+\w+\s*\(|import\s+.*from\s+)/m,
+            'typescript': /^(interface\s+\w+|type\s+\w+\s*=|class\s+\w+\s*{)/m,
+            'html': /^(<!DOCTYPE html|<html|<head|<body)/im,
+            'css': /^(\.\w+\s*{|\#\w+\s*{|@media|@import)/m,
+            'java': /^(public\s+class|import\s+java|package\s+)/m,
+            'cpp': /^(#include\s+<\w+>|namespace\s+\w+|void\s+\w+\s*\()/m,
+            'csharp': /^(using\s+System|namespace\s+\w+|public\s+class)/m,
+            'go': /^(package\s+\w+|import\s+\(|func\s+\w+\s*\()/m,
+            'ruby': /^(require\s+|class\s+\w+\s*<|def\s+\w+)/m,
+            'php': /^(<\?php|namespace\s+\w+|use\s+\w+)/m,
+            'sql': /^(SELECT|INSERT|UPDATE|DELETE|CREATE TABLE|ALTER TABLE)/im,
+            'bash': /^(#!\/bin\/sh|#!\/bin\/bash|function\s+\w+\s*\(\))/m,
+            'markdown': /^(#\s+|##\s+|###\s+|\*\s+|-\s+|>\s+)/m,
+            'json': /^({[\s\n]*"|[\s\n]*\[)/m,
+            'yaml': /^(---|\w+:\s+)/m
+        };
+
+        for (const [language, pattern] of Object.entries(languagePatterns)) {
+            if (pattern.test(firstLines)) {
+                return language;
+            }
+        }
+
+        // Default to empty string if we can't detect the language
+        return '';
+    }
+
+    /**
+     * Extract the structure of a code file rather than its full content
+     * Useful for providing high-level understanding of large code files
+     *
+     * @param content - The full code content
+     * @param language - The programming language
+     * @returns A structured representation of the code
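+     *
+     * @example
+     * // Illustrative: for a file well over the size limit, the result is a
+     * // "# Code Structure (N lines total)" digest with Imports, Classes/Interfaces
+     * // and Functions sections instead of the full source.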
+     */
+    private extractCodeStructure(content: string, language: string): string {
+        const lines = content.split('\n');
+        const maxLines = 8000;
+
+        // If it's not that much over the limit, just include the whole thing
+        if (lines.length <= maxLines * 1.2) {
+            return `\`\`\`${language}\n${content}\n\`\`\``;
+        }
+
+        // For large files, extract important structural elements based on language
+        let extractedStructure = '';
+        let importSection = '';
+        let classDefinitions = [];
+        let functionDefinitions = [];
+        let otherImportantLines = [];
+
+        // Extract imports/includes, class/function definitions based on language
+        if (['javascript', 'typescript', 'python', 'java', 'csharp'].includes(language)) {
+            // Find imports
+            for (let i = 0; i < Math.min(100, lines.length); i++) {
+                if (lines[i].match(/^(import|from|using|require|#include|package)\s+/)) {
+                    importSection += lines[i] + '\n';
+                }
+            }
+
+            // Find class definitions
+            for (let i = 0; i < lines.length; i++) {
+                if (lines[i].match(/^(class|interface|type)\s+\w+/)) {
+                    const endBracketLine = this.findMatchingEnd(lines, i, language);
+                    if (endBracketLine > i && endBracketLine <= i + 10) {
+                        // Include small class definitions entirely
+                        classDefinitions.push(lines.slice(i, endBracketLine + 1).join('\n'));
+                        i = endBracketLine;
+                    } else {
+                        // For larger classes, just show the definition and methods
+                        let className = lines[i];
+                        classDefinitions.push(className);
+
+                        // Look for methods in this class
+                        for (let j = i + 1; j < Math.min(endBracketLine, lines.length); j++) {
+                            if (lines[j].match(/^\s+(function|def|public|private|protected)\s+\w+/)) {
+                                classDefinitions.push(' ' + lines[j].trim());
+                            }
+                        }
+
+                        if (endBracketLine > 0 && endBracketLine < lines.length) {
+                            i = endBracketLine;
+                        }
+                    }
+                }
+            }
+
+            // Find function definitions not inside classes
+            for (let i = 0; i < lines.length; i++) {
+                if (lines[i].match(/^(function|def|const\s+\w+\s*=\s*\(|let\s+\w+\s*=\s*\(|var\s+\w+\s*=\s*\()/)) {
+                    functionDefinitions.push(lines[i]);
+                }
+            }
+        }
+
+        // Build the extracted structure
+        extractedStructure += `# Code Structure (${lines.length} lines total)\n\n`;
+
+        if (importSection) {
+            extractedStructure += "## Imports/Dependencies\n```" + language + "\n" + importSection + "```\n\n";
+        }
+
+        if (classDefinitions.length > 0) {
+            extractedStructure += "## Classes/Interfaces\n```" + language + "\n" + classDefinitions.join('\n\n') + "\n```\n\n";
+        }
+
+        if (functionDefinitions.length > 0) {
+            extractedStructure += "## Functions\n```" + language + "\n" + functionDefinitions.join('\n\n') + "\n```\n\n";
+        }
+
+        // Add beginning and end of the file for context
+        extractedStructure += "## Beginning of File\n```" + language + "\n" +
+            lines.slice(0, Math.min(50, lines.length)).join('\n') + "\n```\n\n";
+
+        if (lines.length > 100) {
+            extractedStructure += "## End of File\n```" + language + "\n" +
+                lines.slice(Math.max(0, lines.length - 50)).join('\n') + "\n```\n\n";
+        }
+
+        return extractedStructure;
+    }
+
+    /**
+     * Find the line number of the matching ending bracket/block
+     *
+     * @param lines - Array of code lines
+     * @param startLine - Starting line number
+     * @param language - Programming language
+     * @returns The line number of the matching end, or -1 if not found
+     */
+    private findMatchingEnd(lines: string[], startLine: number, language: string): number {
+        let depth = 0;
+        let inClass = false;
+
+        // Different languages have different ways to define blocks
+        if (['javascript', 'typescript', 'java', 'csharp', 'cpp'].includes(language)) {
+            // Curly brace languages
+            for (let i = startLine; i < lines.length; i++) {
+                const line = lines[i];
+                // Count opening braces
+                for (const char of line) {
+                    if (char === '{') depth++;
+                    if (char === '}') {
+                        depth--;
+                        if (depth === 0 && inClass) return i;
+                    }
+                }
+
+                // Check if this line contains the class declaration
+                if (i === startLine && line.includes('{')) {
+                    inClass = true;
+                } else if (i === startLine) {
+                    // If the first line doesn't have an opening brace, look at the next few lines
+                    if (i + 1 < lines.length && lines[i + 1].includes('{')) {
+                        inClass = true;
+                    }
+                }
+            }
+        } else if (language === 'python') {
+            // Indentation-based language
+            const baseIndentation = lines[startLine].match(/^\s*/)?.[0].length || 0;
+
+            for (let i = startLine + 1; i < lines.length; i++) {
+                // Skip empty lines
+                if (lines[i].trim() === '') continue;
+
+                const currentIndentation = lines[i].match(/^\s*/)?.[0].length || 0;
+
+                // If we're back to the same or lower indentation level, we've reached the end
+                if (currentIndentation <= baseIndentation) {
+                    return i - 1;
+                }
+            }
+        }
+
+        return -1;
+    }
+
     /**
      * Sanitize HTML content to plain text
      */
@@ -328,6 +715,88 @@ export class ContextExtractor {
             linkedContext
         ].filter(Boolean).join('\n\n');
     }
+
+    /**
+     * Get semantically ranked context based on semantic similarity to a query
+     * This method delegates to the semantic context service for the actual ranking
+     *
+     * @param noteId - The ID of the current note
+     * @param query - The user's query to compare against
+     * @param maxResults - Maximum number of related notes to include
+     * @returns Context with the most semantically relevant related notes
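+     *
+     * @example
+     * // Illustrative usage with the exported contextExtractor singleton:
+     * // const context = await contextExtractor.getSemanticContext('noteId', 'how is sync configured?');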
+     */
+    async getSemanticContext(noteId: string, query: string, maxResults = 5): Promise<string> {
+        try {
+            // This requires the semantic context service to be available
+            // We're using a dynamic import to avoid circular dependencies
+            const { default: aiServiceManager } = await import('./ai_service_manager.js');
+            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();
+
+            if (!semanticContext) {
+                return this.getFullContext(noteId);
+            }
+
+            return await semanticContext.getSemanticContext(noteId, query, maxResults);
+        } catch (error) {
+            // Fall back to regular context if semantic ranking fails
+            console.error('Error in semantic context ranking:', error);
+            return this.getFullContext(noteId);
+        }
+    }
+
+    /**
+     * Get progressively loaded context based on depth level
+     * This provides different levels of context detail depending on the depth parameter
+     *
+     * @param noteId - The ID of the note to get context for
+     * @param depth - Depth level (1-4) determining how much context to include
+     * @returns Context appropriate for the requested depth
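+     *
+     * @example
+     * // Illustrative: depth 1 = note only, 2 = + parents, 3 = + children, 4 = + linked notes
+     * // const context = await contextExtractor.getProgressiveContext('noteId', 3);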
+     */
+    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
+        try {
+            // This requires the semantic context service to be available
+            // We're using a dynamic import to avoid circular dependencies
+            const { default: aiServiceManager } = await import('./ai_service_manager.js');
+            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();
+
+            if (!semanticContext) {
+                return this.getFullContext(noteId);
+            }
+
+            return await semanticContext.getProgressiveContext(noteId, depth);
+        } catch (error) {
+            // Fall back to regular context if progressive loading fails
+            console.error('Error in progressive context loading:', error);
+            return this.getFullContext(noteId);
+        }
+    }
+
+    /**
+     * Get smart context based on the query complexity
+     * This automatically selects the appropriate context depth and relevance
+     *
+     * @param noteId - The ID of the note to get context for
+     * @param query - The user's query for semantic relevance matching
+     * @returns The optimal context for answering the query
+     */
+    async getSmartContext(noteId: string, query: string): Promise<string> {
+        try {
+            // This requires the semantic context service to be available
+            // We're using a dynamic import to avoid circular dependencies
+            const { default: aiServiceManager } = await import('./ai_service_manager.js');
+            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();
+
+            if (!semanticContext) {
+                return this.getFullContext(noteId);
+            }
+
+            return await semanticContext.getSmartContext(noteId, query);
+        } catch (error) {
+            // Fall back to regular context if smart context fails
+            console.error('Error in smart context selection:', error);
+            return this.getFullContext(noteId);
+        }
+    }
 }

 // Singleton instance
diff --git a/src/services/llm/semantic_context_service.ts b/src/services/llm/semantic_context_service.ts
new file mode 100644
index 000000000..2b215abf5
--- /dev/null
+++ b/src/services/llm/semantic_context_service.ts
@@ -0,0 +1,401 @@
+import contextExtractor from './context_extractor.js';
+import * as vectorStore from './embeddings/vector_store.js';
+import sql from '../sql.js';
+import { cosineSimilarity } from './embeddings/vector_store.js';
+import log from '../log.js';
+import { getEmbeddingProvider, getEnabledEmbeddingProviders } from './embeddings/providers.js';
+import options from '../options.js';
+
+/**
+ * SEMANTIC CONTEXT SERVICE
+ *
+ * This service provides advanced context extraction capabilities for AI models.
+ * It enhances the basic context extractor with vector embedding-based semantic
+ * search and progressive context loading for large notes.
+ *
+ * === USAGE GUIDE ===
+ *
+ * 1. To use this service in other modules:
+ * ```
+ * import aiServiceManager from './services/llm/ai_service_manager.js';
+ * const semanticContext = aiServiceManager.getSemanticContextService();
+ * ```
+ *
+ * Or with the instance directly:
+ * ```
+ * import aiServiceManager from './services/llm/ai_service_manager.js';
+ * const semanticContext = aiServiceManager.getInstance().getSemanticContextService();
+ * ```
+ *
+ * 2. Retrieve context based on semantic relevance to a query:
+ * ```
+ * const context = await semanticContext.getSemanticContext(noteId, userQuery);
+ * ```
+ *
+ * 3. Load context progressively (only what's needed):
+ * ```
+ * const context = await semanticContext.getProgressiveContext(noteId, depth);
+ * // depth: 1=just note, 2=+parents, 3=+children, 4=+linked notes
+ * ```
+ *
+ * 4. Use smart context selection that adapts to query complexity:
+ * ```
+ * const context = await semanticContext.getSmartContext(noteId, userQuery);
+ * ```
+ *
+ * === REQUIREMENTS ===
+ *
+ * - Requires at least one configured embedding provider (OpenAI, Anthropic, Ollama)
+ * - Will fall back to non-semantic methods if no embedding provider is available
+ * - Uses OpenAI embeddings by default if API key is configured
+ */
+
+/**
+ * Provides advanced semantic context capabilities, enhancing the basic context extractor
+ * with vector embedding-based semantic search and progressive context loading.
+ *
+ * This service is especially useful for retrieving the most relevant context from large
+ * knowledge bases when working with limited-context LLMs.
+ */
+class SemanticContextService {
+    /**
+     * Get the preferred embedding provider based on user settings
+     * Tries to use the most appropriate provider in this order:
+     * 1. OpenAI if API key is set
+     * 2. Anthropic if API key is set
+     * 3. Ollama if configured
+     * 4. Any available provider
+     * 5. Local provider as fallback
+     *
+     * @returns The preferred embedding provider or null if none available
+     */
+    private async getPreferredEmbeddingProvider(): Promise<any> {
+        // Try to get provider in order of preference
+        const openaiKey = await options.getOption('openaiApiKey');
+        if (openaiKey) {
+            const provider = await getEmbeddingProvider('openai');
+            if (provider) return provider;
+        }
+
+        const anthropicKey = await options.getOption('anthropicApiKey');
+        if (anthropicKey) {
+            const provider = await getEmbeddingProvider('anthropic');
+            if (provider) return provider;
+        }
+
+        // If neither of the preferred providers is available, get any provider
+        const providers = await getEnabledEmbeddingProviders();
+        if (providers.length > 0) {
+            return providers[0];
+        }
+
+        // Last resort is local provider
+        return await getEmbeddingProvider('local');
+    }
+
+    /**
+     * Generate embeddings for a text query
+     *
+     * @param query - The text query to embed
+     * @returns The generated embedding or null if failed
+     */
+    private async generateQueryEmbedding(query: string): Promise<any> {
+        try {
+            // Get the preferred embedding provider
+            const provider = await this.getPreferredEmbeddingProvider();
+            if (!provider) {
+                return null;
+            }
+            return await provider.generateEmbeddings(query);
+        } catch (error) {
+            log.error(`Error generating query embedding: ${error}`);
+            return null;
+        }
+    }
+
+    /**
+     * Rank notes by semantic relevance to a query using vector similarity
+     *
+     * @param notes - Array of notes with noteId and title
+     * @param userQuery - The user's query to compare against
+     * @returns Sorted array of notes with relevance score
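+     *
+     * @example
+     * // Illustrative usage; relevance is a cosine-similarity score (higher = more relevant):
+     * // const ranked = await semanticContextService.rankNotesByRelevance(
+     * //     [{ noteId: 'abc123', title: 'Docker setup' }], 'how do I deploy with docker?');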
+     */
+    async rankNotesByRelevance(
+        notes: Array<{noteId: string, title: string}>,
+        userQuery: string
+    ): Promise<Array<{noteId: string, title: string, relevance: number}>> {
+        const queryEmbedding = await this.generateQueryEmbedding(userQuery);
+        if (!queryEmbedding) {
+            // If embedding fails, return notes in original order
+            return notes.map(note => ({ ...note, relevance: 0 }));
+        }
+
+        const provider = await this.getPreferredEmbeddingProvider();
+        if (!provider) {
+            return notes.map(note => ({ ...note, relevance: 0 }));
+        }
+
+        const rankedNotes = [];
+
+        for (const note of notes) {
+            // Get note embedding from vector store or generate it if not exists
+            let noteEmbedding = null;
+            try {
+                const embeddingResult = await vectorStore.getEmbeddingForNote(
+                    note.noteId,
+                    provider.name,
+                    provider.getConfig().model || ''
+                );
+
+                if (embeddingResult) {
+                    noteEmbedding = embeddingResult.embedding;
+                }
+            } catch (error) {
+                log.error(`Error retrieving embedding for note ${note.noteId}: ${error}`);
+            }
+
+            if (!noteEmbedding) {
+                // If note doesn't have an embedding yet, get content and generate one
+                const content = await contextExtractor.getNoteContent(note.noteId);
+                if (content && provider) {
+                    try {
+                        noteEmbedding = await provider.generateEmbeddings(content);
+                        // Store the embedding for future use
+                        await vectorStore.storeNoteEmbedding(
+                            note.noteId,
+                            provider.name,
+                            provider.getConfig().model || '',
+                            noteEmbedding
+                        );
+                    } catch (error) {
+                        log.error(`Error generating embedding for note ${note.noteId}: ${error}`);
+                    }
+                }
+            }
+
+            let relevance = 0;
+            if (noteEmbedding) {
+                // Calculate cosine similarity between query and note
+                relevance = cosineSimilarity(queryEmbedding, noteEmbedding);
+            }
+
+            rankedNotes.push({
+                ...note,
+                relevance
+            });
+        }
+
+        // Sort by relevance (highest first)
+        return rankedNotes.sort((a, b) => b.relevance - a.relevance);
+    }
+
+    /**
+     * Retrieve semantic context based on relevance to user query
+     * Finds the most semantically similar notes to the user's query
+     *
+     * @param noteId - Base note ID to start the search from
+     * @param userQuery - Query to find relevant context for
+     * @param maxResults - Maximum number of notes to include in context
+     * @returns Formatted context with the most relevant notes
+     */
+    async getSemanticContext(noteId: string, userQuery: string, maxResults = 5): Promise<string> {
+        // Get related notes (parents, children, linked notes)
+        const [
+            parentNotes,
+            childNotes,
+            linkedNotes
+        ] = await Promise.all([
+            this.getParentNotes(noteId, 3),
+            this.getChildNotes(noteId, 10),
+            this.getLinkedNotes(noteId, 10)
+        ]);
+
+        // Combine all related notes
+        const allRelatedNotes = [...parentNotes, ...childNotes, ...linkedNotes];
+
+        // If no related notes, return empty context
+        if (allRelatedNotes.length === 0) {
+            return '';
+        }
+
+        // Rank notes by relevance to query
+        const rankedNotes = await this.rankNotesByRelevance(allRelatedNotes, userQuery);
+
+        // Get content for the top N most relevant notes
+        const mostRelevantNotes = rankedNotes.slice(0, maxResults);
+        const relevantContent = await Promise.all(
+            mostRelevantNotes.map(async note => {
+                const content = await contextExtractor.getNoteContent(note.noteId);
+                if (!content) return null;
+
+                // Format with relevance score and title
+                return `### ${note.title} (Relevance: ${Math.round(note.relevance * 100)}%)\n\n${content}`;
+            })
+        );
+
+        // If no content retrieved, return empty string
+        if (!relevantContent.filter(Boolean).length) {
+            return '';
+        }
+
+        return `# Relevant Context\n\nThe following notes are most relevant to your query:\n\n${
+            relevantContent.filter(Boolean).join('\n\n---\n\n')
+        }`;
+    }
+
+    /**
+     * Load context progressively based on depth level
+     * This allows starting with minimal context and expanding as needed
+     *
+     * @param noteId - The ID of the note to get context for
+     * @param depth - Depth level (1-4) determining how much context to include
+     * @returns Context appropriate for the requested depth
+     */
+    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
+        // Start with the note content
+        const noteContent = await contextExtractor.getNoteContent(noteId);
+        if (!noteContent) return 'Note not found';
+
+        // If depth is 1, just return the note content
+        if (depth <= 1) return noteContent;
+
+        // Add parent context for depth >= 2
+        const parentContext = await contextExtractor.getParentContext(noteId);
+        if (depth <= 2) return `${parentContext}\n\n${noteContent}`;
+
+        // Add child context for depth >= 3
+        const childContext = await contextExtractor.getChildContext(noteId);
+        if (depth <= 3) return `${parentContext}\n\n${noteContent}\n\n${childContext}`;
+
+        // Add linked notes for depth >= 4
+        const linkedContext = await contextExtractor.getLinkedNotesContext(noteId);
+        return `${parentContext}\n\n${noteContent}\n\n${childContext}\n\n${linkedContext}`;
+    }
+
+    /**
+     * Get parent notes in the hierarchy
+     * Helper method that queries the database directly
+     */
+    private async getParentNotes(noteId: string, maxDepth: number): Promise<{noteId: string, title: string}[]> {
+        const parentNotes: {noteId: string, title: string}[] = [];
+        let currentNoteId = noteId;
+
+        for (let i = 0; i < maxDepth; i++) {
+            const parent = await sql.getRow<{parentNoteId: string, title: string}>(
+                `SELECT branches.parentNoteId, notes.title
+                 FROM branches
+                 JOIN notes ON branches.parentNoteId = notes.noteId
+                 WHERE branches.noteId = ? AND branches.isDeleted = 0
+                 LIMIT 1`,
+                [currentNoteId]
+            );
+
+            if (!parent || parent.parentNoteId === 'root') {
+                break;
+            }
+
+            parentNotes.unshift({
+                noteId: parent.parentNoteId,
+                title: parent.title
+            });
+
+            currentNoteId = parent.parentNoteId;
+        }
+
+        return parentNotes;
+    }
+
+    /**
+     * Get child notes
+     * Helper method that queries the database directly
+     */
+    private async getChildNotes(noteId: string, maxChildren: number): Promise<{noteId: string, title: string}[]> {
+        // Parent/child relationships live in the branches table, so join it with notes
+        return await sql.getRows<{noteId: string, title: string}>(
+            `SELECT notes.noteId, notes.title
+             FROM branches
+             JOIN notes ON branches.noteId = notes.noteId
+             WHERE branches.parentNoteId = ? AND branches.isDeleted = 0
+             LIMIT ?`,
+            [noteId, maxChildren]
+        );
+    }
+
+    /**
+     * Get linked notes
+     * Helper method that queries the database directly
+     */
+    private async getLinkedNotes(noteId: string, maxLinks: number): Promise<{noteId: string, title: string}[]> {
+        return await sql.getRows<{noteId: string, title: string}>(
+            `SELECT noteId, title FROM notes
+             WHERE noteId IN (
+                 SELECT value FROM attributes
+                 WHERE noteId = ? AND type = 'relation'
+                 LIMIT ?
+             )`,
+            [noteId, maxLinks]
+        );
+    }
+
+    /**
+     * Smart context selection that combines semantic matching with progressive loading
+     * Returns the most appropriate context based on the query and available information
+     *
+     * @param noteId - The ID of the note to get context for
+     * @param userQuery - The user's query for semantic relevance matching
+     * @returns The optimal context for answering the query
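+     *
+     * @example
+     * // Illustrative: uses semantic ranking when an embedding provider is configured,
+     * // otherwise falls back to progressive context sized by query complexity.
+     * // const context = await semanticContextService.getSmartContext('noteId', 'compare sync and backup');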
+     */
+    async getSmartContext(noteId: string, userQuery: string): Promise<string> {
+        // Check if embedding provider is available
+        const provider = await this.getPreferredEmbeddingProvider();
+
+        if (provider) {
+            try {
+                const semanticContext = await this.getSemanticContext(noteId, userQuery);
+                if (semanticContext) {
+                    return semanticContext;
+                }
+            } catch (error) {
+                log.error(`Error getting semantic context: ${error}`);
+                // Fall back to progressive context if semantic fails
+            }
+        }
+
+        // Default to progressive context with appropriate depth based on query complexity
+        // Simple queries get less context, complex ones get more
+        const queryComplexity = this.estimateQueryComplexity(userQuery);
+        const depth = Math.min(4, Math.max(1, queryComplexity));
+
+        return this.getProgressiveContext(noteId, depth);
+    }
+
+    /**
+     * Estimate query complexity to determine appropriate context depth
+     *
+     * @param query - The user's query string
+     * @returns Complexity score from 1-4
+     */
+    private estimateQueryComplexity(query: string): number {
+        if (!query) return 1;
+
+        // Simple heuristics for query complexity:
+        // 1. Length (longer queries tend to be more complex)
+        // 2. Number of questions or specific requests
+        // 3. Presence of complex terms/concepts
+
+        const words = query.split(/\s+/).length;
+        const questions = (query.match(/\?/g) || []).length;
+        const comparisons = (query.match(/compare|difference|versus|vs\.|between/gi) || []).length;
+        const complexity = (query.match(/explain|analyze|synthesize|evaluate|critique|recommend|suggest/gi) || []).length;
+
+        // Calculate complexity score
+        let score = 1;
+
+        if (words > 20) score += 1;
+        if (questions > 1) score += 1;
+        if (comparisons > 0) score += 1;
+        if (complexity > 0) score += 1;
+
+        return Math.min(4, score);
+    }
+}
+
+// Singleton instance
+const semanticContextService = new SemanticContextService();
+export default semanticContextService;