do a better job of building the context

2025-10-31 11:39:01 +01:00 · 2025-03-20 19:35:20 +00:00 · 2025-03-20 19:35:20 +00:00 · 1be70f1163
commit 1be70f1163
parent 9c1ab4f322
3 changed files with 254 additions and 19 deletions
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@ -670,6 +670,12 @@ async function sendMessage(req: Request, res: Response) {
                    content: context
                };
                // DEBUG: Log context details before sending to LLM
                log.info(`CONTEXT BEING SENT TO LLM: ${context.length} chars`);
                log.info(`Context begins with: "${context.substring(0, 200)}..."`);
                log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
                log.info(`Number of notes included: ${sourceNotes.length}`);
                // Format all messages for the AI (advanced context case)
                const aiMessages: Message[] = [
                    contextMessage,
@ -679,6 +685,12 @@ async function sendMessage(req: Request, res: Response) {
                    }))
                ];
                // DEBUG: Log message structure being sent to LLM
                log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`);
                aiMessages.forEach((msg, idx) => {
                    log.info(`Message ${idx}: role=${msg.role}, content length=${msg.content.length} chars, begins with: "${msg.content.substring(0, 50)}..."`);
                });
                // Configure chat options from session metadata
                const chatOptions: ChatCompletionOptions = {
                    temperature: session.metadata.temperature || 0.7,
--- a/src/services/llm/context/modules/context_formatter.ts
+++ b/src/services/llm/context/modules/context_formatter.ts
@ -36,6 +36,10 @@ export class ContextFormatter {
                providerId === 'ollama' ? CONTEXT_WINDOW.OLLAMA :
                CONTEXT_WINDOW.DEFAULT;
            // DEBUG: Log context window size
            log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`);
            log.info(`Building context from ${sources.length} sources for query: "${query.substring(0, 50)}..."`);
            // Use a format appropriate for the model family
            const isAnthropicFormat = providerId === 'anthropic';
@ -47,24 +51,35 @@ export class ContextFormatter {
            // Sort sources by similarity if available to prioritize most relevant
            if (sources[0] && sources[0].similarity !== undefined) {
                sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
                // DEBUG: Log sorting information
                log.info(`Sources sorted by similarity. Top sources: ${sources.slice(0, 3).map(s => s.title || 'Untitled').join(', ')}`);
            }
            // Track total size to avoid exceeding model context window
            let totalSize = context.length;
            const formattedSources: string[] = [];
            // DEBUG: Track stats for logging
            let sourcesProcessed = 0;
            let sourcesIncluded = 0;
            let sourcesSkipped = 0;
            let sourcesExceededLimit = 0;
            // Process each source
            for (const source of sources) {
                sourcesProcessed++;
                let content = '';
                if (typeof source === 'string') {
                    content = source;
                } else if (source.content) {
                    content = this.sanitizeNoteContent(source.content, source.type, source.mime);
                } else {
                    sourcesSkipped++;
                    continue; // Skip invalid sources
                }
                if (!content || content.trim().length === 0) {
                    sourcesSkipped++;
                    continue;
                }
@ -75,6 +90,7 @@ export class ContextFormatter {
                // Check if adding this would exceed our size limit
                if (totalSize + formattedSource.length > maxTotalLength) {
                    sourcesExceededLimit++;
                    // If this is the first source, include a truncated version
                    if (formattedSources.length === 0) {
                        const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text
@ -82,6 +98,9 @@ export class ContextFormatter {
                            const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`;
                            formattedSources.push(truncatedContent);
                            totalSize += truncatedContent.length;
                            sourcesIncluded++;
                            // DEBUG: Log truncation
                            log.info(`Truncated first source "${title}" to fit in context window. Used ${truncatedContent.length} of ${formattedSource.length} chars`);
                        }
                    }
                    break;
@ -89,8 +108,13 @@ export class ContextFormatter {
                formattedSources.push(formattedSource);
                totalSize += formattedSource.length;
                sourcesIncluded++;
            }
            // DEBUG: Log sources stats
            log.info(`Context building stats: processed ${sourcesProcessed}/${sources.length} sources, included ${sourcesIncluded}, skipped ${sourcesSkipped}, exceeded limit ${sourcesExceededLimit}`);
            log.info(`Context size so far: ${totalSize}/${maxTotalLength} chars (${(totalSize/maxTotalLength*100).toFixed(2)}% of limit)`);
            // Add the formatted sources to the context
            context += formattedSources.join('\n');
@ -104,6 +128,9 @@ export class ContextFormatter {
                context += closing;
            }
            // DEBUG: Log final context size
            log.info(`Final context: ${context.length} chars, ${formattedSources.length} sources included`);
            return context;
        } catch (error) {
            log.error(`Error building context from notes: ${error}`);
--- a/src/services/llm/context/modules/context_service.ts
+++ b/src/services/llm/context/modules/context_service.ts
@ -134,7 +134,7 @@ export class ContextService {
                // Convert map to array and limit to top results
                relevantNotes = Array.from(allResults.values())
                    .sort((a, b) => b.similarity - a.similarity)
-                    .slice(0, 8); // Get top 8 notes
+                    .slice(0, 20); // Increased from 8 to 20 notes
            } catch (error) {
                log.error(`Error finding relevant notes: ${error}`);
                // Continue with empty notes list
@ -145,6 +145,9 @@ export class ContextService {
            const providerId = provider?.name || 'default';
            const context = await contextFormatter.buildContextFromNotes(relevantNotes, userQuestion, providerId);
            // DEBUG: Log the initial context built from notes
            log.info(`Initial context from buildContextFromNotes: ${context.length} chars, starting with: "${context.substring(0, 150)}..."`);
            // Step 4: Add agent tools context with thinking process if requested
            let enhancedContext = context;
            try {
@ -162,6 +165,9 @@ export class ContextService {
                if (agentContext) {
                    enhancedContext = enhancedContext + "\n\n" + agentContext;
                }
                // DEBUG: Log the final combined context
                log.info(`FINAL COMBINED CONTEXT: ${enhancedContext.length} chars, with content structure: ${this.summarizeContextStructure(enhancedContext)}`);
            } catch (error) {
                log.error(`Error getting agent tools context: ${error}`);
                // Continue with the basic context
@ -372,31 +378,89 @@ export class ContextService {
                log.error(`Error adding note structure to context: ${error}`);
            }
-            // Add most relevant notes from search results
+            // Combine the notes from both searches - the initial relevantNotes and from vector search
-            const allSearchResults = searchResults.flatMap(r => r.results);
+            // Start with a Map to deduplicate by noteId
            const allNotes = new Map<string, any>();
-            // Deduplicate results by noteId
+            // Add notes from the initial search in processQuery (relevantNotes parameter)
-            const uniqueResults = new Map();
+            if (relevantNotes && relevantNotes.length > 0) {
-            for (const result of allSearchResults) {
+                log.info(`Adding ${relevantNotes.length} notes from initial search to combined results`);
-                if (!uniqueResults.has(result.noteId) || uniqueResults.get(result.noteId).similarity < result.similarity) {
+                for (const note of relevantNotes) {
-                    uniqueResults.set(result.noteId, result);
+                    if (note.noteId) {
                        allNotes.set(note.noteId, note);
                    }
                }
            }
-            // Sort by similarity
+            // Add notes from vector search of sub-queries
-            const sortedResults = Array.from(uniqueResults.values())
+            const vectorSearchNotes = searchResults.flatMap(r => r.results);
-                .sort((a, b) => b.similarity - a.similarity)
+            if (vectorSearchNotes.length > 0) {
-                .slice(0, 10);  // Get top 10 unique results
+                log.info(`Adding ${vectorSearchNotes.length} notes from vector search to combined results`);
                for (const note of vectorSearchNotes) {
                    // If note already exists, keep the one with higher similarity
                    if (!allNotes.has(note.noteId) || note.similarity > allNotes.get(note.noteId).similarity) {
                        allNotes.set(note.noteId, note);
                    }
                }
            }
-            if (sortedResults.length > 0) {
+            // Convert the combined Map to an array and sort by similarity
-                agentContext += `## Relevant Information\n`;
+            const combinedNotes = Array.from(allNotes.values())
                .sort((a, b) => b.similarity - a.similarity);
-                for (const result of sortedResults) {
+            log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`);
                    agentContext += `### ${result.title}\n`;
-                    if (result.content) {
+            // Filter for Qu-related notes
-                        // Limit content to 500 chars per note to avoid token explosion
+            const quNotes = combinedNotes.filter(result =>
-                        agentContext += `${result.content.substring(0, 500)}${result.content.length > 500 ? '...' : ''}\n\n`;
+                result.title.toLowerCase().includes('qu') ||
                (result.content && result.content.toLowerCase().includes('qu'))
            );
            if (quNotes.length > 0) {
                log.info(`Found ${quNotes.length} Qu-related notes out of ${combinedNotes.length} total notes`);
                quNotes.forEach((note, idx) => {
                    if (idx < 3) { // Log just a sample to avoid log spam
                        log.info(`Qu note ${idx+1}: "${note.title}" (similarity: ${Math.round(note.similarity * 100)}%), content length: ${note.content ? note.content.length : 0} chars`);
                    }
                });
                // Prioritize Qu notes first, then other notes by similarity
                const nonQuNotes = combinedNotes.filter(note => !quNotes.includes(note));
                const finalNotes = [...quNotes, ...nonQuNotes].slice(0, 30); // Take top 30 prioritized notes
                log.info(`Selected ${finalNotes.length} notes for context, with ${quNotes.length} Qu-related notes prioritized`);
                // Add the selected notes to the context
                if (finalNotes.length > 0) {
                    agentContext += `## Relevant Information\n`;
                    for (const note of finalNotes) {
                        agentContext += `### ${note.title}\n`;
                        if (note.content) {
                            // Extract relevant content instead of just taking first 2000 chars
                            const relevantContent = await this.extractRelevantContent(note.content, query, 2000);
                            agentContext += `${relevantContent}\n\n`;
                        }
                    }
                }
            } else {
                log.info(`No Qu-related notes found among the ${combinedNotes.length} combined notes`);
                // Just take the top notes by similarity
                const finalNotes = combinedNotes.slice(0, 30); // Take top 30 notes
                if (finalNotes.length > 0) {
                    agentContext += `## Relevant Information\n`;
                    for (const note of finalNotes) {
                        agentContext += `### ${note.title}\n`;
                        if (note.content) {
                            // Extract relevant content instead of just taking first 2000 chars
                            const relevantContent = await this.extractRelevantContent(note.content, query, 2000);
                            agentContext += `${relevantContent}\n\n`;
                        }
                    }
                }
            }
@ -415,6 +479,15 @@ export class ContextService {
            // Log stats about the context
            log.info(`Agent tools context built: ${agentContext.length} chars, ${agentContext.split('\n').length} lines`);
            // DEBUG: Log more detailed information about the agent tools context content
            log.info(`Agent tools context content structure: ${this.summarizeContextStructure(agentContext)}`);
            if (agentContext.length < 1000) {
                log.info(`Agent tools context full content (short): ${agentContext}`);
            } else {
                log.info(`Agent tools context first 500 chars: ${agentContext.substring(0, 500)}...`);
                log.info(`Agent tools context last 500 chars: ${agentContext.substring(agentContext.length - 500)}`);
            }
            return agentContext;
        } catch (error) {
            log.error(`Error getting agent tools context: ${error}`);
@ -422,6 +495,31 @@ export class ContextService {
        }
    }
    /**
     * Summarize the structure of a context string for debugging.
     *
     * Markdown-style headers are recognised only at the start of a line, and
     * each level is counted on its own: a `###` line counts as a subsection,
     * not additionally as a `##` main section.
     *
     * @param context - The context string to summarize
     * @returns `"Empty context"` when the string is empty; otherwise a one-line
     *          summary with the number of `##` sections, `###` subsections and
     *          `### <title>` note headers, followed by up to three note titles
     */
    private summarizeContextStructure(context: string): string {
        if (!context) return "Empty context";

        // The negative lookahead `(?!#)` stops a deeper header (e.g. `###`)
        // from also being counted at a shallower level.
        const sections = (context.match(/^##(?!#)/gm) ?? []).length;
        const subSections = (context.match(/^###(?!#)/gm) ?? []).length;

        // Notes are written into the context as `### <title>` lines.
        const noteMatches = context.match(/^### [^\n]+/gm) ?? [];
        const noteCount = noteMatches.length;

        // Show at most three titles; a trailing "..." signals that more exist.
        let noteTitles = "";
        if (noteCount > 0) {
            const sampleTitles = noteMatches
                .slice(0, 3)
                .map(header => header.substring(4)) // drop the leading "### "
                .join(', ');
            noteTitles = ` Note titles: ${sampleTitles}${noteCount > 3 ? '...' : ''}`;
        }

        return `${sections} main sections, ${subSections} subsections, ${noteCount} notes referenced.${noteTitles}`;
    }
    /**
     * Get semantic context for a note and query
     *
@ -586,6 +684,104 @@ export class ContextService {
    clearCaches(): void {
        // Pure delegation: which caches get cleared is decided by
        // cacheManager.clearAllCaches(), which is defined outside this view.
        cacheManager.clearAllCaches();
    }
    /**
     * Extract the most relevant portions from a note's content.
     *
     * Content that already fits in `maxChars` is returned unchanged. Longer
     * content is cut into overlapping 400-character chunks (a new chunk starts
     * every 300 characters), each chunk and the query are embedded with the
     * preferred embedding provider, and chunks are appended in descending order
     * of the provider-reported similarity until the budget is used up.
     *
     * Selected chunks are joined with a `\n...\n` separator. They appear in
     * relevance order, not document order, and because chunks overlap,
     * neighbouring chunks that are both selected can repeat some text.
     *
     * @param content - The full note content
     * @param query - The user's query
     * @param maxChars - Maximum characters to include; separators and the
     *                   trailing ellipsis of a cut-off chunk count towards it
     * @returns The selected excerpts, at most `maxChars` characters long. If
     *          embedding fails or nothing fits, falls back to the first
     *          `maxChars` characters of `content` followed by `...`
     */
    private async extractRelevantContent(content: string, query: string, maxChars: number = 2000): Promise<string> {
        if (!content || content.length <= maxChars) {
            return content; // Return full content if it's already short enough
        }

        try {
            // Split content into fixed-size chunks; consecutive chunks share `overlap` characters
            const chunkSize = 400;
            const overlap = 100;
            const chunks: string[] = [];

            for (let i = 0; i < content.length; i += (chunkSize - overlap)) {
                const end = Math.min(i + chunkSize, content.length);
                chunks.push(content.substring(i, end));
                if (end === content.length) break; // last chunk reached the end of the note
            }

            log.info(`Split note content into ${chunks.length} chunks for relevance extraction`);

            // Get embedding provider from service
            const provider = await providerManager.getPreferredEmbeddingProvider();
            if (!provider) {
                throw new Error("No embedding provider available");
            }

            // Get embeddings for the query and all chunks
            const queryEmbedding = await provider.createEmbedding(query);

            // Embed chunks in batches: the chunks of one batch are requested
            // concurrently, and batches run one after another
            const batchSize = 5;
            const chunkEmbeddings = [];

            for (let i = 0; i < chunks.length; i += batchSize) {
                const batch = chunks.slice(i, i + batchSize);
                const batchEmbeddings = await Promise.all(
                    batch.map(chunk => provider.createEmbedding(chunk))
                );
                chunkEmbeddings.push(...batchEmbeddings);
            }

            // Calculate similarity between query and each chunk
            const similarities: Array<{index: number, similarity: number, content: string}> =
                chunkEmbeddings.map((embedding, index) => {
                    const similarity = provider.calculateSimilarity(queryEmbedding, embedding);
                    return { index, similarity, content: chunks[index] };
                });

            // Sort chunks by similarity (most relevant first)
            similarities.sort((a, b) => b.similarity - a.similarity);

            // DEBUG: Log some info about the top chunks
            log.info(`Top 3 most relevant chunks for query "${query.substring(0, 30)}..." (out of ${chunks.length} total):`);
            similarities.slice(0, 3).forEach((chunk, idx) => {
                log.info(`  Chunk ${idx+1}: Similarity ${Math.round(chunk.similarity * 100)}%, Content: "${chunk.content.substring(0, 50)}..."`);
            });

            // Take the most relevant chunks up to maxChars. The budget is
            // checked against the real output length, so separators and the
            // trailing ellipsis cannot push the result past maxChars.
            const separator = '\n...\n';
            const ellipsis = '...';
            let result = '';
            let totalChars = 0; // characters of note content included (excludes separators)
            let chunksIncluded = 0;

            for (const chunk of similarities) {
                // No separator before the very first excerpt
                const prefix = result.length > 0 ? separator : '';

                if (result.length + prefix.length + chunk.content.length > maxChars) {
                    // The full chunk does not fit: add as much as possible
                    const remainingSpace = maxChars - result.length - prefix.length - ellipsis.length;
                    if (remainingSpace > 100) { // Only add if we can include something meaningful
                        result += `${prefix}${chunk.content.substring(0, remainingSpace)}${ellipsis}`;
                        totalChars += remainingSpace;
                        log.info(`  Added partial chunk with similarity ${Math.round(chunk.similarity * 100)}% (${remainingSpace} chars)`);
                    }
                    break;
                }

                result += prefix + chunk.content;
                totalChars += chunk.content.length;
                chunksIncluded++;
            }

            if (result.length === 0) {
                // Budget too small for even a meaningful partial chunk: use
                // plain truncation instead of returning an empty excerpt
                return content.substring(0, maxChars) + '...';
            }

            log.info(`Extracted ${totalChars} chars of relevant content from ${content.length} chars total (${chunksIncluded} chunks included)`);
            return result;
        } catch (error) {
            log.error(`Error extracting relevant content: ${error}`);
            // Fallback to simple truncation if extraction fails
            return content.substring(0, maxChars) + '...';
        }
    }
 }
 // Export singleton instance