mirror of
				https://github.com/zadam/trilium.git
				synced 2025-10-31 11:39:01 +01:00 
			
		
		
		
	do a better job of building the context
This commit is contained in:
		
							parent
							
								
									9c1ab4f322
								
							
						
					
					
						commit
						1be70f1163
					
				| @ -670,6 +670,12 @@ async function sendMessage(req: Request, res: Response) { | |||||||
|                     content: context |                     content: context | ||||||
|                 }; |                 }; | ||||||
| 
 | 
 | ||||||
|  |                 // DEBUG: Log context details before sending to LLM
 | ||||||
|  |                 log.info(`CONTEXT BEING SENT TO LLM: ${context.length} chars`); | ||||||
|  |                 log.info(`Context begins with: "${context.substring(0, 200)}..."`); | ||||||
|  |                 log.info(`Context ends with: "...${context.substring(context.length - 200)}"`); | ||||||
|  |                 log.info(`Number of notes included: ${sourceNotes.length}`); | ||||||
|  | 
 | ||||||
|                 // Format all messages for the AI (advanced context case)
 |                 // Format all messages for the AI (advanced context case)
 | ||||||
|                 const aiMessages: Message[] = [ |                 const aiMessages: Message[] = [ | ||||||
|                     contextMessage, |                     contextMessage, | ||||||
| @ -679,6 +685,12 @@ async function sendMessage(req: Request, res: Response) { | |||||||
|                     })) |                     })) | ||||||
|                 ]; |                 ]; | ||||||
| 
 | 
 | ||||||
|  |                 // DEBUG: Log message structure being sent to LLM
 | ||||||
|  |                 log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`); | ||||||
|  |                 aiMessages.forEach((msg, idx) => { | ||||||
|  |                     log.info(`Message ${idx}: role=${msg.role}, content length=${msg.content.length} chars, begins with: "${msg.content.substring(0, 50)}..."`); | ||||||
|  |                 }); | ||||||
|  | 
 | ||||||
|                 // Configure chat options from session metadata
 |                 // Configure chat options from session metadata
 | ||||||
|                 const chatOptions: ChatCompletionOptions = { |                 const chatOptions: ChatCompletionOptions = { | ||||||
|                     temperature: session.metadata.temperature || 0.7, |                     temperature: session.metadata.temperature || 0.7, | ||||||
|  | |||||||
| @ -36,6 +36,10 @@ export class ContextFormatter { | |||||||
|                 providerId === 'ollama' ? CONTEXT_WINDOW.OLLAMA : |                 providerId === 'ollama' ? CONTEXT_WINDOW.OLLAMA : | ||||||
|                 CONTEXT_WINDOW.DEFAULT; |                 CONTEXT_WINDOW.DEFAULT; | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Log context window size
 | ||||||
|  |             log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`); | ||||||
|  |             log.info(`Building context from ${sources.length} sources for query: "${query.substring(0, 50)}..."`); | ||||||
|  | 
 | ||||||
|             // Use a format appropriate for the model family
 |             // Use a format appropriate for the model family
 | ||||||
|             const isAnthropicFormat = providerId === 'anthropic'; |             const isAnthropicFormat = providerId === 'anthropic'; | ||||||
| 
 | 
 | ||||||
| @ -47,24 +51,35 @@ export class ContextFormatter { | |||||||
|             // Sort sources by similarity if available to prioritize most relevant
 |             // Sort sources by similarity if available to prioritize most relevant
 | ||||||
|             if (sources[0] && sources[0].similarity !== undefined) { |             if (sources[0] && sources[0].similarity !== undefined) { | ||||||
|                 sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); |                 sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); | ||||||
|  |                 // DEBUG: Log sorting information
 | ||||||
|  |                 log.info(`Sources sorted by similarity. Top sources: ${sources.slice(0, 3).map(s => s.title || 'Untitled').join(', ')}`); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Track total size to avoid exceeding model context window
 |             // Track total size to avoid exceeding model context window
 | ||||||
|             let totalSize = context.length; |             let totalSize = context.length; | ||||||
|             const formattedSources: string[] = []; |             const formattedSources: string[] = []; | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Track stats for logging
 | ||||||
|  |             let sourcesProcessed = 0; | ||||||
|  |             let sourcesIncluded = 0; | ||||||
|  |             let sourcesSkipped = 0; | ||||||
|  |             let sourcesExceededLimit = 0; | ||||||
|  | 
 | ||||||
|             // Process each source
 |             // Process each source
 | ||||||
|             for (const source of sources) { |             for (const source of sources) { | ||||||
|  |                 sourcesProcessed++; | ||||||
|                 let content = ''; |                 let content = ''; | ||||||
|                 if (typeof source === 'string') { |                 if (typeof source === 'string') { | ||||||
|                     content = source; |                     content = source; | ||||||
|                 } else if (source.content) { |                 } else if (source.content) { | ||||||
|                     content = this.sanitizeNoteContent(source.content, source.type, source.mime); |                     content = this.sanitizeNoteContent(source.content, source.type, source.mime); | ||||||
|                 } else { |                 } else { | ||||||
|  |                     sourcesSkipped++; | ||||||
|                     continue; // Skip invalid sources
 |                     continue; // Skip invalid sources
 | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (!content || content.trim().length === 0) { |                 if (!content || content.trim().length === 0) { | ||||||
|  |                     sourcesSkipped++; | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
| @ -75,6 +90,7 @@ export class ContextFormatter { | |||||||
| 
 | 
 | ||||||
|                 // Check if adding this would exceed our size limit
 |                 // Check if adding this would exceed our size limit
 | ||||||
|                 if (totalSize + formattedSource.length > maxTotalLength) { |                 if (totalSize + formattedSource.length > maxTotalLength) { | ||||||
|  |                     sourcesExceededLimit++; | ||||||
|                     // If this is the first source, include a truncated version
 |                     // If this is the first source, include a truncated version
 | ||||||
|                     if (formattedSources.length === 0) { |                     if (formattedSources.length === 0) { | ||||||
|                         const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text
 |                         const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text
 | ||||||
| @ -82,6 +98,9 @@ export class ContextFormatter { | |||||||
|                             const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`; |                             const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`; | ||||||
|                             formattedSources.push(truncatedContent); |                             formattedSources.push(truncatedContent); | ||||||
|                             totalSize += truncatedContent.length; |                             totalSize += truncatedContent.length; | ||||||
|  |                             sourcesIncluded++; | ||||||
|  |                             // DEBUG: Log truncation
 | ||||||
|  |                             log.info(`Truncated first source "${title}" to fit in context window. Used ${truncatedContent.length} of ${formattedSource.length} chars`); | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|                     break; |                     break; | ||||||
| @ -89,8 +108,13 @@ export class ContextFormatter { | |||||||
| 
 | 
 | ||||||
|                 formattedSources.push(formattedSource); |                 formattedSources.push(formattedSource); | ||||||
|                 totalSize += formattedSource.length; |                 totalSize += formattedSource.length; | ||||||
|  |                 sourcesIncluded++; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Log sources stats
 | ||||||
|  |             log.info(`Context building stats: processed ${sourcesProcessed}/${sources.length} sources, included ${sourcesIncluded}, skipped ${sourcesSkipped}, exceeded limit ${sourcesExceededLimit}`); | ||||||
|  |             log.info(`Context size so far: ${totalSize}/${maxTotalLength} chars (${(totalSize/maxTotalLength*100).toFixed(2)}% of limit)`); | ||||||
|  | 
 | ||||||
|             // Add the formatted sources to the context
 |             // Add the formatted sources to the context
 | ||||||
|             context += formattedSources.join('\n'); |             context += formattedSources.join('\n'); | ||||||
| 
 | 
 | ||||||
| @ -104,6 +128,9 @@ export class ContextFormatter { | |||||||
|                 context += closing; |                 context += closing; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Log final context size
 | ||||||
|  |             log.info(`Final context: ${context.length} chars, ${formattedSources.length} sources included`); | ||||||
|  | 
 | ||||||
|             return context; |             return context; | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             log.error(`Error building context from notes: ${error}`); |             log.error(`Error building context from notes: ${error}`); | ||||||
|  | |||||||
| @ -134,7 +134,7 @@ export class ContextService { | |||||||
|                 // Convert map to array and limit to top results
 |                 // Convert map to array and limit to top results
 | ||||||
|                 relevantNotes = Array.from(allResults.values()) |                 relevantNotes = Array.from(allResults.values()) | ||||||
|                     .sort((a, b) => b.similarity - a.similarity) |                     .sort((a, b) => b.similarity - a.similarity) | ||||||
|                     .slice(0, 8); // Get top 8 notes
 |                     .slice(0, 20); // Increased from 8 to 20 notes
 | ||||||
|             } catch (error) { |             } catch (error) { | ||||||
|                 log.error(`Error finding relevant notes: ${error}`); |                 log.error(`Error finding relevant notes: ${error}`); | ||||||
|                 // Continue with empty notes list
 |                 // Continue with empty notes list
 | ||||||
| @ -145,6 +145,9 @@ export class ContextService { | |||||||
|             const providerId = provider?.name || 'default'; |             const providerId = provider?.name || 'default'; | ||||||
|             const context = await contextFormatter.buildContextFromNotes(relevantNotes, userQuestion, providerId); |             const context = await contextFormatter.buildContextFromNotes(relevantNotes, userQuestion, providerId); | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Log the initial context built from notes
 | ||||||
|  |             log.info(`Initial context from buildContextFromNotes: ${context.length} chars, starting with: "${context.substring(0, 150)}..."`); | ||||||
|  | 
 | ||||||
|             // Step 4: Add agent tools context with thinking process if requested
 |             // Step 4: Add agent tools context with thinking process if requested
 | ||||||
|             let enhancedContext = context; |             let enhancedContext = context; | ||||||
|             try { |             try { | ||||||
| @ -162,6 +165,9 @@ export class ContextService { | |||||||
|                 if (agentContext) { |                 if (agentContext) { | ||||||
|                     enhancedContext = enhancedContext + "\n\n" + agentContext; |                     enhancedContext = enhancedContext + "\n\n" + agentContext; | ||||||
|                 } |                 } | ||||||
|  | 
 | ||||||
|  |                 // DEBUG: Log the final combined context
 | ||||||
|  |                 log.info(`FINAL COMBINED CONTEXT: ${enhancedContext.length} chars, with content structure: ${this.summarizeContextStructure(enhancedContext)}`); | ||||||
|             } catch (error) { |             } catch (error) { | ||||||
|                 log.error(`Error getting agent tools context: ${error}`); |                 log.error(`Error getting agent tools context: ${error}`); | ||||||
|                 // Continue with the basic context
 |                 // Continue with the basic context
 | ||||||
| @ -372,31 +378,89 @@ export class ContextService { | |||||||
|                 log.error(`Error adding note structure to context: ${error}`); |                 log.error(`Error adding note structure to context: ${error}`); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Add most relevant notes from search results
 |             // Combine the notes from both searches - the initial relevantNotes and from vector search
 | ||||||
|             const allSearchResults = searchResults.flatMap(r => r.results); |             // Start with a Map to deduplicate by noteId
 | ||||||
|  |             const allNotes = new Map<string, any>(); | ||||||
| 
 | 
 | ||||||
|             // Deduplicate results by noteId
 |             // Add notes from the initial search in processQuery (relevantNotes parameter)
 | ||||||
|             const uniqueResults = new Map(); |             if (relevantNotes && relevantNotes.length > 0) { | ||||||
|             for (const result of allSearchResults) { |                 log.info(`Adding ${relevantNotes.length} notes from initial search to combined results`); | ||||||
|                 if (!uniqueResults.has(result.noteId) || uniqueResults.get(result.noteId).similarity < result.similarity) { |                 for (const note of relevantNotes) { | ||||||
|                     uniqueResults.set(result.noteId, result); |                     if (note.noteId) { | ||||||
|  |                         allNotes.set(note.noteId, note); | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Sort by similarity
 |             // Add notes from vector search of sub-queries
 | ||||||
|             const sortedResults = Array.from(uniqueResults.values()) |             const vectorSearchNotes = searchResults.flatMap(r => r.results); | ||||||
|                 .sort((a, b) => b.similarity - a.similarity) |             if (vectorSearchNotes.length > 0) { | ||||||
|                 .slice(0, 10);  // Get top 10 unique results
 |                 log.info(`Adding ${vectorSearchNotes.length} notes from vector search to combined results`); | ||||||
|  |                 for (const note of vectorSearchNotes) { | ||||||
|  |                     // If note already exists, keep the one with higher similarity
 | ||||||
|  |                     if (!allNotes.has(note.noteId) || note.similarity > allNotes.get(note.noteId).similarity) { | ||||||
|  |                         allNotes.set(note.noteId, note); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
| 
 | 
 | ||||||
|             if (sortedResults.length > 0) { |             // Convert the combined Map to an array and sort by similarity
 | ||||||
|                 agentContext += `## Relevant Information\n`; |             const combinedNotes = Array.from(allNotes.values()) | ||||||
|  |                 .sort((a, b) => b.similarity - a.similarity); | ||||||
| 
 | 
 | ||||||
|                 for (const result of sortedResults) { |             log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`); | ||||||
|                     agentContext += `### ${result.title}\n`; |  | ||||||
| 
 | 
 | ||||||
|                     if (result.content) { |             // Filter for Qu-related notes
 | ||||||
|                         // Limit content to 500 chars per note to avoid token explosion
 |             const quNotes = combinedNotes.filter(result => | ||||||
|                         agentContext += `${result.content.substring(0, 500)}${result.content.length > 500 ? '...' : ''}\n\n`; |                 result.title.toLowerCase().includes('qu') || | ||||||
|  |                 (result.content && result.content.toLowerCase().includes('qu')) | ||||||
|  |             ); | ||||||
|  | 
 | ||||||
|  |             if (quNotes.length > 0) { | ||||||
|  |                 log.info(`Found ${quNotes.length} Qu-related notes out of ${combinedNotes.length} total notes`); | ||||||
|  |                 quNotes.forEach((note, idx) => { | ||||||
|  |                     if (idx < 3) { // Log just a sample to avoid log spam
 | ||||||
|  |                         log.info(`Qu note ${idx+1}: "${note.title}" (similarity: ${Math.round(note.similarity * 100)}%), content length: ${note.content ? note.content.length : 0} chars`); | ||||||
|  |                     } | ||||||
|  |                 }); | ||||||
|  | 
 | ||||||
|  |                 // Prioritize Qu notes first, then other notes by similarity
 | ||||||
|  |                 const nonQuNotes = combinedNotes.filter(note => !quNotes.includes(note)); | ||||||
|  |                 const finalNotes = [...quNotes, ...nonQuNotes].slice(0, 30); // Take top 30 prioritized notes
 | ||||||
|  | 
 | ||||||
|  |                 log.info(`Selected ${finalNotes.length} notes for context, with ${quNotes.length} Qu-related notes prioritized`); | ||||||
|  | 
 | ||||||
|  |                 // Add the selected notes to the context
 | ||||||
|  |                 if (finalNotes.length > 0) { | ||||||
|  |                     agentContext += `## Relevant Information\n`; | ||||||
|  | 
 | ||||||
|  |                     for (const note of finalNotes) { | ||||||
|  |                         agentContext += `### ${note.title}\n`; | ||||||
|  | 
 | ||||||
|  |                         if (note.content) { | ||||||
|  |                             // Extract relevant content instead of just taking first 2000 chars
 | ||||||
|  |                             const relevantContent = await this.extractRelevantContent(note.content, query, 2000); | ||||||
|  |                             agentContext += `${relevantContent}\n\n`; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 log.info(`No Qu-related notes found among the ${combinedNotes.length} combined notes`); | ||||||
|  | 
 | ||||||
|  |                 // Just take the top notes by similarity
 | ||||||
|  |                 const finalNotes = combinedNotes.slice(0, 30); // Take top 30 notes
 | ||||||
|  | 
 | ||||||
|  |                 if (finalNotes.length > 0) { | ||||||
|  |                     agentContext += `## Relevant Information\n`; | ||||||
|  | 
 | ||||||
|  |                     for (const note of finalNotes) { | ||||||
|  |                         agentContext += `### ${note.title}\n`; | ||||||
|  | 
 | ||||||
|  |                         if (note.content) { | ||||||
|  |                             // Extract relevant content instead of just taking first 2000 chars
 | ||||||
|  |                             const relevantContent = await this.extractRelevantContent(note.content, query, 2000); | ||||||
|  |                             agentContext += `${relevantContent}\n\n`; | ||||||
|  |                         } | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @ -415,6 +479,15 @@ export class ContextService { | |||||||
|             // Log stats about the context
 |             // Log stats about the context
 | ||||||
|             log.info(`Agent tools context built: ${agentContext.length} chars, ${agentContext.split('\n').length} lines`); |             log.info(`Agent tools context built: ${agentContext.length} chars, ${agentContext.split('\n').length} lines`); | ||||||
| 
 | 
 | ||||||
|  |             // DEBUG: Log more detailed information about the agent tools context content
 | ||||||
|  |             log.info(`Agent tools context content structure: ${this.summarizeContextStructure(agentContext)}`); | ||||||
|  |             if (agentContext.length < 1000) { | ||||||
|  |                 log.info(`Agent tools context full content (short): ${agentContext}`); | ||||||
|  |             } else { | ||||||
|  |                 log.info(`Agent tools context first 500 chars: ${agentContext.substring(0, 500)}...`); | ||||||
|  |                 log.info(`Agent tools context last 500 chars: ${agentContext.substring(agentContext.length - 500)}`); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             return agentContext; |             return agentContext; | ||||||
|         } catch (error) { |         } catch (error) { | ||||||
|             log.error(`Error getting agent tools context: ${error}`); |             log.error(`Error getting agent tools context: ${error}`); | ||||||
| @ -422,6 +495,31 @@ export class ContextService { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /** | ||||||
|  |      * Summarize the structure of a context string for debugging | ||||||
|  |      * @param context - The context string to summarize | ||||||
|  |      * @returns A summary of the context structure | ||||||
|  |      */ | ||||||
|  |     private summarizeContextStructure(context: string): string { | ||||||
|  |         if (!context) return "Empty context"; | ||||||
|  | 
 | ||||||
|  |         // Count sections and headers
 | ||||||
|  |         const sections = context.split('##').length - 1; | ||||||
|  |         const subSections = context.split('###').length - 1; | ||||||
|  | 
 | ||||||
|  |         // Count notes referenced
 | ||||||
|  |         const noteMatches = context.match(/### [^\n]+/g); | ||||||
|  |         const noteCount = noteMatches ? noteMatches.length : 0; | ||||||
|  | 
 | ||||||
|  |         // Extract note titles if present
 | ||||||
|  |         let noteTitles = ""; | ||||||
|  |         if (noteMatches && noteMatches.length > 0) { | ||||||
|  |             noteTitles = ` Note titles: ${noteMatches.slice(0, 3).map(m => m.substring(4)).join(', ')}${noteMatches.length > 3 ? '...' : ''}`; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         return `${sections} main sections, ${subSections} subsections, ${noteCount} notes referenced.${noteTitles}`; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /** |     /** | ||||||
|      * Get semantic context for a note and query |      * Get semantic context for a note and query | ||||||
|      * |      * | ||||||
| @ -586,6 +684,104 @@ export class ContextService { | |||||||
|     clearCaches(): void { |     clearCaches(): void { | ||||||
|         cacheManager.clearAllCaches(); |         cacheManager.clearAllCaches(); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     /** | ||||||
|  |      * Extract the most relevant portions from a note's content | ||||||
|  |      * @param content - The full note content | ||||||
|  |      * @param query - The user's query | ||||||
|  |      * @param maxChars - Maximum characters to include | ||||||
|  |      * @returns The most relevant content sections | ||||||
|  |      */ | ||||||
|  |     private async extractRelevantContent(content: string, query: string, maxChars: number = 2000): Promise<string> { | ||||||
|  |         if (!content || content.length <= maxChars) { | ||||||
|  |             return content; // Return full content if it's already short enough
 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         try { | ||||||
|  |             // Get the vector search tool for relevance calculation
 | ||||||
|  |             const agentManager = aiServiceManager.getInstance(); | ||||||
|  |             const vectorSearchTool = agentManager.getVectorSearchTool(); | ||||||
|  | 
 | ||||||
|  |             // Split content into chunks of reasonable size (300-500 chars with overlap)
 | ||||||
|  |             const chunkSize = 400; | ||||||
|  |             const overlap = 100; | ||||||
|  |             const chunks: string[] = []; | ||||||
|  | 
 | ||||||
|  |             for (let i = 0; i < content.length; i += (chunkSize - overlap)) { | ||||||
|  |                 const end = Math.min(i + chunkSize, content.length); | ||||||
|  |                 chunks.push(content.substring(i, end)); | ||||||
|  |                 if (end === content.length) break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             log.info(`Split note content into ${chunks.length} chunks for relevance extraction`); | ||||||
|  | 
 | ||||||
|  |             // Get embedding provider from service
 | ||||||
|  |             const provider = await providerManager.getPreferredEmbeddingProvider(); | ||||||
|  |             if (!provider) { | ||||||
|  |                 throw new Error("No embedding provider available"); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Get embeddings for the query and all chunks
 | ||||||
|  |             const queryEmbedding = await provider.createEmbedding(query); | ||||||
|  | 
 | ||||||
|  |             // Process chunks in smaller batches to avoid overwhelming the provider
 | ||||||
|  |             const batchSize = 5; | ||||||
|  |             const chunkEmbeddings = []; | ||||||
|  | 
 | ||||||
|  |             for (let i = 0; i < chunks.length; i += batchSize) { | ||||||
|  |                 const batch = chunks.slice(i, i + batchSize); | ||||||
|  |                 const batchEmbeddings = await Promise.all( | ||||||
|  |                     batch.map(chunk => provider.createEmbedding(chunk)) | ||||||
|  |                 ); | ||||||
|  |                 chunkEmbeddings.push(...batchEmbeddings); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Calculate similarity between query and each chunk
 | ||||||
|  |             const similarities: Array<{index: number, similarity: number, content: string}> = | ||||||
|  |                 chunkEmbeddings.map((embedding, index) => { | ||||||
|  |                     const similarity = provider.calculateSimilarity(queryEmbedding, embedding); | ||||||
|  |                     return { index, similarity, content: chunks[index] }; | ||||||
|  |                 }); | ||||||
|  | 
 | ||||||
|  |             // Sort chunks by similarity (most relevant first)
 | ||||||
|  |             similarities.sort((a, b) => b.similarity - a.similarity); | ||||||
|  | 
 | ||||||
|  |             // DEBUG: Log some info about the top chunks
 | ||||||
|  |             log.info(`Top 3 most relevant chunks for query "${query.substring(0, 30)}..." (out of ${chunks.length} total):`); | ||||||
|  |             similarities.slice(0, 3).forEach((chunk, idx) => { | ||||||
|  |                 log.info(`  Chunk ${idx+1}: Similarity ${Math.round(chunk.similarity * 100)}%, Content: "${chunk.content.substring(0, 50)}..."`); | ||||||
|  |             }); | ||||||
|  | 
 | ||||||
|  |             // Take the most relevant chunks up to maxChars
 | ||||||
|  |             let result = ''; | ||||||
|  |             let totalChars = 0; | ||||||
|  |             let chunksIncluded = 0; | ||||||
|  | 
 | ||||||
|  |             for (const chunk of similarities) { | ||||||
|  |                 if (totalChars + chunk.content.length > maxChars) { | ||||||
|  |                     // If adding full chunk would exceed limit, add as much as possible
 | ||||||
|  |                     const remainingSpace = maxChars - totalChars; | ||||||
|  |                     if (remainingSpace > 100) { // Only add if we can include something meaningful
 | ||||||
|  |                         result += `\n...\n${chunk.content.substring(0, remainingSpace)}...`; | ||||||
|  |                         log.info(`  Added partial chunk with similarity ${Math.round(chunk.similarity * 100)}% (${remainingSpace} chars)`); | ||||||
|  |                     } | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 if (result.length > 0) result += '\n...\n'; | ||||||
|  |                 result += chunk.content; | ||||||
|  |                 totalChars += chunk.content.length; | ||||||
|  |                 chunksIncluded++; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             log.info(`Extracted ${totalChars} chars of relevant content from ${content.length} chars total (${chunksIncluded} chunks included)`); | ||||||
|  |             return result; | ||||||
|  |         } catch (error) { | ||||||
|  |             log.error(`Error extracting relevant content: ${error}`); | ||||||
|  |             // Fallback to simple truncation if extraction fails
 | ||||||
|  |             return content.substring(0, maxChars) + '...'; | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Export singleton instance
 | // Export singleton instance
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 perf3ct
						perf3ct