diff --git a/src/services/llm/pipeline/chat_pipeline.ts b/src/services/llm/pipeline/chat_pipeline.ts
index 0128b380c..ea7e9145c 100644
--- a/src/services/llm/pipeline/chat_pipeline.ts
+++ b/src/services/llm/pipeline/chat_pipeline.ts
@@ -581,6 +581,16 @@ export class ChatPipeline {
         } else if (toolsEnabled) {
             log.info(`========== NO TOOL CALLS DETECTED ==========`);
             log.info(`LLM response did not contain any tool calls, skipping tool execution`);
+
+            // Handle streaming for responses without tool calls
+            if (shouldEnableStream && streamCallback) {
+                log.info(`Sending final streaming response without tool calls: ${currentResponse.text.length} chars`);
+
+                // Send the final response with done=true to complete the streaming
+                await streamCallback(currentResponse.text, true);
+
+                log.info(`Sent final non-tool response with done=true signal`);
+            }
         }
 
         // Process the final response
diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts
index 01285612e..da3c42845 100644
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@@ -347,7 +347,7 @@ export class OllamaService extends BaseAIService {
                     // Send the chunk to the caller
                     await callback({
                         text: chunk.message?.content || '',
-                        done: !!chunk.done,
+                        done: false, // Never mark as done during chunk processing
                         raw: chunk // Include the raw chunk for advanced processing
                     });
 
@@ -359,7 +359,7 @@ export class OllamaService extends BaseAIService {
 
                 log.info(`Completed streaming from Ollama: processed ${chunkCount} chunks, total content: ${completeText.length} chars`);
 
-                // Signal completion
+                // Signal completion with a separate final callback after all processing is done
                 await callback({
                     text: '',
                     done: true
@@ -476,8 +476,10 @@ export class OllamaService extends BaseAIService {
                         // Call the callback with the current chunk content
                         if (opts.streamCallback) {
                             try {
-                                // Don't send done:true when tool calls are present to avoid premature completion
-                                const shouldMarkAsDone = !!chunk.done && !responseToolCalls.length;
+                                // Only mark as done on the final chunk if we have actual content
+                                // This ensures consistent behavior with and without tool calls
+                                // We'll send a separate final callback after the loop completes
+                                const shouldMarkAsDone = false; // Never mark as done during chunk processing
 
                                 await opts.streamCallback(
                                     chunk.message?.content || '',
@@ -499,6 +501,17 @@ export class OllamaService extends BaseAIService {
                             }
                         }
 
+                        // Send one final callback with done=true after all chunks have been processed
+                        // This ensures we get the complete response regardless of tool calls
+                        if (opts.streamCallback) {
+                            try {
+                                log.info(`Sending final done=true callback after processing all chunks`);
+                                await opts.streamCallback('', true, { done: true });
+                            } catch (finalCallbackError) {
+                                log.error(`Error in final streamCallback: ${finalCallbackError}`);
+                            }
+                        }
+
                         log.info(`Completed direct streaming from Ollama: processed ${chunkCount} chunks, final content: ${completeText.length} chars`);
                     } catch (iterationError) {
                         log.error(`Error iterating through Ollama stream chunks: ${iterationError instanceof Error ? iterationError.message : String(iterationError)}`);
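
For reviewers, a minimal sketch of the streaming contract these changes converge on (assuming the object-style callback used in the first Ollama streaming path above; `StreamCallback` and `collectStream` are illustrative names, not code from this PR): content chunks are always delivered with `done: false`, and completion is signalled by one trailing empty callback with `done: true`, so consumers should finalize only on that last signal.

```ts
// Illustrative sketch only — assumes the chunk shape passed to `callback` in
// ollama_service.ts above; `StreamCallback` and `collectStream` are hypothetical
// names introduced here, not part of this diff.
type StreamCallback = (chunk: { text: string; done: boolean; raw?: unknown }) => Promise<void>;

function collectStream(onComplete: (fullText: string) => void): StreamCallback {
    let buffer = '';
    return async ({ text, done }) => {
        buffer += text;         // every content chunk arrives with done: false
        if (done) {
            onComplete(buffer); // only the separate, empty final callback carries done: true
        }
    };
}
```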