fix(llm): fix duplicated text when streaming responses

parent 6bc9b3c184
commit cb3844e627
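The duplication had two sources: `RestChatService` sent each chunk both over WebSocket and again as an SSE `res.write`, and `ChatPipeline` re-sent the full accumulated text with the final `done: true` signal even when the same content had already gone out chunk by chunk. The fix removes the redundant SSE write and introduces a `hasStreamedContent` flag so the final callback carries only an empty done-marker once content has been streamed. A minimal sketch of that guard pattern (function and parameter names here are illustrative, not the exact Trilium API):

type StreamCallback = (text: string, done: boolean) => void;

// Illustrative sketch of the dedup guard: stream chunks once, then
// finish with either the full text (nothing streamed yet) or an
// empty done-marker (content already delivered incrementally).
async function streamOnce(
    chunks: AsyncIterable<string>,
    finalText: string,
    callback: StreamCallback
): Promise<void> {
    let hasStreamedContent = false;

    for await (const chunk of chunks) {
        callback(chunk, false);
        hasStreamedContent = true; // remember that content went out incrementally
    }

    if (hasStreamedContent) {
        // Content was already streamed; send only the completion signal
        // so the client does not append the full text a second time.
        callback('', true);
    } else {
        // Nothing was streamed (e.g. a single non-streaming response),
        // so the final callback carries the complete text.
        callback(finalText, true);
    }
}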
@@ -237,14 +237,6 @@ class RestChatService {
                 // Send WebSocket message
                 wsService.sendMessageToAllClients(message);
 
-                // Send SSE response for compatibility
-                const responseData: any = { content: data, done };
-                if (rawChunk?.toolExecution) {
-                    responseData.toolExecution = rawChunk.toolExecution;
-                }
-
-                res.write(`data: ${JSON.stringify(responseData)}\n\n`);
-
                 // When streaming is complete, save the accumulated content to the chat note
                 if (done) {

@@ -266,8 +258,8 @@ class RestChatService {
                     log.error(`Error saving streaming response: ${error}`);
                 }
 
-                // End the response
-                res.end();
+                // Note: For WebSocket-only streaming, we don't end the HTTP response here
+                // since it was already handled by the calling endpoint
             }
         }
 
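The second hunk shifts ownership of the HTTP response to the calling endpoint: the service now streams over WebSocket only and never calls `res.end()` itself. A hedged sketch of that split, with hypothetical handler and service names (only the `res.end()` removal is confirmed by the diff):

import type { Request, Response } from 'express';

// Hypothetical service shape: streams results over WebSocket and
// resolves when streaming completes; it never touches `res` itself.
interface StreamingChatService {
    handleStreamingProcess(body: unknown): Promise<void>;
}

// The route handler owns the HTTP response: it acknowledges the request
// immediately, then lets the service stream over WebSocket only.
async function handleChatMessage(
    req: Request,
    res: Response,
    service: StreamingChatService
) {
    res.status(200).json({ success: true, message: 'Streaming via WebSocket' });
    // No res.write()/res.end() after this point; all further chunks
    // travel over the WebSocket connection, as the diff's comment notes.
    await service.handleStreamingProcess(req.body);
}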
@@ -298,6 +298,9 @@ export class ChatPipeline {
         this.updateStageMetrics('llmCompletion', llmStartTime);
         log.info(`Received LLM response from model: ${completion.response.model}, provider: ${completion.response.provider}`);
 
+        // Track whether content has been streamed to prevent duplication
+        let hasStreamedContent = false;
+
         // Handle streaming if enabled and available
         // Use shouldEnableStream variable which contains our streaming decision
         if (shouldEnableStream && completion.response.stream && streamCallback) {

@@ -311,6 +314,9 @@ export class ChatPipeline {
 
                 // Forward to callback with original chunk data in case it contains additional information
                 streamCallback(processedChunk.text, processedChunk.done, chunk);
+
+                // Mark that we have streamed content to prevent duplication
+                hasStreamedContent = true;
             });
         }
 

@@ -767,11 +773,15 @@ export class ChatPipeline {
             const responseText = currentResponse.text || "";
             log.info(`Resuming streaming with final response: ${responseText.length} chars`);
 
-            if (responseText.length > 0) {
-                // Resume streaming with the final response text
+            if (responseText.length > 0 && !hasStreamedContent) {
+                // Resume streaming with the final response text only if we haven't already streamed content
                 // This is where we send the definitive done:true signal with the complete content
                 streamCallback(responseText, true);
                 log.info(`Sent final response with done=true signal and text content`);
+            } else if (hasStreamedContent) {
+                log.info(`Content already streamed, sending done=true signal only after tool execution`);
+                // Just send the done signal without duplicating content
+                streamCallback('', true);
             } else {
                 // For Anthropic, sometimes text is empty but response is in stream
                 if ((currentResponse.provider === 'Anthropic' || currentResponse.provider === 'OpenAI') && currentResponse.stream) {

@@ -803,13 +813,17 @@ export class ChatPipeline {
             log.info(`LLM response did not contain any tool calls, skipping tool execution`);
 
             // Handle streaming for responses without tool calls
-            if (shouldEnableStream && streamCallback) {
+            if (shouldEnableStream && streamCallback && !hasStreamedContent) {
                 log.info(`Sending final streaming response without tool calls: ${currentResponse.text.length} chars`);
 
                 // Send the final response with done=true to complete the streaming
                 streamCallback(currentResponse.text, true);
 
                 log.info(`Sent final non-tool response with done=true signal`);
+            } else if (shouldEnableStream && streamCallback && hasStreamedContent) {
+                log.info(`Content already streamed, sending done=true signal only`);
+                // Just send the done signal without duplicating content
+                streamCallback('', true);
             }
         }
 
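With both guards in place, a client consuming the stream should see each chunk exactly once, followed by a final message whose content is empty and whose done flag is set. A small sketch of the expected client-side behavior (the message shape is assumed from the `{ content, done }` payload in the removed SSE code):

// Assumed message shape, matching the `{ content, done }` payload in the diff.
interface StreamMessage {
    content: string;
    done: boolean;
    toolExecution?: unknown;
}

// Accumulate chunks in arrival order; after the fix, the final
// done-message contributes '' when chunks were already streamed,
// so no text is appended twice.
function accumulate(messages: StreamMessage[]): string {
    let text = '';
    for (const msg of messages) {
        text += msg.content;
        if (msg.done) break;
    }
    return text;
}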