fix(llm): fix duplicated text when streaming responses

parent 6bc9b3c184
commit cb3844e627
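The duplication had two sources: `RestChatService` sent each chunk both over WebSocket and again as an SSE `res.write`, and `ChatPipeline` re-sent the full accumulated text with the final `done: true` signal even when the same content had already gone out chunk by chunk. The fix removes the redundant SSE write and introduces a `hasStreamedContent` flag so the final callback carries only an empty done-marker once content has been streamed. A minimal sketch of that guard pattern (function and parameter names here are illustrative, not the exact Trilium API):

type StreamCallback = (text: string, done: boolean) => void;

// Illustrative sketch of the dedup guard: stream chunks once, then
// finish with either the full text (nothing streamed yet) or an
// empty done-marker (content already delivered incrementally).
async function streamOnce(
    chunks: AsyncIterable<string>,
    finalText: string,
    callback: StreamCallback
): Promise<void> {
    let hasStreamedContent = false;

    for await (const chunk of chunks) {
        callback(chunk, false);
        hasStreamedContent = true; // remember that content went out incrementally
    }

    if (hasStreamedContent) {
        // Content was already streamed; send only the completion signal
        // so the client does not append the full text a second time.
        callback('', true);
    } else {
        // Nothing was streamed (e.g. a single non-streaming response),
        // so the final callback carries the complete text.
        callback(finalText, true);
    }
}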
@@ -237,14 +237,6 @@ class RestChatService {
                 // Send WebSocket message
                 wsService.sendMessageToAllClients(message);
 
-                // Send SSE response for compatibility
-                const responseData: any = { content: data, done };
-                if (rawChunk?.toolExecution) {
-                    responseData.toolExecution = rawChunk.toolExecution;
-                }
-
-                res.write(`data: ${JSON.stringify(responseData)}\n\n`);
-
                 // When streaming is complete, save the accumulated content to the chat note
                 if (done) {

@@ -266,8 +258,8 @@ class RestChatService {
                     log.error(`Error saving streaming response: ${error}`);
                 }
 
-                // End the response
-                res.end();
+                // Note: For WebSocket-only streaming, we don't end the HTTP response here
+                // since it was already handled by the calling endpoint
             }
         }
 
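The second hunk shifts ownership of the HTTP response to the calling endpoint: the service now streams over WebSocket only and never calls `res.end()` itself. A hedged sketch of that split, with hypothetical handler and service names (only the `res.end()` removal is confirmed by the diff):

import type { Request, Response } from 'express';

// Hypothetical service shape: streams results over WebSocket and
// resolves when streaming completes; it never touches `res` itself.
interface StreamingChatService {
    handleStreamingProcess(body: unknown): Promise<void>;
}

// The route handler owns the HTTP response: it acknowledges the request
// immediately, then lets the service stream over WebSocket only.
async function handleChatMessage(
    req: Request,
    res: Response,
    service: StreamingChatService
) {
    res.status(200).json({ success: true, message: 'Streaming via WebSocket' });
    // No res.write()/res.end() after this point; all further chunks
    // travel over the WebSocket connection, as the diff's comment notes.
    await service.handleStreamingProcess(req.body);
}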
@@ -298,6 +298,9 @@ export class ChatPipeline {
         this.updateStageMetrics('llmCompletion', llmStartTime);
         log.info(`Received LLM response from model: ${completion.response.model}, provider: ${completion.response.provider}`);
 
+        // Track whether content has been streamed to prevent duplication
+        let hasStreamedContent = false;
+
         // Handle streaming if enabled and available
         // Use shouldEnableStream variable which contains our streaming decision
         if (shouldEnableStream && completion.response.stream && streamCallback) {

@@ -311,6 +314,9 @@ export class ChatPipeline {
 
                 // Forward to callback with original chunk data in case it contains additional information
                 streamCallback(processedChunk.text, processedChunk.done, chunk);
+
+                // Mark that we have streamed content to prevent duplication
+                hasStreamedContent = true;
             });
         }
 

@@ -767,11 +773,15 @@ export class ChatPipeline {
             const responseText = currentResponse.text || "";
             log.info(`Resuming streaming with final response: ${responseText.length} chars`);
 
-            if (responseText.length > 0) {
-                // Resume streaming with the final response text
+            if (responseText.length > 0 && !hasStreamedContent) {
+                // Resume streaming with the final response text only if we haven't already streamed content
                 // This is where we send the definitive done:true signal with the complete content
                 streamCallback(responseText, true);
                 log.info(`Sent final response with done=true signal and text content`);
+            } else if (hasStreamedContent) {
+                log.info(`Content already streamed, sending done=true signal only after tool execution`);
+                // Just send the done signal without duplicating content
+                streamCallback('', true);
             } else {
                 // For Anthropic, sometimes text is empty but response is in stream
                 if ((currentResponse.provider === 'Anthropic' || currentResponse.provider === 'OpenAI') && currentResponse.stream) {

@@ -803,13 +813,17 @@ export class ChatPipeline {
             log.info(`LLM response did not contain any tool calls, skipping tool execution`);
 
             // Handle streaming for responses without tool calls
-            if (shouldEnableStream && streamCallback) {
+            if (shouldEnableStream && streamCallback && !hasStreamedContent) {
                 log.info(`Sending final streaming response without tool calls: ${currentResponse.text.length} chars`);
 
                 // Send the final response with done=true to complete the streaming
                 streamCallback(currentResponse.text, true);
 
                 log.info(`Sent final non-tool response with done=true signal`);
+            } else if (shouldEnableStream && streamCallback && hasStreamedContent) {
+                log.info(`Content already streamed, sending done=true signal only`);
+                // Just send the done signal without duplicating content
+                streamCallback('', true);
             }
         }
 
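With both guards in place, a client consuming the stream should see each chunk exactly once, followed by a final message whose content is empty and whose done flag is set. A small sketch of the expected client-side behavior (the message shape is assumed from the `{ content, done }` payload in the removed SSE code):

// Assumed message shape, matching the `{ content, done }` payload in the diff.
interface StreamMessage {
    content: string;
    done: boolean;
    toolExecution?: unknown;
}

// Accumulate chunks in arrival order; after the fix, the final
// done-message contributes '' when chunks were already streamed,
// so no text is appended twice.
function accumulate(messages: StreamMessage[]): string {
    let text = '';
    for (const msg of messages) {
        text += msg.content;
        if (msg.done) break;
    }
    return text;
}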