From 614d5ccdd3158ea43dd298599f4a70483297b8bb Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Sun, 30 Mar 2025 21:00:02 +0000
Subject: [PATCH] move from using axios to fetch in llm services

---
 .../llm/embeddings/providers/ollama.ts |  91 ++++++++------
 .../llm/embeddings/providers/openai.ts | 111 ++++++++++--------
 .../llm/embeddings/providers/voyage.ts |  94 +++++++++------
 3 files changed, 170 insertions(+), 126 deletions(-)

diff --git a/src/services/llm/embeddings/providers/ollama.ts b/src/services/llm/embeddings/providers/ollama.ts
index ee9254f70..56db285de 100644
--- a/src/services/llm/embeddings/providers/ollama.ts
+++ b/src/services/llm/embeddings/providers/ollama.ts
@@ -1,4 +1,3 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig } from "../embeddings_interface.js";
@@ -41,17 +40,22 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> {
         try {
             // First try the /api/show endpoint which has detailed model information
-            const showResponse = await axios.get(
-                `${this.baseUrl}/api/show`,
-                {
-                    params: { name: modelName },
-                    headers: { "Content-Type": "application/json" },
-                    timeout: 10000
-                }
-            );
+            // Ollama's /api/show endpoint expects a POST with the model name in the JSON body
+            const showResponse = await fetch(`${this.baseUrl}/api/show`, {
+                method: 'POST',
+                headers: { "Content-Type": "application/json" },
+                body: JSON.stringify({ name: modelName }),
+                signal: AbortSignal.timeout(10000)
+            });

-            if (showResponse.data && showResponse.data.parameters) {
-                const params = showResponse.data.parameters;
+            if (!showResponse.ok) {
+                throw new Error(`HTTP error! status: ${showResponse.status}`);
+            }
+
+            const data = await showResponse.json();
+
+            if (data && data.parameters) {
+                const params = data.parameters;
                 // Extract context length from parameters (different models might use different parameter names)
                 const contextWindow = params.context_length ||
                     params.num_ctx ||
@@ -157,20 +162,24 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
      * Detect embedding dimension by making a test API call
      */
     private async detectEmbeddingDimension(modelName: string): Promise<number> {
-        const testResponse = await axios.post(
-            `${this.baseUrl}/api/embeddings`,
-            {
+        const testResponse = await fetch(`${this.baseUrl}/api/embeddings`, {
+            method: 'POST',
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
                 model: modelName,
                 prompt: "Test"
-            },
-            {
-                headers: { "Content-Type": "application/json" },
-                timeout: 10000
-            }
-        );
+            }),
+            signal: AbortSignal.timeout(10000)
+        });

-        if (testResponse.data && Array.isArray(testResponse.data.embedding)) {
-            return testResponse.data.embedding.length;
+        if (!testResponse.ok) {
+            throw new Error(`HTTP error! status: ${testResponse.status}`);
+        }
+
+        const data = await testResponse.json();
+
+        if (data && Array.isArray(data.embedding)) {
+            return data.embedding.length;
         } else {
             throw new Error("Could not detect embedding dimensions");
         }
@@ -209,35 +218,39 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
                 const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
                 const trimmedText = text.length > charLimit ?
                     text.substring(0, charLimit) : text;

-                const response = await axios.post(
-                    `${this.baseUrl}/api/embeddings`,
-                    {
+                const response = await fetch(`${this.baseUrl}/api/embeddings`, {
+                    method: 'POST',
+                    headers: { "Content-Type": "application/json" },
+                    body: JSON.stringify({
                         model: modelName,
                         prompt: trimmedText,
                         format: "json"
-                    },
-                    {
-                        headers: {
-                            "Content-Type": "application/json"
-                        },
-                        timeout: 60000 // Increased timeout for larger texts (60 seconds)
-                    }
-                );
+                    }),
+                    signal: AbortSignal.timeout(60000) // Increased timeout for larger texts (60 seconds)
+                });

-                if (response.data && Array.isArray(response.data.embedding)) {
+                if (!response.ok) {
+                    throw new Error(`HTTP error! status: ${response.status}`);
+                }
+
+                const data = await response.json();
+
+                if (data && Array.isArray(data.embedding)) {
                     // Success! Return the embedding
-                    return new Float32Array(response.data.embedding);
+                    return new Float32Array(data.embedding);
                 } else {
                     throw new Error("Unexpected response structure from Ollama API");
                 }
             } catch (error: any) {
                 lastError = error;

                 // Only retry on timeout or connection errors
-                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                const errorMessage = error.message || "Unknown error";
+                // fetch reports aborts via error.name ('AbortError'/'TimeoutError'), and Node's
+                // fetch wraps socket errors (ECONNREFUSED/ECONNRESET) in error.cause
+                const causeMessage = error.cause?.message || "";
                 const isTimeoutError = errorMessage.includes('timeout') ||
                     errorMessage.includes('socket hang up') ||
                     errorMessage.includes('ECONNREFUSED') ||
-                    errorMessage.includes('ECONNRESET');
+                    errorMessage.includes('ECONNRESET') ||
+                    causeMessage.includes('ECONNREFUSED') ||
+                    causeMessage.includes('ECONNRESET') ||
+                    error.name === 'AbortError' ||
+                    error.name === 'TimeoutError';

                 if (isTimeoutError && retryCount < maxRetries) {
                     // Exponential backoff with jitter
                     retryCount++;
                 } else {
                     // Non-retryable error or max retries exceeded
-                    const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                    const errorMessage = error.message || "Unknown error";
                     log.error(`Ollama embedding error: ${errorMessage}`);
                     throw new Error(`Ollama embedding error: ${errorMessage}`);
                 }
             }
         }

         // If we get here, we've exceeded our retry limit
-        const errorMessage = lastError.response?.data?.error?.message || lastError.message || "Unknown error";
+        const errorMessage = lastError.message || "Unknown error";
         log.error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
         throw new Error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
     }
diff --git a/src/services/llm/embeddings/providers/openai.ts b/src/services/llm/embeddings/providers/openai.ts
index 5a76e2032..69ed111c1 100644
--- a/src/services/llm/embeddings/providers/openai.ts
+++ b/src/services/llm/embeddings/providers/openai.ts
@@ -1,4 +1,3 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig } from "../embeddings_interface.js";
@@ -44,36 +43,40 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {

         try {
             // First try to get model details from the models API
-            const response = await axios.get(
-                `${this.baseUrl}/models/${modelName}`,
-                {
-                    headers: {
-                        "Authorization": `Bearer ${this.apiKey}`,
-                        "Content-Type": "application/json"
-                    },
-                    timeout: 10000
-                }
-            );
+            const response = await fetch(`${this.baseUrl}/models/${modelName}`, {
+                method: 'GET',
+                headers: {
+                    "Authorization": `Bearer ${this.apiKey}`,
+                    "Content-Type": "application/json"
+                },
+                signal: AbortSignal.timeout(10000)
+            });

-            if (response.data) {
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            const data = await response.json();
+
+            if (data) {
                 // Different model families may have different ways of exposing context window
                 let contextWindow = 0;
                 let dimension = 0;

                 // Extract context window if available
-                if (response.data.context_window) {
-                    contextWindow = response.data.context_window;
-                } else if (response.data.limits && response.data.limits.context_window) {
-                    contextWindow = response.data.limits.context_window;
-                } else if (response.data.limits && response.data.limits.context_length) {
-                    contextWindow = response.data.limits.context_length;
+                if (data.context_window) {
+                    contextWindow = data.context_window;
+                } else if (data.limits && data.limits.context_window) {
+                    contextWindow = data.limits.context_window;
+                } else if (data.limits && data.limits.context_length) {
+                    contextWindow = data.limits.context_length;
                 }

                 // Extract embedding dimensions if available
-                if (response.data.dimensions) {
-                    dimension = response.data.dimensions;
-                } else if (response.data.embedding_dimension) {
-                    dimension = response.data.embedding_dimension;
+                if (data.dimensions) {
+                    dimension = data.dimensions;
+                } else if (data.embedding_dimension) {
+                    dimension = data.embedding_dimension;
                 }

                 // If we didn't get all the info, use defaults for missing values
@@ -185,28 +188,32 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 return new Float32Array(this.config.dimension);
             }

-            const response = await axios.post(
-                `${this.baseUrl}/embeddings`,
-                {
+            const response = await fetch(`${this.baseUrl}/embeddings`, {
+                method: 'POST',
+                headers: {
+                    "Content-Type": "application/json",
+                    "Authorization": `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify({
                     input: text,
                     model: this.config.model || "text-embedding-3-small",
                     encoding_format: "float"
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Authorization": `Bearer ${this.apiKey}`
-                    }
-                }
-            );
+                })
+            });

-            if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
-                return new Float32Array(response.data.data[0].embedding);
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            const data = await response.json();
+
+            if (data && data.data && data.data[0] && data.data[0].embedding) {
+                return new Float32Array(data.data[0].embedding);
             } else {
                 throw new Error("Unexpected response structure from OpenAI API");
             }
         } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+            const errorMessage = error.message || "Unknown error";
             log.error(`OpenAI embedding error: ${errorMessage}`);
             throw new Error(`OpenAI embedding error: ${errorMessage}`);
         }
@@ -216,7 +223,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
      * More specific implementation of batch size error detection for OpenAI
      */
     protected isBatchSizeError(error: any): boolean {
-        const errorMessage = error?.message || error?.response?.data?.error?.message || '';
+        const errorMessage = error?.message || '';
         const openAIBatchSizeErrorPatterns = [
             'batch size', 'too many inputs', 'context length exceeded',
             'maximum context length', 'token limit', 'rate limit exceeded',
@@ -236,24 +243,28 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
             return [];
         }

-        const response = await axios.post(
-            `${this.baseUrl}/embeddings`,
-            {
+        const response = await fetch(`${this.baseUrl}/embeddings`, {
+            method: 'POST',
+            headers: {
+                "Content-Type": "application/json",
+                "Authorization": `Bearer ${this.apiKey}`
+            },
+            body: JSON.stringify({
                 input: texts,
                 model: this.config.model || "text-embedding-3-small",
                 encoding_format: "float"
-            },
-            {
-                headers: {
-                    "Content-Type": "application/json",
-                    "Authorization": `Bearer ${this.apiKey}`
-                }
-            }
-        );
+            })
+        });

-        if (response.data && response.data.data) {
+        if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+        }
+
+        const data = await response.json();
+
+        if (data && data.data) {
             // Sort the embeddings by index to ensure they match the input order
-            const sortedEmbeddings = response.data.data
+            const sortedEmbeddings = data.data
                 .sort((a: any, b: any) => a.index - b.index)
                 .map((item: any) => new Float32Array(item.embedding));

diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts
index 9d33c6673..129ca6760 100644
--- a/src/services/llm/embeddings/providers/voyage.ts
+++ b/src/services/llm/embeddings/providers/voyage.ts
@@ -1,9 +1,9 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
-import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js";
+import type { EmbeddingConfig } from "../embeddings_interface.js";
 import { NormalizationStatus } from "../embeddings_interface.js";
 import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
+import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js";

 // Voyage model context window sizes - as of current API version
 const VOYAGE_MODEL_CONTEXT_WINDOWS: Record<string, number> = {
@@ -46,7 +46,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             // Update the config dimension
             this.config.dimension = modelInfo.dimension;

-            log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWindow}`);
+            log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWidth}`);
         } catch (error: any) {
             log.error(`Error initializing Voyage AI provider: ${error.message}`);
         }
@@ -70,7 +70,9 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             return {
                 dimension,
                 contextWindow,
-                guaranteesNormalization: true // Voyage embeddings are typically normalized
+                guaranteesNormalization: true, // Voyage embeddings are typically normalized
+                name: modelName,
+                type: 'float32'
             };
         } catch (error) {
             log.info(`Could not determine capabilities for Voyage AI model ${modelName}: ${error}`);
@@ -99,7 +101,9 @@
             const modelInfo: EmbeddingModelInfo = {
                 dimension: knownDimension,
                 contextWindow,
-                guaranteesNormalization: true // Voyage embeddings are typically normalized
+                guaranteesNormalization: true, // Voyage embeddings are typically normalized
+                name: modelName,
+                type: 'float32'
             };

             this.modelInfoCache.set(modelName, modelInfo);
@@ -117,20 +121,26 @@
             return {
                 dimension: dimension || 1024,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Voyage-2 embeddings are normalized
+                guaranteesNormalization: true, // Voyage-2 embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         } else if (modelName.includes('voyage-lite-02')) {
             return {
                 dimension: dimension || 768,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Voyage-lite embeddings are normalized
+                guaranteesNormalization: true, // Voyage-lite embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         } else {
             // Default for other Voyage models
             return {
                 dimension: dimension || 1024,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Assuming all Voyage embeddings are normalized
+                guaranteesNormalization: true, // Assuming all Voyage embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         }
     }
@@ -141,7 +151,9 @@
         const defaultModelInfo: EmbeddingModelInfo = {
             dimension: 1024, // Default for Voyage models
             contextWindow: 8192,
-            guaranteesNormalization: true // Voyage embeddings are typically normalized
+            guaranteesNormalization: true, // Voyage embeddings are typically normalized
+            name: modelName,
+            type: 'float32'
         };

         this.modelInfoCache.set(modelName, defaultModelInfo);
@@ -167,29 +179,33 @@
             const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
             const trimmedText = text.length > charLimit ?
                 text.substring(0, charLimit) : text;

-            const response = await axios.post(
-                `${this.baseUrl}/embeddings`,
-                {
+            const response = await fetch(`${this.baseUrl}/embeddings`, {
+                method: 'POST',
+                headers: {
+                    "Content-Type": "application/json",
+                    "Authorization": `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify({
                     model: modelName,
                     input: trimmedText,
                     input_type: "text",
                     truncation: true
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Authorization": `Bearer ${this.apiKey}`
-                    }
-                }
-            );
+                })
+            });

-            if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
-                return new Float32Array(response.data.data[0].embedding);
+            if (!response.ok) {
+                const errorData = await response.json().catch(() => ({}));
+                throw new Error(errorData.error?.message || `HTTP error ${response.status}`);
+            }
+
+            const data = await response.json();
+            if (data && data.data && data.data[0] && data.data[0].embedding) {
+                return new Float32Array(data.data[0].embedding);
             } else {
                 throw new Error("Unexpected response structure from Voyage AI API");
             }
         } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+            const errorMessage = error.message || "Unknown error";
             log.error(`Voyage AI embedding error: ${errorMessage}`);
             throw new Error(`Voyage AI embedding error: ${errorMessage}`);
         }
@@ -199,7 +215,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
      * More specific implementation of batch size error detection for Voyage AI
      */
     protected isBatchSizeError(error: any): boolean {
-        const errorMessage = error?.message || error?.response?.data?.error?.message || '';
+        const errorMessage = error?.message || '';
         const voyageBatchSizeErrorPatterns = [
             'batch size', 'too many inputs', 'context length exceeded',
             'token limit', 'rate limit', 'limit exceeded',
@@ -234,24 +250,28 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         // Filter out empty texts
         const validBatch = batch.map(text => text.trim() || " ");

-        const response = await axios.post(
-            `${this.baseUrl}/embeddings`,
-            {
+        const response = await fetch(`${this.baseUrl}/embeddings`, {
+            method: 'POST',
+            headers: {
+                "Content-Type": "application/json",
+                "Authorization": `Bearer ${this.apiKey}`
+            },
+            body: JSON.stringify({
                 model: modelName,
                 input: validBatch,
                 input_type: "text",
                 truncation: true
-            },
-            {
-                headers: {
-                    "Content-Type": "application/json",
-                    "Authorization": `Bearer ${this.apiKey}`
-                }
-            }
-        );
+            })
+        });

-        if (response.data && response.data.data && Array.isArray(response.data.data)) {
-            return response.data.data.map((item: any) =>
+        if (!response.ok) {
+            const errorData = await response.json().catch(() => ({}));
+            throw new Error(errorData.error?.message || `HTTP error ${response.status}`);
+        }
+
+        const data = await response.json();
+        if (data && data.data && Array.isArray(data.data)) {
+            return data.data.map((item: any) =>
                 new Float32Array(item.embedding || [])
            );
        } else {
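
A note on the retry classification in ollama.ts above: with WHATWG fetch, a timeout produced by AbortSignal.timeout() is reported through error.name ('TimeoutError', or 'AbortError' for a plain abort) rather than through the message text, and Node's fetch wraps socket-level failures such as ECONNREFUSED or ECONNRESET in a TypeError whose cause carries the underlying code. A minimal sketch of that classification, assuming Node 18+; isRetryableFetchError is an illustrative name, not a helper that exists in this codebase:

// Sketch only: classify fetch failures that are worth retrying.
// Assumes Node 18+ (global fetch, AbortSignal.timeout, error.cause).
function isRetryableFetchError(error: any): boolean {
    // Timeouts and aborts surface through the DOMException name, not the message
    if (error?.name === 'TimeoutError' || error?.name === 'AbortError') {
        return true;
    }
    // Node's fetch wraps socket errors in a TypeError whose cause holds the code
    const message: string = error?.message || '';
    const causeMessage: string = error?.cause?.message || '';
    return ['timeout', 'socket hang up', 'ECONNREFUSED', 'ECONNRESET']
        .some(pattern => message.includes(pattern) || causeMessage.includes(pattern));
}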
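
The three providers also now repeat the same request boilerplate: JSON headers, an optional timeout signal, a response.ok check, and a JSON parse. A consolidated helper is sketched below; fetchJson and FetchJsonOptions are hypothetical names introduced here for illustration, not part of this patch or of Trilium's API:

// Sketch only: shared JSON-over-fetch helper for the embedding providers.
interface FetchJsonOptions {
    method?: 'GET' | 'POST';
    headers?: Record<string, string>;
    body?: unknown;     // serialized with JSON.stringify when present
    timeoutMs?: number; // enforced via AbortSignal.timeout
}

async function fetchJson<T = any>(url: string, options: FetchJsonOptions = {}): Promise<T> {
    const { method = 'GET', headers = {}, body, timeoutMs = 10000 } = options;

    const response = await fetch(url, {
        method,
        headers: { "Content-Type": "application/json", ...headers },
        body: body !== undefined ? JSON.stringify(body) : undefined,
        signal: AbortSignal.timeout(timeoutMs)
    });

    if (!response.ok) {
        // Prefer the provider's own error message, mirroring the voyage.ts handling above
        const errorData: any = await response.json().catch(() => ({}));
        throw new Error(errorData?.error?.message || `HTTP error! status: ${response.status}`);
    }

    return await response.json() as T;
}

Each provider call then collapses to something like fetchJson(`${this.baseUrl}/api/embeddings`, { method: 'POST', body: { model: modelName, prompt: trimmedText }, timeoutMs: 60000 }), with the Authorization header passed through where the provider requires it.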