move from axios to fetch in LLM services

commit 614d5ccdd3
parent dd9b37e9fb
Author: perf3ct
Date:   2025-03-30 21:00:02 +00:00
GPG Key ID: 569C4EEC436F5232 (no known key found for this signature in database)

3 changed files with 170 additions and 126 deletions
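
Every hunk below applies the same mechanical transformation: axios's `timeout` option becomes `AbortSignal.timeout()`, axios's automatic throw on non-2xx statuses becomes an explicit `response.ok` check, and the pre-parsed `response.data` becomes `await response.json()`. As a reference while reading, here is a minimal standalone sketch of the pattern; the helper name `postJson` is illustrative and does not appear in the commit.

```typescript
// Sketch of the axios -> fetch pattern this commit applies throughout.
// `postJson` is a hypothetical helper, not code from the diff.
async function postJson<T>(url: string, payload: unknown, timeoutMs = 10000): Promise<T> {
    const response = await fetch(url, {
        method: 'POST',
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
        // Replaces axios's `timeout` option; aborts the request after timeoutMs
        signal: AbortSignal.timeout(timeoutMs)
    });

    // fetch resolves on any HTTP status, so non-2xx responses must be
    // turned into errors manually (axios threw on 4xx/5xx by itself)
    if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
    }

    // Replaces axios's pre-parsed `response.data`
    return await response.json() as T;
}
```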

File 1 of 3: OllamaEmbeddingProvider

@@ -1,4 +1,3 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig } from "../embeddings_interface.js";
@@ -41,17 +40,23 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
     private async fetchModelCapabilities(modelName: string): Promise<EmbeddingModelInfo | null> {
         try {
             // First try the /api/show endpoint which has detailed model information
-            const showResponse = await axios.get(
-                `${this.baseUrl}/api/show`,
-                {
-                    params: { name: modelName },
-                    headers: { "Content-Type": "application/json" },
-                    timeout: 10000
-                }
-            );
+            const url = new URL(`${this.baseUrl}/api/show`);
+            url.searchParams.append('name', modelName);
+
+            const showResponse = await fetch(url, {
+                method: 'GET',
+                headers: { "Content-Type": "application/json" },
+                signal: AbortSignal.timeout(10000)
+            });
 
-            if (showResponse.data && showResponse.data.parameters) {
-                const params = showResponse.data.parameters;
+            if (!showResponse.ok) {
+                throw new Error(`HTTP error! status: ${showResponse.status}`);
+            }
+
+            const data = await showResponse.json();
+
+            if (data && data.parameters) {
+                const params = data.parameters;
                 // Extract context length from parameters (different models might use different parameter names)
                 const contextWindow = params.context_length ||
                     params.num_ctx ||
@@ -157,20 +162,24 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
      * Detect embedding dimension by making a test API call
      */
     private async detectEmbeddingDimension(modelName: string): Promise<number> {
-        const testResponse = await axios.post(
-            `${this.baseUrl}/api/embeddings`,
-            {
+        const testResponse = await fetch(`${this.baseUrl}/api/embeddings`, {
+            method: 'POST',
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
                 model: modelName,
                 prompt: "Test"
-            },
-            {
-                headers: { "Content-Type": "application/json" },
-                timeout: 10000
-            }
-        );
+            }),
+            signal: AbortSignal.timeout(10000)
+        });
 
-        if (testResponse.data && Array.isArray(testResponse.data.embedding)) {
-            return testResponse.data.embedding.length;
+        if (!testResponse.ok) {
+            throw new Error(`HTTP error! status: ${testResponse.status}`);
+        }
+
+        const data = await testResponse.json();
+
+        if (data && Array.isArray(data.embedding)) {
+            return data.embedding.length;
         } else {
             throw new Error("Could not detect embedding dimensions");
         }
@@ -209,35 +218,39 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
                 const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
                 const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
 
-                const response = await axios.post(
-                    `${this.baseUrl}/api/embeddings`,
-                    {
+                const response = await fetch(`${this.baseUrl}/api/embeddings`, {
+                    method: 'POST',
+                    headers: { "Content-Type": "application/json" },
+                    body: JSON.stringify({
                         model: modelName,
                         prompt: trimmedText,
                         format: "json"
-                    },
-                    {
-                        headers: {
-                            "Content-Type": "application/json"
-                        },
-                        timeout: 60000 // Increased timeout for larger texts (60 seconds)
-                    }
-                );
+                    }),
+                    signal: AbortSignal.timeout(60000) // Increased timeout for larger texts (60 seconds)
+                });
 
-                if (response.data && Array.isArray(response.data.embedding)) {
+                if (!response.ok) {
+                    throw new Error(`HTTP error! status: ${response.status}`);
+                }
+
+                const data = await response.json();
+
+                if (data && Array.isArray(data.embedding)) {
                     // Success! Return the embedding
-                    return new Float32Array(response.data.embedding);
+                    return new Float32Array(data.embedding);
                 } else {
                     throw new Error("Unexpected response structure from Ollama API");
                 }
             } catch (error: any) {
                 lastError = error;
                 // Only retry on timeout or connection errors
-                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                const errorMessage = error.message || "Unknown error";
                 const isTimeoutError = errorMessage.includes('timeout') ||
                     errorMessage.includes('socket hang up') ||
                     errorMessage.includes('ECONNREFUSED') ||
-                    errorMessage.includes('ECONNRESET');
+                    errorMessage.includes('ECONNRESET') ||
+                    errorMessage.includes('AbortError') ||
+                    errorMessage.includes('NetworkError');
 
                 if (isTimeoutError && retryCount < maxRetries) {
                     // Exponential backoff with jitter
@@ -247,7 +260,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
                     retryCount++;
                 } else {
                     // Non-retryable error or max retries exceeded
-                    const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                    const errorMessage = error.message || "Unknown error";
                     log.error(`Ollama embedding error: ${errorMessage}`);
                     throw new Error(`Ollama embedding error: ${errorMessage}`);
                 }
@@ -255,7 +268,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
         }
 
         // If we get here, we've exceeded our retry limit
-        const errorMessage = lastError.response?.data?.error?.message || lastError.message || "Unknown error";
+        const errorMessage = lastError.message || "Unknown error";
         log.error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
         throw new Error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
     }
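
The Ollama hunks above sit inside a retry loop whose scaffolding (`retryCount`, `maxRetries`, `lastError`) is outside the diff. A minimal sketch of that strategy, with assumed delay constants since the diff does not show them:

```typescript
// Retry wrapper with exponential backoff and jitter, as referenced by the
// comments above. The 1s base delay and 500ms jitter are assumptions.
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
    let lastError: any = null;

    for (let retryCount = 0; retryCount <= maxRetries; retryCount++) {
        try {
            return await fn();
        } catch (error: any) {
            lastError = error;
            if (retryCount === maxRetries) {
                break;
            }
            // Exponential backoff (1s, 2s, 4s, ...) plus random jitter so
            // concurrent callers do not retry in lockstep
            const delay = 1000 * 2 ** retryCount + Math.random() * 500;
            await new Promise(resolve => setTimeout(resolve, delay));
        }
    }

    throw new Error(`Failed after ${maxRetries} retries: ${lastError?.message || "Unknown error"}`);
}
```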

File 2 of 3: OpenAIEmbeddingProvider

@@ -1,4 +1,3 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
 import type { EmbeddingConfig } from "../embeddings_interface.js";
@@ -44,36 +43,40 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
         try {
             // First try to get model details from the models API
-            const response = await axios.get(
-                `${this.baseUrl}/models/${modelName}`,
-                {
-                    headers: {
-                        "Authorization": `Bearer ${this.apiKey}`,
-                        "Content-Type": "application/json"
-                    },
-                    timeout: 10000
-                }
-            );
+            const response = await fetch(`${this.baseUrl}/models/${modelName}`, {
+                method: 'GET',
+                headers: {
+                    "Authorization": `Bearer ${this.apiKey}`,
+                    "Content-Type": "application/json"
+                },
+                signal: AbortSignal.timeout(10000)
+            });
 
-            if (response.data) {
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            const data = await response.json();
+
+            if (data) {
                 // Different model families may have different ways of exposing context window
                 let contextWindow = 0;
                 let dimension = 0;
 
                 // Extract context window if available
-                if (response.data.context_window) {
-                    contextWindow = response.data.context_window;
-                } else if (response.data.limits && response.data.limits.context_window) {
-                    contextWindow = response.data.limits.context_window;
-                } else if (response.data.limits && response.data.limits.context_length) {
-                    contextWindow = response.data.limits.context_length;
+                if (data.context_window) {
+                    contextWindow = data.context_window;
+                } else if (data.limits && data.limits.context_window) {
+                    contextWindow = data.limits.context_window;
+                } else if (data.limits && data.limits.context_length) {
+                    contextWindow = data.limits.context_length;
                 }
 
                 // Extract embedding dimensions if available
-                if (response.data.dimensions) {
-                    dimension = response.data.dimensions;
-                } else if (response.data.embedding_dimension) {
-                    dimension = response.data.embedding_dimension;
+                if (data.dimensions) {
+                    dimension = data.dimensions;
+                } else if (data.embedding_dimension) {
+                    dimension = data.embedding_dimension;
                 }
 
                 // If we didn't get all the info, use defaults for missing values
@@ -185,28 +188,32 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 return new Float32Array(this.config.dimension);
             }
 
-            const response = await axios.post(
-                `${this.baseUrl}/embeddings`,
-                {
+            const response = await fetch(`${this.baseUrl}/embeddings`, {
+                method: 'POST',
+                headers: {
+                    "Content-Type": "application/json",
+                    "Authorization": `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify({
                     input: text,
                     model: this.config.model || "text-embedding-3-small",
                     encoding_format: "float"
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Authorization": `Bearer ${this.apiKey}`
-                    }
-                }
-            );
+                })
+            });
 
-            if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
-                return new Float32Array(response.data.data[0].embedding);
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            const data = await response.json();
+
+            if (data && data.data && data.data[0] && data.data[0].embedding) {
+                return new Float32Array(data.data[0].embedding);
             } else {
                 throw new Error("Unexpected response structure from OpenAI API");
             }
         } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+            const errorMessage = error.message || "Unknown error";
             log.error(`OpenAI embedding error: ${errorMessage}`);
             throw new Error(`OpenAI embedding error: ${errorMessage}`);
         }
@@ -216,7 +223,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
      * More specific implementation of batch size error detection for OpenAI
      */
     protected isBatchSizeError(error: any): boolean {
-        const errorMessage = error?.message || error?.response?.data?.error?.message || '';
+        const errorMessage = error?.message || '';
         const openAIBatchSizeErrorPatterns = [
             'batch size', 'too many inputs', 'context length exceeded',
             'maximum context length', 'token limit', 'rate limit exceeded',
@@ -236,24 +243,28 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
            return [];
        }
 
-        const response = await axios.post(
-            `${this.baseUrl}/embeddings`,
-            {
+        const response = await fetch(`${this.baseUrl}/embeddings`, {
+            method: 'POST',
+            headers: {
+                "Content-Type": "application/json",
+                "Authorization": `Bearer ${this.apiKey}`
+            },
+            body: JSON.stringify({
                 input: texts,
                 model: this.config.model || "text-embedding-3-small",
                 encoding_format: "float"
-            },
-            {
-                headers: {
-                    "Content-Type": "application/json",
-                    "Authorization": `Bearer ${this.apiKey}`
-                }
-            }
-        );
+            })
+        });
 
-        if (response.data && response.data.data) {
+        if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+        }
+
+        const data = await response.json();
+
+        if (data && data.data) {
             // Sort the embeddings by index to ensure they match the input order
-            const sortedEmbeddings = response.data.data
+            const sortedEmbeddings = data.data
                 .sort((a: any, b: any) => a.index - b.index)
                 .map((item: any) => new Float32Array(item.embedding));
 

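A detail worth noting in the batch hunk above: the embeddings endpoint returns a list of objects carrying an `index` field, and the code sorts on it before mapping so that output position i corresponds to input text i. Below is a minimal sketch of that post-processing step; the response item shape is assumed from the fields the diff reads.

```typescript
// Restore input order for a batch embeddings response, then convert each
// embedding to a compact typed array. EmbeddingItem's shape is an assumption.
interface EmbeddingItem {
    index: number;
    embedding: number[];
}

function toOrderedEmbeddings(items: EmbeddingItem[]): Float32Array[] {
    return [...items]                                    // avoid mutating the response array
        .sort((a, b) => a.index - b.index)               // match input order
        .map(item => new Float32Array(item.embedding));  // Float32Array halves memory vs number[]
}
```
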
File 3 of 3: VoyageEmbeddingProvider

@@ -1,9 +1,9 @@
-import axios from "axios";
 import log from "../../../log.js";
 import { BaseEmbeddingProvider } from "../base_embeddings.js";
-import type { EmbeddingConfig, EmbeddingModelInfo } from "../embeddings_interface.js";
+import type { EmbeddingConfig } from "../embeddings_interface.js";
 import { NormalizationStatus } from "../embeddings_interface.js";
 import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
+import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js";
 
 // Voyage model context window sizes - as of current API version
 const VOYAGE_MODEL_CONTEXT_WINDOWS: Record<string, number> = {
@@ -46,7 +46,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
                 // Update the config dimension
                 this.config.dimension = modelInfo.dimension;
 
-                log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWindow}`);
+                log.info(`Voyage AI model ${modelName} initialized with dimension ${this.config.dimension} and context window ${modelInfo.contextWidth}`);
             } catch (error: any) {
                 log.error(`Error initializing Voyage AI provider: ${error.message}`);
             }
@@ -70,7 +70,9 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             return {
                 dimension,
                 contextWindow,
-                guaranteesNormalization: true // Voyage embeddings are typically normalized
+                guaranteesNormalization: true, // Voyage embeddings are typically normalized
+                name: modelName,
+                type: 'float32'
             };
         } catch (error) {
             log.info(`Could not determine capabilities for Voyage AI model ${modelName}: ${error}`);
@@ -99,7 +101,9 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             const modelInfo: EmbeddingModelInfo = {
                 dimension: knownDimension,
                 contextWindow,
-                guaranteesNormalization: true // Voyage embeddings are typically normalized
+                guaranteesNormalization: true, // Voyage embeddings are typically normalized
+                name: modelName,
+                type: 'float32'
             };
 
             this.modelInfoCache.set(modelName, modelInfo);
@@ -117,20 +121,26 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             return {
                 dimension: dimension || 1024,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Voyage-2 embeddings are normalized
+                guaranteesNormalization: true, // Voyage-2 embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         } else if (modelName.includes('voyage-lite-02')) {
             return {
                 dimension: dimension || 768,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Voyage-lite embeddings are normalized
+                guaranteesNormalization: true, // Voyage-lite embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         } else {
             // Default for other Voyage models
             return {
                 dimension: dimension || 1024,
                 contextWindow: 4096,
-                guaranteesNormalization: true // Assuming all Voyage embeddings are normalized
+                guaranteesNormalization: true, // Assuming all Voyage embeddings are normalized
+                name: modelName,
+                type: 'float32'
             };
         }
     }
@@ -141,7 +151,9 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             const defaultModelInfo: EmbeddingModelInfo = {
                 dimension: 1024, // Default for Voyage models
                 contextWindow: 8192,
-                guaranteesNormalization: true // Voyage embeddings are typically normalized
+                guaranteesNormalization: true, // Voyage embeddings are typically normalized
+                name: modelName,
+                type: 'float32'
             };
 
             this.modelInfoCache.set(modelName, defaultModelInfo);
@@ -167,29 +179,33 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
 
-            const response = await axios.post(
-                `${this.baseUrl}/embeddings`,
-                {
+            const response = await fetch(`${this.baseUrl}/embeddings`, {
+                method: 'POST',
+                headers: {
+                    "Content-Type": "application/json",
+                    "Authorization": `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify({
                     model: modelName,
                     input: trimmedText,
                     input_type: "text",
                     truncation: true
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Authorization": `Bearer ${this.apiKey}`
-                    }
-                }
-            );
+                })
+            });
 
-            if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
-                return new Float32Array(response.data.data[0].embedding);
+            if (!response.ok) {
+                const errorData = await response.json().catch(() => ({}));
+                throw new Error(errorData.error?.message || `HTTP error ${response.status}`);
+            }
+
+            const data = await response.json();
+            if (data && data.data && data.data[0] && data.data[0].embedding) {
+                return new Float32Array(data.data[0].embedding);
             } else {
                 throw new Error("Unexpected response structure from Voyage AI API");
             }
         } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+            const errorMessage = error.message || "Unknown error";
             log.error(`Voyage AI embedding error: ${errorMessage}`);
             throw new Error(`Voyage AI embedding error: ${errorMessage}`);
         }
@@ -199,7 +215,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
      * More specific implementation of batch size error detection for Voyage AI
      */
     protected isBatchSizeError(error: any): boolean {
-        const errorMessage = error?.message || error?.response?.data?.error?.message || '';
+        const errorMessage = error?.message || '';
         const voyageBatchSizeErrorPatterns = [
             'batch size', 'too many inputs', 'context length exceeded',
             'token limit', 'rate limit', 'limit exceeded',
@@ -234,24 +250,28 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             // Filter out empty texts
             const validBatch = batch.map(text => text.trim() || " ");
 
-            const response = await axios.post(
-                `${this.baseUrl}/embeddings`,
-                {
+            const response = await fetch(`${this.baseUrl}/embeddings`, {
+                method: 'POST',
+                headers: {
+                    "Content-Type": "application/json",
+                    "Authorization": `Bearer ${this.apiKey}`
+                },
+                body: JSON.stringify({
                     model: modelName,
                     input: validBatch,
                     input_type: "text",
                     truncation: true
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Authorization": `Bearer ${this.apiKey}`
-                    }
-                }
-            );
+                })
+            });
 
-            if (response.data && response.data.data && Array.isArray(response.data.data)) {
-                return response.data.data.map((item: any) =>
+            if (!response.ok) {
+                const errorData = await response.json().catch(() => ({}));
+                throw new Error(errorData.error?.message || `HTTP error ${response.status}`);
+            }
+
+            const data = await response.json();
+            if (data && data.data && Array.isArray(data.data)) {
+                return data.data.map((item: any) =>
                     new Float32Array(item.embedding || [])
                 );
             } else {