Implement a retry mechanism for Google GenAI calls by xpomul · Pull Request #15783 · eclipse-theia/theia · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Implement a retry mechanism for Google GenAI calls #15783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import { FrontendApplicationContribution, PreferenceService } from '@theia/core/lib/browser';
import { inject, injectable } from '@theia/core/shared/inversify';
import { GoogleLanguageModelsManager, GoogleModelDescription } from '../common';
import { API_KEY_PREF, MODELS_PREF } from './google-preferences';
import { API_KEY_PREF, MODELS_PREF, MAX_RETRIES, RETRY_DELAY_OTHER_ERRORS, RETRY_DELAY_RATE_LIMIT } from './google-preferences';

const GOOGLE_PROVIDER_ID = 'google';

Expand All @@ -37,13 +37,23 @@ export class GoogleFrontendApplicationContribution implements FrontendApplicatio
const apiKey = this.preferenceService.get<string>(API_KEY_PREF, undefined);
this.manager.setApiKey(apiKey);

this.manager.setMaxRetriesOnErrors(this.preferenceService.get<number>(MAX_RETRIES, 3));
this.manager.setRetryDelayOnRateLimitError(this.preferenceService.get<number>(RETRY_DELAY_RATE_LIMIT, 60));
this.manager.setRetryDelayOnOtherErrors(this.preferenceService.get<number>(RETRY_DELAY_OTHER_ERRORS, -1));

const models = this.preferenceService.get<string[]>(MODELS_PREF, []);
this.manager.createOrUpdateLanguageModels(...models.map(modelId => this.createGeminiModelDescription(modelId)));
this.prevModels = [...models];

this.preferenceService.onPreferenceChanged(event => {
if (event.preferenceName === API_KEY_PREF) {
this.manager.setApiKey(event.newValue);
} else if (event.preferenceName === MAX_RETRIES) {
this.manager.setMaxRetriesOnErrors(event.newValue);
} else if (event.preferenceName === RETRY_DELAY_RATE_LIMIT) {
this.manager.setRetryDelayOnRateLimitError(event.newValue);
} else if (event.preferenceName === RETRY_DELAY_OTHER_ERRORS) {
this.manager.setRetryDelayOnOtherErrors(event.newValue);
} else if (event.preferenceName === MODELS_PREF) {
this.handleModelChanges(event.newValue as string[]);
}
Expand Down
29 changes: 29 additions & 0 deletions packages/ai-google/src/browser/google-preferences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import { nls } from '@theia/core';

// Preference keys of the Google AI integration. Each constant is used both as a key of
// `GooglePreferencesSchema` and as the name passed to the preference service when reading values.

// Preference key under which the Google API key is stored.
export const API_KEY_PREF = 'ai-features.google.apiKey';
// Preference key holding the list of Google model ids (read as a string array).
export const MODELS_PREF = 'ai-features.google.models';
// Preference key for the maximum number of retries when a request fails (integer, schema default 3).
export const MAX_RETRIES = 'ai-features.google.maxRetriesOnErrors';
// Preference key for the delay, in seconds, between retries after a rate limit error (schema default 60).
export const RETRY_DELAY_RATE_LIMIT = 'ai-features.google.retryDelayOnRateLimitError';
// Preference key for the delay, in seconds, between retries after any other error (schema default -1, which disables those retries).
export const RETRY_DELAY_OTHER_ERRORS = 'ai-features.google.retryDelayOnOtherErrors';

export const GooglePreferencesSchema: PreferenceSchema = {
type: 'object',
Expand All @@ -40,5 +43,31 @@ export const GooglePreferencesSchema: PreferenceSchema = {
type: 'string'
}
},
[MAX_RETRIES]: {
type: 'integer',
description: nls.localize('theia/ai/google/maxRetriesOnErrors/description',
'Maximum number of retries in case of errors. If smaller than 1, then the retry logic is disabled'),
title: AI_CORE_PREFERENCES_TITLE,
default: 3,
minimum: 0
},
[RETRY_DELAY_RATE_LIMIT]: {
type: 'number',
description: nls.localize('theia/ai/google/retryDelayOnRateLimitError/description',
'Delay in seconds between retries in case of rate limit errors. See https://ai.google.dev/gemini-api/docs/rate-limits'),
title: AI_CORE_PREFERENCES_TITLE,
default: 60,
minimum: 0
},
[RETRY_DELAY_OTHER_ERRORS]: {
type: 'number',
description: nls.localize('theia/ai/google/retryDelayOnOtherErrors/description',
'Delay in seconds between retries in case of other errors (sometimes the Google GenAI reports errors such as incomplete JSON syntax returned from the model \
or 500 Internal Server Error). Setting this to -1 prevents retries in these cases. Otherwise a retry happens either immediately (if set to 0) or after \
this delay in seconds (if set to a positive number).'),
title: AI_CORE_PREFERENCES_TITLE,
default: -1,
minimum: -1
}
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,13 @@ export interface GoogleModelDescription {
maxTokens?: number;

}

/**
 * Contract of the service that manages the Google language models and the settings
 * (API key, retry behavior) shared by them.
 */
export interface GoogleLanguageModelsManager {
/** The currently configured API key, or `undefined` when none is set. */
apiKey: string | undefined;
/** Sets or clears (when `undefined` is passed) the API key. */
setApiKey(key: string | undefined): void;
/** Sets the maximum number of retries performed when a request fails. */
setMaxRetriesOnErrors(maxRetries: number): void;
/** Sets the delay, in seconds, to wait before retrying after a rate limit error. */
setRetryDelayOnRateLimitError(retryDelay: number): void;
/** Sets the delay, in seconds, to wait before retrying after any other error; a negative value disables these retries. */
setRetryDelayOnOtherErrors(retryDelay: number): void;
/** Creates or updates language models from the given descriptions. */
createOrUpdateLanguageModels(...models: GoogleModelDescription[]): Promise<void>;
/** Presumably unregisters the models with the given ids; the implementation is not visible here (TODO confirm). */
removeLanguageModels(...modelIds: string[]): void
}
88 changes: 69 additions & 19 deletions packages/ai-google/src/node/google-language-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import {
} from '@theia/ai-core';
import { CancellationToken } from '@theia/core';
import { GoogleGenAI, FunctionCallingConfigMode, FunctionDeclaration, Content, Schema, Part, Modality, FunctionResponse } from '@google/genai';
import { wait } from '@theia/core/lib/common/promise-util';
import { GoogleLanguageModelRetrySettings } from './google-language-models-manager-impl';

interface ToolCallback {
readonly name: string;
Expand Down Expand Up @@ -124,6 +126,7 @@ export class GoogleModel implements LanguageModel {
public model: string,
public enableStreaming: boolean,
public apiKey: () => string | undefined,
public retrySettings: () => GoogleLanguageModelRetrySettings,
protected readonly tokenUsageService?: TokenUsageService
) { }

Expand Down Expand Up @@ -158,24 +161,26 @@ export class GoogleModel implements LanguageModel {
const { contents: parts, systemMessage } = transformToGeminiMessages(request.messages);
const functionDeclarations = this.createFunctionDeclarations(request);

const stream = await genAI.models.generateContentStream({
model: this.model,
config: {
systemInstruction: systemMessage,
toolConfig: {
functionCallingConfig: {
mode: FunctionCallingConfigMode.AUTO,
}
// Wrap the API call in the retry mechanism
const stream = await this.withRetry(async () =>
genAI.models.generateContentStream({
model: this.model,
config: {
systemInstruction: systemMessage,
toolConfig: {
functionCallingConfig: {
mode: FunctionCallingConfigMode.AUTO,
}
},
responseModalities: [Modality.TEXT],
tools: [{
functionDeclarations
}],
temperature: 1,
...settings
},
responseModalities: [Modality.TEXT],
tools: [{
functionDeclarations
}],
temperature: 1,
...settings
},
contents: [...parts, ...(toolMessages ?? [])]
});
contents: [...parts, ...(toolMessages ?? [])]
}));

const that = this;

Expand Down Expand Up @@ -349,7 +354,8 @@ export class GoogleModel implements LanguageModel {
const { contents: parts, systemMessage } = transformToGeminiMessages(request.messages);
const functionDeclarations = this.createFunctionDeclarations(request);

const model = await genAI.models.generateContent({
// Wrap the API call in the retry mechanism
const model = await this.withRetry(async () => genAI.models.generateContent({
model: this.model,
config: {
systemInstruction: systemMessage,
Expand All @@ -362,7 +368,7 @@ export class GoogleModel implements LanguageModel {
...settings
},
contents: parts
});
}));

try {
const responseText = model.text;
Expand Down Expand Up @@ -395,4 +401,48 @@ export class GoogleModel implements LanguageModel {
// TODO test vertexai
return new GoogleGenAI({ apiKey, vertexai: false });
}

/**
 * Implements a retry mechanism for the handle(non)Streaming request functions.
 *
 * The retry configuration is read once, at the start of the call, from `this.retrySettings()`:
 * - `maxRetriesOnErrors`: number of additional attempts after the first failure. Negative,
 *   fractional or non-numeric values are normalized to a non-negative integer, so that `fn`
 *   is always invoked at least once and a value below 1 simply disables retrying.
 * - `retryDelayOnRateLimitError`: delay in seconds before retrying after an error whose message
 *   contains `429 Too Many Requests`.
 * - `retryDelayOnOtherErrors`: delay in seconds before retrying after any other error.
 *
 * A delay that is negative (or not a number) disables retries for the respective error category.
 *
 * @param fn the wrapped function to which the retry logic should be applied.
 * @returns the result of the first successful invocation of the wrapped function.
 * @throws the error of the last failed invocation if no retry is left or retrying is disabled for that error.
 */
private async withRetry<T>(fn: () => Promise<T>): Promise<T> {
    const { maxRetriesOnErrors, retryDelayOnRateLimitError, retryDelayOnOtherErrors } = this.retrySettings();
    // The settings originate from user preferences and are forwarded unvalidated. Without this
    // normalization a negative or undefined value would skip the loop entirely, i.e. the request
    // would never be sent and a misleading internal error would be thrown instead.
    const maxRetries = Number.isFinite(maxRetriesOnErrors) ? Math.max(0, Math.floor(maxRetriesOnErrors)) : 0;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        try {
            return await fn();
        } catch (error) {
            if (attempt >= maxRetries) {
                // no retries left - throw the original error
                throw error;
            }

            // Anything can be thrown in JavaScript, so do not assume an Error instance.
            const message = error instanceof Error ? error.message : String(error);
            // Check for rate limit exhaustion (usually, there is a rate limit per minute, so we can retry after a delay...)
            const isRateLimitError = message.includes('429 Too Many Requests');
            const delaySeconds = isRateLimitError ? retryDelayOnRateLimitError : retryDelayOnOtherErrors;

            // Written as a negated `>=` so that NaN/undefined are treated like a negative value (= retry disabled).
            if (!(delaySeconds >= 0)) {
                // this kind of error should not be retried because of the setting
                throw error;
            }

            if (isRateLimitError) {
                console.warn(`Received 429 (Too Many Requests). Retrying in ${delaySeconds}s. Attempt ${attempt + 1} of ${maxRetries}.`);
            } else {
                console.warn(`Request failed: ${message}. Retrying in ${delaySeconds}s. Attempt ${attempt + 1} of ${maxRetries}.`);
            }
            await wait(delaySeconds * 1000);
            // -> reiterate the loop for the next attempt
        }
    }
    // Unreachable: the loop body runs at least once and every path in it returns or throws
    // on the final attempt. Kept to satisfy the compiler's return analysis.
    throw new Error('Retry mechanism failed unexpectedly.');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,20 @@ import { inject, injectable } from '@theia/core/shared/inversify';
import { GoogleModel } from './google-language-model';
import { GoogleLanguageModelsManager, GoogleModelDescription } from '../common';

/**
 * Settings that control how failed Google GenAI requests are retried.
 */
export interface GoogleLanguageModelRetrySettings {
/** Maximum number of retries after a failed request. */
maxRetriesOnErrors: number;
/** Delay in seconds before retrying after a rate limit (429) error; a negative value disables these retries. */
retryDelayOnRateLimitError: number;
/** Delay in seconds before retrying after any other error; a negative value disables these retries. */
retryDelayOnOtherErrors: number;
}

@injectable()
export class GoogleLanguageModelsManagerImpl implements GoogleLanguageModelsManager {

protected _apiKey: string | undefined;
protected retrySettings: GoogleLanguageModelRetrySettings = {
maxRetriesOnErrors: 3,
retryDelayOnRateLimitError: 60,
retryDelayOnOtherErrors: -1
};

/** Injected registry to which this manager adds the `GoogleModel` instances it creates. */
@inject(LanguageModelRegistry)
protected readonly languageModelRegistry: LanguageModelRegistry;
Expand All @@ -46,6 +56,7 @@ export class GoogleLanguageModelsManagerImpl implements GoogleLanguageModelsMana
}
return undefined;
};
const retrySettingsProvider = () => this.retrySettings;

if (model) {
if (!(model instanceof GoogleModel)) {
Expand All @@ -55,13 +66,15 @@ export class GoogleLanguageModelsManagerImpl implements GoogleLanguageModelsMana
model.model = modelDescription.model;
model.enableStreaming = modelDescription.enableStreaming;
model.apiKey = apiKeyProvider;
model.retrySettings = retrySettingsProvider;
} else {
this.languageModelRegistry.addLanguageModels([
new GoogleModel(
modelDescription.id,
modelDescription.model,
modelDescription.enableStreaming,
apiKeyProvider,
retrySettingsProvider,
this.tokenUsageService
)
]);
Expand All @@ -80,4 +93,16 @@ export class GoogleLanguageModelsManagerImpl implements GoogleLanguageModelsMana
this._apiKey = undefined;
}
}

/**
 * Stores the maximum number of retries in the shared retry settings object.
 * @param maxRetries the new maximum number of retries.
 */
setMaxRetriesOnErrors(maxRetries: number): void {
    const settings = this.retrySettings;
    settings.maxRetriesOnErrors = maxRetries;
}

/**
 * Stores the retry delay used after rate limit errors in the shared retry settings object.
 * @param retryDelay the new delay in seconds.
 */
setRetryDelayOnRateLimitError(retryDelay: number): void {
    const settings = this.retrySettings;
    settings.retryDelayOnRateLimitError = retryDelay;
}

/**
 * Stores the retry delay used after errors other than rate limit errors in the shared retry settings object.
 * @param retryDelay the new delay in seconds; a negative value disables retries for these errors.
 */
setRetryDelayOnOtherErrors(retryDelay: number): void {
    const settings = this.retrySettings;
    settings.retryDelayOnOtherErrors = retryDelay;
}
}
Loading
0