Implement rate limit handling and retry logic in GeminiProvider
- Added a retry policy with exponential backoff for handling transient errors and rate limits in the Gemini API provider.
- Introduced a delay between iterations in LlmController to prevent rapid bursts and avoid hitting rate limits.
- Enhanced logging for retries and error handling to improve visibility into API interactions and rate-limiting behavior.
This commit is contained in:
@@ -96,6 +96,7 @@ public class LlmController : BaseController
|
||||
int maxIterations = DetermineMaxIterations(request);
|
||||
int iteration = 0;
|
||||
LlmChatResponse? finalResponse = null;
|
||||
const int DelayBetweenIterationsMs = 500; // 500ms delay between iterations to avoid rate limits
|
||||
|
||||
while (iteration < maxIterations)
|
||||
{
|
||||
@@ -103,6 +104,15 @@ public class LlmController : BaseController
|
||||
_logger.LogInformation("LLM chat iteration {Iteration}/{MaxIterations} for user {UserId}",
|
||||
iteration, maxIterations, user.Id);
|
||||
|
||||
// Add delay between iterations to avoid rapid bursts and rate limiting
|
||||
// Skip delay on first iteration
|
||||
if (iteration > 1)
|
||||
{
|
||||
_logger.LogDebug("Waiting {DelayMs}ms before iteration {Iteration} to avoid rate limits",
|
||||
DelayBetweenIterationsMs, iteration);
|
||||
await Task.Delay(DelayBetweenIterationsMs);
|
||||
}
|
||||
|
||||
// Trim context if conversation is getting too long
|
||||
TrimConversationContext(request);
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
using System.Net;
|
||||
using System.Net.Http.Json;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Managing.Application.Abstractions.Services;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace Managing.Application.LLM.Providers;
|
||||
|
||||
/// <summary>
|
||||
/// Google Gemini API provider
|
||||
/// Google Gemini API provider with retry logic and rate limit handling
|
||||
/// </summary>
|
||||
public class GeminiProvider : ILlmProvider
|
||||
{
|
||||
@@ -15,8 +18,11 @@ public class GeminiProvider : ILlmProvider
|
||||
private readonly string _defaultModel;
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly ILogger _logger;
|
||||
private readonly IAsyncPolicy<HttpResponseMessage> _retryPolicy;
|
||||
private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta";
|
||||
private const string FallbackModel = "gemini-2.0-flash-exp";
|
||||
private const int MaxRetryAttempts = 3;
|
||||
private const int BaseRetryDelayMs = 2000; // 2 seconds base delay
|
||||
|
||||
public string Name => "gemini";
|
||||
|
||||
@@ -26,6 +32,51 @@ public class GeminiProvider : ILlmProvider
|
||||
_defaultModel = defaultModel ?? FallbackModel;
|
||||
_httpClient = httpClientFactory?.CreateClient() ?? new HttpClient();
|
||||
_logger = logger;
|
||||
|
||||
// Configure retry policy with exponential backoff and Retry-After header support
|
||||
_retryPolicy = Policy
|
||||
.Handle<HttpRequestException>()
|
||||
.Or<TaskCanceledException>()
|
||||
.Or<TimeoutException>()
|
||||
.OrResult<HttpResponseMessage>(r => !r.IsSuccessStatusCode && IsRetryableStatusCode(r.StatusCode))
|
||||
.WaitAndRetryAsync(
|
||||
retryCount: MaxRetryAttempts,
|
||||
sleepDurationProvider: (retryAttempt, result, context) =>
|
||||
{
|
||||
// Check if response has Retry-After header (for 429 errors)
|
||||
if (result.Result?.StatusCode == HttpStatusCode.TooManyRequests)
|
||||
{
|
||||
var retryAfter = ParseRetryAfterHeader(result.Result);
|
||||
if (retryAfter.HasValue)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Rate limited (429). Respecting Retry-After header: {RetryAfterSeconds}s",
|
||||
retryAfter.Value.TotalSeconds);
|
||||
return retryAfter.Value;
|
||||
}
|
||||
}
|
||||
|
||||
// Exponential backoff with jitter: baseDelay * 2^(retryAttempt-1) + random jitter
|
||||
var exponentialDelay = BaseRetryDelayMs * Math.Pow(2, retryAttempt - 1);
|
||||
var jitter = new Random().Next(0, BaseRetryDelayMs / 4);
|
||||
var delay = TimeSpan.FromMilliseconds(exponentialDelay + jitter);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Retrying after exponential backoff: {DelayMs}ms (attempt {Attempt}/{MaxAttempts})",
|
||||
delay.TotalMilliseconds, retryAttempt, MaxRetryAttempts + 1);
|
||||
|
||||
return delay;
|
||||
},
|
||||
onRetry: (outcome, timespan, retryCount, context) =>
|
||||
{
|
||||
var exception = outcome.Exception;
|
||||
var response = outcome.Result;
|
||||
var errorMessage = exception?.Message ?? $"HTTP {response?.StatusCode}";
|
||||
|
||||
_logger.LogWarning(
|
||||
"Gemini API request failed (attempt {RetryCount}/{MaxRetries}): {Error}. Retrying in {Delay}ms",
|
||||
retryCount, MaxRetryAttempts + 1, errorMessage, timespan.TotalMilliseconds);
|
||||
});
|
||||
}
|
||||
|
||||
public async Task<LlmChatResponse> ChatAsync(LlmChatRequest request)
|
||||
@@ -40,12 +91,16 @@ public class GeminiProvider : ILlmProvider
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
|
||||
};
|
||||
|
||||
var response = await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
|
||||
// Execute request with retry policy
|
||||
var response = await _retryPolicy.ExecuteAsync(async () =>
|
||||
{
|
||||
return await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
|
||||
});
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
var errorContent = await response.Content.ReadAsStringAsync();
|
||||
_logger.LogError("Gemini API error: {StatusCode} - {Error}", response.StatusCode, errorContent);
|
||||
_logger.LogError("Gemini API error after retries: {StatusCode} - {Error}", response.StatusCode, errorContent);
|
||||
throw new HttpRequestException($"Gemini API error: {response.StatusCode} - {errorContent}");
|
||||
}
|
||||
|
||||
@@ -53,6 +108,49 @@ public class GeminiProvider : ILlmProvider
|
||||
return ConvertFromGeminiResponse(geminiResponse!);
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether a failed HTTP response is worth retrying.
/// Covers rate limiting (429), request/gateway timeouts, and transient
/// 5xx server errors; all other status codes are treated as permanent.
/// </summary>
private static bool IsRetryableStatusCode(HttpStatusCode statusCode)
{
    switch (statusCode)
    {
        case HttpStatusCode.RequestTimeout:
        case HttpStatusCode.TooManyRequests:
        case HttpStatusCode.InternalServerError:
        case HttpStatusCode.BadGateway:
        case HttpStatusCode.ServiceUnavailable:
        case HttpStatusCode.GatewayTimeout:
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Parses the Retry-After header from an HTTP response.
/// Supports both delta-seconds and HTTP-date (RFC 7231) formats.
/// </summary>
/// <param name="response">The HTTP response whose headers are inspected.</param>
/// <returns>
/// The time to wait before retrying, or null when the header is absent,
/// malformed, or refers to a moment that has already passed.
/// </returns>
private static TimeSpan? ParseRetryAfterHeader(HttpResponseMessage response)
{
    // Use the strongly-typed header instead of re-parsing the raw string:
    // HttpClient has already validated both the delta-seconds and HTTP-date
    // forms, and the date form is interpreted per RFC 7231 (GMT). The previous
    // DateTime.TryParse approach assumed the machine's local time zone and
    // current culture, then subtracted DateTime.UtcNow — yielding a delay that
    // was off by the UTC offset on any non-UTC machine.
    var retryAfter = response.Headers.RetryAfter;
    if (retryAfter is null)
        return null;

    // Delta-seconds form, e.g. "Retry-After: 120".
    if (retryAfter.Delta.HasValue)
    {
        var delta = retryAfter.Delta.Value;
        return delta > TimeSpan.Zero ? delta : null;
    }

    // HTTP-date form, e.g. "Retry-After: Fri, 07 Nov 2025 23:59:59 GMT".
    if (retryAfter.Date.HasValue)
    {
        var delay = retryAfter.Date.Value - DateTimeOffset.UtcNow;
        return delay > TimeSpan.Zero ? delay : null;
    }

    return null;
}
|
||||
|
||||
private object ConvertToGeminiRequest(LlmChatRequest request)
|
||||
{
|
||||
var contents = request.Messages
|
||||
|
||||
Reference in New Issue
Block a user