Implement rate limit handling and retry logic in GeminiProvider

- Added a retry policy with exponential backoff for handling transient errors and rate limits in the Gemini API provider.
- Introduced a delay between iterations in LlmController to prevent rapid bursts and avoid hitting rate limits.
- Enhanced logging for retries and error handling to improve visibility into API interactions and rate limiting behavior.
This commit is contained in:
2026-01-06 17:55:29 +07:00
parent e0a064456a
commit 438a0b1a63
2 changed files with 111 additions and 3 deletions

View File

@@ -96,6 +96,7 @@ public class LlmController : BaseController
int maxIterations = DetermineMaxIterations(request);
int iteration = 0;
LlmChatResponse? finalResponse = null;
const int DelayBetweenIterationsMs = 500; // 500ms delay between iterations to avoid rate limits
while (iteration < maxIterations)
{
@@ -103,6 +104,15 @@ public class LlmController : BaseController
_logger.LogInformation("LLM chat iteration {Iteration}/{MaxIterations} for user {UserId}",
iteration, maxIterations, user.Id);
// Add delay between iterations to avoid rapid bursts and rate limiting
// Skip delay on first iteration
if (iteration > 1)
{
_logger.LogDebug("Waiting {DelayMs}ms before iteration {Iteration} to avoid rate limits",
DelayBetweenIterationsMs, iteration);
await Task.Delay(DelayBetweenIterationsMs);
}
// Trim context if conversation is getting too long
TrimConversationContext(request);

View File

@@ -1,13 +1,16 @@
using System.Net;
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;
using Managing.Application.Abstractions.Services;
using Microsoft.Extensions.Logging;
using Polly;
using Polly.Retry;
namespace Managing.Application.LLM.Providers;
/// <summary>
/// Google Gemini API provider
/// Google Gemini API provider with retry logic and rate limit handling
/// </summary>
public class GeminiProvider : ILlmProvider
{
@@ -15,8 +18,11 @@ public class GeminiProvider : ILlmProvider
private readonly string _defaultModel;
private readonly HttpClient _httpClient;
private readonly ILogger _logger;
private readonly IAsyncPolicy<HttpResponseMessage> _retryPolicy;
private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta";
private const string FallbackModel = "gemini-2.0-flash-exp";
private const int MaxRetryAttempts = 3;
private const int BaseRetryDelayMs = 2000; // 2 seconds base delay
public string Name => "gemini";
@@ -26,6 +32,51 @@ public class GeminiProvider : ILlmProvider
_defaultModel = defaultModel ?? FallbackModel;
_httpClient = httpClientFactory?.CreateClient() ?? new HttpClient();
_logger = logger;
// Retry policy for transient failures of the Gemini endpoint.
// Retried outcomes: HttpRequestException, TaskCanceledException, TimeoutException, and any
// non-success response whose status code IsRetryableStatusCode accepts.
// At most MaxRetryAttempts retries are made after the initial call.
_retryPolicy = Policy
    .Handle<HttpRequestException>()
    .Or<TaskCanceledException>()
    .Or<TimeoutException>()
    .OrResult<HttpResponseMessage>(r => !r.IsSuccessStatusCode && IsRetryableStatusCode(r.StatusCode))
    .WaitAndRetryAsync(
        retryCount: MaxRetryAttempts,
        sleepDurationProvider: (retryAttempt, result, context) =>
        {
            // A 429 response may tell us exactly how long to wait; prefer that over our own schedule.
            if (result.Result?.StatusCode == HttpStatusCode.TooManyRequests)
            {
                var retryAfter = ParseRetryAfterHeader(result.Result);
                if (retryAfter.HasValue)
                {
                    _logger.LogInformation(
                        "Rate limited (429). Respecting Retry-After header: {RetryAfterSeconds}s",
                        retryAfter.Value.TotalSeconds);
                    return retryAfter.Value;
                }
            }

            // Exponential backoff: BaseRetryDelayMs * 2^(retryAttempt - 1), plus a random jitter
            // in [0, BaseRetryDelayMs / 4) so concurrent callers do not retry in lockstep.
            var exponentialDelay = BaseRetryDelayMs * Math.Pow(2, retryAttempt - 1);
            // Random.Shared is thread-safe and avoids allocating a new generator on every retry.
            var jitter = Random.Shared.Next(0, BaseRetryDelayMs / 4);
            var delay = TimeSpan.FromMilliseconds(exponentialDelay + jitter);

            _logger.LogInformation(
                "Retrying after exponential backoff: {DelayMs}ms (attempt {Attempt}/{MaxAttempts})",
                delay.TotalMilliseconds, retryAttempt, MaxRetryAttempts + 1);
            return delay;
        },
        onRetry: (outcome, timespan, retryCount, context) =>
        {
            // Exactly one of Exception / Result describes the failure that triggered this retry.
            var exception = outcome.Exception;
            var response = outcome.Result;
            var errorMessage = exception?.Message ?? $"HTTP {response?.StatusCode}";

            _logger.LogWarning(
                "Gemini API request failed (attempt {RetryCount}/{MaxRetries}): {Error}. Retrying in {Delay}ms",
                retryCount, MaxRetryAttempts + 1, errorMessage, timespan.TotalMilliseconds);
        });
}
public async Task<LlmChatResponse> ChatAsync(LlmChatRequest request)
@@ -40,12 +91,16 @@ public class GeminiProvider : ILlmProvider
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
var response = await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
// Execute request with retry policy
var response = await _retryPolicy.ExecuteAsync(async () =>
{
return await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
});
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
_logger.LogError("Gemini API error: {StatusCode} - {Error}", response.StatusCode, errorContent);
_logger.LogError("Gemini API error after retries: {StatusCode} - {Error}", response.StatusCode, errorContent);
throw new HttpRequestException($"Gemini API error: {response.StatusCode} - {errorContent}");
}
@@ -53,6 +108,49 @@ public class GeminiProvider : ILlmProvider
return ConvertFromGeminiResponse(geminiResponse!);
}
/// <summary>
/// Tells whether a failed response is worth retrying.
/// </summary>
/// <param name="statusCode">Status code of the response that was received.</param>
/// <returns>
/// <c>true</c> for 408, 429, 500, 502, 503 and 504; <c>false</c> for every other status code.
/// </returns>
private static bool IsRetryableStatusCode(HttpStatusCode statusCode) => statusCode switch
{
    HttpStatusCode.RequestTimeout => true,
    HttpStatusCode.TooManyRequests => true,
    HttpStatusCode.InternalServerError => true,
    HttpStatusCode.BadGateway => true,
    HttpStatusCode.ServiceUnavailable => true,
    HttpStatusCode.GatewayTimeout => true,
    _ => false
};
/// <summary>
/// Reads the wait time requested by the server through the <c>Retry-After</c> response header.
/// </summary>
/// <remarks>
/// Both header forms are supported: a number of seconds and an absolute HTTP-date.
/// Parsing is delegated to the framework's typed header accessor, which is culture-independent
/// and exposes the date as a <see cref="DateTimeOffset"/>. This keeps the remaining delay correct
/// regardless of the machine's local time zone.
/// </remarks>
/// <param name="response">Response whose headers are inspected.</param>
/// <returns>
/// The delay to wait before retrying, or <c>null</c> when the header is absent, cannot be parsed,
/// or names a point in time that is not in the future.
/// </returns>
private static TimeSpan? ParseRetryAfterHeader(HttpResponseMessage response)
{
    // Null when the header is missing or its value is not a valid Retry-After value.
    var retryAfter = response.Headers.RetryAfter;
    if (retryAfter is null)
        return null;

    // delay-seconds form, e.g. "Retry-After: 30"
    if (retryAfter.Delta is { } delta)
        return delta;

    // HTTP-date form, e.g. "Retry-After: Wed, 21 Oct 2015 07:28:00 GMT"
    if (retryAfter.Date is { } retryAt)
    {
        var delay = retryAt - DateTimeOffset.UtcNow;
        return delay > TimeSpan.Zero ? delay : null;
    }

    return null;
}
private object ConvertToGeminiRequest(LlmChatRequest request)
{
var contents = request.Messages