From 438a0b1a6301f8a5a624a7a38fb2727e01b3841e Mon Sep 17 00:00:00 2001 From: cryptooda Date: Tue, 6 Jan 2026 17:55:29 +0700 Subject: [PATCH] Implement rate limit handling and retry logic in GeminiProvider - Added a retry policy with exponential backoff for handling transient errors and rate limits in the Gemini API provider. - Introduced a delay between iterations in LlmController to prevent rapid bursts and avoid hitting rate limits. - Enhanced logging for retries and error handling to improve visibility into API interactions and rate limiting behavior. --- src/Managing.Api/Controllers/LlmController.cs | 10 ++ .../LLM/Providers/GeminiProvider.cs | 104 +++++++++++++++++- 2 files changed, 111 insertions(+), 3 deletions(-) diff --git a/src/Managing.Api/Controllers/LlmController.cs b/src/Managing.Api/Controllers/LlmController.cs index 4a3b5e92..f6f483af 100644 --- a/src/Managing.Api/Controllers/LlmController.cs +++ b/src/Managing.Api/Controllers/LlmController.cs @@ -96,6 +96,7 @@ public class LlmController : BaseController int maxIterations = DetermineMaxIterations(request); int iteration = 0; LlmChatResponse? 
finalResponse = null; + const int DelayBetweenIterationsMs = 500; // 500ms delay between iterations to avoid rate limits while (iteration < maxIterations) { @@ -103,6 +104,15 @@ public class LlmController : BaseController _logger.LogInformation("LLM chat iteration {Iteration}/{MaxIterations} for user {UserId}", iteration, maxIterations, user.Id); + // Add delay between iterations to avoid rapid bursts and rate limiting + // Skip delay on first iteration + if (iteration > 1) + { + _logger.LogDebug("Waiting {DelayMs}ms before iteration {Iteration} to avoid rate limits", + DelayBetweenIterationsMs, iteration); + await Task.Delay(DelayBetweenIterationsMs); + } + // Trim context if conversation is getting too long TrimConversationContext(request); diff --git a/src/Managing.Application/LLM/Providers/GeminiProvider.cs b/src/Managing.Application/LLM/Providers/GeminiProvider.cs index b8080de6..00c39952 100644 --- a/src/Managing.Application/LLM/Providers/GeminiProvider.cs +++ b/src/Managing.Application/LLM/Providers/GeminiProvider.cs @@ -1,13 +1,16 @@ +using System.Net; using System.Net.Http.Json; using System.Text.Json; using System.Text.Json.Serialization; using Managing.Application.Abstractions.Services; using Microsoft.Extensions.Logging; +using Polly; +using Polly.Retry; namespace Managing.Application.LLM.Providers; /// -/// Google Gemini API provider +/// Google Gemini API provider with retry logic and rate limit handling /// public class GeminiProvider : ILlmProvider { @@ -15,8 +18,11 @@ public class GeminiProvider : ILlmProvider private readonly string _defaultModel; private readonly HttpClient _httpClient; private readonly ILogger _logger; + private readonly IAsyncPolicy _retryPolicy; private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta"; private const string FallbackModel = "gemini-2.0-flash-exp"; + private const int MaxRetryAttempts = 3; + private const int BaseRetryDelayMs = 2000; // 2 seconds base delay public string Name => "gemini"; 
@@ -26,6 +32,51 @@ public class GeminiProvider : ILlmProvider _defaultModel = defaultModel ?? FallbackModel; _httpClient = httpClientFactory?.CreateClient() ?? new HttpClient(); _logger = logger; + + // Configure retry policy with exponential backoff and Retry-After header support + _retryPolicy = Policy + .Handle() + .Or() + .Or() + .OrResult(r => !r.IsSuccessStatusCode && IsRetryableStatusCode(r.StatusCode)) + .WaitAndRetryAsync( + retryCount: MaxRetryAttempts, + sleepDurationProvider: (retryAttempt, result, context) => + { + // Check if response has Retry-After header (for 429 errors) + if (result.Result?.StatusCode == HttpStatusCode.TooManyRequests) + { + var retryAfter = ParseRetryAfterHeader(result.Result); + if (retryAfter.HasValue) + { + _logger.LogInformation( + "Rate limited (429). Respecting Retry-After header: {RetryAfterSeconds}s", + retryAfter.Value.TotalSeconds); + return retryAfter.Value; + } + } + + // Exponential backoff with jitter: baseDelay * 2^(retryAttempt-1) + random jitter + var exponentialDelay = BaseRetryDelayMs * Math.Pow(2, retryAttempt - 1); + var jitter = new Random().Next(0, BaseRetryDelayMs / 4); + var delay = TimeSpan.FromMilliseconds(exponentialDelay + jitter); + + _logger.LogInformation( + "Retrying after exponential backoff: {DelayMs}ms (attempt {Attempt}/{MaxAttempts})", + delay.TotalMilliseconds, retryAttempt, MaxRetryAttempts + 1); + + return delay; + }, + onRetry: (outcome, timespan, retryCount, context) => + { + var exception = outcome.Exception; + var response = outcome.Result; + var errorMessage = exception?.Message ?? $"HTTP {response?.StatusCode}"; + + _logger.LogWarning( + "Gemini API request failed (attempt {RetryCount}/{MaxRetries}): {Error}. 
Retrying in {Delay}ms",
+                        retryCount, MaxRetryAttempts + 1, errorMessage, timespan.TotalMilliseconds);
+                });
     }
 
     public async Task ChatAsync(LlmChatRequest request)
@@ -40,12 +91,16 @@ public class GeminiProvider : ILlmProvider
             DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
         };
 
-        var response = await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
+        // Execute request with retry policy
+        var response = await _retryPolicy.ExecuteAsync(async () =>
+        {
+            return await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
+        });
 
         if (!response.IsSuccessStatusCode)
         {
             var errorContent = await response.Content.ReadAsStringAsync();
-            _logger.LogError("Gemini API error: {StatusCode} - {Error}", response.StatusCode, errorContent);
+            _logger.LogError("Gemini API error after retries: {StatusCode} - {Error}", response.StatusCode, errorContent);
             throw new HttpRequestException($"Gemini API error: {response.StatusCode} - {errorContent}");
         }
 
@@ -53,6 +108,49 @@ public class GeminiProvider : ILlmProvider
         return ConvertFromGeminiResponse(geminiResponse!);
     }
 
+    /// <summary>
+    /// Returns true for the status codes the retry policy treats as transient: 408, 429, 500, 502, 503 and 504
+    /// </summary>
+    private static bool IsRetryableStatusCode(HttpStatusCode statusCode)
+    {
+        return statusCode is HttpStatusCode.RequestTimeout
+            or HttpStatusCode.TooManyRequests
+            or HttpStatusCode.InternalServerError
+            or HttpStatusCode.BadGateway
+            or HttpStatusCode.ServiceUnavailable
+            or HttpStatusCode.GatewayTimeout;
+    }
+
+    /// <summary>
+    /// Parses the Retry-After header from HTTP response
+    /// Supports both seconds (integer) and HTTP-date formats
+    /// </summary>
+    private static TimeSpan? 
ParseRetryAfterHeader(HttpResponseMessage response)
+    {
+        // Read the typed header instead of hand-parsing the raw string. The
+        // framework parser is culture-invariant, only accepts non-negative
+        // delta-seconds, and exposes HTTP-dates as DateTimeOffset, so the
+        // value can be compared with UtcNow without a local/UTC mix-up.
+        var retryAfter = response.Headers.RetryAfter;
+        if (retryAfter is null)
+            return null; // header absent or not a valid Retry-After value
+
+        // delta-seconds form, e.g. "Retry-After: 120"
+        if (retryAfter.Delta is { } delta)
+        {
+            return delta;
+        }
+
+        // HTTP-date form: wait until that instant; a past date is no usable hint
+        if (retryAfter.Date is { } retryDate)
+        {
+            var delay = retryDate - DateTimeOffset.UtcNow;
+            return delay > TimeSpan.Zero ? delay : null;
+        }
+
+        return null;
+    }
+
     private object ConvertToGeminiRequest(LlmChatRequest request)
     {
         var contents = request.Messages