diff --git a/src/Managing.Api/Controllers/LlmController.cs b/src/Managing.Api/Controllers/LlmController.cs
index 4a3b5e92..f6f483af 100644
--- a/src/Managing.Api/Controllers/LlmController.cs
+++ b/src/Managing.Api/Controllers/LlmController.cs
@@ -96,6 +96,7 @@ public class LlmController : BaseController
int maxIterations = DetermineMaxIterations(request);
int iteration = 0;
LlmChatResponse? finalResponse = null;
+ const int DelayBetweenIterationsMs = 500; // 500ms delay between iterations to avoid rate limits
while (iteration < maxIterations)
{
@@ -103,6 +104,15 @@ public class LlmController : BaseController
_logger.LogInformation("LLM chat iteration {Iteration}/{MaxIterations} for user {UserId}",
iteration, maxIterations, user.Id);
+ // Add delay between iterations to avoid rapid bursts and rate limiting
+ // Skip delay on first iteration
+ if (iteration > 1)
+ {
+ _logger.LogDebug("Waiting {DelayMs}ms before iteration {Iteration} to avoid rate limits",
+ DelayBetweenIterationsMs, iteration);
+ await Task.Delay(DelayBetweenIterationsMs);
+ }
+
// Trim context if conversation is getting too long
TrimConversationContext(request);
diff --git a/src/Managing.Application/LLM/Providers/GeminiProvider.cs b/src/Managing.Application/LLM/Providers/GeminiProvider.cs
index b8080de6..00c39952 100644
--- a/src/Managing.Application/LLM/Providers/GeminiProvider.cs
+++ b/src/Managing.Application/LLM/Providers/GeminiProvider.cs
@@ -1,13 +1,16 @@
+using System.Net;
using System.Net.Http.Json;
using System.Text.Json;
using System.Text.Json.Serialization;
using Managing.Application.Abstractions.Services;
using Microsoft.Extensions.Logging;
+using Polly;
+using Polly.Retry;
namespace Managing.Application.LLM.Providers;
/// <summary>
-/// Google Gemini API provider
+/// Google Gemini API provider with retry logic and rate limit handling
/// </summary>
public class GeminiProvider : ILlmProvider
{
@@ -15,8 +18,11 @@ public class GeminiProvider : ILlmProvider
private readonly string _defaultModel;
private readonly HttpClient _httpClient;
private readonly ILogger _logger;
+ private readonly IAsyncPolicy _retryPolicy;
private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta";
private const string FallbackModel = "gemini-2.0-flash-exp";
+ private const int MaxRetryAttempts = 3;
+ private const int BaseRetryDelayMs = 2000; // 2 seconds base delay
public string Name => "gemini";
@@ -26,6 +32,51 @@ public class GeminiProvider : ILlmProvider
_defaultModel = defaultModel ?? FallbackModel;
_httpClient = httpClientFactory?.CreateClient() ?? new HttpClient();
_logger = logger;
+
+ // Configure retry policy with exponential backoff and Retry-After header support
+ _retryPolicy = Policy
+ .Handle()
+ .Or()
+ .Or()
+ .OrResult(r => !r.IsSuccessStatusCode && IsRetryableStatusCode(r.StatusCode))
+ .WaitAndRetryAsync(
+ retryCount: MaxRetryAttempts,
+ sleepDurationProvider: (retryAttempt, result, context) =>
+ {
+ // Check if response has Retry-After header (for 429 errors)
+ if (result.Result?.StatusCode == HttpStatusCode.TooManyRequests)
+ {
+ var retryAfter = ParseRetryAfterHeader(result.Result);
+ if (retryAfter.HasValue)
+ {
+ _logger.LogInformation(
+ "Rate limited (429). Respecting Retry-After header: {RetryAfterSeconds}s",
+ retryAfter.Value.TotalSeconds);
+ return retryAfter.Value;
+ }
+ }
+
+ // Exponential backoff with jitter: baseDelay * 2^(retryAttempt-1) + random jitter
+ var exponentialDelay = BaseRetryDelayMs * Math.Pow(2, retryAttempt - 1);
+ var jitter = new Random().Next(0, BaseRetryDelayMs / 4);
+ var delay = TimeSpan.FromMilliseconds(exponentialDelay + jitter);
+
+ _logger.LogInformation(
+ "Retrying after exponential backoff: {DelayMs}ms (attempt {Attempt}/{MaxAttempts})",
+ delay.TotalMilliseconds, retryAttempt, MaxRetryAttempts + 1);
+
+ return delay;
+ },
+ onRetry: (outcome, timespan, retryCount, context) =>
+ {
+ var exception = outcome.Exception;
+ var response = outcome.Result;
+ var errorMessage = exception?.Message ?? $"HTTP {response?.StatusCode}";
+
+ _logger.LogWarning(
+ "Gemini API request failed (attempt {RetryCount}/{MaxRetries}): {Error}. Retrying in {Delay}ms",
+ retryCount, MaxRetryAttempts + 1, errorMessage, timespan.TotalMilliseconds);
+ });
}
public async Task ChatAsync(LlmChatRequest request)
@@ -40,12 +91,16 @@ public class GeminiProvider : ILlmProvider
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
- var response = await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
+ // Execute request with retry policy
+ var response = await _retryPolicy.ExecuteAsync(async () =>
+ {
+ return await _httpClient.PostAsJsonAsync(url, geminiRequest, jsonOptions);
+ });
if (!response.IsSuccessStatusCode)
{
var errorContent = await response.Content.ReadAsStringAsync();
- _logger.LogError("Gemini API error: {StatusCode} - {Error}", response.StatusCode, errorContent);
+ _logger.LogError("Gemini API error after retries: {StatusCode} - {Error}", response.StatusCode, errorContent);
throw new HttpRequestException($"Gemini API error: {response.StatusCode} - {errorContent}");
}
@@ -53,6 +108,49 @@ public class GeminiProvider : ILlmProvider
return ConvertFromGeminiResponse(geminiResponse!);
}
+    /// <summary>
+    /// Decides whether a failed response is worth sending again.
+    /// Only 408, 429, 500, 502, 503 and 504 are treated as retryable.
+    /// </summary>
+    /// <param name="statusCode">Status code of the response being evaluated.</param>
+    /// <returns><c>true</c> for the status codes listed above; otherwise <c>false</c>.</returns>
+    private static bool IsRetryableStatusCode(HttpStatusCode statusCode) => statusCode switch
+    {
+        HttpStatusCode.RequestTimeout => true,
+        HttpStatusCode.TooManyRequests => true,
+        HttpStatusCode.InternalServerError => true,
+        HttpStatusCode.BadGateway => true,
+        HttpStatusCode.ServiceUnavailable => true,
+        HttpStatusCode.GatewayTimeout => true,
+        _ => false
+    };
+
+    /// <summary>
+    /// Reads the Retry-After header of a response and converts it into a wait duration.
+    /// Both the delta-seconds form and the HTTP-date form of the header are supported.
+    /// </summary>
+    /// <param name="response">Response whose headers are inspected.</param>
+    /// <returns>
+    /// The time to wait, or <c>null</c> when the header is absent, cannot be parsed,
+    /// or names a date that is not in the future.
+    /// </returns>
+    private static TimeSpan? ParseRetryAfterHeader(HttpResponseMessage response)
+    {
+        // The typed accessor parses the header independently of the current culture and
+        // yields null when the header is missing or malformed, so no manual int/date
+        // parsing (and no negative delay) can leak into the retry policy.
+        var retryAfter = response.Headers.RetryAfter;
+        if (retryAfter is null)
+        {
+            return null;
+        }
+
+        // Delta-seconds form, e.g. "Retry-After: 120".
+        if (retryAfter.Delta is { } delta)
+        {
+            return delta;
+        }
+
+        // HTTP-date form: subtract offset-aware instants so the result does not depend
+        // on the local time zone of the machine running this code.
+        if (retryAfter.Date is { } retryDate)
+        {
+            var delay = retryDate - DateTimeOffset.UtcNow;
+            return delay > TimeSpan.Zero ? delay : null;
+        }
+
+        return null;
+    }
+
private object ConvertToGeminiRequest(LlmChatRequest request)
{
var contents = request.Messages