Enhance error handling and logging in BotController, LiveTradingBotGrain, and BotService

- Added specific handling for ServiceUnavailableException in BotController to return user-friendly messages.
- Improved logging for Orleans exceptions in both BotController and BotService to provide clearer context during errors.
- Implemented verification of position closure status in LiveTradingBotGrain, including timeout handling for closing positions.
- Enhanced logging for critical and non-critical operations during bot stop processes to ensure better traceability.
This commit is contained in:
2025-11-23 21:48:21 +07:00
parent 411fc41bef
commit d10ce5e3ba
3 changed files with 182 additions and 5 deletions

View File

@@ -7,6 +7,7 @@ using Managing.Application.ManageBot.Commands;
using Managing.Application.Shared; using Managing.Application.Shared;
using Managing.Common; using Managing.Common;
using Managing.Core; using Managing.Core;
using Managing.Core.Exceptions;
using Managing.Domain.Accounts; using Managing.Domain.Accounts;
using Managing.Domain.Backtests; using Managing.Domain.Backtests;
using Managing.Domain.Bots; using Managing.Domain.Bots;
@@ -245,9 +246,26 @@ public class BotController : BaseController
return Ok(result); return Ok(result);
} }
catch (ServiceUnavailableException ex)
{
// ServiceUnavailableException is already user-friendly (e.g., from Orleans exception conversion)
_logger.LogWarning(ex, "Service unavailable error stopping bot {Identifier}", identifier);
return StatusCode(503, ex.Message);
}
catch (Exception ex) catch (Exception ex)
{ {
_logger.LogError(ex, "Error stopping bot"); _logger.LogError(ex, "Error stopping bot {Identifier}", identifier);
// Check if this is an Orleans exception that wasn't caught earlier
if (OrleansExceptionHelper.IsOrleansException(ex))
{
var userMessage = OrleansExceptionHelper.GetUserFriendlyMessage(ex, "bot stop");
_logger.LogWarning(
"Orleans exception detected in controller for bot {Identifier}: {ExceptionType}",
identifier, ex.GetType().Name);
return StatusCode(503, userMessage);
}
return StatusCode(500, $"Error stopping bot: {ex.Message}"); return StatusCode(500, $"Error stopping bot: {ex.Message}");
} }
} }

View File

@@ -320,6 +320,36 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
if (!isRestarting) if (!isRestarting)
{ {
await CloseAllOpenPositionsAsync(); await CloseAllOpenPositionsAsync();
// Verify positions are actually closed (even if CloseAllOpenPositionsAsync had timeouts/exceptions)
// This ensures we don't report failure if positions were successfully closed despite timeouts
try
{
var botId = this.GetPrimaryKey();
var positions = await ServiceScopeHelpers.WithScopedService<ITradingService, IEnumerable<Position>>(
_scopeFactory,
async tradingService => await tradingService.GetPositionsByInitiatorIdentifierAsync(botId));
var stillOpenPositions = positions?.Where(p => p.IsOpen() || p.Status.Equals(PositionStatus.New)).ToList() ??
new List<Position>();
if (stillOpenPositions.Any())
{
_logger.LogWarning(
"Bot {GrainId} still has {Count} open positions after closure attempt: {Positions}",
botId, stillOpenPositions.Count, string.Join(", ", stillOpenPositions.Select(p => p.Identifier)));
}
else
{
_logger.LogInformation("Bot {GrainId} - all positions verified as closed", botId);
}
}
catch (Exception ex)
{
// Don't fail the stop operation if we can't verify positions
_logger.LogWarning(ex, "Could not verify position closure status for bot {GrainId}, continuing with stop",
this.GetPrimaryKey());
}
} }
// The check is now against the registry status // The check is now against the registry status
@@ -332,6 +362,9 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
return; return;
} }
// Track if critical operations succeeded
var criticalOperationsSucceeded = false;
try try
{ {
StopAndDisposeTimer(); StopAndDisposeTimer();
@@ -355,17 +388,69 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
// Sync state from the volatile TradingBotBase before destroying it // Sync state from the volatile TradingBotBase before destroying it
SyncStateFromBase(); SyncStateFromBase();
await _state.WriteStateAsync();
await SaveBotAsync(BotStatus.Stopped); // Critical: Stop the bot and clear the trading bot instance
await _tradingBot?.StopBot(reason)!; await _tradingBot?.StopBot(reason)!;
_tradingBot = null; _tradingBot = null;
// Mark critical operations as succeeded
criticalOperationsSucceeded = true;
// Non-critical cleanup operations - continue even if these fail
try
{
await _state.WriteStateAsync();
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to write state for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
}
try
{
await SaveBotAsync(BotStatus.Stopped);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to save bot status for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
}
try
{
await UpdateBotRegistryStatus(BotStatus.Stopped);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Failed to update bot registry for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
}
await UpdateBotRegistryStatus(BotStatus.Stopped);
_logger.LogInformation("LiveTradingBotGrain {GrainId} stopped successfully", this.GetPrimaryKey()); _logger.LogInformation("LiveTradingBotGrain {GrainId} stopped successfully", this.GetPrimaryKey());
} }
catch (Exception ex) catch (Exception ex)
{ {
// If critical operations succeeded (bot stopped, positions closed), don't throw
// Only throw if critical operations failed
if (criticalOperationsSucceeded)
{
_logger.LogWarning(ex,
"Cleanup operations failed for bot {GrainId}, but bot was successfully stopped. " +
"This is non-critical and the stop operation is considered successful.",
this.GetPrimaryKey());
// Don't throw - the bot is stopped, which is what matters
return;
}
_logger.LogError(ex, "Failed to stop LiveTradingBotGrain {GrainId}", this.GetPrimaryKey()); _logger.LogError(ex, "Failed to stop LiveTradingBotGrain {GrainId}", this.GetPrimaryKey());
// Log Orleans-specific exceptions with additional context for debugging
if (Managing.Core.Exceptions.OrleansExceptionHelper.IsOrleansException(ex))
{
_logger.LogWarning(
"Orleans exception detected during bot stop: {ExceptionType} - {Message}. " +
"This may indicate a timeout or deadlock when closing positions.",
ex.GetType().Name, ex.Message);
}
throw; throw;
} }
} }
@@ -1151,13 +1236,75 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
"Bot {GrainId} has {Count} open positions that will be closed before stopping: {Positions}", "Bot {GrainId} has {Count} open positions that will be closed before stopping: {Positions}",
botId, openPositions.Count, string.Join(", ", openPositions.Select(p => p.Identifier))); botId, openPositions.Count, string.Join(", ", openPositions.Select(p => p.Identifier)));
// Add timeout for closing positions (30 seconds per position, max 2 minutes total)
var maxTotalTime = TimeSpan.FromMinutes(2);
var perPositionTimeout = TimeSpan.FromSeconds(30);
var startTime = DateTime.UtcNow;
foreach (var position in openPositions) foreach (var position in openPositions)
{ {
// Check if we've exceeded total timeout
if (DateTime.UtcNow - startTime > maxTotalTime)
{
_logger.LogWarning(
"Closing positions for bot {GrainId} exceeded total timeout of {Timeout} seconds. Stopping position closure.",
botId, maxTotalTime.TotalSeconds);
break;
}
try try
{ {
_logger.LogInformation("Closing position {PositionId} for bot {GrainId}", position.Identifier, _logger.LogInformation("Closing position {PositionId} for bot {GrainId}", position.Identifier,
botId); botId);
await ClosePositionAsync(position.Identifier);
// Use timeout for individual position closure
using var cts = new CancellationTokenSource(perPositionTimeout);
var closeTask = ClosePositionAsync(position.Identifier);
var timeoutTask = Task.Delay(perPositionTimeout, cts.Token);
var completedTask = await Task.WhenAny(closeTask, timeoutTask);
if (completedTask == timeoutTask)
{
_logger.LogWarning(
"Closing position {PositionId} for bot {GrainId} timed out after {Timeout} seconds. " +
"Verifying if position was actually closed...",
position.Identifier, botId, perPositionTimeout.TotalSeconds);
// Verify if position was actually closed despite timeout
try
{
var updatedPositions = await ServiceScopeHelpers.WithScopedService<ITradingService, IEnumerable<Position>>(
_scopeFactory,
async tradingService => await tradingService.GetPositionsByInitiatorIdentifierAsync(botId));
var updatedPosition = updatedPositions?.FirstOrDefault(p => p.Identifier == position.Identifier);
if (updatedPosition != null && !updatedPosition.IsOpen() && updatedPosition.Status != PositionStatus.New)
{
_logger.LogInformation(
"Position {PositionId} was actually closed despite timeout. Status: {Status}",
position.Identifier, updatedPosition.Status);
// Position is closed, continue
}
else
{
_logger.LogWarning(
"Position {PositionId} may not be closed. Status: {Status}",
position.Identifier, updatedPosition != null ? updatedPosition.Status.ToString() : "null");
// Continue with other positions - we'll verify all positions at the end
}
}
catch (Exception verifyEx)
{
_logger.LogWarning(verifyEx,
"Could not verify position {PositionId} closure status after timeout",
position.Identifier);
}
// Continue with other positions
continue;
}
await closeTask;
_logger.LogInformation("Successfully closed position {PositionId} for bot {GrainId}", _logger.LogInformation("Successfully closed position {PositionId} for bot {GrainId}",
position.Identifier, botId); position.Identifier, botId);
} }

View File

@@ -6,6 +6,7 @@ using Managing.Application.Bots;
using Managing.Application.Bots.Models; using Managing.Application.Bots.Models;
using Managing.Common; using Managing.Common;
using Managing.Core; using Managing.Core;
using Managing.Core.Exceptions;
using Managing.Domain.Accounts; using Managing.Domain.Accounts;
using Managing.Domain.Bots; using Managing.Domain.Bots;
using Managing.Domain.Indicators; using Managing.Domain.Indicators;
@@ -60,6 +61,17 @@ namespace Managing.Application.ManageBot
catch (Exception e) catch (Exception e)
{ {
_tradingBotLogger.LogError(e, "Error stopping bot {Identifier}", identifier); _tradingBotLogger.LogError(e, "Error stopping bot {Identifier}", identifier);
// Check if this is an Orleans exception and convert to user-friendly message
if (OrleansExceptionHelper.IsOrleansException(e))
{
var userMessage = OrleansExceptionHelper.GetUserFriendlyMessage(e, "bot stop");
_tradingBotLogger.LogWarning(
"Orleans exception during bot stop {Identifier}: {ExceptionType}. Converting to user-friendly message.",
identifier, e.GetType().Name);
throw new ServiceUnavailableException(userMessage);
}
throw; throw;
} }
} }