Enhance error handling and logging in BotController, LiveTradingBotGrain, and BotService
- Added specific handling for ServiceUnavailableException in BotController to return user-friendly messages. - Improved logging for Orleans exceptions in both BotController and BotService to provide clearer context during errors. - Implemented verification of position closure status in LiveTradingBotGrain, including timeout handling for closing positions. - Enhanced logging for critical and non-critical operations during bot stop processes to ensure better traceability.
This commit is contained in:
@@ -7,6 +7,7 @@ using Managing.Application.ManageBot.Commands;
|
|||||||
using Managing.Application.Shared;
|
using Managing.Application.Shared;
|
||||||
using Managing.Common;
|
using Managing.Common;
|
||||||
using Managing.Core;
|
using Managing.Core;
|
||||||
|
using Managing.Core.Exceptions;
|
||||||
using Managing.Domain.Accounts;
|
using Managing.Domain.Accounts;
|
||||||
using Managing.Domain.Backtests;
|
using Managing.Domain.Backtests;
|
||||||
using Managing.Domain.Bots;
|
using Managing.Domain.Bots;
|
||||||
@@ -245,9 +246,26 @@ public class BotController : BaseController
|
|||||||
|
|
||||||
return Ok(result);
|
return Ok(result);
|
||||||
}
|
}
|
||||||
|
catch (ServiceUnavailableException ex)
|
||||||
|
{
|
||||||
|
// ServiceUnavailableException is already user-friendly (e.g., from Orleans exception conversion)
|
||||||
|
_logger.LogWarning(ex, "Service unavailable error stopping bot {Identifier}", identifier);
|
||||||
|
return StatusCode(503, ex.Message);
|
||||||
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
_logger.LogError(ex, "Error stopping bot");
|
_logger.LogError(ex, "Error stopping bot {Identifier}", identifier);
|
||||||
|
|
||||||
|
// Check if this is an Orleans exception that wasn't caught earlier
|
||||||
|
if (OrleansExceptionHelper.IsOrleansException(ex))
|
||||||
|
{
|
||||||
|
var userMessage = OrleansExceptionHelper.GetUserFriendlyMessage(ex, "bot stop");
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Orleans exception detected in controller for bot {Identifier}: {ExceptionType}",
|
||||||
|
identifier, ex.GetType().Name);
|
||||||
|
return StatusCode(503, userMessage);
|
||||||
|
}
|
||||||
|
|
||||||
return StatusCode(500, $"Error stopping bot: {ex.Message}");
|
return StatusCode(500, $"Error stopping bot: {ex.Message}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -320,6 +320,36 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
|
|||||||
if (!isRestarting)
|
if (!isRestarting)
|
||||||
{
|
{
|
||||||
await CloseAllOpenPositionsAsync();
|
await CloseAllOpenPositionsAsync();
|
||||||
|
|
||||||
|
// Verify positions are actually closed (even if CloseAllOpenPositionsAsync had timeouts/exceptions)
|
||||||
|
// This ensures we don't report failure if positions were successfully closed despite timeouts
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var botId = this.GetPrimaryKey();
|
||||||
|
var positions = await ServiceScopeHelpers.WithScopedService<ITradingService, IEnumerable<Position>>(
|
||||||
|
_scopeFactory,
|
||||||
|
async tradingService => await tradingService.GetPositionsByInitiatorIdentifierAsync(botId));
|
||||||
|
|
||||||
|
var stillOpenPositions = positions?.Where(p => p.IsOpen() || p.Status.Equals(PositionStatus.New)).ToList() ??
|
||||||
|
new List<Position>();
|
||||||
|
|
||||||
|
if (stillOpenPositions.Any())
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Bot {GrainId} still has {Count} open positions after closure attempt: {Positions}",
|
||||||
|
botId, stillOpenPositions.Count, string.Join(", ", stillOpenPositions.Select(p => p.Identifier)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_logger.LogInformation("Bot {GrainId} - all positions verified as closed", botId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
// Don't fail the stop operation if we can't verify positions
|
||||||
|
_logger.LogWarning(ex, "Could not verify position closure status for bot {GrainId}, continuing with stop",
|
||||||
|
this.GetPrimaryKey());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The check is now against the registry status
|
// The check is now against the registry status
|
||||||
@@ -332,6 +362,9 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track if critical operations succeeded
|
||||||
|
var criticalOperationsSucceeded = false;
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
StopAndDisposeTimer();
|
StopAndDisposeTimer();
|
||||||
@@ -355,17 +388,69 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
|
|||||||
|
|
||||||
// Sync state from the volatile TradingBotBase before destroying it
|
// Sync state from the volatile TradingBotBase before destroying it
|
||||||
SyncStateFromBase();
|
SyncStateFromBase();
|
||||||
await _state.WriteStateAsync();
|
|
||||||
await SaveBotAsync(BotStatus.Stopped);
|
// Critical: Stop the bot and clear the trading bot instance
|
||||||
await _tradingBot?.StopBot(reason)!;
|
await _tradingBot?.StopBot(reason)!;
|
||||||
_tradingBot = null;
|
_tradingBot = null;
|
||||||
|
|
||||||
|
// Mark critical operations as succeeded
|
||||||
|
criticalOperationsSucceeded = true;
|
||||||
|
|
||||||
|
// Non-critical cleanup operations - continue even if these fail
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await _state.WriteStateAsync();
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(ex, "Failed to write state for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await SaveBotAsync(BotStatus.Stopped);
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(ex, "Failed to save bot status for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
await UpdateBotRegistryStatus(BotStatus.Stopped);
|
await UpdateBotRegistryStatus(BotStatus.Stopped);
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(ex, "Failed to update bot registry for bot {GrainId}, but bot is stopped", this.GetPrimaryKey());
|
||||||
|
}
|
||||||
|
|
||||||
_logger.LogInformation("LiveTradingBotGrain {GrainId} stopped successfully", this.GetPrimaryKey());
|
_logger.LogInformation("LiveTradingBotGrain {GrainId} stopped successfully", this.GetPrimaryKey());
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
// If critical operations succeeded (bot stopped, positions closed), don't throw
|
||||||
|
// Only throw if critical operations failed
|
||||||
|
if (criticalOperationsSucceeded)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(ex,
|
||||||
|
"Cleanup operations failed for bot {GrainId}, but bot was successfully stopped. " +
|
||||||
|
"This is non-critical and the stop operation is considered successful.",
|
||||||
|
this.GetPrimaryKey());
|
||||||
|
// Don't throw - the bot is stopped, which is what matters
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
_logger.LogError(ex, "Failed to stop LiveTradingBotGrain {GrainId}", this.GetPrimaryKey());
|
_logger.LogError(ex, "Failed to stop LiveTradingBotGrain {GrainId}", this.GetPrimaryKey());
|
||||||
|
|
||||||
|
// Log Orleans-specific exceptions with additional context for debugging
|
||||||
|
if (Managing.Core.Exceptions.OrleansExceptionHelper.IsOrleansException(ex))
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Orleans exception detected during bot stop: {ExceptionType} - {Message}. " +
|
||||||
|
"This may indicate a timeout or deadlock when closing positions.",
|
||||||
|
ex.GetType().Name, ex.Message);
|
||||||
|
}
|
||||||
|
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1151,13 +1236,75 @@ public class LiveTradingBotGrain : Grain, ILiveTradingBotGrain, IRemindable
|
|||||||
"Bot {GrainId} has {Count} open positions that will be closed before stopping: {Positions}",
|
"Bot {GrainId} has {Count} open positions that will be closed before stopping: {Positions}",
|
||||||
botId, openPositions.Count, string.Join(", ", openPositions.Select(p => p.Identifier)));
|
botId, openPositions.Count, string.Join(", ", openPositions.Select(p => p.Identifier)));
|
||||||
|
|
||||||
|
// Add timeout for closing positions (30 seconds per position, max 2 minutes total)
|
||||||
|
var maxTotalTime = TimeSpan.FromMinutes(2);
|
||||||
|
var perPositionTimeout = TimeSpan.FromSeconds(30);
|
||||||
|
var startTime = DateTime.UtcNow;
|
||||||
|
|
||||||
foreach (var position in openPositions)
|
foreach (var position in openPositions)
|
||||||
{
|
{
|
||||||
|
// Check if we've exceeded total timeout
|
||||||
|
if (DateTime.UtcNow - startTime > maxTotalTime)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Closing positions for bot {GrainId} exceeded total timeout of {Timeout} seconds. Stopping position closure.",
|
||||||
|
botId, maxTotalTime.TotalSeconds);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
_logger.LogInformation("Closing position {PositionId} for bot {GrainId}", position.Identifier,
|
_logger.LogInformation("Closing position {PositionId} for bot {GrainId}", position.Identifier,
|
||||||
botId);
|
botId);
|
||||||
await ClosePositionAsync(position.Identifier);
|
|
||||||
|
// Use timeout for individual position closure
|
||||||
|
using var cts = new CancellationTokenSource(perPositionTimeout);
|
||||||
|
var closeTask = ClosePositionAsync(position.Identifier);
|
||||||
|
var timeoutTask = Task.Delay(perPositionTimeout, cts.Token);
|
||||||
|
|
||||||
|
var completedTask = await Task.WhenAny(closeTask, timeoutTask);
|
||||||
|
if (completedTask == timeoutTask)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Closing position {PositionId} for bot {GrainId} timed out after {Timeout} seconds. " +
|
||||||
|
"Verifying if position was actually closed...",
|
||||||
|
position.Identifier, botId, perPositionTimeout.TotalSeconds);
|
||||||
|
|
||||||
|
// Verify if position was actually closed despite timeout
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var updatedPositions = await ServiceScopeHelpers.WithScopedService<ITradingService, IEnumerable<Position>>(
|
||||||
|
_scopeFactory,
|
||||||
|
async tradingService => await tradingService.GetPositionsByInitiatorIdentifierAsync(botId));
|
||||||
|
|
||||||
|
var updatedPosition = updatedPositions?.FirstOrDefault(p => p.Identifier == position.Identifier);
|
||||||
|
if (updatedPosition != null && !updatedPosition.IsOpen() && updatedPosition.Status != PositionStatus.New)
|
||||||
|
{
|
||||||
|
_logger.LogInformation(
|
||||||
|
"Position {PositionId} was actually closed despite timeout. Status: {Status}",
|
||||||
|
position.Identifier, updatedPosition.Status);
|
||||||
|
// Position is closed, continue
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"Position {PositionId} may not be closed. Status: {Status}",
|
||||||
|
position.Identifier, updatedPosition != null ? updatedPosition.Status.ToString() : "null");
|
||||||
|
// Continue with other positions - we'll verify all positions at the end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception verifyEx)
|
||||||
|
{
|
||||||
|
_logger.LogWarning(verifyEx,
|
||||||
|
"Could not verify position {PositionId} closure status after timeout",
|
||||||
|
position.Identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue with other positions
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
await closeTask;
|
||||||
_logger.LogInformation("Successfully closed position {PositionId} for bot {GrainId}",
|
_logger.LogInformation("Successfully closed position {PositionId} for bot {GrainId}",
|
||||||
position.Identifier, botId);
|
position.Identifier, botId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ using Managing.Application.Bots;
|
|||||||
using Managing.Application.Bots.Models;
|
using Managing.Application.Bots.Models;
|
||||||
using Managing.Common;
|
using Managing.Common;
|
||||||
using Managing.Core;
|
using Managing.Core;
|
||||||
|
using Managing.Core.Exceptions;
|
||||||
using Managing.Domain.Accounts;
|
using Managing.Domain.Accounts;
|
||||||
using Managing.Domain.Bots;
|
using Managing.Domain.Bots;
|
||||||
using Managing.Domain.Indicators;
|
using Managing.Domain.Indicators;
|
||||||
@@ -60,6 +61,17 @@ namespace Managing.Application.ManageBot
|
|||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
{
|
{
|
||||||
_tradingBotLogger.LogError(e, "Error stopping bot {Identifier}", identifier);
|
_tradingBotLogger.LogError(e, "Error stopping bot {Identifier}", identifier);
|
||||||
|
|
||||||
|
// Check if this is an Orleans exception and convert to user-friendly message
|
||||||
|
if (OrleansExceptionHelper.IsOrleansException(e))
|
||||||
|
{
|
||||||
|
var userMessage = OrleansExceptionHelper.GetUserFriendlyMessage(e, "bot stop");
|
||||||
|
_tradingBotLogger.LogWarning(
|
||||||
|
"Orleans exception during bot stop {Identifier}: {ExceptionType}. Converting to user-friendly message.",
|
||||||
|
identifier, e.GetType().Name);
|
||||||
|
throw new ServiceUnavailableException(userMessage);
|
||||||
|
}
|
||||||
|
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user