|
|
|
|
@@ -22,7 +22,10 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
private readonly ILogger<BundleBacktestHealthCheckWorker> _logger;
|
|
|
|
|
private readonly TimeSpan _checkInterval = TimeSpan.FromMinutes(30);
|
|
|
|
|
private readonly TimeSpan _inactiveThreshold = TimeSpan.FromMinutes(2); // Check bundles inactive for 2+ minutes
|
|
|
|
|
private readonly TimeSpan _stuckThreshold = TimeSpan.FromHours(2); // Consider bundle stuck if no progress for 2 hours
|
|
|
|
|
|
|
|
|
|
private readonly TimeSpan
|
|
|
|
|
_stuckThreshold = TimeSpan.FromHours(2); // Consider bundle stuck if no progress for 2 hours
|
|
|
|
|
|
|
|
|
|
private readonly IMessengerService _messengerService;
|
|
|
|
|
|
|
|
|
|
public BundleBacktestHealthCheckWorker(
|
|
|
|
|
@@ -80,15 +83,17 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
_logger.LogInformation("Starting bundle health check...");
|
|
|
|
|
|
|
|
|
|
// Check bundles in Pending and Running status
|
|
|
|
|
var pendingBundles = await backtestRepository.GetBundleBacktestRequestsByStatusAsync(BundleBacktestRequestStatus.Pending);
|
|
|
|
|
var runningBundles = await backtestRepository.GetBundleBacktestRequestsByStatusAsync(BundleBacktestRequestStatus.Running);
|
|
|
|
|
var pendingBundles =
|
|
|
|
|
await backtestRepository.GetBundleBacktestRequestsByStatusAsync(BundleBacktestRequestStatus.Pending);
|
|
|
|
|
var runningBundles =
|
|
|
|
|
await backtestRepository.GetBundleBacktestRequestsByStatusAsync(BundleBacktestRequestStatus.Running);
|
|
|
|
|
|
|
|
|
|
// Only check bundles that haven't been updated in more than the inactive threshold
|
|
|
|
|
var inactiveThresholdTime = DateTime.UtcNow.Add(-_inactiveThreshold);
|
|
|
|
|
var allBundlesToCheck = pendingBundles.Concat(runningBundles)
|
|
|
|
|
.Where(b => b.UpdatedAt < inactiveThresholdTime)
|
|
|
|
|
.ToList();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_logger.LogInformation(
|
|
|
|
|
"Found {TotalCount} bundles (from {PendingTotal} pending and {RunningTotal} running) that haven't been updated in >{InactiveMinutes} minutes",
|
|
|
|
|
allBundlesToCheck.Count, pendingBundles.Count(), runningBundles.Count(), _inactiveThreshold.TotalMinutes);
|
|
|
|
|
@@ -106,15 +111,15 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
{
|
|
|
|
|
var (stuckCount, missingJobs, healthyCount) = await CheckSingleBundleHealthAsync(
|
|
|
|
|
bundle, backtestRepository, jobRepository);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
stuckBundlesCount += stuckCount;
|
|
|
|
|
missingJobsCount += missingJobs;
|
|
|
|
|
healthyBundlesCount += healthyCount;
|
|
|
|
|
}
|
|
|
|
|
catch (Exception ex)
|
|
|
|
|
{
|
|
|
|
|
_logger.LogError(ex,
|
|
|
|
|
"Error checking health for bundle {BundleRequestId}",
|
|
|
|
|
_logger.LogError(ex,
|
|
|
|
|
"Error checking health for bundle {BundleRequestId}",
|
|
|
|
|
bundle.RequestId);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
@@ -138,7 +143,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
|
|
|
|
|
_logger.LogDebug(
|
|
|
|
|
"Bundle {BundleRequestId} ({Status}): Expected {Expected} jobs, Found {Actual} jobs, Completed {Completed}/{Total}",
|
|
|
|
|
bundle.RequestId, bundle.Status, expectedJobCount, actualJobCount, bundle.CompletedBacktests, bundle.TotalBacktests);
|
|
|
|
|
bundle.RequestId, bundle.Status, expectedJobCount, actualJobCount, bundle.CompletedBacktests,
|
|
|
|
|
bundle.TotalBacktests);
|
|
|
|
|
|
|
|
|
|
// Check 1: Missing jobs - bundle has no jobs or fewer jobs than expected
|
|
|
|
|
if (actualJobCount == 0 || actualJobCount < expectedJobCount)
|
|
|
|
|
@@ -151,7 +157,7 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
if (bundle.Status == BundleBacktestRequestStatus.Running)
|
|
|
|
|
{
|
|
|
|
|
var timeSinceUpdate = DateTime.UtcNow - bundle.UpdatedAt;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (timeSinceUpdate > _stuckThreshold)
|
|
|
|
|
{
|
|
|
|
|
await HandleStuckBundleAsync(bundle, timeSinceUpdate, jobs, backtestRepository, jobRepository);
|
|
|
|
|
@@ -172,7 +178,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
|
|
|
|
|
if (allJobsPending || hasFailedJobs)
|
|
|
|
|
{
|
|
|
|
|
await HandleStalePendingBundleAsync(bundle, timeSinceCreation, jobs, backtestRepository, jobRepository);
|
|
|
|
|
await HandleStalePendingBundleAsync(bundle, timeSinceCreation, jobs, backtestRepository,
|
|
|
|
|
jobRepository);
|
|
|
|
|
return (StuckCount: 1, MissingJobsCount: 0, HealthyCount: 0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
@@ -184,7 +191,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
var totalProcessedJobs = completedJobs + failedJobs;
|
|
|
|
|
|
|
|
|
|
if (totalProcessedJobs == bundle.TotalBacktests &&
|
|
|
|
|
(bundle.Status == BundleBacktestRequestStatus.Running || bundle.Status == BundleBacktestRequestStatus.Pending))
|
|
|
|
|
(bundle.Status == BundleBacktestRequestStatus.Running ||
|
|
|
|
|
bundle.Status == BundleBacktestRequestStatus.Pending))
|
|
|
|
|
{
|
|
|
|
|
await HandleCompletedBundleAsync(bundle, completedJobs, failedJobs, backtestRepository);
|
|
|
|
|
return (StuckCount: 0, MissingJobsCount: 0, HealthyCount: 1);
|
|
|
|
|
@@ -212,13 +220,13 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
|
|
|
|
|
// Generate all backtest requests from bundle variants
|
|
|
|
|
var allBacktestRequests = await GenerateBacktestRequestsFromVariants(bundle);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (allBacktestRequests == null || !allBacktestRequests.Any())
|
|
|
|
|
{
|
|
|
|
|
_logger.LogError(
|
|
|
|
|
"Failed to generate backtest requests from variants for bundle {BundleRequestId}",
|
|
|
|
|
bundle.RequestId);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bundle.ErrorMessage = $"Failed to regenerate jobs: Could not generate backtest requests from variants.";
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
await backtestRepository.UpdateBundleBacktestRequestAsync(bundle);
|
|
|
|
|
@@ -227,13 +235,13 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
|
|
|
|
|
// Get existing jobs for this bundle
|
|
|
|
|
var existingJobs = (await jobRepository.GetByBundleRequestIdAsync(bundle.RequestId)).ToList();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Create a set of existing job config signatures for quick lookup
|
|
|
|
|
var existingJobSignatures = existingJobs
|
|
|
|
|
.Select(j =>
|
|
|
|
|
{
|
|
|
|
|
var config = JsonSerializer.Deserialize<TradingBotConfig>(j.ConfigJson);
|
|
|
|
|
return config != null
|
|
|
|
|
return config != null
|
|
|
|
|
? $"{config.Ticker}_{config.Timeframe}_{config.Name}_{j.StartDate:yyyyMMdd}_{j.EndDate:yyyyMMdd}"
|
|
|
|
|
: null;
|
|
|
|
|
})
|
|
|
|
|
@@ -244,7 +252,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
var missingRequests = allBacktestRequests
|
|
|
|
|
.Where(req =>
|
|
|
|
|
{
|
|
|
|
|
var signature = $"{req.Config.Ticker}_{req.Config.Timeframe}_{req.Config.Name}_{req.StartDate:yyyyMMdd}_{req.EndDate:yyyyMMdd}";
|
|
|
|
|
var signature =
|
|
|
|
|
$"{req.Config.Ticker}_{req.Config.Timeframe}_{req.Config.Name}_{req.StartDate:yyyyMMdd}_{req.EndDate:yyyyMMdd}";
|
|
|
|
|
return !existingJobSignatures.Contains(signature);
|
|
|
|
|
})
|
|
|
|
|
.ToList();
|
|
|
|
|
@@ -269,7 +278,7 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
bundle.ErrorMessage = null;
|
|
|
|
|
bundle.CompletedAt = null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
await backtestRepository.UpdateBundleBacktestRequestAsync(bundle);
|
|
|
|
|
|
|
|
|
|
@@ -282,7 +291,7 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
_logger.LogWarning(
|
|
|
|
|
"No missing jobs found to recreate for bundle {BundleRequestId}. All {ExpectedCount} jobs already exist.",
|
|
|
|
|
bundle.RequestId, expectedJobCount);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
await backtestRepository.UpdateBundleBacktestRequestAsync(bundle);
|
|
|
|
|
}
|
|
|
|
|
@@ -292,7 +301,7 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
_logger.LogError(ex,
|
|
|
|
|
"Error recreating missing jobs for bundle {BundleRequestId}",
|
|
|
|
|
bundle.RequestId);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bundle.ErrorMessage = $"Error recreating jobs: {ex.Message}";
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
await backtestRepository.UpdateBundleBacktestRequestAsync(bundle);
|
|
|
|
|
@@ -324,7 +333,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
// Get the first account for the user
|
|
|
|
|
using var scope = _scopeFactory.CreateScope();
|
|
|
|
|
var accountService = scope.ServiceProvider.GetRequiredService<IAccountService>();
|
|
|
|
|
var accounts = await accountService.GetAccountsByUserAsync(bundleRequest.User, hideSecrets: true, getBalance: false);
|
|
|
|
|
var accounts =
|
|
|
|
|
await accountService.GetAccountsByUserAsync(bundleRequest.User, hideSecrets: true, getBalance: false);
|
|
|
|
|
var firstAccount = accounts.FirstOrDefault();
|
|
|
|
|
|
|
|
|
|
if (firstAccount == null)
|
|
|
|
|
@@ -362,7 +372,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
UseSynthApi = universalConfig.UseSynthApi,
|
|
|
|
|
UseForPositionSizing = universalConfig.UseForPositionSizing,
|
|
|
|
|
UseForSignalFiltering = universalConfig.UseForSignalFiltering,
|
|
|
|
|
UseForDynamicStopLoss = universalConfig.UseForDynamicStopLoss
|
|
|
|
|
UseForDynamicStopLoss = universalConfig.UseForDynamicStopLoss,
|
|
|
|
|
TradingType = universalConfig.TradingType
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
var backtestRequest = new RunBacktestRequest
|
|
|
|
|
@@ -435,12 +446,14 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
"Bundle {BundleRequestId} has all jobs completed ({Completed} completed, {Failed} failed). Updating bundle status.",
|
|
|
|
|
bundle.RequestId, completedJobs, failedJobs);
|
|
|
|
|
|
|
|
|
|
bundle.Status = failedJobs == 0 ? BundleBacktestRequestStatus.Completed : BundleBacktestRequestStatus.Completed;
|
|
|
|
|
bundle.Status = failedJobs == 0
|
|
|
|
|
? BundleBacktestRequestStatus.Completed
|
|
|
|
|
: BundleBacktestRequestStatus.Completed;
|
|
|
|
|
bundle.CompletedBacktests = completedJobs;
|
|
|
|
|
bundle.FailedBacktests = failedJobs;
|
|
|
|
|
bundle.CompletedAt = DateTime.UtcNow;
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (failedJobs > 0)
|
|
|
|
|
{
|
|
|
|
|
bundle.ErrorMessage = $"{failedJobs} backtests failed";
|
|
|
|
|
@@ -451,11 +464,11 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
// Some jobs are still pending or running - bundle is genuinely stuck
|
|
|
|
|
// Reset any stale running jobs back to pending
|
|
|
|
|
var runningJobs = jobs.Where(j => j.Status == JobStatus.Running).ToList();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
foreach (var job in runningJobs)
|
|
|
|
|
{
|
|
|
|
|
var timeSinceJobHeartbeat = job.LastHeartbeat.HasValue
|
|
|
|
|
? DateTime.UtcNow - job.LastHeartbeat.Value
|
|
|
|
|
var timeSinceJobHeartbeat = job.LastHeartbeat.HasValue
|
|
|
|
|
? DateTime.UtcNow - job.LastHeartbeat.Value
|
|
|
|
|
: DateTime.UtcNow - job.CreatedAt;
|
|
|
|
|
|
|
|
|
|
if (timeSinceJobHeartbeat > TimeSpan.FromMinutes(30))
|
|
|
|
|
@@ -473,7 +486,8 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
|
|
|
|
|
// Update bundle timestamp to give it another chance
|
|
|
|
|
bundle.UpdatedAt = DateTime.UtcNow;
|
|
|
|
|
bundle.ErrorMessage = $"Bundle was stuck. Reset {runningJobs.Count(j => j.Status == JobStatus.Pending)} stale jobs to pending.";
|
|
|
|
|
bundle.ErrorMessage =
|
|
|
|
|
$"Bundle was stuck. Reset {runningJobs.Count(j => j.Status == JobStatus.Pending)} stale jobs to pending.";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
await backtestRepository.UpdateBundleBacktestRequestAsync(bundle);
|
|
|
|
|
@@ -571,7 +585,7 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
{
|
|
|
|
|
var message = $"❌ Bundle backtest '{bundle.Name}' (ID: {bundle.RequestId}) failed: {reason}";
|
|
|
|
|
await _messengerService.SendMessage(message, bundle.User.TelegramChannel);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_logger.LogInformation(
|
|
|
|
|
"Sent failure notification to user {UserId} for bundle {BundleRequestId}",
|
|
|
|
|
bundle.User.Id, bundle.RequestId);
|
|
|
|
|
@@ -579,10 +593,9 @@ public class BundleBacktestHealthCheckWorker : BackgroundService
|
|
|
|
|
}
|
|
|
|
|
catch (Exception ex)
|
|
|
|
|
{
|
|
|
|
|
_logger.LogError(ex,
|
|
|
|
|
"Failed to send notification to user for bundle {BundleRequestId}",
|
|
|
|
|
_logger.LogError(ex,
|
|
|
|
|
"Failed to send notification to user for bundle {BundleRequestId}",
|
|
|
|
|
bundle.RequestId);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|