Update and fix worker

This commit is contained in:
2025-11-11 03:02:24 +07:00
parent e8e2ec5a43
commit 4a8c22e52a
4 changed files with 105 additions and 6 deletions

View File

@@ -42,9 +42,37 @@ public class BacktestComputeWorker : BackgroundService
{
_logger.LogInformation(
"BacktestComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrentPerUser: {MaxConcurrentPerUser}, MaxConcurrentPerInstance: {MaxConcurrentPerInstance}, PollInterval: {PollInterval}s, JobTimeout: {JobTimeoutMinutes}min",
_options.WorkerId, _options.MaxConcurrentPerUser, _options.MaxConcurrentPerInstance,
_options.WorkerId, _options.MaxConcurrentPerUser, _options.MaxConcurrentPerInstance,
_options.JobPollIntervalSeconds, _options.JobTimeoutMinutes);
// Startup recovery step: runs once, before the rest of this method continues.
// Reset any jobs assigned to this WorkerId from previous worker instances at startup
// This is critical when restarting with the same WorkerId (e.g., Environment.MachineName)
// NOTE(review): neither repository call below receives a cancellation token, so this step
// is not tied to stoppingToken; whether IJobRepository offers token-accepting overloads
// is not visible here - confirm before changing.
try
{
// The repository is resolved from a freshly created DI scope; `using var` disposes the
// scope when the try block exits.
using var scope = _scopeFactory.CreateScope();
var jobRepository = scope.ServiceProvider.GetRequiredService<IJobRepository>();
// First, reset all jobs assigned to this WorkerId (from previous instance)
var workerResetCount = await jobRepository.ResetJobsByWorkerIdAsync(_options.WorkerId);
// Log only when something was actually reset, so a clean start stays quiet.
if (workerResetCount > 0)
{
_logger.LogInformation("Reset {Count} jobs assigned to worker {WorkerId} from previous instance",
workerResetCount, _options.WorkerId);
}
// Then, reset any other stale jobs (from other workers or orphaned jobs)
// The staleness threshold comes from _options.StaleJobTimeoutMinutes; how the repository
// decides a job is stale is not visible here.
var staleResetCount = await jobRepository.ResetStaleJobsAsync(_options.StaleJobTimeoutMinutes);
if (staleResetCount > 0)
{
_logger.LogInformation("Reset {Count} stale jobs to Pending status at startup", staleResetCount);
}
}
// Catches every exception type: a failure in either reset call (or in resolving the
// repository) is logged and the method continues. If the first call throws, the second
// reset is skipped as well because both share this try block.
catch (Exception ex)
{
_logger.LogError(ex, "Error resetting jobs at startup");
// Don't fail startup if this fails, but log it
}
// Link cancellation tokens
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken, _shutdownCts.Token);
var cancellationToken = linkedCts.Token;

View File

@@ -39,6 +39,34 @@ public class GeneticComputeWorker : BackgroundService
"GeneticComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrent: {MaxConcurrent}, PollInterval: {PollInterval}s",
_options.WorkerId, _options.MaxConcurrentGenetics, _options.JobPollIntervalSeconds);
// Startup recovery step: runs once, before the background recovery task below is started.
// Reset any jobs assigned to this WorkerId from previous worker instances at startup
// This is critical when restarting with the same WorkerId (e.g., Environment.MachineName)
// NOTE(review): the awaited repository calls below are made without a cancellation token,
// so they are not cancelled via stoppingToken; whether IJobRepository has overloads that
// accept one is not visible here - confirm before changing.
try
{
// A dedicated DI scope is created to resolve the repository; the `using var` declaration
// disposes it when the try block is left.
using var scope = _scopeFactory.CreateScope();
var jobRepository = scope.ServiceProvider.GetRequiredService<IJobRepository>();
// First, reset all jobs assigned to this WorkerId (from previous instance)
var workerResetCount = await jobRepository.ResetJobsByWorkerIdAsync(_options.WorkerId);
// Only emit a log entry when at least one job was reset.
if (workerResetCount > 0)
{
_logger.LogInformation("Reset {Count} jobs assigned to worker {WorkerId} from previous instance",
workerResetCount, _options.WorkerId);
}
// Then, reset any other stale jobs (from other workers or orphaned jobs)
// Uses _options.StaleJobTimeoutMinutes as the threshold; the repository's exact definition
// of "stale" is not shown in this file section.
var staleResetCount = await jobRepository.ResetStaleJobsAsync(_options.StaleJobTimeoutMinutes);
if (staleResetCount > 0)
{
_logger.LogInformation("Reset {Count} stale jobs to Pending status at startup", staleResetCount);
}
}
// Broad catch by design: any failure while resetting is logged and startup proceeds.
// Because both resets share one try block, an exception in the first call also skips
// the stale-job reset.
catch (Exception ex)
{
_logger.LogError(ex, "Error resetting jobs at startup");
// Don't fail startup if this fails, but log it
}
// Background task for stale job recovery
var staleJobRecoveryTask = Task.Run(() => StaleJobRecoveryLoop(stoppingToken), stoppingToken);