Improve workers for backtests

This commit is contained in:
2025-11-10 01:44:33 +07:00
parent 97f2b8229b
commit 7e52b7a734
18 changed files with 740 additions and 144 deletions

View File

@@ -21,7 +21,7 @@ public class BacktestComputeWorker : BackgroundService
private readonly IServiceScopeFactory _scopeFactory;
private readonly ILogger<BacktestComputeWorker> _logger;
private readonly BacktestComputeWorkerOptions _options;
private readonly SemaphoreSlim _semaphore;
private readonly SemaphoreSlim _instanceSemaphore;
public BacktestComputeWorker(
IServiceScopeFactory scopeFactory,
@@ -31,14 +31,14 @@ public class BacktestComputeWorker : BackgroundService
_scopeFactory = scopeFactory;
_logger = logger;
_options = options.Value;
_semaphore = new SemaphoreSlim(_options.MaxConcurrentBacktests, _options.MaxConcurrentBacktests);
_instanceSemaphore = new SemaphoreSlim(_options.MaxConcurrentPerInstance, _options.MaxConcurrentPerInstance);
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
_logger.LogInformation(
"BacktestComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrent: {MaxConcurrent}, PollInterval: {PollInterval}s",
_options.WorkerId, _options.MaxConcurrentBacktests, _options.JobPollIntervalSeconds);
"BacktestComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrentPerUser: {MaxConcurrentPerUser}, MaxConcurrentPerInstance: {MaxConcurrentPerInstance}, PollInterval: {PollInterval}s",
_options.WorkerId, _options.MaxConcurrentPerUser, _options.MaxConcurrentPerInstance, _options.JobPollIntervalSeconds);
// Background task for stale job recovery
var staleJobRecoveryTask = Task.Run(() => StaleJobRecoveryLoop(stoppingToken), stoppingToken);
@@ -67,10 +67,10 @@ public class BacktestComputeWorker : BackgroundService
private async Task ProcessJobsAsync(CancellationToken cancellationToken)
{
// Check if we have capacity
if (!await _semaphore.WaitAsync(0, cancellationToken))
// Check if this instance has capacity
if (!await _instanceSemaphore.WaitAsync(0, cancellationToken))
{
// At capacity, skip this iteration
// Instance at capacity, skip this iteration
return;
}
@@ -79,17 +79,23 @@ public class BacktestComputeWorker : BackgroundService
using var scope = _scopeFactory.CreateScope();
var jobRepository = scope.ServiceProvider.GetRequiredService<IJobRepository>();
// Try to claim a backtest job (exclude genetic jobs)
var job = await jobRepository.ClaimNextJobAsync(_options.WorkerId, JobType.Backtest);
// Claim a random backtest job atomically, excluding users at capacity
// The SQL query checks per-user running job counts inside the claiming transaction, so the limit is enforced atomically across workers
var job = await jobRepository.ClaimRandomJobAsync(
_options.WorkerId,
JobType.Backtest,
_options.MaxConcurrentPerUser);
if (job == null)
{
// No jobs available, release semaphore
_semaphore.Release();
// No claimable job (none pending, or every pending job belongs to a user already at capacity); release the instance slot
_instanceSemaphore.Release();
return;
}
_logger.LogInformation("Claimed backtest job {JobId} for worker {WorkerId}", job.Id, _options.WorkerId);
_logger.LogInformation(
"Claimed random backtest job {JobId} (UserId: {UserId}) for worker {WorkerId}",
job.Id, job.UserId, _options.WorkerId);
// Process the job asynchronously (don't await, let it run in background)
// Create a new scope for the job processing to ensure proper lifetime management
@@ -99,16 +105,21 @@ public class BacktestComputeWorker : BackgroundService
{
await ProcessJobAsync(job, cancellationToken);
}
catch (Exception ex)
{
_logger.LogError(ex, "Error processing job {JobId}", job.Id);
throw;
}
finally
{
_semaphore.Release();
_instanceSemaphore.Release();
}
}, cancellationToken);
}
catch (Exception ex)
{
_logger.LogError(ex, "Error claiming or processing job");
_semaphore.Release();
_instanceSemaphore.Release();
throw;
}
}
@@ -560,7 +571,7 @@ public class BacktestComputeWorker : BackgroundService
public override void Dispose()
{
_semaphore?.Dispose();
_instanceSemaphore?.Dispose();
base.Dispose();
}
}
@@ -578,9 +589,14 @@ public class BacktestComputeWorkerOptions
public string WorkerId { get; set; } = Environment.MachineName;
/// <summary>
/// Maximum number of concurrent backtests to process
/// Maximum number of concurrent backtests per user (global limit across all workers)
/// </summary>
public int MaxConcurrentBacktests { get; set; } = 6;
public int MaxConcurrentPerUser { get; set; } = 6;
/// <summary>
/// Maximum number of concurrent backtests per worker instance (local limit for this worker)
/// </summary>
public int MaxConcurrentPerInstance { get; set; } = 6;
/// <summary>
/// Interval in seconds between job polling attempts