Improve workers for backtests
This commit is contained in:
@@ -21,7 +21,7 @@ public class BacktestComputeWorker : BackgroundService
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
private readonly ILogger<BacktestComputeWorker> _logger;
|
||||
private readonly BacktestComputeWorkerOptions _options;
|
||||
private readonly SemaphoreSlim _semaphore;
|
||||
private readonly SemaphoreSlim _instanceSemaphore;
|
||||
|
||||
public BacktestComputeWorker(
|
||||
IServiceScopeFactory scopeFactory,
|
||||
@@ -31,14 +31,14 @@ public class BacktestComputeWorker : BackgroundService
|
||||
_scopeFactory = scopeFactory;
|
||||
_logger = logger;
|
||||
_options = options.Value;
|
||||
_semaphore = new SemaphoreSlim(_options.MaxConcurrentBacktests, _options.MaxConcurrentBacktests);
|
||||
_instanceSemaphore = new SemaphoreSlim(_options.MaxConcurrentPerInstance, _options.MaxConcurrentPerInstance);
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"BacktestComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrent: {MaxConcurrent}, PollInterval: {PollInterval}s",
|
||||
_options.WorkerId, _options.MaxConcurrentBacktests, _options.JobPollIntervalSeconds);
|
||||
"BacktestComputeWorker starting. WorkerId: {WorkerId}, MaxConcurrentPerUser: {MaxConcurrentPerUser}, MaxConcurrentPerInstance: {MaxConcurrentPerInstance}, PollInterval: {PollInterval}s",
|
||||
_options.WorkerId, _options.MaxConcurrentPerUser, _options.MaxConcurrentPerInstance, _options.JobPollIntervalSeconds);
|
||||
|
||||
// Background task for stale job recovery
|
||||
var staleJobRecoveryTask = Task.Run(() => StaleJobRecoveryLoop(stoppingToken), stoppingToken);
|
||||
@@ -67,10 +67,10 @@ public class BacktestComputeWorker : BackgroundService
|
||||
|
||||
private async Task ProcessJobsAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
// Check if we have capacity
|
||||
if (!await _semaphore.WaitAsync(0, cancellationToken))
|
||||
// Check if this instance has capacity
|
||||
if (!await _instanceSemaphore.WaitAsync(0, cancellationToken))
|
||||
{
|
||||
// At capacity, skip this iteration
|
||||
// Instance at capacity, skip this iteration
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -79,17 +79,23 @@ public class BacktestComputeWorker : BackgroundService
|
||||
using var scope = _scopeFactory.CreateScope();
|
||||
var jobRepository = scope.ServiceProvider.GetRequiredService<IJobRepository>();
|
||||
|
||||
// Try to claim a backtest job (exclude genetic jobs)
|
||||
var job = await jobRepository.ClaimNextJobAsync(_options.WorkerId, JobType.Backtest);
|
||||
// Claim a random backtest job atomically, excluding users at capacity
|
||||
// The SQL query is expected to check running-job counts inside the claiming transaction, so the per-user limit holds even when several workers claim concurrently
|
||||
var job = await jobRepository.ClaimRandomJobAsync(
|
||||
_options.WorkerId,
|
||||
JobType.Backtest,
|
||||
_options.MaxConcurrentPerUser);
|
||||
|
||||
if (job == null)
|
||||
{
|
||||
// No jobs available, release semaphore
|
||||
_semaphore.Release();
|
||||
// Nothing claimable: either no jobs are pending or every user with pending jobs is at capacity; release the instance slot
|
||||
_instanceSemaphore.Release();
|
||||
return;
|
||||
}
|
||||
|
||||
_logger.LogInformation("Claimed backtest job {JobId} for worker {WorkerId}", job.Id, _options.WorkerId);
|
||||
_logger.LogInformation(
|
||||
"Claimed random backtest job {JobId} (UserId: {UserId}) for worker {WorkerId}",
|
||||
job.Id, job.UserId, _options.WorkerId);
|
||||
|
||||
// Process the job asynchronously (don't await, let it run in background)
|
||||
// Create a new scope for the job processing to ensure proper lifetime management
|
||||
@@ -99,16 +105,21 @@ public class BacktestComputeWorker : BackgroundService
|
||||
{
|
||||
await ProcessJobAsync(job, cancellationToken);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Error processing job {JobId}", job.Id);
|
||||
throw;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_semaphore.Release();
|
||||
_instanceSemaphore.Release();
|
||||
}
|
||||
}, cancellationToken);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Error claiming or processing job");
|
||||
_semaphore.Release();
|
||||
_instanceSemaphore.Release();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@@ -560,7 +571,7 @@ public class BacktestComputeWorker : BackgroundService
|
||||
|
||||
public override void Dispose()
|
||||
{
|
||||
_semaphore?.Dispose();
|
||||
_instanceSemaphore?.Dispose();
|
||||
base.Dispose();
|
||||
}
|
||||
}
|
||||
@@ -578,9 +589,14 @@ public class BacktestComputeWorkerOptions
|
||||
public string WorkerId { get; set; } = Environment.MachineName;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of concurrent backtests to process
|
||||
/// Maximum number of concurrent backtests per user (global limit across all workers)
|
||||
/// </summary>
|
||||
public int MaxConcurrentBacktests { get; set; } = 6;
|
||||
public int MaxConcurrentPerUser { get; set; } = 6;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of concurrent backtests per worker instance (local limit for this worker)
|
||||
/// </summary>
|
||||
public int MaxConcurrentPerInstance { get; set; } = 6;
|
||||
|
||||
/// <summary>
|
||||
/// Interval in seconds between job polling attempts
|
||||
|
||||
Reference in New Issue
Block a user