Improve workers for backtests
This commit is contained in:
@@ -75,9 +75,10 @@ public class PostgreSqlGeneticRepository : IGeneticRepository
|
||||
|
||||
public async Task UpdateGeneticRequestAsync(GeneticRequest geneticRequest)
|
||||
{
|
||||
var existingEntity = _context.GeneticRequests
|
||||
var existingEntity = await _context.GeneticRequests
|
||||
.AsTracking() // Explicitly enable tracking to ensure entity is tracked
|
||||
.Include(gr => gr.User)
|
||||
.FirstOrDefault(gr => gr.RequestId == geneticRequest.RequestId);
|
||||
.FirstOrDefaultAsync(gr => gr.RequestId == geneticRequest.RequestId);
|
||||
|
||||
if (existingEntity != null)
|
||||
{
|
||||
@@ -110,9 +111,13 @@ public class PostgreSqlGeneticRepository : IGeneticRepository
|
||||
existingEntity.EligibleIndicatorsJson = "[]";
|
||||
}
|
||||
|
||||
// Only update the tracked entity, do not attach a new one
|
||||
// Save changes - entity is tracked so changes will be persisted
|
||||
await _context.SaveChangesAsync();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidOperationException($"Genetic request with RequestId '{geneticRequest.RequestId}' not found in database");
|
||||
}
|
||||
}
|
||||
|
||||
public void DeleteGeneticRequestByIdForUser(User user, string id)
|
||||
|
||||
@@ -219,6 +219,136 @@ public class PostgreSqlJobRepository : IJobRepository
|
||||
return entities.Select(MapToDomain);
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the jobs that are currently in the Running status for the given job type, grouped by user.
/// </summary>
/// <param name="workerId">Not referenced by the query; the counts span every worker.</param>
/// <param name="jobType">Job type whose running jobs are counted.</param>
/// <returns>
/// A map from user id to that user's number of running jobs. Users without a running job of this type
/// have no entry.
/// </returns>
public async Task<Dictionary<int, int>> GetRunningJobCountsByUserIdAsync(string workerId, JobType jobType)
{
    // Restrict to running jobs of the requested type; no worker filter, so the counts are global per user.
    var runningJobsOfType = _context.Jobs
        .Where(job => job.Status == (int)JobStatus.Running &&
                      job.JobType == (int)jobType);

    // Aggregate in the database and materialize one row per user.
    var perUserRows = await runningJobsOfType
        .GroupBy(job => job.UserId)
        .Select(userJobs => new { UserId = userJobs.Key, Count = userJobs.Count() })
        .ToListAsync();

    return perUserRows.ToDictionary(row => row.UserId, row => row.Count);
}
|
||||
|
||||
/// <summary>
/// Claims one randomly chosen pending job of the given type for a worker and marks it as running.
/// </summary>
/// <remarks>
/// The candidate row is selected with <c>FOR UPDATE SKIP LOCKED</c> inside a transaction, so a row that
/// another transaction has already locked is skipped rather than waited on. Users whose number of running
/// jobs of this type has reached <paramref name="maxConcurrentPerUser"/> are excluded by a subquery.
/// NOTE(review): the subquery counts running rows without locking them, so two workers claiming different
/// pending jobs of the same user at the same moment could presumably both pass the check — confirm whether
/// the limit must be strict.
/// NOTE(review): <c>RetryAfter</c> is not part of the filter — confirm that pending jobs with a future
/// retry time are meant to be claimable here.
/// </remarks>
/// <param name="workerId">Identifier stored in <c>AssignedWorkerId</c> of the claimed job.</param>
/// <param name="jobType">Type of job to claim.</param>
/// <param name="maxConcurrentPerUser">Running-job count at which a user's pending jobs stop being eligible.</param>
/// <returns>The claimed job mapped to the domain model, or <c>null</c> when no eligible job exists.</returns>
public async Task<Job?> ClaimRandomJobAsync(string workerId, JobType jobType, int maxConcurrentPerUser)
{
    // Use execution strategy to support retry with transactions
    var strategy = _context.Database.CreateExecutionStrategy();

    return await strategy.ExecuteAsync(async () =>
    {
        await using var transaction = await _context.Database.BeginTransactionAsync();

        // Declared outside the try block so the finally block can detach it again.
        JobEntity? job = null;

        try
        {
            // Select one eligible pending job at random and lock its row for this transaction.
            var sql = @"
                SELECT j.""Id"", j.""BundleRequestId"", j.""UserId"", j.""Status"", j.""JobType"", j.""Priority"",
                       j.""ConfigJson"", j.""StartDate"", j.""EndDate"", j.""ProgressPercentage"",
                       j.""AssignedWorkerId"", j.""LastHeartbeat"", j.""CreatedAt"", j.""StartedAt"",
                       j.""CompletedAt"", j.""ResultJson"", j.""ErrorMessage"", j.""RequestId"",
                       j.""GeneticRequestId"", j.""RetryCount"", j.""MaxRetries"", j.""RetryAfter"",
                       j.""IsRetryable"", j.""FailureCategory""
                FROM ""Jobs"" j
                WHERE j.""Status"" = @status
                  AND j.""JobType"" = @jobType
                  AND (
                      SELECT COUNT(*)
                      FROM ""Jobs"" running
                      WHERE running.""UserId"" = j.""UserId""
                        AND running.""Status"" = @runningStatus
                        AND running.""JobType"" = @jobType
                  ) < @maxConcurrentPerUser
                ORDER BY RANDOM()
                LIMIT 1
                FOR UPDATE SKIP LOCKED";

            var parameters = new[]
            {
                new NpgsqlParameter("status", NpgsqlDbType.Integer) { Value = (int)JobStatus.Pending },
                new NpgsqlParameter("jobType", NpgsqlDbType.Integer) { Value = (int)jobType },
                new NpgsqlParameter("runningStatus", NpgsqlDbType.Integer) { Value = (int)JobStatus.Running },
                new NpgsqlParameter("maxConcurrentPerUser", NpgsqlDbType.Integer) { Value = maxConcurrentPerUser }
            };

            _logger.LogDebug("Claiming random job atomically (maxConcurrentPerUser: {MaxConcurrent})", maxConcurrentPerUser);

            // Execute raw SQL using ADO.NET on the context's connection, enlisted in the EF transaction.
            var connection = _context.Database.GetDbConnection();
            await using var command = connection.CreateCommand();
            command.Transaction = transaction.GetDbTransaction();
            command.CommandText = sql;
            command.Parameters.AddRange(parameters);

            // The reader is scoped to this block so it is closed before SaveChangesAsync
            // issues the UPDATE on the same connection.
            await using (var reader = await command.ExecuteReaderAsync())
            {
                // Column readers: each resolves the ordinal once and maps DBNull to null.
                Guid? ReadNullableGuid(string column)
                {
                    var ordinal = reader.GetOrdinal(column);
                    return reader.IsDBNull(ordinal) ? null : reader.GetGuid(ordinal);
                }

                int? ReadNullableInt32(string column)
                {
                    var ordinal = reader.GetOrdinal(column);
                    return reader.IsDBNull(ordinal) ? null : reader.GetInt32(ordinal);
                }

                string? ReadNullableString(string column)
                {
                    var ordinal = reader.GetOrdinal(column);
                    return reader.IsDBNull(ordinal) ? null : reader.GetString(ordinal);
                }

                DateTime? ReadNullableDateTime(string column)
                {
                    var ordinal = reader.GetOrdinal(column);
                    return reader.IsDBNull(ordinal) ? null : reader.GetDateTime(ordinal);
                }

                if (await reader.ReadAsync())
                {
                    job = new JobEntity
                    {
                        Id = reader.GetGuid(reader.GetOrdinal("Id")),
                        BundleRequestId = ReadNullableGuid("BundleRequestId"),
                        UserId = reader.GetInt32(reader.GetOrdinal("UserId")),
                        Status = reader.GetInt32(reader.GetOrdinal("Status")),
                        JobType = reader.GetInt32(reader.GetOrdinal("JobType")),
                        Priority = reader.GetInt32(reader.GetOrdinal("Priority")),
                        ConfigJson = reader.GetString(reader.GetOrdinal("ConfigJson")),
                        StartDate = reader.GetDateTime(reader.GetOrdinal("StartDate")),
                        EndDate = reader.GetDateTime(reader.GetOrdinal("EndDate")),
                        ProgressPercentage = reader.GetInt32(reader.GetOrdinal("ProgressPercentage")),
                        AssignedWorkerId = ReadNullableString("AssignedWorkerId"),
                        LastHeartbeat = ReadNullableDateTime("LastHeartbeat"),
                        CreatedAt = reader.GetDateTime(reader.GetOrdinal("CreatedAt")),
                        StartedAt = ReadNullableDateTime("StartedAt"),
                        CompletedAt = ReadNullableDateTime("CompletedAt"),
                        ResultJson = ReadNullableString("ResultJson"),
                        ErrorMessage = ReadNullableString("ErrorMessage"),
                        RequestId = ReadNullableString("RequestId"),
                        GeneticRequestId = ReadNullableString("GeneticRequestId"),
                        RetryCount = reader.GetInt32(reader.GetOrdinal("RetryCount")),
                        MaxRetries = reader.GetInt32(reader.GetOrdinal("MaxRetries")),
                        RetryAfter = ReadNullableDateTime("RetryAfter"),
                        IsRetryable = reader.GetBoolean(reader.GetOrdinal("IsRetryable")),
                        FailureCategory = ReadNullableInt32("FailureCategory")
                    };
                }
            }

            if (job == null)
            {
                _logger.LogDebug("No random job found to claim for worker {WorkerId}", workerId);
                await transaction.CommitAsync();
                return null;
            }

            // Attach first so the assignments below are detected as changes to the tracked entity.
            _context.Jobs.Attach(job);

            // One timestamp so StartedAt and LastHeartbeat agree exactly.
            var claimedAt = DateTime.UtcNow;
            job.Status = (int)JobStatus.Running;
            job.AssignedWorkerId = workerId;
            job.StartedAt = claimedAt;
            job.LastHeartbeat = claimedAt;

            await _context.SaveChangesAsync();
            await transaction.CommitAsync();

            _logger.LogInformation("Claimed random job {JobId} (UserId: {UserId}) for worker {WorkerId}",
                job.Id, job.UserId, workerId);

            return MapToDomain(job);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error claiming random job for worker {WorkerId}", workerId);

            try
            {
                await transaction.RollbackAsync();
            }
            catch (Exception rollbackEx)
            {
                // A failed rollback (e.g. on a broken connection) must not replace the original
                // exception, which is the one the execution strategy and callers need to see.
                _logger.LogWarning(rollbackEx, "Rollback failed while claiming random job for worker {WorkerId}", workerId);
            }

            throw;
        }
        finally
        {
            // Stop tracking the entity so a retry of this delegate, or later use of the same context,
            // does not see a stale or duplicate tracked instance of this job.
            if (job != null)
            {
                _context.Entry(job).State = EntityState.Detached;
            }
        }
    });
}
|
||||
|
||||
public async Task<(IEnumerable<Job> Jobs, int TotalCount)> GetPaginatedAsync(
|
||||
int page,
|
||||
int pageSize,
|
||||
@@ -466,6 +596,25 @@ public class PostgreSqlJobRepository : IJobRepository
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Deletes the job with the given id.
/// </summary>
/// <param name="jobId">Id of the job to delete.</param>
/// <exception cref="InvalidOperationException">Thrown when no job with <paramref name="jobId"/> exists.</exception>
public async Task DeleteAsync(Guid jobId)
{
    // The DbContext uses NoTracking by default, so tracking is requested explicitly
    // for the entity that is about to be removed.
    var trackedJobs = _context.Jobs.AsTracking();
    var jobToDelete = await trackedJobs.FirstOrDefaultAsync(candidate => candidate.Id == jobId);

    if (jobToDelete is null)
    {
        _logger.LogWarning("Job {JobId} not found for deletion", jobId);
        throw new InvalidOperationException($"Job with ID {jobId} not found.");
    }

    _context.Jobs.Remove(jobToDelete);
    await _context.SaveChangesAsync();

    _logger.LogInformation("Deleted job {JobId}", jobId);
}
|
||||
|
||||
// Helper classes for raw SQL query results
|
||||
private class StatusCountResult
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user