Improve workers for backtests

This commit is contained in:
2025-11-10 01:44:33 +07:00
parent 97f2b8229b
commit 7e52b7a734
18 changed files with 740 additions and 144 deletions

View File

@@ -75,9 +75,10 @@ public class PostgreSqlGeneticRepository : IGeneticRepository
public async Task UpdateGeneticRequestAsync(GeneticRequest geneticRequest)
{
var existingEntity = _context.GeneticRequests
var existingEntity = await _context.GeneticRequests
.AsTracking() // Explicitly enable tracking to ensure entity is tracked
.Include(gr => gr.User)
.FirstOrDefault(gr => gr.RequestId == geneticRequest.RequestId);
.FirstOrDefaultAsync(gr => gr.RequestId == geneticRequest.RequestId);
if (existingEntity != null)
{
@@ -110,9 +111,13 @@ public class PostgreSqlGeneticRepository : IGeneticRepository
existingEntity.EligibleIndicatorsJson = "[]";
}
// Only update the tracked entity, do not attach a new one
// Save changes - entity is tracked so changes will be persisted
await _context.SaveChangesAsync();
}
else
{
throw new InvalidOperationException($"Genetic request with RequestId '{geneticRequest.RequestId}' not found in database");
}
}
public void DeleteGeneticRequestByIdForUser(User user, string id)

View File

@@ -219,6 +219,136 @@ public class PostgreSqlJobRepository : IJobRepository
return entities.Select(MapToDomain);
}
/// <summary>
/// Counts the jobs that are currently in the Running state for the given job type,
/// grouped by the user that owns them.
/// </summary>
/// <param name="workerId">
/// Not used by the query: the counts are not filtered by worker, so they cover every worker.
/// </param>
/// <param name="jobType">Job type whose running jobs are counted.</param>
/// <returns>
/// A dictionary keyed by user id whose value is that user's running-job count.
/// Users without a running job of this type have no entry.
/// </returns>
public async Task<Dictionary<int, int>> GetRunningJobCountsByUserIdAsync(string workerId, JobType jobType)
{
    // Aggregate in the database; only one (user id, count) row per user is materialized.
    var perUserRows = await _context.Jobs
        .Where(job => job.Status == (int)JobStatus.Running)
        .Where(job => job.JobType == (int)jobType)
        .GroupBy(job => job.UserId)
        .Select(userJobs => new { UserId = userJobs.Key, Count = userJobs.Count() })
        .ToListAsync();

    var runningByUser = new Dictionary<int, int>(perUserRows.Count);
    foreach (var row in perUserRows)
    {
        runningByUser.Add(row.UserId, row.Count);
    }

    return runningByUser;
}
/// <summary>
/// Picks one random pending job of the given type whose owner has fewer than
/// <paramref name="maxConcurrentPerUser"/> running jobs of that type, marks it as running
/// for <paramref name="workerId"/>, and returns it.
/// </summary>
/// <param name="workerId">Identifier stored in the claimed job's AssignedWorkerId.</param>
/// <param name="jobType">Job type to claim.</param>
/// <param name="maxConcurrentPerUser">
/// Upper bound (exclusive) on a user's running jobs of this type for their pending jobs to be eligible.
/// </param>
/// <returns>The claimed job mapped to the domain model, or <c>null</c> when no eligible job was found.</returns>
/// <remarks>
/// Any exception is logged, the transaction is rolled back (best effort) and the original
/// exception is rethrown so the execution strategy can decide whether to retry.
/// </remarks>
public async Task<Job?> ClaimRandomJobAsync(string workerId, JobType jobType, int maxConcurrentPerUser)
{
    // Use execution strategy to support retry with transactions
    var strategy = _context.Database.CreateExecutionStrategy();
    return await strategy.ExecuteAsync(async () =>
    {
        await using var transaction = await _context.Database.BeginTransactionAsync();
        try
        {
            // Select one pending job, skipping rows locked by other claimers and excluding
            // users whose running-job count has reached the limit.
            // NOTE(review): FOR UPDATE SKIP LOCKED locks only the selected pending row; the
            // running-count subquery takes no lock, so two workers claiming different jobs of
            // the same user at the same moment may both pass the check. Confirm whether a
            // brief overshoot of the per-user limit is acceptable.
            var sql = @"
SELECT j.""Id"", j.""BundleRequestId"", j.""UserId"", j.""Status"", j.""JobType"", j.""Priority"",
j.""ConfigJson"", j.""StartDate"", j.""EndDate"", j.""ProgressPercentage"",
j.""AssignedWorkerId"", j.""LastHeartbeat"", j.""CreatedAt"", j.""StartedAt"",
j.""CompletedAt"", j.""ResultJson"", j.""ErrorMessage"", j.""RequestId"",
j.""GeneticRequestId"", j.""RetryCount"", j.""MaxRetries"", j.""RetryAfter"",
j.""IsRetryable"", j.""FailureCategory""
FROM ""Jobs"" j
WHERE j.""Status"" = @status
AND j.""JobType"" = @jobType
AND (
SELECT COUNT(*)
FROM ""Jobs"" running
WHERE running.""UserId"" = j.""UserId""
AND running.""Status"" = @runningStatus
AND running.""JobType"" = @jobType
) < @maxConcurrentPerUser
ORDER BY RANDOM()
LIMIT 1
FOR UPDATE SKIP LOCKED";

            var parameters = new List<NpgsqlParameter>
            {
                new NpgsqlParameter("status", NpgsqlDbType.Integer) { Value = (int)JobStatus.Pending },
                new NpgsqlParameter("jobType", NpgsqlDbType.Integer) { Value = (int)jobType },
                new NpgsqlParameter("runningStatus", NpgsqlDbType.Integer) { Value = (int)JobStatus.Running },
                new NpgsqlParameter("maxConcurrentPerUser", NpgsqlDbType.Integer) { Value = maxConcurrentPerUser }
            };

            _logger.LogDebug("Claiming random job atomically (maxConcurrentPerUser: {MaxConcurrent})", maxConcurrentPerUser);

            // Execute raw SQL using ADO.NET, enlisted in the EF transaction so the row lock
            // is held until the status update below is committed.
            var connection = _context.Database.GetDbConnection();
            await using var command = connection.CreateCommand();
            command.Transaction = transaction.GetDbTransaction();
            command.CommandText = sql;
            command.Parameters.AddRange(parameters.ToArray());

            JobEntity? job = null;
            await using var reader = await command.ExecuteReaderAsync();
            if (await reader.ReadAsync())
            {
                job = new JobEntity
                {
                    Id = reader.GetGuid(reader.GetOrdinal("Id")),
                    BundleRequestId = reader.IsDBNull(reader.GetOrdinal("BundleRequestId")) ? null : reader.GetGuid(reader.GetOrdinal("BundleRequestId")),
                    UserId = reader.GetInt32(reader.GetOrdinal("UserId")),
                    Status = reader.GetInt32(reader.GetOrdinal("Status")),
                    JobType = reader.GetInt32(reader.GetOrdinal("JobType")),
                    Priority = reader.GetInt32(reader.GetOrdinal("Priority")),
                    ConfigJson = reader.GetString(reader.GetOrdinal("ConfigJson")),
                    StartDate = reader.GetDateTime(reader.GetOrdinal("StartDate")),
                    EndDate = reader.GetDateTime(reader.GetOrdinal("EndDate")),
                    ProgressPercentage = reader.GetInt32(reader.GetOrdinal("ProgressPercentage")),
                    AssignedWorkerId = reader.IsDBNull(reader.GetOrdinal("AssignedWorkerId")) ? null : reader.GetString(reader.GetOrdinal("AssignedWorkerId")),
                    LastHeartbeat = reader.IsDBNull(reader.GetOrdinal("LastHeartbeat")) ? null : reader.GetDateTime(reader.GetOrdinal("LastHeartbeat")),
                    CreatedAt = reader.GetDateTime(reader.GetOrdinal("CreatedAt")),
                    StartedAt = reader.IsDBNull(reader.GetOrdinal("StartedAt")) ? null : reader.GetDateTime(reader.GetOrdinal("StartedAt")),
                    CompletedAt = reader.IsDBNull(reader.GetOrdinal("CompletedAt")) ? null : reader.GetDateTime(reader.GetOrdinal("CompletedAt")),
                    ResultJson = reader.IsDBNull(reader.GetOrdinal("ResultJson")) ? null : reader.GetString(reader.GetOrdinal("ResultJson")),
                    ErrorMessage = reader.IsDBNull(reader.GetOrdinal("ErrorMessage")) ? null : reader.GetString(reader.GetOrdinal("ErrorMessage")),
                    RequestId = reader.IsDBNull(reader.GetOrdinal("RequestId")) ? null : reader.GetString(reader.GetOrdinal("RequestId")),
                    GeneticRequestId = reader.IsDBNull(reader.GetOrdinal("GeneticRequestId")) ? null : reader.GetString(reader.GetOrdinal("GeneticRequestId")),
                    RetryCount = reader.GetInt32(reader.GetOrdinal("RetryCount")),
                    MaxRetries = reader.GetInt32(reader.GetOrdinal("MaxRetries")),
                    RetryAfter = reader.IsDBNull(reader.GetOrdinal("RetryAfter")) ? null : reader.GetDateTime(reader.GetOrdinal("RetryAfter")),
                    IsRetryable = reader.GetBoolean(reader.GetOrdinal("IsRetryable")),
                    FailureCategory = reader.IsDBNull(reader.GetOrdinal("FailureCategory")) ? null : reader.GetInt32(reader.GetOrdinal("FailureCategory"))
                };
            }

            // Close the reader before issuing the UPDATE on the same connection.
            await reader.CloseAsync();

            if (job == null)
            {
                _logger.LogDebug("No random job found to claim for worker {WorkerId}", workerId);
                await transaction.CommitAsync();
                return null;
            }

            // Attach the row as an unchanged entity, then modify it so SaveChanges emits an UPDATE.
            _context.Jobs.Attach(job);
            try
            {
                // Single timestamp so StartedAt and LastHeartbeat agree exactly.
                var claimedAt = DateTime.UtcNow;
                job.Status = (int)JobStatus.Running;
                job.AssignedWorkerId = workerId;
                job.StartedAt = claimedAt;
                job.LastHeartbeat = claimedAt;

                await _context.SaveChangesAsync();
                await transaction.CommitAsync();
            }
            finally
            {
                // Stop tracking the entity whether or not the claim succeeded. Otherwise an
                // execution-strategy retry, or a later claim of the same job on this context,
                // would fail in Attach because an instance with the same key is still tracked.
                _context.Entry(job).State = EntityState.Detached;
            }

            _logger.LogInformation("Claimed random job {JobId} (UserId: {UserId}) for worker {WorkerId}",
                job.Id, job.UserId, workerId);

            return MapToDomain(job);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error claiming random job for worker {WorkerId}", workerId);

            try
            {
                await transaction.RollbackAsync();
            }
            catch (Exception rollbackEx)
            {
                // A failed rollback (e.g. broken connection) must not replace the original error.
                _logger.LogWarning(rollbackEx, "Rollback failed after error claiming random job for worker {WorkerId}", workerId);
            }

            throw;
        }
    });
}
public async Task<(IEnumerable<Job> Jobs, int TotalCount)> GetPaginatedAsync(
int page,
int pageSize,
@@ -466,6 +596,25 @@ public class PostgreSqlJobRepository : IJobRepository
}
}
/// <summary>
/// Deletes the job with the given id.
/// </summary>
/// <param name="jobId">Id of the job to delete.</param>
/// <exception cref="InvalidOperationException">Thrown when no job with that id exists.</exception>
public async Task DeleteAsync(Guid jobId)
{
    // The DbContext defaults to no-tracking queries, so tracking is requested explicitly
    // to load an entity that Remove/SaveChanges can act on.
    var jobToDelete = await _context.Jobs
        .AsTracking()
        .FirstOrDefaultAsync(candidate => candidate.Id == jobId);

    if (jobToDelete != null)
    {
        _context.Jobs.Remove(jobToDelete);
        await _context.SaveChangesAsync();

        _logger.LogInformation("Deleted job {JobId}", jobId);
        return;
    }

    _logger.LogWarning("Job {JobId} not found for deletion", jobId);
    throw new InvalidOperationException($"Job with ID {jobId} not found.");
}
// Helper classes for raw SQL query results
private class StatusCountResult
{