update healthcheck for security
This commit is contained in:
@@ -62,19 +62,41 @@ namespace Managing.Api.HealthChecks
|
||||
issues.AddRange(grainHealth.Issues);
|
||||
}
|
||||
|
||||
// Determine overall health status
|
||||
// Determine overall health status based on number of active silos
|
||||
try
|
||||
{
|
||||
if (healthData.TryGetValue("OrleansMetrics", out var orleansMetricsObj) &&
|
||||
orleansMetricsObj is Dictionary<string, object> orleansMetrics &&
|
||||
orleansMetrics.TryGetValue("ActiveSilos", out var activeSilosObj) &&
|
||||
int.TryParse(activeSilosObj?.ToString(), out var activeSilos))
|
||||
{
|
||||
if (activeSilos <= 0)
|
||||
{
|
||||
return HealthCheckResult.Unhealthy("No active silos found", data: healthData);
|
||||
}
|
||||
if (activeSilos == 1)
|
||||
{
|
||||
return HealthCheckResult.Degraded("Only one active silo running", data: healthData);
|
||||
}
|
||||
// 2 or more active silos
|
||||
return HealthCheckResult.Healthy("Two or more active silos running", data: healthData);
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Fallback to issue-based logic if parsing fails
|
||||
}
|
||||
|
||||
// Fallback to issue-based logic
|
||||
if (isHealthy)
|
||||
{
|
||||
return HealthCheckResult.Healthy("Orleans cluster is healthy", data: healthData);
|
||||
}
|
||||
else if (issues.Count <= 2)
|
||||
if (issues.Count <= 2)
|
||||
{
|
||||
return HealthCheckResult.Degraded($"Orleans cluster has minor issues: {string.Join(", ", issues)}", data: healthData);
|
||||
}
|
||||
else
|
||||
{
|
||||
return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData);
|
||||
}
|
||||
return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -106,16 +128,22 @@ namespace Managing.Api.HealthChecks
|
||||
clusterInfo["ActiveSilos"] = membershipTable.Count();
|
||||
|
||||
var membershipList = new List<object>();
|
||||
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var silo in membershipTable)
|
||||
{
|
||||
var siloData = new Dictionary<string, object>
|
||||
{
|
||||
["SiloName"] = silo.SiloName,
|
||||
["Status"] = silo.Status.ToString(),
|
||||
["SiloAddress"] = silo.SiloAddress.ToString(),
|
||||
["HostName"] = silo.HostName,
|
||||
["ProxyPort"] = silo.ProxyPort
|
||||
};
|
||||
|
||||
if (!isProduction)
|
||||
{
|
||||
siloData["SiloName"] = silo.SiloName;
|
||||
siloData["SiloAddress"] = silo.SiloAddress.ToString();
|
||||
siloData["HostName"] = silo.HostName;
|
||||
siloData["ProxyPort"] = silo.ProxyPort;
|
||||
}
|
||||
|
||||
if (silo.StartTime != default)
|
||||
{
|
||||
@@ -292,16 +320,22 @@ namespace Managing.Api.HealthChecks
|
||||
var deadSilos = 0;
|
||||
var localhostEnvironment = false;
|
||||
|
||||
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var silo in siloStatistics)
|
||||
{
|
||||
var siloData = new Dictionary<string, object>
|
||||
{
|
||||
["SiloName"] = silo.SiloName,
|
||||
["Status"] = silo.Status.ToString(),
|
||||
["SiloAddress"] = silo.SiloAddress.ToString(),
|
||||
["HostName"] = silo.HostName,
|
||||
["ProxyPort"] = silo.ProxyPort
|
||||
};
|
||||
|
||||
if (!isProduction)
|
||||
{
|
||||
siloData["SiloName"] = silo.SiloName;
|
||||
siloData["SiloAddress"] = silo.SiloAddress.ToString();
|
||||
siloData["HostName"] = silo.HostName;
|
||||
siloData["ProxyPort"] = silo.ProxyPort;
|
||||
}
|
||||
|
||||
if (silo.StartTime != default)
|
||||
{
|
||||
@@ -365,8 +399,8 @@ namespace Managing.Api.HealthChecks
|
||||
{
|
||||
issues.Add($"Production environment should have at least 2 active silos for redundancy, found {activeSilos}");
|
||||
}
|
||||
// Dead silos in production are concerning
|
||||
if (deadSilos > 0)
|
||||
// Allow up to 2 dead silos right after deploy (ghost silos)
|
||||
if (deadSilos > 2)
|
||||
{
|
||||
issues.Add($"Found {deadSilos} dead silos in production environment");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user