update health check for security

This commit is contained in:
2025-10-31 01:27:52 +07:00
parent 758e376381
commit a29e2b5a99
2 changed files with 53 additions and 18 deletions

View File

@@ -62,20 +62,42 @@ namespace Managing.Api.HealthChecks
issues.AddRange(grainHealth.Issues);
}
// Determine overall health status
// Determine overall health status based on number of active silos
try
{
if (healthData.TryGetValue("OrleansMetrics", out var orleansMetricsObj) &&
orleansMetricsObj is Dictionary<string, object> orleansMetrics &&
orleansMetrics.TryGetValue("ActiveSilos", out var activeSilosObj) &&
int.TryParse(activeSilosObj?.ToString(), out var activeSilos))
{
if (activeSilos <= 0)
{
return HealthCheckResult.Unhealthy("No active silos found", data: healthData);
}
if (activeSilos == 1)
{
return HealthCheckResult.Degraded("Only one active silo running", data: healthData);
}
// 2 or more active silos
return HealthCheckResult.Healthy("Two or more active silos running", data: healthData);
}
}
catch
{
// Fallback to issue-based logic if parsing fails
}
// Fallback to issue-based logic
if (isHealthy)
{
return HealthCheckResult.Healthy("Orleans cluster is healthy", data: healthData);
}
else if (issues.Count <= 2)
if (issues.Count <= 2)
{
return HealthCheckResult.Degraded($"Orleans cluster has minor issues: {string.Join(", ", issues)}", data: healthData);
}
else
{
return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Error checking Orleans health");
@@ -106,17 +128,23 @@ namespace Managing.Api.HealthChecks
clusterInfo["ActiveSilos"] = membershipTable.Count();
var membershipList = new List<object>();
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
foreach (var silo in membershipTable)
{
var siloData = new Dictionary<string, object>
{
["SiloName"] = silo.SiloName,
["Status"] = silo.Status.ToString(),
["SiloAddress"] = silo.SiloAddress.ToString(),
["HostName"] = silo.HostName,
["ProxyPort"] = silo.ProxyPort
};
if (!isProduction)
{
siloData["SiloName"] = silo.SiloName;
siloData["SiloAddress"] = silo.SiloAddress.ToString();
siloData["HostName"] = silo.HostName;
siloData["ProxyPort"] = silo.ProxyPort;
}
if (silo.StartTime != default)
{
siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC");
@@ -292,17 +320,23 @@ namespace Managing.Api.HealthChecks
var deadSilos = 0;
var localhostEnvironment = false;
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
foreach (var silo in siloStatistics)
{
var siloData = new Dictionary<string, object>
{
["SiloName"] = silo.SiloName,
["Status"] = silo.Status.ToString(),
["SiloAddress"] = silo.SiloAddress.ToString(),
["HostName"] = silo.HostName,
["ProxyPort"] = silo.ProxyPort
};
if (!isProduction)
{
siloData["SiloName"] = silo.SiloName;
siloData["SiloAddress"] = silo.SiloAddress.ToString();
siloData["HostName"] = silo.HostName;
siloData["ProxyPort"] = silo.ProxyPort;
}
if (silo.StartTime != default)
{
siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC");
@@ -365,8 +399,8 @@ namespace Managing.Api.HealthChecks
{
issues.Add($"Production environment should have at least 2 active silos for redundancy, found {activeSilos}");
}
// Dead silos in production are concerning
if (deadSilos > 0)
// Allow up to 2 dead silos right after deploy (ghost silos)
if (deadSilos > 2)
{
issues.Add($"Found {deadSilos} dead silos in production environment");
}

View File

@@ -60,8 +60,9 @@ namespace Managing.Api.HealthChecks
data["version"] = versionElement.GetString();
}
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
// Parse checks if available
if (root.TryGetProperty("checks", out var checksElement))
if (!isProduction && root.TryGetProperty("checks", out var checksElement))
{
// Extract Privy check
if (checksElement.TryGetProperty("privy", out var privyElement))
@@ -217,7 +218,7 @@ namespace Managing.Api.HealthChecks
}
// Determine overall health result based on status
if (status.ToLower() == "healthy")
if (!isProduction && status.ToLower() == "healthy")
{
return HealthCheckResult.Healthy(
"Web3Proxy is healthy",