update healthcheck for security

This commit is contained in:
2025-10-31 01:27:52 +07:00
parent 758e376381
commit a29e2b5a99
2 changed files with 53 additions and 18 deletions

View File

@@ -62,19 +62,41 @@ namespace Managing.Api.HealthChecks
issues.AddRange(grainHealth.Issues); issues.AddRange(grainHealth.Issues);
} }
// Determine overall health status // Determine overall health status based on number of active silos
try
{
if (healthData.TryGetValue("OrleansMetrics", out var orleansMetricsObj) &&
orleansMetricsObj is Dictionary<string, object> orleansMetrics &&
orleansMetrics.TryGetValue("ActiveSilos", out var activeSilosObj) &&
int.TryParse(activeSilosObj?.ToString(), out var activeSilos))
{
if (activeSilos <= 0)
{
return HealthCheckResult.Unhealthy("No active silos found", data: healthData);
}
if (activeSilos == 1)
{
return HealthCheckResult.Degraded("Only one active silo running", data: healthData);
}
// 2 or more active silos
return HealthCheckResult.Healthy("Two or more active silos running", data: healthData);
}
}
catch
{
// Fallback to issue-based logic if parsing fails
}
// Fallback to issue-based logic
if (isHealthy) if (isHealthy)
{ {
return HealthCheckResult.Healthy("Orleans cluster is healthy", data: healthData); return HealthCheckResult.Healthy("Orleans cluster is healthy", data: healthData);
} }
else if (issues.Count <= 2) if (issues.Count <= 2)
{ {
return HealthCheckResult.Degraded($"Orleans cluster has minor issues: {string.Join(", ", issues)}", data: healthData); return HealthCheckResult.Degraded($"Orleans cluster has minor issues: {string.Join(", ", issues)}", data: healthData);
} }
else return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData);
{
return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData);
}
} }
catch (Exception ex) catch (Exception ex)
{ {
@@ -106,17 +128,23 @@ namespace Managing.Api.HealthChecks
clusterInfo["ActiveSilos"] = membershipTable.Count(); clusterInfo["ActiveSilos"] = membershipTable.Count();
var membershipList = new List<object>(); var membershipList = new List<object>();
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
foreach (var silo in membershipTable) foreach (var silo in membershipTable)
{ {
var siloData = new Dictionary<string, object> var siloData = new Dictionary<string, object>
{ {
["SiloName"] = silo.SiloName,
["Status"] = silo.Status.ToString(), ["Status"] = silo.Status.ToString(),
["SiloAddress"] = silo.SiloAddress.ToString(),
["HostName"] = silo.HostName,
["ProxyPort"] = silo.ProxyPort
}; };
if (!isProduction)
{
siloData["SiloName"] = silo.SiloName;
siloData["SiloAddress"] = silo.SiloAddress.ToString();
siloData["HostName"] = silo.HostName;
siloData["ProxyPort"] = silo.ProxyPort;
}
if (silo.StartTime != default) if (silo.StartTime != default)
{ {
siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC"); siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC");
@@ -292,17 +320,23 @@ namespace Managing.Api.HealthChecks
var deadSilos = 0; var deadSilos = 0;
var localhostEnvironment = false; var localhostEnvironment = false;
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
foreach (var silo in siloStatistics) foreach (var silo in siloStatistics)
{ {
var siloData = new Dictionary<string, object> var siloData = new Dictionary<string, object>
{ {
["SiloName"] = silo.SiloName,
["Status"] = silo.Status.ToString(), ["Status"] = silo.Status.ToString(),
["SiloAddress"] = silo.SiloAddress.ToString(),
["HostName"] = silo.HostName,
["ProxyPort"] = silo.ProxyPort
}; };
if (!isProduction)
{
siloData["SiloName"] = silo.SiloName;
siloData["SiloAddress"] = silo.SiloAddress.ToString();
siloData["HostName"] = silo.HostName;
siloData["ProxyPort"] = silo.ProxyPort;
}
if (silo.StartTime != default) if (silo.StartTime != default)
{ {
siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC"); siloData["StartTime"] = silo.StartTime.ToString("yyyy-MM-dd HH:mm:ss UTC");
@@ -365,8 +399,8 @@ namespace Managing.Api.HealthChecks
{ {
issues.Add($"Production environment should have at least 2 active silos for redundancy, found {activeSilos}"); issues.Add($"Production environment should have at least 2 active silos for redundancy, found {activeSilos}");
} }
// Dead silos in production are concerning // Allow up to 2 dead silos right after deploy (ghost silos)
if (deadSilos > 0) if (deadSilos > 2)
{ {
issues.Add($"Found {deadSilos} dead silos in production environment"); issues.Add($"Found {deadSilos} dead silos in production environment");
} }

View File

@@ -60,8 +60,9 @@ namespace Managing.Api.HealthChecks
data["version"] = versionElement.GetString(); data["version"] = versionElement.GetString();
} }
var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase);
// Parse checks if available // Parse checks if available
if (root.TryGetProperty("checks", out var checksElement)) if (!isProduction && root.TryGetProperty("checks", out var checksElement))
{ {
// Extract Privy check // Extract Privy check
if (checksElement.TryGetProperty("privy", out var privyElement)) if (checksElement.TryGetProperty("privy", out var privyElement))
@@ -217,7 +218,7 @@ namespace Managing.Api.HealthChecks
} }
// Determine overall health result based on status // Determine overall health result based on status
if (status.ToLower() == "healthy") if (!isProduction && status.ToLower() == "healthy")
{ {
return HealthCheckResult.Healthy( return HealthCheckResult.Healthy(
"Web3Proxy is healthy", "Web3Proxy is healthy",