From a29e2b5a9900646edbd7d39fcb3b7aa1cb7226a5 Mon Sep 17 00:00:00 2001 From: cryptooda Date: Fri, 31 Oct 2025 01:27:52 +0700 Subject: [PATCH] update healthcheck for security --- .../HealthChecks/OrleansHealthCheck.cs | 66 ++++++++++++++----- .../HealthChecks/Web3ProxyHealthCheck.cs | 5 +- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/src/Managing.Api/HealthChecks/OrleansHealthCheck.cs b/src/Managing.Api/HealthChecks/OrleansHealthCheck.cs index c2ec9da0..7b04a266 100644 --- a/src/Managing.Api/HealthChecks/OrleansHealthCheck.cs +++ b/src/Managing.Api/HealthChecks/OrleansHealthCheck.cs @@ -62,19 +62,41 @@ namespace Managing.Api.HealthChecks issues.AddRange(grainHealth.Issues); } - // Determine overall health status + // Determine overall health status based on number of active silos + try + { + if (healthData.TryGetValue("OrleansMetrics", out var orleansMetricsObj) && + orleansMetricsObj is Dictionary orleansMetrics && + orleansMetrics.TryGetValue("ActiveSilos", out var activeSilosObj) && + int.TryParse(activeSilosObj?.ToString(), out var activeSilos)) + { + if (activeSilos <= 0) + { + return HealthCheckResult.Unhealthy("No active silos found", data: healthData); + } + if (activeSilos == 1) + { + return HealthCheckResult.Degraded("Only one active silo running", data: healthData); + } + // 2 or more active silos + return HealthCheckResult.Healthy("Two or more active silos running", data: healthData); + } + } + catch + { + // Fallback to issue-based logic if parsing fails + } + + // Fallback to issue-based logic if (isHealthy) { return HealthCheckResult.Healthy("Orleans cluster is healthy", data: healthData); } - else if (issues.Count <= 2) + if (issues.Count <= 2) { return HealthCheckResult.Degraded($"Orleans cluster has minor issues: {string.Join(", ", issues)}", data: healthData); } - else - { - return HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData); - } + return 
HealthCheckResult.Unhealthy($"Orleans cluster has critical issues: {string.Join(", ", issues)}", data: healthData); } catch (Exception ex) { @@ -106,16 +128,22 @@ namespace Managing.Api.HealthChecks clusterInfo["ActiveSilos"] = membershipTable.Count(); var membershipList = new List(); + var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase); + foreach (var silo in membershipTable) { var siloData = new Dictionary { - ["SiloName"] = silo.SiloName, ["Status"] = silo.Status.ToString(), - ["SiloAddress"] = silo.SiloAddress.ToString(), - ["HostName"] = silo.HostName, - ["ProxyPort"] = silo.ProxyPort }; + + if (!isProduction) + { + siloData["SiloName"] = silo.SiloName; + siloData["SiloAddress"] = silo.SiloAddress.ToString(); + siloData["HostName"] = silo.HostName; + siloData["ProxyPort"] = silo.ProxyPort; + } if (silo.StartTime != default) { @@ -292,16 +320,22 @@ namespace Managing.Api.HealthChecks var deadSilos = 0; var localhostEnvironment = false; + var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase); + foreach (var silo in siloStatistics) { var siloData = new Dictionary { - ["SiloName"] = silo.SiloName, ["Status"] = silo.Status.ToString(), - ["SiloAddress"] = silo.SiloAddress.ToString(), - ["HostName"] = silo.HostName, - ["ProxyPort"] = silo.ProxyPort }; + + if (!isProduction) + { + siloData["SiloName"] = silo.SiloName; + siloData["SiloAddress"] = silo.SiloAddress.ToString(); + siloData["HostName"] = silo.HostName; + siloData["ProxyPort"] = silo.ProxyPort; + } if (silo.StartTime != default) { @@ -365,8 +399,8 @@ namespace Managing.Api.HealthChecks { issues.Add($"Production environment should have at least 2 active silos for redundancy, found {activeSilos}"); } - // Dead silos in production are concerning - if (deadSilos > 0) + // Allow up to 2 dead silos right after deploy (ghost silos) + 
if (deadSilos > 2) { issues.Add($"Found {deadSilos} dead silos in production environment"); } diff --git a/src/Managing.Api/HealthChecks/Web3ProxyHealthCheck.cs b/src/Managing.Api/HealthChecks/Web3ProxyHealthCheck.cs index c7bc082c..665adb36 100644 --- a/src/Managing.Api/HealthChecks/Web3ProxyHealthCheck.cs +++ b/src/Managing.Api/HealthChecks/Web3ProxyHealthCheck.cs @@ -60,8 +60,9 @@ namespace Managing.Api.HealthChecks data["version"] = versionElement.GetString(); } + var isProduction = string.Equals(Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), "Production", StringComparison.OrdinalIgnoreCase); // Parse checks if available - if (root.TryGetProperty("checks", out var checksElement)) + if (!isProduction && root.TryGetProperty("checks", out var checksElement)) { // Extract Privy check if (checksElement.TryGetProperty("privy", out var privyElement)) @@ -217,7 +218,7 @@ namespace Managing.Api.HealthChecks } // Determine overall health result based on status - if (status.ToLower() == "healthy") + if (!isProduction && status.ToLower() == "healthy") { return HealthCheckResult.Healthy( "Web3Proxy is healthy",