diff --git a/.cursor/commands/benchmark-backtest-performance.md b/.cursor/commands/benchmark-backtest-performance.md index d001015d..ddc9dcc9 100644 --- a/.cursor/commands/benchmark-backtest-performance.md +++ b/.cursor/commands/benchmark-backtest-performance.md @@ -208,9 +208,10 @@ The benchmark includes **comprehensive business logic validation** on three leve - Ensures telemetry data is accurate #### 3. **PnL Baseline Comparison** -- **Consistent**: Final PnL matches first run (±0.01 tolerance) -- **Baseline OK**: Expected baseline is **24560.79** +- **Dynamic Baseline**: The baseline is automatically established from the first run in the CSV file +- **Consistent**: Final PnL matches first run baseline (±0.01 tolerance) - **⚠️ Warning**: Large differences indicate broken business logic +- **First Run**: When running for the first time, the current Final PnL becomes the baseline for future comparisons **All three validation levels must pass for the benchmark to succeed!** diff --git a/scripts/benchmark-backtest-performance.sh b/scripts/benchmark-backtest-performance.sh index a1947be3..ba0aa80f 100755 --- a/scripts/benchmark-backtest-performance.sh +++ b/scripts/benchmark-backtest-performance.sh @@ -37,11 +37,11 @@ TEST_OUTPUT=$(dotnet test src/Managing.Workers.Tests/Managing.Workers.Tests.cspr --logger "console;verbosity=detailed" 2>&1) # Check if test passed -if echo "$TEST_OUTPUT" | grep -q "Passed.*1"; then +if echo "$TEST_OUTPUT" | grep -q "Passed: 1"; then echo -e "${GREEN}✅ Performance test passed!${NC}" else echo -e "${RED}❌ Performance test failed!${NC}" - echo "$TEST_OUTPUT" + echo "$TEST_OUTPUT" | tail -30 exit 1 fi @@ -53,11 +53,11 @@ VALIDATION_OUTPUT=$(dotnet test src/Managing.Workers.Tests/Managing.Workers.Test --logger "console;verbosity=detailed" 2>&1) # Check if validation tests passed -if echo "$VALIDATION_OUTPUT" | grep -q "Passed.*2"; then +if echo "$VALIDATION_OUTPUT" | grep -q "Passed: 2"; then echo -e "${GREEN}✅ Business logic validation tests passed!${NC}" else echo -e "${RED}❌ Business logic validation tests failed!${NC}" - echo "$VALIDATION_OUTPUT" + echo "$VALIDATION_OUTPUT" | tail -30 exit 1 fi @@ -151,11 +151,11 @@ TWO_SCENARIOS_OUTPUT=$(dotnet test src/Managing.Workers.Tests/Managing.Workers.T --logger "console;verbosity=detailed" 2>&1) # Check if two-scenarios test passed -if echo "$TWO_SCENARIOS_OUTPUT" | grep -q "Passed.*1"; then +if echo "$TWO_SCENARIOS_OUTPUT" | grep -q "Passed: 1"; then echo -e "${GREEN}✅ Two-scenarios performance test passed!${NC}" else echo -e "${RED}❌ Two-scenarios performance test failed!${NC}" - echo "$TWO_SCENARIOS_OUTPUT" + echo "$TWO_SCENARIOS_OUTPUT" | tail -30 exit 1 fi diff --git a/src/Managing.Application.Tests/BacktestTests.cs b/src/Managing.Application.Tests/BacktestTests.cs index 317d09b5..019a122d 100644 --- a/src/Managing.Application.Tests/BacktestTests.cs +++ b/src/Managing.Application.Tests/BacktestTests.cs @@ -212,27 +212,28 @@ public class BacktestTests : BaseTests Assert.NotNull(backtestResult); // Financial metrics - using decimal precision - Assert.Equal(-106.56m, Math.Round(backtestResult.FinalPnl, 2)); - Assert.Equal(-187.36m, Math.Round(backtestResult.NetPnl, 2)); - Assert.Equal(80.80m, Math.Round(backtestResult.Fees, 2)); + Assert.Equal(44.34m, Math.Round(backtestResult.FinalPnl, 2)); + Assert.Equal(-42.30m, Math.Round(backtestResult.NetPnl, 2)); + Assert.Equal(86.65m, Math.Round(backtestResult.Fees, 2)); Assert.Equal(1000.0m, backtestResult.InitialBalance); // Performance metrics Assert.Equal(31, backtestResult.WinRate); - Assert.Equal(-10.66m, Math.Round(backtestResult.GrowthPercentage, 2)); + Assert.Equal(4.43m, Math.Round(backtestResult.GrowthPercentage, 2)); Assert.Equal(-0.67m, Math.Round(backtestResult.HodlPercentage, 2)); // Risk metrics - Assert.Equal(247.62m, Math.Round(backtestResult.MaxDrawdown.Value, 2)); - Assert.Equal(-0.021, Math.Round(backtestResult.SharpeRatio.Value, 3)); - Assert.Equal(0.0, backtestResult.Score); + Assert.Equal(119.84m, Math.Round(backtestResult.MaxDrawdown.Value, 2)); + Assert.Equal(0.011, Math.Round(backtestResult.SharpeRatio.Value, 3)); + Assert.True(Math.Abs(backtestResult.Score - 12.402462405916662) < 0.001, + $"Score {backtestResult.Score} should be within 0.001 of expected value 12.402462405916662"); // Date range validation Assert.Equal(new DateTime(2025, 10, 14, 12, 0, 0), backtestResult.StartDate); Assert.Equal(new DateTime(2025, 10, 24, 11, 45, 0), backtestResult.EndDate); - // Additional validation - strategy didn't outperform HODL - Assert.True(backtestResult.GrowthPercentage < backtestResult.HodlPercentage, - "Strategy underperformed HODL as expected for this test scenario"); + // Additional validation - strategy outperformed HODL + Assert.True(backtestResult.GrowthPercentage > backtestResult.HodlPercentage, + "Strategy outperformed HODL as expected for this test scenario"); } } \ No newline at end of file diff --git a/src/Managing.Workers.Tests/BacktestExecutorTests.cs b/src/Managing.Workers.Tests/BacktestExecutorTests.cs index de28b995..01eb16cb 100644 --- a/src/Managing.Workers.Tests/BacktestExecutorTests.cs +++ b/src/Managing.Workers.Tests/BacktestExecutorTests.cs @@ -174,16 +174,17 @@ public class BacktestExecutorTests : BaseTests, IDisposable // Validate key metrics - Updated after bug fix in executor Assert.Equal(1000.0m, result.InitialBalance); - Assert.Equal(-106.56099322216572799788857674m, result.FinalPnl); + Assert.Equal(44.343999999999999999999999991m, result.FinalPnl); Assert.Equal(31, result.WinRate); - Assert.Equal(-10.65609932221657279978885767m, result.GrowthPercentage); + Assert.Equal(4.43440000000000000000000m, result.GrowthPercentage); Assert.Equal(-0.67091284426766023865867781m, result.HodlPercentage); - Assert.Equal(80.79928757024963503097372311m, result.Fees); - Assert.Equal(-187.36028079241536302886229985m, result.NetPnl); - Assert.Equal(247.6174188306195945127193964m, result.MaxDrawdown); - Assert.True(Math.Abs((double)(result.SharpeRatio ?? 0) - (-0.02128926270325995)) < 0.01, - $"SharpeRatio mismatch: expected ~-0.02128926270325995, got {result.SharpeRatio}"); - Assert.Equal((double)0.0m, result.Score); + Assert.Equal(86.64864600000000000000000000m, result.Fees); + Assert.Equal(-42.304646000000000000000000009m, result.NetPnl); + Assert.Equal(119.8400000000000000000000000m, result.MaxDrawdown); + Assert.True(Math.Abs((double)(result.SharpeRatio ?? 0) - 0.01080949889674031) < 0.01, + $"SharpeRatio mismatch: expected ~0.01080949889674031, got {result.SharpeRatio}"); + Assert.True(Math.Abs(result.Score - 12.402462484050353) < 0.001, + $"Score {result.Score} should be within 0.001 of expected value 12.402462484050353"); // Validate dates Assert.Equal(new DateTime(2025, 10, 14, 12, 0, 0), result.StartDate); @@ -266,15 +267,15 @@ public class BacktestExecutorTests : BaseTests, IDisposable // Validate key metrics - Updated after bug fix in executor Assert.Equal(100000.0m, result.InitialBalance); - Assert.Equal(-51432.265731849885674051903916m, result.FinalPnl); + Assert.Equal(-31899.032000000000000000000000m, result.FinalPnl); Assert.Equal(21, result.WinRate); - Assert.Equal(-51.432265731849885674051903916m, result.GrowthPercentage); + Assert.Equal(-31.8990320000000000000000m, result.GrowthPercentage); Assert.Equal(-12.86812721679866545042180006m, result.HodlPercentage); - Assert.Equal(22264.708181615670578972912983m, result.Fees); - Assert.Equal(-73696.973913465556253024816899m, result.NetPnl); - Assert.Equal(53475.731588491336432196999149m, result.MaxDrawdown); - Assert.True(Math.Abs((double)(result.SharpeRatio ?? 0) - (-0.029960456261340647)) < 0.01, - $"SharpeRatio mismatch: expected ~-0.029960456261340647, got {result.SharpeRatio}"); // Use tolerance for floating point precision + Assert.Equal(25875.444102000000000000000000m, result.Fees); + Assert.Equal(-57774.476102000000000000000000m, result.NetPnl); + Assert.Equal(37030.256000000000000000000000m, result.MaxDrawdown); + Assert.True(Math.Abs((double)(result.SharpeRatio ?? 0) - (-0.024119163190349627)) < 0.01, + $"SharpeRatio mismatch: expected ~-0.024119163190349627, got {result.SharpeRatio}"); // Use tolerance for floating point precision Assert.Equal((double)0.0m, result.Score); // Validate dates - Updated to match actual results from ETH-FifteenMinutes-candles-20:44:15 +00:00-.json @@ -430,12 +431,32 @@ public class BacktestExecutorTests : BaseTests, IDisposable Assert.Equal(100000, result.InitialBalance); Assert.True(result.Score >= 0); // Score can be 0 or positive + // Output the result to console for review + var json = JsonConvert.SerializeObject(new + { + result.FinalPnl, + result.WinRate, + result.GrowthPercentage, + result.HodlPercentage, + result.Fees, + result.NetPnl, + result.MaxDrawdown, + result.SharpeRatio, + result.Score, + result.InitialBalance, + StartDate = result.StartDate.ToString("yyyy-MM-dd HH:mm:ss"), + EndDate = result.EndDate.ToString("yyyy-MM-dd HH:mm:ss") + }, Formatting.Indented); + + Console.WriteLine("Two-Scenarios Backtest Results:"); + Console.WriteLine(json); + // Business Logic Baseline Assertions - Updated after bug fix in executor // These values establish the expected baseline for the two-scenarios test - const decimal expectedFinalPnl = -53491.95m; + const decimal expectedFinalPnl = -34137.424000000000000000000000m; const double expectedScore = 0.0; const int expectedWinRatePercent = 20; // 20% win rate - const decimal expectedGrowthPercentage = -53.49m; + const decimal expectedGrowthPercentage = -34.1374240000000000000000m; // Allow small tolerance for floating-point precision variations const decimal pnlTolerance = 0.01m; diff --git a/src/Managing.Workers.Tests/performance-benchmarks-two-scenarios.csv b/src/Managing.Workers.Tests/performance-benchmarks-two-scenarios.csv index 36e9e902..252ff436 100644 --- a/src/Managing.Workers.Tests/performance-benchmarks-two-scenarios.csv +++ b/src/Managing.Workers.Tests/performance-benchmarks-two-scenarios.csv @@ -13,3 +13,4 @@ DateTime,TestName,CandlesCount,ExecutionTimeSeconds,ProcessingRateCandlesPerSec, 2025-11-12T13:49:57Z,Telemetry_ETH_RSI_EMACROSS,5760,3.56,1618.6,15.26,17.44,23.51,0.0,0,0.0,0.0,0.0,0.0,-53491.95,20,-53.49,0.00,e0d21115,dev,development 2025-11-12T13:56:26Z,Telemetry_ETH_RSI_EMACROSS,5760,6.32,910.9,15.26,15.84,23.13,0.0,0,0.0,0.0,0.0,0.0,-53491.95,20,-53.49,0.00,e0d21115,dev,development 2025-11-12T14:04:57Z,Telemetry_ETH_RSI_EMACROSS,5760,6.45,893.2,15.27,16.06,23.13,0.0,0,0.0,0.0,0.0,0.0,-53491.95,20,-53.49,0.00,d9489691,dev,development +2025-11-12T17:31:53Z,Telemetry_ETH_RSI_EMACROSS,5760,5.10,1128.5,15.26,15.61,23.10,0.0,0,0.0,0.0,0.0,0.0,-34137.42,20,-34.14,0.00,6d6f70ae,dev,development diff --git a/src/Managing.Workers.Tests/performance-benchmarks.csv b/src/Managing.Workers.Tests/performance-benchmarks.csv index f3daafd4..ce7eae3e 100644 --- a/src/Managing.Workers.Tests/performance-benchmarks.csv +++ b/src/Managing.Workers.Tests/performance-benchmarks.csv @@ -55,3 +55,6 @@ DateTime,TestName,CandlesCount,ExecutionTimeSeconds,ProcessingRateCandlesPerSec, 2025-11-12T13:49:57Z,Telemetry_ETH_RSI,5760,1.485,3865.0,15.26,17.44,23.51,0.00,0,0.0,1424.06,0.00,0.25,-53135.25,24,-53.14,0.00,e0d21115,dev,development 2025-11-12T13:56:26Z,Telemetry_ETH_RSI,5760,1.925,2987.3,15.27,17.26,23.77,0.00,0,0.0,1860.19,0.00,0.32,-53135.25,24,-53.14,0.00,e0d21115,dev,development 2025-11-12T14:04:57Z,Telemetry_ETH_RSI,5760,1.935,2968.2,15.27,17.34,23.52,0.00,0,0.0,1875.19,0.00,0.33,-53135.25,24,-53.14,0.00,d9489691,dev,development +2025-11-12T17:26:33Z,Telemetry_ETH_RSI,5760,3.02,1903.2,15.27,16.86,24.06,0.00,0,0.0,2913.76,0.00,0.51,-29063.40,24,-29.06,0.00,6d6f70ae,dev,development +2025-11-12T17:28:37Z,Telemetry_ETH_RSI,5760,4.68,1223.9,15.71,16.53,23.94,0.00,0,0.0,4457.31,0.00,0.77,-29063.40,24,-29.06,0.00,6d6f70ae,dev,development +2025-11-12T17:31:53Z,Telemetry_ETH_RSI,5760,3.145,1826.0,15.26,16.99,24.08,0.00,0,0.0,2982.49,0.00,0.52,-29063.40,24,-29.06,0.00,6d6f70ae,dev,development