"""
Test Incremental Indicators vs Original Implementations

This script validates that incremental indicators (Bollinger Bands, RSI) produce
identical results to the original batch implementations using real market data.
"""

import logging
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Import original implementations
from cycles.Analysis.boillinger_band import BollingerBands
from cycles.Analysis.rsi import RSI

# Import incremental implementations
from cycles.IncStrategies.indicators.bollinger_bands import BollingerBandsState
from cycles.IncStrategies.indicators.rsi import RSIState
from cycles.IncStrategies.indicators.base import SimpleIndicatorState

# Import storage utility
from cycles.utils.storage import Storage

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("test_incremental.log"),
        logging.StreamHandler(),
    ],
)

# Largest absolute difference for which two result series still count as
# "identical" (absorbs floating-point rounding noise).
IDENTICAL_TOLERANCE = 1e-10


class WildersRSIState(SimpleIndicatorState):
    """
    RSI implementation using Wilder's smoothing to match the original implementation.

    Wilder's smoothing uses alpha = 1/period instead of 2/(period+1).
    """

    def __init__(self, period: int = 14):
        super().__init__(period)
        self.alpha = 1.0 / period  # Wilder's smoothing factor
        self.avg_gain = None  # smoothed average gain; None until the second price arrives
        self.avg_loss = None  # smoothed average loss; None until the second price arrives
        self.previous_close = None  # last price seen; None before the first update
        self.is_initialized = True

    def update(self, new_close: float) -> float:
        """Update RSI with Wilder's smoothing.

        Args:
            new_close: Latest closing price.

        Returns:
            The RSI value after incorporating ``new_close``. The very first
            call has no price change to work with and returns 50.0.

        Raises:
            TypeError: If ``new_close`` is not an ``int`` or ``float``.
        """
        if not isinstance(new_close, (int, float)):
            raise TypeError(f"new_close must be numeric, got {type(new_close)}")

        self.validate_input(new_close)
        new_close = float(new_close)

        if self.previous_close is None:
            # First value - no gain/loss to calculate
            self.previous_close = new_close
            self.values_received += 1
            self._current_value = 50.0
            return self._current_value

        # Calculate price change
        price_change = new_close - self.previous_close
        gain = max(price_change, 0.0)
        loss = max(-price_change, 0.0)

        if self.avg_gain is None:
            # Initialize with first gain/loss
            self.avg_gain = gain
            self.avg_loss = loss
        else:
            # Wilder's smoothing: avg = alpha * new_value + (1 - alpha) * previous_avg
            self.avg_gain = self.alpha * gain + (1 - self.alpha) * self.avg_gain
            self.avg_loss = self.alpha * loss + (1 - self.alpha) * self.avg_loss

        # Calculate RSI
        if self.avg_loss == 0.0:
            # No losses: RS is undefined, so report 100 when there are gains
            # and the neutral 50 when there has been no movement at all.
            rsi_value = 100.0 if self.avg_gain > 0 else 50.0
        else:
            rs = self.avg_gain / self.avg_loss
            rsi_value = 100.0 - (100.0 / (1.0 + rs))

        # Store state
        self.previous_close = new_close
        self.values_received += 1
        self._current_value = rsi_value

        return rsi_value

    def is_warmed_up(self) -> bool:
        """Check if RSI is warmed up (at least ``period`` values received)."""
        return self.values_received >= self.period

    def reset(self) -> None:
        """Reset RSI state."""
        self.avg_gain = None
        self.avg_loss = None
        self.previous_close = None
        self.values_received = 0
        self._current_value = None


def _compare_series(original, incremental, log_label):
    """Compare two index-aligned Series on the rows where both have a value.

    Rows where either series is NaN (e.g. the warm-up period) are ignored.

    Args:
        original: Series produced by the batch implementation.
        incremental: Series produced by the incremental implementation.
        log_label: Prefix used for the log line describing the result.

    Returns:
        A dict with ``max_diff``, ``mean_diff`` and ``identical`` keys, or
        ``None`` when the two series share no non-NaN rows.
    """
    valid_mask = ~(original.isna() | incremental.isna())
    if valid_mask.sum() == 0:
        # Nothing to compare; tell the caller instead of silently "passing".
        logging.warning(f"{log_label}: no overlapping non-NaN values, comparison skipped")
        return None

    abs_diff = np.abs(original[valid_mask] - incremental[valid_mask])
    max_diff = abs_diff.max()
    mean_diff = abs_diff.mean()
    identical = max_diff < IDENTICAL_TOLERANCE

    logging.info(f"{log_label}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")
    return {
        'max_diff': max_diff,
        'mean_diff': mean_diff,
        'identical': identical,
    }


def load_test_data():
    """Load 2023-2024 BTC data for testing.

    Returns:
        The loaded data, or ``None`` when nothing could be loaded.
    """
    storage = Storage(logging=logging)

    # Load data for 2023-2024 period
    start_date = "2023-01-01"
    end_date = "2024-12-31"

    data = storage.load_data("btcusd_1-min_data.csv", start_date, end_date)

    # Guard against a loader that returns None as well as an empty frame.
    if data is None or data.empty:
        logging.error("No data loaded for testing period")
        return None

    logging.info(f"Loaded {len(data)} rows of data from {data.index[0]} to {data.index[-1]}")
    return data


def test_bollinger_bands(data, period=20, std_multiplier=2.0):
    """Test Bollinger Bands: incremental vs batch implementation.

    Args:
        data: Price data with a ``close`` column.
        period: Bollinger Bands look-back period.
        std_multiplier: Standard-deviation multiplier for the bands.

    Returns:
        Tuple ``(comparison_results, original_result, incremental_result)``
        where ``comparison_results`` maps each compared column name to its
        ``max_diff`` / ``mean_diff`` / ``identical`` summary.
    """
    logging.info(f"Testing Bollinger Bands (period={period}, std_multiplier={std_multiplier})")

    # Original batch implementation - fix config structure
    config = {
        "bb_period": period,
        "bb_width": 0.05,  # Required for market regime detection
        "trending": {
            "bb_std_dev_multiplier": std_multiplier
        },
        "sideways": {
            "bb_std_dev_multiplier": std_multiplier
        }
    }
    bb_calculator = BollingerBands(config=config)
    original_result = bb_calculator.calculate(data.copy())

    # Incremental implementation
    bb_state = BollingerBandsState(period=period, std_dev_multiplier=std_multiplier)

    incremental_upper = []
    incremental_middle = []
    incremental_lower = []
    incremental_bandwidth = []

    for close_price in data['close']:
        result = bb_state.update(close_price)
        incremental_upper.append(result['upper_band'])
        incremental_middle.append(result['middle_band'])
        incremental_lower.append(result['lower_band'])
        incremental_bandwidth.append(result['bandwidth'])

    # Create incremental DataFrame
    incremental_result = pd.DataFrame({
        'UpperBand': incremental_upper,
        'SMA': incremental_middle,
        'LowerBand': incremental_lower,
        'BBWidth': incremental_bandwidth
    }, index=data.index)

    # Compare results (column names are the same in both frames)
    comparison_results = {}
    for column in ('UpperBand', 'SMA', 'LowerBand', 'BBWidth'):
        if column not in original_result.columns:
            logging.warning(f"BB {column}: column missing from original result, comparison skipped")
            continue

        # NaN values (warm-up period) are skipped inside the helper
        summary = _compare_series(original_result[column], incremental_result[column], f"BB {column}")
        if summary is not None:
            comparison_results[column] = summary

    return comparison_results, original_result, incremental_result


def test_rsi(data, period=14):
    """Test RSI: incremental vs batch implementation.

    Both the standard incremental ``RSIState`` and the local
    ``WildersRSIState`` are compared against the batch result.

    Args:
        data: Price data with a ``close`` column.
        period: RSI look-back period.

    Returns:
        Tuple ``(comparison_results, original_result, incremental_result_wilders)``.
        Note that the returned incremental frame is the Wilder's-smoothing one.
    """
    logging.info(f"Testing RSI (period={period})")

    # Original batch implementation
    config = {"rsi_period": period}
    rsi_calculator = RSI(config=config)
    original_result = rsi_calculator.calculate(data.copy(), price_column='close')

    # Test both standard EMA and Wilder's smoothing
    rsi_state_standard = RSIState(period=period)
    rsi_state_wilders = WildersRSIState(period=period)

    incremental_rsi_standard = []
    incremental_rsi_wilders = []

    for close_price in data['close']:
        incremental_rsi_standard.append(rsi_state_standard.update(close_price))
        incremental_rsi_wilders.append(rsi_state_wilders.update(close_price))

    # Create incremental DataFrames
    incremental_result_standard = pd.DataFrame({
        'RSI': incremental_rsi_standard
    }, index=data.index)

    incremental_result_wilders = pd.DataFrame({
        'RSI': incremental_rsi_wilders
    }, index=data.index)

    # Compare results
    comparison_results = {}

    if 'RSI' in original_result.columns:
        variants = (
            ('RSI_Standard', "RSI Standard EMA", incremental_result_standard),
            ('RSI_Wilders', "RSI Wilder's EMA", incremental_result_wilders),
        )
        for result_key, log_label, incremental_frame in variants:
            summary = _compare_series(original_result['RSI'], incremental_frame['RSI'], log_label)
            if summary is not None:
                comparison_results[result_key] = summary
    else:
        logging.warning("RSI: column missing from original result, comparison skipped")

    return comparison_results, original_result, incremental_result_wilders


def plot_comparison(original, incremental, indicator_name, save_path=None):
    """Plot comparison between original and incremental implementations.

    The top panel overlays both implementations; the bottom panel shows
    their differences. Only the first 1000 points are drawn.

    Args:
        original: DataFrame from the batch implementation.
        incremental: DataFrame from the incremental implementation.
        indicator_name: Either ``"Bollinger Bands"`` or ``"RSI"``.
        save_path: Optional file path; when given the figure is saved there.
    """
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))

    # Plot first 1000 points for visibility
    n_points = min(1000, len(original))
    x_range = range(n_points)

    if indicator_name == "Bollinger Bands":
        # Plot Bollinger Bands
        axes[0].plot(x_range, original['UpperBand'].iloc[:n_points], 'b-', label='Original Upper', alpha=0.7)
        axes[0].plot(x_range, original['SMA'].iloc[:n_points], 'g-', label='Original SMA', alpha=0.7)
        axes[0].plot(x_range, original['LowerBand'].iloc[:n_points], 'r-', label='Original Lower', alpha=0.7)

        axes[0].plot(x_range, incremental['UpperBand'].iloc[:n_points], 'b--', label='Incremental Upper', alpha=0.7)
        axes[0].plot(x_range, incremental['SMA'].iloc[:n_points], 'g--', label='Incremental SMA', alpha=0.7)
        axes[0].plot(x_range, incremental['LowerBand'].iloc[:n_points], 'r--', label='Incremental Lower', alpha=0.7)

        # Plot differences
        axes[1].plot(x_range, (original['UpperBand'] - incremental['UpperBand']).iloc[:n_points], 'b-', label='Upper Diff')
        axes[1].plot(x_range, (original['SMA'] - incremental['SMA']).iloc[:n_points], 'g-', label='SMA Diff')
        axes[1].plot(x_range, (original['LowerBand'] - incremental['LowerBand']).iloc[:n_points], 'r-', label='Lower Diff')

    elif indicator_name == "RSI":
        # Plot RSI
        axes[0].plot(x_range, original['RSI'].iloc[:n_points], 'b-', label='Original RSI', alpha=0.7)
        axes[0].plot(x_range, incremental['RSI'].iloc[:n_points], 'r--', label='Incremental RSI', alpha=0.7)

        # Plot differences
        axes[1].plot(x_range, (original['RSI'] - incremental['RSI']).iloc[:n_points], 'g-', label='RSI Diff')

    else:
        # Unknown name: nothing is drawn, so make the empty plot explainable.
        logging.warning(f"Unknown indicator '{indicator_name}' - nothing to plot")

    axes[0].set_title(f'{indicator_name} Comparison: Original vs Incremental')
    axes[0].legend()
    axes[0].grid(True)

    axes[1].set_title(f'{indicator_name} Differences')
    axes[1].legend()
    axes[1].grid(True)
    axes[1].set_xlabel('Time Index')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        logging.info(f"Plot saved to {save_path}")

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def _log_summary(prefix, comparison):
    """Log PASS/FAIL per compared series and return True only if all passed.

    An empty ``comparison`` counts as a failure: nothing was validated, so
    reporting success would be misleading.
    """
    if not comparison:
        logging.warning(f"{prefix}: no comparison results were produced")
        return False

    all_passed = True
    for indicator, results in comparison.items():
        status = "PASS" if results['identical'] else "FAIL"
        logging.info(f"{prefix} {indicator}: {status}")
        if not results['identical']:
            all_passed = False
    return all_passed


def main():
    """Main test function."""
    logging.info("Starting incremental indicators validation test")

    # Load test data
    data = load_test_data()
    if data is None:
        return

    # Test with subset for faster execution during development
    test_data = data.iloc[:10000]  # First 10k rows for testing
    logging.info(f"Using {len(test_data)} rows for testing")

    # Test Bollinger Bands
    logging.info("=" * 50)
    bb_comparison, bb_original, bb_incremental = test_bollinger_bands(test_data)

    # Test RSI
    logging.info("=" * 50)
    rsi_comparison, rsi_original, rsi_incremental = test_rsi(test_data)

    # Summary
    logging.info("=" * 50)
    logging.info("VALIDATION SUMMARY:")

    # Evaluate both summaries unconditionally so every status line is logged.
    bb_passed = _log_summary("Bollinger Bands", bb_comparison)
    rsi_passed = _log_summary("RSI", rsi_comparison)
    all_identical = bb_passed and rsi_passed

    if all_identical:
        logging.info("ALL TESTS PASSED - Incremental indicators are identical to original implementations!")
    else:
        logging.warning("Some tests failed - Check differences above")

    # Generate comparison plots
    plot_comparison(bb_original, bb_incremental, "Bollinger Bands", "bb_comparison.png")
    plot_comparison(rsi_original, rsi_incremental, "RSI", "rsi_comparison.png")


if __name__ == "__main__":
    main()