# Cycles/test/test_incremental_indicators.py

"""
Test Incremental Indicators vs Original Implementations

This script validates that incremental indicators (Bollinger Bands, RSI) produce
identical results to the original batch implementations using real market data.
"""

import pandas as pd
import numpy as np
import logging
from datetime import datetime
import matplotlib.pyplot as plt

# Import original implementations
from cycles.Analysis.boillinger_band import BollingerBands
from cycles.Analysis.rsi import RSI

# Import incremental implementations
from cycles.IncStrategies.indicators.bollinger_bands import BollingerBandsState
from cycles.IncStrategies.indicators.rsi import RSIState
from cycles.IncStrategies.indicators.base import SimpleIndicatorState

# Import storage utility
from cycles.utils.storage import Storage

# Configure root logging once at import time: INFO-level records are sent to
# both a log file ("test_incremental.log") and a default stream handler.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
_log_handlers = [
    logging.FileHandler("test_incremental.log"),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

class WildersRSIState(SimpleIndicatorState):
    """
    Incremental RSI whose average gain/loss use Wilder's smoothing.

    Wilder's smoothing weights each new observation by alpha = 1/period,
    whereas a standard EMA uses 2/(period+1). The class exists so that the
    incremental values can be compared against the original implementation.
    """

    def __init__(self, period: int = 14):
        """Create an empty state for the given smoothing period."""
        super().__init__(period)
        # Nothing has been observed yet, so there is no previous close and
        # no running averages.
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None
        self.alpha = 1.0 / period  # Wilder's smoothing factor
        self.is_initialized = True

    def update(self, new_close: float) -> float:
        """
        Feed one closing price and return the resulting RSI value.

        The very first price yields a neutral 50.0 because no price change
        exists yet. The first price change seeds the running averages; every
        later change is blended in with weight alpha.

        Raises:
            TypeError: if new_close is not an int or float.
        """
        if not isinstance(new_close, (int, float)):
            raise TypeError(f"new_close must be numeric, got {type(new_close)}")

        # Additional checks are delegated to validate_input (not defined in
        # this class; presumably inherited from the base class).
        self.validate_input(new_close)
        price = float(new_close)
        last_price = self.previous_close

        if last_price is None:
            # First observation: nothing to diff against.
            rsi_value = 50.0
        else:
            delta = price - last_price
            up_move = max(delta, 0.0)
            down_move = max(-delta, 0.0)

            if self.avg_gain is not None:
                # Wilder's smoothing: alpha * new + (1 - alpha) * previous
                keep = 1 - self.alpha
                self.avg_gain = self.alpha * up_move + keep * self.avg_gain
                self.avg_loss = self.alpha * down_move + keep * self.avg_loss
            else:
                # Seed the averages with the first observed gain/loss.
                self.avg_gain = up_move
                self.avg_loss = down_move

            if self.avg_loss != 0.0:
                rs = self.avg_gain / self.avg_loss
                rsi_value = 100.0 - (100.0 / (1.0 + rs))
            elif self.avg_gain > 0:
                # Only gains so far: RSI saturates at 100.
                rsi_value = 100.0
            else:
                # Neither gains nor losses: report the neutral value.
                rsi_value = 50.0

        # Persist state for the next call.
        self.previous_close = price
        self.values_received += 1
        self._current_value = rsi_value

        return rsi_value

    def is_warmed_up(self) -> bool:
        """Return True once at least `period` values have been received."""
        return self.values_received >= self.period

    def reset(self) -> None:
        """Discard all accumulated state so the indicator can be reused."""
        self.values_received = 0
        self._current_value = None
        self.previous_close = None
        self.avg_gain = None
        self.avg_loss = None

def load_test_data():
    """
    Load the BTC data for 2023-01-01 through 2024-12-31 used by the tests.

    Returns:
        The object returned by Storage.load_data, or None (after logging an
        error) when that object reports itself as empty.
    """
    storage = Storage(logging=logging)

    # Validation window: calendar years 2023 and 2024.
    period_start, period_end = "2023-01-01", "2024-12-31"
    data = storage.load_data("btcusd_1-min_data.csv", period_start, period_end)

    if not data.empty:
        logging.info(f"Loaded {len(data)} rows of data from {data.index[0]} to {data.index[-1]}")
        return data

    logging.error("No data loaded for testing period")
    return None

def test_bollinger_bands(data, period=20, std_multiplier=2.0):
    """
    Compare incremental Bollinger Bands against the batch implementation.

    Args:
        data: frame with a 'close' column; its index is reused for the
            incremental result.
        period: look-back period passed to both implementations.
        std_multiplier: standard-deviation multiplier passed to both
            implementations.

    Returns:
        Tuple (comparison_results, original_result, incremental_result).
        comparison_results maps each compared column name to a dict with
        'max_diff', 'mean_diff' and 'identical' (max_diff < 1e-10). Columns
        that could not be compared are omitted and a warning is logged.
    """
    logging.info(f"Testing Bollinger Bands (period={period}, std_multiplier={std_multiplier})")

    # Original batch implementation - fix config structure
    config = {
        "bb_period": period,
        "bb_width": 0.05,  # Required for market regime detection
        "trending": {
            "bb_std_dev_multiplier": std_multiplier
        },
        "sideways": {
            "bb_std_dev_multiplier": std_multiplier
        }
    }
    bb_calculator = BollingerBands(config=config)
    original_result = bb_calculator.calculate(data.copy())

    # Incremental implementation
    bb_state = BollingerBandsState(period=period, std_dev_multiplier=std_multiplier)

    # Output column name -> key in the dict returned by bb_state.update().
    column_keys = {
        'UpperBand': 'upper_band',
        'SMA': 'middle_band',
        'LowerBand': 'lower_band',
        'BBWidth': 'bandwidth',
    }
    collected = {column: [] for column in column_keys}

    for close_price in data['close']:
        result = bb_state.update(close_price)
        for column, key in column_keys.items():
            collected[column].append(result[key])

    incremental_result = pd.DataFrame(collected, index=data.index)

    # Compare results column by column.
    comparison_results = {}

    for column in column_keys:
        if column not in original_result.columns:
            # Make skipped comparisons visible instead of dropping them
            # silently; otherwise the summary can pass vacuously.
            logging.warning(f"BB {column}: column missing from original result - comparison skipped")
            continue

        # Only rows where both implementations hold a value are compared
        # (NaN rows, e.g. the warm-up period, are ignored).
        valid_mask = ~(original_result[column].isna() | incremental_result[column].isna())
        if not valid_mask.any():
            logging.warning(f"BB {column}: no rows where both implementations have values - comparison skipped")
            continue

        orig_values = original_result[column][valid_mask]
        inc_values = incremental_result[column][valid_mask]

        # Compute the absolute difference once and derive both statistics.
        abs_diff = np.abs(orig_values - inc_values)
        max_diff = abs_diff.max()
        mean_diff = abs_diff.mean()
        identical = max_diff < 1e-10

        comparison_results[column] = {
            'max_diff': max_diff,
            'mean_diff': mean_diff,
            'identical': identical
        }

        logging.info(f"BB {column}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")

    return comparison_results, original_result, incremental_result

def _compare_rsi_series(original_rsi, incremental_rsi, label):
    """Compare two RSI series on rows where both hold a value.

    Returns a dict with 'max_diff', 'mean_diff' and 'identical'
    (max_diff < 1e-10), or None when there is no row to compare (a warning
    is logged in that case).
    """
    valid_mask = ~(original_rsi.isna() | incremental_rsi.isna())
    if not valid_mask.any():
        logging.warning(f"{label}: no rows where both implementations have values - comparison skipped")
        return None

    # Compute the absolute difference once and derive both statistics.
    abs_diff = np.abs(original_rsi[valid_mask] - incremental_rsi[valid_mask])
    max_diff = abs_diff.max()
    mean_diff = abs_diff.mean()
    identical = max_diff < 1e-10

    logging.info(f"{label}: max_diff={max_diff:.2e}, mean_diff={mean_diff:.2e}, identical={identical}")

    return {
        'max_diff': max_diff,
        'mean_diff': mean_diff,
        'identical': identical
    }


def test_rsi(data, period=14):
    """
    Compare incremental RSI variants against the batch implementation.

    Two incremental variants are fed the same 'close' prices: RSIState and
    WildersRSIState. Each is compared against the batch RSI output.

    Args:
        data: frame with a 'close' column; its index is reused for the
            incremental results.
        period: RSI period passed to all implementations.

    Returns:
        Tuple (comparison_results, original_result, incremental_result_wilders).
        comparison_results may contain 'RSI_Standard' and 'RSI_Wilders', each
        a dict with 'max_diff', 'mean_diff' and 'identical'. A variant that
        could not be compared is omitted and a warning is logged. Only the
        Wilder's incremental frame is returned.
    """
    logging.info(f"Testing RSI (period={period})")

    # Original batch implementation
    config = {"rsi_period": period}
    rsi_calculator = RSI(config=config)
    original_result = rsi_calculator.calculate(data.copy(), price_column='close')

    # Test both standard EMA and Wilder's smoothing
    rsi_state_standard = RSIState(period=period)
    rsi_state_wilders = WildersRSIState(period=period)

    incremental_rsi_standard = []
    incremental_rsi_wilders = []

    for close_price in data['close']:
        rsi_value_standard = rsi_state_standard.update(close_price)
        rsi_value_wilders = rsi_state_wilders.update(close_price)
        incremental_rsi_standard.append(rsi_value_standard)
        incremental_rsi_wilders.append(rsi_value_wilders)

    # Create incremental DataFrames
    incremental_result_standard = pd.DataFrame({
        'RSI': incremental_rsi_standard
    }, index=data.index)

    incremental_result_wilders = pd.DataFrame({
        'RSI': incremental_rsi_wilders
    }, index=data.index)

    # Compare results
    comparison_results = {}

    if 'RSI' not in original_result.columns:
        # Make the skipped comparison visible instead of returning an empty
        # result silently.
        logging.warning("RSI: column missing from original result - comparison skipped")
    else:
        variants = [
            ('RSI_Standard', "RSI Standard EMA", incremental_result_standard),
            ('RSI_Wilders', "RSI Wilder's EMA", incremental_result_wilders),
        ]
        for result_key, label, incremental_frame in variants:
            stats = _compare_rsi_series(original_result['RSI'], incremental_frame['RSI'], label)
            if stats is not None:
                comparison_results[result_key] = stats

    return comparison_results, original_result, incremental_result_wilders

def plot_comparison(original, incremental, indicator_name, save_path=None):
    """
    Plot original vs incremental indicator values and their differences.

    The top axes overlay both implementations; the bottom axes show
    original minus incremental. At most the first 1000 rows are drawn.

    Args:
        original: frame produced by the batch implementation.
        incremental: frame produced by the incremental implementation.
        indicator_name: "Bollinger Bands" (uses columns 'UpperBand', 'SMA',
            'LowerBand') or "RSI" (uses column 'RSI'). Any other value draws
            nothing and logs a warning.
        save_path: if truthy, the figure is also written to this path.
    """
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))
    value_ax, diff_ax = axes

    # Plot first 1000 points for visibility. Clamp to BOTH inputs so a
    # shorter incremental frame cannot cause an x/y length mismatch.
    plot_data = min(1000, len(original), len(incremental))
    x_range = range(plot_data)

    if indicator_name == "Bollinger Bands":
        # (column, matplotlib colour code, label fragment)
        bands = [('UpperBand', 'b', 'Upper'), ('SMA', 'g', 'SMA'), ('LowerBand', 'r', 'Lower')]

        # Originals first, then incrementals, to keep the legend order.
        for column, color, label in bands:
            value_ax.plot(x_range, original[column].iloc[:plot_data], f'{color}-',
                          label=f'Original {label}', alpha=0.7)
        for column, color, label in bands:
            value_ax.plot(x_range, incremental[column].iloc[:plot_data], f'{color}--',
                          label=f'Incremental {label}', alpha=0.7)

        # Plot differences
        for column, color, label in bands:
            diff_ax.plot(x_range, (original[column] - incremental[column]).iloc[:plot_data],
                         f'{color}-', label=f'{label} Diff')

    elif indicator_name == "RSI":
        value_ax.plot(x_range, original['RSI'].iloc[:plot_data], 'b-', label='Original RSI', alpha=0.7)
        value_ax.plot(x_range, incremental['RSI'].iloc[:plot_data], 'r--', label='Incremental RSI', alpha=0.7)

        # Plot differences
        diff_ax.plot(x_range, (original['RSI'] - incremental['RSI']).iloc[:plot_data], 'g-', label='RSI Diff')

    else:
        # Previously an unrecognised name silently produced an empty figure.
        logging.warning(f"Unknown indicator_name {indicator_name!r} - nothing plotted")

    value_ax.set_title(f'{indicator_name} Comparison: Original vs Incremental')
    value_ax.legend()
    value_ax.grid(True)

    diff_ax.set_title(f'{indicator_name} Differences')
    diff_ax.legend()
    diff_ax.grid(True)
    diff_ax.set_xlabel('Time Index')

    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        logging.info(f"Plot saved to {save_path}")

    plt.show()

    # Release the figure so repeated calls do not accumulate open figures.
    # Skipped in interactive mode, where show() does not block and closing
    # here would dismiss the window immediately.
    if not plt.isinteractive():
        plt.close(fig)

def main():
    """
    Run the full validation: load data, compare indicators, summarise, plot.

    Only the first 10,000 loaded rows are used to keep the run fast. Returns
    early (without plotting) when no data could be loaded.
    """
    logging.info("Starting incremental indicators validation test")

    # Load test data
    data = load_test_data()
    if data is None:
        return

    # Test with subset for faster execution during development
    test_data = data.iloc[:10000]  # First 10k rows for testing
    logging.info(f"Using {len(test_data)} rows for testing")

    # Test Bollinger Bands
    logging.info("=" * 50)
    bb_comparison, bb_original, bb_incremental = test_bollinger_bands(test_data)

    # Test RSI
    logging.info("=" * 50)
    rsi_comparison, rsi_original, rsi_incremental = test_rsi(test_data)

    # Summary
    logging.info("=" * 50)
    logging.info("VALIDATION SUMMARY:")

    checks_run = 0
    failures = 0

    for family, comparisons in (("Bollinger Bands", bb_comparison), ("RSI", rsi_comparison)):
        for indicator, results in comparisons.items():
            checks_run += 1
            status = "PASS" if results['identical'] else "FAIL"
            logging.info(f"{family} {indicator}: {status}")
            if not results['identical']:
                failures += 1

    if checks_run == 0:
        # Without this guard an empty comparison set would be reported as
        # "ALL TESTS PASSED" even though nothing was validated.
        logging.warning("No comparisons were performed - nothing was validated")
    elif failures == 0:
        logging.info("ALL TESTS PASSED - Incremental indicators are identical to original implementations!")
    else:
        logging.warning("Some tests failed - Check differences above")

    # Generate comparison plots
    plot_comparison(bb_original, bb_incremental, "Bollinger Bands", "bb_comparison.png")
    plot_comparison(rsi_original, rsi_incremental, "RSI", "rsi_comparison.png")


if __name__ == "__main__":
    main()