# Cycles/test/indicators/test_bollinger_bands.py

"""
Bollinger Bands Indicators Comparison Test

Focused testing for Bollinger Bands implementations.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import sys
from pathlib import Path

# Add the project root to sys.path so the top-level `cycles` and
# `IncrementalTrader` packages can be imported when this file is run directly.
# NOTE(review): assumes this file sits at <root>/test/indicators/ (as the path
# at the top of the file and the "test/results/..." output directory suggest),
# which puts the root three levels above this file -- confirm.
# resolve() makes the result absolute even when __file__ is a relative path.
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(project_root))

# Import original indicators
from cycles.IncStrategies.indicators import (
    BollingerBandsState as OriginalBB,
    BollingerBandsOHLCState as OriginalBBOHLC
)

# Import new indicators
from IncrementalTrader.strategies.indicators import (
    BollingerBandsState as NewBB,
    BollingerBandsOHLCState as NewBBOHLC
)


class BollingerBandsComparisonTest:
    """Compare the original and the new Bollinger Bands implementations.

    The close-price and the OHLC variants of both implementations are fed the
    same rows of a CSV file. The per-band series are kept in ``self.results``
    (keyed ``BB_<period>_<std_dev>`` / ``BBOHLC_<period>_<std_dev>``) and are
    then summarised on stdout, as PNG plots and as a Markdown report written
    below ``self.results_dir``.
    """

    # Band names, in the order they are collected, plotted and reported.
    BANDS = ('upper', 'middle', 'lower')
    # Line colour used for each band in every plot.
    BAND_COLORS = {'upper': 'red', 'middle': 'blue', 'lower': 'green'}
    # Largest absolute difference still reported as PASSED / as WARNING.
    PASS_TOLERANCE = 1e-10
    WARN_TOLERANCE = 1e-6

    def __init__(self, data_file: str = "data/btcusd_1-min_data.csv", sample_size: int = 5000):
        """Store the configuration and create the results directory.

        Args:
            data_file: Path of the CSV file read by ``load_data``.
            sample_size: Maximum number of trailing rows kept by
                ``load_data``; a falsy value keeps every row.
        """
        self.data_file = data_file
        self.sample_size = sample_size
        self.data = None
        self.results = {}

        # Create results directory (relative to the current working directory)
        self.results_dir = Path("test/results/bollinger_bands")
        self.results_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @classmethod
    def _classify(cls, max_diff):
        """Return the status label for a maximum absolute difference."""
        if max_diff < cls.PASS_TOLERANCE:
            return "✅ PASSED"
        if max_diff < cls.WARN_TOLERANCE:
            return "⚠️ WARNING"
        return "❌ FAILED"

    @staticmethod
    def _valid_diff(original, new):
        """Return ``new - original`` element-wise with NaN entries removed.

        An entry is NaN whenever either input is NaN at that position, which
        is how rows recorded before an indicator warmed up are excluded.
        """
        diff = np.array(new) - np.array(original)
        return diff[~np.isnan(diff)]

    @classmethod
    def _new_band_series(cls):
        """Return empty ``original_<band>`` / ``new_<band>`` result lists."""
        return {
            f'{source}_{band}': []
            for source in ('original', 'new')
            for band in cls.BANDS
        }

    @classmethod
    def _record_bands(cls, indicator, series, source):
        """Append the indicator's current bands (or NaN) to ``series``.

        ``get_current_value()`` is read once per row instead of once per band;
        NaN placeholders keep every series aligned with the input rows while
        the indicator reports that it is not warmed up yet.
        """
        if indicator.is_warmed_up():
            current = indicator.get_current_value()
            for band in cls.BANDS:
                series[f'{source}_{band}'].append(current[f'{band}_band'])
        else:
            for band in cls.BANDS:
                series[f'{source}_{band}'].append(np.nan)

    def _print_band_differences(self, series):
        """Print max/mean absolute difference and status for each band."""
        for band in self.BANDS:
            valid_diff = self._valid_diff(series[f'original_{band}'], series[f'new_{band}'])

            if len(valid_diff) > 0:
                abs_diff = np.abs(valid_diff)
                max_diff = np.max(abs_diff)
                mean_diff = np.mean(abs_diff)

                print(f"  {band.capitalize()} band - Max diff: {max_diff:.12f}, Mean diff: {mean_diff:.12f}")
                print(f"    Status: {self._classify(max_diff)}")
            else:
                print(f"  {band.capitalize()} band - ❌ ERROR: No valid data points")

    @staticmethod
    def _day_tick_interval(dates, target_ticks=10):
        """Return a day interval giving roughly ``target_ticks`` x-axis ticks.

        The interval is derived from the number of days the data spans, not
        from the number of samples: with minute data the sample count is in
        the thousands while the span is only a few days, so a sample-based
        interval would leave the axis with (almost) no ticks.
        """
        if len(dates) < 2:
            return 1
        span_days = (dates.max() - dates.min()).days
        return max(1, span_days // target_ticks)

    def _plot_bands(self, ax, dates, result, source, **line_kwargs):
        """Draw the three bands of one implementation onto ``ax``."""
        for band in self.BANDS:
            ax.plot(dates, result[f'{source}_{band}'],
                    label=f'{source.capitalize()} {band.capitalize()}',
                    alpha=0.8, color=self.BAND_COLORS[band], **line_kwargs)

    # ------------------------------------------------------------------
    # Data loading and comparison runs
    # ------------------------------------------------------------------

    def load_data(self):
        """Load the CSV into ``self.data``, keeping the last ``sample_size`` rows.

        A ``datetime`` column is derived from the ``Timestamp`` column, which
        is interpreted as seconds since the Unix epoch.
        """
        print(f"Loading data from {self.data_file}...")

        df = pd.read_csv(self.data_file)
        df['datetime'] = pd.to_datetime(df['Timestamp'], unit='s')

        if self.sample_size and len(df) > self.sample_size:
            df = df.tail(self.sample_size).reset_index(drop=True)

        self.data = df
        print(f"Loaded {len(df)} data points from {df['datetime'].iloc[0]} to {df['datetime'].iloc[-1]}")

    def test_bollinger_bands(self, periods=(10, 20, 30), std_devs=(1.5, 2.0, 2.5)):
        """Compare the close-price Bollinger Bands implementations.

        Every ``(period, std_dev)`` combination is run over ``self.data``
        (``load_data`` must have been called), stored in ``self.results`` under
        ``BB_<period>_<std_dev>`` and summarised on stdout.

        Args:
            periods: Window lengths to test.
            std_devs: Standard-deviation multipliers to test.
        """
        print("\n=== Testing Bollinger Bands (Close Price) ===")

        for period in periods:
            for std_dev in std_devs:
                print(f"Testing BollingerBands({period}, {std_dev})...")

                # Initialize indicators
                original_bb = OriginalBB(period, std_dev)
                new_bb = NewBB(period, std_dev)

                series = self._new_band_series()
                prices = []

                # Feed both implementations the same close price, row by row
                for _, row in self.data.iterrows():
                    price = row['Close']
                    prices.append(price)

                    original_bb.update(price)
                    new_bb.update(price)

                    self._record_bands(original_bb, series, 'original')
                    self._record_bands(new_bb, series, 'new')

                # Store results
                self.results[f'BB_{period}_{std_dev}'] = {
                    **series,
                    'prices': prices,
                    'dates': self.data['datetime'].tolist(),
                    'period': period,
                    'std_dev': std_dev,
                    'type': 'Close',
                }

                self._print_band_differences(series)

    def test_bollinger_bands_ohlc(self, periods=(10, 20, 30), std_devs=(1.5, 2.0, 2.5)):
        """Compare the OHLC Bollinger Bands implementations.

        Both indicators receive a dict with ``open``/``high``/``low``/``close``
        for every row of ``self.data``. The typical price
        ``(high + low + close) / 3`` is computed here only so it can be
        plotted. Results are stored under ``BBOHLC_<period>_<std_dev>``.

        Args:
            periods: Window lengths to test.
            std_devs: Standard-deviation multipliers to test.
        """
        print("\n=== Testing Bollinger Bands OHLC (Typical Price) ===")

        for period in periods:
            for std_dev in std_devs:
                print(f"Testing BollingerBandsOHLC({period}, {std_dev})...")

                # Initialize indicators
                original_bb = OriginalBBOHLC(period, std_dev)
                new_bb = NewBBOHLC(period, std_dev)

                series = self._new_band_series()
                typical_prices = []

                for _, row in self.data.iterrows():
                    high, low, close = row['High'], row['Low'], row['Close']
                    typical_prices.append((high + low + close) / 3)

                    # The same OHLC dictionary is handed to both indicators
                    ohlc_data = {
                        'open': row['Open'],
                        'high': high,
                        'low': low,
                        'close': close,
                    }

                    original_bb.update(ohlc_data)
                    new_bb.update(ohlc_data)

                    self._record_bands(original_bb, series, 'original')
                    self._record_bands(new_bb, series, 'new')

                # Store results
                self.results[f'BBOHLC_{period}_{std_dev}'] = {
                    **series,
                    'prices': self.data['Close'].tolist(),
                    'typical_prices': typical_prices,
                    'highs': self.data['High'].tolist(),
                    'lows': self.data['Low'].tolist(),
                    'dates': self.data['datetime'].tolist(),
                    'period': period,
                    'std_dev': std_dev,
                    'type': 'OHLC',
                }

                self._print_band_differences(series)

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------

    def plot_comparison(self, indicator_name: str):
        """Plot, save and show a four-panel comparison for one result entry.

        The panels are: original bands, new bands, both overlaid, and the
        per-band differences (new - original). The figure is saved as
        ``<indicator_name>_detailed_comparison.png`` in ``self.results_dir``.
        Nothing is plotted when ``indicator_name`` has no stored results.
        """
        if indicator_name not in self.results:
            print(f"No results found for {indicator_name}")
            return

        result = self.results[indicator_name]
        dates = pd.to_datetime(result['dates'])

        # Create figure with subplots
        fig, axes = plt.subplots(4, 1, figsize=(15, 16))
        fig.suptitle(f'{indicator_name} - Detailed Comparison Analysis', fontsize=16)

        # The reference price line depends on what the indicator was fed.
        if result['type'] == 'OHLC':
            price_series, price_label = result['typical_prices'], 'Typical Price'
        else:
            price_series, price_label = result['prices'], 'Close Price'

        # Plots 1 and 2: each implementation on its own (new one dashed)
        panels = (
            (axes[0], 'original', {}),
            (axes[1], 'new', {'linestyle': '--'}),
        )
        for ax, source, line_kwargs in panels:
            ax.plot(dates, price_series, label=price_label, alpha=0.7, color='black', linewidth=1)
            self._plot_bands(ax, dates, result, source, **line_kwargs)
            ax.fill_between(dates, result[f'{source}_upper'], result[f'{source}_lower'],
                            alpha=0.1, color='gray')
            ax.set_title(f'{indicator_name} - {source.capitalize()} Implementation')
            ax.legend()
            ax.grid(True, alpha=0.3)

        # Plot 3: Overlay comparison
        ax3 = axes[2]
        self._plot_bands(ax3, dates, result, 'original')
        self._plot_bands(ax3, dates, result, 'new', linestyle='--')
        ax3.set_title(f'{indicator_name} - Overlay Comparison')
        ax3.legend()
        ax3.grid(True, alpha=0.3)

        # Plot 4: Differences for all bands, plus a box with the max per band
        ax4 = axes[3]
        stats_lines = []
        for band in self.BANDS:
            diff = np.array(result[f'new_{band}']) - np.array(result[f'original_{band}'])
            ax4.plot(dates, diff, label=f'{band.capitalize()} diff', alpha=0.7,
                     color=self.BAND_COLORS[band])

            valid_diff = diff[~np.isnan(diff)]
            if len(valid_diff) > 0:
                stats_lines.append(f'{band.capitalize()}: Max={np.max(np.abs(valid_diff)):.2e}')

        ax4.set_title(f'{indicator_name} Differences (New - Original)')
        ax4.axhline(y=0, color='black', linestyle='-', alpha=0.5)
        ax4.legend()
        ax4.grid(True, alpha=0.3)
        ax4.text(0.02, 0.98, '\n'.join(stats_lines), transform=ax4.transAxes,
                 verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        # Format x-axis; the tick spacing follows the time span of the data
        tick_interval = self._day_tick_interval(dates)
        for ax in axes:
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax.xaxis.set_major_locator(mdates.DayLocator(interval=tick_interval))
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

        plt.tight_layout()

        # Save plot
        plot_path = self.results_dir / f"{indicator_name}_detailed_comparison.png"
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        print(f"Plot saved to {plot_path}")

        plt.show()

    def plot_all_comparisons(self):
        """Plot every entry of ``self.results``, closing figures in between."""
        print("\n=== Generating Detailed Comparison Plots ===")

        for indicator_name in self.results:
            print(f"Plotting {indicator_name}...")
            self.plot_comparison(indicator_name)
            plt.close('all')

    # ------------------------------------------------------------------
    # Report
    # ------------------------------------------------------------------

    def _summary_row(self, indicator_name, result):
        """Return the Markdown summary-table row for one result entry.

        A band without any comparable data point is shown as ``N/A`` and
        counts as an infinite difference, so the overall status is FAILED.
        """
        max_diffs = []
        for band in self.BANDS:
            valid_diff = self._valid_diff(result[f'original_{band}'], result[f'new_{band}'])
            if len(valid_diff) > 0:
                max_diffs.append(np.max(np.abs(valid_diff)))
            else:
                max_diffs.append(float('inf'))

        status = self._classify(max(max_diffs))
        cells = [f"{d:.2e}" if d != float('inf') else "N/A" for d in max_diffs]
        return (f"| {indicator_name} | {result['period']} | {result['std_dev']} | "
                f"{cells[0]} | {cells[1]} | {cells[2]} | {status} |")

    @staticmethod
    def _methodology_lines():
        """Return the fixed "Methodology" section of the report."""
        return [
            "## Methodology",
            "### Bollinger Bands (Close Price)",
            "- **Middle Band**: Simple Moving Average of Close prices",
            "- **Upper Band**: Middle Band + (Standard Deviation × Multiplier)",
            "- **Lower Band**: Middle Band - (Standard Deviation × Multiplier)",
            "- Uses Close price for all calculations",
            "",
            "### Bollinger Bands OHLC (Typical Price)",
            "- **Typical Price**: (High + Low + Close) / 3",
            "- **Middle Band**: Simple Moving Average of Typical prices",
            "- **Upper Band**: Middle Band + (Standard Deviation × Multiplier)",
            "- **Lower Band**: Middle Band - (Standard Deviation × Multiplier)",
            "- Uses Typical price for all calculations",
            "",
        ]

    def _detail_lines(self, indicator_name, result):
        """Return the "Detailed Analysis" subsection for one result entry."""
        lines = [
            f"### {indicator_name}",
            f"- **Type**: {result['type']}",
            f"- **Period**: {result['period']}",
            f"- **Standard Deviation Multiplier**: {result['std_dev']}",
        ]

        for band in self.BANDS:
            orig = np.array(result[f'original_{band}'])
            valid_diff = self._valid_diff(orig, result[f'new_{band}'])
            if len(valid_diff) == 0:
                continue

            mean_abs_diff = np.mean(np.abs(valid_diff))
            lines.append(f"- **{band.capitalize()} Band Analysis**:")
            lines.append(f"  - Valid data points: {len(valid_diff)}")
            lines.append(f"  - Max absolute difference: {np.max(np.abs(valid_diff)):.12f}")
            lines.append(f"  - Mean absolute difference: {mean_abs_diff:.12f}")
            lines.append(f"  - Standard deviation: {np.std(valid_diff):.12f}")

            # Band-specific metrics: error relative to the original band level
            valid_original = orig[~np.isnan(orig)]
            if len(valid_original) > 0:
                mean_value = np.mean(valid_original)
                relative_error = mean_abs_diff / mean_value * 100
                lines.append(f"  - Mean {band} value: {mean_value:.6f}")
                lines.append(f"  - Relative error: {relative_error:.2e}%")

        # Band width analysis
        orig_width = np.array(result['original_upper']) - np.array(result['original_lower'])
        new_width = np.array(result['new_upper']) - np.array(result['new_lower'])
        width_diff = new_width - orig_width
        valid_width_diff = width_diff[~np.isnan(width_diff)]

        if len(valid_width_diff) > 0:
            lines.append("- **Band Width Analysis**:")
            lines.append(f"  - Max width difference: {np.max(np.abs(valid_width_diff)):.12f}")
            lines.append(f"  - Mean width difference: {np.mean(np.abs(valid_width_diff)):.12f}")

        # Squeeze detection (when bands are narrow)
        valid_orig_width = orig_width[~np.isnan(orig_width)]
        if len(valid_orig_width) > 0:
            width_percentile_20 = np.percentile(valid_orig_width, 20)
            squeeze_periods = np.sum(valid_orig_width < width_percentile_20)
            lines.append(f"  - Squeeze periods (width < 20th percentile): {squeeze_periods}")

        lines.append("")
        return lines

    def generate_report(self):
        """Write ``bollinger_bands_report.md`` into ``self.results_dir``.

        The report contains a summary table, a fixed methodology section and
        a detailed per-indicator analysis of everything in ``self.results``.
        """
        print("\n=== Generating Bollinger Bands Report ===")

        report_lines = [
            "# Bollinger Bands Indicators Comparison Report",
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"Data file: {self.data_file}",
            f"Sample size: {len(self.data)} data points",
            "",
            "## Summary Table",
            "| Indicator | Period | Std Dev | Upper Max Diff | Middle Max Diff | Lower Max Diff | Status |",
            "|-----------|--------|---------|----------------|-----------------|----------------|--------|",
        ]
        for indicator_name, result in self.results.items():
            report_lines.append(self._summary_row(indicator_name, result))
        report_lines.append("")

        report_lines.extend(self._methodology_lines())

        report_lines.append("## Detailed Analysis")
        for indicator_name, result in self.results.items():
            report_lines.extend(self._detail_lines(indicator_name, result))

        # Save report
        report_path = self.results_dir / "bollinger_bands_report.md"
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(report_lines))

        print(f"Report saved to {report_path}")

    def run_tests(self):
        """Load the data, run both comparisons, then plot and report."""
        print("Starting Bollinger Bands Comparison Tests...")

        # Load data
        self.load_data()

        # Run tests
        self.test_bollinger_bands()
        self.test_bollinger_bands_ohlc()

        # Generate outputs
        self.plot_all_comparisons()
        self.generate_report()

        print("\n✅ Bollinger Bands tests completed!")


if __name__ == "__main__":
    # Script entry point: run the full comparison, limited to the last
    # 3000 rows of the default data file.
    BollingerBandsComparisonTest(sample_size=3000).run_tests()