# Cycles/test/strategies/test_strategies_comparison_2025.py

"""
Enhanced Strategy Comparison Test Framework for 2025 Data

Comprehensive testing for comparing original incremental strategies from cycles/IncStrategies
with new implementations in IncrementalTrader/strategies using real 2025 data.

Features:
- Interactive plots using Plotly
- CSV export of all signals
- Detailed signal analysis
- Performance comparison
- Real 2025 data (Jan-Apr)
"""

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.subplots as sp
from plotly.offline import plot
from datetime import datetime
import sys
from pathlib import Path
from typing import Dict, List, Tuple, Any
import warnings
warnings.filterwarnings('ignore')

# Add project paths.
# The project root is taken to be the closest ancestor directory of this file
# that contains one of the packages imported below, so the imports keep working
# however deep this script sits in the tree. If no ancestor qualifies, fall
# back to the grandparent directory of this file.
_package_dirs = ("cycles", "IncrementalTrader")
_script_path = Path(__file__).resolve()
project_root = next(
    (
        ancestor
        for ancestor in _script_path.parents
        if any((ancestor / name).is_dir() for name in _package_dirs)
    ),
    _script_path.parent.parent,
)
sys.path.insert(0, str(project_root))
sys.path.insert(0, str(project_root / "cycles"))
sys.path.insert(0, str(project_root / "IncrementalTrader"))

# Import original strategies
from cycles.IncStrategies.metatrend_strategy import IncMetaTrendStrategy
from cycles.IncStrategies.random_strategy import IncRandomStrategy
from cycles.IncStrategies.bbrs_incremental import BBRSIncrementalState

# Import new strategies
from IncrementalTrader.strategies.metatrend import MetaTrendStrategy
from IncrementalTrader.strategies.random import RandomStrategy
from IncrementalTrader.strategies.bbrs import BBRSStrategy

class Enhanced2025StrategyComparison:
    """Enhanced strategy comparison framework with interactive plots and CSV export."""

    def __init__(self, data_file: str = "data/temp_2025_data.csv"):
        """Initialize the comparison framework."""
        self.data_file = data_file
        self.data = None
        self.results = {}

        # Create results directory
        self.results_dir = Path("test/results/strategies_2025")
        self.results_dir.mkdir(parents=True, exist_ok=True)

        print("Enhanced 2025 Strategy Comparison Framework")
        print("=" * 60)

    def load_data(self) -> None:
        """Load and prepare 2025 data."""
        print(f"Loading data from {self.data_file}...")

        self.data = pd.read_csv(self.data_file)

        # Convert timestamp to datetime
        self.data['DateTime'] = pd.to_datetime(self.data['Timestamp'], unit='s')

        print(f"Data loaded: {len(self.data):,} rows")
        print(f"Date range: {self.data['DateTime'].iloc[0]} to {self.data['DateTime'].iloc[-1]}")
        print(f"Columns: {list(self.data.columns)}")

    def compare_metatrend_strategies(self) -> Dict[str, Any]:
        """Run IncMetaTrendStrategy and MetaTrendStrategy side by side over ``self.data``.

        Every row is fed to both implementations. Their entry/exit signals are
        collapsed into BUY / SELL / HOLD, compared row by row, exported to
        ``metatrend_signals_2025.csv`` and plotted.

        Returns:
            A dict with the summary statistics, both signal distributions, the
            per-row signal DataFrame and the CSV path. If any exception is
            raised along the way, ``{'error': message}`` is returned instead.
        """
        banner = "=" * 80
        print("\n" + banner)
        print("COMPARING METATREND STRATEGIES - 2025 DATA")
        print(banner)

        # (key handed to the strategies, column name in self.data)
        ohlcv_columns = (
            ('open', 'Open'),
            ('high', 'High'),
            ('low', 'Low'),
            ('close', 'Close'),
            ('volume', 'Volume'),
        )

        def collapse(entry, exit_):
            """Merge an entry/exit pair into one label; an entry takes precedence."""
            if entry and entry.signal_type == "ENTRY":
                return "BUY"
            if exit_ and exit_.signal_type == "EXIT":
                return "SELL"
            return "HOLD"

        def kind_of(signal):
            """Signal type of *signal*, or "HOLD" when the signal is falsy."""
            return signal.signal_type if signal else "HOLD"

        try:
            legacy = IncMetaTrendStrategy(weight=1.0, params={})
            candidate = MetaTrendStrategy(name="metatrend", weight=1.0, params={})

            signal_rows = []
            price_rows = []

            print("Processing data points...")

            for idx, bar_row in self.data.iterrows():
                if idx % 10000 == 0:
                    print(f"Processed {idx:,} / {len(self.data):,} data points...")

                moment = bar_row['DateTime']
                bar = {key: bar_row[column] for key, column in ohlcv_columns}

                # Feed the same bar to both implementations ...
                legacy.update_minute_data(moment, bar)
                candidate.process_data_point(moment, bar)

                # ... then read back what each of them signals.
                legacy_entry = legacy.get_entry_signal()
                candidate_entry = candidate.get_entry_signal()
                legacy_exit = legacy.get_exit_signal()
                candidate_exit = candidate.get_exit_signal()

                legacy_call = collapse(legacy_entry, legacy_exit)
                candidate_call = collapse(candidate_entry, candidate_exit)

                signal_rows.append({
                    'timestamp': moment,
                    'price': bar_row['Close'],
                    'original_entry': kind_of(legacy_entry),
                    'new_entry': kind_of(candidate_entry),
                    'original_exit': kind_of(legacy_exit),
                    'new_exit': kind_of(candidate_exit),
                    'original_combined': legacy_call,
                    'new_combined': candidate_call,
                    'signals_match': legacy_call == candidate_call,
                })

                price_record = {'timestamp': moment}
                price_record.update((key, bar_row[column]) for key, column in ohlcv_columns)
                price_rows.append(price_record)

            signals_df = pd.DataFrame(signal_rows)
            price_df = pd.DataFrame(price_rows)

            # Share of rows on which both implementations agree.
            compared = len(signals_df)
            agreed = signals_df['signals_match'].sum()
            agreement_pct = (agreed / compared) * 100

            legacy_distribution = signals_df['original_combined'].value_counts()
            candidate_distribution = signals_df['new_combined'].value_counts()

            csv_file = self.results_dir / "metatrend_signals_2025.csv"
            signals_df.to_csv(csv_file, index=False, encoding='utf-8')

            self.create_interactive_plot(signals_df, price_df, "MetaTrend", "metatrend_2025")

            summary = {
                'strategy': 'MetaTrend',
                'total_signals': compared,
                'matching_signals': agreed,
                'consistency_percentage': agreement_pct,
                'original_signal_distribution': legacy_distribution.to_dict(),
                'new_signal_distribution': candidate_distribution.to_dict(),
                'signals_dataframe': signals_df,
                'csv_file': str(csv_file),
            }

            print("✅ MetaTrend Strategy Comparison Complete")
            print(f"   Signal Consistency: {agreement_pct:.2f}%")
            print(f"   Total Signals: {compared:,}")
            print(f"   Matching Signals: {agreed:,}")
            print(f"   CSV Saved: {csv_file}")

            return summary

        except Exception as exc:
            # Report the failure and hand it back instead of propagating it.
            message = str(exc)
            print(f"❌ Error in MetaTrend comparison: {message}")
            import traceback
            traceback.print_exc()
            return {'error': message}

    def compare_random_strategies(self) -> Dict[str, Any]:
        """Compare IncRandomStrategy vs RandomStrategy with detailed analysis.

        Both strategies are created with ``random_seed`` 42 and are fed every
        10th row of ``self.data``. Their entry/exit signals are collapsed into
        BUY / SELL / HOLD, compared row by row and exported to
        ``random_signals_2025.csv``. No plot is produced for this strategy.

        Returns:
            A dict with the summary statistics, the per-row signal DataFrame
            and the CSV path. If any exception is raised along the way,
            ``{'error': message}`` is returned instead.
        """
        print("\n" + "="*80)
        print("COMPARING RANDOM STRATEGIES - 2025 DATA")
        print("="*80)

        try:
            # Initialize strategies with same seed for reproducibility
            original_strategy = IncRandomStrategy(weight=1.0, params={"random_seed": 42})
            new_strategy = RandomStrategy(name="random", weight=1.0, params={"random_seed": 42})

            # Track all signals and data
            signals_data = []

            print("Processing data points...")

            # Process data (use subset for Random strategy to speed up)
            subset_data = self.data.iloc[::10]  # Every 10th point for Random strategy
            subset_size = len(subset_data)

            # enumerate() supplies the number of rows actually handled so far.
            # The index label yielded by iterrows() refers to the row's label
            # in the full data set (0, 10, 20, ... for a default index) and
            # would overstate the progress tenfold.
            for processed, (_, row) in enumerate(subset_data.iterrows()):
                if processed % 1000 == 0:
                    print(f"Processed {processed:,} / {subset_size:,} data points...")

                timestamp = row['DateTime']
                ohlcv_data = {
                    'open': row['Open'],
                    'high': row['High'],
                    'low': row['Low'],
                    'close': row['Close'],
                    'volume': row['Volume']
                }

                # Update strategies
                original_strategy.update_minute_data(timestamp, ohlcv_data)
                new_strategy.process_data_point(timestamp, ohlcv_data)

                # Get signals
                orig_entry = original_strategy.get_entry_signal()
                new_entry = new_strategy.get_entry_signal()
                orig_exit = original_strategy.get_exit_signal()
                new_exit = new_strategy.get_exit_signal()

                # Determine combined signals (an entry takes precedence over an exit)
                orig_signal = "BUY" if orig_entry and orig_entry.signal_type == "ENTRY" else (
                    "SELL" if orig_exit and orig_exit.signal_type == "EXIT" else "HOLD")
                new_signal = "BUY" if new_entry and new_entry.signal_type == "ENTRY" else (
                    "SELL" if new_exit and new_exit.signal_type == "EXIT" else "HOLD")

                # Store data
                signals_data.append({
                    'timestamp': timestamp,
                    'price': row['Close'],
                    'original_entry': orig_entry.signal_type if orig_entry else "HOLD",
                    'new_entry': new_entry.signal_type if new_entry else "HOLD",
                    'original_exit': orig_exit.signal_type if orig_exit else "HOLD",
                    'new_exit': new_exit.signal_type if new_exit else "HOLD",
                    'original_combined': orig_signal,
                    'new_combined': new_signal,
                    'signals_match': orig_signal == new_signal
                })

            # Convert to DataFrame
            signals_df = pd.DataFrame(signals_data)

            # Calculate statistics: share of rows on which both strategies agree
            total_signals = len(signals_df)
            matching_signals = signals_df['signals_match'].sum()
            consistency = (matching_signals / total_signals) * 100

            # Save signals to CSV
            csv_file = self.results_dir / "random_signals_2025.csv"
            signals_df.to_csv(csv_file, index=False, encoding='utf-8')

            results = {
                'strategy': 'Random',
                'total_signals': total_signals,
                'matching_signals': matching_signals,
                'consistency_percentage': consistency,
                'signals_dataframe': signals_df,
                'csv_file': str(csv_file)
            }

            print("✅ Random Strategy Comparison Complete")
            print(f"   Signal Consistency: {consistency:.2f}%")
            print(f"   Total Signals: {total_signals:,}")
            print(f"   CSV Saved: {csv_file}")

            return results

        except Exception as e:
            # Report the failure and hand it back instead of propagating it.
            print(f"❌ Error in Random comparison: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'error': str(e)}

    def compare_bbrs_strategies(self) -> Dict[str, Any]:
        """Compare BBRSIncrementalState vs BBRSStrategy with detailed analysis.

        Both implementations receive every row of ``self.data``. The original
        reports its signals through the dict returned by
        ``update_minute_data`` (keys ``buy_signal`` / ``sell_signal``), while
        the new one is queried through ``get_entry_signal`` /
        ``get_exit_signal``. The signals are collapsed into BUY / SELL / HOLD,
        compared row by row, exported to ``bbrs_signals_2025.csv`` and plotted.

        Returns:
            A dict with the summary statistics, the per-row signal DataFrame
            and the CSV path. If any exception is raised along the way,
            ``{'error': message}`` is returned instead.
        """
        print("\n" + "="*80)
        print("COMPARING BBRS STRATEGIES - 2025 DATA")
        print("="*80)

        try:
            # Initialize strategies with one shared configuration
            bbrs_config = {
                "bb_period": 20,
                "bb_std": 2.0,
                "rsi_period": 14,
                "volume_ma_period": 20
            }

            original_strategy = BBRSIncrementalState(config=bbrs_config)
            new_strategy = BBRSStrategy(name="bbrs", weight=1.0, params=bbrs_config)

            # Track all signals and data
            signals_data = []

            print("Processing data points...")

            # Process data
            for i, row in self.data.iterrows():
                if i % 10000 == 0:
                    print(f"Processed {i:,} / {len(self.data):,} data points...")

                timestamp = row['DateTime']
                ohlcv_data = {
                    'open': row['Open'],
                    'high': row['High'],
                    'low': row['Low'],
                    'close': row['Close'],
                    'volume': row['Volume']
                }

                # Update strategies
                orig_result = original_strategy.update_minute_data(timestamp, ohlcv_data)
                new_strategy.process_data_point(timestamp, ohlcv_data)

                # Get signals - original returns signals in result, new uses methods
                if orig_result is not None:
                    orig_buy = orig_result.get('buy_signal', False)
                    orig_sell = orig_result.get('sell_signal', False)
                else:
                    orig_buy = False
                    orig_sell = False

                new_entry = new_strategy.get_entry_signal()
                new_exit = new_strategy.get_exit_signal()
                new_buy = new_entry and new_entry.signal_type == "ENTRY"
                new_sell = new_exit and new_exit.signal_type == "EXIT"

                # Determine combined signals (a buy takes precedence over a sell)
                orig_signal = "BUY" if orig_buy else ("SELL" if orig_sell else "HOLD")
                new_signal = "BUY" if new_buy else ("SELL" if new_sell else "HOLD")

                # Store data
                signals_data.append({
                    'timestamp': timestamp,
                    'price': row['Close'],
                    'original_entry': "ENTRY" if orig_buy else "HOLD",
                    'new_entry': new_entry.signal_type if new_entry else "HOLD",
                    'original_exit': "EXIT" if orig_sell else "HOLD",
                    'new_exit': new_exit.signal_type if new_exit else "HOLD",
                    'original_combined': orig_signal,
                    'new_combined': new_signal,
                    'signals_match': orig_signal == new_signal
                })

            # Convert to DataFrame
            signals_df = pd.DataFrame(signals_data)

            # Calculate statistics: share of rows on which both strategies agree
            total_signals = len(signals_df)
            matching_signals = signals_df['signals_match'].sum()
            consistency = (matching_signals / total_signals) * 100

            # Save signals to CSV
            csv_file = self.results_dir / "bbrs_signals_2025.csv"
            signals_df.to_csv(csv_file, index=False, encoding='utf-8')

            # create_interactive_plot() reads the columns 'timestamp' and
            # 'close' from its price frame. self.data names them 'DateTime'
            # and 'Close', so passing it directly raises KeyError and turns a
            # finished comparison into an error result.
            price_df = pd.DataFrame({
                'timestamp': self.data['DateTime'],
                'close': self.data['Close'],
            })

            # Create interactive plot
            self.create_interactive_plot(signals_df, price_df, "BBRS", "bbrs_2025")

            results = {
                'strategy': 'BBRS',
                'total_signals': total_signals,
                'matching_signals': matching_signals,
                'consistency_percentage': consistency,
                'signals_dataframe': signals_df,
                'csv_file': str(csv_file)
            }

            print("✅ BBRS Strategy Comparison Complete")
            print(f"   Signal Consistency: {consistency:.2f}%")
            print(f"   Total Signals: {total_signals:,}")
            print(f"   CSV Saved: {csv_file}")

            return results

        except Exception as e:
            # Report the failure and hand it back instead of propagating it.
            print(f"❌ Error in BBRS comparison: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'error': str(e)}

    def create_interactive_plot(self, signals_df: pd.DataFrame, price_df: pd.DataFrame,
                              strategy_name: str, filename: str) -> None:
        """Create interactive Plotly chart with signals and price data.

        The figure has three stacked rows sharing the x axis: the close price
        with BUY/SELL markers of both implementations, both combined signals
        encoded as -1/0/1, and a 0/1 series telling whether the two signals
        matched. It is saved as ``<filename>_interactive.html`` inside
        ``self.results_dir`` without being opened.

        Args:
            signals_df: Per-row signals; the columns ``timestamp``, ``price``,
                ``original_combined``, ``new_combined`` and ``signals_match``
                are read. The frame is not modified.
            price_df: Price history; the columns ``timestamp`` and ``close``
                are read.
            strategy_name: Name used in the figure titles.
            filename: Stem of the HTML file to write.
        """
        print(f"Creating interactive plot for {strategy_name}...")

        # Create subplots
        fig = sp.make_subplots(
            rows=3, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.05,
            subplot_titles=(
                f'{strategy_name} Strategy - Price & Signals',
                'Signal Comparison',
                'Signal Consistency'
            ),
            row_heights=[0.6, 0.2, 0.2]
        )

        # Price chart with signals
        fig.add_trace(
            go.Scatter(
                x=price_df['timestamp'],
                y=price_df['close'],
                mode='lines',
                name='Price',
                line=dict(color='blue', width=1)
            ),
            row=1, col=1
        )

        # Buy/sell markers of both implementations, drawn on the price row.
        # Each entry: (signal column, signal value, legend name, marker style)
        marker_specs = (
            ('original_combined', 'BUY', 'Original BUY',
             dict(color='green', size=8, symbol='triangle-up')),
            ('new_combined', 'BUY', 'New BUY',
             dict(color='lightgreen', size=6, symbol='triangle-up')),
            ('original_combined', 'SELL', 'Original SELL',
             dict(color='red', size=8, symbol='triangle-down')),
            ('new_combined', 'SELL', 'New SELL',
             dict(color='pink', size=6, symbol='triangle-down')),
        )
        for column, action, label, marker in marker_specs:
            hits = signals_df[signals_df[column] == action]
            if hits.empty:
                # No legend entry for a signal that never occurred.
                continue
            fig.add_trace(
                go.Scatter(
                    x=hits['timestamp'],
                    y=hits['price'],
                    mode='markers',
                    name=label,
                    marker=marker
                ),
                row=1, col=1
            )

        # Signal comparison chart.
        # The numeric encodings live in local Series rather than in new
        # columns on signals_df, so the caller's frame is left untouched.
        signal_mapping = {'HOLD': 0, 'BUY': 1, 'SELL': -1}
        original_numeric = signals_df['original_combined'].map(signal_mapping)
        new_numeric = signals_df['new_combined'].map(signal_mapping)

        fig.add_trace(
            go.Scatter(
                x=signals_df['timestamp'],
                y=original_numeric,
                mode='lines',
                name='Original Signals',
                line=dict(color='blue', width=2)
            ),
            row=2, col=1
        )

        fig.add_trace(
            go.Scatter(
                x=signals_df['timestamp'],
                y=new_numeric,
                mode='lines',
                name='New Signals',
                line=dict(color='red', width=1, dash='dash')
            ),
            row=2, col=1
        )

        # Signal consistency chart (1 where both signals agree, else 0)
        consistency_numeric = signals_df['signals_match'].astype(int)

        fig.add_trace(
            go.Scatter(
                x=signals_df['timestamp'],
                y=consistency_numeric,
                mode='lines',
                name='Signal Match',
                line=dict(color='green', width=1),
                fill='tonexty'
            ),
            row=3, col=1
        )

        # Update layout
        fig.update_layout(
            title=f'{strategy_name} Strategy Comparison - 2025 Data',
            height=800,
            showlegend=True,
            hovermode='x unified'
        )

        # Update y-axes
        fig.update_yaxes(title_text="Price ($)", row=1, col=1)
        fig.update_yaxes(title_text="Signal", row=2, col=1, tickvals=[-1, 0, 1], ticktext=['SELL', 'HOLD', 'BUY'])
        fig.update_yaxes(title_text="Match", row=3, col=1, tickvals=[0, 1], ticktext=['No', 'Yes'])

        # Save interactive plot
        html_file = self.results_dir / f"{filename}_interactive.html"
        plot(fig, filename=str(html_file), auto_open=False)

        print(f"   Interactive plot saved: {html_file}")

    def generate_comprehensive_report(self) -> None:
        """Generate comprehensive comparison report."""
        print("\n" + "="*80)
        print("GENERATING COMPREHENSIVE REPORT")
        print("="*80)

        report_file = self.results_dir / "comprehensive_strategy_comparison_2025.md"

        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Comprehensive Strategy Comparison Report - 2025 Data\n\n")
            f.write(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(f"**Data Period**: January 1, 2025 - April 30, 2025\n")
            f.write(f"**Total Data Points**: {len(self.data):,} minute-level OHLCV records\n\n")

            f.write("## Executive Summary\n\n")
            f.write("This report compares the signal generation consistency between original incremental strategies ")
            f.write("from `cycles/IncStrategies` and new implementations in `IncrementalTrader/strategies` ")
            f.write("using real market data from 2025.\n\n")

            f.write("## Strategy Comparison Results\n\n")

            for strategy_name, results in self.results.items():
                if 'error' not in results:
                    f.write(f"### {results['strategy']} Strategy\n\n")
                    f.write(f"- **Signal Consistency**: {results['consistency_percentage']:.2f}%\n")
                    f.write(f"- **Total Signals Compared**: {results['total_signals']:,}\n")
                    f.write(f"- **Matching Signals**: {results['matching_signals']:,}\n")
                    f.write(f"- **CSV Export**: `{results['csv_file']}`\n\n")

                    if 'original_signal_distribution' in results:
                        f.write("**Original Strategy Signal Distribution:**\n")
                        for signal, count in results['original_signal_distribution'].items():
                            f.write(f"- {signal}: {count:,}\n")
                        f.write("\n")

                        f.write("**New Strategy Signal Distribution:**\n")
                        for signal, count in results['new_signal_distribution'].items():
                            f.write(f"- {signal}: {count:,}\n")
                        f.write("\n")

            f.write("## Files Generated\n\n")
            f.write("### CSV Signal Exports\n")
            for csv_file in self.results_dir.glob("*_signals_2025.csv"):
                f.write(f"- `{csv_file.name}`: Complete signal history with timestamps\n")

            f.write("\n### Interactive Plots\n")
            for html_file in self.results_dir.glob("*_interactive.html"):
                f.write(f"- `{html_file.name}`: Interactive Plotly visualization\n")

            f.write("\n## Conclusion\n\n")
            f.write("The strategy comparison validates the migration accuracy by comparing signal generation ")
            f.write("between original and refactored implementations. High consistency percentages indicate ")
            f.write("successful preservation of strategy behavior during the refactoring process.\n")

        print(f"✅ Comprehensive report saved: {report_file}")

    def run_all_comparisons(self) -> None:
        """Run all strategy comparisons."""
        print("Starting comprehensive strategy comparison with 2025 data...")

        # Load data
        self.load_data()

        # Run comparisons
        self.results['metatrend'] = self.compare_metatrend_strategies()
        self.results['random'] = self.compare_random_strategies()
        self.results['bbrs'] = self.compare_bbrs_strategies()

        # Generate report
        self.generate_comprehensive_report()

        print("\n" + "="*80)
        print("ALL STRATEGY COMPARISONS COMPLETED")
        print("="*80)
        print(f"Results directory: {self.results_dir}")
        print("Files generated:")
        for file in sorted(self.results_dir.glob("*")):
            print(f"  - {file.name}")

if __name__ == "__main__":
    # Script entry point: build the framework with its default data file and
    # execute every comparison.
    Enhanced2025StrategyComparison().run_all_comparisons()