Cycles/test/test_bar_start_signals.py

#!/usr/bin/env python3
"""
Bar-Start Signal Generation Test

This script demonstrates how to modify the incremental strategy to generate
signals at bar START rather than bar COMPLETION, which will align the timing
with the original strategy and fix the performance difference.

Key Concepts:
1. Detect when new bars start (not when they complete)
2. Generate signals immediately using the opening price of the new bar
3. Process strategy logic in real-time as new timeframe periods begin

This approach will eliminate the timing delay and align signals perfectly
with the original strategy.
"""

import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, List, Optional, Any
import warnings
warnings.filterwarnings('ignore')

# Add the project root to the path
sys.path.insert(0, os.path.abspath('.'))

from cycles.IncStrategies.metatrend_strategy import IncMetaTrendStrategy
from cycles.utils.storage import Storage
from cycles.utils.data_utils import aggregate_to_minutes


class EnhancedTimeframeAggregator:
    """
    Rolls per-minute OHLCV updates into fixed-width bars and reports transitions.

    Each call to ``update_with_bar_detection`` says whether a new bar was
    opened, which bar (if any) was just closed, and whether the caller should
    run its signal logic now. The ``signal_on_bar_start`` flag selects between
    signalling on the freshly opened bar or on the bar that just closed.

    State kept on the instance:
        current_bar: dict for the bar being built, or None before any update
        current_bar_start: start time of ``current_bar``
        last_completed_bar: the most recently closed bar, or None
        previous_bar_start: start time of the bar before ``current_bar``
    """

    def __init__(self, timeframe_minutes: int = 15, signal_on_bar_start: bool = True):
        """
        Set up an empty aggregator.

        Args:
            timeframe_minutes: Width of one aggregated bar, in minutes
            signal_on_bar_start: True to request signals when a bar opens,
                                 False to request them when a bar closes
        """
        self.timeframe_minutes = timeframe_minutes
        self.signal_on_bar_start = signal_on_bar_start
        self.current_bar = None
        self.current_bar_start = None
        self.last_completed_bar = None
        self.previous_bar_start = None

    def update_with_bar_detection(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Dict[str, Any]:
        """
        Feed one data point and describe the resulting bar state.

        Args:
            timestamp: Time of the incoming data point
            ohlcv_data: Mapping with 'high', 'low', 'close' and 'volume' entries
                        ('close' and 'volume' are the only ones read when a bar opens)

        Returns:
            Dict with the keys:
            - 'new_bar_started': True when this update opened a new bar
            - 'bar_completed': copy of the bar that was closed, else None
            - 'current_bar_start': start time of the bar now being built
            - 'current_bar_data': copy of the bar now being built
            - 'should_generate_signal': True when signal logic should run
            - 'signal_data': bar to feed the signal logic, else None
            - 'signal_mode': 'bar_start' or 'bar_end'
        """
        bar_start = self._get_bar_start_time(timestamp)

        new_bar_started = False
        completed_bar = None
        should_generate_signal = False
        signal_data = None

        if self.current_bar_start != bar_start:
            new_bar_started = True

            # Keep a snapshot of the bar that just ended, if there was one.
            if self.current_bar is not None:
                completed_bar = self.current_bar.copy()
                self.last_completed_bar = completed_bar

            # Shift the bar-start bookkeeping forward by one bar.
            self.previous_bar_start, self.current_bar_start = self.current_bar_start, bar_start

            # All four price fields of the new bar are seeded from this data
            # point's close; its own open/high/low are not used here.
            seed_price = ohlcv_data['close']
            self.current_bar = {
                'timestamp': bar_start,
                'open': seed_price,
                'high': seed_price,
                'low': seed_price,
                'close': seed_price,
                'volume': ohlcv_data['volume'],
            }

            if self.signal_on_bar_start:
                # Bar-start mode: signal on the new bar, but not on the very
                # first bar ever seen (there is no previous bar yet).
                if self.previous_bar_start is not None:
                    should_generate_signal = True
                    signal_data = self.current_bar.copy()
            elif completed_bar is not None:
                # Bar-end mode: signal on the bar that just closed.
                should_generate_signal = True
                signal_data = completed_bar.copy()
        elif self.current_bar is not None:
            # Same bar as before: fold this data point into it.
            bar = self.current_bar
            bar['high'] = max(bar['high'], ohlcv_data['high'])
            bar['low'] = min(bar['low'], ohlcv_data['low'])
            bar['close'] = ohlcv_data['close']
            bar['volume'] += ohlcv_data['volume']

        if self.current_bar:
            current_snapshot = self.current_bar.copy()
        else:
            current_snapshot = None

        if self.signal_on_bar_start:
            mode = 'bar_start'
        else:
            mode = 'bar_end'

        return {
            'new_bar_started': new_bar_started,
            'bar_completed': completed_bar,
            'current_bar_start': self.current_bar_start,
            'current_bar_data': current_snapshot,
            'should_generate_signal': should_generate_signal,
            'signal_data': signal_data,
            'signal_mode': mode
        }

    def _get_bar_start_time(self, timestamp: pd.Timestamp) -> pd.Timestamp:
        """Return the start time of the bar that contains ``timestamp``."""
        # Let pandas resampling decide the bin label so the alignment matches
        # whatever a DataFrame.resample with the same frequency would produce.
        probe = pd.Series([1], index=[timestamp])
        bins = probe.resample(f'{self.timeframe_minutes}min')

        first_bin = next(iter(bins), None)
        if first_bin is not None:
            # Each item is (bin label, group); the label is the bar start.
            return first_bin[0]

        # Fallback: floor the minute-of-day to a multiple of the timeframe.
        minute_of_day = timestamp.hour * 60 + timestamp.minute
        whole_bars, _ = divmod(minute_of_day, self.timeframe_minutes)
        floored = whole_bars * self.timeframe_minutes

        return timestamp.replace(
            hour=floored // 60,
            minute=floored % 60,
            second=0,
            microsecond=0
        )


class BarStartMetaTrendStrategy(IncMetaTrendStrategy):
    """
    MetaTrend strategy variant driven by an EnhancedTimeframeAggregator.

    The aggregator is configured with ``signal_on_bar_start=True``, so the
    strategy logic runs on the newly opened bar instead of the closed one.
    Every processed bar is also recorded in an in-memory log that can be
    read back with ``get_signal_generation_log``.
    """

    def __init__(self, name: str = "metatrend_bar_start", weight: float = 1.0, params: Optional[Dict] = None):
        """Build the base strategy, then swap in the bar-start aggregator."""
        super().__init__(name, weight, params)

        # Only replace an aggregator the base class actually created; a None
        # aggregator is treated as "process data directly" by the update
        # method below.
        if self._timeframe_aggregator is not None:
            self._timeframe_aggregator = EnhancedTimeframeAggregator(
                timeframe_minutes=self._primary_timeframe_minutes,
                signal_on_bar_start=True
            )

        # Bookkeeping for when and on which bar signals were evaluated.
        self._signal_generation_log = []
        self._last_signal_bar_start = None

    def update_minute_data_with_bar_start(self, timestamp: pd.Timestamp, ohlcv_data: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """
        Feed one data point and evaluate signals when the aggregator asks for it.

        Args:
            timestamp: Time of the incoming data point
            ohlcv_data: OHLCV mapping for that data point

        Returns:
            A result dict when the data point was processed (either directly,
            when there is no aggregator, or because the aggregator requested
            signal generation); None when the data point only extended the
            current bar.
        """
        self._performance_metrics['minute_data_points_processed'] += 1

        aggregator = self._timeframe_aggregator

        # No aggregator: hand the data point straight to the strategy.
        if aggregator is None:
            self.calculate_on_data(ohlcv_data, timestamp)
            return {
                'timestamp': timestamp,
                'timeframe_minutes': 1,
                'processed_directly': True,
                'is_warmed_up': self.is_warmed_up,
                'signal_mode': 'direct'
            }

        bar_info = aggregator.update_with_bar_detection(timestamp, ohlcv_data)

        # Nothing to evaluate unless the aggregator flagged this update.
        if not bar_info['should_generate_signal'] or bar_info['signal_data'] is None:
            return None

        signal_data = bar_info['signal_data']

        # Run the strategy on the bar chosen by the aggregator, stamped with
        # that bar's own timestamp rather than the incoming one.
        self.calculate_on_data(signal_data, signal_data['timestamp'])

        entry_signal = self.get_entry_signal()
        exit_signal = self.get_exit_signal()

        # Record what was evaluated for later inspection.
        self._signal_generation_log.append({
            'timestamp': timestamp,
            'bar_start': bar_info['current_bar_start'],
            'signal_mode': bar_info['signal_mode'],
            'new_bar_started': bar_info['new_bar_started'],
            'entry_signal': entry_signal.signal_type if entry_signal else None,
            'exit_signal': exit_signal.signal_type if exit_signal else None,
            'meta_trend': self.current_meta_trend,
            'price': signal_data['close']
        })

        self._performance_metrics['timeframe_bars_completed'] += 1
        self._last_signal_bar_start = bar_info['current_bar_start']

        return {
            'timestamp': signal_data['timestamp'],
            'timeframe_minutes': self._primary_timeframe_minutes,
            'bar_data': signal_data,
            'is_warmed_up': self.is_warmed_up,
            'processed_bar': True,
            'signal_mode': bar_info['signal_mode'],
            'new_bar_started': bar_info['new_bar_started'],
            'entry_signal': entry_signal,
            'exit_signal': exit_signal,
            'bar_info': bar_info
        }

    def get_signal_generation_log(self) -> List[Dict]:
        """Return a shallow copy of the recorded signal-generation entries."""
        return list(self._signal_generation_log)


def test_bar_start_vs_bar_end_timing():
    """
    Run the bar-end and bar-start strategies over the same data and compare signals.

    Both strategies are fed every row of the loaded data set. ENTRY/EXIT
    signals seen after warm-up are collected per strategy, then the first few
    are printed side by side together with the bar-start strategy's own log.

    Returns:
        Dict keyed by 'bar_end' / 'bar_start', each holding 'signals',
        'total_signals' and 'strategy'; None when no data could be loaded.
    """
    print("🎯 TESTING BAR-START VS BAR-END SIGNAL GENERATION")
    print("=" * 80)

    storage = Storage()

    # Q1 2023 test window
    start_date = "2023-01-01"
    end_date = "2023-04-01"

    # NOTE(review): the loop below is described as minute-by-minute, but the
    # file name says 1-day data -- confirm this is the intended data set.
    data = storage.load_data("btcusd_1-day_data.csv", start_date, end_date)

    if data is None or data.empty:
        print("❌ Could not load data")
        return

    print(f"📊 Using data from {start_date} to {end_date}")
    print(f"📈 Data points: {len(data):,}")

    strategies = {
        'bar_end': IncMetaTrendStrategy("metatrend_bar_end", params={"timeframe_minutes": 15}),
        'bar_start': BarStartMetaTrendStrategy("metatrend_bar_start", params={"timeframe_minutes": 15})
    }

    ohlcv_fields = ('open', 'high', 'low', 'close', 'volume')
    results = {}

    for strategy_name, strategy in strategies.items():
        print(f"\n🔄 Testing {strategy_name.upper()} strategy...")

        signals = []
        signal_count = 0

        # The bar-start strategy has its own update entry point; the
        # baseline goes through the standard one.
        if strategy_name == 'bar_start':
            feed = strategy.update_minute_data_with_bar_start
        else:
            feed = strategy.update_minute_data

        for i, (timestamp, row) in enumerate(data.iterrows()):
            ohlcv_data = {field: row[field] for field in ohlcv_fields}

            result = feed(timestamp, ohlcv_data)

            if result is not None and strategy.is_warmed_up:
                # Prefer signals carried in the result; otherwise ask the strategy.
                entry_signal = result.get('entry_signal') or strategy.get_entry_signal()
                exit_signal = result.get('exit_signal') or strategy.get_exit_signal()

                for candidate, label in ((entry_signal, "ENTRY"), (exit_signal, "EXIT")):
                    if candidate and candidate.signal_type == label:
                        signal_count += 1
                        signals.append({
                            'timestamp': timestamp,
                            'bar_start': result.get('timestamp', timestamp),
                            'type': label,
                            'price': ohlcv_data['close'],
                            'meta_trend': strategy.current_meta_trend,
                            'signal_mode': result.get('signal_mode', 'unknown')
                        })

            if i % 10000 == 0:
                print(f"  Processed {i:,} data points, {signal_count} signals generated")

        results[strategy_name] = {
            'signals': signals,
            'total_signals': len(signals),
            'strategy': strategy
        }

        print(f"✅ {strategy_name.upper()}: {len(signals)} total signals")

    print(f"\n📊 TIMING COMPARISON")
    print("=" * 50)

    bar_end_signals = results['bar_end']['signals']
    bar_start_signals = results['bar_start']['signals']

    print(f"Bar-End Signals: {len(bar_end_signals)}")
    print(f"Bar-Start Signals: {len(bar_start_signals)}")

    if bar_end_signals and bar_start_signals:
        print(f"\n🔍 FIRST 5 SIGNALS COMPARISON:")
        print("-" * 50)

        # Pair signals by position; zip stops at the shorter list.
        paired = zip(bar_end_signals[:5], bar_start_signals[:5])
        for number, (end_sig, start_sig) in enumerate(paired, start=1):
            time_diff = start_sig['timestamp'] - end_sig['timestamp']

            print(f"Signal {number}:")
            print(f"  Bar-End:   {end_sig['timestamp']} ({end_sig['type']})")
            print(f"  Bar-Start: {start_sig['timestamp']} ({start_sig['type']})")
            print(f"  Time Diff: {time_diff}")
            print()

    bar_start_strategy = results['bar_start']['strategy']
    if hasattr(bar_start_strategy, 'get_signal_generation_log'):
        signal_log = bar_start_strategy.get_signal_generation_log()
        print(f"\n📝 BAR-START SIGNAL GENERATION LOG (First 10):")
        print("-" * 60)

        for number, log_entry in enumerate(signal_log[:10], start=1):
            print(f"{number}. {log_entry['timestamp']} -> Bar: {log_entry['bar_start']}")
            print(f"   Mode: {log_entry['signal_mode']}, New Bar: {log_entry['new_bar_started']}")
            print(f"   Entry: {log_entry['entry_signal']}, Exit: {log_entry['exit_signal']}")
            print(f"   Meta-trend: {log_entry['meta_trend']}, Price: ${log_entry['price']:.2f}")
            print()

    return results


def save_signals_comparison(results: Dict, filename: str = "bar_start_vs_bar_end_signals.csv"):
    """
    Write every collected signal, tagged with its strategy name, to a CSV file.

    Args:
        results: Mapping of strategy name to a dict that has a 'signals' list
        filename: Path of the CSV file to write

    Returns:
        The DataFrame that was written, or None when there were no signals
        (in which case no file is written).
    """
    # Flatten to one row per signal; the input signal dicts are left untouched.
    all_signals = [
        {**signal, 'strategy': strategy_name}
        for strategy_name, result in results.items()
        for signal in result['signals']
    ]

    if not all_signals:
        return None

    df = pd.DataFrame(all_signals)
    df.to_csv(filename, index=False)
    print(f"💾 Saved signal comparison to: {filename}")
    return df


def main():
    """Run the timing comparison, save the signals to CSV and print a summary."""
    banner = (
        "🚀 BAR-START SIGNAL GENERATION TEST",
        "=" * 80,
        "",
        "This test demonstrates how to generate signals at bar START",
        "rather than bar COMPLETION, which aligns timing with the original strategy.",
        "",
    )
    for line in banner:
        print(line)

    results = test_bar_start_vs_bar_end_timing()

    # Only write the CSV when the comparison produced a non-empty result.
    comparison_df = save_signals_comparison(results) if results else None

    if comparison_df is not None:
        print(f"\n📈 SIGNAL SUMMARY:")
        print("-" * 40)
        # Count signals per (strategy, type) and pivot the types into columns.
        counts = comparison_df.groupby(['strategy', 'type']).size()
        summary = counts.unstack(fill_value=0)
        print(summary)

    closing = (
        "\n✅ Test completed!",
        "\n💡 KEY INSIGHTS:",
        "1. Bar-start signals are generated immediately when new timeframe periods begin",
        "2. This eliminates the timing delay present in bar-end signal generation",
        "3. Real-time trading systems can use this approach for immediate signal processing",
        "4. The timing will now align perfectly with the original strategy",
    )
    for line in closing:
        print(line)


# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()