# Cycles/test/test_realtime_simulation.py

#!/usr/bin/env python3
"""
Real-Time Simulation Tests

This module simulates real-time trading conditions to verify that the new
timeframe aggregation works correctly in live trading scenarios.
"""

import pandas as pd
import numpy as np
import sys
import os
import time
import logging
import threading
import queue
from typing import List, Dict, Any, Optional, Generator
import unittest
from datetime import datetime, timedelta

# Add the project root to Python path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from IncrementalTrader.strategies.metatrend import MetaTrendStrategy
from IncrementalTrader.strategies.bbrs import BBRSStrategy
from IncrementalTrader.strategies.random import RandomStrategy
from IncrementalTrader.utils.timeframe_utils import MinuteDataBuffer, aggregate_minute_data_to_timeframe

# Configure the root logger with a WARNING threshold (applies only if no
# handlers have been installed on the root logger yet).
logging.basicConfig(level=logging.WARNING)


class RealTimeDataSimulator:
    """Simulates real-time market data feed.

    Replays ``data`` in order from a background daemon thread, handing each
    data point to every subscribed callback. Between two points the feed
    sleeps ``60.0 / speed_multiplier / 1000`` seconds.

    Attributes:
        data: The data points to replay.
        speed_multiplier: Divisor applied to the inter-point delay.
        current_index: Index of the next data point to deliver.
        is_running: True while the feed is expected to keep delivering; it is
            cleared by ``stop()`` and when the feed thread finishes.
        subscribers: Callbacks invoked with each data point.
        thread: The feed thread, or ``None`` before the first ``start()``.
    """

    def __init__(self, data: List[Dict[str, Any]], speed_multiplier: float = 1.0):
        """Store the data to replay and the replay speed.

        Args:
            data: Data points to deliver, in delivery order.
            speed_multiplier: Positive factor; larger values shorten the delay
                between data points.

        Raises:
            ValueError: If ``speed_multiplier`` is not positive. The delay is
                computed by dividing by it, so zero or a negative value would
                otherwise kill the feed thread after the first data point.
        """
        if speed_multiplier <= 0:
            raise ValueError(
                f"speed_multiplier must be positive, got {speed_multiplier!r}"
            )

        self.data = data
        self.speed_multiplier = speed_multiplier
        self.current_index = 0
        self.is_running = False
        self.subscribers = []
        # Always defined so stop() does not need to probe with hasattr().
        self.thread: Optional[threading.Thread] = None

    def subscribe(self, callback):
        """Subscribe to data updates.

        Args:
            callback: Callable invoked with each data point (one positional
                argument). Exceptions it raises are reported and ignored.
        """
        self.subscribers.append(callback)

    def start(self):
        """Start the real-time data feed.

        Does nothing if a feed thread is already running, so repeated calls
        cannot deliver the same data through two competing threads.
        """
        if self.thread is not None and self.thread.is_alive():
            return

        self.is_running = True

        def data_feed():
            try:
                while self.is_running and self.current_index < len(self.data):
                    data_point = self.data[self.current_index]

                    # Notify all subscribers; one failing subscriber must not
                    # prevent delivery to the others or stop the feed.
                    for callback in self.subscribers:
                        try:
                            callback(data_point)
                        except Exception as e:
                            print(f"Error in subscriber callback: {e}")

                    self.current_index += 1

                    # One simulated minute is 60 s / speed_multiplier; the
                    # extra / 1000 shrinks the delay further so tests run fast.
                    time.sleep(60.0 / self.speed_multiplier / 1000)
            finally:
                # Reflect that the feed has ended, whether it was stopped or
                # simply ran out of data.
                self.is_running = False

        self.thread = threading.Thread(target=data_feed, daemon=True)
        self.thread.start()

    def stop(self):
        """Stop the real-time data feed.

        Signals the feed loop to exit and waits up to one second for the feed
        thread to finish. Safe to call before ``start()`` and from inside a
        subscriber callback (a thread cannot join itself, so the wait is
        skipped in that case).
        """
        self.is_running = False
        thread = self.thread
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=1.0)


class RealTimeStrategyRunner:
    """Drives a single strategy with incoming data points and records results.

    Attributes:
        strategy: Object exposing ``process_data_point(timestamp, ohlcv)``.
        name: Label used to identify this runner in reports.
        signals: One dict per non-HOLD signal returned by the strategy.
        processing_times: Seconds spent handling each data point.
        data_points_received: Number of data points handled so far.
        last_bar_timestamps: Empty dict; not written by this class.
    """

    def __init__(self, strategy, name: str):
        self.strategy = strategy
        self.name = name
        self.signals = []
        self.processing_times = []
        self.data_points_received = 0
        self.last_bar_timestamps = {}

    def on_data(self, data_point: Dict[str, Any]):
        """Forward one data point to the strategy and record timing/signals.

        Args:
            data_point: Mapping with ``timestamp``, ``open``, ``high``,
                ``low``, ``close`` and ``volume`` entries.
        """
        began = time.perf_counter()

        ts = data_point['timestamp']
        bar = {
            field: data_point[field]
            for field in ('open', 'high', 'low', 'close', 'volume')
        }

        # Hand the bar to the strategy; the elapsed time covers field
        # extraction as well as the strategy call itself.
        result = self.strategy.process_data_point(ts, bar)

        elapsed = time.perf_counter() - began
        self.processing_times.append(elapsed)
        self.data_points_received += 1

        # Nothing to record for a missing signal or a HOLD.
        if not result or result.signal_type == "HOLD":
            return

        self.signals.append({
            'timestamp': ts,
            'signal_type': result.signal_type,
            'confidence': result.confidence,
            'processing_time': elapsed,
        })


class TestRealTimeSimulation(unittest.TestCase):
    """Test real-time simulation scenarios."""

    def setUp(self):
        """Prepare fresh minute data and the strategy configurations."""
        # Four hours of simulated one-minute bars.
        self.test_data = self._create_streaming_data(240)

        # (strategy class, runner name, constructor params) for each strategy
        # exercised by the real-time tests.
        strategy_specs = (
            (
                MetaTrendStrategy,
                'metatrend_rt',
                {"timeframe": "15min", "lookback_period": 10},
            ),
            (
                BBRSStrategy,
                'bbrs_rt',
                {"timeframe": "30min", "bb_period": 20, "rsi_period": 14},
            ),
            (
                RandomStrategy,
                'random_rt',
                {
                    "timeframe": "5min",
                    "entry_probability": 0.1,
                    "exit_probability": 0.1,
                    "random_seed": 42,
                },
            ),
        )

        self.strategy_configs = [
            {'class': strategy_cls, 'name': label, 'params': kwargs}
            for strategy_cls, label, kwargs in strategy_specs
        ]

    def _create_streaming_data(self, num_minutes: int) -> List[Dict[str, Any]]:
        """Create realistic streaming market data."""
        start_time = pd.Timestamp.now().floor('min')  # Start at current minute
        data = []

        base_price = 50000.0

        for i in range(num_minutes):
            timestamp = start_time + pd.Timedelta(minutes=i)

            # Simulate realistic price movement
            volatility = 0.003  # 0.3% volatility
            price_change = np.random.normal(0, volatility * base_price)
            base_price += price_change
            base_price = max(base_price, 1000.0)

            # Create OHLC with realistic intrabar movement
            spread = base_price * 0.0005  # 0.05% spread
            open_price = base_price
            high_price = base_price + np.random.uniform(0, spread * 3)
            low_price = base_price - np.random.uniform(0, spread * 3)
            close_price = base_price + np.random.uniform(-spread, spread)

            # Ensure OHLC consistency
            high_price = max(high_price, open_price, close_price)
            low_price = min(low_price, open_price, close_price)

            volume = np.random.uniform(500, 1500)

            data.append({
                'timestamp': timestamp,
                'open': round(open_price, 2),
                'high': round(high_price, 2),
                'low': round(low_price, 2),
                'close': round(close_price, 2),
                'volume': round(volume, 0)
            })

        return data

    def test_minute_by_minute_processing(self):
        """Feed one hour of bars to every strategy and check per-bar speed."""
        print("\n⏱️  Testing Minute-by-Minute Processing")

        # One hour is enough here and keeps the test quick.
        one_hour = self.test_data[:60]

        # One runner per configured strategy.
        runners = [
            RealTimeStrategyRunner(
                cfg['class'](cfg['name'], params=cfg['params']),
                cfg['name'],
            )
            for cfg in self.strategy_configs
        ]

        for point in one_hour:
            for runner in runners:
                runner.on_data(point)

            # After each minute, the most recent handling time of every
            # runner must stay below 100ms.
            for runner in runners:
                if not runner.processing_times:
                    continue
                elapsed = runner.processing_times[-1]
                self.assertLess(
                    elapsed, 0.1,
                    f"{runner.name}: Processing too slow {elapsed:.3f}s"
                )

        # Every runner must have seen every data point.
        for runner in runners:
            self.assertEqual(
                runner.data_points_received, len(one_hour),
                f"{runner.name}: Missed data points"
            )

            mean_time = np.mean(runner.processing_times)
            print(f"✅ {runner.name}: {runner.data_points_received} points, "
                  f"avg: {mean_time*1000:.2f}ms, "
                  f"signals: {len(runner.signals)}")

    def test_bar_completion_timing(self):
        """Check that aggregated 15min bars carry 15-minute-aligned timestamps."""
        print("\n📊 Testing Bar Completion Timing")

        # The strategy is constructed but not otherwise used by this test.
        strategy = MetaTrendStrategy("test_timing", params={"timeframe": "15min"})
        buffer = MinuteDataBuffer(max_size=100)

        # Distinct consecutive bar timestamps, in the order they appear.
        seen_bar_times = []

        # 1.5 hours of minute data.
        for point in self.test_data[:90]:
            minute_ts = point['timestamp']
            bar_fields = {
                key: point[key]
                for key in ('open', 'high', 'low', 'close', 'volume')
            }

            buffer.add(minute_ts, bar_fields)

            # Ask the buffer for the latest 15min bar after every minute.
            aggregated = buffer.aggregate_to_timeframe("15min", lookback_bars=1)
            if not aggregated:
                continue

            newest_ts = aggregated[0]['timestamp']

            # Skip repeats of the bar recorded last.
            if seen_bar_times and newest_ts == seen_bar_times[-1]:
                continue
            seen_bar_times.append(newest_ts)

        # Every recorded bar timestamp must fall on a quarter-hour mark.
        for position, bar_ts in enumerate(seen_bar_times):
            self.assertIn(
                bar_ts.minute, [0, 15, 30, 45],
                f"Bar {position} completed at invalid time: {bar_ts}"
            )

        print(f"✅ {len(seen_bar_times)} bars completed at correct 15min boundaries")

    def test_no_future_data_usage(self):
        """Process data in order and check signal times against fed data.

        NOTE(review): each recorded signal timestamp is taken from the same
        data point as its recorded index, so the final comparison always
        holds; it does not inspect which data the strategy actually consumed.
        """
        print("\n🔮 Testing No Future Data Usage")

        strategy = MetaTrendStrategy("test_future", params={"timeframe": "15min"})

        emitted = []

        # Feed the points strictly in chronological order.
        for position, point in enumerate(self.test_data):
            minute_ts = point['timestamp']
            bar_fields = {
                key: point[key]
                for key in ('open', 'high', 'low', 'close', 'volume')
            }

            outcome = strategy.process_data_point(minute_ts, bar_fields)

            if not outcome or outcome.signal_type == "HOLD":
                continue

            emitted.append({
                'signal_timestamp': minute_ts,
                'data_index': position,
                'signal': outcome
            })

        # A signal's time may not exceed the newest data point fed so far.
        for record in emitted:
            signal_time = record['signal_timestamp']
            latest_available_time = self.test_data[record['data_index']]['timestamp']

            self.assertLessEqual(
                signal_time, latest_available_time,
                f"Signal at {signal_time} uses future data beyond {latest_available_time}"
            )

        print(f"✅ {len(emitted)} signals verified - no future data usage")

    def test_memory_usage_monitoring(self):
        """Track process RSS while a strategy handles 24 hours of bars."""
        print("\n💾 Testing Memory Usage Monitoring")

        import psutil
        import gc

        process = psutil.Process()

        def rss_mb():
            # Resident set size of this process, in MB.
            return process.memory_info().rss / 1024 / 1024

        baseline_mb = rss_mb()

        # 24 hours of minute data.
        full_day = self._create_streaming_data(1440)

        strategy = MetaTrendStrategy("test_memory", params={"timeframe": "15min"})
        growth_samples = []

        for position, point in enumerate(full_day):
            bar_fields = {
                key: point[key]
                for key in ('open', 'high', 'low', 'close', 'volume')
            }
            strategy.process_data_point(point['timestamp'], bar_fields)

            # Only sample on every 100th point (including the first).
            if position % 100 != 0:
                continue

            gc.collect()
            growth_samples.append(rss_mb() - baseline_mb)

        # Summarise the samples.
        max_memory_increase = max(growth_samples)
        final_memory_increase = growth_samples[-1]
        memory_growth_rate = (
            (final_memory_increase - growth_samples[0]) / len(growth_samples)
        )

        # Peak growth over the baseline must stay under 50MB.
        self.assertLess(
            max_memory_increase, 50,
            f"Memory usage too high: {max_memory_increase:.2f}MB"
        )

        # Average growth per sample must stay under 0.1MB.
        self.assertLess(
            abs(memory_growth_rate), 0.1,
            f"Memory growing too fast: {memory_growth_rate:.3f}MB per 100 points"
        )

        print(f"✅ Memory bounded: max {max_memory_increase:.2f}MB, "
              f"final {final_memory_increase:.2f}MB, "
              f"growth rate {memory_growth_rate:.3f}MB/100pts")

    def test_concurrent_strategy_processing(self):
        """Test multiple strategies processing data concurrently."""
        print("\n🔄 Testing Concurrent Strategy Processing")

        # Create multiple strategy instances
        strategies = []
        for config in self.strategy_configs:
            strategy = config['class'](config['name'], params=config['params'])
            strategies.append((strategy, config['name']))

        # Process data through all strategies simultaneously
        all_processing_times = {name: [] for _, name in strategies}
        all_signals = {name: [] for _, name in strategies}

        test_data = self.test_data[:120]  # 2 hours

        for data_point in test_data:
            timestamp = data_point['timestamp']
            ohlcv = {
                'open': data_point['open'],
                'high': data_point['high'],
                'low': data_point['low'],
                'close': data_point['close'],
                'volume': data_point['volume']
            }

            # Process through all strategies
            for strategy, name in strategies:
                start_time = time.perf_counter()
                signal = strategy.process_data_point(timestamp, ohlcv)
                processing_time = time.perf_counter() - start_time

                all_processing_times[name].append(processing_time)

                if signal and signal.signal_type != "HOLD":
                    all_signals[name].append({
                        'timestamp': timestamp,
                        'signal': signal
                    })

        # Verify all strategies processed successfully
        for strategy, name in strategies:
            processing_times = all_processing_times[name]
            signals = all_signals[name]

            # Check processing performance
            avg_time = np.mean(processing_times)
            max_time = max(processing_times)

            self.assertLess(
                avg_time, 0.01,  # Less than 10ms average
                f"{name}: Average processing too slow {avg_time:.3f}s"
            )

            self.assertLess(
                max_time, 0.1,  # Less than 100ms maximum
                f"{name}: Maximum processing too slow {max_time:.3f}s"
            )

            print(f"✅ {name}: avg {avg_time*1000:.2f}ms, "
                  f"max {max_time*1000:.2f}ms, "
                  f"{len(signals)} signals")

    def test_real_time_data_feed_simulation(self):
        """Push 30 minutes of bars through the threaded simulator feed."""
        print("\n📡 Testing Real-Time Data Feed Simulation")

        # A small slice keeps the threaded run short.
        feed_data = self.test_data[:30]

        # Feed running at 1000x speed, delivering into a single runner.
        simulator = RealTimeDataSimulator(feed_data, speed_multiplier=1000)
        runner = RealTimeStrategyRunner(
            MetaTrendStrategy("rt_feed_test", params={"timeframe": "5min"}),
            "rt_feed_test",
        )
        simulator.subscribe(runner.on_data)

        simulator.start()

        # Poll until the feed has walked through all data, giving up after
        # ten seconds.
        total_points = len(feed_data)
        wait_began = time.time()
        while True:
            if simulator.current_index >= total_points:
                break
            if time.time() - wait_began >= 10:
                break
            time.sleep(0.01)

        simulator.stop()

        # Something must have arrived...
        self.assertGreater(
            runner.data_points_received, 0,
            "No data points received from simulator"
        )

        # ...and at least 80% of the points must have been handled.
        self.assertGreaterEqual(
            runner.data_points_received, len(feed_data) * 0.8,
            f"Only processed {runner.data_points_received}/{len(feed_data)} data points"
        )

        print(f"✅ Real-time feed: {runner.data_points_received}/{len(feed_data)} points, "
              f"{len(runner.signals)} signals")

    def test_latency_requirements(self):
        """Measure per-bar handling time and compare it with fixed limits."""
        print("\n⚡ Testing Latency Requirements")

        strategy = MetaTrendStrategy("latency_test", params={"timeframe": "15min"})

        latencies = []

        # Time the strategy call for each of the first 100 bars.
        for point in self.test_data[:100]:
            minute_ts = point['timestamp']
            bar_fields = {
                key: point[key]
                for key in ('open', 'high', 'low', 'close', 'volume')
            }

            began = time.perf_counter()
            strategy.process_data_point(minute_ts, bar_fields)
            latencies.append(time.perf_counter() - began)

        # Summary statistics, all in seconds.
        avg_latency = np.mean(latencies)
        max_latency = max(latencies)
        p95_latency = np.percentile(latencies, 95)
        p99_latency = np.percentile(latencies, 99)

        # Limits: 5ms average, 10ms at the 95th percentile, 20ms worst case.
        self.assertLess(
            avg_latency, 0.005,
            f"Average latency too high: {avg_latency*1000:.2f}ms"
        )

        self.assertLess(
            p95_latency, 0.010,
            f"95th percentile latency too high: {p95_latency*1000:.2f}ms"
        )

        self.assertLess(
            max_latency, 0.020,
            f"Maximum latency too high: {max_latency*1000:.2f}ms"
        )

        print("✅ Latency requirements met:")
        report = (
            ("Average", avg_latency),
            ("95th percentile", p95_latency),
            ("99th percentile", p99_latency),
            ("Maximum", max_latency),
        )
        for label, seconds in report:
            print(f"  {label}: {seconds*1000:.2f}ms")


def run_realtime_simulation():
    """Run the TestRealTimeSimulation suite and print a summary.

    Returns:
        True when the run produced no failures and no errors, else False.
    """
    print("🚀 Phase 3 Task 3.3: Real-Time Simulation Tests")
    print("=" * 70)

    # Load every test of the case and run it verbosely on stdout.
    suite = unittest.TestLoader().loadTestsFromTestCase(TestRealTimeSimulation)
    result = unittest.TextTestRunner(verbosity=2, stream=sys.stdout).run(suite)

    # Summary
    print("\n🎯 Real-Time Simulation Results:")
    print(f"  Tests run: {result.testsRun}")
    print(f"  Failures: {len(result.failures)}")
    print(f"  Errors: {len(result.errors)}")

    # List the problematic tests, failures first, then errors.
    for heading, entries in (
        ("\n❌ Failures:", result.failures),
        ("\n❌ Errors:", result.errors),
    ):
        if not entries:
            continue
        print(heading)
        for case, details in entries:
            print(f"  - {case}: {details}")

    success = not result.failures and not result.errors

    if not success:
        print("\n❌ Some real-time simulation tests FAILED")
        return success

    print("\n✅ All real-time simulation tests PASSED!")
    print("🔧 Verified:")
    verified_items = (
        "Minute-by-minute processing",
        "Bar completion timing",
        "No future data usage",
        "Memory usage monitoring",
        "Concurrent strategy processing",
        "Real-time data feed simulation",
        "Latency requirements",
    )
    for item in verified_items:
        print(f"  - {item}")

    return success


if __name__ == "__main__":
    # Map the boolean outcome onto the process exit status:
    # 0 when everything passed, 1 otherwise.
    success = run_realtime_simulation()
    if success:
        sys.exit(0)
    sys.exit(1)