From 8c23489ff01949affabe8282b21c0ea3a05f2c33 Mon Sep 17 00:00:00 2001
From: "Vasily.onl"
Date: Thu, 12 Jun 2025 18:29:39 +0800
Subject: [PATCH] 4.0 - 4.0 Implement real-time strategy execution and data
 integration features

- Added `realtime_execution.py` for real-time strategy execution, enabling
  live signal generation and integration with the dashboard's chart refresh
  cycle.
- Introduced `data_integration.py` to manage market data orchestration,
  caching, and technical indicator calculations for strategy signal
  generation.
- Implemented `validation.py` for comprehensive validation and quality
  assessment of strategy-generated signals, ensuring reliability and
  consistency.
- Developed `batch_processing.py` to facilitate efficient backtesting of
  multiple strategies across large datasets with memory management and
  performance optimization.
- Updated `__init__.py` files to include new modules and ensure proper
  exports, enhancing modularity and maintainability.
- Enhanced unit tests for the new features, ensuring robust functionality
  and adherence to project standards.

These changes establish a solid foundation for real-time strategy execution
and data integration, aligning with project goals for modularity,
performance, and maintainability.
---
 dashboard/callbacks/__init__.py               |    4 +-
 dashboard/callbacks/realtime_strategies.py    |  291 +++
 database/repositories/strategy_repository.py  |   37 +
 strategies/__init__.py                        |   16 +-
 strategies/batch_processing.py                | 1059 +++++++++++++++++
 strategies/data_integration.py                | 1060 +++++++++++++++++
 strategies/realtime_execution.py              |  649 +++++++++++
 strategies/validation.py                      |  375 ++++++
 tasks/4.0-strategy-engine-foundation.md       |   47 +-
 tests/strategies/test_batch_processing.py     |  798 +++++++++++++
 tests/strategies/test_data_integration.py     | 1068 ++++++++++++++++++
 tests/strategies/test_realtime_execution.py   |  558 +++++++++
 tests/strategies/test_validation.py           |  478 ++++++++
 13 files changed, 6429 insertions(+), 11 deletions(-)
 create mode 100644 dashboard/callbacks/realtime_strategies.py
 create mode 100644 strategies/batch_processing.py
 create mode 100644 strategies/data_integration.py
 create mode 100644 strategies/realtime_execution.py
 create mode 100644 strategies/validation.py
 create mode 100644 tests/strategies/test_batch_processing.py
 create mode 100644 tests/strategies/test_data_integration.py
 create mode 100644 tests/strategies/test_realtime_execution.py
 create mode 100644 tests/strategies/test_validation.py

diff --git a/dashboard/callbacks/__init__.py b/dashboard/callbacks/__init__.py
index 9fb1ebb..bda5982 100644
--- a/dashboard/callbacks/__init__.py
+++ b/dashboard/callbacks/__init__.py
@@ -6,10 +6,12 @@ from .navigation import register_navigation_callbacks
 from .charts import register_chart_callbacks
 from .indicators import register_indicator_callbacks
 from .system_health import register_system_health_callbacks
+from .realtime_strategies import register_realtime_strategy_callbacks
 
 __all__ = [
     'register_navigation_callbacks',
     'register_chart_callbacks',
     'register_indicator_callbacks',
-    'register_system_health_callbacks'
+    'register_system_health_callbacks',
+    'register_realtime_strategy_callbacks'
 ]
\ No newline at end of file
diff --git a/dashboard/callbacks/realtime_strategies.py b/dashboard/callbacks/realtime_strategies.py
new file mode 100644
index 0000000..51e0df4
--- /dev/null
+++ b/dashboard/callbacks/realtime_strategies.py
@@ -0,0 +1,291 @@
+"""
+Real-time Strategy Callbacks
+
+This module provides callbacks for integrating real-time strategy execution
+with the dashboard chart refresh cycle and user interactions.
+"""
+
+import json
+from dash import Output, Input, State, Patch, ctx, html, no_update, dcc, callback
+import dash_bootstrap_components as dbc
+from datetime import datetime, timedelta
+from typing import Dict, Any, List, Optional
+
+from utils.logger import get_logger
+from strategies.realtime_execution import (
+    get_realtime_strategy_processor,
+    initialize_realtime_strategy_system,
+    RealTimeConfig,
+    RealTimeSignal
+)
+from strategies.manager import StrategyManager
+from config.strategies.config_utils import StrategyConfigurationManager
+
+logger = get_logger()
+
+# Global processor instance
+_processor = None
+
+
+def get_processor():
+    """Get or initialize the real-time strategy processor."""
+    global _processor
+    if _processor is None:
+        config = RealTimeConfig(
+            refresh_interval_seconds=30,
+            max_strategies_concurrent=3,
+            incremental_calculation=True,
+            signal_batch_size=50,
+            enable_signal_broadcasting=True
+        )
+        _processor = initialize_realtime_strategy_system(config)
+    return _processor
+
+
+def register_realtime_strategy_callbacks(app):
+    """Register real-time strategy callbacks."""
+
+    @app.callback(
+        Output('realtime-strategies-store', 'data'),
+        [Input('realtime-strategy-toggle', 'value'),
+         Input('symbol-dropdown', 'value'),
+         Input('timeframe-dropdown', 'value'),
+         Input('strategy-dropdown', 'value')],
+        [State('realtime-strategies-store', 'data')],
+        prevent_initial_call=True
+    )
+    def manage_realtime_strategies(enable_realtime, symbol, timeframe, strategy_name, current_data):
+        """
+        Manage real-time strategy registration based on user selections.
+
+        This callback handles enabling/disabling real-time strategy execution
+        and registers strategies based on current chart selections.
+ """ + try: + current_data = current_data or {'active_strategies': [], 'enabled': False} + processor = get_processor() + + if not enable_realtime: + # Disable all strategies + for context_id in current_data.get('active_strategies', []): + processor.unregister_strategy(context_id) + logger.info(f"Unregistered real-time strategy: {context_id}") + + return {'active_strategies': [], 'enabled': False} + + # Enable real-time strategies + if symbol and timeframe and strategy_name and strategy_name != 'basic': + # Load strategy configuration + try: + config_manager = StrategyConfigurationManager() + strategy_config = config_manager.load_user_strategy_config(strategy_name) + + if not strategy_config: + # Load from templates if user config doesn't exist + strategy_config = config_manager.load_strategy_template(strategy_name) + + if strategy_config: + # Register strategy for real-time execution + context_id = processor.register_strategy( + strategy_name=strategy_name, + strategy_config=strategy_config, + symbol=symbol, + timeframe=timeframe + ) + + active_strategies = [context_id] + logger.info(f"Registered real-time strategy: {context_id}") + + return { + 'active_strategies': active_strategies, + 'enabled': True, + 'current_symbol': symbol, + 'current_timeframe': timeframe, + 'current_strategy': strategy_name + } + + except Exception as e: + logger.error(f"Error loading strategy configuration for {strategy_name}: {e}") + return current_data + + return current_data + + except Exception as e: + logger.error(f"Error managing real-time strategies: {e}") + return current_data or {'active_strategies': [], 'enabled': False} + + @app.callback( + Output('realtime-strategy-status', 'children'), + [Input('realtime-strategies-store', 'data'), + Input('interval-component', 'n_intervals')], + prevent_initial_call=True + ) + def update_realtime_status(strategy_data, n_intervals): + """ + Update real-time strategy status display. + + Shows current status of real-time strategy execution including + active strategies and performance metrics. 
+ """ + try: + if not strategy_data or not strategy_data.get('enabled'): + return dbc.Alert("Real-time strategy execution is disabled", color="secondary", className="mb-2") + + processor = get_processor() + active_strategies = processor.get_active_strategies() + perf_stats = processor.get_performance_stats() + + if not active_strategies: + return dbc.Alert("No active real-time strategies", color="warning", className="mb-2") + + # Build status display + status_items = [] + + # Active strategies + for context_id, context in active_strategies.items(): + status_items.append( + html.Li([ + html.Strong(f"{context.strategy_name}: "), + f"{context.symbol} {context.timeframe}", + html.Span( + " ✓" if context.is_active else " ⚠️", + style={'color': 'green' if context.is_active else 'orange'} + ) + ]) + ) + + # Performance metrics + success_rate = 0 + if perf_stats['total_calculations'] > 0: + success_rate = (perf_stats['successful_calculations'] / perf_stats['total_calculations']) * 100 + + metrics_text = f"Calculations: {perf_stats['total_calculations']} | " \ + f"Success Rate: {success_rate:.1f}% | " \ + f"Signals Generated: {perf_stats['signals_generated']}" + + return dbc.Card([ + dbc.CardHeader("Real-time Strategy Status"), + dbc.CardBody([ + html.H6("Active Strategies:", className="mb-2"), + html.Ul(status_items, className="mb-3"), + html.P(metrics_text, className="small mb-0") + ]) + ], className="mb-2") + + except Exception as e: + logger.error(f"Error updating real-time status: {e}") + return dbc.Alert(f"Error updating status: {str(e)}", color="danger", className="mb-2") + + # Integration with chart refresh cycle + @app.callback( + Output('realtime-execution-trigger', 'data'), + [Input('interval-component', 'n_intervals')], + [State('symbol-dropdown', 'value'), + State('timeframe-dropdown', 'value'), + State('realtime-strategies-store', 'data'), + State('analysis-mode-toggle', 'value')], + prevent_initial_call=True + ) + def trigger_realtime_execution(n_intervals, symbol, timeframe, strategy_data, analysis_mode): + """ + Trigger real-time strategy execution when new data is available. + + This callback integrates with the existing chart refresh cycle to + execute real-time strategies when new candle data arrives. + """ + try: + # Only execute in live mode + if analysis_mode == 'locked': + return no_update + + # Only execute if real-time strategies are enabled + if not strategy_data or not strategy_data.get('enabled'): + return no_update + + # Only execute if we have symbol and timeframe + if not symbol or not timeframe: + return no_update + + processor = get_processor() + + # Execute real-time strategy update + signals = processor.execute_realtime_update( + symbol=symbol, + timeframe=timeframe, + exchange="okx" + ) + + if signals: + logger.info(f"Real-time execution generated {len(signals)} signals for {symbol} {timeframe}") + return { + 'timestamp': datetime.now().isoformat(), + 'signals_generated': len(signals), + 'symbol': symbol, + 'timeframe': timeframe + } + + return no_update + + except Exception as e: + logger.error(f"Error in real-time strategy execution: {e}") + return no_update + + +def add_realtime_strategy_components(): + """ + Add real-time strategy components to the dashboard layout. 
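+
+    Example (illustrative sketch -- assumes these components are mounted
+    inside an existing Dash layout container):
+
+        import dash_bootstrap_components as dbc
+
+        controls = dbc.Card(
+            dbc.CardBody(add_realtime_strategy_components()),
+            className="mb-3"
+        )
+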
+ + Returns: + List of Dash components for real-time strategy controls + """ + return [ + # Real-time strategy toggle + dbc.Row([ + dbc.Col([ + dbc.Label("Real-time Strategy Execution", className="fw-bold"), + dbc.Switch( + id="realtime-strategy-toggle", + label="Enable Real-time Execution", + value=False, + className="mb-2" + ), + ], width=12) + ], className="mb-3"), + + # Status display + html.Div(id="realtime-strategy-status"), + + # Hidden stores for state management + dcc.Store(id="realtime-strategies-store", data={'active_strategies': [], 'enabled': False}), + dcc.Store(id="realtime-execution-trigger", data={}), + ] + + +def setup_chart_update_callback(): + """ + Setup chart update callback for real-time signals. + + This function configures the real-time processor to trigger + chart updates when new signals are generated. + """ + def chart_update_callback(signal: RealTimeSignal): + """Handle chart updates for real-time signals.""" + try: + # This would trigger chart refresh for the specific symbol/timeframe + # For now, we'll log the signal and let the regular refresh cycle handle it + logger.debug( + f"Chart update requested for signal: {signal.context.strategy_name} " + f"on {signal.context.symbol} {signal.context.timeframe}" + ) + + # Future enhancement: Could trigger specific chart layer updates here + + except Exception as e: + logger.error(f"Error in chart update callback: {e}") + + processor = get_processor() + processor.set_chart_update_callback(chart_update_callback) + + +# Initialize the chart update callback when module is imported +setup_chart_update_callback() \ No newline at end of file diff --git a/database/repositories/strategy_repository.py b/database/repositories/strategy_repository.py index 8ba9483..a91b716 100644 --- a/database/repositories/strategy_repository.py +++ b/database/repositories/strategy_repository.py @@ -181,6 +181,43 @@ class StrategyRepository(BaseRepository): self.log_error(f"Error retrieving strategy signals: {e}") raise DatabaseOperationError(f"Failed to retrieve strategy signals: {e}") + def store_signals_batch(self, signal_data_list: List[Dict[str, Any]]) -> int: + """ + Store a batch of real-time strategy signals. 
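+
+        Example (illustrative sketch -- `repo` is a StrategyRepository
+        instance and all field values are hypothetical):
+
+            from datetime import datetime, timezone
+
+            stored = repo.store_signals_batch([{
+                'strategy_name': 'ema_crossover',
+                'strategy_config': {'fast_period': 12, 'slow_period': 26},
+                'symbol': 'BTC-USDT',
+                'timeframe': '1h',
+                'timestamp': datetime.now(timezone.utc),
+                'signal_type': 'BUY',
+                'price': 65000.0,
+                'confidence': 0.8,
+                'signal_metadata': {'source': 'realtime'}
+            }])
+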
+ + Args: + signal_data_list: List of signal data dictionaries + + Returns: + Number of signals stored + """ + try: + signals_stored = 0 + with self.get_session() as session: + for signal_data in signal_data_list: + strategy_signal = StrategySignal( + run_id=None, # Real-time signals don't have a run_id + strategy_name=signal_data.get('strategy_name'), + strategy_config=signal_data.get('strategy_config'), + symbol=signal_data.get('symbol'), + timeframe=signal_data.get('timeframe'), + timestamp=signal_data.get('timestamp'), + signal_type=signal_data.get('signal_type', 'HOLD'), + price=Decimal(str(signal_data.get('price'))) if signal_data.get('price') else None, + confidence=Decimal(str(signal_data.get('confidence', 0.0))), + signal_metadata=signal_data.get('signal_metadata', {}) + ) + session.add(strategy_signal) + signals_stored += 1 + + session.commit() + self.log_info(f"Stored batch of {signals_stored} real-time strategy signals") + return signals_stored + + except Exception as e: + self.log_error(f"Error storing signals batch: {e}") + raise DatabaseOperationError(f"Failed to store signals batch: {e}") + def get_strategy_signal_stats(self, run_id: Optional[int] = None) -> Dict[str, Any]: """Get statistics about strategy signals.""" try: diff --git a/strategies/__init__.py b/strategies/__init__.py index 9febcd5..90f083f 100644 --- a/strategies/__init__.py +++ b/strategies/__init__.py @@ -16,6 +16,10 @@ from .base import BaseStrategy from .factory import StrategyFactory from .data_types import StrategySignal, SignalType, StrategyResult from .manager import StrategyManager, StrategyConfig, StrategyType, StrategyCategory, get_strategy_manager +from .data_integration import StrategyDataIntegrator, StrategyDataIntegrationConfig, get_strategy_data_integrator +from .validation import StrategySignalValidator, ValidationConfig +from .batch_processing import BacktestingBatchProcessor, BatchProcessingConfig +from .realtime_execution import RealTimeStrategyProcessor, RealTimeConfig, get_realtime_strategy_processor __all__ = [ 'BaseStrategy', @@ -27,5 +31,15 @@ __all__ = [ 'StrategyConfig', 'StrategyType', 'StrategyCategory', - 'get_strategy_manager' + 'get_strategy_manager', + 'StrategyDataIntegrator', + 'StrategyDataIntegrationConfig', + 'get_strategy_data_integrator', + 'StrategySignalValidator', + 'ValidationConfig', + 'BacktestingBatchProcessor', + 'BatchProcessingConfig', + 'RealTimeStrategyProcessor', + 'RealTimeConfig', + 'get_realtime_strategy_processor' ] \ No newline at end of file diff --git a/strategies/batch_processing.py b/strategies/batch_processing.py new file mode 100644 index 0000000..9a697d9 --- /dev/null +++ b/strategies/batch_processing.py @@ -0,0 +1,1059 @@ +""" +Batch Processing for Strategy Backtesting + +This module provides efficient batch processing capabilities for running +multiple strategies across large datasets with memory management and +performance optimization. 
+""" + +from typing import List, Dict, Any, Optional, Tuple, Iterator +from dataclasses import dataclass +from datetime import datetime, timezone +import pandas as pd +from concurrent.futures import ThreadPoolExecutor, as_completed +import gc +import os +import psutil + +from .data_integration import StrategyDataIntegrator, StrategyDataIntegrationConfig +from .data_types import StrategyResult, StrategySignal +from .validation import StrategySignalValidator, ValidationConfig +from utils.logger import get_logger + + +@dataclass +class BatchProcessingConfig: + """Configuration for batch processing operations.""" + max_concurrent_strategies: int = 4 # Number of strategies to process concurrently + max_memory_usage_percent: float = 80.0 # Maximum memory usage threshold + chunk_size_days: int = 30 # Days to process per chunk for large datasets + enable_memory_monitoring: bool = True + enable_result_validation: bool = True + result_cache_size: int = 1000 # Maximum cached results + progress_reporting_interval: int = 10 # Report progress every N strategies + + +class BacktestingBatchProcessor: + """ + Efficient batch processing for strategy backtesting. + + Provides memory-efficient processing of multiple strategies across + large datasets with parallel execution and performance monitoring. + """ + + def __init__(self, config: BatchProcessingConfig = None): + """ + Initialize batch processor. + + Args: + config: Batch processing configuration + """ + self.config = config or BatchProcessingConfig() + self.logger = get_logger() + + # Initialize components + self.data_integrator = StrategyDataIntegrator() + self.signal_validator = StrategySignalValidator() if self.config.enable_result_validation else None + + # Processing statistics + self._processing_stats = { + 'strategies_processed': 0, + 'total_signals_generated': 0, + 'processing_time_seconds': 0.0, + 'memory_peak_mb': 0.0, + 'errors_count': 0, + 'validation_failures': 0 + } + + # Result cache for performance + self._result_cache = {} + + def process_strategies_batch( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + days_back: int, + exchange: str = "okx" + ) -> Dict[str, List[StrategyResult]]: + """ + Process multiple strategies across multiple symbols efficiently. 
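+
+        Example (illustrative sketch -- the strategy config and symbols are
+        hypothetical and must exist in your setup):
+
+            processor = BacktestingBatchProcessor(
+                BatchProcessingConfig(max_concurrent_strategies=2)
+            )
+            results = processor.process_strategies_batch(
+                strategy_configs=[{'name': 'ema_crossover',
+                                   'fast_period': 12, 'slow_period': 26}],
+                symbols=['BTC-USDT', 'ETH-USDT'],
+                timeframe='1h',
+                days_back=30
+            )
+            for name, strategy_results in results.items():
+                total = sum(len(r.signals) for r in strategy_results)
+                print(f"{name}: {total} signals")
+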
+ + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + days_back: Number of days to look back + exchange: Exchange name + + Returns: + Dictionary mapping strategy names to their results + """ + try: + start_time = datetime.now() + self.logger.info(f"BacktestingBatchProcessor: Starting batch processing of {len(strategy_configs)} strategies across {len(symbols)} symbols") + + # Initialize results container + batch_results = {} + + # Process strategies with memory monitoring + for i, strategy_config in enumerate(strategy_configs): + strategy_name = strategy_config.get('name', f'strategy_{i}') + + # Memory check + if self.config.enable_memory_monitoring: + self._check_memory_usage() + + # Process strategy across all symbols + strategy_results = self._process_single_strategy_batch( + strategy_config, symbols, timeframe, days_back, exchange + ) + + batch_results[strategy_name] = strategy_results + self._processing_stats['strategies_processed'] += 1 + + # Progress reporting + if (i + 1) % self.config.progress_reporting_interval == 0: + progress = ((i + 1) / len(strategy_configs)) * 100 + self.logger.info(f"Batch processing progress: {progress:.1f}% ({i + 1}/{len(strategy_configs)} strategies)") + + # Update final statistics + processing_time = (datetime.now() - start_time).total_seconds() + self._processing_stats['processing_time_seconds'] = processing_time + + self.logger.info(f"BacktestingBatchProcessor: Completed batch processing in {processing_time:.2f} seconds") + return batch_results + + except Exception as e: + self.logger.error(f"Error in batch processing: {e}") + self._processing_stats['errors_count'] += 1 + return {} + + def _process_single_strategy_batch( + self, + strategy_config: Dict[str, Any], + symbols: List[str], + timeframe: str, + days_back: int, + exchange: str + ) -> List[StrategyResult]: + """Process a single strategy across multiple symbols.""" + all_results = [] + strategy_name = strategy_config.get('name', 'unknown') + + for symbol in symbols: + try: + # Calculate strategy signals for this symbol + results = self.data_integrator.calculate_strategy_signals_orchestrated( + strategy_name=strategy_name, + strategy_config=strategy_config, + symbol=symbol, + timeframe=timeframe, + days_back=days_back, + exchange=exchange + ) + + # Validate results if enabled + if self.signal_validator and results: + validated_results = self._validate_strategy_results(results) + all_results.extend(validated_results) + else: + all_results.extend(results) + + # Update signal count + signal_count = sum(len(result.signals) for result in results) + self._processing_stats['total_signals_generated'] += signal_count + + except Exception as e: + self.logger.error(f"Error processing {strategy_name} for {symbol}: {e}") + self._processing_stats['errors_count'] += 1 + + return all_results + + def _validate_strategy_results(self, results: List[StrategyResult]) -> List[StrategyResult]: + """Validate strategy results and filter invalid signals.""" + validated_results = [] + + for result in results: + if result.signals: + valid_signals, invalid_signals = self.signal_validator.validate_signals_batch(result.signals) + + if invalid_signals: + self._processing_stats['validation_failures'] += len(invalid_signals) + self.logger.debug(f"Filtered {len(invalid_signals)} invalid signals from {result.strategy_name}") + + # Create new result with only valid signals + if valid_signals: + validated_result = StrategyResult( + 
timestamp=result.timestamp, + symbol=result.symbol, + timeframe=result.timeframe, + strategy_name=result.strategy_name, + signals=valid_signals, + indicators_used=result.indicators_used, + metadata=result.metadata + ) + validated_results.append(validated_result) + else: + validated_results.append(result) + + return validated_results + + def _check_memory_usage(self) -> None: + """Monitor memory usage and trigger cleanup if needed.""" + process = psutil.Process(os.getpid()) + memory_percent = process.memory_percent() + memory_mb = process.memory_info().rss / 1024 / 1024 + + # Update peak memory tracking + self._processing_stats['memory_peak_mb'] = max( + self._processing_stats['memory_peak_mb'], + memory_mb + ) + + if memory_percent > self.config.max_memory_usage_percent: + self.logger.warning(f"High memory usage detected: {memory_percent:.1f}% ({memory_mb:.1f} MB)") + self._cleanup_memory() + + def _cleanup_memory(self) -> None: + """Perform memory cleanup operations.""" + # Clear old cached results + if len(self._result_cache) > self.config.result_cache_size: + cache_items = list(self._result_cache.items()) + # Keep only the most recent half + keep_size = self.config.result_cache_size // 2 + self._result_cache = dict(cache_items[-keep_size:]) + + # Clear data integrator caches + self.data_integrator.clear_cache() + + # Force garbage collection + gc.collect() + + self.logger.debug("BacktestingBatchProcessor: Performed memory cleanup") + + def get_processing_statistics(self) -> Dict[str, Any]: + """Get comprehensive processing statistics.""" + stats = self._processing_stats.copy() + + # Calculate derived metrics + if stats['strategies_processed'] > 0: + stats['average_signals_per_strategy'] = stats['total_signals_generated'] / stats['strategies_processed'] + stats['average_processing_time_per_strategy'] = stats['processing_time_seconds'] / stats['strategies_processed'] + else: + stats['average_signals_per_strategy'] = 0 + stats['average_processing_time_per_strategy'] = 0 + + stats['error_rate'] = (stats['errors_count'] / max(stats['strategies_processed'], 1)) * 100 + stats['validation_failure_rate'] = (stats['validation_failures'] / max(stats['total_signals_generated'], 1)) * 100 + + return stats + + def process_strategies_parallel( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + days_back: int, + exchange: str = "okx" + ) -> Dict[str, List[StrategyResult]]: + """ + Process multiple strategies in parallel for improved performance. 
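+
+        Example (illustrative sketch -- `configs` is a list of strategy
+        config dicts as in process_strategies_batch):
+
+            results = processor.process_strategies_parallel(
+                strategy_configs=configs,
+                symbols=['BTC-USDT'],
+                timeframe='4h',
+                days_back=90
+            )
+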
+ + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + days_back: Number of days to look back + exchange: Exchange name + + Returns: + Dictionary mapping strategy names to their results + """ + try: + start_time = datetime.now() + self.logger.info(f"BacktestingBatchProcessor: Starting parallel processing of {len(strategy_configs)} strategies") + + batch_results = {} + + # Use ThreadPoolExecutor for parallel strategy processing + with ThreadPoolExecutor(max_workers=self.config.max_concurrent_strategies) as executor: + # Submit all strategy processing tasks + future_to_strategy = {} + + for strategy_config in strategy_configs: + strategy_name = strategy_config.get('name', f'strategy_{len(future_to_strategy)}') + + future = executor.submit( + self._process_single_strategy_batch, + strategy_config, symbols, timeframe, days_back, exchange + ) + future_to_strategy[future] = strategy_name + + # Collect results as they complete + completed_count = 0 + for future in as_completed(future_to_strategy): + strategy_name = future_to_strategy[future] + + try: + strategy_results = future.result() + batch_results[strategy_name] = strategy_results + self._processing_stats['strategies_processed'] += 1 + + completed_count += 1 + + # Progress reporting + if completed_count % self.config.progress_reporting_interval == 0: + progress = (completed_count / len(strategy_configs)) * 100 + self.logger.info(f"Parallel processing progress: {progress:.1f}% ({completed_count}/{len(strategy_configs)} strategies)") + + except Exception as e: + self.logger.error(f"Error processing strategy {strategy_name}: {e}") + self._processing_stats['errors_count'] += 1 + batch_results[strategy_name] = [] + + # Memory check after each completed strategy + if self.config.enable_memory_monitoring: + self._check_memory_usage() + + # Update final statistics + processing_time = (datetime.now() - start_time).total_seconds() + self._processing_stats['processing_time_seconds'] = processing_time + + self.logger.info(f"BacktestingBatchProcessor: Completed parallel processing in {processing_time:.2f} seconds") + return batch_results + + except Exception as e: + self.logger.error(f"Error in parallel batch processing: {e}") + self._processing_stats['errors_count'] += 1 + return {} + + def process_symbols_parallel( + self, + strategy_config: Dict[str, Any], + symbols: List[str], + timeframe: str, + days_back: int, + exchange: str = "okx" + ) -> List[StrategyResult]: + """ + Process a single strategy across multiple symbols in parallel. 
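+
+        Example (illustrative sketch -- one hypothetical config fanned out
+        across several symbols):
+
+            results = processor.process_symbols_parallel(
+                strategy_config={'name': 'rsi_strategy', 'period': 14},
+                symbols=['BTC-USDT', 'ETH-USDT', 'SOL-USDT'],
+                timeframe='1h',
+                days_back=30
+            )
+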
+ + Args: + strategy_config: Strategy configuration + symbols: List of trading symbols to process + timeframe: Timeframe for processing + days_back: Number of days to look back + exchange: Exchange name + + Returns: + List of strategy results across all symbols + """ + try: + strategy_name = strategy_config.get('name', 'unknown') + self.logger.info(f"BacktestingBatchProcessor: Processing {strategy_name} across {len(symbols)} symbols in parallel") + + all_results = [] + + # Use ThreadPoolExecutor for parallel symbol processing + with ThreadPoolExecutor(max_workers=min(len(symbols), self.config.max_concurrent_strategies)) as executor: + # Submit symbol processing tasks + future_to_symbol = {} + + for symbol in symbols: + future = executor.submit( + self._process_strategy_for_symbol, + strategy_config, symbol, timeframe, days_back, exchange + ) + future_to_symbol[future] = symbol + + # Collect results as they complete + for future in as_completed(future_to_symbol): + symbol = future_to_symbol[future] + + try: + symbol_results = future.result() + all_results.extend(symbol_results) + + # Update signal count + signal_count = sum(len(result.signals) for result in symbol_results) + self._processing_stats['total_signals_generated'] += signal_count + + except Exception as e: + self.logger.error(f"Error processing {strategy_name} for {symbol}: {e}") + self._processing_stats['errors_count'] += 1 + + return all_results + + except Exception as e: + self.logger.error(f"Error in parallel symbol processing: {e}") + self._processing_stats['errors_count'] += 1 + return [] + + def _process_strategy_for_symbol( + self, + strategy_config: Dict[str, Any], + symbol: str, + timeframe: str, + days_back: int, + exchange: str + ) -> List[StrategyResult]: + """Process a single strategy for a single symbol.""" + try: + strategy_name = strategy_config.get('name', 'unknown') + + # Calculate strategy signals for this symbol + results = self.data_integrator.calculate_strategy_signals_orchestrated( + strategy_name=strategy_name, + strategy_config=strategy_config, + symbol=symbol, + timeframe=timeframe, + days_back=days_back, + exchange=exchange + ) + + # Validate results if enabled + if self.signal_validator and results: + validated_results = self._validate_strategy_results(results) + return validated_results + else: + return results + + except Exception as e: + self.logger.error(f"Error processing {strategy_config.get('name', 'unknown')} for {symbol}: {e}") + return [] + + def process_large_dataset_streaming( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + total_days_back: int, + exchange: str = "okx" + ) -> Iterator[Dict[str, List[StrategyResult]]]: + """ + Process large datasets using streaming approach with memory-efficient chunking. + + This method processes data in chunks to avoid memory overflow when dealing + with very large historical datasets. 
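+
+        Example (illustrative sketch -- chunks can be consumed lazily or
+        merged with aggregate_streaming_results):
+
+            stream = processor.process_large_dataset_streaming(
+                strategy_configs=configs,
+                symbols=['BTC-USDT'],
+                timeframe='1h',
+                total_days_back=365
+            )
+            combined = processor.aggregate_streaming_results(stream)
+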
+ + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + total_days_back: Total number of days to process + exchange: Exchange name + + Yields: + Dictionary chunks mapping strategy names to their results + """ + try: + chunk_size = self.config.chunk_size_days + total_chunks = (total_days_back + chunk_size - 1) // chunk_size # Ceiling division + + self.logger.info(f"BacktestingBatchProcessor: Starting streaming processing of {total_days_back} days in {total_chunks} chunks") + + for chunk_index in range(total_chunks): + # Calculate date range for this chunk + chunk_start_days = chunk_index * chunk_size + chunk_end_days = min((chunk_index + 1) * chunk_size, total_days_back) + chunk_days = chunk_end_days - chunk_start_days + + self.logger.info(f"Processing chunk {chunk_index + 1}/{total_chunks}: {chunk_days} days") + + # Memory check before processing chunk + if self.config.enable_memory_monitoring: + self._check_memory_usage() + + # Process chunk using parallel processing + chunk_results = self.process_strategies_parallel( + strategy_configs=strategy_configs, + symbols=symbols, + timeframe=timeframe, + days_back=chunk_days, + exchange=exchange + ) + + # Yield chunk results + yield chunk_results + + # Force cleanup after each chunk to manage memory + self._cleanup_memory() + + # Report progress + progress = ((chunk_index + 1) / total_chunks) * 100 + self.logger.info(f"Streaming progress: {progress:.1f}% ({chunk_index + 1}/{total_chunks} chunks)") + + self.logger.info("BacktestingBatchProcessor: Completed streaming processing") + + except Exception as e: + self.logger.error(f"Error in streaming processing: {e}") + self._processing_stats['errors_count'] += 1 + + def aggregate_streaming_results( + self, + result_stream: Iterator[Dict[str, List[StrategyResult]]] + ) -> Dict[str, List[StrategyResult]]: + """ + Aggregate results from streaming processing. + + Args: + result_stream: Iterator of result chunks + + Returns: + Aggregated results across all chunks + """ + try: + aggregated_results = {} + chunk_count = 0 + + for chunk_results in result_stream: + chunk_count += 1 + + for strategy_name, strategy_results in chunk_results.items(): + if strategy_name not in aggregated_results: + aggregated_results[strategy_name] = [] + + aggregated_results[strategy_name].extend(strategy_results) + + # Periodic memory cleanup during aggregation + if chunk_count % 5 == 0: # Every 5 chunks + self._cleanup_memory() + + # Final statistics + total_strategies = len(aggregated_results) + total_results = sum(len(results) for results in aggregated_results.values()) + + self.logger.info(f"Aggregated {total_results} results across {total_strategies} strategies from {chunk_count} chunks") + + return aggregated_results + + except Exception as e: + self.logger.error(f"Error aggregating streaming results: {e}") + return {} + + def process_with_memory_constraints( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + days_back: int, + max_memory_mb: float, + exchange: str = "okx" + ) -> Dict[str, List[StrategyResult]]: + """ + Process strategies with strict memory constraints. + + Automatically adjusts processing approach based on available memory. 
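+
+        Example (illustrative sketch -- the 2048 MB budget is hypothetical):
+
+            results = processor.process_with_memory_constraints(
+                strategy_configs=configs,
+                symbols=symbols,
+                timeframe='1h',
+                days_back=180,
+                max_memory_mb=2048.0
+            )
+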
+ + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + days_back: Number of days to look back + max_memory_mb: Maximum memory usage in MB + exchange: Exchange name + + Returns: + Dictionary mapping strategy names to their results + """ + try: + # Check current memory usage + current_memory = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 + available_memory = max_memory_mb - current_memory + + self.logger.info(f"Memory-constrained processing: {available_memory:.1f} MB available of {max_memory_mb:.1f} MB limit") + + # Estimate memory requirements + estimated_memory_per_strategy = 50 # MB - rough estimate + estimated_total_memory = len(strategy_configs) * len(symbols) * estimated_memory_per_strategy + + if estimated_total_memory <= available_memory: + # Sufficient memory for parallel processing + self.logger.info("Sufficient memory available - using parallel processing") + return self.process_strategies_parallel( + strategy_configs, symbols, timeframe, days_back, exchange + ) + + elif estimated_total_memory <= available_memory * 2: + # Moderate memory constraint - use sequential processing + self.logger.info("Moderate memory constraint - using sequential processing") + return self.process_strategies_batch( + strategy_configs, symbols, timeframe, days_back, exchange + ) + + else: + # Severe memory constraint - use streaming with warm-up + self.logger.info("Severe memory constraint - using streaming processing with warm-up") + stream = self.process_large_dataset_streaming_with_warmup( + strategy_configs, symbols, timeframe, days_back, exchange + ) + return self.aggregate_streaming_results(stream) + + except Exception as e: + self.logger.error(f"Error in memory-constrained processing: {e}") + self._processing_stats['errors_count'] += 1 + return {} + + def get_performance_metrics(self) -> Dict[str, Any]: + """ + Get comprehensive performance metrics for batch processing. 
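+
+        Example (illustrative sketch):
+
+            metrics = processor.get_performance_metrics()
+            print(f"Throughput: {metrics['throughput_signals_per_second']:.2f} signals/s")
+            for tip in metrics['optimization_recommendations']:
+                print(f"- {tip}")
+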
+ + Returns: + Dictionary containing detailed performance metrics + """ + stats = self.get_processing_statistics() + + # Enhanced metrics + enhanced_metrics = { + **stats, + 'cache_hit_rate': self._calculate_cache_hit_rate(), + 'memory_efficiency': self._calculate_memory_efficiency(), + 'throughput_signals_per_second': self._calculate_throughput(), + 'parallel_efficiency': self._calculate_parallel_efficiency(), + 'optimization_recommendations': self._generate_optimization_recommendations() + } + + return enhanced_metrics + + def _calculate_cache_hit_rate(self) -> float: + """Calculate cache hit rate from data integrator.""" + try: + cache_stats = self.data_integrator.get_cache_stats() + total_requests = cache_stats.get('cache_hits', 0) + cache_stats.get('cache_misses', 0) + if total_requests > 0: + return (cache_stats.get('cache_hits', 0) / total_requests) * 100 + return 0.0 + except Exception: + return 0.0 + + def _calculate_memory_efficiency(self) -> float: + """Calculate memory efficiency score.""" + peak_memory = self._processing_stats.get('memory_peak_mb', 0) + strategies_processed = self._processing_stats.get('strategies_processed', 1) + + if peak_memory > 0 and strategies_processed > 0: + # Memory per strategy in MB (lower is better) + memory_per_strategy = peak_memory / strategies_processed + + # Efficiency score (0-100, higher is better) + # Assuming 100MB per strategy is baseline (50% efficiency) + baseline_memory = 100 # MB + efficiency = max(0, min(100, (baseline_memory / memory_per_strategy) * 50)) + return efficiency + + return 100.0 # Perfect efficiency if no memory tracked + + def _calculate_throughput(self) -> float: + """Calculate signals processed per second.""" + total_signals = self._processing_stats.get('total_signals_generated', 0) + processing_time = self._processing_stats.get('processing_time_seconds', 1) + + if processing_time > 0: + return total_signals / processing_time + return 0.0 + + def _calculate_parallel_efficiency(self) -> float: + """Calculate parallel processing efficiency.""" + strategies_processed = self._processing_stats.get('strategies_processed', 0) + processing_time = self._processing_stats.get('processing_time_seconds', 1) + max_workers = self.config.max_concurrent_strategies + + if strategies_processed > 0 and processing_time > 0: + # Theoretical minimum time if perfectly parallel + avg_time_per_strategy = processing_time / strategies_processed + theoretical_min_time = avg_time_per_strategy * (strategies_processed / max_workers) + + # Efficiency as percentage of theoretical optimum + efficiency = min(100, (theoretical_min_time / processing_time) * 100) + return efficiency + + return 100.0 + + def _generate_optimization_recommendations(self) -> List[str]: + """Generate optimization recommendations based on performance metrics.""" + recommendations = [] + + # Memory recommendations + memory_efficiency = self._calculate_memory_efficiency() + if memory_efficiency < 50: + recommendations.append("Consider reducing chunk_size_days or max_concurrent_strategies to improve memory efficiency") + + # Cache recommendations + cache_hit_rate = self._calculate_cache_hit_rate() + if cache_hit_rate < 30: + recommendations.append("Enable indicator caching or increase cache timeout to improve performance") + + # Parallel efficiency recommendations + parallel_efficiency = self._calculate_parallel_efficiency() + if parallel_efficiency < 70: + recommendations.append("Consider adjusting max_concurrent_strategies based on system capabilities") + + # Error rate 
recommendations + error_rate = self._processing_stats.get('error_rate', 0) + if error_rate > 10: + recommendations.append("High error rate detected - check data availability and strategy configurations") + + # Throughput recommendations + throughput = self._calculate_throughput() + if throughput < 1.0: # Less than 1 signal per second + recommendations.append("Low throughput detected - consider optimizing strategy calculations or using simpler indicators") + + if not recommendations: + recommendations.append("Performance metrics are within acceptable ranges") + + return recommendations + + def optimize_configuration(self) -> BatchProcessingConfig: + """ + Automatically optimize configuration based on current performance metrics. + + Returns: + Optimized configuration + """ + try: + current_config = self.config + optimized_config = BatchProcessingConfig() + + # Copy current values as baseline + optimized_config.max_concurrent_strategies = current_config.max_concurrent_strategies + optimized_config.chunk_size_days = current_config.chunk_size_days + optimized_config.max_memory_usage_percent = current_config.max_memory_usage_percent + optimized_config.result_cache_size = current_config.result_cache_size + + # Get current metrics + memory_efficiency = self._calculate_memory_efficiency() + parallel_efficiency = self._calculate_parallel_efficiency() + error_rate = self._processing_stats.get('error_rate', 0) + + # Optimize based on metrics + if memory_efficiency < 50: + # Reduce memory pressure + optimized_config.max_concurrent_strategies = max(1, current_config.max_concurrent_strategies - 1) + optimized_config.chunk_size_days = max(7, current_config.chunk_size_days - 7) + + elif memory_efficiency > 80 and parallel_efficiency < 70: + # Increase parallelism if memory allows + optimized_config.max_concurrent_strategies = min(8, current_config.max_concurrent_strategies + 1) + + if error_rate > 10: + # Reduce load to minimize errors + optimized_config.max_concurrent_strategies = max(1, current_config.max_concurrent_strategies - 1) + + # Cache optimization + cache_hit_rate = self._calculate_cache_hit_rate() + if cache_hit_rate < 30: + optimized_config.result_cache_size = min(2000, current_config.result_cache_size * 2) + + self.logger.info(f"Configuration optimized: workers {current_config.max_concurrent_strategies} -> {optimized_config.max_concurrent_strategies}, " + f"chunk_size {current_config.chunk_size_days} -> {optimized_config.chunk_size_days}") + + return optimized_config + + except Exception as e: + self.logger.error(f"Error optimizing configuration: {e}") + return self.config + + def benchmark_processing_methods( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + days_back: int, + exchange: str = "okx" + ) -> Dict[str, Dict[str, Any]]: + """ + Benchmark different processing methods to determine optimal approach. 
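+
+        Example (illustrative sketch -- the benchmark internally caps the
+        window at 7 days):
+
+            report = processor.benchmark_processing_methods(
+                strategy_configs=configs,
+                symbols=['BTC-USDT'],
+                timeframe='1h',
+                days_back=30
+            )
+            print(report.get('recommendation'))
+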
+ + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + days_back: Number of days to look back + exchange: Exchange name + + Returns: + Dictionary containing benchmark results for each method + """ + try: + self.logger.info("Starting processing method benchmark") + benchmark_results = {} + + # Test sequential processing + start_time = datetime.now() + self._reset_stats() + + sequential_results = self.process_strategies_batch( + strategy_configs, symbols, timeframe, min(days_back, 7), exchange # Limit to 7 days for benchmark + ) + + sequential_time = (datetime.now() - start_time).total_seconds() + benchmark_results['sequential'] = { + 'processing_time': sequential_time, + 'results_count': sum(len(results) for results in sequential_results.values()), + 'memory_peak_mb': self._processing_stats.get('memory_peak_mb', 0), + 'throughput': self._calculate_throughput() + } + + # Test parallel processing + start_time = datetime.now() + self._reset_stats() + + parallel_results = self.process_strategies_parallel( + strategy_configs, symbols, timeframe, min(days_back, 7), exchange + ) + + parallel_time = (datetime.now() - start_time).total_seconds() + benchmark_results['parallel'] = { + 'processing_time': parallel_time, + 'results_count': sum(len(results) for results in parallel_results.values()), + 'memory_peak_mb': self._processing_stats.get('memory_peak_mb', 0), + 'throughput': self._calculate_throughput() + } + + # Calculate speedup + if sequential_time > 0: + speedup = sequential_time / parallel_time + benchmark_results['parallel']['speedup'] = speedup + + # Recommend best method + if parallel_time < sequential_time * 0.8: # 20% improvement threshold + benchmark_results['recommendation'] = 'parallel' + else: + benchmark_results['recommendation'] = 'sequential' + + self.logger.info(f"Benchmark completed. Recommended method: {benchmark_results['recommendation']}") + return benchmark_results + + except Exception as e: + self.logger.error(f"Error in benchmark: {e}") + return {} + + def _reset_stats(self) -> None: + """Reset processing statistics for benchmarking.""" + self._processing_stats = { + 'strategies_processed': 0, + 'total_signals_generated': 0, + 'processing_time_seconds': 0.0, + 'memory_peak_mb': 0.0, + 'errors_count': 0, + 'validation_failures': 0 + } + + # Clear result cache + self._result_cache.clear() + + self.logger.debug("BacktestingBatchProcessor: Reset processing statistics and cleared result cache") + + def _calculate_warmup_period(self, strategy_configs: List[Dict[str, Any]]) -> int: + """ + Calculate the maximum warm-up period needed across all strategies. 
+ + Args: + strategy_configs: List of strategy configurations + + Returns: + Maximum warm-up period in number of periods + """ + max_warmup = 0 + + for strategy_config in strategy_configs: + strategy_name = strategy_config.get('name', 'unknown') + + # Common indicator warm-up requirements + indicator_warmups = { + 'ema': strategy_config.get('slow_period', strategy_config.get('period', 26)), + 'sma': strategy_config.get('period', 20), + 'rsi': strategy_config.get('period', 14), + 'macd': max( + strategy_config.get('slow_period', 26), + strategy_config.get('signal_period', 9) + ), + 'bollinger': strategy_config.get('period', 20), + 'stochastic': strategy_config.get('k_period', 14) + } + + # Determine strategy type and required warm-up + if 'ema' in strategy_name.lower(): + warmup = max( + strategy_config.get('fast_period', 12), + strategy_config.get('slow_period', 26) + ) + elif 'macd' in strategy_name.lower(): + warmup = max( + strategy_config.get('slow_period', 26), + strategy_config.get('signal_period', 9) + ) + 10 # Additional buffer for MACD convergence + elif 'rsi' in strategy_name.lower(): + warmup = strategy_config.get('period', 14) + 5 # Additional buffer for RSI stabilization + else: + # Generic estimation based on common indicators + warmup = 30 # Conservative default + + max_warmup = max(max_warmup, warmup) + + # Add safety buffer + max_warmup += 10 + + self.logger.debug(f"Calculated warm-up period: {max_warmup} periods") + return max_warmup + + def process_large_dataset_streaming_with_warmup( + self, + strategy_configs: List[Dict[str, Any]], + symbols: List[str], + timeframe: str, + total_days_back: int, + exchange: str = "okx" + ) -> Iterator[Dict[str, List[StrategyResult]]]: + """ + Process large datasets using streaming approach with proper warm-up period handling. + + This method ensures indicator continuity across chunks by including warm-up data + from previous chunks and trimming overlapping results. + + Args: + strategy_configs: List of strategy configurations + symbols: List of trading symbols to process + timeframe: Timeframe for processing + total_days_back: Total number of days to process + exchange: Exchange name + + Yields: + Dictionary chunks mapping strategy names to their results (with overlaps removed) + """ + try: + chunk_size = self.config.chunk_size_days + warmup_days = self._calculate_warmup_period(strategy_configs) + + # Adjust chunk size if it's too small relative to warm-up + if chunk_size <= warmup_days: + adjusted_chunk_size = warmup_days * 2 + self.logger.warning(f"Chunk size ({chunk_size}) too small for warm-up ({warmup_days}). 
" + f"Adjusting to {adjusted_chunk_size} days") + chunk_size = adjusted_chunk_size + + total_chunks = (total_days_back + chunk_size - 1) // chunk_size + + self.logger.info(f"BacktestingBatchProcessor: Starting streaming with warm-up processing of " + f"{total_days_back} days in {total_chunks} chunks (warm-up: {warmup_days} days)") + + for chunk_index in range(total_chunks): + # Calculate date range for this chunk + chunk_start_days = chunk_index * chunk_size + chunk_end_days = min((chunk_index + 1) * chunk_size, total_days_back) + + # Include warm-up data for chunks after the first + if chunk_index == 0: + # First chunk: no warm-up available, process as-is + processing_start_days = chunk_start_days + processing_days = chunk_end_days - chunk_start_days + trim_warmup = False + else: + # Subsequent chunks: include warm-up from previous data + processing_start_days = max(0, chunk_start_days - warmup_days) + processing_days = chunk_end_days - processing_start_days + trim_warmup = True + + self.logger.info(f"Processing chunk {chunk_index + 1}/{total_chunks}: " + f"target days {chunk_start_days}-{chunk_end_days}, " + f"processing days {processing_start_days}-{chunk_end_days} " + f"(warm-up: {warmup_days if trim_warmup else 0})") + + # Memory check before processing chunk + if self.config.enable_memory_monitoring: + self._check_memory_usage() + + # Process chunk with warm-up data + chunk_results = self.process_strategies_parallel( + strategy_configs=strategy_configs, + symbols=symbols, + timeframe=timeframe, + days_back=processing_days, + exchange=exchange + ) + + # Trim warm-up period from results to avoid overlaps + if trim_warmup: + chunk_results = self._trim_warmup_from_results( + chunk_results, warmup_days, chunk_start_days, chunk_end_days + ) + + # Yield processed chunk results + yield chunk_results + + # Force cleanup after each chunk to manage memory + self._cleanup_memory() + + # Report progress + progress = ((chunk_index + 1) / total_chunks) * 100 + self.logger.info(f"Streaming progress: {progress:.1f}% ({chunk_index + 1}/{total_chunks} chunks)") + + self.logger.info("BacktestingBatchProcessor: Completed streaming processing with warm-up") + + except Exception as e: + self.logger.error(f"Error in streaming processing with warm-up: {e}") + self._processing_stats['errors_count'] += 1 + + def _trim_warmup_from_results( + self, + chunk_results: Dict[str, List[StrategyResult]], + warmup_days: int, + target_start_days: int, + target_end_days: int + ) -> Dict[str, List[StrategyResult]]: + """ + Trim warm-up period from chunk results to avoid overlapping data. 
+ + Args: + chunk_results: Results from chunk processing (including warm-up) + warmup_days: Number of warm-up days to trim + target_start_days: Target start day for this chunk + target_end_days: Target end day for this chunk + + Returns: + Trimmed results containing only the target date range + """ + try: + from datetime import datetime, timedelta + + trimmed_results = {} + + # Calculate cutoff timestamp (approximate, since we don't have exact start date) + # This is a simplified approach - in production, you'd use actual timestamps + + for strategy_name, strategy_results in chunk_results.items(): + if not strategy_results: + trimmed_results[strategy_name] = [] + continue + + # Sort results by timestamp to identify warm-up period + sorted_results = sorted(strategy_results, key=lambda r: r.timestamp) + + # Simple approach: remove first portion equivalent to warm-up days + # This assumes roughly uniform distribution of signals + total_results = len(sorted_results) + if total_results > 0: + # Estimate warm-up portion based on proportion + processing_days = target_end_days - max(0, target_start_days - warmup_days) + target_days = target_end_days - target_start_days + + if processing_days > target_days: + warmup_proportion = warmup_days / processing_days + warmup_count = int(total_results * warmup_proportion) + + # Keep results after warm-up period + trimmed_results[strategy_name] = sorted_results[warmup_count:] + + self.logger.debug(f"Strategy {strategy_name}: trimmed {warmup_count}/{total_results} warm-up results") + else: + # No trimming needed + trimmed_results[strategy_name] = sorted_results + else: + trimmed_results[strategy_name] = [] + + return trimmed_results + + except Exception as e: + self.logger.error(f"Error trimming warm-up from results: {e}") + return chunk_results # Return original results if trimming fails \ No newline at end of file diff --git a/strategies/data_integration.py b/strategies/data_integration.py new file mode 100644 index 0000000..a44fa28 --- /dev/null +++ b/strategies/data_integration.py @@ -0,0 +1,1060 @@ +""" +Strategy Data Integration Module + +This module provides seamless integration between market data, technical indicators, +and strategy calculations, handling data orchestration, caching, and optimization +for strategy signal generation and backtesting. 
+""" + +import pandas as pd +from datetime import datetime, timezone, timedelta +from typing import List, Dict, Any, Optional, Tuple +from dataclasses import dataclass +import json +import pickle +import os +from pathlib import Path + +from database.operations import get_database_operations, DatabaseOperationError +from data.common.data_types import OHLCVCandle +from data.common.indicators import TechnicalIndicators +from components.charts.config.indicator_defs import convert_database_candles_to_ohlcv +from .factory import StrategyFactory +from .data_types import StrategyResult +from utils.logger import get_logger + +# Initialize logger +logger = get_logger() + + +@dataclass +class StrategyDataIntegrationConfig: + """Configuration for strategy data integration""" + default_days_back: int = 30 # Strategies often need more historical data + min_candles_required: int = 100 # Strategies need sufficient data for reliable signals + max_candles_limit: int = 5000 # Allow larger datasets for backtesting + cache_timeout_minutes: int = 15 # Longer cache for strategy analysis + enable_data_validation: bool = True + enable_sparse_data_handling: bool = True + enable_indicator_caching: bool = True + max_cached_indicators: int = 50 # Limit memory usage + + +class StrategyDataIntegrator: + """ + Integrates market data with strategy calculations and signal generation. + + This class handles: + - Fetching and preparing market data for strategies + - Pre-calculating required technical indicators + - Orchestrating strategy signal generation + - Caching computed indicators for performance + - Multi-timeframe data handling + - Strategy signal validation and storage + """ + + def __init__(self, config: StrategyDataIntegrationConfig = None): + """ + Initialize strategy data integrator. 
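+
+        Example (illustrative sketch):
+
+            config = StrategyDataIntegrationConfig(default_days_back=60)
+            integrator = StrategyDataIntegrator(config)
+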
+ + Args: + config: Integration configuration + """ + self.config = config or StrategyDataIntegrationConfig() + self.logger = logger + self.db_ops = get_database_operations(self.logger) + self.technical_indicators = TechnicalIndicators(self.logger) + self.strategy_factory = StrategyFactory(self.logger) + + # Caching for computed indicators and market data + self._indicator_cache: Dict[str, Dict[str, Any]] = {} + self._data_cache: Dict[str, Dict[str, Any]] = {} + + # Cache persistence setup + self._cache_dir = Path("temp/strategy_cache") + self._cache_dir.mkdir(parents=True, exist_ok=True) + self._persistent_cache_file = self._cache_dir / "indicator_cache.pkl" + + # Load persistent cache if available + self._load_persistent_cache() + + if self.logger: + self.logger.info("StrategyDataIntegrator: Initialized with strategy-optimized configuration") + + def _load_persistent_cache(self) -> None: + """Load indicator cache from persistent storage.""" + try: + if self._persistent_cache_file.exists(): + with open(self._persistent_cache_file, 'rb') as f: + cached_data = pickle.load(f) + + # Validate and filter expired entries + current_time = datetime.now(timezone.utc) + valid_entries = 0 + + for key, data in cached_data.items(): + cache_time = data.get('timestamp') + if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes: + self._indicator_cache[key] = data + valid_entries += 1 + + self.logger.debug(f"Loaded {valid_entries} valid cache entries from persistent storage") + except Exception as e: + self.logger.warning(f"Failed to load persistent cache: {e}") + + def _save_persistent_cache(self) -> None: + """Save indicator cache to persistent storage.""" + try: + # Only save recent, valid entries to avoid bloat + current_time = datetime.now(timezone.utc) + entries_to_save = {} + + for key, data in self._indicator_cache.items(): + cache_time = data.get('timestamp') + if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes: + entries_to_save[key] = data + + with open(self._persistent_cache_file, 'wb') as f: + pickle.dump(entries_to_save, f) + + self.logger.debug(f"Saved {len(entries_to_save)} cache entries to persistent storage") + except Exception as e: + self.logger.warning(f"Failed to save persistent cache: {e}") + + def calculate_strategy_signals( + self, + strategy_name: str, + strategy_config: Dict[str, Any], + symbol: str, + timeframe: str, + days_back: Optional[int] = None, + exchange: str = "okx", + enable_caching: bool = True + ) -> List[StrategyResult]: + """ + Main orchestration method for calculating strategy signals. 
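+
+        Example (illustrative sketch -- the strategy name and parameters are
+        hypothetical and must match a strategy known to the factory):
+
+            results = integrator.calculate_strategy_signals(
+                strategy_name='ema_crossover',
+                strategy_config={'fast_period': 12, 'slow_period': 26},
+                symbol='BTC-USDT',
+                timeframe='1h',
+                days_back=60
+            )
+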
+ + Args: + strategy_name: Name of the strategy to execute + strategy_config: Strategy-specific configuration parameters + symbol: Trading pair symbol + timeframe: Timeframe for strategy calculation + days_back: Number of days to look back for data + exchange: Exchange name + enable_caching: Whether to use cached indicator results + + Returns: + List of strategy results with signals + """ + try: + self.logger.info(f"StrategyDataIntegrator: Calculating signals for {strategy_name} on {symbol} {timeframe}") + + # Get market data for strategy + market_df = self.get_strategy_data( + symbol=symbol, + timeframe=timeframe, + days_back=days_back, + exchange=exchange + ) + + if market_df.empty: + self.logger.warning(f"No market data available for {symbol} {timeframe}") + return [] + + # Validate data sufficiency + if not self.validate_strategy_requirements(market_df, strategy_name): + self.logger.warning(f"Insufficient data for strategy {strategy_name}") + return [] + + # Calculate strategy signals using factory + results = self.strategy_factory.calculate_strategy_signals( + strategy_name=strategy_name, + df=market_df, + strategy_config=strategy_config + ) + + # Add metadata to results + for result in results: + if not hasattr(result, 'metadata') or result.metadata is None: + result.metadata = {} + result.metadata.update({ + 'symbol': symbol, + 'timeframe': timeframe, + 'exchange': exchange, + 'data_points_used': len(market_df), + 'calculation_timestamp': datetime.now(timezone.utc).isoformat() + }) + + self.logger.info(f"Generated {len(results)} strategy results for {strategy_name}") + return results + + except Exception as e: + self.logger.error(f"Error calculating strategy signals for {strategy_name}: {e}") + return [] + + def get_strategy_data( + self, + symbol: str, + timeframe: str, + days_back: Optional[int] = None, + exchange: str = "okx" + ) -> pd.DataFrame: + """ + Fetch and prepare market data for strategy calculation. 
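+
+        Example (illustrative sketch -- symbol and timeframe are hypothetical):
+
+            df = integrator.get_strategy_data('BTC-USDT', '1h', days_back=30)
+            if not df.empty:
+                print(df[['close', 'volume']].tail())
+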
+ + Args: + symbol: Trading pair symbol + timeframe: Timeframe + days_back: Number of days to look back + exchange: Exchange name + + Returns: + DataFrame with OHLCV data ready for strategy calculation + """ + try: + # Use configured default if not specified + if days_back is None: + days_back = self.config.default_days_back + + # Check cache first + cache_key = f"market_data_{symbol}_{timeframe}_{days_back}_{exchange}" + cached_data = self._get_cached_data(cache_key) + if cached_data: + self.logger.debug(f"Using cached market data for {symbol} {timeframe}") + return cached_data['dataframe'] + + # Calculate time range + end_time = datetime.now(timezone.utc) + start_time = end_time - timedelta(days=days_back) + + # Fetch raw market data + raw_candles = self.db_ops.market_data.get_candles( + symbol=symbol, + timeframe=timeframe, + start_time=start_time, + end_time=end_time, + exchange=exchange + ) + + if not raw_candles: + self.logger.warning(f"No raw candles found for {symbol} {timeframe}") + return pd.DataFrame() + + # Convert to OHLCV format + ohlcv_candles = convert_database_candles_to_ohlcv(raw_candles) + + if not ohlcv_candles: + self.logger.warning(f"No OHLCV candles after conversion for {symbol} {timeframe}") + return pd.DataFrame() + + # Convert to DataFrame for strategy processing + market_df = self._prepare_dataframe_from_candles(ohlcv_candles) + + # Cache the results + self._cache_data(cache_key, { + 'dataframe': market_df, + 'candle_count': len(raw_candles), + 'timestamp': datetime.now(timezone.utc) + }) + + self.logger.debug(f"Fetched {len(raw_candles)} candles for strategy data: {symbol} {timeframe}") + return market_df + + except DatabaseOperationError as e: + self.logger.error(f"Database error fetching strategy data: {e}") + return pd.DataFrame() + except Exception as e: + self.logger.error(f"Error fetching strategy data for {symbol} {timeframe}: {e}") + return pd.DataFrame() + + def _prepare_dataframe_from_candles(self, candles: List[OHLCVCandle]) -> pd.DataFrame: + """ + Convert OHLCV candles to DataFrame optimized for strategy calculations. + Uses vectorized approach for improved performance. + + Args: + candles: List of OHLCV candles + + Returns: + DataFrame with OHLCV data + """ + if not candles: + return pd.DataFrame() + + # Vectorized DataFrame construction - extract all values at once + df = pd.DataFrame({ + 'timestamp': [candle.end_time for candle in candles], + 'open': [float(candle.open) for candle in candles], + 'high': [float(candle.high) for candle in candles], + 'low': [float(candle.low) for candle in candles], + 'close': [float(candle.close) for candle in candles], + 'volume': [float(candle.volume) for candle in candles] + }) + + # Set timestamp as index and sort + df['timestamp'] = pd.to_datetime(df['timestamp']) + df.set_index('timestamp', inplace=True) + df.sort_index(inplace=True) + + # Remove index name for cleaner appearance + df.index.name = None + + # Ensure proper data types using vectorized operations + numeric_columns = ['open', 'high', 'low', 'close', 'volume'] + df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce') + + # Remove any rows with NaN values + df.dropna(inplace=True) + + return df + + def validate_strategy_requirements( + self, + market_df: pd.DataFrame, + strategy_name: str + ) -> bool: + """ + Validate that market data meets strategy requirements. 
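+
+        Example (illustrative sketch; "ema_crossover" is an assumed name):
+
+            if integrator.validate_strategy_requirements(df, "ema_crossover"):
+                ...  # data is sufficient; proceed with signal calculation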
+ + Args: + market_df: Market data DataFrame + strategy_name: Name of the strategy + + Returns: + True if data meets requirements, False otherwise + """ + try: + # Check minimum data points + if len(market_df) < self.config.min_candles_required: + self.logger.warning( + f"Insufficient data points: {len(market_df)} < {self.config.min_candles_required}" + ) + return False + + # Check for required columns + required_columns = ['open', 'high', 'low', 'close', 'volume'] + missing_columns = [col for col in required_columns if col not in market_df.columns] + if missing_columns: + self.logger.error(f"Missing required columns: {missing_columns}") + return False + + # Check for data quality (no all-zero or invalid values) + for col in ['open', 'high', 'low', 'close']: + if (market_df[col] <= 0).any(): + self.logger.warning(f"Invalid price data found in column {col}") + return False + + # Strategy-specific validations could be added here + # For example, some strategies might need specific minimum periods + + return True + + except Exception as e: + self.logger.error(f"Error validating strategy requirements: {e}") + return False + + def _get_cached_data(self, cache_key: str) -> Optional[Dict[str, Any]]: + """Get cached data if available and not expired.""" + if cache_key not in self._data_cache: + return None + + cached_data = self._data_cache[cache_key] + cache_time = cached_data.get('timestamp') + + if cache_time: + age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60 + if age_minutes < self.config.cache_timeout_minutes: + return cached_data + else: + # Remove expired cache + del self._data_cache[cache_key] + + return None + + def _cache_data(self, cache_key: str, data: Dict[str, Any]) -> None: + """Cache data with timestamp.""" + self._data_cache[cache_key] = data + + # Simple cache size management + if len(self._data_cache) > 100: # Limit cache size + # Remove oldest entries + oldest_keys = sorted( + self._data_cache.keys(), + key=lambda k: self._data_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc)) + )[:20] + for key in oldest_keys: + del self._data_cache[key] + + def clear_cache(self) -> None: + """Clear all cached data including persistent storage.""" + self._data_cache.clear() + self._indicator_cache.clear() + + # Clear persistent cache file + try: + if self._persistent_cache_file.exists(): + self._persistent_cache_file.unlink() + self.logger.debug("Cleared persistent cache file") + except Exception as e: + self.logger.warning(f"Failed to clear persistent cache file: {e}") + + self.logger.info("StrategyDataIntegrator: Cleared all caches") + + def get_cache_stats(self) -> Dict[str, Any]: + """Get cache statistics for monitoring.""" + return { + 'data_cache_size': len(self._data_cache), + 'indicator_cache_size': len(self._indicator_cache), + 'config': { + 'cache_timeout_minutes': self.config.cache_timeout_minutes, + 'enable_indicator_caching': self.config.enable_indicator_caching, + 'max_cached_indicators': self.config.max_cached_indicators + } + } + + def calculate_indicators_batch( + self, + market_df: pd.DataFrame, + indicator_configs: List[Dict[str, Any]], + enable_caching: bool = True + ) -> Dict[str, pd.DataFrame]: + """ + Calculate multiple indicators efficiently using vectorized operations. 
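+
+        Example (illustrative sketch; the indicator types and periods are
+        assumptions):
+
+            indicators = integrator.calculate_indicators_batch(
+                market_df=df,
+                indicator_configs=[
+                    {"type": "ema", "period": 12},
+                    {"type": "ema", "period": 26},
+                    {"type": "rsi", "period": 14},
+                ],
+            )
+            # Keys follow _create_indicator_key, e.g. "ema_period_12"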
+ + Args: + market_df: DataFrame with OHLCV data + indicator_configs: List of indicator configurations + enable_caching: Whether to use cached results + + Returns: + Dictionary mapping indicator keys to their DataFrames + """ + try: + if market_df.empty: + self.logger.warning("StrategyDataIntegrator: Empty market data for indicator calculation") + return {} + + indicators_data = {} + + # Group indicators by type for potential optimization + indicators_by_type = {} + for config in indicator_configs: + indicator_type = config.get('type') + if indicator_type not in indicators_by_type: + indicators_by_type[indicator_type] = [] + indicators_by_type[indicator_type].append(config) + + # Calculate indicators, leveraging caching and batching where possible + for indicator_type, configs in indicators_by_type.items(): + for config in configs: + indicator_key = self._create_indicator_key(config) + + # Check cache first if enabled + if enable_caching and self.config.enable_indicator_caching: + cached_result = self._get_cached_indicator(indicator_key, market_df) + if cached_result is not None: + indicators_data[indicator_key] = cached_result + continue + + try: + # Calculate indicator using TechnicalIndicators class + indicator_result = self.technical_indicators.calculate( + indicator_type, + market_df, + **{k: v for k, v in config.items() if k != 'type'} + ) + + if indicator_result is not None and not indicator_result.empty: + indicators_data[indicator_key] = indicator_result + + # Cache the result if enabled + if enable_caching and self.config.enable_indicator_caching: + self._cache_indicator_result(indicator_key, indicator_result, market_df) + else: + self.logger.warning(f"Empty result for indicator: {indicator_key}") + indicators_data[indicator_key] = pd.DataFrame() + + except Exception as e: + self.logger.error(f"Error calculating indicator {indicator_key}: {e}") + indicators_data[indicator_key] = pd.DataFrame() + + self.logger.debug(f"Calculated {len(indicators_data)} indicators in batch") + return indicators_data + + except Exception as e: + self.logger.error(f"Error in batch indicator calculation: {e}") + return {} + + def _create_indicator_key(self, indicator_config: Dict[str, Any]) -> str: + """ + Create a unique key for indicator configuration. + + Args: + indicator_config: Indicator configuration dictionary + + Returns: + Unique string key for the indicator + """ + indicator_type = indicator_config.get('type', 'unknown') + + # Create key from type and parameters + params = {k: v for k, v in indicator_config.items() if k != 'type'} + + if params: + # Sort parameters for consistent key generation + param_str = "_".join(f"{k}_{v}" for k, v in sorted(params.items())) + return f"{indicator_type}_{param_str}" + else: + return indicator_type + + def _get_cached_indicator( + self, + indicator_key: str, + market_df: pd.DataFrame + ) -> Optional[pd.DataFrame]: + """ + Get cached indicator result if available and valid. 
+ + Args: + indicator_key: Unique indicator key + market_df: Current market data DataFrame + + Returns: + Cached DataFrame if valid, None otherwise + """ + if indicator_key not in self._indicator_cache: + return None + + cached_data = self._indicator_cache[indicator_key] + + # Check if cache is still valid + cache_time = cached_data.get('timestamp') + if cache_time: + age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60 + if age_minutes >= self.config.cache_timeout_minutes: + # Remove expired cache + del self._indicator_cache[indicator_key] + return None + + # Check if market data size matches (simple validation) + cached_result = cached_data.get('result') + if cached_result is not None and len(cached_result) == len(market_df): + self.logger.debug(f"Using cached indicator result for {indicator_key}") + return cached_result + + return None + + def _cache_indicator_result( + self, + indicator_key: str, + result: pd.DataFrame, + market_df: pd.DataFrame + ) -> None: + """ + Cache indicator calculation result. + + Args: + indicator_key: Unique indicator key + result: Calculated indicator DataFrame + market_df: Market data used for calculation + """ + self._indicator_cache[indicator_key] = { + 'result': result.copy(), # Store a copy to avoid modification + 'market_data_length': len(market_df), + 'timestamp': datetime.now(timezone.utc) + } + + # Manage cache size + if len(self._indicator_cache) > self.config.max_cached_indicators: + # Remove oldest entries + oldest_keys = sorted( + self._indicator_cache.keys(), + key=lambda k: self._indicator_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc)) + )[:10] # Remove 10 oldest entries + + for key in oldest_keys: + del self._indicator_cache[key] + + self.logger.debug(f"Cleaned up indicator cache, removed {len(oldest_keys)} entries") + + # Save to persistent storage periodically + if len(self._indicator_cache) % 10 == 0: # Every 10 new cache entries + self._save_persistent_cache() + + def calculate_strategy_signals_enhanced( + self, + strategy_name: str, + strategy_config: Dict[str, Any], + symbol: str, + timeframe: str, + days_back: Optional[int] = None, + exchange: str = "okx", + enable_caching: bool = True + ) -> List[StrategyResult]: + """ + Enhanced strategy signal calculation with vectorized indicator batching. + + This method uses the new indicator batching interface for improved performance + while maintaining compatibility with the existing single-strategy interface. 
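+
+        Example (illustrative sketch; arguments mirror
+        calculate_strategy_signals and the values are assumptions):
+
+            results = integrator.calculate_strategy_signals_enhanced(
+                strategy_name="rsi",
+                strategy_config={"period": 14},
+                symbol="BTC-USDT",
+                timeframe="4h",
+            )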
+ + Args: + strategy_name: Name of the strategy to execute + strategy_config: Strategy-specific configuration parameters + symbol: Trading pair symbol + timeframe: Timeframe for strategy calculation + days_back: Number of days to look back for data + exchange: Exchange name + enable_caching: Whether to use cached results + + Returns: + List of strategy results with signals + """ + try: + self.logger.info(f"StrategyDataIntegrator: Enhanced calculation for {strategy_name} on {symbol} {timeframe}") + + # Get market data for strategy + market_df = self.get_strategy_data( + symbol=symbol, + timeframe=timeframe, + days_back=days_back, + exchange=exchange + ) + + if market_df.empty: + self.logger.warning(f"No market data available for {symbol} {timeframe}") + return [] + + # Validate data sufficiency + if not self.validate_strategy_requirements(market_df, strategy_name): + self.logger.warning(f"Insufficient data for strategy {strategy_name}") + return [] + + # Get required indicators from strategy + strategy = self.strategy_factory.create_strategy(strategy_name) + if not strategy: + self.logger.error(f"Could not create strategy: {strategy_name}") + return [] + + required_indicators = strategy.get_required_indicators() + + # Use vectorized indicator calculation + indicators_data = self.calculate_indicators_batch( + market_df=market_df, + indicator_configs=required_indicators, + enable_caching=enable_caching + ) + + # Calculate strategy signals using the strategy directly + results = strategy.calculate(market_df, indicators_data, **strategy_config) + + # Add metadata to results + for result in results: + if not hasattr(result, 'metadata') or result.metadata is None: + result.metadata = {} + result.metadata.update({ + 'symbol': symbol, + 'timeframe': timeframe, + 'exchange': exchange, + 'data_points_used': len(market_df), + 'indicators_calculated': len(indicators_data), + 'calculation_timestamp': datetime.now(timezone.utc).isoformat(), + 'enhanced_calculation': True + }) + + self.logger.info(f"Enhanced calculation generated {len(results)} strategy results for {strategy_name}") + return results + + except Exception as e: + self.logger.error(f"Error in enhanced strategy signal calculation for {strategy_name}: {e}") + return [] + + def analyze_indicator_dependencies( + self, + indicator_configs: List[Dict[str, Any]] + ) -> Dict[str, List[str]]: + """ + Analyze indicator dependencies to optimize calculation order. + + Args: + indicator_configs: List of indicator configurations + + Returns: + Dictionary mapping indicator keys to their dependencies + """ + dependencies = {} + + for config in indicator_configs: + indicator_key = self._create_indicator_key(config) + indicator_type = config.get('type', '').lower() + + # Define known indicator dependencies + # Most indicators depend only on price data, but some depend on other indicators + deps = [] + + if indicator_type == 'macd': + # MACD signal line depends on MACD line (handled internally by TechnicalIndicators) + deps = [] # No external dependencies + elif indicator_type == 'bollinger_bands': + # Bollinger Bands depend on SMA (handled internally) + deps = [] # No external dependencies + elif indicator_type in ['stochastic', 'rsi_stochastic']: + # These might depend on RSI (if implemented) + deps = [] # For now, no external dependencies + else: + # Most indicators (SMA, EMA, RSI, etc.) 
depend only on price data + deps = [] + + dependencies[indicator_key] = deps + + return dependencies + + def resolve_calculation_order( + self, + indicator_configs: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """ + Resolve optimal calculation order based on indicator dependencies. + + Args: + indicator_configs: List of indicator configurations + + Returns: + List of indicator configurations in optimal calculation order + """ + try: + # Analyze dependencies + dependencies = self.analyze_indicator_dependencies(indicator_configs) + + # For current implementation, most indicators don't have external dependencies + # So we can optimize by grouping similar indicators together + + # Group by indicator type for potential batching optimizations + type_groups = {} + for config in indicator_configs: + indicator_type = config.get('type', 'unknown') + if indicator_type not in type_groups: + type_groups[indicator_type] = [] + type_groups[indicator_type].append(config) + + # Order types by computational complexity (lighter first) + type_priority = { + 'sma': 1, # Simple moving average - fastest + 'ema': 2, # Exponential moving average + 'rsi': 3, # RSI calculation + 'macd': 4, # MACD - more complex + 'bollinger_bands': 5, # Bollinger Bands + 'stochastic': 6 # Most complex + } + + # Sort groups by priority, unknown types go last + sorted_types = sorted( + type_groups.keys(), + key=lambda t: type_priority.get(t, 999) + ) + + # Build final ordered list + ordered_configs = [] + for indicator_type in sorted_types: + # Within each type, sort by period (smaller periods first for caching benefits) + configs_for_type = type_groups[indicator_type] + configs_for_type.sort(key=lambda c: c.get('period', 0)) + ordered_configs.extend(configs_for_type) + + self.logger.debug(f"Resolved calculation order for {len(ordered_configs)} indicators") + return ordered_configs + + except Exception as e: + self.logger.error(f"Error resolving calculation order: {e}") + # Fallback to original order + return indicator_configs + + def calculate_indicators_orchestrated( + self, + market_df: pd.DataFrame, + indicator_configs: List[Dict[str, Any]], + enable_caching: bool = True + ) -> Dict[str, pd.DataFrame]: + """ + Orchestrated indicator calculation with dependency resolution and optimization. 
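+
+        Example (illustrative sketch; the configs are assumptions):
+
+            indicators = integrator.calculate_indicators_orchestrated(
+                market_df=df,
+                indicator_configs=[{"type": "macd"}, {"type": "sma", "period": 20}],
+            )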
+ + Args: + market_df: DataFrame with OHLCV data + indicator_configs: List of indicator configurations + enable_caching: Whether to use cached results + + Returns: + Dictionary mapping indicator keys to their DataFrames + """ + try: + if market_df.empty: + self.logger.warning("StrategyDataIntegrator: Empty market data for orchestrated calculation") + return {} + + # Resolve optimal calculation order + ordered_configs = self.resolve_calculation_order(indicator_configs) + + indicators_data = {} + calculation_stats = { + 'cache_hits': 0, + 'calculations_performed': 0, + 'errors': 0 + } + + # Calculate indicators in optimized order + for config in ordered_configs: + indicator_key = self._create_indicator_key(config) + indicator_type = config.get('type') + + # Check cache first if enabled + if enable_caching and self.config.enable_indicator_caching: + cached_result = self._get_cached_indicator(indicator_key, market_df) + if cached_result is not None: + indicators_data[indicator_key] = cached_result + calculation_stats['cache_hits'] += 1 + continue + + try: + # Calculate indicator using TechnicalIndicators class + indicator_result = self.technical_indicators.calculate( + indicator_type, + market_df, + **{k: v for k, v in config.items() if k != 'type'} + ) + + if indicator_result is not None and not indicator_result.empty: + indicators_data[indicator_key] = indicator_result + calculation_stats['calculations_performed'] += 1 + + # Cache the result if enabled + if enable_caching and self.config.enable_indicator_caching: + self._cache_indicator_result(indicator_key, indicator_result, market_df) + else: + self.logger.warning(f"Empty result for indicator: {indicator_key}") + indicators_data[indicator_key] = pd.DataFrame() + calculation_stats['errors'] += 1 + + except Exception as e: + self.logger.error(f"Error calculating indicator {indicator_key}: {e}") + indicators_data[indicator_key] = pd.DataFrame() + calculation_stats['errors'] += 1 + + self.logger.debug( + f"Orchestrated calculation complete: {calculation_stats['calculations_performed']} calculated, " + f"{calculation_stats['cache_hits']} cached, {calculation_stats['errors']} errors" + ) + + return indicators_data + + except Exception as e: + self.logger.error(f"Error in orchestrated indicator calculation: {e}") + return {} + + def calculate_strategy_signals_orchestrated( + self, + strategy_name: str, + strategy_config: Dict[str, Any], + symbol: str, + timeframe: str, + days_back: Optional[int] = None, + exchange: str = "okx", + enable_caching: bool = True + ) -> List[StrategyResult]: + """ + Fully orchestrated strategy signal calculation with optimized workflow. 
+ + This method provides the most optimized calculation flow: + Data → Dependency Analysis → Orchestrated Indicators → Strategy → Results + + Args: + strategy_name: Name of the strategy to execute + strategy_config: Strategy-specific configuration parameters + symbol: Trading pair symbol + timeframe: Timeframe for strategy calculation + days_back: Number of days to look back for data + exchange: Exchange name + enable_caching: Whether to use cached results + + Returns: + List of strategy results with signals + """ + try: + self.logger.info(f"StrategyDataIntegrator: Orchestrated calculation for {strategy_name} on {symbol} {timeframe}") + + # Step 1: Get market data + market_df = self.get_strategy_data( + symbol=symbol, + timeframe=timeframe, + days_back=days_back, + exchange=exchange + ) + + if market_df.empty: + self.logger.warning(f"No market data available for {symbol} {timeframe}") + return [] + + # Step 2: Validate data sufficiency + if not self.validate_strategy_requirements(market_df, strategy_name): + self.logger.warning(f"Insufficient data for strategy {strategy_name}") + return [] + + # Step 3: Get strategy and analyze its requirements + strategy = self.strategy_factory.create_strategy(strategy_name) + if not strategy: + self.logger.error(f"Could not create strategy: {strategy_name}") + return [] + + required_indicators = strategy.get_required_indicators() + + # Step 4: Orchestrated indicator calculation with dependency resolution + indicators_data = self.calculate_indicators_orchestrated( + market_df=market_df, + indicator_configs=required_indicators, + enable_caching=enable_caching + ) + + # Step 5: Calculate strategy signals + results = strategy.calculate(market_df, indicators_data, **strategy_config) + + # Step 6: Add comprehensive metadata + for result in results: + if not hasattr(result, 'metadata') or result.metadata is None: + result.metadata = {} + result.metadata.update({ + 'symbol': symbol, + 'timeframe': timeframe, + 'exchange': exchange, + 'data_points_used': len(market_df), + 'indicators_calculated': len(indicators_data), + 'required_indicators': len(required_indicators), + 'calculation_timestamp': datetime.now(timezone.utc).isoformat(), + 'orchestrated_calculation': True, + 'calculation_method': 'orchestrated' + }) + + self.logger.info( + f"Orchestrated calculation generated {len(results)} strategy results for {strategy_name} " + f"using {len(indicators_data)} indicators" + ) + return results + + except Exception as e: + self.logger.error(f"Error in orchestrated strategy calculation for {strategy_name}: {e}") + return [] + + def get_calculation_performance_stats(self) -> Dict[str, Any]: + """ + Get performance statistics for calculation methods. + + Returns: + Dictionary with performance metrics + """ + cache_stats = self.get_cache_stats() + + return { + 'cache_performance': cache_stats, + 'available_methods': [ + 'calculate_strategy_signals', # Basic method + 'calculate_strategy_signals_enhanced', # Vectorized method + 'calculate_strategy_signals_orchestrated' # Fully orchestrated method + ], + 'recommended_method': 'calculate_strategy_signals_orchestrated', + 'performance_tips': [ + 'Use orchestrated method for best performance', + 'Enable caching for repeated calculations', + 'Use larger datasets to benefit from vectorization', + 'Monitor cache hit rates for optimization' + ] + } + + def get_shared_indicator_cache(self, indicator_key: str, market_data_length: int = None) -> Optional[pd.DataFrame]: + """ + Get indicator result from shared cache. 
+ + This method allows strategies to share cached indicator results, + improving efficiency when multiple strategies use the same indicators. + + Args: + indicator_key: Unique indicator key + market_data_length: Expected market data length for validation + + Returns: + Cached DataFrame if available and valid, None otherwise + """ + if indicator_key not in self._indicator_cache: + return None + + cached_data = self._indicator_cache[indicator_key] + + # Check if cache is still valid + cache_time = cached_data.get('timestamp') + if cache_time: + age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60 + if age_minutes >= self.config.cache_timeout_minutes: + # Remove expired cache + del self._indicator_cache[indicator_key] + return None + + # Validate market data length if provided + cached_result = cached_data.get('result') + if market_data_length is not None: + cached_length = cached_data.get('market_data_length') + if cached_length != market_data_length: + return None + + if cached_result is not None: + self.logger.debug(f"Using shared cached indicator result for {indicator_key}") + return cached_result + + return None + + def share_indicator_result( + self, + indicator_key: str, + result: pd.DataFrame, + market_data_length: int + ) -> None: + """ + Share indicator result for cross-strategy use. + + Args: + indicator_key: Unique indicator key + result: Calculated indicator DataFrame + market_data_length: Length of market data used for calculation + """ + self._indicator_cache[indicator_key] = { + 'result': result.copy(), + 'market_data_length': market_data_length, + 'timestamp': datetime.now(timezone.utc), + 'shared': True # Mark as shared cache entry + } + + # Save to persistent storage for cross-session sharing + if len(self._indicator_cache) % 5 == 0: # More frequent saves for shared cache + self._save_persistent_cache() + + self.logger.debug(f"Shared indicator result for cross-strategy use: {indicator_key}") + + def get_cache_sharing_stats(self) -> Dict[str, Any]: + """Get statistics about cache sharing across strategies.""" + shared_entries = sum(1 for data in self._indicator_cache.values() if data.get('shared', False)) + + return { + 'total_cached_indicators': len(self._indicator_cache), + 'shared_cache_entries': shared_entries, + 'private_cache_entries': len(self._indicator_cache) - shared_entries, + 'sharing_efficiency': shared_entries / len(self._indicator_cache) if self._indicator_cache else 0.0, + 'persistent_cache_available': self._persistent_cache_file.exists() + } + + +def get_strategy_data_integrator(config: StrategyDataIntegrationConfig = None) -> StrategyDataIntegrator: + """ + Factory function to get a strategy data integrator instance. + + Args: + config: Optional configuration + + Returns: + StrategyDataIntegrator instance + """ + return StrategyDataIntegrator(config) \ No newline at end of file diff --git a/strategies/realtime_execution.py b/strategies/realtime_execution.py new file mode 100644 index 0000000..d9a0f3c --- /dev/null +++ b/strategies/realtime_execution.py @@ -0,0 +1,649 @@ +""" +Real-time Strategy Execution Pipeline + +This module provides real-time strategy execution capabilities that integrate +with the existing chart data refresh cycle. It handles incremental strategy +calculations, real-time signal generation, and live chart updates. 
+""" + +import pandas as pd +from datetime import datetime, timezone, timedelta +from typing import List, Dict, Any, Optional, Callable, Set, Tuple +from dataclasses import dataclass, field +from threading import Thread, Event, Lock +from queue import Queue, Empty +import asyncio +from concurrent.futures import ThreadPoolExecutor +import time + +from database.operations import get_database_operations, DatabaseOperationError +from data.common.data_types import OHLCVCandle +from components.charts.data_integration import MarketDataIntegrator +from .data_integration import StrategyDataIntegrator, StrategyDataIntegrationConfig +from .factory import StrategyFactory +from .data_types import StrategyResult, StrategySignal +from utils.logger import get_logger + +# Initialize logger +logger = get_logger() + + +@dataclass +class RealTimeConfig: + """Configuration for real-time strategy execution""" + refresh_interval_seconds: int = 30 # How often to check for new data + max_strategies_concurrent: int = 5 # Maximum concurrent strategy calculations + incremental_calculation: bool = True # Use incremental vs full recalculation + signal_batch_size: int = 100 # Batch size for signal storage + enable_signal_broadcasting: bool = True # Enable real-time signal broadcasting + max_signal_queue_size: int = 1000 # Maximum signals in queue before dropping + chart_update_throttle_ms: int = 1000 # Minimum time between chart updates + error_retry_attempts: int = 3 # Number of retries on calculation errors + error_retry_delay_seconds: int = 5 # Delay between retry attempts + + +@dataclass +class StrategyExecutionContext: + """Context for strategy execution""" + strategy_name: str + strategy_config: Dict[str, Any] + symbol: str + timeframe: str + exchange: str = "okx" + last_calculation_time: Optional[datetime] = None + consecutive_errors: int = 0 + is_active: bool = True + + +@dataclass +class RealTimeSignal: + """Real-time signal with metadata""" + strategy_result: StrategyResult + context: StrategyExecutionContext + generation_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + chart_update_required: bool = True + + +class StrategySignalBroadcaster: + """ + Handles real-time signal broadcasting and distribution. + + Manages signal queues, chart updates, and database storage + for real-time strategy signals. 
+ """ + + def __init__(self, config: RealTimeConfig): + """Initialize signal broadcaster.""" + self.config = config + self.logger = logger + self.db_ops = get_database_operations(self.logger) + + # Signal queues + self._signal_queue: Queue[RealTimeSignal] = Queue(maxsize=self.config.max_signal_queue_size) + self._chart_update_queue: Queue[RealTimeSignal] = Queue() + + # Chart update throttling + self._last_chart_update = {} # symbol_timeframe -> timestamp + self._chart_update_lock = Lock() + + # Background processing + self._processing_thread: Optional[Thread] = None + self._stop_event = Event() + self._is_running = False + + # Callback for chart updates + self._chart_update_callback: Optional[Callable] = None + + if self.logger: + self.logger.info("StrategySignalBroadcaster: Initialized") + + def start(self) -> None: + """Start the signal broadcasting service.""" + if self._is_running: + return + + self._is_running = True + self._stop_event.clear() + + # Start background processing thread + self._processing_thread = Thread( + target=self._process_signals_loop, + name="StrategySignalProcessor", + daemon=True + ) + self._processing_thread.start() + + if self.logger: + self.logger.info("StrategySignalBroadcaster: Started signal processing") + + def stop(self) -> None: + """Stop the signal broadcasting service.""" + if not self._is_running: + return + + self._is_running = False + self._stop_event.set() + + if self._processing_thread and self._processing_thread.is_alive(): + self._processing_thread.join(timeout=5.0) + + if self.logger: + self.logger.info("StrategySignalBroadcaster: Stopped signal processing") + + def broadcast_signal(self, signal: RealTimeSignal) -> bool: + """ + Broadcast a real-time signal. + + Args: + signal: Real-time signal to broadcast + + Returns: + True if signal was queued successfully, False if queue is full + """ + try: + self._signal_queue.put_nowait(signal) + return True + except: + # Queue is full, drop the signal + if self.logger: + self.logger.warning(f"Signal queue full, dropping signal for {signal.context.symbol}") + return False + + def set_chart_update_callback(self, callback: Callable[[RealTimeSignal], None]) -> None: + """Set callback for chart updates.""" + self._chart_update_callback = callback + + def _process_signals_loop(self) -> None: + """Main signal processing loop.""" + batch_signals = [] + + while not self._stop_event.is_set(): + try: + # Collect signals in batches + try: + signal = self._signal_queue.get(timeout=1.0) + batch_signals.append(signal) + + # Collect more signals if available (up to batch size) + while len(batch_signals) < self.config.signal_batch_size: + try: + signal = self._signal_queue.get_nowait() + batch_signals.append(signal) + except Empty: + break + + # Process the batch + if batch_signals: + self._process_signal_batch(batch_signals) + batch_signals.clear() + + except Empty: + # No signals to process, continue + continue + + except Exception as e: + if self.logger: + self.logger.error(f"Error in signal processing loop: {e}") + time.sleep(1.0) # Brief pause on error + + def _process_signal_batch(self, signals: List[RealTimeSignal]) -> None: + """Process a batch of signals.""" + try: + # Store signals in database + self._store_signals_batch(signals) + + # Process chart updates + self._process_chart_updates(signals) + + except Exception as e: + if self.logger: + self.logger.error(f"Error processing signal batch: {e}") + + def _store_signals_batch(self, signals: List[RealTimeSignal]) -> None: + """Store signals in database.""" + 
try: + signal_data = [] + for signal in signals: + result = signal.strategy_result + context = signal.context + + signal_data.append({ + 'strategy_name': context.strategy_name, + 'strategy_config': context.strategy_config, + 'symbol': context.symbol, + 'timeframe': context.timeframe, + 'exchange': context.exchange, + 'timestamp': result.timestamp, + 'signal_type': result.signal.signal_type.value if result.signal else 'HOLD', + 'price': float(result.price) if result.price else None, + 'confidence': result.confidence, + 'signal_metadata': result.metadata or {}, + 'generation_time': signal.generation_time + }) + + # Batch insert into database + self.db_ops.strategy.store_signals_batch(signal_data) + + if self.logger: + self.logger.debug(f"Stored batch of {len(signals)} real-time signals") + + except Exception as e: + if self.logger: + self.logger.error(f"Error storing signal batch: {e}") + + def _process_chart_updates(self, signals: List[RealTimeSignal]) -> None: + """Process chart updates for signals.""" + if not self._chart_update_callback: + return + + # Group signals by symbol/timeframe for throttling + signal_groups = {} + for signal in signals: + if not signal.chart_update_required: + continue + + key = f"{signal.context.symbol}_{signal.context.timeframe}" + if key not in signal_groups: + signal_groups[key] = [] + signal_groups[key].append(signal) + + # Process chart updates with throttling + current_time = time.time() * 1000 # milliseconds + + with self._chart_update_lock: + for key, group_signals in signal_groups.items(): + last_update = self._last_chart_update.get(key, 0) + + if current_time - last_update >= self.config.chart_update_throttle_ms: + # Update chart with latest signal from group + latest_signal = max(group_signals, key=lambda s: s.generation_time) + + try: + self._chart_update_callback(latest_signal) + self._last_chart_update[key] = current_time + except Exception as e: + if self.logger: + self.logger.error(f"Error in chart update callback: {e}") + + def get_signal_stats(self) -> Dict[str, Any]: + """Get signal broadcasting statistics.""" + return { + 'queue_size': self._signal_queue.qsize(), + 'chart_queue_size': self._chart_update_queue.qsize(), + 'is_running': self._is_running, + 'last_chart_updates': dict(self._last_chart_update) + } + + +class RealTimeStrategyProcessor: + """ + Real-time strategy execution processor. + + Integrates with existing chart data refresh cycle to provide + real-time strategy signal generation and broadcasting. 
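+
+    Example (illustrative sketch; symbol, timeframe, and config values
+    are assumptions):
+
+        processor = RealTimeStrategyProcessor()
+        processor.start()
+        ctx_id = processor.register_strategy(
+            "ema_crossover", {"fast_period": 12, "slow_period": 26},
+            symbol="BTC-USDT", timeframe="1h",
+        )
+        signals = processor.execute_realtime_update("BTC-USDT", "1h")
+        processor.stop()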
+ """ + + def __init__(self, config: RealTimeConfig = None): + """Initialize real-time strategy processor.""" + self.config = config or RealTimeConfig() + self.logger = logger + + # Core components + self.data_integrator = StrategyDataIntegrator( + StrategyDataIntegrationConfig( + cache_timeout_minutes=1, # Shorter cache for real-time + enable_indicator_caching=True + ) + ) + self.market_integrator = MarketDataIntegrator() + self.strategy_factory = StrategyFactory(self.logger) + self.signal_broadcaster = StrategySignalBroadcaster(self.config) + + # Strategy execution contexts + self._execution_contexts: Dict[str, StrategyExecutionContext] = {} + self._context_lock = Lock() + + # Performance tracking + self._performance_stats = { + 'total_calculations': 0, + 'successful_calculations': 0, + 'failed_calculations': 0, + 'average_calculation_time_ms': 0.0, + 'signals_generated': 0, + 'last_update_time': None + } + + # Thread pool for concurrent strategy execution + self._executor = ThreadPoolExecutor(max_workers=self.config.max_strategies_concurrent) + + if self.logger: + self.logger.info("RealTimeStrategyProcessor: Initialized") + + def start(self) -> None: + """Start the real-time strategy processor.""" + self.signal_broadcaster.start() + if self.logger: + self.logger.info("RealTimeStrategyProcessor: Started") + + def stop(self) -> None: + """Stop the real-time strategy processor.""" + self.signal_broadcaster.stop() + self._executor.shutdown(wait=True) + if self.logger: + self.logger.info("RealTimeStrategyProcessor: Stopped") + + def register_strategy( + self, + strategy_name: str, + strategy_config: Dict[str, Any], + symbol: str, + timeframe: str, + exchange: str = "okx" + ) -> str: + """ + Register a strategy for real-time execution. + + Args: + strategy_name: Name of the strategy + strategy_config: Strategy configuration + symbol: Trading symbol + timeframe: Timeframe + exchange: Exchange name + + Returns: + Context ID for the registered strategy + """ + context_id = f"{strategy_name}_{symbol}_{timeframe}_{exchange}" + + context = StrategyExecutionContext( + strategy_name=strategy_name, + strategy_config=strategy_config, + symbol=symbol, + timeframe=timeframe, + exchange=exchange + ) + + with self._context_lock: + self._execution_contexts[context_id] = context + + if self.logger: + self.logger.info(f"Registered strategy for real-time execution: {context_id}") + + return context_id + + def unregister_strategy(self, context_id: str) -> bool: + """ + Unregister a strategy from real-time execution. + + Args: + context_id: Context ID to unregister + + Returns: + True if strategy was unregistered, False if not found + """ + with self._context_lock: + if context_id in self._execution_contexts: + del self._execution_contexts[context_id] + if self.logger: + self.logger.info(f"Unregistered strategy: {context_id}") + return True + return False + + def execute_realtime_update( + self, + symbol: str, + timeframe: str, + exchange: str = "okx" + ) -> List[RealTimeSignal]: + """ + Execute real-time strategy update for new market data. + + This method should be called when new candle data is available, + typically triggered by the chart refresh cycle. 
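+
+        Example (illustrative sketch; on_new_candle is a hypothetical
+        refresh-cycle hook, not an existing callback):
+
+            def on_new_candle(symbol: str, timeframe: str) -> None:
+                processor.execute_realtime_update(symbol, timeframe)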
+ + Args: + symbol: Trading symbol that was updated + timeframe: Timeframe that was updated + exchange: Exchange name + + Returns: + List of generated real-time signals + """ + start_time = time.time() + generated_signals = [] + + try: + # Find all strategies for this symbol/timeframe + matching_contexts = [] + with self._context_lock: + for context_id, context in self._execution_contexts.items(): + if (context.symbol == symbol and + context.timeframe == timeframe and + context.exchange == exchange and + context.is_active): + matching_contexts.append((context_id, context)) + + if not matching_contexts: + return generated_signals + + # Execute strategies concurrently + futures = [] + for context_id, context in matching_contexts: + future = self._executor.submit( + self._execute_strategy_context, + context_id, + context + ) + futures.append((context_id, future)) + + # Collect results + for context_id, future in futures: + try: + signals = future.result(timeout=10.0) # 10 second timeout + generated_signals.extend(signals) + except Exception as e: + if self.logger: + self.logger.error(f"Error executing strategy {context_id}: {e}") + self._handle_strategy_error(context_id, e) + + # Update performance stats + calculation_time = (time.time() - start_time) * 1000 + self._update_performance_stats(len(generated_signals), calculation_time, True) + + if self.logger and generated_signals: + self.logger.debug(f"Generated {len(generated_signals)} real-time signals for {symbol} {timeframe}") + + return generated_signals + + except Exception as e: + if self.logger: + self.logger.error(f"Error in real-time strategy execution: {e}") + calculation_time = (time.time() - start_time) * 1000 + self._update_performance_stats(0, calculation_time, False) + return generated_signals + + def _execute_strategy_context( + self, + context_id: str, + context: StrategyExecutionContext + ) -> List[RealTimeSignal]: + """Execute a single strategy context.""" + try: + # Calculate strategy signals + if self.config.incremental_calculation and context.last_calculation_time: + # Use incremental calculation for better performance + results = self._calculate_incremental_signals(context) + else: + # Full recalculation + results = self._calculate_full_signals(context) + + # Convert to real-time signals + real_time_signals = [] + for result in results: + signal = RealTimeSignal( + strategy_result=result, + context=context + ) + real_time_signals.append(signal) + + # Broadcast signal + self.signal_broadcaster.broadcast_signal(signal) + + # Update context + with self._context_lock: + context.last_calculation_time = datetime.now(timezone.utc) + context.consecutive_errors = 0 + + return real_time_signals + + except Exception as e: + if self.logger: + self.logger.error(f"Error executing strategy context {context_id}: {e}") + self._handle_strategy_error(context_id, e) + return [] + + def _calculate_incremental_signals( + self, + context: StrategyExecutionContext + ) -> List[StrategyResult]: + """Calculate signals incrementally (only for new data).""" + # For this initial implementation, fall back to full calculation + # Incremental calculation optimization can be added later + return self._calculate_full_signals(context) + + def _calculate_full_signals( + self, + context: StrategyExecutionContext + ) -> List[StrategyResult]: + """Calculate signals with full recalculation.""" + return self.data_integrator.calculate_strategy_signals( + strategy_name=context.strategy_name, + strategy_config=context.strategy_config, + symbol=context.symbol, + 
timeframe=context.timeframe, + days_back=7, # Use shorter history for real-time + exchange=context.exchange, + enable_caching=True + ) + + def _handle_strategy_error(self, context_id: str, error: Exception) -> None: + """Handle strategy execution error.""" + with self._context_lock: + if context_id in self._execution_contexts: + context = self._execution_contexts[context_id] + context.consecutive_errors += 1 + + # Disable strategy if too many consecutive errors + if context.consecutive_errors >= self.config.error_retry_attempts: + context.is_active = False + if self.logger: + self.logger.warning( + f"Disabling strategy {context_id} due to consecutive errors: {context.consecutive_errors}" + ) + + def _update_performance_stats( + self, + signals_generated: int, + calculation_time_ms: float, + success: bool + ) -> None: + """Update performance statistics.""" + self._performance_stats['total_calculations'] += 1 + if success: + self._performance_stats['successful_calculations'] += 1 + else: + self._performance_stats['failed_calculations'] += 1 + + self._performance_stats['signals_generated'] += signals_generated + + # Update average calculation time + total_calcs = self._performance_stats['total_calculations'] + current_avg = self._performance_stats['average_calculation_time_ms'] + self._performance_stats['average_calculation_time_ms'] = ( + (current_avg * (total_calcs - 1) + calculation_time_ms) / total_calcs + ) + + self._performance_stats['last_update_time'] = datetime.now(timezone.utc) + + def set_chart_update_callback(self, callback: Callable[[RealTimeSignal], None]) -> None: + """Set callback for chart updates.""" + self.signal_broadcaster.set_chart_update_callback(callback) + + def get_active_strategies(self) -> Dict[str, StrategyExecutionContext]: + """Get all active strategy contexts.""" + with self._context_lock: + return { + context_id: context + for context_id, context in self._execution_contexts.items() + if context.is_active + } + + def get_performance_stats(self) -> Dict[str, Any]: + """Get real-time execution performance statistics.""" + stats = dict(self._performance_stats) + stats.update(self.signal_broadcaster.get_signal_stats()) + return stats + + def pause_strategy(self, context_id: str) -> bool: + """Pause a strategy (set as inactive).""" + with self._context_lock: + if context_id in self._execution_contexts: + self._execution_contexts[context_id].is_active = False + return True + return False + + def resume_strategy(self, context_id: str) -> bool: + """Resume a strategy (set as active).""" + with self._context_lock: + if context_id in self._execution_contexts: + context = self._execution_contexts[context_id] + context.is_active = True + context.consecutive_errors = 0 # Reset error count + return True + return False + + +# Singleton instance for global access +_realtime_processor: Optional[RealTimeStrategyProcessor] = None + + +def get_realtime_strategy_processor(config: RealTimeConfig = None) -> RealTimeStrategyProcessor: + """ + Get the singleton real-time strategy processor instance. + + Args: + config: Configuration for the processor (only used on first call) + + Returns: + RealTimeStrategyProcessor instance + """ + global _realtime_processor + + if _realtime_processor is None: + _realtime_processor = RealTimeStrategyProcessor(config) + + return _realtime_processor + + +def initialize_realtime_strategy_system(config: RealTimeConfig = None) -> RealTimeStrategyProcessor: + """ + Initialize the real-time strategy system. 
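+
+    Example (illustrative sketch; the interval value is an assumption and
+    should be paired with shutdown_realtime_strategy_system() at exit):
+
+        processor = initialize_realtime_strategy_system(
+            RealTimeConfig(refresh_interval_seconds=15)
+        )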
+ + Args: + config: Configuration for the system + + Returns: + Initialized RealTimeStrategyProcessor + """ + processor = get_realtime_strategy_processor(config) + processor.start() + return processor + + +def shutdown_realtime_strategy_system() -> None: + """Shutdown the real-time strategy system.""" + global _realtime_processor + + if _realtime_processor is not None: + _realtime_processor.stop() + _realtime_processor = None \ No newline at end of file diff --git a/strategies/validation.py b/strategies/validation.py new file mode 100644 index 0000000..84c83ec --- /dev/null +++ b/strategies/validation.py @@ -0,0 +1,375 @@ +""" +Strategy Signal Validation Pipeline + +This module provides validation, filtering, and quality assessment +for strategy-generated signals to ensure reliability and consistency. +""" + +from typing import List, Dict, Any, Optional, Tuple +from datetime import datetime, timezone +from dataclasses import dataclass + +from .data_types import StrategySignal, SignalType, StrategyResult +from utils.logger import get_logger + + +@dataclass +class ValidationConfig: + """Configuration for signal validation.""" + min_confidence: float = 0.0 + max_confidence: float = 1.0 + required_metadata_fields: List[str] = None + allowed_signal_types: List[SignalType] = None + price_tolerance_percent: float = 5.0 # Max price deviation from market + + def __post_init__(self): + if self.required_metadata_fields is None: + self.required_metadata_fields = [] + if self.allowed_signal_types is None: + self.allowed_signal_types = list(SignalType) + + +class StrategySignalValidator: + """ + Validates strategy signals for quality, consistency, and compliance. + + Provides comprehensive validation including confidence checks, + signal type validation, price reasonableness, and metadata validation. + """ + + def __init__(self, config: ValidationConfig = None): + """ + Initialize signal validator. + + Args: + config: Validation configuration + """ + self.config = config or ValidationConfig() + self.logger = get_logger() + + # Validation statistics + self._validation_stats = { + 'total_signals_validated': 0, + 'valid_signals': 0, + 'invalid_signals': 0, + 'validation_errors': {} + } + + def validate_signal(self, signal: StrategySignal) -> Tuple[bool, List[str]]: + """ + Validate a single strategy signal. 
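+
+        Example (illustrative sketch):
+
+            is_valid, errors = validator.validate_signal(signal)
+            if not is_valid:
+                ...  # e.g. log the errors and drop the signal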
+ + Args: + signal: Signal to validate + + Returns: + Tuple of (is_valid, list_of_errors) + """ + errors = [] + self._validation_stats['total_signals_validated'] += 1 + + # Validate confidence + if not (self.config.min_confidence <= signal.confidence <= self.config.max_confidence): + errors.append(f"Invalid confidence {signal.confidence}, must be between {self.config.min_confidence} and {self.config.max_confidence}") + + # Validate signal type + if signal.signal_type not in self.config.allowed_signal_types: + errors.append(f"Signal type {signal.signal_type} not in allowed types") + + # Validate price + if signal.price <= 0: + errors.append(f"Invalid price {signal.price}, must be positive") + + # Validate required metadata + if self.config.required_metadata_fields: + if not signal.metadata: + errors.append(f"Missing required metadata fields: {self.config.required_metadata_fields}") + else: + missing_fields = [field for field in self.config.required_metadata_fields + if field not in signal.metadata] + if missing_fields: + errors.append(f"Missing required metadata fields: {missing_fields}") + + # Update statistics + is_valid = len(errors) == 0 + if is_valid: + self._validation_stats['valid_signals'] += 1 + else: + self._validation_stats['invalid_signals'] += 1 + for error in errors: + error_type = error.split(':')[0] if ':' in error else error + self._validation_stats['validation_errors'][error_type] = \ + self._validation_stats['validation_errors'].get(error_type, 0) + 1 + + return is_valid, errors + + def validate_signals_batch(self, signals: List[StrategySignal]) -> Tuple[List[StrategySignal], List[StrategySignal]]: + """ + Validate multiple signals and return valid and invalid lists. + + Args: + signals: List of signals to validate + + Returns: + Tuple of (valid_signals, invalid_signals) + """ + valid_signals = [] + invalid_signals = [] + + for signal in signals: + is_valid, errors = self.validate_signal(signal) + if is_valid: + valid_signals.append(signal) + else: + invalid_signals.append(signal) + self.logger.debug(f"Invalid signal filtered out: {errors}") + + return valid_signals, invalid_signals + + def filter_signals_by_confidence( + self, + signals: List[StrategySignal], + min_confidence: float = None + ) -> List[StrategySignal]: + """ + Filter signals by minimum confidence threshold. + + Args: + signals: List of signals to filter + min_confidence: Minimum confidence threshold (uses config if None) + + Returns: + Filtered list of signals + """ + threshold = min_confidence if min_confidence is not None else self.config.min_confidence + + filtered_signals = [signal for signal in signals if signal.confidence >= threshold] + + self.logger.debug(f"Filtered {len(signals) - len(filtered_signals)} signals below confidence {threshold}") + + return filtered_signals + + def filter_signals_by_type( + self, + signals: List[StrategySignal], + allowed_types: List[SignalType] = None + ) -> List[StrategySignal]: + """ + Filter signals by allowed signal types. 
+ + Args: + signals: List of signals to filter + allowed_types: Allowed signal types (uses config if None) + + Returns: + Filtered list of signals + """ + types = allowed_types if allowed_types is not None else self.config.allowed_signal_types + + filtered_signals = [signal for signal in signals if signal.signal_type in types] + + self.logger.debug(f"Filtered {len(signals) - len(filtered_signals)} signals by type") + + return filtered_signals + + def get_validation_statistics(self) -> Dict[str, Any]: + """Get comprehensive validation statistics.""" + stats = self._validation_stats.copy() + + if stats['total_signals_validated'] > 0: + stats['validation_success_rate'] = stats['valid_signals'] / stats['total_signals_validated'] + stats['validation_failure_rate'] = stats['invalid_signals'] / stats['total_signals_validated'] + else: + stats['validation_success_rate'] = 0.0 + stats['validation_failure_rate'] = 0.0 + + return stats + + def transform_signal_confidence( + self, + signal: StrategySignal, + confidence_multiplier: float = 1.0, + max_confidence: float = None + ) -> StrategySignal: + """ + Transform signal confidence with multiplier and cap. + + Args: + signal: Signal to transform + confidence_multiplier: Multiplier for confidence + max_confidence: Maximum confidence cap (uses config if None) + + Returns: + Transformed signal with updated confidence + """ + max_conf = max_confidence if max_confidence is not None else self.config.max_confidence + + # Create new signal with transformed confidence + new_confidence = min(signal.confidence * confidence_multiplier, max_conf) + + transformed_signal = StrategySignal( + timestamp=signal.timestamp, + symbol=signal.symbol, + timeframe=signal.timeframe, + signal_type=signal.signal_type, + price=signal.price, + confidence=new_confidence, + metadata=signal.metadata.copy() if signal.metadata else None + ) + + return transformed_signal + + def enrich_signal_metadata( + self, + signal: StrategySignal, + additional_metadata: Dict[str, Any] + ) -> StrategySignal: + """ + Enrich signal with additional metadata. + + Args: + signal: Signal to enrich + additional_metadata: Additional metadata to add + + Returns: + Signal with enriched metadata + """ + # Merge metadata + enriched_metadata = signal.metadata.copy() if signal.metadata else {} + enriched_metadata.update(additional_metadata) + + enriched_signal = StrategySignal( + timestamp=signal.timestamp, + symbol=signal.symbol, + timeframe=signal.timeframe, + signal_type=signal.signal_type, + price=signal.price, + confidence=signal.confidence, + metadata=enriched_metadata + ) + + return enriched_signal + + def transform_signals_batch( + self, + signals: List[StrategySignal], + confidence_multiplier: float = 1.0, + additional_metadata: Dict[str, Any] = None + ) -> List[StrategySignal]: + """ + Apply transformations to multiple signals. 
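+
+        Example (illustrative sketch; multiplier and metadata values are
+        assumptions):
+
+            adjusted = validator.transform_signals_batch(
+                signals,
+                confidence_multiplier=0.9,
+                additional_metadata={"source": "realtime"},
+            )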
+ + Args: + signals: List of signals to transform + confidence_multiplier: Confidence multiplier + additional_metadata: Additional metadata to add + + Returns: + List of transformed signals + """ + transformed_signals = [] + + for signal in signals: + # Apply confidence transformation + transformed_signal = self.transform_signal_confidence(signal, confidence_multiplier) + + # Apply metadata enrichment if provided + if additional_metadata: + transformed_signal = self.enrich_signal_metadata(transformed_signal, additional_metadata) + + transformed_signals.append(transformed_signal) + + self.logger.debug(f"Transformed {len(signals)} signals") + + return transformed_signals + + def calculate_signal_quality_metrics(self, signals: List[StrategySignal]) -> Dict[str, Any]: + """ + Calculate comprehensive quality metrics for signals. + + Args: + signals: List of signals to analyze + + Returns: + Dictionary containing quality metrics + """ + if not signals: + return {'error': 'No signals provided for quality analysis'} + + # Basic metrics + total_signals = len(signals) + confidence_values = [signal.confidence for signal in signals] + + # Signal type distribution + signal_type_counts = {} + for signal in signals: + signal_type_counts[signal.signal_type.value] = signal_type_counts.get(signal.signal_type.value, 0) + 1 + + # Confidence metrics + avg_confidence = sum(confidence_values) / total_signals + min_confidence = min(confidence_values) + max_confidence = max(confidence_values) + + # Quality scoring (0-100) + high_confidence_signals = sum(1 for conf in confidence_values if conf >= 0.7) + quality_score = (high_confidence_signals / total_signals) * 100 + + # Metadata completeness + signals_with_metadata = sum(1 for signal in signals if signal.metadata) + metadata_completeness = (signals_with_metadata / total_signals) * 100 + + return { + 'total_signals': total_signals, + 'signal_type_distribution': signal_type_counts, + 'confidence_metrics': { + 'average': round(avg_confidence, 3), + 'minimum': round(min_confidence, 3), + 'maximum': round(max_confidence, 3), + 'high_confidence_count': high_confidence_signals, + 'high_confidence_percentage': round((high_confidence_signals / total_signals) * 100, 1) + }, + 'quality_score': round(quality_score, 1), + 'metadata_completeness_percentage': round(metadata_completeness, 1), + 'recommendations': self._generate_quality_recommendations(signals) + } + + def _generate_quality_recommendations(self, signals: List[StrategySignal]) -> List[str]: + """Generate quality improvement recommendations.""" + recommendations = [] + + confidence_values = [signal.confidence for signal in signals] + avg_confidence = sum(confidence_values) / len(confidence_values) + + if avg_confidence < 0.5: + recommendations.append("Consider increasing confidence thresholds or improving signal generation logic") + + signals_with_metadata = sum(1 for signal in signals if signal.metadata) + if signals_with_metadata / len(signals) < 0.8: + recommendations.append("Enhance metadata collection to improve signal traceability") + + signal_types = set(signal.signal_type for signal in signals) + if len(signal_types) == 1: + recommendations.append("Consider diversifying signal types for better strategy coverage") + + return recommendations if recommendations else ["Signal quality appears good - no specific recommendations"] + + def generate_validation_report(self) -> Dict[str, Any]: + """Generate comprehensive validation report.""" + stats = self.get_validation_statistics() + + return { + 
'report_timestamp': datetime.now(timezone.utc).isoformat(), + 'validation_summary': { + 'total_validated': stats['total_signals_validated'], + 'success_rate': f"{stats.get('validation_success_rate', 0) * 100:.1f}%", + 'failure_rate': f"{stats.get('validation_failure_rate', 0) * 100:.1f}%" + }, + 'error_analysis': stats.get('validation_errors', {}), + 'configuration': { + 'min_confidence': self.config.min_confidence, + 'max_confidence': self.config.max_confidence, + 'allowed_signal_types': [st.value for st in self.config.allowed_signal_types], + 'required_metadata_fields': self.config.required_metadata_fields + }, + 'health_status': 'good' if stats.get('validation_success_rate', 0) >= 0.8 else 'needs_attention' + } \ No newline at end of file diff --git a/tasks/4.0-strategy-engine-foundation.md b/tasks/4.0-strategy-engine-foundation.md index d76c540..048f13f 100644 --- a/tasks/4.0-strategy-engine-foundation.md +++ b/tasks/4.0-strategy-engine-foundation.md @@ -22,12 +22,21 @@ - `database/migrations/versions/add_strategy_signals_table.py` - Alembic migration for strategy signals table - `components/charts/layers/strategy_signals.py` - Strategy signal chart layer for visualization - `components/charts/data_integration.py` - Updated to include strategy data integration +- `strategies/data_integration.py` - Strategy data integration with indicator orchestration and caching +- `strategies/validation.py` - Strategy signal validation and quality assurance +- `strategies/batch_processing.py` - Batch processing engine for backtesting multiple strategies across large datasets +- `strategies/realtime_execution.py` - Real-time strategy execution pipeline for live signal generation +- `dashboard/callbacks/realtime_strategies.py` - Dashboard callbacks for real-time strategy integration - `tests/strategies/test_base_strategy.py` - Unit tests for BaseStrategy abstract class - `tests/strategies/test_strategy_factory.py` - Unit tests for strategy factory system - `tests/strategies/test_strategy_manager.py` - Unit tests for StrategyManager class - `tests/strategies/implementations/test_ema_crossover.py` - Unit tests for EMA Crossover strategy - `tests/strategies/implementations/test_rsi.py` - Unit tests for RSI strategy - `tests/strategies/implementations/test_macd.py` - Unit tests for MACD strategy +- `tests/strategies/test_data_integration.py` - Unit tests for strategy data integration +- `tests/strategies/test_validation.py` - Unit tests for strategy signal validation +- `tests/strategies/test_batch_processing.py` - Unit tests for batch processing capabilities +- `tests/strategies/test_realtime_execution.py` - Unit tests for real-time execution pipeline - `tests/database/test_strategy_repository.py` - Unit tests for strategy repository ### Notes @@ -73,6 +82,26 @@ - **Reasoning**: Maintains consistency with existing database access patterns, ensures proper session management, and provides a clean API for strategy data operations. - **Impact**: All strategy database operations follow the same patterns as other modules, with proper error handling, logging, and transaction management. +### 7. Vectorized Data Integration +- **Decision**: Implement vectorized approaches in `StrategyDataIntegrator` for DataFrame construction, indicator batching, and multi-strategy processing while maintaining iterative interfaces for backward compatibility. 
+- **Reasoning**: Significant performance improvements for backtesting and bulk analysis scenarios, better memory efficiency with pandas operations, and preparation for multi-strategy batch processing capabilities.
+- **Impact**: Enhanced performance for large datasets while maintaining existing single-strategy interfaces. Sets the foundation for efficient multi-strategy and multi-timeframe processing in future phases.
+
+### 8. Single-Strategy Orchestration Focus
+- **Decision**: Implement strategy calculation orchestration focused on single-strategy optimization with indicator dependency resolution, avoiding premature multi-strategy complexity.
+- **Reasoning**: Multi-strategy coordination is better handled at the backtesting layer or through parallelization. Single-strategy optimization provides immediate benefits while keeping the code maintainable and focused.
+- **Impact**: Cleaner, more maintainable code with optimized single-strategy performance. Provides a foundation for future backtester-level parallelization without architectural complexity.
+
+### 9. Indicator Warm-up Handling for Streaming Batch Processing
+- **Decision**: Implement dynamic warm-up period calculation and overlapping windows with result trimming for streaming batch processing (see the sketch after this section).
+- **Reasoning**: Indicators need a minimum amount of historical data to "warm up"; processing large datasets in fixed chunks would otherwise produce inaccurate indicator values and false signals at chunk boundaries.
+- **Impact**: Guarantees correct backtest results for strategies relying on indicators with warm-up periods, even when using memory-efficient streaming. Chunk processing automatically includes the necessary historical context and removes duplicate or invalid initial signals.
+
+### 10. Real-time Strategy Execution Architecture
+- **Decision**: Implement an event-driven real-time strategy execution pipeline with signal broadcasting, chart integration, and concurrent processing capabilities.
+- **Reasoning**: Real-time strategy execution requires a different architecture from batch processing: event-driven triggers, background signal processing, throttled chart updates, and integration with existing dashboard refresh cycles.
+- **Impact**: Enables live strategy signal generation that integrates seamlessly with the existing chart system. Provides concurrent strategy execution, real-time signal storage, error handling with automatic strategy disabling, and performance monitoring for production use.
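+
+The warm-up handling in decision 9 can be summarized with a minimal standalone sketch. The helper names below (`calculate_warmup_days`, `chunk_windows`) are illustrative rather than the module's actual API; the buffer values mirror the expectations encoded in `tests/strategies/test_batch_processing.py`:
+
+```python
+from typing import Dict, Iterator, List, Tuple
+
+
+def calculate_warmup_days(strategy_configs: List[Dict]) -> int:
+    """Longest warm-up window (in days) required by any strategy in the batch."""
+    SAFETY_BUFFER = 10
+    warmups = []
+    for config in strategy_configs:
+        name = config.get('name', '')
+        if 'macd' in name:
+            # MACD needs slow EMA plus signal-line history, plus an extra MACD buffer
+            warmups.append(max(config.get('slow_period', 26), config.get('signal_period', 9)) + 10 + SAFETY_BUFFER)
+        elif 'ema' in name:
+            warmups.append(max(config.get('fast_period', 12), config.get('slow_period', 26)) + SAFETY_BUFFER)
+        elif 'rsi' in name:
+            warmups.append(config.get('period', 14) + 5 + SAFETY_BUFFER)
+        else:
+            warmups.append(30 + SAFETY_BUFFER)  # conservative default for unknown strategies
+    return max(warmups) if warmups else SAFETY_BUFFER
+
+
+def chunk_windows(total_days_back: int, chunk_days: int, warmup_days: int) -> Iterator[Tuple[int, int, bool]]:
+    """Yield (days_to_fetch, target_days, needs_trim) for each streaming chunk.
+
+    Every chunk after the first fetches warmup_days of extra history so
+    indicators are fully warmed up; signals from the overlap are trimmed.
+    """
+    processed = 0
+    first = True
+    while processed < total_days_back:
+        target = min(chunk_days, total_days_back - processed)
+        yield (target if first else target + warmup_days), target, not first
+        processed += target
+        first = False
+
+
+# 60 days in 30-day chunks with a 10-day warm-up: the first chunk fetches
+# 30 days untrimmed, the second fetches 40 days and is trimmed afterwards.
+assert list(chunk_windows(60, 30, 10)) == [(30, 30, False), (40, 30, True)]
+```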
+ ## Tasks - [x] 1.0 Core Strategy Foundation Setup @@ -109,16 +138,16 @@ - [x] 3.8 Add data retention policies for strategy signals (configurable cleanup of old analysis data) - [x] 3.9 Implement strategy signal aggregation queries for performance analysis -- [ ] 4.0 Strategy Data Integration - - [ ] 4.1 Create `StrategyDataIntegrator` class in new `strategies/data_integration.py` module - - [ ] 4.2 Implement data loading interface that leverages existing `TechnicalIndicators` class for indicator dependencies +- [x] 4.0 Strategy Data Integration + - [x] 4.1 Create `StrategyDataIntegrator` class in new `strategies/data_integration.py` module + - [x] 4.2 Implement data loading interface that leverages existing `TechnicalIndicators` class for indicator dependencies - [x] 4.3 Add multi-timeframe data handling for strategies that require indicators from different timeframes - - [ ] 4.4 Implement strategy calculation orchestration with proper indicator dependency resolution - - [ ] 4.5 Create caching layer for computed indicator results to avoid recalculation across strategies - - [ ] 4.6 Add strategy signal generation and validation pipeline - - [ ] 4.7 Implement batch processing capabilities for backtesting large datasets - - [ ] 4.8 Create real-time strategy execution pipeline that integrates with existing chart data refresh - - [ ] 4.9 Add error handling and recovery mechanisms for strategy calculation failures + - [x] 4.4 Implement strategy calculation orchestration with proper indicator dependency resolution + - [x] 4.5 Create caching layer for computed indicator results to avoid recalculation across strategies + - [x] 4.6 Add strategy signal generation and validation pipeline + - [x] 4.7 Implement batch processing capabilities for backtesting large datasets + - [x] 4.8 Create real-time strategy execution pipeline that integrates with existing chart data refresh + - [x] 4.9 Add error handling and recovery mechanisms for strategy calculation failures - [ ] 5.0 Chart Integration and Visualization - [ ] 5.1 Create `StrategySignalLayer` class in `components/charts/layers/strategy_signals.py` diff --git a/tests/strategies/test_batch_processing.py b/tests/strategies/test_batch_processing.py new file mode 100644 index 0000000..ba61f6b --- /dev/null +++ b/tests/strategies/test_batch_processing.py @@ -0,0 +1,798 @@ +""" +Tests for Strategy Batch Processing + +This module tests batch processing capabilities for strategy backtesting +including memory management, parallel processing, and performance monitoring. 
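+
+StrategyDataIntegrator is mocked throughout, so these tests run without live market data.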
+""" + +import pytest +from unittest.mock import patch, MagicMock +from datetime import datetime, timezone +import pandas as pd + +from strategies.batch_processing import BacktestingBatchProcessor, BatchProcessingConfig +from strategies.data_types import StrategyResult, StrategySignal, SignalType + + +class TestBatchProcessingConfig: + """Tests for BatchProcessingConfig dataclass.""" + + def test_default_config(self): + """Test default batch processing configuration.""" + config = BatchProcessingConfig() + + assert config.max_concurrent_strategies == 4 + assert config.max_memory_usage_percent == 80.0 + assert config.chunk_size_days == 30 + assert config.enable_memory_monitoring is True + assert config.enable_result_validation is True + assert config.result_cache_size == 1000 + assert config.progress_reporting_interval == 10 + + def test_custom_config(self): + """Test custom batch processing configuration.""" + config = BatchProcessingConfig( + max_concurrent_strategies=8, + max_memory_usage_percent=90.0, + chunk_size_days=60, + enable_memory_monitoring=False, + enable_result_validation=False, + result_cache_size=500, + progress_reporting_interval=5 + ) + + assert config.max_concurrent_strategies == 8 + assert config.max_memory_usage_percent == 90.0 + assert config.chunk_size_days == 60 + assert config.enable_memory_monitoring is False + assert config.enable_result_validation is False + assert config.result_cache_size == 500 + assert config.progress_reporting_interval == 5 + + +class TestBacktestingBatchProcessor: + """Tests for BacktestingBatchProcessor class.""" + + @pytest.fixture + def processor(self): + """Create batch processor with default configuration.""" + config = BatchProcessingConfig( + enable_memory_monitoring=False, # Disable for testing + progress_reporting_interval=1, # Report every strategy for testing + enable_result_validation=False # Disable validation for basic tests + ) + with patch('strategies.batch_processing.StrategyDataIntegrator'): + return BacktestingBatchProcessor(config) + + @pytest.fixture + def sample_strategy_configs(self): + """Create sample strategy configurations for testing.""" + return [ + { + 'name': 'ema_crossover', + 'type': 'trend_following', + 'parameters': {'fast_ema': 12, 'slow_ema': 26} + }, + { + 'name': 'rsi_momentum', + 'type': 'momentum', + 'parameters': {'rsi_period': 14, 'oversold': 30, 'overbought': 70} + }, + { + 'name': 'macd_trend', + 'type': 'trend_following', + 'parameters': {'fast_ema': 12, 'slow_ema': 26, 'signal': 9} + } + ] + + @pytest.fixture + def sample_strategy_results(self): + """Create sample strategy results for testing.""" + return [ + StrategyResult( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + strategy_name='test_strategy', + signals=[ + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + signal_type=SignalType.BUY, + price=50000.0, + confidence=0.8, + metadata={'rsi': 30} + ) + ], + indicators_used={'rsi': 30, 'ema': 49000}, + metadata={'execution_time': 0.5} + ) + ] + + def test_initialization(self, processor): + """Test batch processor initialization.""" + assert processor.config is not None + assert processor.logger is not None + assert processor.data_integrator is not None + assert processor._processing_stats['strategies_processed'] == 0 + assert processor._processing_stats['total_signals_generated'] == 0 + assert processor._processing_stats['errors_count'] == 0 + + def test_initialization_with_validation_disabled(self): + """Test 
initialization with validation disabled.""" + config = BatchProcessingConfig(enable_result_validation=False) + with patch('strategies.batch_processing.StrategyDataIntegrator'): + processor = BacktestingBatchProcessor(config) + assert processor.signal_validator is None + + @patch('strategies.batch_processing.StrategyDataIntegrator') + def test_process_strategies_batch(self, mock_integrator_class, processor, sample_strategy_configs, sample_strategy_results): + """Test batch processing of multiple strategies.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + symbols = ['BTC-USDT', 'ETH-USDT'] + timeframe = '1h' + days_back = 30 + + results = processor.process_strategies_batch( + strategy_configs=sample_strategy_configs, + symbols=symbols, + timeframe=timeframe, + days_back=days_back + ) + + # Verify results structure + assert len(results) == len(sample_strategy_configs) + assert 'ema_crossover' in results + assert 'rsi_momentum' in results + assert 'macd_trend' in results + + # Verify statistics + stats = processor.get_processing_statistics() + assert stats['strategies_processed'] == 3 + assert stats['total_signals_generated'] == 6 # 3 strategies × 2 symbols × 1 signal each + assert stats['errors_count'] == 0 + + def test_process_single_strategy_batch(self, processor, sample_strategy_results): + """Test processing a single strategy across multiple symbols.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + strategy_config = {'name': 'test_strategy', 'type': 'test'} + symbols = ['BTC-USDT', 'ETH-USDT'] + + results = processor._process_single_strategy_batch( + strategy_config, symbols, '1h', 30, 'okx' + ) + + assert len(results) == 2 # Results for 2 symbols + assert processor._processing_stats['total_signals_generated'] == 2 + + def test_validate_strategy_results(self, processor, sample_strategy_results): + """Test strategy result validation.""" + # Setup mock signal validator + mock_validator = MagicMock() + mock_validator.validate_signals_batch.return_value = ( + sample_strategy_results[0].signals, # valid signals + [] # no invalid signals + ) + processor.signal_validator = mock_validator + + validated_results = processor._validate_strategy_results(sample_strategy_results) + + assert len(validated_results) == 1 + assert len(validated_results[0].signals) == 1 + mock_validator.validate_signals_batch.assert_called_once() + + @patch('strategies.batch_processing.psutil') + def test_check_memory_usage_normal(self, mock_psutil, processor): + """Test memory usage monitoring under normal conditions.""" + # Mock memory usage below threshold + mock_process = MagicMock() + mock_process.memory_percent.return_value = 60.0 # Below 80% threshold + mock_process.memory_info.return_value.rss = 500 * 1024 * 1024 # 500 MB + mock_psutil.Process.return_value = mock_process + + processor._check_memory_usage() + + assert processor._processing_stats['memory_peak_mb'] == 500.0 + + @patch('strategies.batch_processing.psutil') + def test_check_memory_usage_high(self, mock_psutil, processor): + """Test memory usage monitoring with high usage.""" + # Mock memory usage above threshold + mock_process = MagicMock() + mock_process.memory_percent.return_value = 85.0 # Above 80% threshold + 
mock_process.memory_info.return_value.rss = 1000 * 1024 * 1024 # 1000 MB + mock_psutil.Process.return_value = mock_process + + with patch.object(processor, '_cleanup_memory') as mock_cleanup: + processor._check_memory_usage() + mock_cleanup.assert_called_once() + + def test_cleanup_memory(self, processor): + """Test memory cleanup operations.""" + # Fill result cache beyond limit + for i in range(1500): # Above 1000 limit + processor._result_cache[f'key_{i}'] = f'result_{i}' + + initial_cache_size = len(processor._result_cache) + + with patch.object(processor.data_integrator, 'clear_cache') as mock_clear, \ + patch('strategies.batch_processing.gc.collect') as mock_gc: + + processor._cleanup_memory() + + # Verify cache was reduced + assert len(processor._result_cache) < initial_cache_size + assert len(processor._result_cache) == 500 # Half of cache size limit + + # Verify other cleanup operations + mock_clear.assert_called_once() + mock_gc.assert_called_once() + + def test_get_processing_statistics(self, processor): + """Test processing statistics calculation.""" + # Set some test statistics + processor._processing_stats.update({ + 'strategies_processed': 5, + 'total_signals_generated': 25, + 'processing_time_seconds': 10.0, + 'errors_count': 1, + 'validation_failures': 2 + }) + + stats = processor.get_processing_statistics() + + assert stats['strategies_processed'] == 5 + assert stats['total_signals_generated'] == 25 + assert stats['average_signals_per_strategy'] == 5.0 + assert stats['average_processing_time_per_strategy'] == 2.0 + assert stats['error_rate'] == 20.0 # 1/5 * 100 + assert stats['validation_failure_rate'] == 8.0 # 2/25 * 100 + + def test_get_processing_statistics_zero_division(self, processor): + """Test statistics calculation with zero values.""" + stats = processor.get_processing_statistics() + + assert stats['average_signals_per_strategy'] == 0 + assert stats['average_processing_time_per_strategy'] == 0 + assert stats['error_rate'] == 0.0 + assert stats['validation_failure_rate'] == 0.0 + + def test_process_strategies_batch_with_error(self, processor, sample_strategy_configs): + """Test batch processing with errors.""" + # Setup mock to raise an exception + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.side_effect = Exception("Test error") + processor.data_integrator = mock_integrator + + results = processor.process_strategies_batch( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + days_back=30 + ) + + # Should handle errors gracefully + assert isinstance(results, dict) + assert processor._processing_stats['errors_count'] > 0 + + @patch('strategies.batch_processing.StrategyDataIntegrator') + def test_process_strategies_parallel(self, mock_integrator_class, processor, sample_strategy_configs, sample_strategy_results): + """Test parallel processing of multiple strategies.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + symbols = ['BTC-USDT', 'ETH-USDT'] + timeframe = '1h' + days_back = 30 + + results = processor.process_strategies_parallel( + strategy_configs=sample_strategy_configs, + symbols=symbols, + timeframe=timeframe, + days_back=days_back + ) + + # Verify results structure (same as sequential processing) + assert len(results) == len(sample_strategy_configs) + assert 'ema_crossover' in results + assert 'rsi_momentum' in 
results + assert 'macd_trend' in results + + # Verify statistics + stats = processor.get_processing_statistics() + assert stats['strategies_processed'] == 3 + assert stats['total_signals_generated'] == 6 # 3 strategies × 2 symbols × 1 signal each + assert stats['errors_count'] == 0 + + def test_process_symbols_parallel(self, processor, sample_strategy_results): + """Test parallel processing of single strategy across multiple symbols.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + strategy_config = {'name': 'test_strategy', 'type': 'test'} + symbols = ['BTC-USDT', 'ETH-USDT', 'BNB-USDT'] + + results = processor.process_symbols_parallel( + strategy_config=strategy_config, + symbols=symbols, + timeframe='1h', + days_back=30 + ) + + # Should have results for all symbols + assert len(results) == 3 # Results for 3 symbols + assert processor._processing_stats['total_signals_generated'] == 3 + + def test_process_strategy_for_symbol(self, processor, sample_strategy_results): + """Test processing a single strategy for a single symbol.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + strategy_config = {'name': 'test_strategy', 'type': 'test'} + + results = processor._process_strategy_for_symbol( + strategy_config=strategy_config, + symbol='BTC-USDT', + timeframe='1h', + days_back=30, + exchange='okx' + ) + + assert len(results) == 1 + assert results[0].strategy_name == 'test_strategy' + assert results[0].symbol == 'BTC-USDT' + + def test_process_strategy_for_symbol_with_error(self, processor): + """Test symbol processing with error handling.""" + # Setup mock to raise an exception + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.side_effect = Exception("Test error") + processor.data_integrator = mock_integrator + + strategy_config = {'name': 'test_strategy', 'type': 'test'} + + results = processor._process_strategy_for_symbol( + strategy_config=strategy_config, + symbol='BTC-USDT', + timeframe='1h', + days_back=30, + exchange='okx' + ) + + # Should return empty list on error + assert results == [] + + def test_process_large_dataset_streaming(self, processor, sample_strategy_configs, sample_strategy_results): + """Test streaming processing for large datasets.""" + # Setup mock data integrator + mock_integrator = MagicMock() + mock_integrator.calculate_strategy_signals_orchestrated.return_value = sample_strategy_results + processor.data_integrator = mock_integrator + + # Mock the parallel processing method to avoid actual parallel execution + with patch.object(processor, 'process_strategies_parallel') as mock_parallel: + mock_parallel.return_value = { + 'test_strategy': sample_strategy_results + } + + # Test streaming with 90 days split into 30-day chunks + stream = processor.process_large_dataset_streaming( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + total_days_back=90 # Should create 3 chunks + ) + + # Collect all chunks + chunks = list(stream) + + assert len(chunks) == 3 # 90 days / 30 days per chunk + + # Each chunk should have results for all strategies + for chunk in chunks: + assert 'test_strategy' in chunk + + def test_aggregate_streaming_results(self, processor, 
sample_strategy_results): + """Test aggregation of streaming results.""" + # Create mock streaming results + chunk1 = {'strategy1': sample_strategy_results[:1], 'strategy2': []} + chunk2 = {'strategy1': [], 'strategy2': sample_strategy_results[:1]} + chunk3 = {'strategy1': sample_strategy_results[:1], 'strategy2': sample_strategy_results[:1]} + + stream = iter([chunk1, chunk2, chunk3]) + + aggregated = processor.aggregate_streaming_results(stream) + + assert len(aggregated) == 2 + assert 'strategy1' in aggregated + assert 'strategy2' in aggregated + assert len(aggregated['strategy1']) == 2 # From chunk1 and chunk3 + assert len(aggregated['strategy2']) == 2 # From chunk2 and chunk3 + + @patch('strategies.batch_processing.psutil') + def test_process_with_memory_constraints_sufficient_memory(self, mock_psutil, processor, sample_strategy_configs): + """Test memory-constrained processing with sufficient memory.""" + # Mock low memory usage + mock_process = MagicMock() + mock_process.memory_info.return_value.rss = 100 * 1024 * 1024 # 100 MB + mock_psutil.Process.return_value = mock_process + + with patch.object(processor, 'process_strategies_parallel') as mock_parallel: + mock_parallel.return_value = {} + + processor.process_with_memory_constraints( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + days_back=30, + max_memory_mb=1000.0 # High limit + ) + + # Should use parallel processing for sufficient memory + mock_parallel.assert_called_once() + + @patch('strategies.batch_processing.psutil') + def test_process_with_memory_constraints_moderate_constraint(self, mock_psutil, processor, sample_strategy_configs): + """Test memory-constrained processing with moderate constraint.""" + # Mock moderate memory usage + mock_process = MagicMock() + mock_process.memory_info.return_value.rss = 400 * 1024 * 1024 # 400 MB + mock_psutil.Process.return_value = mock_process + + with patch.object(processor, 'process_strategies_batch') as mock_batch: + mock_batch.return_value = {} + + processor.process_with_memory_constraints( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + days_back=30, + max_memory_mb=500.0 # Moderate limit + ) + + # Should use sequential batch processing + mock_batch.assert_called_once() + + @patch('strategies.batch_processing.psutil') + def test_process_with_memory_constraints_severe_constraint(self, mock_psutil, processor, sample_strategy_configs): + """Test memory-constrained processing with severe constraint.""" + # Mock high memory usage + mock_process = MagicMock() + mock_process.memory_info.return_value.rss = 450 * 1024 * 1024 # 450 MB + mock_psutil.Process.return_value = mock_process + + with patch.object(processor, 'process_large_dataset_streaming_with_warmup') as mock_streaming, \ + patch.object(processor, 'aggregate_streaming_results') as mock_aggregate: + + mock_streaming.return_value = iter([{}]) + mock_aggregate.return_value = {} + + processor.process_with_memory_constraints( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + days_back=30, + max_memory_mb=500.0 # Low limit with high current usage + ) + + # Should use streaming processing with warm-up + mock_streaming.assert_called_once() + mock_aggregate.assert_called_once() + + def test_get_performance_metrics(self, processor): + """Test comprehensive performance metrics calculation.""" + # Set some test statistics + processor._processing_stats.update({ + 'strategies_processed': 5, + 'total_signals_generated': 25, + 
'processing_time_seconds': 10.0, + 'memory_peak_mb': 500.0, + 'errors_count': 1, + 'validation_failures': 2 + }) + + with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats: + mock_cache_stats.return_value = {'cache_hits': 80, 'cache_misses': 20} + + metrics = processor.get_performance_metrics() + + assert 'cache_hit_rate' in metrics + assert 'memory_efficiency' in metrics + assert 'throughput_signals_per_second' in metrics + assert 'parallel_efficiency' in metrics + assert 'optimization_recommendations' in metrics + + assert metrics['cache_hit_rate'] == 80.0 # 80/(80+20) * 100 + assert metrics['throughput_signals_per_second'] == 2.5 # 25/10 + + def test_calculate_cache_hit_rate(self, processor): + """Test cache hit rate calculation.""" + with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats: + mock_cache_stats.return_value = {'cache_hits': 70, 'cache_misses': 30} + + hit_rate = processor._calculate_cache_hit_rate() + assert hit_rate == 70.0 # 70/(70+30) * 100 + + def test_calculate_memory_efficiency(self, processor): + """Test memory efficiency calculation.""" + processor._processing_stats.update({ + 'memory_peak_mb': 200.0, + 'strategies_processed': 2 + }) + + efficiency = processor._calculate_memory_efficiency() + # 200MB / 2 strategies = 100MB per strategy + # Baseline is 100MB, so efficiency should be 50% + assert efficiency == 50.0 + + def test_generate_optimization_recommendations(self, processor): + """Test optimization recommendations generation.""" + # Set up poor performance metrics + processor._processing_stats.update({ + 'strategies_processed': 1, + 'total_signals_generated': 1, + 'processing_time_seconds': 10.0, + 'memory_peak_mb': 1000.0, # High memory usage + 'errors_count': 2, # High error rate + 'validation_failures': 0 + }) + + with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats: + mock_cache_stats.return_value = {'cache_hits': 1, 'cache_misses': 9} # Low cache hit rate + + recommendations = processor._generate_optimization_recommendations() + + assert isinstance(recommendations, list) + assert len(recommendations) > 0 + # Should recommend memory efficiency improvement + assert any('memory efficiency' in rec.lower() for rec in recommendations) + + def test_optimize_configuration(self, processor): + """Test automatic configuration optimization.""" + # Set up metrics that indicate poor memory efficiency + processor._processing_stats.update({ + 'strategies_processed': 4, + 'total_signals_generated': 20, + 'processing_time_seconds': 8.0, + 'memory_peak_mb': 2000.0, # Very high memory usage + 'errors_count': 0, + 'validation_failures': 0 + }) + + with patch.object(processor.data_integrator, 'get_cache_stats') as mock_cache_stats: + mock_cache_stats.return_value = {'cache_hits': 10, 'cache_misses': 90} + + original_workers = processor.config.max_concurrent_strategies + original_chunk_size = processor.config.chunk_size_days + + optimized_config = processor.optimize_configuration() + + # Should reduce workers and chunk size due to poor memory efficiency + assert optimized_config.max_concurrent_strategies <= original_workers + assert optimized_config.chunk_size_days <= original_chunk_size + + def test_benchmark_processing_methods(self, processor, sample_strategy_configs): + """Test processing method benchmarking.""" + with patch.object(processor, 'process_strategies_batch') as mock_batch, \ + patch.object(processor, 'process_strategies_parallel') as mock_parallel: + + # Mock batch processing 
results + mock_batch.return_value = {'strategy1': []} + + # Mock parallel processing results + mock_parallel.return_value = {'strategy1': []} + + benchmark_results = processor.benchmark_processing_methods( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + days_back=7 + ) + + assert 'sequential' in benchmark_results + assert 'parallel' in benchmark_results + assert 'recommendation' in benchmark_results + + # Verify both methods were called + mock_batch.assert_called_once() + mock_parallel.assert_called_once() + + def test_reset_stats(self, processor): + """Test statistics reset functionality.""" + # Set some statistics + processor._processing_stats.update({ + 'strategies_processed': 5, + 'total_signals_generated': 25, + 'processing_time_seconds': 10.0 + }) + processor._result_cache['test'] = 'data' + + processor._reset_stats() + + # Verify all stats are reset + assert processor._processing_stats['strategies_processed'] == 0 + assert processor._processing_stats['total_signals_generated'] == 0 + assert processor._processing_stats['processing_time_seconds'] == 0.0 + assert len(processor._result_cache) == 0 + + def test_calculate_warmup_period_ema_strategy(self, processor): + """Test warm-up period calculation for EMA strategy.""" + strategy_configs = [ + { + 'name': 'ema_crossover', + 'fast_period': 12, + 'slow_period': 26 + } + ] + + warmup = processor._calculate_warmup_period(strategy_configs) + + # Should be max(12, 26) + 10 safety buffer = 36 + assert warmup == 36 + + def test_calculate_warmup_period_macd_strategy(self, processor): + """Test warm-up period calculation for MACD strategy.""" + strategy_configs = [ + { + 'name': 'macd_trend', + 'slow_period': 26, + 'signal_period': 9 + } + ] + + warmup = processor._calculate_warmup_period(strategy_configs) + + # Should be max(26, 9) + 10 MACD buffer + 10 safety buffer = 46 + assert warmup == 46 + + def test_calculate_warmup_period_rsi_strategy(self, processor): + """Test warm-up period calculation for RSI strategy.""" + strategy_configs = [ + { + 'name': 'rsi_momentum', + 'period': 14 + } + ] + + warmup = processor._calculate_warmup_period(strategy_configs) + + # Should be 14 + 5 RSI buffer + 10 safety buffer = 29 + assert warmup == 29 + + def test_calculate_warmup_period_multiple_strategies(self, processor): + """Test warm-up period calculation with multiple strategies.""" + strategy_configs = [ + {'name': 'ema_crossover', 'slow_period': 26}, + {'name': 'rsi_momentum', 'period': 14}, + {'name': 'macd_trend', 'slow_period': 26, 'signal_period': 9} + ] + + warmup = processor._calculate_warmup_period(strategy_configs) + + # Should be max of all strategies: 46 (from MACD) + assert warmup == 46 + + def test_calculate_warmup_period_unknown_strategy(self, processor): + """Test warm-up period calculation for unknown strategy type.""" + strategy_configs = [ + { + 'name': 'custom_strategy', + 'some_param': 100 + } + ] + + warmup = processor._calculate_warmup_period(strategy_configs) + + # Should be 30 default + 10 safety buffer = 40 + assert warmup == 40 + + def test_process_large_dataset_streaming_with_warmup(self, processor, sample_strategy_configs, sample_strategy_results): + """Test streaming processing with warm-up period handling.""" + # Mock the warm-up calculation + with patch.object(processor, '_calculate_warmup_period') as mock_warmup: + mock_warmup.return_value = 10 # 10 days warm-up + + # Mock the parallel processing method + with patch.object(processor, 'process_strategies_parallel') as mock_parallel: 
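+                # All chunks route through the parallel path. Only chunks after
+                # the first fetch extra warm-up history (30 days, then 30 + 10 = 40),
+                # and trimming runs only on those warm-up chunks.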
+ mock_parallel.return_value = { + 'test_strategy': sample_strategy_results + } + + # Mock the trimming method + with patch.object(processor, '_trim_warmup_from_results') as mock_trim: + mock_trim.return_value = {'test_strategy': sample_strategy_results} + + # Test streaming with 60 days split into 30-day chunks + stream = processor.process_large_dataset_streaming_with_warmup( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + total_days_back=60 # Should create 2 chunks + ) + + # Collect all chunks + chunks = list(stream) + + assert len(chunks) == 2 # 60 days / 30 days per chunk + + # Verify parallel processing was called with correct parameters + assert mock_parallel.call_count == 2 + + # First chunk should not have warm-up, second should + first_call_args = mock_parallel.call_args_list[0] + second_call_args = mock_parallel.call_args_list[1] + + # First chunk: 30 days (no warm-up) + assert first_call_args[1]['days_back'] == 30 + + # Second chunk: 30 + 10 warm-up = 40 days + assert second_call_args[1]['days_back'] == 40 + + # Trimming should only be called for second chunk + assert mock_trim.call_count == 1 + + def test_trim_warmup_from_results(self, processor, sample_strategy_results): + """Test trimming warm-up period from results.""" + # Create test results with multiple signals + extended_results = sample_strategy_results * 10 # 10 results total + chunk_results = { + 'strategy1': extended_results, + 'strategy2': sample_strategy_results * 5 # 5 results + } + + trimmed = processor._trim_warmup_from_results( + chunk_results=chunk_results, + warmup_days=10, + target_start_days=30, + target_end_days=60 + ) + + # Verify trimming occurred + assert len(trimmed['strategy1']) <= len(extended_results) + assert len(trimmed['strategy2']) <= len(sample_strategy_results * 5) + + # Results should be sorted by timestamp + for strategy_name, results in trimmed.items(): + if len(results) > 1: + timestamps = [r.timestamp for r in results] + assert timestamps == sorted(timestamps) + + def test_streaming_with_warmup_chunk_size_adjustment(self, processor, sample_strategy_configs): + """Test automatic chunk size adjustment when too small for warm-up.""" + # Set up small chunk size relative to warm-up + processor.config.chunk_size_days = 15 # Small chunk size + + with patch.object(processor, '_calculate_warmup_period') as mock_warmup: + mock_warmup.return_value = 30 # Large warm-up period + + with patch.object(processor, 'process_strategies_parallel') as mock_parallel: + mock_parallel.return_value = {} + + # This should trigger chunk size adjustment + stream = processor.process_large_dataset_streaming_with_warmup( + strategy_configs=sample_strategy_configs, + symbols=['BTC-USDT'], + timeframe='1h', + total_days_back=90 + ) + + # Consume the stream to trigger processing + list(stream) + + # Verify warning was logged about chunk size adjustment + # (In a real implementation, you might want to capture log messages) \ No newline at end of file diff --git a/tests/strategies/test_data_integration.py b/tests/strategies/test_data_integration.py new file mode 100644 index 0000000..3f923c5 --- /dev/null +++ b/tests/strategies/test_data_integration.py @@ -0,0 +1,1068 @@ +""" +Unit tests for Strategy Data Integration module. + +Tests the StrategyDataIntegrator class and its data orchestration capabilities. 
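+
+Database, indicator, and strategy-factory dependencies are mocked via fixtures.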
+""" + +import pytest +import pandas as pd +from datetime import datetime, timezone, timedelta +from unittest.mock import Mock, patch, MagicMock +from typing import List, Dict, Any +from decimal import Decimal + +from strategies.data_integration import ( + StrategyDataIntegrator, + StrategyDataIntegrationConfig, + get_strategy_data_integrator +) +from strategies.data_types import StrategyResult, StrategySignal, SignalType +from data.common.data_types import OHLCVCandle + + +class TestStrategyDataIntegrationConfig: + """Test configuration class for strategy data integration.""" + + def test_default_config(self): + """Test default configuration values.""" + config = StrategyDataIntegrationConfig() + + assert config.default_days_back == 30 + assert config.min_candles_required == 100 + assert config.max_candles_limit == 5000 + assert config.cache_timeout_minutes == 15 + assert config.enable_data_validation is True + assert config.enable_sparse_data_handling is True + assert config.enable_indicator_caching is True + assert config.max_cached_indicators == 50 + + def test_custom_config(self): + """Test custom configuration values.""" + config = StrategyDataIntegrationConfig( + default_days_back=60, + min_candles_required=200, + cache_timeout_minutes=30, + enable_indicator_caching=False + ) + + assert config.default_days_back == 60 + assert config.min_candles_required == 200 + assert config.cache_timeout_minutes == 30 + assert config.enable_indicator_caching is False + + +class TestStrategyDataIntegrator: + """Test strategy data integrator functionality.""" + + @pytest.fixture + def mock_db_ops(self): + """Create mock database operations.""" + mock_db_ops = Mock() + mock_db_ops.market_data = Mock() + return mock_db_ops + + @pytest.fixture + def mock_technical_indicators(self): + """Create mock technical indicators.""" + return Mock() + + @pytest.fixture + def mock_strategy_factory(self): + """Create mock strategy factory.""" + return Mock() + + @pytest.fixture + def sample_candles(self): + """Create sample OHLCV candles for testing.""" + candles = [] + base_time = datetime.now(timezone.utc) - timedelta(days=10) + + for i in range(100): + start_time = base_time + timedelta(hours=i) + end_time = start_time + timedelta(hours=1) + candles.append(OHLCVCandle( + symbol='BTC-USDT', + timeframe='1h', + start_time=start_time, + end_time=end_time, + open=Decimal(str(100.0 + i * 0.1)), + high=Decimal(str(101.0 + i * 0.1)), + low=Decimal(str(99.0 + i * 0.1)), + close=Decimal(str(100.5 + i * 0.1)), + volume=Decimal(str(1000.0 + i * 10)), + trade_count=10 + i, + exchange='okx' + )) + + return candles + + @pytest.fixture + def sample_raw_candles(self): + """Create sample raw candles from database.""" + candles = [] + base_time = datetime.now(timezone.utc) - timedelta(days=10) + + for i in range(100): + timestamp = base_time + timedelta(hours=i) + candles.append({ + 'timestamp': timestamp, + 'open': 100.0 + i * 0.1, + 'high': 101.0 + i * 0.1, + 'low': 99.0 + i * 0.1, + 'close': 100.5 + i * 0.1, + 'volume': 1000.0 + i * 10, + 'symbol': 'BTC-USDT', + 'timeframe': '1h', + 'exchange': 'okx' + }) + + return candles + + @pytest.fixture + def integrator(self, mock_db_ops, mock_technical_indicators, mock_strategy_factory): + """Create strategy data integrator with mocked dependencies.""" + config = StrategyDataIntegrationConfig() + + with patch('strategies.data_integration.get_database_operations') as mock_get_db, \ + patch('strategies.data_integration.TechnicalIndicators') as mock_ti, \ + 
patch('strategies.data_integration.StrategyFactory') as mock_sf: + + mock_get_db.return_value = mock_db_ops + mock_ti.return_value = mock_technical_indicators + mock_sf.return_value = mock_strategy_factory + + integrator = StrategyDataIntegrator(config) + + # Set the mocked objects + integrator.db_ops = mock_db_ops + integrator.technical_indicators = mock_technical_indicators + integrator.strategy_factory = mock_strategy_factory + + return integrator + + def test_initialization(self): + """Test integrator initialization.""" + config = StrategyDataIntegrationConfig(default_days_back=60) + + with patch('strategies.data_integration.get_database_operations'), \ + patch('strategies.data_integration.TechnicalIndicators'), \ + patch('strategies.data_integration.StrategyFactory'), \ + patch('pathlib.Path.exists', return_value=False): # Mock no persistent cache file + + integrator = StrategyDataIntegrator(config) + + assert integrator.config.default_days_back == 60 + assert integrator._data_cache == {} + assert integrator._indicator_cache == {} + + def test_prepare_dataframe_from_candles(self, integrator, sample_candles): + """Test conversion of OHLCV candles to DataFrame.""" + df = integrator._prepare_dataframe_from_candles(sample_candles) + + assert len(df) == 100 + assert list(df.columns) == ['open', 'high', 'low', 'close', 'volume'] + assert df.index.name is None # timestamp index name is removed for cleaner appearance + assert df['open'].iloc[0] == 100.0 + assert df['close'].iloc[-1] == 110.4 # 100.5 + 99 * 0.1 + + def test_prepare_dataframe_empty_candles(self, integrator): + """Test DataFrame preparation with empty candles.""" + df = integrator._prepare_dataframe_from_candles([]) + + assert df.empty + assert len(df) == 0 + + def test_validate_strategy_requirements_success(self, integrator): + """Test successful strategy requirements validation.""" + # Create valid DataFrame + data = { + 'open': [100.0] * 150, + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + } + df = pd.DataFrame(data) + + result = integrator.validate_strategy_requirements(df, 'test_strategy') + assert result is True + + def test_validate_strategy_requirements_insufficient_data(self, integrator): + """Test validation failure due to insufficient data.""" + # Create DataFrame with insufficient data + data = { + 'open': [100.0] * 50, # Less than min_candles_required (100) + 'high': [101.0] * 50, + 'low': [99.0] * 50, + 'close': [100.5] * 50, + 'volume': [1000.0] * 50 + } + df = pd.DataFrame(data) + + result = integrator.validate_strategy_requirements(df, 'test_strategy') + assert result is False + + def test_validate_strategy_requirements_missing_columns(self, integrator): + """Test validation failure due to missing columns.""" + # Create DataFrame with missing columns + data = { + 'open': [100.0] * 150, + 'high': [101.0] * 150, + # Missing 'low', 'close', 'volume' + } + df = pd.DataFrame(data) + + result = integrator.validate_strategy_requirements(df, 'test_strategy') + assert result is False + + def test_validate_strategy_requirements_invalid_prices(self, integrator): + """Test validation failure due to invalid price data.""" + # Create DataFrame with invalid prices + data = { + 'open': [100.0, 0.0, 102.0] + [100.0] * 147, # Zero price + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + } + df = pd.DataFrame(data) + + result = integrator.validate_strategy_requirements(df, 'test_strategy') + assert result is False + + 
@patch('strategies.data_integration.convert_database_candles_to_ohlcv') + def test_get_strategy_data_success(self, mock_convert, integrator, sample_raw_candles, sample_candles): + """Test successful strategy data retrieval.""" + # Setup mocks + integrator.db_ops.market_data.get_candles.return_value = sample_raw_candles + mock_convert.return_value = sample_candles + + # Call method + result_df = integrator.get_strategy_data('BTC-USDT', '1h') + + # Verify results + assert not result_df.empty + assert len(result_df) == 100 + assert list(result_df.columns) == ['open', 'high', 'low', 'close', 'volume'] + + # Verify database call + integrator.db_ops.market_data.get_candles.assert_called_once() + call_args = integrator.db_ops.market_data.get_candles.call_args + assert call_args[1]['symbol'] == 'BTC-USDT' + assert call_args[1]['timeframe'] == '1h' + assert call_args[1]['exchange'] == 'okx' + + def test_get_strategy_data_no_raw_candles(self, integrator): + """Test strategy data retrieval with no raw candles.""" + # Setup mock to return empty list + integrator.db_ops.market_data.get_candles.return_value = [] + + # Call method + result_df = integrator.get_strategy_data('BTC-USDT', '1h') + + # Verify empty result + assert result_df.empty + + @patch('strategies.data_integration.convert_database_candles_to_ohlcv') + def test_get_strategy_data_no_ohlcv_candles(self, mock_convert, integrator, sample_raw_candles): + """Test strategy data retrieval with no OHLCV candles after conversion.""" + # Setup mocks + integrator.db_ops.market_data.get_candles.return_value = sample_raw_candles + mock_convert.return_value = [] # Empty OHLCV candles + + # Call method + result_df = integrator.get_strategy_data('BTC-USDT', '1h') + + # Verify empty result + assert result_df.empty + + def test_get_strategy_data_caching(self, integrator): + """Test data caching functionality.""" + # Create cached data + cached_df = pd.DataFrame({ + 'open': [100.0] * 10, + 'high': [101.0] * 10, + 'low': [99.0] * 10, + 'close': [100.5] * 10, + 'volume': [1000.0] * 10 + }) + + cache_key = "market_data_BTC-USDT_1h_30_okx" + integrator._data_cache[cache_key] = { + 'dataframe': cached_df, + 'timestamp': datetime.now(timezone.utc) + } + + # Call method + result_df = integrator.get_strategy_data('BTC-USDT', '1h') + + # Verify cached data is returned + assert not result_df.empty + assert len(result_df) == 10 + + # Verify database was not called + integrator.db_ops.market_data.get_candles.assert_not_called() + + def test_calculate_strategy_signals_success(self, integrator): + """Test successful strategy signal calculation.""" + # Setup market data + market_df = pd.DataFrame({ + 'open': [100.0] * 150, + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + }) + + # Mock strategy results + mock_result = StrategyResult( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + strategy_name='test_strategy', + signals=[], + indicators_used={}, + metadata={} + ) + + # Setup mocks + integrator.get_strategy_data = Mock(return_value=market_df) + integrator.validate_strategy_requirements = Mock(return_value=True) + integrator.strategy_factory.calculate_strategy_signals.return_value = [mock_result] + + # Call method + results = integrator.calculate_strategy_signals( + strategy_name='test_strategy', + strategy_config={'param1': 'value1'}, + symbol='BTC-USDT', + timeframe='1h' + ) + + # Verify results + assert len(results) == 1 + assert results[0].strategy_name == 'test_strategy' + assert 
'symbol' in results[0].metadata + assert results[0].metadata['symbol'] == 'BTC-USDT' + assert results[0].metadata['data_points_used'] == 150 + + def test_calculate_strategy_signals_no_data(self, integrator): + """Test strategy signal calculation with no market data.""" + # Setup mocks + integrator.get_strategy_data = Mock(return_value=pd.DataFrame()) + + # Call method + results = integrator.calculate_strategy_signals( + strategy_name='test_strategy', + strategy_config={}, + symbol='BTC-USDT', + timeframe='1h' + ) + + # Verify empty results + assert len(results) == 0 + # Note: validate_strategy_requirements is not called when get_strategy_data returns empty DataFrame + + def test_calculate_strategy_signals_insufficient_data(self, integrator): + """Test strategy signal calculation with insufficient data.""" + # Setup market data + market_df = pd.DataFrame({ + 'open': [100.0] * 50, # Insufficient data + 'high': [101.0] * 50, + 'low': [99.0] * 50, + 'close': [100.5] * 50, + 'volume': [1000.0] * 50 + }) + + # Setup mocks + integrator.get_strategy_data = Mock(return_value=market_df) + integrator.validate_strategy_requirements = Mock(return_value=False) + + # Call method + results = integrator.calculate_strategy_signals( + strategy_name='test_strategy', + strategy_config={}, + symbol='BTC-USDT', + timeframe='1h' + ) + + # Verify empty results + assert len(results) == 0 + integrator.strategy_factory.calculate_strategy_signals.assert_not_called() + + def test_cache_management(self, integrator): + """Test cache management functionality.""" + # Test caching + cache_key = "test_key" + test_data = { + 'test': 'data', + 'timestamp': datetime.now(timezone.utc) + } + + integrator._cache_data(cache_key, test_data) + assert cache_key in integrator._data_cache + + # Test cache retrieval + cached_data = integrator._get_cached_data(cache_key) + assert cached_data is not None + assert cached_data['test'] == 'data' + + # Test cache expiration + expired_data = { + 'test': 'expired', + 'timestamp': datetime.now(timezone.utc) - timedelta(hours=1) # Expired + } + integrator._cache_data("expired_key", expired_data) + + cached_expired = integrator._get_cached_data("expired_key") + assert cached_expired is None # Should be None due to expiration + assert "expired_key" not in integrator._data_cache # Should be removed + + def test_clear_cache(self, integrator): + """Test cache clearing functionality.""" + # Add some cached data + integrator._data_cache['key1'] = {'data': 'test1'} + integrator._indicator_cache['key2'] = {'data': 'test2'} + + # Clear cache + integrator.clear_cache() + + # Verify cache is cleared + assert len(integrator._data_cache) == 0 + assert len(integrator._indicator_cache) == 0 + + def test_get_cache_stats(self, integrator): + """Test cache statistics retrieval.""" + # Add some cached data + integrator._data_cache['key1'] = {'data': 'test1'} + integrator._indicator_cache['key2'] = {'data': 'test2'} + + # Get stats + stats = integrator.get_cache_stats() + + # Verify stats + assert stats['data_cache_size'] == 1 + assert stats['indicator_cache_size'] == 1 + assert 'config' in stats + assert stats['config']['cache_timeout_minutes'] == 15 + + def test_calculate_indicators_batch(self, integrator): + """Test batch indicator calculation functionality.""" + # Create test market data + market_df = pd.DataFrame({ + 'open': [100.0] * 150, + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + }) + + # Mock indicator configurations + indicator_configs = [ + {'type': 
'sma', 'period': 20}, + {'type': 'ema', 'period': 12}, + {'type': 'rsi', 'period': 14} + ] + + # Mock technical indicators responses + mock_sma_result = pd.DataFrame({'sma': [100.0] * 150}) + mock_ema_result = pd.DataFrame({'ema': [100.2] * 150}) + mock_rsi_result = pd.DataFrame({'rsi': [50.0] * 150}) + + integrator.technical_indicators.calculate.side_effect = [ + mock_sma_result, + mock_ema_result, + mock_rsi_result + ] + + # Call method + results = integrator.calculate_indicators_batch(market_df, indicator_configs) + + # Verify results + assert len(results) == 3 + assert 'sma_period_20' in results + assert 'ema_period_12' in results + assert 'rsi_period_14' in results + + # Verify TechnicalIndicators was called correctly + assert integrator.technical_indicators.calculate.call_count == 3 + + def test_calculate_indicators_batch_with_caching(self, integrator): + """Test batch indicator calculation with caching.""" + # Create test market data + market_df = pd.DataFrame({ + 'open': [100.0] * 150, + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + }) + + # Mock indicator configuration + indicator_configs = [{'type': 'sma', 'period': 20}] + + # Mock technical indicators response + mock_result = pd.DataFrame({'sma': [100.0] * 150}) + integrator.technical_indicators.calculate.return_value = mock_result + + # First call - should calculate and cache + results1 = integrator.calculate_indicators_batch(market_df, indicator_configs) + assert len(results1) == 1 + assert integrator.technical_indicators.calculate.call_count == 1 + + # Second call - should use cache + results2 = integrator.calculate_indicators_batch(market_df, indicator_configs) + assert len(results2) == 1 + assert integrator.technical_indicators.calculate.call_count == 1 # No additional calls + + # Verify cached result is returned + pd.testing.assert_frame_equal(results1['sma_period_20'], results2['sma_period_20']) + + def test_create_indicator_key(self, integrator): + """Test indicator key generation.""" + # Test with parameters + config1 = {'type': 'sma', 'period': 20, 'price_column': 'close'} + key1 = integrator._create_indicator_key(config1) + assert key1 == 'sma_period_20_price_column_close' + + # Test without parameters + config2 = {'type': 'macd'} + key2 = integrator._create_indicator_key(config2) + assert key2 == 'macd' + + # Test consistent key generation (order shouldn't matter) + config3 = {'type': 'rsi', 'price_column': 'close', 'period': 14} + config4 = {'type': 'rsi', 'period': 14, 'price_column': 'close'} + key3 = integrator._create_indicator_key(config3) + key4 = integrator._create_indicator_key(config4) + assert key3 == key4 + + def test_indicator_caching_functionality(self, integrator): + """Test indicator caching mechanisms.""" + # Create test data + market_df = pd.DataFrame({ + 'open': [100.0] * 10, + 'high': [101.0] * 10, + 'low': [99.0] * 10, + 'close': [100.5] * 10, + 'volume': [1000.0] * 10 + }) + + test_result = pd.DataFrame({'test': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + + # Test caching + integrator._cache_indicator_result('test_key', test_result, market_df) + + # Test cache retrieval + cached_result = integrator._get_cached_indicator('test_key', market_df) + assert cached_result is not None + pd.testing.assert_frame_equal(cached_result, test_result) + + # Test cache miss + missing_result = integrator._get_cached_indicator('missing_key', market_df) + assert missing_result is None + + # Test cache invalidation with different data size + different_df = 
pd.DataFrame({ + 'open': [100.0] * 5, # Different size + 'high': [101.0] * 5, + 'low': [99.0] * 5, + 'close': [100.5] * 5, + 'volume': [1000.0] * 5 + }) + + invalid_result = integrator._get_cached_indicator('test_key', different_df) + assert invalid_result is None + + def test_calculate_strategy_signals_enhanced(self, integrator): + """Test enhanced strategy signal calculation with vectorized operations.""" + # Setup market data + market_df = pd.DataFrame({ + 'open': [100.0] * 150, + 'high': [101.0] * 150, + 'low': [99.0] * 150, + 'close': [100.5] * 150, + 'volume': [1000.0] * 150 + }) + + # Mock strategy + mock_strategy = Mock() + mock_strategy.get_required_indicators.return_value = [ + {'type': 'sma', 'period': 20} + ] + + mock_result = StrategyResult( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + strategy_name='test_strategy', + signals=[], + indicators_used={}, + metadata={} + ) + mock_strategy.calculate.return_value = [mock_result] + + # Setup mocks + integrator.get_strategy_data = Mock(return_value=market_df) + integrator.validate_strategy_requirements = Mock(return_value=True) + integrator.strategy_factory.create_strategy.return_value = mock_strategy + integrator.calculate_indicators_batch = Mock(return_value={'sma_period_20': pd.DataFrame({'sma': [100.0] * 150})}) + + # Call enhanced method + results = integrator.calculate_strategy_signals_enhanced( + strategy_name='test_strategy', + strategy_config={'param1': 'value1'}, + symbol='BTC-USDT', + timeframe='1h' + ) + + # Verify results + assert len(results) == 1 + assert results[0].strategy_name == 'test_strategy' + assert 'enhanced_calculation' in results[0].metadata + assert results[0].metadata['enhanced_calculation'] is True + assert results[0].metadata['indicators_calculated'] == 1 + + # Verify method calls + integrator.calculate_indicators_batch.assert_called_once() + mock_strategy.calculate.assert_called_once() + + def test_vectorized_dataframe_construction_performance(self, integrator, sample_candles): + """Test that vectorized DataFrame construction works correctly.""" + # This test verifies the vectorized approach produces same results as iterative + df = integrator._prepare_dataframe_from_candles(sample_candles) + + # Verify structure + assert len(df) == 100 + assert list(df.columns) == ['open', 'high', 'low', 'close', 'volume'] + assert df.index.name is None + + # Verify data integrity (should be same as iterative approach) + assert df['open'].iloc[0] == 100.0 + assert df['close'].iloc[-1] == 110.4 # 100.5 + 99 * 0.1 + + # Verify all data types are numeric + for col in ['open', 'high', 'low', 'close', 'volume']: + assert pd.api.types.is_numeric_dtype(df[col]) + + # Verify no NaN values + assert not df.isnull().any().any() + + def test_enhanced_calculation_error_handling(self, integrator): + """Test error handling in enhanced calculation methods.""" + # Test with invalid strategy name + integrator.get_strategy_data = Mock(return_value=pd.DataFrame({'open': [100.0] * 150, 'high': [101.0] * 150, 'low': [99.0] * 150, 'close': [100.5] * 150, 'volume': [1000.0] * 150})) + integrator.validate_strategy_requirements = Mock(return_value=True) + integrator.strategy_factory.create_strategy.return_value = None # Strategy creation fails + + results = integrator.calculate_strategy_signals_enhanced( + strategy_name='invalid_strategy', + strategy_config={}, + symbol='BTC-USDT', + timeframe='1h' + ) + + assert len(results) == 0 + + # Test indicator batch calculation with empty data + empty_results = 
integrator.calculate_indicators_batch( + pd.DataFrame(), # Empty DataFrame + [{'type': 'sma', 'period': 20}] + ) + + assert len(empty_results) == 0 + + def test_cache_size_management(self, integrator): + """Test that indicator cache properly manages its size.""" + # Create test data + market_df = pd.DataFrame({ + 'open': [100.0] * 10, + 'high': [101.0] * 10, + 'low': [99.0] * 10, + 'close': [100.5] * 10, + 'volume': [1000.0] * 10 + }) + + test_result = pd.DataFrame({'test': [1] * 10}) + + # Add more indicators than max_cached_indicators (50) + for i in range(60): + integrator._cache_indicator_result(f'test_key_{i}', test_result, market_df) + + # Verify cache size is managed + assert len(integrator._indicator_cache) <= integrator.config.max_cached_indicators + + # Verify cache stats + stats = integrator.get_cache_stats() + assert stats['indicator_cache_size'] <= integrator.config.max_cached_indicators + + def test_analyze_indicator_dependencies(self, integrator): + """Test indicator dependency analysis""" + indicator_configs = [ + {'type': 'sma', 'period': 20}, + {'type': 'ema', 'period': 12}, + {'type': 'macd', 'fast': 12, 'slow': 26, 'signal': 9}, + {'type': 'rsi', 'period': 14}, + {'type': 'bollinger_bands', 'period': 20, 'std': 2} + ] + + dependencies = integrator.analyze_indicator_dependencies(indicator_configs) + + # Check that dependencies are properly analyzed + assert isinstance(dependencies, dict) + assert len(dependencies) == len(indicator_configs) + + # All current indicators should have no external dependencies + for deps in dependencies.values(): + assert isinstance(deps, list) + assert len(deps) == 0 # No external dependencies currently + + def test_resolve_calculation_order(self, integrator): + """Test calculation order resolution""" + indicator_configs = [ + {'type': 'macd', 'fast': 12, 'slow': 26, 'signal': 9}, + {'type': 'sma', 'period': 20}, + {'type': 'bollinger_bands', 'period': 20, 'std': 2}, + {'type': 'ema', 'period': 12}, + {'type': 'rsi', 'period': 14}, + {'type': 'sma', 'period': 10} # Another SMA with different period + ] + + ordered_configs = integrator.resolve_calculation_order(indicator_configs) + + # Check that all indicators are included + assert len(ordered_configs) == len(indicator_configs) + + # Check that SMA comes before more complex indicators + sma_indices = [i for i, config in enumerate(ordered_configs) if config['type'] == 'sma'] + macd_indices = [i for i, config in enumerate(ordered_configs) if config['type'] == 'macd'] + + # SMA should come before MACD + if sma_indices and macd_indices: + assert max(sma_indices) < min(macd_indices) + + # Within SMA group, smaller periods should come first + sma_configs = [config for config in ordered_configs if config['type'] == 'sma'] + if len(sma_configs) > 1: + periods = [config['period'] for config in sma_configs] + assert periods == sorted(periods) + + def test_calculate_indicators_orchestrated(self, integrator): + """Test orchestrated indicator calculation""" + # Create test data + test_data = pd.DataFrame({ + 'open': [100.0 + i * 0.1 for i in range(150)], + 'high': [101.0 + i * 0.1 for i in range(150)], + 'low': [99.0 + i * 0.1 for i in range(150)], + 'close': [100.5 + i * 0.1 for i in range(150)], + 'volume': [1000.0 + i * 10 for i in range(150)] + }) + + indicator_configs = [ + {'type': 'sma', 'period': 5}, + {'type': 'ema', 'period': 10}, + {'type': 'rsi', 'period': 14} + ] + + # Mock technical indicators to return proper data + def mock_calculate(indicator_type, df, **kwargs): + if indicator_type == 
'sma': + return pd.DataFrame({'sma': [100.0] * len(df)}) + elif indicator_type == 'ema': + return pd.DataFrame({'ema': [101.0] * len(df)}) + elif indicator_type == 'rsi': + return pd.DataFrame({'rsi': [50.0] * len(df)}) + return pd.DataFrame() + + integrator.technical_indicators.calculate.side_effect = mock_calculate + + # Test with caching enabled + indicators_data = integrator.calculate_indicators_orchestrated( + market_df=test_data, + indicator_configs=indicator_configs, + enable_caching=True + ) + + # Verify results + assert isinstance(indicators_data, dict) + assert len(indicators_data) == 3 + + # Check that each indicator has data + for indicator_key, data in indicators_data.items(): + assert isinstance(data, pd.DataFrame) + assert not data.empty + + # Test second call to verify caching + indicators_data_cached = integrator.calculate_indicators_orchestrated( + market_df=test_data, + indicator_configs=indicator_configs, + enable_caching=True + ) + + # Results should be identical + assert len(indicators_data_cached) == len(indicators_data) + + # Test with caching disabled + indicators_data_no_cache = integrator.calculate_indicators_orchestrated( + market_df=test_data, + indicator_configs=indicator_configs, + enable_caching=False + ) + + assert len(indicators_data_no_cache) == len(indicators_data) + + def test_calculate_indicators_orchestrated_empty_data(self, integrator): + """Test orchestrated calculation with empty data""" + empty_df = pd.DataFrame() + indicator_configs = [{'type': 'sma', 'period': 5}] + + result = integrator.calculate_indicators_orchestrated( + market_df=empty_df, + indicator_configs=indicator_configs + ) + + assert isinstance(result, dict) + assert len(result) == 0 + + def test_calculate_indicators_orchestrated_error_handling(self, integrator): + """Test orchestrated calculation error handling""" + test_data = pd.DataFrame({ + 'open': [100.0 + i * 0.1 for i in range(150)], + 'high': [101.0 + i * 0.1 for i in range(150)], + 'low': [99.0 + i * 0.1 for i in range(150)], + 'close': [100.5 + i * 0.1 for i in range(150)], + 'volume': [1000.0 + i * 10 for i in range(150)] + }) + + # Include invalid indicator type + indicator_configs = [ + {'type': 'sma', 'period': 5}, + {'type': 'invalid_indicator', 'period': 10} + ] + + indicators_data = integrator.calculate_indicators_orchestrated( + market_df=test_data, + indicator_configs=indicator_configs, + enable_caching=True + ) + + # Should handle errors gracefully + assert isinstance(indicators_data, dict) + + # Valid indicator should still be calculated + valid_keys = [k for k in indicators_data.keys() if 'sma' in k.lower()] + assert len(valid_keys) > 0 + + def test_calculate_strategy_signals_orchestrated(self, integrator): + """Test fully orchestrated strategy signal calculation""" + # Mock database operations to return test data + test_data = pd.DataFrame({ + 'timestamp': pd.date_range(start='2023-01-01', periods=150, freq='1h'), + 'open': [100.0 + i * 0.1 for i in range(150)], + 'high': [101.0 + i * 0.1 for i in range(150)], + 'low': [99.0 + i * 0.1 for i in range(150)], + 'close': [100.5 + i * 0.1 for i in range(150)], + 'volume': [1000.0 + i * 10 for i in range(150)] + }) + + def mock_get_candles(*args, **kwargs): + return [ + type('OHLCVCandle', (), { + 'start_time': row['timestamp'], + 'end_time': row['timestamp'] + pd.Timedelta(minutes=1), + 'open': row['open'], + 'high': row['high'], + 'low': row['low'], + 'close': row['close'], + 'volume': row['volume'] + })() + for _, row in test_data.iterrows() + ] + + with 
patch.object(integrator.db_ops.market_data, 'get_candles', side_effect=mock_get_candles): + results = integrator.calculate_strategy_signals_orchestrated( + strategy_name='ema_crossover', + strategy_config={'fast_period': 5, 'slow_period': 10}, + symbol='BTC/USDT', + timeframe='1m', + days_back=1, + enable_caching=True + ) + + # Verify results + assert isinstance(results, list) + + if results: # Only check if we have results + for result in results: + assert isinstance(result, StrategyResult) + + # Check metadata includes orchestration info + assert result.metadata is not None + assert result.metadata.get('calculation_method') == 'orchestrated' + assert result.metadata.get('orchestrated_calculation') is True + assert 'symbol' in result.metadata + assert 'timeframe' in result.metadata + assert 'data_points_used' in result.metadata + assert 'indicators_calculated' in result.metadata + + def test_calculate_strategy_signals_orchestrated_no_data(self, integrator): + """Test orchestrated calculation with no market data""" + def mock_get_candles_empty(*args, **kwargs): + return [] + + with patch.object(integrator.db_ops.market_data, 'get_candles', side_effect=mock_get_candles_empty): + results = integrator.calculate_strategy_signals_orchestrated( + strategy_name='ema_crossover', + strategy_config={'fast_period': 5, 'slow_period': 10}, + symbol='BTC/USDT', + timeframe='1m' + ) + + assert isinstance(results, list) + assert len(results) == 0 + + def test_calculate_strategy_signals_orchestrated_invalid_strategy(self, integrator): + """Test orchestrated calculation with invalid strategy""" + test_data = pd.DataFrame({ + 'timestamp': pd.date_range(start='2023-01-01', periods=150, freq='1h'), + 'open': [100.0 + i * 0.1 for i in range(150)], + 'high': [101.0 + i * 0.1 for i in range(150)], + 'low': [99.0 + i * 0.1 for i in range(150)], + 'close': [100.5 + i * 0.1 for i in range(150)], + 'volume': [1000.0 + i * 10 for i in range(150)] + }) + + def mock_get_candles(*args, **kwargs): + return [ + type('OHLCVCandle', (), { + 'start_time': row['timestamp'], + 'end_time': row['timestamp'] + pd.Timedelta(minutes=1), + 'open': row['open'], + 'high': row['high'], + 'low': row['low'], + 'close': row['close'], + 'volume': row['volume'] + })() + for _, row in test_data.iterrows() + ] + + with patch.object(integrator.db_ops.market_data, 'get_candles', side_effect=mock_get_candles): + results = integrator.calculate_strategy_signals_orchestrated( + strategy_name='nonexistent_strategy', + strategy_config={}, + symbol='BTC/USDT', + timeframe='1m' + ) + + assert isinstance(results, list) + assert len(results) == 0 + + def test_get_calculation_performance_stats(self, integrator): + """Test calculation performance statistics retrieval""" + stats = integrator.get_calculation_performance_stats() + + # Should return performance statistics structure + assert 'cache_performance' in stats + assert 'available_methods' in stats + assert 'recommended_method' in stats + assert 'performance_tips' in stats + + # Check available methods + available_methods = stats['available_methods'] + assert 'calculate_strategy_signals' in available_methods + assert 'calculate_strategy_signals_enhanced' in available_methods + assert 'calculate_strategy_signals_orchestrated' in available_methods + + # Check recommended method + assert stats['recommended_method'] == 'calculate_strategy_signals_orchestrated' + + def test_cache_persistence(self, integrator): + """Test cache persistence functionality""" + # Clear any existing cache + 
integrator.clear_cache() + + # Add some data to cache + test_df = pd.DataFrame({ + 'sma': [1.0, 2.0, 3.0], + 'timestamp': pd.date_range('2023-01-01', periods=3, freq='1h') + }) + + # Cache an indicator result + integrator._cache_indicator_result('test_sma', test_df, test_df) + + # Verify it's in memory cache + assert len(integrator._indicator_cache) == 1 + + # Save to persistent storage + integrator._save_persistent_cache() + + # Verify cache file exists + assert integrator._persistent_cache_file.exists() + + # Clear memory cache + integrator._indicator_cache.clear() + assert len(integrator._indicator_cache) == 0 + + # Load from persistent storage + integrator._load_persistent_cache() + + # Verify data was restored + assert len(integrator._indicator_cache) == 1 + assert 'test_sma' in integrator._indicator_cache + + def test_cross_strategy_cache_sharing(self, integrator): + """Test cross-strategy cache sharing functionality""" + # Clear any existing cache + integrator.clear_cache() + + # Create test indicator data + test_df = pd.DataFrame({ + 'sma': [10.0, 11.0, 12.0], + 'value': [100.0, 101.0, 102.0] + }) + + # Share an indicator result + integrator.share_indicator_result('shared_sma_20', test_df, len(test_df)) + + # Verify it was shared + cached_result = integrator.get_shared_indicator_cache('shared_sma_20', len(test_df)) + assert cached_result is not None + assert len(cached_result) == len(test_df) + assert 'sma' in cached_result.columns + + # Check sharing statistics + stats = integrator.get_cache_sharing_stats() + assert stats['shared_cache_entries'] == 1 + assert stats['total_cached_indicators'] == 1 + assert stats['sharing_efficiency'] == 1.0 + + # Test cache miss + missing_result = integrator.get_shared_indicator_cache('nonexistent_indicator') + assert missing_result is None + + +class TestFactoryFunction: + """Test factory function for strategy data integrator.""" + + def test_get_strategy_data_integrator_default(self): + """Test factory function with default configuration.""" + with patch('strategies.data_integration.get_database_operations'), \ + patch('strategies.data_integration.TechnicalIndicators'), \ + patch('strategies.data_integration.StrategyFactory'): + + integrator = get_strategy_data_integrator() + + assert isinstance(integrator, StrategyDataIntegrator) + assert integrator.config.default_days_back == 30 + + def test_get_strategy_data_integrator_custom_config(self): + """Test factory function with custom configuration.""" + config = StrategyDataIntegrationConfig(default_days_back=60) + + with patch('strategies.data_integration.get_database_operations'), \ + patch('strategies.data_integration.TechnicalIndicators'), \ + patch('strategies.data_integration.StrategyFactory'): + + integrator = get_strategy_data_integrator(config) + + assert isinstance(integrator, StrategyDataIntegrator) + assert integrator.config.default_days_back == 60 \ No newline at end of file diff --git a/tests/strategies/test_realtime_execution.py b/tests/strategies/test_realtime_execution.py new file mode 100644 index 0000000..f3db4aa --- /dev/null +++ b/tests/strategies/test_realtime_execution.py @@ -0,0 +1,558 @@ +""" +Tests for real-time strategy execution pipeline. 
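+
+A minimal sketch of the lifecycle these tests exercise (editorial illustration,
+using only names imported below; parameter values are examples):
+
+    processor = initialize_realtime_strategy_system(RealTimeConfig())
+    processor.register_strategy('ema_crossover',
+                                {'short_period': 12, 'long_period': 26},
+                                'BTC-USDT', '1h')
+    signals = processor.execute_realtime_update('BTC-USDT', '1h')
+    shutdown_realtime_strategy_system()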
+""" + +import pytest +import pandas as pd +from datetime import datetime, timezone, timedelta +from unittest.mock import Mock, patch, MagicMock +import time +from queue import Queue, Empty +import threading + +from strategies.realtime_execution import ( + RealTimeStrategyProcessor, + StrategySignalBroadcaster, + RealTimeConfig, + StrategyExecutionContext, + RealTimeSignal, + get_realtime_strategy_processor, + initialize_realtime_strategy_system, + shutdown_realtime_strategy_system +) +from strategies.data_types import StrategyResult, StrategySignal, SignalType +from data.common.data_types import OHLCVCandle + + +class TestRealTimeConfig: + """Test RealTimeConfig dataclass.""" + + def test_default_config(self): + """Test default configuration values.""" + config = RealTimeConfig() + + assert config.refresh_interval_seconds == 30 + assert config.max_strategies_concurrent == 5 + assert config.incremental_calculation == True + assert config.signal_batch_size == 100 + assert config.enable_signal_broadcasting == True + assert config.max_signal_queue_size == 1000 + assert config.chart_update_throttle_ms == 1000 + assert config.error_retry_attempts == 3 + assert config.error_retry_delay_seconds == 5 + + def test_custom_config(self): + """Test custom configuration values.""" + config = RealTimeConfig( + refresh_interval_seconds=15, + max_strategies_concurrent=3, + incremental_calculation=False, + signal_batch_size=50 + ) + + assert config.refresh_interval_seconds == 15 + assert config.max_strategies_concurrent == 3 + assert config.incremental_calculation == False + assert config.signal_batch_size == 50 + + +class TestStrategyExecutionContext: + """Test StrategyExecutionContext dataclass.""" + + def test_context_creation(self): + """Test strategy execution context creation.""" + context = StrategyExecutionContext( + strategy_name="ema_crossover", + strategy_config={"short_period": 12, "long_period": 26}, + symbol="BTC-USDT", + timeframe="1h" + ) + + assert context.strategy_name == "ema_crossover" + assert context.strategy_config == {"short_period": 12, "long_period": 26} + assert context.symbol == "BTC-USDT" + assert context.timeframe == "1h" + assert context.exchange == "okx" + assert context.last_calculation_time is None + assert context.consecutive_errors == 0 + assert context.is_active == True + + def test_context_with_custom_exchange(self): + """Test context with custom exchange.""" + context = StrategyExecutionContext( + strategy_name="rsi", + strategy_config={"period": 14}, + symbol="ETH-USDT", + timeframe="4h", + exchange="binance" + ) + + assert context.exchange == "binance" + + +class TestRealTimeSignal: + """Test RealTimeSignal dataclass.""" + + def test_signal_creation(self): + """Test real-time signal creation.""" + # Create mock strategy result + strategy_result = Mock(spec=StrategyResult) + strategy_result.timestamp = datetime.now(timezone.utc) + strategy_result.confidence = 0.8 + + # Create context + context = StrategyExecutionContext( + strategy_name="macd", + strategy_config={"fast_period": 12}, + symbol="BTC-USDT", + timeframe="1d" + ) + + # Create signal + signal = RealTimeSignal( + strategy_result=strategy_result, + context=context + ) + + assert signal.strategy_result == strategy_result + assert signal.context == context + assert signal.chart_update_required == True + assert isinstance(signal.generation_time, datetime) + + +class TestStrategySignalBroadcaster: + """Test StrategySignalBroadcaster class.""" + + @pytest.fixture + def config(self): + """Test configuration.""" + 
return RealTimeConfig( + signal_batch_size=5, + max_signal_queue_size=10, + chart_update_throttle_ms=100 + ) + + @pytest.fixture + def mock_db_ops(self): + """Mock database operations.""" + with patch('strategies.realtime_execution.get_database_operations') as mock: + db_ops = Mock() + db_ops.strategy = Mock() + db_ops.strategy.store_signals_batch = Mock(return_value=5) + mock.return_value = db_ops + yield db_ops + + @pytest.fixture + def broadcaster(self, config, mock_db_ops): + """Create broadcaster instance.""" + return StrategySignalBroadcaster(config) + + def test_broadcaster_initialization(self, broadcaster, config): + """Test broadcaster initialization.""" + assert broadcaster.config == config + assert broadcaster._is_running == False + assert broadcaster._chart_update_callback is None + + def test_start_stop_broadcaster(self, broadcaster): + """Test starting and stopping broadcaster.""" + assert not broadcaster._is_running + + broadcaster.start() + assert broadcaster._is_running + assert broadcaster._processing_thread is not None + + broadcaster.stop() + assert not broadcaster._is_running + + def test_broadcast_signal(self, broadcaster): + """Test broadcasting signals.""" + # Create test signal + strategy_result = Mock(spec=StrategyResult) + context = StrategyExecutionContext( + strategy_name="test", + strategy_config={}, + symbol="BTC-USDT", + timeframe="1h" + ) + signal = RealTimeSignal(strategy_result=strategy_result, context=context) + + # Broadcast signal + success = broadcaster.broadcast_signal(signal) + assert success == True + + # Check queue has signal + assert broadcaster._signal_queue.qsize() == 1 + + def test_broadcast_signal_queue_full(self, config, mock_db_ops): + """Test broadcasting when queue is full.""" + # Create broadcaster with very small queue + small_config = RealTimeConfig(max_signal_queue_size=1) + broadcaster = StrategySignalBroadcaster(small_config) + + # Create test signals + strategy_result = Mock(spec=StrategyResult) + context = StrategyExecutionContext( + strategy_name="test", + strategy_config={}, + symbol="BTC-USDT", + timeframe="1h" + ) + signal1 = RealTimeSignal(strategy_result=strategy_result, context=context) + signal2 = RealTimeSignal(strategy_result=strategy_result, context=context) + + # Fill queue + success1 = broadcaster.broadcast_signal(signal1) + assert success1 == True + + # Try to overfill queue + success2 = broadcaster.broadcast_signal(signal2) + assert success2 == False # Should fail due to full queue + + def test_set_chart_update_callback(self, broadcaster): + """Test setting chart update callback.""" + callback = Mock() + broadcaster.set_chart_update_callback(callback) + assert broadcaster._chart_update_callback == callback + + def test_get_signal_stats(self, broadcaster): + """Test getting signal statistics.""" + stats = broadcaster.get_signal_stats() + + assert 'queue_size' in stats + assert 'chart_queue_size' in stats + assert 'is_running' in stats + assert 'last_chart_updates' in stats + assert stats['is_running'] == False + + +class TestRealTimeStrategyProcessor: + """Test RealTimeStrategyProcessor class.""" + + @pytest.fixture + def config(self): + """Test configuration.""" + return RealTimeConfig( + max_strategies_concurrent=2, + error_retry_attempts=2 + ) + + @pytest.fixture + def mock_dependencies(self): + """Mock all external dependencies.""" + mocks = {} + + with patch('strategies.realtime_execution.StrategyDataIntegrator') as mock_integrator: + mocks['data_integrator'] = Mock() + mock_integrator.return_value = 
mocks['data_integrator'] + + with patch('strategies.realtime_execution.MarketDataIntegrator') as mock_market: + mocks['market_integrator'] = Mock() + mock_market.return_value = mocks['market_integrator'] + + with patch('strategies.realtime_execution.StrategyFactory') as mock_factory: + mocks['strategy_factory'] = Mock() + mock_factory.return_value = mocks['strategy_factory'] + + yield mocks + + @pytest.fixture + def processor(self, config, mock_dependencies): + """Create processor instance.""" + return RealTimeStrategyProcessor(config) + + def test_processor_initialization(self, processor, config): + """Test processor initialization.""" + assert processor.config == config + assert processor._execution_contexts == {} + assert processor._performance_stats['total_calculations'] == 0 + + def test_start_stop_processor(self, processor): + """Test starting and stopping processor.""" + processor.start() + assert processor.signal_broadcaster._is_running == True + + processor.stop() + assert processor.signal_broadcaster._is_running == False + + def test_register_strategy(self, processor): + """Test registering strategy for real-time execution.""" + context_id = processor.register_strategy( + strategy_name="ema_crossover", + strategy_config={"short_period": 12, "long_period": 26}, + symbol="BTC-USDT", + timeframe="1h" + ) + + expected_id = "ema_crossover_BTC-USDT_1h_okx" + assert context_id == expected_id + assert context_id in processor._execution_contexts + + context = processor._execution_contexts[context_id] + assert context.strategy_name == "ema_crossover" + assert context.symbol == "BTC-USDT" + assert context.timeframe == "1h" + assert context.is_active == True + + def test_unregister_strategy(self, processor): + """Test unregistering strategy.""" + # Register first + context_id = processor.register_strategy( + strategy_name="rsi", + strategy_config={"period": 14}, + symbol="ETH-USDT", + timeframe="4h" + ) + + assert context_id in processor._execution_contexts + + # Unregister + success = processor.unregister_strategy(context_id) + assert success == True + assert context_id not in processor._execution_contexts + + # Try to unregister again + success2 = processor.unregister_strategy(context_id) + assert success2 == False + + def test_execute_realtime_update_no_strategies(self, processor): + """Test real-time update with no registered strategies.""" + signals = processor.execute_realtime_update("BTC-USDT", "1h") + assert signals == [] + + def test_execute_realtime_update_with_strategies(self, processor, mock_dependencies): + """Test real-time update with registered strategies.""" + # Mock strategy calculation results + mock_result = Mock(spec=StrategyResult) + mock_result.timestamp = datetime.now(timezone.utc) + mock_result.confidence = 0.8 + + mock_dependencies['data_integrator'].calculate_strategy_signals.return_value = [mock_result] + + # Register strategy + processor.register_strategy( + strategy_name="ema_crossover", + strategy_config={"short_period": 12, "long_period": 26}, + symbol="BTC-USDT", + timeframe="1h" + ) + + # Execute update + signals = processor.execute_realtime_update("BTC-USDT", "1h") + + assert len(signals) == 1 + assert isinstance(signals[0], RealTimeSignal) + assert signals[0].strategy_result == mock_result + + def test_get_active_strategies(self, processor): + """Test getting active strategies.""" + # Register some strategies + processor.register_strategy("ema", {}, "BTC-USDT", "1h") + processor.register_strategy("rsi", {}, "ETH-USDT", "4h") + + active = 
processor.get_active_strategies() + assert len(active) == 2 + + # Pause one strategy + context_id = list(active.keys())[0] + processor.pause_strategy(context_id) + + active_after_pause = processor.get_active_strategies() + assert len(active_after_pause) == 1 + + def test_pause_resume_strategy(self, processor): + """Test pausing and resuming strategies.""" + context_id = processor.register_strategy("macd", {}, "BTC-USDT", "1d") + + # Pause strategy + success = processor.pause_strategy(context_id) + assert success == True + assert not processor._execution_contexts[context_id].is_active + + # Resume strategy + success = processor.resume_strategy(context_id) + assert success == True + assert processor._execution_contexts[context_id].is_active + + # Test with invalid context_id + invalid_success = processor.pause_strategy("invalid_id") + assert invalid_success == False + + def test_get_performance_stats(self, processor): + """Test getting performance statistics.""" + stats = processor.get_performance_stats() + + assert 'total_calculations' in stats + assert 'successful_calculations' in stats + assert 'failed_calculations' in stats + assert 'average_calculation_time_ms' in stats + assert 'signals_generated' in stats + assert 'queue_size' in stats # From signal broadcaster + + +class TestSingletonAndInitialization: + """Test singleton pattern and system initialization.""" + + def test_get_realtime_strategy_processor_singleton(self): + """Test that processor is singleton.""" + # Clean up any existing processor + shutdown_realtime_strategy_system() + + processor1 = get_realtime_strategy_processor() + processor2 = get_realtime_strategy_processor() + + assert processor1 is processor2 + + # Clean up + shutdown_realtime_strategy_system() + + def test_initialize_realtime_strategy_system(self): + """Test system initialization.""" + # Clean up any existing processor + shutdown_realtime_strategy_system() + + config = RealTimeConfig(max_strategies_concurrent=2) + processor = initialize_realtime_strategy_system(config) + + assert processor is not None + assert processor.signal_broadcaster._is_running == True + + # Clean up + shutdown_realtime_strategy_system() + + def test_shutdown_realtime_strategy_system(self): + """Test system shutdown.""" + # Initialize system + processor = initialize_realtime_strategy_system() + assert processor.signal_broadcaster._is_running == True + + # Shutdown + shutdown_realtime_strategy_system() + + # Verify shutdown + # Note: After shutdown, the global processor is set to None + # So we can't check the processor state, but we can verify + # a new processor is created on next call + new_processor = get_realtime_strategy_processor() + assert new_processor is not None + + +class TestIntegration: + """Integration tests for real-time execution pipeline.""" + + @pytest.fixture + def integration_config(self): + """Configuration for integration tests.""" + return RealTimeConfig( + signal_batch_size=2, + max_signal_queue_size=5, + chart_update_throttle_ms=50 + ) + + def test_end_to_end_signal_flow(self, integration_config): + """Test complete signal flow from strategy to storage.""" + with patch('strategies.realtime_execution.get_database_operations') as mock_db: + # Setup mocks + db_ops = Mock() + db_ops.strategy = Mock() + db_ops.strategy.store_signals_batch = Mock(return_value=2) + mock_db.return_value = db_ops + + # Create processor + processor = RealTimeStrategyProcessor(integration_config) + processor.start() + + try: + # Mock strategy calculation + mock_result = 
Mock(spec=StrategyResult) + mock_result.timestamp = datetime.now(timezone.utc) + mock_result.confidence = 0.8 + mock_result.signal = Mock() + mock_result.signal.signal_type = SignalType.BUY + mock_result.price = 50000.0 + mock_result.metadata = {"test": True} + + with patch.object(processor.data_integrator, 'calculate_strategy_signals') as mock_calc: + mock_calc.return_value = [mock_result] + + # Register strategy + processor.register_strategy( + strategy_name="test_strategy", + strategy_config={"param": "value"}, + symbol="BTC-USDT", + timeframe="1h" + ) + + # Execute real-time update + signals = processor.execute_realtime_update("BTC-USDT", "1h") + + assert len(signals) == 1 + + # Wait for signal processing + time.sleep(0.2) # Allow background processing + + # Verify calculation was called + mock_calc.assert_called_once() + + finally: + processor.stop() + + def test_error_handling_and_retry(self, integration_config): + """Test error handling and retry mechanisms.""" + processor = RealTimeStrategyProcessor(integration_config) + processor.start() + + try: + # Mock strategy calculation to raise error + with patch.object(processor.data_integrator, 'calculate_strategy_signals') as mock_calc: + mock_calc.side_effect = Exception("Test error") + + # Register strategy + context_id = processor.register_strategy( + strategy_name="error_strategy", + strategy_config={}, + symbol="BTC-USDT", + timeframe="1h" + ) + + # Execute multiple times to trigger error handling + for _ in range(integration_config.error_retry_attempts + 1): + processor.execute_realtime_update("BTC-USDT", "1h") + + # Strategy should be disabled after max errors + context = processor._execution_contexts[context_id] + assert not context.is_active + assert context.consecutive_errors >= integration_config.error_retry_attempts + + finally: + processor.stop() + + def test_concurrent_strategy_execution(self, integration_config): + """Test concurrent execution of multiple strategies.""" + processor = RealTimeStrategyProcessor(integration_config) + processor.start() + + try: + # Mock strategy calculations + mock_result1 = Mock(spec=StrategyResult) + mock_result1.timestamp = datetime.now(timezone.utc) + mock_result1.confidence = 0.7 + + mock_result2 = Mock(spec=StrategyResult) + mock_result2.timestamp = datetime.now(timezone.utc) + mock_result2.confidence = 0.9 + + with patch.object(processor.data_integrator, 'calculate_strategy_signals') as mock_calc: + mock_calc.side_effect = [[mock_result1], [mock_result2]] + + # Register multiple strategies for same symbol/timeframe + processor.register_strategy("strategy1", {}, "BTC-USDT", "1h") + processor.register_strategy("strategy2", {}, "BTC-USDT", "1h") + + # Execute update + signals = processor.execute_realtime_update("BTC-USDT", "1h") + + # Should get signals from both strategies + assert len(signals) == 2 + + finally: + processor.stop() \ No newline at end of file diff --git a/tests/strategies/test_validation.py b/tests/strategies/test_validation.py new file mode 100644 index 0000000..c212351 --- /dev/null +++ b/tests/strategies/test_validation.py @@ -0,0 +1,478 @@ +""" +Tests for Strategy Signal Validation Pipeline + +This module tests signal validation, filtering, and quality assessment +functionality for strategy-generated signals. 
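+
+A minimal sketch of the validator API exercised below (editorial illustration;
+`signal` stands for any StrategySignal instance such as the fixtures in this file):
+
+    validator = StrategySignalValidator(ValidationConfig(min_confidence=0.5))
+    is_valid, errors = validator.validate_signal(signal)
+    valid_signals, invalid_signals = validator.validate_signals_batch([signal])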
+""" + +import pytest +from datetime import datetime, timezone +from unittest.mock import patch + +from strategies.validation import StrategySignalValidator, ValidationConfig +from strategies.data_types import StrategySignal, SignalType + + +class TestValidationConfig: + """Tests for ValidationConfig dataclass.""" + + def test_default_config(self): + """Test default validation configuration.""" + config = ValidationConfig() + + assert config.min_confidence == 0.0 + assert config.max_confidence == 1.0 + assert config.required_metadata_fields == [] + assert config.allowed_signal_types == list(SignalType) + assert config.price_tolerance_percent == 5.0 + + def test_custom_config(self): + """Test custom validation configuration.""" + config = ValidationConfig( + min_confidence=0.3, + max_confidence=0.9, + required_metadata_fields=['indicator1', 'indicator2'], + allowed_signal_types=[SignalType.BUY, SignalType.SELL], + price_tolerance_percent=2.0 + ) + + assert config.min_confidence == 0.3 + assert config.max_confidence == 0.9 + assert config.required_metadata_fields == ['indicator1', 'indicator2'] + assert config.allowed_signal_types == [SignalType.BUY, SignalType.SELL] + assert config.price_tolerance_percent == 2.0 + + +class TestStrategySignalValidator: + """Tests for StrategySignalValidator class.""" + + @pytest.fixture + def validator(self): + """Create validator with default configuration.""" + return StrategySignalValidator() + + @pytest.fixture + def strict_validator(self): + """Create validator with strict configuration.""" + config = ValidationConfig( + min_confidence=0.5, + max_confidence=1.0, + required_metadata_fields=['rsi', 'macd'], + allowed_signal_types=[SignalType.BUY, SignalType.SELL] + ) + return StrategySignalValidator(config) + + @pytest.fixture + def valid_signal(self): + """Create a valid strategy signal for testing.""" + return StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + signal_type=SignalType.BUY, + price=50000.0, + confidence=0.8, + metadata={'rsi': 30, 'macd': 0.05} + ) + + def test_initialization(self, validator): + """Test validator initialization.""" + assert validator.config is not None + assert validator.logger is not None + assert validator._validation_stats['total_signals_validated'] == 0 + assert validator._validation_stats['valid_signals'] == 0 + assert validator._validation_stats['invalid_signals'] == 0 + + def test_validate_valid_signal(self, validator, valid_signal): + """Test validation of a completely valid signal.""" + is_valid, errors = validator.validate_signal(valid_signal) + + assert is_valid is True + assert errors == [] + assert validator._validation_stats['total_signals_validated'] == 1 + assert validator._validation_stats['valid_signals'] == 1 + assert validator._validation_stats['invalid_signals'] == 0 + + def test_validate_invalid_confidence_low(self, validator, valid_signal): + """Test validation with confidence too low.""" + valid_signal.confidence = -0.1 + + is_valid, errors = validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 1 + assert "Invalid confidence" in errors[0] + assert validator._validation_stats['invalid_signals'] == 1 + + def test_validate_invalid_confidence_high(self, validator, valid_signal): + """Test validation with confidence too high.""" + valid_signal.confidence = 1.5 + + is_valid, errors = validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 1 + assert "Invalid confidence" in errors[0] + 
+ def test_validate_invalid_signal_type(self, strict_validator, valid_signal): + """Test validation with disallowed signal type.""" + valid_signal.signal_type = SignalType.HOLD + + is_valid, errors = strict_validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 1 + assert "Signal type" in errors[0] and "not in allowed types" in errors[0] + + def test_validate_invalid_price(self, validator, valid_signal): + """Test validation with invalid price.""" + valid_signal.price = -100.0 + + is_valid, errors = validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 1 + assert "Invalid price" in errors[0] + + def test_validate_missing_required_metadata(self, strict_validator, valid_signal): + """Test validation with missing required metadata.""" + valid_signal.metadata = {'rsi': 30} # Missing 'macd' + + is_valid, errors = strict_validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 1 + assert "Missing required metadata fields" in errors[0] + assert "macd" in errors[0] + + def test_validate_multiple_errors(self, strict_validator, valid_signal): + """Test validation with multiple errors.""" + valid_signal.confidence = 1.5 # Too high + valid_signal.price = -100.0 # Invalid + valid_signal.signal_type = SignalType.HOLD # Not allowed + valid_signal.metadata = {} # Missing required fields + + is_valid, errors = strict_validator.validate_signal(valid_signal) + + assert is_valid is False + assert len(errors) == 4 + assert any("confidence" in error for error in errors) + assert any("price" in error for error in errors) + assert any("Signal type" in error for error in errors) + assert any("Missing required metadata" in error for error in errors) + + def test_validation_statistics_tracking(self, validator, valid_signal): + """Test that validation statistics are properly tracked.""" + # Validate multiple signals + validator.validate_signal(valid_signal) # Valid + + invalid_signal = valid_signal + invalid_signal.confidence = 1.5 # Invalid + validator.validate_signal(invalid_signal) # Invalid + + stats = validator._validation_stats + assert stats['total_signals_validated'] == 2 + assert stats['valid_signals'] == 1 + assert stats['invalid_signals'] == 1 + assert len(stats['validation_errors']) > 0 + + def test_validate_signals_batch(self, validator, valid_signal): + """Test batch validation of multiple signals.""" + # Create a mix of valid and invalid signals + signals = [ + valid_signal, # Valid + StrategySignal( # Invalid confidence + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=1.5, # Invalid + metadata={} + ), + StrategySignal( # Valid + timestamp=datetime.now(timezone.utc), + symbol='BNB-USDT', + timeframe='1h', + signal_type=SignalType.BUY, + price=300.0, + confidence=0.7, + metadata={} + ) + ] + + valid_signals, invalid_signals = validator.validate_signals_batch(signals) + + assert len(valid_signals) == 2 + assert len(invalid_signals) == 1 + assert invalid_signals[0].confidence == 1.5 + + def test_filter_signals_by_confidence(self, validator, valid_signal): + """Test filtering signals by confidence threshold.""" + signals = [ + valid_signal, # confidence 0.8 + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=0.3, # Low confidence + metadata={} + ), + StrategySignal( + 
timestamp=datetime.now(timezone.utc), + symbol='BNB-USDT', + timeframe='1h', + signal_type=SignalType.BUY, + price=300.0, + confidence=0.9, # High confidence + metadata={} + ) + ] + + # Filter with threshold 0.5 + filtered_signals = validator.filter_signals_by_confidence(signals, min_confidence=0.5) + + assert len(filtered_signals) == 2 + assert all(signal.confidence >= 0.5 for signal in filtered_signals) + assert filtered_signals[0].confidence == 0.8 + assert filtered_signals[1].confidence == 0.9 + + def test_filter_signals_by_type(self, validator, valid_signal): + """Test filtering signals by allowed types.""" + signals = [ + valid_signal, # BUY + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=0.8, + metadata={} + ), + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='BNB-USDT', + timeframe='1h', + signal_type=SignalType.HOLD, + price=300.0, + confidence=0.7, + metadata={} + ) + ] + + # Filter to only allow BUY and SELL + filtered_signals = validator.filter_signals_by_type( + signals, + allowed_types=[SignalType.BUY, SignalType.SELL] + ) + + assert len(filtered_signals) == 2 + assert filtered_signals[0].signal_type == SignalType.BUY + assert filtered_signals[1].signal_type == SignalType.SELL + + def test_get_validation_statistics(self, validator, valid_signal): + """Test comprehensive validation statistics.""" + # Validate some signals to generate statistics + validator.validate_signal(valid_signal) # Valid + + invalid_signal = valid_signal + invalid_signal.confidence = -0.1 # Invalid + validator.validate_signal(invalid_signal) # Invalid + + stats = validator.get_validation_statistics() + + assert stats['total_signals_validated'] == 2 + assert stats['valid_signals'] == 1 + assert stats['invalid_signals'] == 1 + assert stats['validation_success_rate'] == 0.5 + assert stats['validation_failure_rate'] == 0.5 + assert 'validation_errors' in stats + + def test_transform_signal_confidence(self, validator, valid_signal): + """Test signal confidence transformation.""" + original_confidence = valid_signal.confidence # 0.8 + + # Test confidence multiplier + transformed_signal = validator.transform_signal_confidence( + valid_signal, + confidence_multiplier=1.2 + ) + + assert transformed_signal.confidence == original_confidence * 1.2 + assert transformed_signal.symbol == valid_signal.symbol + assert transformed_signal.signal_type == valid_signal.signal_type + assert transformed_signal.price == valid_signal.price + + # Test confidence cap + capped_signal = validator.transform_signal_confidence( + valid_signal, + confidence_multiplier=2.0, # Would exceed 1.0 + max_confidence=1.0 + ) + + assert capped_signal.confidence == 1.0 # Capped at max + + def test_enrich_signal_metadata(self, validator, valid_signal): + """Test signal metadata enrichment.""" + additional_metadata = { + 'validation_timestamp': datetime.now(timezone.utc).isoformat(), + 'validation_status': 'approved', + 'risk_score': 0.2 + } + + enriched_signal = validator.enrich_signal_metadata(valid_signal, additional_metadata) + + # Original metadata should be preserved + assert enriched_signal.metadata['rsi'] == 30 + assert enriched_signal.metadata['macd'] == 0.05 + + # New metadata should be added + assert enriched_signal.metadata['validation_status'] == 'approved' + assert enriched_signal.metadata['risk_score'] == 0.2 + assert 'validation_timestamp' in enriched_signal.metadata + + # Other properties should remain 
unchanged + assert enriched_signal.confidence == valid_signal.confidence + assert enriched_signal.signal_type == valid_signal.signal_type + + def test_transform_signals_batch(self, validator, valid_signal): + """Test batch signal transformation.""" + signals = [ + valid_signal, + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=0.6, + metadata={'ema': 2950} + ) + ] + + additional_metadata = {'batch_id': 'test_batch_001'} + + transformed_signals = validator.transform_signals_batch( + signals, + confidence_multiplier=1.1, + additional_metadata=additional_metadata + ) + + assert len(transformed_signals) == 2 + + # Check confidence transformation + assert transformed_signals[0].confidence == 0.8 * 1.1 + assert transformed_signals[1].confidence == 0.6 * 1.1 + + # Check metadata enrichment + assert transformed_signals[0].metadata['batch_id'] == 'test_batch_001' + assert transformed_signals[1].metadata['batch_id'] == 'test_batch_001' + + # Verify original metadata preserved + assert transformed_signals[0].metadata['rsi'] == 30 + assert transformed_signals[1].metadata['ema'] == 2950 + + def test_calculate_signal_quality_metrics(self, validator, valid_signal): + """Test signal quality metrics calculation.""" + signals = [ + valid_signal, # confidence 0.8, has metadata + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=0.9, # High confidence + metadata={'volume_spike': True} + ), + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='BNB-USDT', + timeframe='1h', + signal_type=SignalType.HOLD, + price=300.0, + confidence=0.4, # Low confidence + metadata=None # No metadata + ) + ] + + metrics = validator.calculate_signal_quality_metrics(signals) + + assert metrics['total_signals'] == 3 + assert metrics['confidence_metrics']['average'] == round((0.8 + 0.9 + 0.4) / 3, 3) + assert metrics['confidence_metrics']['minimum'] == 0.4 + assert metrics['confidence_metrics']['maximum'] == 0.9 + assert metrics['confidence_metrics']['high_confidence_count'] == 2 # >= 0.7 + assert metrics['quality_score'] == round((2/3) * 100, 1) # 66.7% + assert metrics['metadata_completeness_percentage'] == round((2/3) * 100, 1) + + # Check signal type distribution + assert metrics['signal_type_distribution']['buy'] == 1 + assert metrics['signal_type_distribution']['sell'] == 1 + assert metrics['signal_type_distribution']['hold'] == 1 + + # Check recommendations + assert isinstance(metrics['recommendations'], list) + assert len(metrics['recommendations']) > 0 + + def test_calculate_signal_quality_metrics_empty(self, validator): + """Test quality metrics with empty signal list.""" + metrics = validator.calculate_signal_quality_metrics([]) + + assert 'error' in metrics + assert metrics['error'] == 'No signals provided for quality analysis' + + def test_generate_quality_recommendations(self, validator): + """Test quality recommendation generation.""" + # Test low confidence signals + low_confidence_signals = [ + StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='BTC-USDT', + timeframe='1h', + signal_type=SignalType.BUY, + price=50000.0, + confidence=0.3, # Low confidence + metadata=None # No metadata + ) + ] + + recommendations = validator._generate_quality_recommendations(low_confidence_signals) + + assert any("confidence" in rec.lower() for rec in recommendations) + assert any("metadata" in 
rec.lower() for rec in recommendations) + + def test_generate_validation_report(self, validator, valid_signal): + """Test comprehensive validation report generation.""" + # Generate some validation activity + validator.validate_signal(valid_signal) # Valid + + # Use a separate signal object so the valid fixture is not mutated + invalid_signal = StrategySignal( + timestamp=datetime.now(timezone.utc), + symbol='ETH-USDT', + timeframe='1h', + signal_type=SignalType.SELL, + price=3000.0, + confidence=-0.1, # Invalid + metadata={} + ) + validator.validate_signal(invalid_signal) # Invalid + + report = validator.generate_validation_report() + + assert 'report_timestamp' in report + assert 'validation_summary' in report + assert 'error_analysis' in report + assert 'configuration' in report + assert 'health_status' in report + + # Check validation summary + summary = report['validation_summary'] + assert summary['total_validated'] == 2 + assert '50.0%' in summary['success_rate'] + assert '50.0%' in summary['failure_rate'] + + # Check configuration + config = report['configuration'] + assert config['min_confidence'] == 0.0 + assert config['max_confidence'] == 1.0 + assert isinstance(config['allowed_signal_types'], list) + + # Check health status + assert report['health_status'] in ['good', 'needs_attention'] \ No newline at end of file
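
Reviewer note (not part of the patch): the data-integration tests above exercise
a batch/orchestrated indicator API for which no usage example appears in the
files themselves. A minimal sketch, assuming only the names and signatures shown
in the tests (`market_df` is any OHLCV DataFrame with open/high/low/close/volume
columns):

    from strategies.data_integration import (
        StrategyDataIntegrationConfig,
        get_strategy_data_integrator,
    )

    integrator = get_strategy_data_integrator(
        StrategyDataIntegrationConfig(default_days_back=30)
    )
    indicators = integrator.calculate_indicators_orchestrated(
        market_df=market_df,
        indicator_configs=[{'type': 'sma', 'period': 20},
                           {'type': 'rsi', 'period': 14}],
        enable_caching=True,
    )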