- Added `realtime_execution.py` for real-time strategy execution, enabling live signal generation and integration with the dashboard's chart refresh cycle. - Introduced `data_integration.py` to manage market data orchestration, caching, and technical indicator calculations for strategy signal generation. - Implemented `validation.py` for comprehensive validation and quality assessment of strategy-generated signals, ensuring reliability and consistency. - Developed `batch_processing.py` to facilitate efficient backtesting of multiple strategies across large datasets with memory management and performance optimization. - Updated `__init__.py` files to include new modules and ensure proper exports, enhancing modularity and maintainability. - Enhanced unit tests for the new features, ensuring robust functionality and adherence to project standards. These changes establish a solid foundation for real-time strategy execution and data integration, aligning with project goals for modularity, performance, and maintainability.
1060 lines
42 KiB
Python
1060 lines
42 KiB
Python
"""
|
|
Strategy Data Integration Module
|
|
|
|
This module provides seamless integration between market data, technical indicators,
|
|
and strategy calculations, handling data orchestration, caching, and optimization
|
|
for strategy signal generation and backtesting.
|
|
"""
|
|
|
|
import pandas as pd
|
|
from datetime import datetime, timezone, timedelta
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
from dataclasses import dataclass
|
|
import json
|
|
import pickle
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from database.operations import get_database_operations, DatabaseOperationError
|
|
from data.common.data_types import OHLCVCandle
|
|
from data.common.indicators import TechnicalIndicators
|
|
from components.charts.config.indicator_defs import convert_database_candles_to_ohlcv
|
|
from .factory import StrategyFactory
|
|
from .data_types import StrategyResult
|
|
from utils.logger import get_logger
|
|
|
|
# Initialize logger
|
|
logger = get_logger()
|
|
|
|
|
|
@dataclass
class StrategyDataIntegrationConfig:
    """Tunable settings for the strategy data integration layer.

    Every field carries a default, so ``StrategyDataIntegrationConfig()``
    produces a ready-to-use configuration.
    """

    # Look-back window (days) used when a caller does not pass ``days_back``;
    # strategies often need more historical data.
    default_days_back: int = 30
    # Fewest candles a data set must hold before a strategy is evaluated.
    min_candles_required: int = 100
    # Intended upper bound on candles per data set, sized for backtesting.
    # NOTE(review): no enforcement of this limit is visible in this section.
    max_candles_limit: int = 5000
    # Age (minutes) after which cached entries are treated as expired.
    cache_timeout_minutes: int = 15
    # Feature switches.
    enable_data_validation: bool = True
    enable_sparse_data_handling: bool = True
    enable_indicator_caching: bool = True
    # Cap on in-memory indicator results, to limit memory usage.
    max_cached_indicators: int = 50
|
|
|
|
|
|
class StrategyDataIntegrator:
|
|
"""
|
|
Integrates market data with strategy calculations and signal generation.
|
|
|
|
This class handles:
|
|
- Fetching and preparing market data for strategies
|
|
- Pre-calculating required technical indicators
|
|
- Orchestrating strategy signal generation
|
|
- Caching computed indicators for performance
|
|
- Multi-timeframe data handling
|
|
- Strategy signal validation and storage
|
|
"""
|
|
|
|
def __init__(self, config: StrategyDataIntegrationConfig = None):
    """
    Build an integrator and wire up its collaborators and caches.

    Creates the on-disk cache directory (relative to the current working
    directory) and warms the indicator cache from any persisted file.

    Args:
        config: Integration configuration; a default
            ``StrategyDataIntegrationConfig`` is used when omitted.
    """
    self.config = config if config else StrategyDataIntegrationConfig()
    self.logger = logger

    # Collaborators all receive the module logger.
    self.db_ops = get_database_operations(self.logger)
    self.technical_indicators = TechnicalIndicators(self.logger)
    self.strategy_factory = StrategyFactory(self.logger)

    # In-memory stores for computed indicators and fetched market data.
    self._indicator_cache: Dict[str, Dict[str, Any]] = {}
    self._data_cache: Dict[str, Dict[str, Any]] = {}

    # Location of the persisted indicator cache.
    cache_root = Path("temp/strategy_cache")
    cache_root.mkdir(parents=True, exist_ok=True)
    self._cache_dir = cache_root
    self._persistent_cache_file = cache_root / "indicator_cache.pkl"

    # Reuse still-valid results from a previous run, if any.
    self._load_persistent_cache()

    if self.logger:
        self.logger.info("StrategyDataIntegrator: Initialized with strategy-optimized configuration")
|
|
|
|
def _load_persistent_cache(self) -> None:
|
|
"""Load indicator cache from persistent storage."""
|
|
try:
|
|
if self._persistent_cache_file.exists():
|
|
with open(self._persistent_cache_file, 'rb') as f:
|
|
cached_data = pickle.load(f)
|
|
|
|
# Validate and filter expired entries
|
|
current_time = datetime.now(timezone.utc)
|
|
valid_entries = 0
|
|
|
|
for key, data in cached_data.items():
|
|
cache_time = data.get('timestamp')
|
|
if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes:
|
|
self._indicator_cache[key] = data
|
|
valid_entries += 1
|
|
|
|
self.logger.debug(f"Loaded {valid_entries} valid cache entries from persistent storage")
|
|
except Exception as e:
|
|
self.logger.warning(f"Failed to load persistent cache: {e}")
|
|
|
|
def _save_persistent_cache(self) -> None:
|
|
"""Save indicator cache to persistent storage."""
|
|
try:
|
|
# Only save recent, valid entries to avoid bloat
|
|
current_time = datetime.now(timezone.utc)
|
|
entries_to_save = {}
|
|
|
|
for key, data in self._indicator_cache.items():
|
|
cache_time = data.get('timestamp')
|
|
if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes:
|
|
entries_to_save[key] = data
|
|
|
|
with open(self._persistent_cache_file, 'wb') as f:
|
|
pickle.dump(entries_to_save, f)
|
|
|
|
self.logger.debug(f"Saved {len(entries_to_save)} cache entries to persistent storage")
|
|
except Exception as e:
|
|
self.logger.warning(f"Failed to save persistent cache: {e}")
|
|
|
|
def calculate_strategy_signals(
|
|
self,
|
|
strategy_name: str,
|
|
strategy_config: Dict[str, Any],
|
|
symbol: str,
|
|
timeframe: str,
|
|
days_back: Optional[int] = None,
|
|
exchange: str = "okx",
|
|
enable_caching: bool = True
|
|
) -> List[StrategyResult]:
|
|
"""
|
|
Main orchestration method for calculating strategy signals.
|
|
|
|
Args:
|
|
strategy_name: Name of the strategy to execute
|
|
strategy_config: Strategy-specific configuration parameters
|
|
symbol: Trading pair symbol
|
|
timeframe: Timeframe for strategy calculation
|
|
days_back: Number of days to look back for data
|
|
exchange: Exchange name
|
|
enable_caching: Whether to use cached indicator results
|
|
|
|
Returns:
|
|
List of strategy results with signals
|
|
"""
|
|
try:
|
|
self.logger.info(f"StrategyDataIntegrator: Calculating signals for {strategy_name} on {symbol} {timeframe}")
|
|
|
|
# Get market data for strategy
|
|
market_df = self.get_strategy_data(
|
|
symbol=symbol,
|
|
timeframe=timeframe,
|
|
days_back=days_back,
|
|
exchange=exchange
|
|
)
|
|
|
|
if market_df.empty:
|
|
self.logger.warning(f"No market data available for {symbol} {timeframe}")
|
|
return []
|
|
|
|
# Validate data sufficiency
|
|
if not self.validate_strategy_requirements(market_df, strategy_name):
|
|
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
|
|
return []
|
|
|
|
# Calculate strategy signals using factory
|
|
results = self.strategy_factory.calculate_strategy_signals(
|
|
strategy_name=strategy_name,
|
|
df=market_df,
|
|
strategy_config=strategy_config
|
|
)
|
|
|
|
# Add metadata to results
|
|
for result in results:
|
|
if not hasattr(result, 'metadata') or result.metadata is None:
|
|
result.metadata = {}
|
|
result.metadata.update({
|
|
'symbol': symbol,
|
|
'timeframe': timeframe,
|
|
'exchange': exchange,
|
|
'data_points_used': len(market_df),
|
|
'calculation_timestamp': datetime.now(timezone.utc).isoformat()
|
|
})
|
|
|
|
self.logger.info(f"Generated {len(results)} strategy results for {strategy_name}")
|
|
return results
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error calculating strategy signals for {strategy_name}: {e}")
|
|
return []
|
|
|
|
def get_strategy_data(
|
|
self,
|
|
symbol: str,
|
|
timeframe: str,
|
|
days_back: Optional[int] = None,
|
|
exchange: str = "okx"
|
|
) -> pd.DataFrame:
|
|
"""
|
|
Fetch and prepare market data for strategy calculation.
|
|
|
|
Args:
|
|
symbol: Trading pair symbol
|
|
timeframe: Timeframe
|
|
days_back: Number of days to look back
|
|
exchange: Exchange name
|
|
|
|
Returns:
|
|
DataFrame with OHLCV data ready for strategy calculation
|
|
"""
|
|
try:
|
|
# Use configured default if not specified
|
|
if days_back is None:
|
|
days_back = self.config.default_days_back
|
|
|
|
# Check cache first
|
|
cache_key = f"market_data_{symbol}_{timeframe}_{days_back}_{exchange}"
|
|
cached_data = self._get_cached_data(cache_key)
|
|
if cached_data:
|
|
self.logger.debug(f"Using cached market data for {symbol} {timeframe}")
|
|
return cached_data['dataframe']
|
|
|
|
# Calculate time range
|
|
end_time = datetime.now(timezone.utc)
|
|
start_time = end_time - timedelta(days=days_back)
|
|
|
|
# Fetch raw market data
|
|
raw_candles = self.db_ops.market_data.get_candles(
|
|
symbol=symbol,
|
|
timeframe=timeframe,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
exchange=exchange
|
|
)
|
|
|
|
if not raw_candles:
|
|
self.logger.warning(f"No raw candles found for {symbol} {timeframe}")
|
|
return pd.DataFrame()
|
|
|
|
# Convert to OHLCV format
|
|
ohlcv_candles = convert_database_candles_to_ohlcv(raw_candles)
|
|
|
|
if not ohlcv_candles:
|
|
self.logger.warning(f"No OHLCV candles after conversion for {symbol} {timeframe}")
|
|
return pd.DataFrame()
|
|
|
|
# Convert to DataFrame for strategy processing
|
|
market_df = self._prepare_dataframe_from_candles(ohlcv_candles)
|
|
|
|
# Cache the results
|
|
self._cache_data(cache_key, {
|
|
'dataframe': market_df,
|
|
'candle_count': len(raw_candles),
|
|
'timestamp': datetime.now(timezone.utc)
|
|
})
|
|
|
|
self.logger.debug(f"Fetched {len(raw_candles)} candles for strategy data: {symbol} {timeframe}")
|
|
return market_df
|
|
|
|
except DatabaseOperationError as e:
|
|
self.logger.error(f"Database error fetching strategy data: {e}")
|
|
return pd.DataFrame()
|
|
except Exception as e:
|
|
self.logger.error(f"Error fetching strategy data for {symbol} {timeframe}: {e}")
|
|
return pd.DataFrame()
|
|
|
|
def _prepare_dataframe_from_candles(self, candles: List[OHLCVCandle]) -> pd.DataFrame:
|
|
"""
|
|
Convert OHLCV candles to DataFrame optimized for strategy calculations.
|
|
Uses vectorized approach for improved performance.
|
|
|
|
Args:
|
|
candles: List of OHLCV candles
|
|
|
|
Returns:
|
|
DataFrame with OHLCV data
|
|
"""
|
|
if not candles:
|
|
return pd.DataFrame()
|
|
|
|
# Vectorized DataFrame construction - extract all values at once
|
|
df = pd.DataFrame({
|
|
'timestamp': [candle.end_time for candle in candles],
|
|
'open': [float(candle.open) for candle in candles],
|
|
'high': [float(candle.high) for candle in candles],
|
|
'low': [float(candle.low) for candle in candles],
|
|
'close': [float(candle.close) for candle in candles],
|
|
'volume': [float(candle.volume) for candle in candles]
|
|
})
|
|
|
|
# Set timestamp as index and sort
|
|
df['timestamp'] = pd.to_datetime(df['timestamp'])
|
|
df.set_index('timestamp', inplace=True)
|
|
df.sort_index(inplace=True)
|
|
|
|
# Remove index name for cleaner appearance
|
|
df.index.name = None
|
|
|
|
# Ensure proper data types using vectorized operations
|
|
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
|
|
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
|
|
|
|
# Remove any rows with NaN values
|
|
df.dropna(inplace=True)
|
|
|
|
return df
|
|
|
|
def validate_strategy_requirements(
|
|
self,
|
|
market_df: pd.DataFrame,
|
|
strategy_name: str
|
|
) -> bool:
|
|
"""
|
|
Validate that market data meets strategy requirements.
|
|
|
|
Args:
|
|
market_df: Market data DataFrame
|
|
strategy_name: Name of the strategy
|
|
|
|
Returns:
|
|
True if data meets requirements, False otherwise
|
|
"""
|
|
try:
|
|
# Check minimum data points
|
|
if len(market_df) < self.config.min_candles_required:
|
|
self.logger.warning(
|
|
f"Insufficient data points: {len(market_df)} < {self.config.min_candles_required}"
|
|
)
|
|
return False
|
|
|
|
# Check for required columns
|
|
required_columns = ['open', 'high', 'low', 'close', 'volume']
|
|
missing_columns = [col for col in required_columns if col not in market_df.columns]
|
|
if missing_columns:
|
|
self.logger.error(f"Missing required columns: {missing_columns}")
|
|
return False
|
|
|
|
# Check for data quality (no all-zero or invalid values)
|
|
for col in ['open', 'high', 'low', 'close']:
|
|
if (market_df[col] <= 0).any():
|
|
self.logger.warning(f"Invalid price data found in column {col}")
|
|
return False
|
|
|
|
# Strategy-specific validations could be added here
|
|
# For example, some strategies might need specific minimum periods
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error validating strategy requirements: {e}")
|
|
return False
|
|
|
|
def _get_cached_data(self, cache_key: str) -> Optional[Dict[str, Any]]:
|
|
"""Get cached data if available and not expired."""
|
|
if cache_key not in self._data_cache:
|
|
return None
|
|
|
|
cached_data = self._data_cache[cache_key]
|
|
cache_time = cached_data.get('timestamp')
|
|
|
|
if cache_time:
|
|
age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
|
|
if age_minutes < self.config.cache_timeout_minutes:
|
|
return cached_data
|
|
else:
|
|
# Remove expired cache
|
|
del self._data_cache[cache_key]
|
|
|
|
return None
|
|
|
|
def _cache_data(self, cache_key: str, data: Dict[str, Any]) -> None:
|
|
"""Cache data with timestamp."""
|
|
self._data_cache[cache_key] = data
|
|
|
|
# Simple cache size management
|
|
if len(self._data_cache) > 100: # Limit cache size
|
|
# Remove oldest entries
|
|
oldest_keys = sorted(
|
|
self._data_cache.keys(),
|
|
key=lambda k: self._data_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc))
|
|
)[:20]
|
|
for key in oldest_keys:
|
|
del self._data_cache[key]
|
|
|
|
def clear_cache(self) -> None:
|
|
"""Clear all cached data including persistent storage."""
|
|
self._data_cache.clear()
|
|
self._indicator_cache.clear()
|
|
|
|
# Clear persistent cache file
|
|
try:
|
|
if self._persistent_cache_file.exists():
|
|
self._persistent_cache_file.unlink()
|
|
self.logger.debug("Cleared persistent cache file")
|
|
except Exception as e:
|
|
self.logger.warning(f"Failed to clear persistent cache file: {e}")
|
|
|
|
self.logger.info("StrategyDataIntegrator: Cleared all caches")
|
|
|
|
def get_cache_stats(self) -> Dict[str, Any]:
|
|
"""Get cache statistics for monitoring."""
|
|
return {
|
|
'data_cache_size': len(self._data_cache),
|
|
'indicator_cache_size': len(self._indicator_cache),
|
|
'config': {
|
|
'cache_timeout_minutes': self.config.cache_timeout_minutes,
|
|
'enable_indicator_caching': self.config.enable_indicator_caching,
|
|
'max_cached_indicators': self.config.max_cached_indicators
|
|
}
|
|
}
|
|
|
|
def calculate_indicators_batch(
|
|
self,
|
|
market_df: pd.DataFrame,
|
|
indicator_configs: List[Dict[str, Any]],
|
|
enable_caching: bool = True
|
|
) -> Dict[str, pd.DataFrame]:
|
|
"""
|
|
Calculate multiple indicators efficiently using vectorized operations.
|
|
|
|
Args:
|
|
market_df: DataFrame with OHLCV data
|
|
indicator_configs: List of indicator configurations
|
|
enable_caching: Whether to use cached results
|
|
|
|
Returns:
|
|
Dictionary mapping indicator keys to their DataFrames
|
|
"""
|
|
try:
|
|
if market_df.empty:
|
|
self.logger.warning("StrategyDataIntegrator: Empty market data for indicator calculation")
|
|
return {}
|
|
|
|
indicators_data = {}
|
|
|
|
# Group indicators by type for potential optimization
|
|
indicators_by_type = {}
|
|
for config in indicator_configs:
|
|
indicator_type = config.get('type')
|
|
if indicator_type not in indicators_by_type:
|
|
indicators_by_type[indicator_type] = []
|
|
indicators_by_type[indicator_type].append(config)
|
|
|
|
# Calculate indicators, leveraging caching and batching where possible
|
|
for indicator_type, configs in indicators_by_type.items():
|
|
for config in configs:
|
|
indicator_key = self._create_indicator_key(config)
|
|
|
|
# Check cache first if enabled
|
|
if enable_caching and self.config.enable_indicator_caching:
|
|
cached_result = self._get_cached_indicator(indicator_key, market_df)
|
|
if cached_result is not None:
|
|
indicators_data[indicator_key] = cached_result
|
|
continue
|
|
|
|
try:
|
|
# Calculate indicator using TechnicalIndicators class
|
|
indicator_result = self.technical_indicators.calculate(
|
|
indicator_type,
|
|
market_df,
|
|
**{k: v for k, v in config.items() if k != 'type'}
|
|
)
|
|
|
|
if indicator_result is not None and not indicator_result.empty:
|
|
indicators_data[indicator_key] = indicator_result
|
|
|
|
# Cache the result if enabled
|
|
if enable_caching and self.config.enable_indicator_caching:
|
|
self._cache_indicator_result(indicator_key, indicator_result, market_df)
|
|
else:
|
|
self.logger.warning(f"Empty result for indicator: {indicator_key}")
|
|
indicators_data[indicator_key] = pd.DataFrame()
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error calculating indicator {indicator_key}: {e}")
|
|
indicators_data[indicator_key] = pd.DataFrame()
|
|
|
|
self.logger.debug(f"Calculated {len(indicators_data)} indicators in batch")
|
|
return indicators_data
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error in batch indicator calculation: {e}")
|
|
return {}
|
|
|
|
def _create_indicator_key(self, indicator_config: Dict[str, Any]) -> str:
|
|
"""
|
|
Create a unique key for indicator configuration.
|
|
|
|
Args:
|
|
indicator_config: Indicator configuration dictionary
|
|
|
|
Returns:
|
|
Unique string key for the indicator
|
|
"""
|
|
indicator_type = indicator_config.get('type', 'unknown')
|
|
|
|
# Create key from type and parameters
|
|
params = {k: v for k, v in indicator_config.items() if k != 'type'}
|
|
|
|
if params:
|
|
# Sort parameters for consistent key generation
|
|
param_str = "_".join(f"{k}_{v}" for k, v in sorted(params.items()))
|
|
return f"{indicator_type}_{param_str}"
|
|
else:
|
|
return indicator_type
|
|
|
|
def _get_cached_indicator(
|
|
self,
|
|
indicator_key: str,
|
|
market_df: pd.DataFrame
|
|
) -> Optional[pd.DataFrame]:
|
|
"""
|
|
Get cached indicator result if available and valid.
|
|
|
|
Args:
|
|
indicator_key: Unique indicator key
|
|
market_df: Current market data DataFrame
|
|
|
|
Returns:
|
|
Cached DataFrame if valid, None otherwise
|
|
"""
|
|
if indicator_key not in self._indicator_cache:
|
|
return None
|
|
|
|
cached_data = self._indicator_cache[indicator_key]
|
|
|
|
# Check if cache is still valid
|
|
cache_time = cached_data.get('timestamp')
|
|
if cache_time:
|
|
age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
|
|
if age_minutes >= self.config.cache_timeout_minutes:
|
|
# Remove expired cache
|
|
del self._indicator_cache[indicator_key]
|
|
return None
|
|
|
|
# Check if market data size matches (simple validation)
|
|
cached_result = cached_data.get('result')
|
|
if cached_result is not None and len(cached_result) == len(market_df):
|
|
self.logger.debug(f"Using cached indicator result for {indicator_key}")
|
|
return cached_result
|
|
|
|
return None
|
|
|
|
def _cache_indicator_result(
|
|
self,
|
|
indicator_key: str,
|
|
result: pd.DataFrame,
|
|
market_df: pd.DataFrame
|
|
) -> None:
|
|
"""
|
|
Cache indicator calculation result.
|
|
|
|
Args:
|
|
indicator_key: Unique indicator key
|
|
result: Calculated indicator DataFrame
|
|
market_df: Market data used for calculation
|
|
"""
|
|
self._indicator_cache[indicator_key] = {
|
|
'result': result.copy(), # Store a copy to avoid modification
|
|
'market_data_length': len(market_df),
|
|
'timestamp': datetime.now(timezone.utc)
|
|
}
|
|
|
|
# Manage cache size
|
|
if len(self._indicator_cache) > self.config.max_cached_indicators:
|
|
# Remove oldest entries
|
|
oldest_keys = sorted(
|
|
self._indicator_cache.keys(),
|
|
key=lambda k: self._indicator_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc))
|
|
)[:10] # Remove 10 oldest entries
|
|
|
|
for key in oldest_keys:
|
|
del self._indicator_cache[key]
|
|
|
|
self.logger.debug(f"Cleaned up indicator cache, removed {len(oldest_keys)} entries")
|
|
|
|
# Save to persistent storage periodically
|
|
if len(self._indicator_cache) % 10 == 0: # Every 10 new cache entries
|
|
self._save_persistent_cache()
|
|
|
|
def calculate_strategy_signals_enhanced(
|
|
self,
|
|
strategy_name: str,
|
|
strategy_config: Dict[str, Any],
|
|
symbol: str,
|
|
timeframe: str,
|
|
days_back: Optional[int] = None,
|
|
exchange: str = "okx",
|
|
enable_caching: bool = True
|
|
) -> List[StrategyResult]:
|
|
"""
|
|
Enhanced strategy signal calculation with vectorized indicator batching.
|
|
|
|
This method uses the new indicator batching interface for improved performance
|
|
while maintaining compatibility with the existing single-strategy interface.
|
|
|
|
Args:
|
|
strategy_name: Name of the strategy to execute
|
|
strategy_config: Strategy-specific configuration parameters
|
|
symbol: Trading pair symbol
|
|
timeframe: Timeframe for strategy calculation
|
|
days_back: Number of days to look back for data
|
|
exchange: Exchange name
|
|
enable_caching: Whether to use cached results
|
|
|
|
Returns:
|
|
List of strategy results with signals
|
|
"""
|
|
try:
|
|
self.logger.info(f"StrategyDataIntegrator: Enhanced calculation for {strategy_name} on {symbol} {timeframe}")
|
|
|
|
# Get market data for strategy
|
|
market_df = self.get_strategy_data(
|
|
symbol=symbol,
|
|
timeframe=timeframe,
|
|
days_back=days_back,
|
|
exchange=exchange
|
|
)
|
|
|
|
if market_df.empty:
|
|
self.logger.warning(f"No market data available for {symbol} {timeframe}")
|
|
return []
|
|
|
|
# Validate data sufficiency
|
|
if not self.validate_strategy_requirements(market_df, strategy_name):
|
|
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
|
|
return []
|
|
|
|
# Get required indicators from strategy
|
|
strategy = self.strategy_factory.create_strategy(strategy_name)
|
|
if not strategy:
|
|
self.logger.error(f"Could not create strategy: {strategy_name}")
|
|
return []
|
|
|
|
required_indicators = strategy.get_required_indicators()
|
|
|
|
# Use vectorized indicator calculation
|
|
indicators_data = self.calculate_indicators_batch(
|
|
market_df=market_df,
|
|
indicator_configs=required_indicators,
|
|
enable_caching=enable_caching
|
|
)
|
|
|
|
# Calculate strategy signals using the strategy directly
|
|
results = strategy.calculate(market_df, indicators_data, **strategy_config)
|
|
|
|
# Add metadata to results
|
|
for result in results:
|
|
if not hasattr(result, 'metadata') or result.metadata is None:
|
|
result.metadata = {}
|
|
result.metadata.update({
|
|
'symbol': symbol,
|
|
'timeframe': timeframe,
|
|
'exchange': exchange,
|
|
'data_points_used': len(market_df),
|
|
'indicators_calculated': len(indicators_data),
|
|
'calculation_timestamp': datetime.now(timezone.utc).isoformat(),
|
|
'enhanced_calculation': True
|
|
})
|
|
|
|
self.logger.info(f"Enhanced calculation generated {len(results)} strategy results for {strategy_name}")
|
|
return results
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error in enhanced strategy signal calculation for {strategy_name}: {e}")
|
|
return []
|
|
|
|
def analyze_indicator_dependencies(
|
|
self,
|
|
indicator_configs: List[Dict[str, Any]]
|
|
) -> Dict[str, List[str]]:
|
|
"""
|
|
Analyze indicator dependencies to optimize calculation order.
|
|
|
|
Args:
|
|
indicator_configs: List of indicator configurations
|
|
|
|
Returns:
|
|
Dictionary mapping indicator keys to their dependencies
|
|
"""
|
|
dependencies = {}
|
|
|
|
for config in indicator_configs:
|
|
indicator_key = self._create_indicator_key(config)
|
|
indicator_type = config.get('type', '').lower()
|
|
|
|
# Define known indicator dependencies
|
|
# Most indicators depend only on price data, but some depend on other indicators
|
|
deps = []
|
|
|
|
if indicator_type == 'macd':
|
|
# MACD signal line depends on MACD line (handled internally by TechnicalIndicators)
|
|
deps = [] # No external dependencies
|
|
elif indicator_type == 'bollinger_bands':
|
|
# Bollinger Bands depend on SMA (handled internally)
|
|
deps = [] # No external dependencies
|
|
elif indicator_type in ['stochastic', 'rsi_stochastic']:
|
|
# These might depend on RSI (if implemented)
|
|
deps = [] # For now, no external dependencies
|
|
else:
|
|
# Most indicators (SMA, EMA, RSI, etc.) depend only on price data
|
|
deps = []
|
|
|
|
dependencies[indicator_key] = deps
|
|
|
|
return dependencies
|
|
|
|
def resolve_calculation_order(
|
|
self,
|
|
indicator_configs: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Resolve optimal calculation order based on indicator dependencies.
|
|
|
|
Args:
|
|
indicator_configs: List of indicator configurations
|
|
|
|
Returns:
|
|
List of indicator configurations in optimal calculation order
|
|
"""
|
|
try:
|
|
# Analyze dependencies
|
|
dependencies = self.analyze_indicator_dependencies(indicator_configs)
|
|
|
|
# For current implementation, most indicators don't have external dependencies
|
|
# So we can optimize by grouping similar indicators together
|
|
|
|
# Group by indicator type for potential batching optimizations
|
|
type_groups = {}
|
|
for config in indicator_configs:
|
|
indicator_type = config.get('type', 'unknown')
|
|
if indicator_type not in type_groups:
|
|
type_groups[indicator_type] = []
|
|
type_groups[indicator_type].append(config)
|
|
|
|
# Order types by computational complexity (lighter first)
|
|
type_priority = {
|
|
'sma': 1, # Simple moving average - fastest
|
|
'ema': 2, # Exponential moving average
|
|
'rsi': 3, # RSI calculation
|
|
'macd': 4, # MACD - more complex
|
|
'bollinger_bands': 5, # Bollinger Bands
|
|
'stochastic': 6 # Most complex
|
|
}
|
|
|
|
# Sort groups by priority, unknown types go last
|
|
sorted_types = sorted(
|
|
type_groups.keys(),
|
|
key=lambda t: type_priority.get(t, 999)
|
|
)
|
|
|
|
# Build final ordered list
|
|
ordered_configs = []
|
|
for indicator_type in sorted_types:
|
|
# Within each type, sort by period (smaller periods first for caching benefits)
|
|
configs_for_type = type_groups[indicator_type]
|
|
configs_for_type.sort(key=lambda c: c.get('period', 0))
|
|
ordered_configs.extend(configs_for_type)
|
|
|
|
self.logger.debug(f"Resolved calculation order for {len(ordered_configs)} indicators")
|
|
return ordered_configs
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error resolving calculation order: {e}")
|
|
# Fallback to original order
|
|
return indicator_configs
|
|
|
|
def calculate_indicators_orchestrated(
|
|
self,
|
|
market_df: pd.DataFrame,
|
|
indicator_configs: List[Dict[str, Any]],
|
|
enable_caching: bool = True
|
|
) -> Dict[str, pd.DataFrame]:
|
|
"""
|
|
Orchestrated indicator calculation with dependency resolution and optimization.
|
|
|
|
Args:
|
|
market_df: DataFrame with OHLCV data
|
|
indicator_configs: List of indicator configurations
|
|
enable_caching: Whether to use cached results
|
|
|
|
Returns:
|
|
Dictionary mapping indicator keys to their DataFrames
|
|
"""
|
|
try:
|
|
if market_df.empty:
|
|
self.logger.warning("StrategyDataIntegrator: Empty market data for orchestrated calculation")
|
|
return {}
|
|
|
|
# Resolve optimal calculation order
|
|
ordered_configs = self.resolve_calculation_order(indicator_configs)
|
|
|
|
indicators_data = {}
|
|
calculation_stats = {
|
|
'cache_hits': 0,
|
|
'calculations_performed': 0,
|
|
'errors': 0
|
|
}
|
|
|
|
# Calculate indicators in optimized order
|
|
for config in ordered_configs:
|
|
indicator_key = self._create_indicator_key(config)
|
|
indicator_type = config.get('type')
|
|
|
|
# Check cache first if enabled
|
|
if enable_caching and self.config.enable_indicator_caching:
|
|
cached_result = self._get_cached_indicator(indicator_key, market_df)
|
|
if cached_result is not None:
|
|
indicators_data[indicator_key] = cached_result
|
|
calculation_stats['cache_hits'] += 1
|
|
continue
|
|
|
|
try:
|
|
# Calculate indicator using TechnicalIndicators class
|
|
indicator_result = self.technical_indicators.calculate(
|
|
indicator_type,
|
|
market_df,
|
|
**{k: v for k, v in config.items() if k != 'type'}
|
|
)
|
|
|
|
if indicator_result is not None and not indicator_result.empty:
|
|
indicators_data[indicator_key] = indicator_result
|
|
calculation_stats['calculations_performed'] += 1
|
|
|
|
# Cache the result if enabled
|
|
if enable_caching and self.config.enable_indicator_caching:
|
|
self._cache_indicator_result(indicator_key, indicator_result, market_df)
|
|
else:
|
|
self.logger.warning(f"Empty result for indicator: {indicator_key}")
|
|
indicators_data[indicator_key] = pd.DataFrame()
|
|
calculation_stats['errors'] += 1
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error calculating indicator {indicator_key}: {e}")
|
|
indicators_data[indicator_key] = pd.DataFrame()
|
|
calculation_stats['errors'] += 1
|
|
|
|
self.logger.debug(
|
|
f"Orchestrated calculation complete: {calculation_stats['calculations_performed']} calculated, "
|
|
f"{calculation_stats['cache_hits']} cached, {calculation_stats['errors']} errors"
|
|
)
|
|
|
|
return indicators_data
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error in orchestrated indicator calculation: {e}")
|
|
return {}
|
|
|
|
def calculate_strategy_signals_orchestrated(
|
|
self,
|
|
strategy_name: str,
|
|
strategy_config: Dict[str, Any],
|
|
symbol: str,
|
|
timeframe: str,
|
|
days_back: Optional[int] = None,
|
|
exchange: str = "okx",
|
|
enable_caching: bool = True
|
|
) -> List[StrategyResult]:
|
|
"""
|
|
Fully orchestrated strategy signal calculation with optimized workflow.
|
|
|
|
This method provides the most optimized calculation flow:
|
|
Data → Dependency Analysis → Orchestrated Indicators → Strategy → Results
|
|
|
|
Args:
|
|
strategy_name: Name of the strategy to execute
|
|
strategy_config: Strategy-specific configuration parameters
|
|
symbol: Trading pair symbol
|
|
timeframe: Timeframe for strategy calculation
|
|
days_back: Number of days to look back for data
|
|
exchange: Exchange name
|
|
enable_caching: Whether to use cached results
|
|
|
|
Returns:
|
|
List of strategy results with signals
|
|
"""
|
|
try:
|
|
self.logger.info(f"StrategyDataIntegrator: Orchestrated calculation for {strategy_name} on {symbol} {timeframe}")
|
|
|
|
# Step 1: Get market data
|
|
market_df = self.get_strategy_data(
|
|
symbol=symbol,
|
|
timeframe=timeframe,
|
|
days_back=days_back,
|
|
exchange=exchange
|
|
)
|
|
|
|
if market_df.empty:
|
|
self.logger.warning(f"No market data available for {symbol} {timeframe}")
|
|
return []
|
|
|
|
# Step 2: Validate data sufficiency
|
|
if not self.validate_strategy_requirements(market_df, strategy_name):
|
|
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
|
|
return []
|
|
|
|
# Step 3: Get strategy and analyze its requirements
|
|
strategy = self.strategy_factory.create_strategy(strategy_name)
|
|
if not strategy:
|
|
self.logger.error(f"Could not create strategy: {strategy_name}")
|
|
return []
|
|
|
|
required_indicators = strategy.get_required_indicators()
|
|
|
|
# Step 4: Orchestrated indicator calculation with dependency resolution
|
|
indicators_data = self.calculate_indicators_orchestrated(
|
|
market_df=market_df,
|
|
indicator_configs=required_indicators,
|
|
enable_caching=enable_caching
|
|
)
|
|
|
|
# Step 5: Calculate strategy signals
|
|
results = strategy.calculate(market_df, indicators_data, **strategy_config)
|
|
|
|
# Step 6: Add comprehensive metadata
|
|
for result in results:
|
|
if not hasattr(result, 'metadata') or result.metadata is None:
|
|
result.metadata = {}
|
|
result.metadata.update({
|
|
'symbol': symbol,
|
|
'timeframe': timeframe,
|
|
'exchange': exchange,
|
|
'data_points_used': len(market_df),
|
|
'indicators_calculated': len(indicators_data),
|
|
'required_indicators': len(required_indicators),
|
|
'calculation_timestamp': datetime.now(timezone.utc).isoformat(),
|
|
'orchestrated_calculation': True,
|
|
'calculation_method': 'orchestrated'
|
|
})
|
|
|
|
self.logger.info(
|
|
f"Orchestrated calculation generated {len(results)} strategy results for {strategy_name} "
|
|
f"using {len(indicators_data)} indicators"
|
|
)
|
|
return results
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error in orchestrated strategy calculation for {strategy_name}: {e}")
|
|
return []
|
|
|
|
def get_calculation_performance_stats(self) -> Dict[str, Any]:
    """
    Summarize cache performance and the available calculation entry points.

    Returns:
        Dictionary containing the current cache statistics (as reported by
        ``get_cache_stats``), the names of the available calculation methods,
        the recommended method, and a list of performance tips.
    """
    stats: Dict[str, Any] = {'cache_performance': self.get_cache_stats()}

    # Listed from the simplest entry point to the most complete one.
    stats['available_methods'] = [
        'calculate_strategy_signals',  # Basic method
        'calculate_strategy_signals_enhanced',  # Vectorized method
        'calculate_strategy_signals_orchestrated',  # Fully orchestrated method
    ]
    stats['recommended_method'] = 'calculate_strategy_signals_orchestrated'
    stats['performance_tips'] = [
        'Use orchestrated method for best performance',
        'Enable caching for repeated calculations',
        'Use larger datasets to benefit from vectorization',
        'Monitor cache hit rates for optimization',
    ]
    return stats
|
|
|
|
def get_shared_indicator_cache(self, indicator_key: str, market_data_length: Optional[int] = None) -> Optional[pd.DataFrame]:
    """
    Get indicator result from shared cache.

    This method allows strategies to share cached indicator results,
    improving efficiency when multiple strategies use the same indicators.

    The returned object is a copy of the cached result, so a caller that
    mutates it (adds columns, drops rows, ...) cannot alter the entry that
    other strategies will read afterwards.

    Args:
        indicator_key: Unique indicator key
        market_data_length: Expected market data length for validation;
            when None the length check is skipped

    Returns:
        Copy of the cached DataFrame if available and valid, None otherwise.
        An entry is invalid when it is older than
        ``self.config.cache_timeout_minutes`` (it is then evicted), when its
        recorded market data length differs from ``market_data_length``, or
        when it holds no result.
    """
    cached_data = self._indicator_cache.get(indicator_key)
    if cached_data is None:
        return None

    # Check if cache is still valid. Entries without a timestamp are
    # treated as non-expiring, matching the previous behavior.
    cache_time = cached_data.get('timestamp')
    if cache_time:
        age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
        if age_minutes >= self.config.cache_timeout_minutes:
            # Remove expired cache
            self._indicator_cache.pop(indicator_key, None)
            return None

    # Validate market data length if provided
    if market_data_length is not None:
        if cached_data.get('market_data_length') != market_data_length:
            return None

    cached_result = cached_data.get('result')
    if cached_result is None:
        return None

    self.logger.debug(f"Using shared cached indicator result for {indicator_key}")
    # Hand out a copy: the cache stores a private copy on write, so reads
    # must not leak a mutable reference to that shared object either.
    return cached_result.copy()
|
|
|
|
def share_indicator_result(
    self,
    indicator_key: str,
    result: pd.DataFrame,
    market_data_length: int
) -> None:
    """
    Publish an indicator result into the cache for cross-strategy use.

    A copy of ``result`` is stored together with the market data length, the
    current UTC time and a ``shared`` marker. Whenever the cache size is a
    multiple of five after the insert, the persistent cache is saved.

    Args:
        indicator_key: Unique indicator key
        result: Calculated indicator DataFrame
        market_data_length: Length of market data used for calculation
    """
    cache = self._indicator_cache
    entry = {
        'result': result.copy(),
        'market_data_length': market_data_length,
        'timestamp': datetime.now(timezone.utc),
        'shared': True,  # Mark as shared cache entry
    }
    cache[indicator_key] = entry

    # Save to persistent storage for cross-session sharing
    # (more frequent saves for shared cache)
    due_for_save = len(cache) % 5 == 0
    if due_for_save:
        self._save_persistent_cache()

    self.logger.debug(f"Shared indicator result for cross-strategy use: {indicator_key}")
|
|
|
|
def get_cache_sharing_stats(self) -> Dict[str, Any]:
    """Report how many cached indicator entries are shared versus private."""
    cache = self._indicator_cache
    total_entries = len(cache)
    # An entry counts as shared only when its 'shared' flag is truthy.
    shared_count = len([entry for entry in cache.values() if entry.get('shared', False)])

    # Guard against dividing by zero on an empty cache.
    if cache:
        efficiency = shared_count / total_entries
    else:
        efficiency = 0.0

    return {
        'total_cached_indicators': total_entries,
        'shared_cache_entries': shared_count,
        'private_cache_entries': total_entries - shared_count,
        'sharing_efficiency': efficiency,
        'persistent_cache_available': self._persistent_cache_file.exists(),
    }
|
|
|
|
|
|
def get_strategy_data_integrator(config: StrategyDataIntegrationConfig = None) -> StrategyDataIntegrator:
    """
    Build a new strategy data integrator.

    Args:
        config: Optional configuration, forwarded unchanged to the
            StrategyDataIntegrator constructor (None when omitted)

    Returns:
        Newly constructed StrategyDataIntegrator instance
    """
    integrator = StrategyDataIntegrator(config)
    return integrator