TCPDashboard/strategies/data_integration.py
Vasily.onl 8c23489ff0 Implement real-time strategy execution and data integration features
- Added `realtime_execution.py` for real-time strategy execution, enabling live signal generation and integration with the dashboard's chart refresh cycle.
- Introduced `data_integration.py` to manage market data orchestration, caching, and technical indicator calculations for strategy signal generation.
- Implemented `validation.py` for comprehensive validation and quality assessment of strategy-generated signals, ensuring reliability and consistency.
- Developed `batch_processing.py` to facilitate efficient backtesting of multiple strategies across large datasets with memory management and performance optimization.
- Updated `__init__.py` files to include new modules and ensure proper exports, enhancing modularity and maintainability.
- Enhanced unit tests for the new features, ensuring robust functionality and adherence to project standards.

These changes establish a solid foundation for real-time strategy execution and data integration, aligning with project goals for modularity, performance, and maintainability.
2025-06-12 18:29:39 +08:00


"""
Strategy Data Integration Module
This module provides seamless integration between market data, technical indicators,
and strategy calculations, handling data orchestration, caching, and optimization
for strategy signal generation and backtesting.
"""
import pandas as pd
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
import json
import pickle
import os
from pathlib import Path
from database.operations import get_database_operations, DatabaseOperationError
from data.common.data_types import OHLCVCandle
from data.common.indicators import TechnicalIndicators
from components.charts.config.indicator_defs import convert_database_candles_to_ohlcv
from .factory import StrategyFactory
from .data_types import StrategyResult
from utils.logger import get_logger
# Initialize logger
logger = get_logger()
@dataclass
class StrategyDataIntegrationConfig:
"""Configuration for strategy data integration"""
default_days_back: int = 30 # Strategies often need more historical data
min_candles_required: int = 100 # Strategies need sufficient data for reliable signals
max_candles_limit: int = 5000 # Allow larger datasets for backtesting
cache_timeout_minutes: int = 15 # Longer cache for strategy analysis
enable_data_validation: bool = True
enable_sparse_data_handling: bool = True
enable_indicator_caching: bool = True
max_cached_indicators: int = 50 # Limit memory usage
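# Example (illustrative): a backtesting-oriented configuration that trades a
# longer lookback and cache lifetime for fewer database round-trips.
#
#     backtest_config = StrategyDataIntegrationConfig(
#         default_days_back=90,
#         min_candles_required=200,
#         cache_timeout_minutes=60,
#     )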
class StrategyDataIntegrator:
"""
Integrates market data with strategy calculations and signal generation.
This class handles:
- Fetching and preparing market data for strategies
- Pre-calculating required technical indicators
- Orchestrating strategy signal generation
- Caching computed indicators for performance
- Multi-timeframe data handling
- Strategy signal validation and storage
"""
    def __init__(self, config: Optional[StrategyDataIntegrationConfig] = None):
"""
Initialize strategy data integrator.
Args:
config: Integration configuration
"""
self.config = config or StrategyDataIntegrationConfig()
self.logger = logger
self.db_ops = get_database_operations(self.logger)
self.technical_indicators = TechnicalIndicators(self.logger)
self.strategy_factory = StrategyFactory(self.logger)
# Caching for computed indicators and market data
self._indicator_cache: Dict[str, Dict[str, Any]] = {}
self._data_cache: Dict[str, Dict[str, Any]] = {}
# Cache persistence setup
self._cache_dir = Path("temp/strategy_cache")
self._cache_dir.mkdir(parents=True, exist_ok=True)
self._persistent_cache_file = self._cache_dir / "indicator_cache.pkl"
# Load persistent cache if available
self._load_persistent_cache()
if self.logger:
self.logger.info("StrategyDataIntegrator: Initialized with strategy-optimized configuration")
def _load_persistent_cache(self) -> None:
"""Load indicator cache from persistent storage."""
try:
if self._persistent_cache_file.exists():
with open(self._persistent_cache_file, 'rb') as f:
cached_data = pickle.load(f)
# Validate and filter expired entries
current_time = datetime.now(timezone.utc)
valid_entries = 0
for key, data in cached_data.items():
cache_time = data.get('timestamp')
if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes:
self._indicator_cache[key] = data
valid_entries += 1
self.logger.debug(f"Loaded {valid_entries} valid cache entries from persistent storage")
except Exception as e:
self.logger.warning(f"Failed to load persistent cache: {e}")
def _save_persistent_cache(self) -> None:
"""Save indicator cache to persistent storage."""
try:
# Only save recent, valid entries to avoid bloat
current_time = datetime.now(timezone.utc)
entries_to_save = {}
for key, data in self._indicator_cache.items():
cache_time = data.get('timestamp')
if cache_time and (current_time - cache_time).total_seconds() / 60 < self.config.cache_timeout_minutes:
entries_to_save[key] = data
with open(self._persistent_cache_file, 'wb') as f:
pickle.dump(entries_to_save, f)
self.logger.debug(f"Saved {len(entries_to_save)} cache entries to persistent storage")
except Exception as e:
self.logger.warning(f"Failed to save persistent cache: {e}")
def calculate_strategy_signals(
self,
strategy_name: str,
strategy_config: Dict[str, Any],
symbol: str,
timeframe: str,
days_back: Optional[int] = None,
exchange: str = "okx",
enable_caching: bool = True
) -> List[StrategyResult]:
"""
Main orchestration method for calculating strategy signals.
Args:
strategy_name: Name of the strategy to execute
strategy_config: Strategy-specific configuration parameters
symbol: Trading pair symbol
timeframe: Timeframe for strategy calculation
days_back: Number of days to look back for data
exchange: Exchange name
            enable_caching: Whether to use cached indicator results (not yet
                applied on this basic path; see calculate_strategy_signals_enhanced)
Returns:
List of strategy results with signals
"""
try:
self.logger.info(f"StrategyDataIntegrator: Calculating signals for {strategy_name} on {symbol} {timeframe}")
# Get market data for strategy
market_df = self.get_strategy_data(
symbol=symbol,
timeframe=timeframe,
days_back=days_back,
exchange=exchange
)
if market_df.empty:
self.logger.warning(f"No market data available for {symbol} {timeframe}")
return []
# Validate data sufficiency
if not self.validate_strategy_requirements(market_df, strategy_name):
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
return []
# Calculate strategy signals using factory
results = self.strategy_factory.calculate_strategy_signals(
strategy_name=strategy_name,
df=market_df,
strategy_config=strategy_config
)
# Add metadata to results
for result in results:
if not hasattr(result, 'metadata') or result.metadata is None:
result.metadata = {}
result.metadata.update({
'symbol': symbol,
'timeframe': timeframe,
'exchange': exchange,
'data_points_used': len(market_df),
'calculation_timestamp': datetime.now(timezone.utc).isoformat()
})
self.logger.info(f"Generated {len(results)} strategy results for {strategy_name}")
return results
except Exception as e:
self.logger.error(f"Error calculating strategy signals for {strategy_name}: {e}")
return []
def get_strategy_data(
self,
symbol: str,
timeframe: str,
days_back: Optional[int] = None,
exchange: str = "okx"
) -> pd.DataFrame:
"""
Fetch and prepare market data for strategy calculation.
Args:
symbol: Trading pair symbol
timeframe: Timeframe
days_back: Number of days to look back
exchange: Exchange name
Returns:
DataFrame with OHLCV data ready for strategy calculation
"""
try:
# Use configured default if not specified
if days_back is None:
days_back = self.config.default_days_back
# Check cache first
cache_key = f"market_data_{symbol}_{timeframe}_{days_back}_{exchange}"
cached_data = self._get_cached_data(cache_key)
if cached_data:
self.logger.debug(f"Using cached market data for {symbol} {timeframe}")
return cached_data['dataframe']
# Calculate time range
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(days=days_back)
# Fetch raw market data
raw_candles = self.db_ops.market_data.get_candles(
symbol=symbol,
timeframe=timeframe,
start_time=start_time,
end_time=end_time,
exchange=exchange
)
if not raw_candles:
self.logger.warning(f"No raw candles found for {symbol} {timeframe}")
return pd.DataFrame()
# Convert to OHLCV format
ohlcv_candles = convert_database_candles_to_ohlcv(raw_candles)
if not ohlcv_candles:
self.logger.warning(f"No OHLCV candles after conversion for {symbol} {timeframe}")
return pd.DataFrame()
# Convert to DataFrame for strategy processing
market_df = self._prepare_dataframe_from_candles(ohlcv_candles)
# Cache the results
self._cache_data(cache_key, {
'dataframe': market_df,
'candle_count': len(raw_candles),
'timestamp': datetime.now(timezone.utc)
})
self.logger.debug(f"Fetched {len(raw_candles)} candles for strategy data: {symbol} {timeframe}")
return market_df
except DatabaseOperationError as e:
self.logger.error(f"Database error fetching strategy data: {e}")
return pd.DataFrame()
except Exception as e:
self.logger.error(f"Error fetching strategy data for {symbol} {timeframe}: {e}")
return pd.DataFrame()
def _prepare_dataframe_from_candles(self, candles: List[OHLCVCandle]) -> pd.DataFrame:
"""
Convert OHLCV candles to DataFrame optimized for strategy calculations.
Uses vectorized approach for improved performance.
Args:
candles: List of OHLCV candles
Returns:
DataFrame with OHLCV data
"""
if not candles:
return pd.DataFrame()
        # Build every column in a single pass - far faster than appending rows one at a time
df = pd.DataFrame({
'timestamp': [candle.end_time for candle in candles],
'open': [float(candle.open) for candle in candles],
'high': [float(candle.high) for candle in candles],
'low': [float(candle.low) for candle in candles],
'close': [float(candle.close) for candle in candles],
'volume': [float(candle.volume) for candle in candles]
})
# Set timestamp as index and sort
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)
df.sort_index(inplace=True)
# Remove index name for cleaner appearance
df.index.name = None
# Ensure proper data types using vectorized operations
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
# Remove any rows with NaN values
df.dropna(inplace=True)
return df
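    # Illustrative output (values are made up): a DatetimeIndex with one row
    # per candle, oldest first, and float64 OHLCV columns.
    #
    #                          open     high      low    close  volume
    #     2025-06-01 00:00  67000.0  67250.0  66800.0  67100.0   123.4
    #     2025-06-01 01:00  67100.0  67400.0  67050.0  67380.0    98.7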
def validate_strategy_requirements(
self,
market_df: pd.DataFrame,
strategy_name: str
) -> bool:
"""
Validate that market data meets strategy requirements.
Args:
market_df: Market data DataFrame
strategy_name: Name of the strategy
Returns:
True if data meets requirements, False otherwise
"""
try:
# Check minimum data points
if len(market_df) < self.config.min_candles_required:
self.logger.warning(
f"Insufficient data points: {len(market_df)} < {self.config.min_candles_required}"
)
return False
# Check for required columns
required_columns = ['open', 'high', 'low', 'close', 'volume']
missing_columns = [col for col in required_columns if col not in market_df.columns]
if missing_columns:
self.logger.error(f"Missing required columns: {missing_columns}")
return False
            # Check data quality (reject zero or negative prices)
for col in ['open', 'high', 'low', 'close']:
if (market_df[col] <= 0).any():
self.logger.warning(f"Invalid price data found in column {col}")
return False
# Strategy-specific validations could be added here
# For example, some strategies might need specific minimum periods
return True
except Exception as e:
self.logger.error(f"Error validating strategy requirements: {e}")
return False
def _get_cached_data(self, cache_key: str) -> Optional[Dict[str, Any]]:
"""Get cached data if available and not expired."""
if cache_key not in self._data_cache:
return None
cached_data = self._data_cache[cache_key]
cache_time = cached_data.get('timestamp')
if cache_time:
age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
if age_minutes < self.config.cache_timeout_minutes:
return cached_data
else:
# Remove expired cache
del self._data_cache[cache_key]
return None
def _cache_data(self, cache_key: str, data: Dict[str, Any]) -> None:
"""Cache data with timestamp."""
self._data_cache[cache_key] = data
# Simple cache size management
if len(self._data_cache) > 100: # Limit cache size
# Remove oldest entries
oldest_keys = sorted(
self._data_cache.keys(),
key=lambda k: self._data_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc))
)[:20]
for key in oldest_keys:
del self._data_cache[key]
def clear_cache(self) -> None:
"""Clear all cached data including persistent storage."""
self._data_cache.clear()
self._indicator_cache.clear()
# Clear persistent cache file
try:
if self._persistent_cache_file.exists():
self._persistent_cache_file.unlink()
self.logger.debug("Cleared persistent cache file")
except Exception as e:
self.logger.warning(f"Failed to clear persistent cache file: {e}")
self.logger.info("StrategyDataIntegrator: Cleared all caches")
def get_cache_stats(self) -> Dict[str, Any]:
"""Get cache statistics for monitoring."""
return {
'data_cache_size': len(self._data_cache),
'indicator_cache_size': len(self._indicator_cache),
'config': {
'cache_timeout_minutes': self.config.cache_timeout_minutes,
'enable_indicator_caching': self.config.enable_indicator_caching,
'max_cached_indicators': self.config.max_cached_indicators
}
}
def calculate_indicators_batch(
self,
market_df: pd.DataFrame,
indicator_configs: List[Dict[str, Any]],
enable_caching: bool = True
) -> Dict[str, pd.DataFrame]:
"""
Calculate multiple indicators efficiently using vectorized operations.
Args:
market_df: DataFrame with OHLCV data
indicator_configs: List of indicator configurations
enable_caching: Whether to use cached results
Returns:
Dictionary mapping indicator keys to their DataFrames
"""
try:
if market_df.empty:
self.logger.warning("StrategyDataIntegrator: Empty market data for indicator calculation")
return {}
indicators_data = {}
# Group indicators by type for potential optimization
indicators_by_type = {}
for config in indicator_configs:
indicator_type = config.get('type')
if indicator_type not in indicators_by_type:
indicators_by_type[indicator_type] = []
indicators_by_type[indicator_type].append(config)
# Calculate indicators, leveraging caching and batching where possible
for indicator_type, configs in indicators_by_type.items():
for config in configs:
indicator_key = self._create_indicator_key(config)
# Check cache first if enabled
if enable_caching and self.config.enable_indicator_caching:
cached_result = self._get_cached_indicator(indicator_key, market_df)
if cached_result is not None:
indicators_data[indicator_key] = cached_result
continue
try:
# Calculate indicator using TechnicalIndicators class
indicator_result = self.technical_indicators.calculate(
indicator_type,
market_df,
**{k: v for k, v in config.items() if k != 'type'}
)
if indicator_result is not None and not indicator_result.empty:
indicators_data[indicator_key] = indicator_result
# Cache the result if enabled
if enable_caching and self.config.enable_indicator_caching:
self._cache_indicator_result(indicator_key, indicator_result, market_df)
else:
self.logger.warning(f"Empty result for indicator: {indicator_key}")
indicators_data[indicator_key] = pd.DataFrame()
except Exception as e:
self.logger.error(f"Error calculating indicator {indicator_key}: {e}")
indicators_data[indicator_key] = pd.DataFrame()
self.logger.debug(f"Calculated {len(indicators_data)} indicators in batch")
return indicators_data
except Exception as e:
self.logger.error(f"Error in batch indicator calculation: {e}")
return {}
def _create_indicator_key(self, indicator_config: Dict[str, Any]) -> str:
"""
Create a unique key for indicator configuration.
Args:
indicator_config: Indicator configuration dictionary
Returns:
Unique string key for the indicator
"""
indicator_type = indicator_config.get('type', 'unknown')
# Create key from type and parameters
params = {k: v for k, v in indicator_config.items() if k != 'type'}
if params:
# Sort parameters for consistent key generation
param_str = "_".join(f"{k}_{v}" for k, v in sorted(params.items()))
return f"{indicator_type}_{param_str}"
else:
return indicator_type
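    # Key format, derived from the logic above (parameter names illustrative):
    #     {'type': 'sma', 'period': 20}             -> "sma_period_20"
    #     {'type': 'macd', 'fast': 12, 'slow': 26}  -> "macd_fast_12_slow_26"
    #     {'type': 'rsi'}                           -> "rsi"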
def _get_cached_indicator(
self,
indicator_key: str,
market_df: pd.DataFrame
) -> Optional[pd.DataFrame]:
"""
Get cached indicator result if available and valid.
Args:
indicator_key: Unique indicator key
market_df: Current market data DataFrame
Returns:
Cached DataFrame if valid, None otherwise
"""
if indicator_key not in self._indicator_cache:
return None
cached_data = self._indicator_cache[indicator_key]
# Check if cache is still valid
cache_time = cached_data.get('timestamp')
if cache_time:
age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
if age_minutes >= self.config.cache_timeout_minutes:
# Remove expired cache
del self._indicator_cache[indicator_key]
return None
# Check if market data size matches (simple validation)
cached_result = cached_data.get('result')
if cached_result is not None and len(cached_result) == len(market_df):
self.logger.debug(f"Using cached indicator result for {indicator_key}")
return cached_result
return None
def _cache_indicator_result(
self,
indicator_key: str,
result: pd.DataFrame,
market_df: pd.DataFrame
) -> None:
"""
Cache indicator calculation result.
Args:
indicator_key: Unique indicator key
result: Calculated indicator DataFrame
market_df: Market data used for calculation
"""
self._indicator_cache[indicator_key] = {
'result': result.copy(), # Store a copy to avoid modification
'market_data_length': len(market_df),
'timestamp': datetime.now(timezone.utc)
}
# Manage cache size
if len(self._indicator_cache) > self.config.max_cached_indicators:
# Remove oldest entries
oldest_keys = sorted(
self._indicator_cache.keys(),
key=lambda k: self._indicator_cache[k].get('timestamp', datetime.min.replace(tzinfo=timezone.utc))
)[:10] # Remove 10 oldest entries
for key in oldest_keys:
del self._indicator_cache[key]
self.logger.debug(f"Cleaned up indicator cache, removed {len(oldest_keys)} entries")
        # Persist periodically; this is a size-based heuristic (fires whenever
        # the cache size is a multiple of 10), not a strict per-entry counter
        if len(self._indicator_cache) % 10 == 0:
self._save_persistent_cache()
def calculate_strategy_signals_enhanced(
self,
strategy_name: str,
strategy_config: Dict[str, Any],
symbol: str,
timeframe: str,
days_back: Optional[int] = None,
exchange: str = "okx",
enable_caching: bool = True
) -> List[StrategyResult]:
"""
Enhanced strategy signal calculation with vectorized indicator batching.
This method uses the new indicator batching interface for improved performance
while maintaining compatibility with the existing single-strategy interface.
Args:
strategy_name: Name of the strategy to execute
strategy_config: Strategy-specific configuration parameters
symbol: Trading pair symbol
timeframe: Timeframe for strategy calculation
days_back: Number of days to look back for data
exchange: Exchange name
enable_caching: Whether to use cached results
Returns:
List of strategy results with signals
"""
try:
self.logger.info(f"StrategyDataIntegrator: Enhanced calculation for {strategy_name} on {symbol} {timeframe}")
# Get market data for strategy
market_df = self.get_strategy_data(
symbol=symbol,
timeframe=timeframe,
days_back=days_back,
exchange=exchange
)
if market_df.empty:
self.logger.warning(f"No market data available for {symbol} {timeframe}")
return []
# Validate data sufficiency
if not self.validate_strategy_requirements(market_df, strategy_name):
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
return []
# Get required indicators from strategy
strategy = self.strategy_factory.create_strategy(strategy_name)
if not strategy:
self.logger.error(f"Could not create strategy: {strategy_name}")
return []
required_indicators = strategy.get_required_indicators()
# Use vectorized indicator calculation
indicators_data = self.calculate_indicators_batch(
market_df=market_df,
indicator_configs=required_indicators,
enable_caching=enable_caching
)
# Calculate strategy signals using the strategy directly
results = strategy.calculate(market_df, indicators_data, **strategy_config)
# Add metadata to results
for result in results:
if not hasattr(result, 'metadata') or result.metadata is None:
result.metadata = {}
result.metadata.update({
'symbol': symbol,
'timeframe': timeframe,
'exchange': exchange,
'data_points_used': len(market_df),
'indicators_calculated': len(indicators_data),
'calculation_timestamp': datetime.now(timezone.utc).isoformat(),
'enhanced_calculation': True
})
self.logger.info(f"Enhanced calculation generated {len(results)} strategy results for {strategy_name}")
return results
except Exception as e:
self.logger.error(f"Error in enhanced strategy signal calculation for {strategy_name}: {e}")
return []
def analyze_indicator_dependencies(
self,
indicator_configs: List[Dict[str, Any]]
) -> Dict[str, List[str]]:
"""
Analyze indicator dependencies to optimize calculation order.
Args:
indicator_configs: List of indicator configurations
Returns:
Dictionary mapping indicator keys to their dependencies
"""
dependencies = {}
for config in indicator_configs:
indicator_key = self._create_indicator_key(config)
indicator_type = config.get('type', '').lower()
# Define known indicator dependencies
# Most indicators depend only on price data, but some depend on other indicators
deps = []
if indicator_type == 'macd':
# MACD signal line depends on MACD line (handled internally by TechnicalIndicators)
deps = [] # No external dependencies
elif indicator_type == 'bollinger_bands':
# Bollinger Bands depend on SMA (handled internally)
deps = [] # No external dependencies
elif indicator_type in ['stochastic', 'rsi_stochastic']:
# These might depend on RSI (if implemented)
deps = [] # For now, no external dependencies
else:
# Most indicators (SMA, EMA, RSI, etc.) depend only on price data
deps = []
dependencies[indicator_key] = deps
return dependencies
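    # Illustrative output for [{'type': 'sma', 'period': 20}, {'type': 'macd'}]:
    #     {'sma_period_20': [], 'macd': []}
    # All lists are empty today; they would be populated once an indicator
    # gains a true external dependency.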
def resolve_calculation_order(
self,
indicator_configs: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""
Resolve optimal calculation order based on indicator dependencies.
Args:
indicator_configs: List of indicator configurations
Returns:
List of indicator configurations in optimal calculation order
"""
try:
            # Analyze dependencies (informational for now: all known indicators
            # resolve their inputs internally, so every entry is an empty list)
            dependencies = self.analyze_indicator_dependencies(indicator_configs)
# For current implementation, most indicators don't have external dependencies
# So we can optimize by grouping similar indicators together
# Group by indicator type for potential batching optimizations
type_groups = {}
for config in indicator_configs:
indicator_type = config.get('type', 'unknown')
if indicator_type not in type_groups:
type_groups[indicator_type] = []
type_groups[indicator_type].append(config)
# Order types by computational complexity (lighter first)
type_priority = {
'sma': 1, # Simple moving average - fastest
'ema': 2, # Exponential moving average
'rsi': 3, # RSI calculation
'macd': 4, # MACD - more complex
'bollinger_bands': 5, # Bollinger Bands
'stochastic': 6 # Most complex
}
# Sort groups by priority, unknown types go last
sorted_types = sorted(
type_groups.keys(),
key=lambda t: type_priority.get(t, 999)
)
# Build final ordered list
ordered_configs = []
for indicator_type in sorted_types:
# Within each type, sort by period (smaller periods first for caching benefits)
configs_for_type = type_groups[indicator_type]
configs_for_type.sort(key=lambda c: c.get('period', 0))
ordered_configs.extend(configs_for_type)
self.logger.debug(f"Resolved calculation order for {len(ordered_configs)} indicators")
return ordered_configs
except Exception as e:
self.logger.error(f"Error resolving calculation order: {e}")
# Fallback to original order
return indicator_configs
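    # Illustrative ordering for [macd, sma(50), ema(12), sma(20)]:
    #     sma(20), sma(50), ema(12), macd
    # Cheaper types run first (type_priority), and smaller periods come first
    # within each type.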
def calculate_indicators_orchestrated(
self,
market_df: pd.DataFrame,
indicator_configs: List[Dict[str, Any]],
enable_caching: bool = True
) -> Dict[str, pd.DataFrame]:
"""
Orchestrated indicator calculation with dependency resolution and optimization.
Args:
market_df: DataFrame with OHLCV data
indicator_configs: List of indicator configurations
enable_caching: Whether to use cached results
Returns:
Dictionary mapping indicator keys to their DataFrames
"""
try:
if market_df.empty:
self.logger.warning("StrategyDataIntegrator: Empty market data for orchestrated calculation")
return {}
# Resolve optimal calculation order
ordered_configs = self.resolve_calculation_order(indicator_configs)
indicators_data = {}
calculation_stats = {
'cache_hits': 0,
'calculations_performed': 0,
'errors': 0
}
# Calculate indicators in optimized order
for config in ordered_configs:
indicator_key = self._create_indicator_key(config)
indicator_type = config.get('type')
# Check cache first if enabled
if enable_caching and self.config.enable_indicator_caching:
cached_result = self._get_cached_indicator(indicator_key, market_df)
if cached_result is not None:
indicators_data[indicator_key] = cached_result
calculation_stats['cache_hits'] += 1
continue
try:
# Calculate indicator using TechnicalIndicators class
indicator_result = self.technical_indicators.calculate(
indicator_type,
market_df,
**{k: v for k, v in config.items() if k != 'type'}
)
if indicator_result is not None and not indicator_result.empty:
indicators_data[indicator_key] = indicator_result
calculation_stats['calculations_performed'] += 1
# Cache the result if enabled
if enable_caching and self.config.enable_indicator_caching:
self._cache_indicator_result(indicator_key, indicator_result, market_df)
else:
self.logger.warning(f"Empty result for indicator: {indicator_key}")
indicators_data[indicator_key] = pd.DataFrame()
calculation_stats['errors'] += 1
except Exception as e:
self.logger.error(f"Error calculating indicator {indicator_key}: {e}")
indicators_data[indicator_key] = pd.DataFrame()
calculation_stats['errors'] += 1
self.logger.debug(
f"Orchestrated calculation complete: {calculation_stats['calculations_performed']} calculated, "
f"{calculation_stats['cache_hits']} cached, {calculation_stats['errors']} errors"
)
return indicators_data
except Exception as e:
self.logger.error(f"Error in orchestrated indicator calculation: {e}")
return {}
def calculate_strategy_signals_orchestrated(
self,
strategy_name: str,
strategy_config: Dict[str, Any],
symbol: str,
timeframe: str,
days_back: Optional[int] = None,
exchange: str = "okx",
enable_caching: bool = True
) -> List[StrategyResult]:
"""
Fully orchestrated strategy signal calculation with optimized workflow.
This method provides the most optimized calculation flow:
Data → Dependency Analysis → Orchestrated Indicators → Strategy → Results
Args:
strategy_name: Name of the strategy to execute
strategy_config: Strategy-specific configuration parameters
symbol: Trading pair symbol
timeframe: Timeframe for strategy calculation
days_back: Number of days to look back for data
exchange: Exchange name
enable_caching: Whether to use cached results
Returns:
List of strategy results with signals
"""
try:
self.logger.info(f"StrategyDataIntegrator: Orchestrated calculation for {strategy_name} on {symbol} {timeframe}")
# Step 1: Get market data
market_df = self.get_strategy_data(
symbol=symbol,
timeframe=timeframe,
days_back=days_back,
exchange=exchange
)
if market_df.empty:
self.logger.warning(f"No market data available for {symbol} {timeframe}")
return []
# Step 2: Validate data sufficiency
if not self.validate_strategy_requirements(market_df, strategy_name):
self.logger.warning(f"Insufficient data for strategy {strategy_name}")
return []
# Step 3: Get strategy and analyze its requirements
strategy = self.strategy_factory.create_strategy(strategy_name)
if not strategy:
self.logger.error(f"Could not create strategy: {strategy_name}")
return []
required_indicators = strategy.get_required_indicators()
# Step 4: Orchestrated indicator calculation with dependency resolution
indicators_data = self.calculate_indicators_orchestrated(
market_df=market_df,
indicator_configs=required_indicators,
enable_caching=enable_caching
)
# Step 5: Calculate strategy signals
results = strategy.calculate(market_df, indicators_data, **strategy_config)
# Step 6: Add comprehensive metadata
for result in results:
if not hasattr(result, 'metadata') or result.metadata is None:
result.metadata = {}
result.metadata.update({
'symbol': symbol,
'timeframe': timeframe,
'exchange': exchange,
'data_points_used': len(market_df),
'indicators_calculated': len(indicators_data),
'required_indicators': len(required_indicators),
'calculation_timestamp': datetime.now(timezone.utc).isoformat(),
'orchestrated_calculation': True,
'calculation_method': 'orchestrated'
})
self.logger.info(
f"Orchestrated calculation generated {len(results)} strategy results for {strategy_name} "
f"using {len(indicators_data)} indicators"
)
return results
except Exception as e:
self.logger.error(f"Error in orchestrated strategy calculation for {strategy_name}: {e}")
return []
def get_calculation_performance_stats(self) -> Dict[str, Any]:
"""
Get performance statistics for calculation methods.
Returns:
Dictionary with performance metrics
"""
cache_stats = self.get_cache_stats()
return {
'cache_performance': cache_stats,
'available_methods': [
'calculate_strategy_signals', # Basic method
'calculate_strategy_signals_enhanced', # Vectorized method
'calculate_strategy_signals_orchestrated' # Fully orchestrated method
],
'recommended_method': 'calculate_strategy_signals_orchestrated',
'performance_tips': [
'Use orchestrated method for best performance',
'Enable caching for repeated calculations',
'Use larger datasets to benefit from vectorization',
'Monitor cache hit rates for optimization'
]
}
    def get_shared_indicator_cache(self, indicator_key: str, market_data_length: Optional[int] = None) -> Optional[pd.DataFrame]:
"""
Get indicator result from shared cache.
This method allows strategies to share cached indicator results,
improving efficiency when multiple strategies use the same indicators.
Args:
indicator_key: Unique indicator key
market_data_length: Expected market data length for validation
Returns:
Cached DataFrame if available and valid, None otherwise
"""
if indicator_key not in self._indicator_cache:
return None
cached_data = self._indicator_cache[indicator_key]
# Check if cache is still valid
cache_time = cached_data.get('timestamp')
if cache_time:
age_minutes = (datetime.now(timezone.utc) - cache_time).total_seconds() / 60
if age_minutes >= self.config.cache_timeout_minutes:
# Remove expired cache
del self._indicator_cache[indicator_key]
return None
# Validate market data length if provided
cached_result = cached_data.get('result')
if market_data_length is not None:
cached_length = cached_data.get('market_data_length')
if cached_length != market_data_length:
return None
if cached_result is not None:
self.logger.debug(f"Using shared cached indicator result for {indicator_key}")
return cached_result
return None
def share_indicator_result(
self,
indicator_key: str,
result: pd.DataFrame,
market_data_length: int
) -> None:
"""
Share indicator result for cross-strategy use.
Args:
indicator_key: Unique indicator key
result: Calculated indicator DataFrame
market_data_length: Length of market data used for calculation
"""
self._indicator_cache[indicator_key] = {
'result': result.copy(),
'market_data_length': market_data_length,
'timestamp': datetime.now(timezone.utc),
'shared': True # Mark as shared cache entry
}
# Save to persistent storage for cross-session sharing
if len(self._indicator_cache) % 5 == 0: # More frequent saves for shared cache
self._save_persistent_cache()
self.logger.debug(f"Shared indicator result for cross-strategy use: {indicator_key}")
def get_cache_sharing_stats(self) -> Dict[str, Any]:
"""Get statistics about cache sharing across strategies."""
shared_entries = sum(1 for data in self._indicator_cache.values() if data.get('shared', False))
return {
'total_cached_indicators': len(self._indicator_cache),
'shared_cache_entries': shared_entries,
'private_cache_entries': len(self._indicator_cache) - shared_entries,
'sharing_efficiency': shared_entries / len(self._indicator_cache) if self._indicator_cache else 0.0,
'persistent_cache_available': self._persistent_cache_file.exists()
}
def get_strategy_data_integrator(config: Optional[StrategyDataIntegrationConfig] = None) -> StrategyDataIntegrator:
"""
Factory function to get a strategy data integrator instance.
Args:
config: Optional configuration
Returns:
StrategyDataIntegrator instance
"""
return StrategyDataIntegrator(config)
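# Minimal smoke-test sketch (an assumption, not part of the module's public
# surface): exercises the orchestrated path end to end against whatever data
# the configured database holds. The strategy name is hypothetical and must
# match one registered with StrategyFactory.
if __name__ == "__main__":
    integrator = get_strategy_data_integrator()
    signals = integrator.calculate_strategy_signals_orchestrated(
        strategy_name="sma_crossover",  # hypothetical; use a registered name
        strategy_config={},
        symbol="BTC-USDT",
        timeframe="1h",
    )
    print(f"Generated {len(signals)} signals")
    print(integrator.get_cache_sharing_stats())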