import concurrent.futures
import logging
from typing import Callable, Dict, List, Optional, Tuple

import pandas as pd

from cycles.utils.progress_manager import ProgressManager
from cycles.utils.storage import Storage
from cycles.utils.system import SystemUtils
from result_processor import ResultProcessor


def _process_single_task_static(
    task: Tuple[str, str, pd.DataFrame, float, float],
    progress_callback: Optional[Callable] = None,
) -> Tuple[List[Dict], List[Dict]]:
    """
    Static version of _process_single_task for use with ProcessPoolExecutor.

    Args:
        task: Tuple of (task_id, timeframe, data_1min, stop_loss_pct, initial_usd)
        progress_callback: Optional progress callback function

    Returns:
        Tuple of (results, trades)
    """
    task_id, timeframe, data_1min, stop_loss_pct, initial_usd = task

    try:
        if timeframe in ("1T", "1min"):
            df = data_1min.copy()
        else:
            df = _resample_data_static(data_1min, timeframe)

        # Build the components this worker needs. The module-level imports are
        # re-executed when the subprocess imports this module, so Storage and
        # ResultProcessor are available here without local re-imports.
        storage = Storage()  # default paths; this runs inside a subprocess
        result_processor = ResultProcessor(storage)

        results, trades = result_processor.process_timeframe_results(
            data_1min,
            df,
            [stop_loss_pct],
            timeframe,
            initial_usd,
            progress_callback=progress_callback,
        )

        return results, trades

    except Exception as e:
        error_msg = f"Failed to process {timeframe} with stop loss {stop_loss_pct}: {e}"
        raise RuntimeError(error_msg) from e


def _resample_data_static(data_1min: pd.DataFrame, timeframe: str) -> pd.DataFrame:
    """
    Static function to resample 1-minute data to the specified timeframe.

    Args:
        data_1min: 1-minute data DataFrame
        timeframe: Target timeframe string

    Returns:
        Resampled DataFrame
    """
    try:
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum',
        }

        if 'predicted_close_price' in data_1min.columns:
            agg_dict['predicted_close_price'] = 'last'

        resampled = data_1min.resample(timeframe).agg(agg_dict).dropna()
        return resampled.reset_index()

    except Exception as e:
        error_msg = f"Failed to resample data to {timeframe}: {e}"
        raise ValueError(error_msg) from e
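
# A minimal sketch of how _resample_data_static behaves, assuming the input
# frame has a DatetimeIndex (required by DataFrame.resample). The data below
# is synthetic and not from the project:
#
#     import numpy as np
#     import pandas as pd
#
#     idx = pd.date_range("2024-01-01", periods=120, freq="min")
#     data = pd.DataFrame({
#         "open": np.random.rand(120), "high": np.random.rand(120),
#         "low": np.random.rand(120), "close": np.random.rand(120),
#         "volume": np.random.randint(1, 100, 120),
#     }, index=idx)
#     hourly = _resample_data_static(data, "1h")  # 120 one-minute rows -> 2 hourly rows
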
class BacktestRunner:
    """Handles the execution of backtests across multiple timeframes and parameters."""

    def __init__(
        self,
        storage: Storage,
        system_utils: SystemUtils,
        result_processor: ResultProcessor,
        logging_instance: Optional[logging.Logger] = None,
        show_progress: bool = True,
    ):
        """
        Initialize the backtest runner.

        Args:
            storage: Storage instance for data operations
            system_utils: System utilities for resource management
            result_processor: Result processor for handling outputs
            logging_instance: Optional logger instance
            show_progress: Whether to show visual progress bars
        """
        self.storage = storage
        self.system_utils = system_utils
        self.result_processor = result_processor
        self.logging = logging_instance
        self.show_progress = show_progress
        self.progress_manager = ProgressManager() if show_progress else None

    def run_backtests(
        self,
        data_1min: pd.DataFrame,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        initial_usd: float,
        debug: bool = False,
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Run backtests across all timeframe and stop loss combinations.

        Args:
            data_1min: 1-minute data DataFrame
            timeframes: List of timeframe strings (e.g., ['1D', '6h'])
            stop_loss_pcts: List of stop loss percentages
            initial_usd: Initial USD amount
            debug: Whether to enable debug mode (runs tasks sequentially)

        Returns:
            Tuple of (all_results, all_trades)
        """
        # Create one task per (timeframe, stop loss) combination
        tasks = self._create_tasks(timeframes, stop_loss_pcts, data_1min, initial_usd)

        if self.logging:
            self.logging.info(f"Starting {len(tasks)} backtest tasks")

        if debug:
            return self._run_sequential(tasks)
        return self._run_parallel(tasks)

    def _create_tasks(
        self,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        data_1min: pd.DataFrame,
        initial_usd: float,
    ) -> List[Tuple]:
        """Create task tuples for processing."""
        tasks = []
        for timeframe in timeframes:
            for stop_loss_pct in stop_loss_pcts:
                task_id = f"{timeframe}_{stop_loss_pct}"
                tasks.append((task_id, timeframe, data_1min, stop_loss_pct, initial_usd))
        return tasks

    def _run_sequential(self, tasks: List[Tuple]) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks sequentially (for debug mode)."""
        # Initialize progress tracking if enabled
        if self.progress_manager:
            for task in tasks:
                task_id, timeframe, data_1min, stop_loss_pct, initial_usd = task

                # Determine the actual DataFrame size that will be processed
                if timeframe in ("1T", "1min"):
                    actual_df_size = len(data_1min)
                else:
                    # Resample up front to get the exact resampled row count
                    temp_df = self._resample_data(data_1min, timeframe)
                    actual_df_size = len(temp_df)

                task_name = f"{timeframe} SL:{stop_loss_pct:.0%}"
                self.progress_manager.start_task(task_id, task_name, actual_df_size)

            self.progress_manager.start_display()

        all_results = []
        all_trades = []

        try:
            for task in tasks:
                try:
                    # Fetch the per-task progress callback if tracking is enabled
                    progress_callback = None
                    if self.progress_manager:
                        progress_callback = self.progress_manager.get_task_progress_callback(task[0])

                    results, trades = self._process_single_task(task, progress_callback)

                    if results:
                        all_results.extend(results)
                    if trades:
                        all_trades.extend(trades)

                    # Mark task as completed
                    if self.progress_manager:
                        self.progress_manager.complete_task(task[0])

                except Exception as e:
                    error_msg = f"Error processing task {task[1]} with stop loss {task[3]}: {e}"
                    if self.logging:
                        self.logging.error(error_msg)
                    raise RuntimeError(error_msg) from e
        finally:
            # Stop progress display
            if self.progress_manager:
                self.progress_manager.stop_display()

        return all_results, all_trades
    def _run_parallel(self, tasks: List[Tuple]) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks in parallel using ProcessPoolExecutor."""
        workers = self.system_utils.get_optimal_workers()

        if self.logging:
            self.logging.info(f"Running {len(tasks)} tasks with {workers} workers")

        # OPTIMIZATION: progress tracking adds significant overhead in
        # multiprocessing, so it is disabled for parallel execution.
        if self.progress_manager and self.logging:
            self.logging.info("Progress tracking disabled for parallel execution (performance optimization)")

        all_results = []
        all_trades = []
        completed_tasks = 0

        try:
            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
                future_to_task = {
                    executor.submit(_process_single_task_static, task): task
                    for task in tasks
                }

                for future in concurrent.futures.as_completed(future_to_task):
                    task = future_to_task[future]
                    try:
                        results, trades = future.result()

                        if results:
                            all_results.extend(results)
                        if trades:
                            all_trades.extend(trades)

                        completed_tasks += 1
                        if self.logging:
                            self.logging.info(f"Completed task {task[0]} ({completed_tasks}/{len(tasks)})")

                    except Exception as e:
                        error_msg = f"Task {task[1]} with stop loss {task[3]} failed: {e}"
                        if self.logging:
                            self.logging.error(error_msg)
                        raise RuntimeError(error_msg) from e

        except Exception as e:
            error_msg = f"Parallel execution failed: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e
        finally:
            # Stop progress display if one was started
            if self.progress_manager:
                self.progress_manager.stop_display()

        if self.logging:
            self.logging.info(f"All {len(tasks)} tasks completed successfully")

        return all_results, all_trades

    def _process_single_task(
        self,
        task: Tuple[str, str, pd.DataFrame, float, float],
        progress_callback: Optional[Callable] = None,
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Process a single backtest task.

        Args:
            task: Tuple of (task_id, timeframe, data_1min, stop_loss_pct, initial_usd)
            progress_callback: Optional progress callback function

        Returns:
            Tuple of (results, trades)
        """
        task_id, timeframe, data_1min, stop_loss_pct, initial_usd = task

        try:
            if timeframe in ("1T", "1min"):
                df = data_1min.copy()
            else:
                df = self._resample_data(data_1min, timeframe)

            results, trades = self.result_processor.process_timeframe_results(
                data_1min,
                df,
                [stop_loss_pct],
                timeframe,
                initial_usd,
                progress_callback=progress_callback,
            )

            # OPTIMIZATION: skip individual trade-file saving here; trade files
            # are saved in batch at the end of the run.
            # if trades:
            #     self.result_processor.save_trade_file(trades, timeframe, stop_loss_pct)

            if self.logging:
                self.logging.info(f"Completed task {task_id}: {len(results)} results, {len(trades)} trades")

            return results, trades

        except Exception as e:
            error_msg = f"Failed to process {timeframe} with stop loss {stop_loss_pct}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def _resample_data(self, data_1min: pd.DataFrame, timeframe: str) -> pd.DataFrame:
        """
        Resample 1-minute data to the specified timeframe.

        Args:
            data_1min: 1-minute data DataFrame
            timeframe: Target timeframe string

        Returns:
            Resampled DataFrame
        """
        try:
            agg_dict = {
                'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum',
            }

            if 'predicted_close_price' in data_1min.columns:
                agg_dict['predicted_close_price'] = 'last'

            resampled = data_1min.resample(timeframe).agg(agg_dict).dropna()
            return resampled.reset_index()

        except Exception as e:
            error_msg = f"Failed to resample data to {timeframe}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise ValueError(error_msg) from e

    def _get_timeframe_factor(self, timeframe: str) -> int:
        """
        Get the factor by which data is reduced when resampling to a timeframe.

        Args:
            timeframe: Target timeframe string (e.g., '1h', '4h', '1D')

        Returns:
            Factor (minutes per bar) for estimating data size after resampling
        """
        timeframe_factors = {
            '1T': 1, '1min': 1,
            '5T': 5, '5min': 5,
            '15T': 15, '15min': 15,
            '30T': 30, '30min': 30,
            '1h': 60, '1H': 60,
            '2h': 120, '2H': 120,
            '4h': 240, '4H': 240,
            '6h': 360, '6H': 360,
            '8h': 480, '8H': 480,
            '12h': 720, '12H': 720,
            '1D': 1440, '1d': 1440,
            '2D': 2880, '2d': 2880,
            '3D': 4320, '3d': 4320,
            '1W': 10080, '1w': 10080,
        }
        return timeframe_factors.get(timeframe, 60)  # default to 1 hour if unknown
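    # A minimal sketch of how the factor can estimate a resampled row count
    # without paying for a full resample. This is an approximation (gaps and
    # dropna() make the real count smaller); `runner` is an assumed
    # BacktestRunner instance, not defined in this module:
    #
    #     factor = runner._get_timeframe_factor('4h')   # 240 minutes per bar
    #     estimated_rows = len(data_1min) // factor
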
    def load_data(self, filename: str, start_date: str, stop_date: str) -> pd.DataFrame:
        """
        Load and validate data for backtesting.

        Args:
            filename: Name of data file
            start_date: Start date string
            stop_date: Stop date string

        Returns:
            Loaded and validated DataFrame

        Raises:
            RuntimeError: If data cannot be loaded, is empty, or is missing
                required columns (validation errors are wrapped)
        """
        try:
            data = self.storage.load_data(filename, start_date, stop_date)

            if data.empty:
                raise ValueError(f"No data loaded for period {start_date} to {stop_date}")

            # predicted_close_price is optional, so only the OHLCV columns
            # are required.
            required_columns = ['open', 'high', 'low', 'close', 'volume']
            missing_columns = [col for col in required_columns if col not in data.columns]
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            if self.logging:
                self.logging.info(f"Loaded {len(data)} rows of data from {filename}")

            return data

        except Exception as e:
            error_msg = f"Failed to load data from {filename}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def validate_inputs(
        self,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        initial_usd: float,
    ) -> None:
        """
        Validate backtest input parameters.

        Args:
            timeframes: List of timeframe strings
            stop_loss_pcts: List of stop loss percentages
            initial_usd: Initial USD amount

        Raises:
            ValueError: If any input is invalid
        """
        if not timeframes:
            raise ValueError("At least one timeframe must be specified")

        if not stop_loss_pcts:
            raise ValueError("At least one stop loss percentage must be specified")

        for pct in stop_loss_pcts:
            if not 0 < pct < 1:
                raise ValueError(f"Stop loss percentage must be between 0 and 1, got: {pct}")

        if initial_usd <= 0:
            raise ValueError("Initial USD must be positive")

        if self.logging:
            self.logging.info("Input validation completed successfully")
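
# A minimal usage sketch. Only Storage() and ResultProcessor(storage) are
# confirmed by the worker code above; the SystemUtils() no-arg constructor
# and the data filename are assumptions for illustration:
#
#     if __name__ == "__main__":
#         storage = Storage()
#         runner = BacktestRunner(
#             storage=storage,
#             system_utils=SystemUtils(),          # assumed no-arg constructor
#             result_processor=ResultProcessor(storage),
#             logging_instance=logging.getLogger(__name__),
#         )
#         runner.validate_inputs(['1h', '4h'], [0.05], 10_000.0)
#         data = runner.load_data('BTCUSD_1min.csv', '2024-01-01', '2024-06-30')  # hypothetical file
#         results, trades = runner.run_backtests(data, ['1h', '4h'], [0.05], 10_000.0)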