Cycles/backtest_runner.py

import pandas as pd
import concurrent.futures
import logging
from typing import List, Tuple, Dict, Any, Optional

from cycles.utils.storage import Storage
from cycles.utils.system import SystemUtils
from result_processor import ResultProcessor


class BacktestRunner:
    """Handles the execution of backtests across multiple timeframes and parameters"""
    
    def __init__(
        self, 
        storage: Storage, 
        system_utils: SystemUtils, 
        result_processor: ResultProcessor,
        logging_instance: Optional[logging.Logger] = None
    ):
        """
        Initialize backtest runner
        
        Args:
            storage: Storage instance for data operations
            system_utils: System utilities for resource management
            result_processor: Result processor for handling outputs
            logging_instance: Optional logging instance
        """
        self.storage = storage
        self.system_utils = system_utils
        self.result_processor = result_processor
        self.logging = logging_instance

    def run_backtests(
        self, 
        data_1min: pd.DataFrame,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        initial_usd: float,
        debug: bool = False
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Run backtests across all timeframe and stop loss combinations
        
        Args:
            data_1min: 1-minute data DataFrame
            timeframes: List of timeframe strings (e.g., ['1D', '6h'])
            stop_loss_pcts: List of stop loss percentages
            initial_usd: Initial USD amount
            debug: Whether to enable debug mode
            
        Returns:
            Tuple of (all_results, all_trades)
        """
        # Create tasks for all combinations
        tasks = self._create_tasks(timeframes, stop_loss_pcts, data_1min, initial_usd)
        
        if debug:
            return self._run_sequential(tasks, debug)
        else:
            return self._run_parallel(tasks, debug)

    def _create_tasks(
        self, 
        timeframes: List[str], 
        stop_loss_pcts: List[float], 
        data_1min: pd.DataFrame, 
        initial_usd: float
    ) -> List[Tuple]:
        """Create task tuples for processing"""
        tasks = []
        for timeframe in timeframes:
            for stop_loss_pct in stop_loss_pcts:
                task = (timeframe, data_1min, stop_loss_pct, initial_usd)
                tasks.append(task)
        return tasks

    def _run_sequential(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks sequentially (for debug mode)"""
        all_results = []
        all_trades = []

        for task in tasks:
            try:
                results, trades = self._process_single_task(task, debug)
                if results:
                    all_results.extend(results)
                if trades:
                    all_trades.extend(trades)
                    
            except Exception as e:
                error_msg = f"Error processing task {task[0]} with stop loss {task[2]}: {e}"
                if self.logging:
                    self.logging.error(error_msg)
                raise RuntimeError(error_msg) from e

        return all_results, all_trades

    def _run_parallel(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks in parallel using ProcessPoolExecutor"""
        workers = self.system_utils.get_optimal_workers()
        
        if self.logging:
            self.logging.info(f"Running {len(tasks)} tasks with {workers} workers")

        all_results = []
        all_trades = []

        try:
            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
                # Submit all tasks
                future_to_task = {
                    executor.submit(self._process_single_task, task, debug): task 
                    for task in tasks
                }

                # Collect results as they complete
                for future in concurrent.futures.as_completed(future_to_task):
                    task = future_to_task[future]
                    try:
                        results, trades = future.result()
                        if results:
                            all_results.extend(results)
                        if trades:
                            all_trades.extend(trades)
                            
                    except Exception as e:
                        error_msg = f"Task {task[0]} with stop loss {task[2]} failed: {e}"
                        if self.logging:
                            self.logging.error(error_msg)
                        raise RuntimeError(error_msg) from e

        except Exception as e:
            error_msg = f"Parallel execution failed: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

        return all_results, all_trades

    def _process_single_task(
        self, 
        task: Tuple[str, pd.DataFrame, float, float], 
        debug: bool = False
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Process a single backtest task
        
        Args:
            task: Tuple of (timeframe, data_1min, stop_loss_pct, initial_usd)
            debug: Whether to enable debug output
            
        Returns:
            Tuple of (results, trades)
        """
        timeframe, data_1min, stop_loss_pct, initial_usd = task

        try:
            # Resample data if needed
            if timeframe == "1T" or timeframe == "1min":
                df = data_1min.copy()
            else:
                df = self._resample_data(data_1min, timeframe)

            # Process timeframe results
            results, trades = self.result_processor.process_timeframe_results(
                data_1min, 
                df, 
                [stop_loss_pct], 
                timeframe, 
                initial_usd, 
                debug
            )

            # Save individual trade files if trades exist
            if trades:
                self.result_processor.save_trade_file(trades, timeframe, stop_loss_pct)

            return results, trades

        except Exception as e:
            error_msg = f"Failed to process {timeframe} with stop loss {stop_loss_pct}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def _resample_data(self, data_1min: pd.DataFrame, timeframe: str) -> pd.DataFrame:
        """
        Resample 1-minute data to specified timeframe
        
        Args:
            data_1min: 1-minute data DataFrame
            timeframe: Target timeframe string
            
        Returns:
            Resampled DataFrame
        """
        try:
            resampled = data_1min.resample(timeframe).agg({
                'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum'
            }).dropna()
            
            return resampled.reset_index()
            
        except Exception as e:
            error_msg = f"Failed to resample data to {timeframe}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise ValueError(error_msg) from e

    def load_data(self, filename: str, start_date: str, stop_date: str) -> pd.DataFrame:
        """
        Load and validate data for backtesting
        
        Args:
            filename: Name of data file
            start_date: Start date string
            stop_date: Stop date string
            
        Returns:
            Loaded and validated DataFrame
            
        Raises:
            ValueError: If data is empty or invalid
        """
        try:
            data = self.storage.load_data(filename, start_date, stop_date)
            
            if data.empty:
                raise ValueError(f"No data loaded for period {start_date} to {stop_date}")
                
            # Validate required columns
            required_columns = ['open', 'high', 'low', 'close', 'volume']
            missing_columns = [col for col in required_columns if col not in data.columns]
            
            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")
                
            if self.logging:
                self.logging.info(f"Loaded {len(data)} rows of data from {filename}")
                
            return data
            
        except Exception as e:
            error_msg = f"Failed to load data from {filename}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def validate_inputs(
        self, 
        timeframes: List[str], 
        stop_loss_pcts: List[float], 
        initial_usd: float
    ) -> None:
        """
        Validate backtest input parameters
        
        Args:
            timeframes: List of timeframe strings
            stop_loss_pcts: List of stop loss percentages
            initial_usd: Initial USD amount
            
        Raises:
            ValueError: If any input is invalid
        """
        # Validate timeframes
        if not timeframes:
            raise ValueError("At least one timeframe must be specified")
            
        # Validate stop loss percentages
        if not stop_loss_pcts:
            raise ValueError("At least one stop loss percentage must be specified")
            
        for pct in stop_loss_pcts:
            if not 0 < pct < 1:
                raise ValueError(f"Stop loss percentage must be between 0 and 1, got: {pct}")
                
        # Validate initial USD
        if initial_usd <= 0:
            raise ValueError("Initial USD must be positive")
            
        if self.logging:
            self.logging.info("Input validation completed successfully")
Implement backtesting framework with modular architecture for data loading, processing, and result management. Introduced BacktestRunner, ConfigManager, and ResultProcessor classes for improved maintainability and error handling. Updated main execution script to utilize new components and added comprehensive logging. Enhanced README with detailed project overview and usage instructions. 2025-06-25 13:08:07 +08:00			`import pandas as pd`
			`import concurrent.futures`
			`import logging`
			`from typing import List, Tuple, Dict, Any, Optional`

			`from cycles.utils.storage import Storage`
			`from cycles.utils.system import SystemUtils`
			`from result_processor import ResultProcessor`


			`class BacktestRunner:`
			`"""Handles the execution of backtests across multiple timeframes and parameters"""`

			`def __init__(`
			`self,`
			`storage: Storage,`
			`system_utils: SystemUtils,`
			`result_processor: ResultProcessor,`
			`logging_instance: Optional[logging.Logger] = None`
			`):`
			`"""`
			`Initialize backtest runner`

			`Args:`
			`storage: Storage instance for data operations`
			`system_utils: System utilities for resource management`
			`result_processor: Result processor for handling outputs`
			`logging_instance: Optional logging instance`
			`"""`
			`self.storage = storage`
			`self.system_utils = system_utils`
			`self.result_processor = result_processor`
			`self.logging = logging_instance`

			`def run_backtests(`
			`self,`
			`data_1min: pd.DataFrame,`
			`timeframes: List[str],`
			`stop_loss_pcts: List[float],`
			`initial_usd: float,`
			`debug: bool = False`
			`) -> Tuple[List[Dict], List[Dict]]:`
			`"""`
			`Run backtests across all timeframe and stop loss combinations`

			`Args:`
			`data_1min: 1-minute data DataFrame`
			`timeframes: List of timeframe strings (e.g., ['1D', '6h'])`
			`stop_loss_pcts: List of stop loss percentages`
			`initial_usd: Initial USD amount`
			`debug: Whether to enable debug mode`

			`Returns:`
			`Tuple of (all_results, all_trades)`
			`"""`
			`# Create tasks for all combinations`
			`tasks = self._create_tasks(timeframes, stop_loss_pcts, data_1min, initial_usd)`

			`if debug:`
			`return self._run_sequential(tasks, debug)`
			`else:`
			`return self._run_parallel(tasks, debug)`

			`def _create_tasks(`
			`self,`
			`timeframes: List[str],`
			`stop_loss_pcts: List[float],`
			`data_1min: pd.DataFrame,`
			`initial_usd: float`
			`) -> List[Tuple]:`
			`"""Create task tuples for processing"""`
			`tasks = []`
			`for timeframe in timeframes:`
			`for stop_loss_pct in stop_loss_pcts:`
			`task = (timeframe, data_1min, stop_loss_pct, initial_usd)`
			`tasks.append(task)`
			`return tasks`

			`def _run_sequential(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:`
			`"""Run tasks sequentially (for debug mode)"""`
			`all_results = []`
			`all_trades = []`

			`for task in tasks:`
			`try:`
			`results, trades = self._process_single_task(task, debug)`
			`if results:`
			`all_results.extend(results)`
			`if trades:`
			`all_trades.extend(trades)`

			`except Exception as e:`
			`error_msg = f"Error processing task {task[0]} with stop loss {task[2]}: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise RuntimeError(error_msg) from e`

			`return all_results, all_trades`

			`def _run_parallel(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:`
			`"""Run tasks in parallel using ProcessPoolExecutor"""`
			`workers = self.system_utils.get_optimal_workers()`

			`if self.logging:`
			`self.logging.info(f"Running {len(tasks)} tasks with {workers} workers")`

			`all_results = []`
			`all_trades = []`

			`try:`
			`with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:`
			`# Submit all tasks`
			`future_to_task = {`
			`executor.submit(self._process_single_task, task, debug): task`
			`for task in tasks`
			`}`

			`# Collect results as they complete`
			`for future in concurrent.futures.as_completed(future_to_task):`
			`task = future_to_task[future]`
			`try:`
			`results, trades = future.result()`
			`if results:`
			`all_results.extend(results)`
			`if trades:`
			`all_trades.extend(trades)`

			`except Exception as e:`
			`error_msg = f"Task {task[0]} with stop loss {task[2]} failed: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise RuntimeError(error_msg) from e`

			`except Exception as e:`
			`error_msg = f"Parallel execution failed: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise RuntimeError(error_msg) from e`

			`return all_results, all_trades`

			`def _process_single_task(`
			`self,`
			`task: Tuple[str, pd.DataFrame, float, float],`
			`debug: bool = False`
			`) -> Tuple[List[Dict], List[Dict]]:`
			`"""`
			`Process a single backtest task`

			`Args:`
			`task: Tuple of (timeframe, data_1min, stop_loss_pct, initial_usd)`
			`debug: Whether to enable debug output`

			`Returns:`
			`Tuple of (results, trades)`
			`"""`
			`timeframe, data_1min, stop_loss_pct, initial_usd = task`

			`try:`
			`# Resample data if needed`
			`if timeframe == "1T" or timeframe == "1min":`
			`df = data_1min.copy()`
			`else:`
			`df = self._resample_data(data_1min, timeframe)`

			`# Process timeframe results`
			`results, trades = self.result_processor.process_timeframe_results(`
			`data_1min,`
			`df,`
			`[stop_loss_pct],`
			`timeframe,`
			`initial_usd,`
			`debug`
			`)`

			`# Save individual trade files if trades exist`
			`if trades:`
			`self.result_processor.save_trade_file(trades, timeframe, stop_loss_pct)`

			`return results, trades`

			`except Exception as e:`
			`error_msg = f"Failed to process {timeframe} with stop loss {stop_loss_pct}: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise RuntimeError(error_msg) from e`

			`def _resample_data(self, data_1min: pd.DataFrame, timeframe: str) -> pd.DataFrame:`
			`"""`
			`Resample 1-minute data to specified timeframe`

			`Args:`
			`data_1min: 1-minute data DataFrame`
			`timeframe: Target timeframe string`

			`Returns:`
			`Resampled DataFrame`
			`"""`
			`try:`
			`resampled = data_1min.resample(timeframe).agg({`
			`'open': 'first',`
			`'high': 'max',`
			`'low': 'min',`
			`'close': 'last',`
			`'volume': 'sum'`
			`}).dropna()`

			`return resampled.reset_index()`

			`except Exception as e:`
			`error_msg = f"Failed to resample data to {timeframe}: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise ValueError(error_msg) from e`

			`def load_data(self, filename: str, start_date: str, stop_date: str) -> pd.DataFrame:`
			`"""`
			`Load and validate data for backtesting`

			`Args:`
			`filename: Name of data file`
			`start_date: Start date string`
			`stop_date: Stop date string`

			`Returns:`
			`Loaded and validated DataFrame`

			`Raises:`
			`ValueError: If data is empty or invalid`
			`"""`
			`try:`
			`data = self.storage.load_data(filename, start_date, stop_date)`

			`if data.empty:`
			`raise ValueError(f"No data loaded for period {start_date} to {stop_date}")`

			`# Validate required columns`
			`required_columns = ['open', 'high', 'low', 'close', 'volume']`
			`missing_columns = [col for col in required_columns if col not in data.columns]`

			`if missing_columns:`
			`raise ValueError(f"Missing required columns: {missing_columns}")`

			`if self.logging:`
			`self.logging.info(f"Loaded {len(data)} rows of data from {filename}")`

			`return data`

			`except Exception as e:`
			`error_msg = f"Failed to load data from {filename}: {e}"`
			`if self.logging:`
			`self.logging.error(error_msg)`
			`raise RuntimeError(error_msg) from e`

			`def validate_inputs(`
			`self,`
			`timeframes: List[str],`
			`stop_loss_pcts: List[float],`
			`initial_usd: float`
			`) -> None:`
			`"""`
			`Validate backtest input parameters`

			`Args:`
			`timeframes: List of timeframe strings`
			`stop_loss_pcts: List of stop loss percentages`
			`initial_usd: Initial USD amount`

			`Raises:`
			`ValueError: If any input is invalid`
			`"""`
			`# Validate timeframes`
			`if not timeframes:`
			`raise ValueError("At least one timeframe must be specified")`

			`# Validate stop loss percentages`
			`if not stop_loss_pcts:`
			`raise ValueError("At least one stop loss percentage must be specified")`

			`for pct in stop_loss_pcts:`
			`if not 0 < pct < 1:`
			`raise ValueError(f"Stop loss percentage must be between 0 and 1, got: {pct}")`

			`# Validate initial USD`
			`if initial_usd <= 0:`
			`raise ValueError("Initial USD must be positive")`

			`if self.logging:`
			`self.logging.info("Input validation completed successfully")`