import concurrent.futures
import logging
from typing import Dict, List, Optional, Tuple

import pandas as pd

from cycles.utils.storage import Storage
from cycles.utils.system import SystemUtils
from result_processor import ResultProcessor


class BacktestRunner:
    """Handles the execution of backtests across multiple timeframes and parameters."""

    def __init__(
        self,
        storage: Storage,
        system_utils: SystemUtils,
        result_processor: ResultProcessor,
        logging_instance: Optional[logging.Logger] = None,
    ):
        """
        Initialize backtest runner

        Args:
            storage: Storage instance for data operations
            system_utils: System utilities for resource management
            result_processor: Result processor for handling outputs
            logging_instance: Optional logger; when None, logging is skipped
        """
        self.storage = storage
        self.system_utils = system_utils
        self.result_processor = result_processor
        self.logging = logging_instance

    def run_backtests(
        self,
        data_1min: pd.DataFrame,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        initial_usd: float,
        debug: bool = False,
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Run backtests across all timeframe and stop-loss combinations.

        Args:
            data_1min: 1-minute data DataFrame
            timeframes: List of timeframe strings (e.g., ['1D', '6h'])
            stop_loss_pcts: List of stop-loss percentages
            initial_usd: Initial USD amount
            debug: Whether to enable debug mode (runs tasks sequentially)

        Returns:
            Tuple of (all_results, all_trades)
        """
        # Create one task per (timeframe, stop loss) combination
        tasks = self._create_tasks(timeframes, stop_loss_pcts, data_1min, initial_usd)

        if debug:
            return self._run_sequential(tasks, debug)
        return self._run_parallel(tasks, debug)

    def _create_tasks(
        self,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        data_1min: pd.DataFrame,
        initial_usd: float,
    ) -> List[Tuple]:
        """Create one task tuple per (timeframe, stop loss) combination."""
        tasks = []
        for timeframe in timeframes:
            for stop_loss_pct in stop_loss_pcts:
                tasks.append((timeframe, data_1min, stop_loss_pct, initial_usd))
        return tasks
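
    # For illustration: with timeframes=['1h', '4h'] and stop_loss_pcts=[0.02, 0.05]
    # (hypothetical values), _create_tasks yields the 2 x 2 cross product:
    #   ('1h', data_1min, 0.02, initial_usd), ('1h', data_1min, 0.05, initial_usd),
    #   ('4h', data_1min, 0.02, initial_usd), ('4h', data_1min, 0.05, initial_usd)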

    def _run_sequential(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks sequentially (for debug mode)."""
        all_results = []
        all_trades = []

        for task in tasks:
            try:
                results, trades = self._process_single_task(task, debug)
                if results:
                    all_results.extend(results)
                if trades:
                    all_trades.extend(trades)

            except Exception as e:
                error_msg = f"Error processing task {task[0]} with stop loss {task[2]}: {e}"
                if self.logging:
                    self.logging.error(error_msg)
                raise RuntimeError(error_msg) from e

        return all_results, all_trades

    def _run_parallel(self, tasks: List[Tuple], debug: bool) -> Tuple[List[Dict], List[Dict]]:
        """Run tasks in parallel using ProcessPoolExecutor."""
        workers = self.system_utils.get_optimal_workers()

        if self.logging:
            self.logging.info(f"Running {len(tasks)} tasks with {workers} workers")

        all_results = []
        all_trades = []

        try:
            # Note: ProcessPoolExecutor pickles the bound method and its
            # arguments, so this instance and each task's DataFrame must be
            # picklable and are copied into every worker process.
            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
                # Submit all tasks
                future_to_task = {
                    executor.submit(self._process_single_task, task, debug): task
                    for task in tasks
                }

                # Collect results as they complete
                for future in concurrent.futures.as_completed(future_to_task):
                    task = future_to_task[future]
                    try:
                        results, trades = future.result()
                        if results:
                            all_results.extend(results)
                        if trades:
                            all_trades.extend(trades)

                    except Exception as e:
                        error_msg = f"Task {task[0]} with stop loss {task[2]} failed: {e}"
                        if self.logging:
                            self.logging.error(error_msg)
                        raise RuntimeError(error_msg) from e

        except RuntimeError:
            # Task failures are already logged and wrapped above; re-raise
            # as-is to avoid double-wrapping.
            raise
        except Exception as e:
            error_msg = f"Parallel execution failed: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

        return all_results, all_trades

    def _process_single_task(
        self,
        task: Tuple[str, pd.DataFrame, float, float],
        debug: bool = False,
    ) -> Tuple[List[Dict], List[Dict]]:
        """
        Process a single backtest task.

        Args:
            task: Tuple of (timeframe, data_1min, stop_loss_pct, initial_usd)
            debug: Whether to enable debug output

        Returns:
            Tuple of (results, trades)
        """
        timeframe, data_1min, stop_loss_pct, initial_usd = task

        try:
            # Resample unless the target timeframe is already 1 minute
            # ("1T" is the legacy pandas alias for "1min")
            if timeframe in ("1T", "1min"):
                df = data_1min.copy()
            else:
                df = self._resample_data(data_1min, timeframe)

            # Process timeframe results
            results, trades = self.result_processor.process_timeframe_results(
                data_1min,
                df,
                [stop_loss_pct],
                timeframe,
                initial_usd,
                debug,
            )

            # Save individual trade files if trades exist
            if trades:
                self.result_processor.save_trade_file(trades, timeframe, stop_loss_pct)

            return results, trades

        except Exception as e:
            error_msg = f"Failed to process {timeframe} with stop loss {stop_loss_pct}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def _resample_data(self, data_1min: pd.DataFrame, timeframe: str) -> pd.DataFrame:
        """
        Resample 1-minute data to the specified timeframe.

        Args:
            data_1min: 1-minute data DataFrame (must have a DatetimeIndex)
            timeframe: Target timeframe string

        Returns:
            Resampled DataFrame with the timestamp restored as a column
        """
        try:
            # Standard OHLCV aggregation; bins with missing data are dropped
            resampled = data_1min.resample(timeframe).agg({
                'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum'
            }).dropna()

            return resampled.reset_index()

        except Exception as e:
            error_msg = f"Failed to resample data to {timeframe}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise ValueError(error_msg) from e
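
    # A minimal standalone sketch of the aggregation above, with hypothetical
    # data (independent of this class; shown for illustration only):
    #
    #     idx = pd.date_range("2024-01-01", periods=240, freq="1min")
    #     bars = pd.DataFrame(
    #         {"open": 1.0, "high": 1.0, "low": 1.0, "close": 1.0, "volume": 1.0},
    #         index=idx,
    #     )
    #     hourly = bars.resample("1h").agg({
    #         "open": "first", "high": "max", "low": "min",
    #         "close": "last", "volume": "sum",
    #     }).dropna()  # 240 one-minute rows -> 4 hourly rows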

    def load_data(self, filename: str, start_date: str, stop_date: str) -> pd.DataFrame:
        """
        Load and validate data for backtesting.

        Args:
            filename: Name of data file
            start_date: Start date string
            stop_date: Stop date string

        Returns:
            Loaded and validated DataFrame

        Raises:
            RuntimeError: If loading fails or the data is empty or invalid
        """
        try:
            data = self.storage.load_data(filename, start_date, stop_date)

            if data.empty:
                raise ValueError(f"No data loaded for period {start_date} to {stop_date}")

            # Validate required columns
            required_columns = ['open', 'high', 'low', 'close', 'volume']
            missing_columns = [col for col in required_columns if col not in data.columns]

            if missing_columns:
                raise ValueError(f"Missing required columns: {missing_columns}")

            if self.logging:
                self.logging.info(f"Loaded {len(data)} rows of data from {filename}")

            return data

        except Exception as e:
            error_msg = f"Failed to load data from {filename}: {e}"
            if self.logging:
                self.logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def validate_inputs(
        self,
        timeframes: List[str],
        stop_loss_pcts: List[float],
        initial_usd: float,
    ) -> None:
        """
        Validate backtest input parameters.

        Args:
            timeframes: List of timeframe strings
            stop_loss_pcts: List of stop-loss percentages
            initial_usd: Initial USD amount

        Raises:
            ValueError: If any input is invalid
        """
        # Validate timeframes
        if not timeframes:
            raise ValueError("At least one timeframe must be specified")

        # Validate stop loss percentages
        if not stop_loss_pcts:
            raise ValueError("At least one stop loss percentage must be specified")

        for pct in stop_loss_pcts:
            if not 0 < pct < 1:
                raise ValueError(f"Stop loss percentage must be between 0 and 1, got: {pct}")

        # Validate initial USD
        if initial_usd <= 0:
            raise ValueError(f"Initial USD must be positive, got: {initial_usd}")

        if self.logging:
            self.logging.info("Input validation completed successfully")
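

# A minimal usage sketch. The constructor signatures of Storage, SystemUtils,
# and ResultProcessor are not shown in this module, so the no-argument calls
# below are placeholder assumptions, as is the data filename:
#
#     runner = BacktestRunner(
#         storage=Storage(),
#         system_utils=SystemUtils(),
#         result_processor=ResultProcessor(),
#         logging_instance=logging.getLogger(__name__),
#     )
#     runner.validate_inputs(['1h', '4h'], [0.02, 0.05], initial_usd=10_000.0)
#     data_1min = runner.load_data("BTCUSD_1min.csv", "2024-01-01", "2024-06-30")
#     results, trades = runner.run_backtests(
#         data_1min, ['1h', '4h'], [0.02, 0.05], initial_usd=10_000.0
#     )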