"""
Incremental Backtester for testing incremental strategies.

This module provides the IncBacktester class that orchestrates multiple IncTraders
for parallel testing, handles data loading and feeding, and supports multiprocessing
for parameter optimization.
"""

import json
import logging
import multiprocessing as mp
import os
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from itertools import product
from typing import Dict, List, Optional, Any, Callable, Union, Tuple

import numpy as np
import pandas as pd

from .inc_trader import IncTrader
from .base import IncStrategyBase
from ..utils.storage import Storage
from ..utils.system import SystemUtils

logger = logging.getLogger(__name__)


def _worker_function(args: Tuple[type, Dict, Dict, 'BacktestConfig', str]) -> Dict[str, Any]:
    """
    Worker function for multiprocessing parameter optimization.

    This function must be at module level to be picklable for multiprocessing.

    Args:
        args: Tuple containing (strategy_class, strategy_params, trader_params,
            config, data_file). ``data_file`` is accepted for interface
            compatibility; the backtester reads the path from ``config``.

    Returns:
        Dict containing backtest results with ``"success": True``, or a dict with
        ``strategy_params``, ``trader_params``, ``error`` and ``"success": False``
        when the run raised.
    """
    # Unpack before entering the try block: the except handler below refers to
    # strategy_params / trader_params, so they must be bound before anything
    # can be caught. A malformed tuple raises here and surfaces in the parent
    # through future.result().
    strategy_class, strategy_params, trader_params, config, data_file = args

    try:
        # Create new storage and backtester instance for this worker
        storage = Storage()
        worker_backtester = IncBacktester(config, storage)

        # Create strategy instance
        strategy = strategy_class(params=strategy_params)

        # Run backtest
        result = worker_backtester.run_single_strategy(strategy, trader_params)
        result["success"] = True
        return result

    except Exception as e:
        logger.error(f"Worker error for {strategy_params}, {trader_params}: {e}")
        return {
            "strategy_params": strategy_params,
            "trader_params": trader_params,
            "error": str(e),
            "success": False
        }


@dataclass
class BacktestConfig:
    """Configuration for backtesting runs."""
    # Data selection (passed to Storage.load_data by IncBacktester.load_data)
    data_file: str
    start_date: str
    end_date: str
    initial_usd: float = 10000
    timeframe: str = "1min"

    # Trader parameters (defaults merged into every trader's params)
    stop_loss_pct: float = 0.0
    take_profit_pct: float = 0.0

    # Performance settings
    # max_workers is used by optimize_parameters when no explicit value is passed
    max_workers: Optional[int] = None
    chunk_size: int = 1000


class IncBacktester:
    """
    Incremental backtester for testing incremental strategies.

    This class orchestrates multiple IncTraders for parallel testing:
    - Loads data using the existing Storage class
    - Creates multiple IncTrader instances with different parameters
    - Feeds data sequentially to all traders
    - Collects and aggregates results
    - Supports multiprocessing for parallel execution
    - Uses SystemUtils for optimal worker count determination

    The backtester can run multiple strategies simultaneously or test
    parameter combinations across multiple CPU cores.

    Example:
        # Single strategy backtest
        config = BacktestConfig(
            data_file="btc_1min_2023.csv",
            start_date="2023-01-01",
            end_date="2023-12-31",
            initial_usd=10000
        )

        strategy = IncRandomStrategy(params={"timeframe": "15min"})
        backtester = IncBacktester(config)
        results = backtester.run_single_strategy(strategy)

        # Multiple strategies
        strategies = [strategy1, strategy2, strategy3]
        results = backtester.run_multiple_strategies(strategies)

        # Parameter optimization
        param_grid = {
            "timeframe": ["5min", "15min", "30min"],
            "stop_loss_pct": [0.01, 0.02, 0.03]
        }
        results = backtester.optimize_parameters(strategy_class, param_grid)
    """

    def __init__(self, config: BacktestConfig, storage: Optional[Storage] = None):
        """
        Initialize the incremental backtester.

        Args:
            config: Backtesting configuration
            storage: Storage instance for data loading (creates new if None)
        """
        self.config = config
        self.storage = storage or Storage()
        self.system_utils = SystemUtils(logging=logger)
        self.data = None
        self.results_cache = {}

        # Track all actions performed during backtesting
        self.action_log = []
        self.session_start_time = datetime.now()

        logger.info(f"IncBacktester initialized: {config.data_file}, "
                    f"{config.start_date} to {config.end_date}")

        self._log_action("backtester_initialized", {
            "config": self._config_snapshot(),
            "session_start": self.session_start_time.isoformat()
        })

    def _config_snapshot(self) -> Dict[str, Any]:
        """Return a shallow copy of the config fields.

        A copy is handed out (rather than ``config.__dict__`` itself) so that
        mutating a result or log entry cannot silently alter the live config.
        """
        return dict(vars(self.config))

    def _log_action(self, action_type: str, details: Dict[str, Any]) -> None:
        """Log an action performed during backtesting."""
        self.action_log.append({
            "timestamp": datetime.now().isoformat(),
            "action_type": action_type,
            "details": details
        })

    def load_data(self) -> pd.DataFrame:
        """
        Load and prepare data for backtesting.

        The frame is cached on ``self.data`` only after it passes validation,
        so a failed load is retried (and re-validated) on the next call instead
        of handing back the invalid frame.

        Returns:
            pd.DataFrame: Loaded OHLCV data with DatetimeIndex

        Raises:
            ValueError: If no rows were loaded or a required OHLCV column is missing.
        """
        if self.data is not None:
            return self.data

        logger.info(f"Loading data from {self.config.data_file}...")
        start_time = time.time()

        data = self.storage.load_data(
            self.config.data_file,
            self.config.start_date,
            self.config.end_date
        )

        load_time = time.time() - start_time
        logger.info(f"Data loaded: {len(data)} rows in {load_time:.2f}s")

        # Validate data before caching it
        if data.empty:
            raise ValueError("No data loaded for the specified date range")

        required_columns = ['open', 'high', 'low', 'close', 'volume']
        missing_columns = [col for col in required_columns if col not in data.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

        self.data = data

        self._log_action("data_loaded", {
            "file": self.config.data_file,
            "rows": len(data),
            "load_time_seconds": load_time,
            "date_range": f"{self.config.start_date} to {self.config.end_date}",
            "columns": list(data.columns)
        })

        return self.data

    def run_single_strategy(self, strategy: IncStrategyBase,
                            trader_params: Optional[Dict] = None) -> Dict[str, Any]:
        """
        Run backtest for a single strategy.

        Args:
            strategy: Incremental strategy instance
            trader_params: Additional trader parameters; these override the
                stop-loss / take-profit defaults taken from the config

        Returns:
            Dict containing backtest results (the trader's results plus
            ``backtest_duration_seconds``, ``data_points`` and ``config``)
        """
        data = self.load_data()

        # Merge trader parameters
        final_trader_params = {
            "stop_loss_pct": self.config.stop_loss_pct,
            "take_profit_pct": self.config.take_profit_pct
        }
        if trader_params:
            final_trader_params.update(trader_params)

        # Create trader
        trader = IncTrader(
            strategy=strategy,
            initial_usd=self.config.initial_usd,
            params=final_trader_params
        )

        # Run backtest
        logger.info(f"Starting backtest for {strategy.name}...")
        start_time = time.time()

        self._log_action("single_strategy_backtest_started", {
            "strategy_name": strategy.name,
            "strategy_params": strategy.params,
            "trader_params": final_trader_params,
            "data_points": len(data)
        })

        # NOTE: iterrows is kept on purpose; it defines the scalar types the
        # trader receives, and switching iteration style could change them.
        for timestamp, row in data.iterrows():
            ohlcv_data = {
                'open': row['open'],
                'high': row['high'],
                'low': row['low'],
                'close': row['close'],
                'volume': row['volume']
            }
            trader.process_data_point(timestamp, ohlcv_data)

        # Finalize and get results
        trader.finalize()
        results = trader.get_results()

        backtest_time = time.time() - start_time
        results["backtest_duration_seconds"] = backtest_time
        results["data_points"] = len(data)
        results["config"] = self._config_snapshot()

        logger.info(f"Backtest completed for {strategy.name} in {backtest_time:.2f}s: "
                    f"${results['final_usd']:.2f} ({results['profit_ratio']*100:.2f}%), "
                    f"{results['n_trades']} trades")

        self._log_action("single_strategy_backtest_completed", {
            "strategy_name": strategy.name,
            "backtest_duration_seconds": backtest_time,
            "final_usd": results['final_usd'],
            "profit_ratio": results['profit_ratio'],
            "n_trades": results['n_trades'],
            "win_rate": results['win_rate']
        })

        return results

    def run_multiple_strategies(self, strategies: List[IncStrategyBase],
                                trader_params: Optional[Dict] = None) -> List[Dict[str, Any]]:
        """
        Run backtest for multiple strategies, one after another.

        A strategy that raises does not abort the batch; an error entry with
        ``"success": False`` is recorded in its place.

        Args:
            strategies: List of incremental strategy instances
            trader_params: Additional trader parameters

        Returns:
            List of backtest results for each strategy
        """
        self._log_action("multiple_strategies_backtest_started", {
            "strategy_count": len(strategies),
            "strategy_names": [s.name for s in strategies]
        })

        results = []

        for strategy in strategies:
            try:
                result = self.run_single_strategy(strategy, trader_params)
                results.append(result)
            except Exception as e:
                logger.error(f"Error running strategy {strategy.name}: {e}")
                # Add error result
                results.append({
                    "strategy_name": strategy.name,
                    "error": str(e),
                    "success": False
                })

                self._log_action("strategy_error", {
                    "strategy_name": strategy.name,
                    "error": str(e)
                })

        # Results without a "success" key come from successful runs
        successful = sum(1 for r in results if r.get("success", True))
        self._log_action("multiple_strategies_backtest_completed", {
            "total_strategies": len(strategies),
            "successful_strategies": successful,
            "failed_strategies": len(results) - successful
        })

        return results

    def optimize_parameters(self, strategy_class: type, param_grid: Dict[str, List],
                            trader_param_grid: Optional[Dict[str, List]] = None,
                            max_workers: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        Optimize strategy parameters using grid search with multiprocessing.

        Args:
            strategy_class: Strategy class to instantiate
            param_grid: Grid of strategy parameters to test
            trader_param_grid: Grid of trader parameters to test
            max_workers: Maximum number of worker processes. When None, falls
                back to ``config.max_workers`` and then to SystemUtils.

        Returns:
            List of results for each parameter combination: successful runs
            first, sorted by ``profit_ratio`` descending, followed by failed runs.
        """
        # Generate parameter combinations
        strategy_combinations = list(self._generate_param_combinations(param_grid))
        trader_combinations = list(self._generate_param_combinations(trader_param_grid or {}))

        # If no trader param grid, use default
        if not trader_combinations:
            trader_combinations = [{}]

        # Create all combinations
        all_combinations = list(product(strategy_combinations, trader_combinations))

        logger.info(f"Starting parameter optimization: {len(all_combinations)} combinations")

        # Determine number of workers: explicit argument, then config, then SystemUtils
        if max_workers is None:
            max_workers = self.config.max_workers
        if max_workers is None:
            max_workers = self.system_utils.get_optimal_workers()
        # Never request more processes than jobs, and never fewer than one:
        # ProcessPoolExecutor rejects max_workers <= 0, which the plain min()
        # would produce when a grid contains an empty value list.
        max_workers = max(1, min(max_workers, len(all_combinations)))

        self._log_action("parameter_optimization_started", {
            "strategy_class": strategy_class.__name__,
            "total_combinations": len(all_combinations),
            "max_workers": max_workers,
            "strategy_param_grid": param_grid,
            "trader_param_grid": trader_param_grid or {}
        })

        # Run optimization
        if max_workers == 1 or len(all_combinations) <= 1:
            # Single-process execution
            results = [
                self._run_single_combination(strategy_class, strategy_params, trader_params)
                for strategy_params, trader_params in all_combinations
            ]
        else:
            # Multi-process execution
            results = self._run_parallel_optimization(
                strategy_class, all_combinations, max_workers
            )

        # Sort successful results by profit ratio (best first)
        valid_results = sorted(
            (r for r in results if r.get("success", True)),
            key=lambda x: x.get("profit_ratio", -float('inf')),
            reverse=True
        )
        failed_results = [r for r in results if not r.get("success", True)]

        logger.info(f"Parameter optimization completed: {len(valid_results)} successful runs")

        self._log_action("parameter_optimization_completed", {
            "total_runs": len(results),
            "successful_runs": len(valid_results),
            "failed_runs": len(failed_results),
            "best_profit_ratio": valid_results[0]["profit_ratio"] if valid_results else None,
            "worst_profit_ratio": valid_results[-1]["profit_ratio"] if valid_results else None
        })

        # Failed runs stay in the output so callers can still count them
        return valid_results + failed_results

    def _generate_param_combinations(self, param_grid: Dict[str, List]) -> List[Dict]:
        """Generate all parameter combinations from grid.

        An empty grid yields a single empty dict. A grid with an empty value
        list yields no combinations.
        """
        if not param_grid:
            return [{}]

        keys = list(param_grid.keys())
        return [dict(zip(keys, combination)) for combination in product(*param_grid.values())]

    def _run_single_combination(self, strategy_class: type, strategy_params: Dict,
                                trader_params: Dict) -> Dict[str, Any]:
        """Run backtest for a single parameter combination.

        Returns the backtest result with ``"success": True``, or an error dict
        with ``"success": False`` if strategy creation or the backtest raised.
        """
        try:
            # Create strategy instance
            strategy = strategy_class(params=strategy_params)

            # Run backtest
            result = self.run_single_strategy(strategy, trader_params)
            result["success"] = True

            return result

        except Exception as e:
            logger.error(f"Error in parameter combination {strategy_params}, {trader_params}: {e}")
            return {
                "strategy_params": strategy_params,
                "trader_params": trader_params,
                "error": str(e),
                "success": False
            }

    def _run_parallel_optimization(self, strategy_class: type, combinations: List,
                                   max_workers: int) -> List[Dict[str, Any]]:
        """Run parameter optimization in parallel.

        Each combination is submitted to a process pool; results are collected
        in completion order, so the returned list is not in grid order.
        """
        results = []

        # Prepare arguments for worker function
        worker_args = [
            (strategy_class, strategy_params, trader_params, self.config, self.config.data_file)
            for strategy_params, trader_params in combinations
        ]

        # Execute in parallel
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            # Submit all jobs
            future_to_params = {
                executor.submit(_worker_function, args): args[1:3]  # strategy_params, trader_params
                for args in worker_args
            }

            # Collect results as they complete
            for future in as_completed(future_to_params):
                combo = future_to_params[future]
                try:
                    result = future.result()
                    results.append(result)

                    if result.get("success", True):
                        logger.info(f"Completed: {combo[0]} -> "
                                    f"${result.get('final_usd', 0):.2f} "
                                    f"({result.get('profit_ratio', 0)*100:.2f}%)")
                except Exception as e:
                    # Raised when the worker process itself failed (e.g. pickling error)
                    logger.error(f"Worker error for {combo}: {e}")
                    results.append({
                        "strategy_params": combo[0],
                        "trader_params": combo[1],
                        "error": str(e),
                        "success": False
                    })

        return results

    @staticmethod
    def _describe(values: List[Any]) -> Dict[str, Any]:
        """Return mean/std/min/max/median of ``values`` as native Python numbers.

        NumPy scalars are unwrapped with ``.item()`` so the summary serializes
        to JSON as numbers (``np.int64`` is not JSON serializable and would
        otherwise be stringified by ``default=str``).
        """
        def native(x: Any) -> Any:
            return x.item() if isinstance(x, np.generic) else x

        return {
            "mean": native(np.mean(values)),
            "std": native(np.std(values)),
            "min": native(np.min(values)),
            "max": native(np.max(values)),
            "median": native(np.median(values))
        }

    def get_summary_statistics(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Calculate summary statistics across multiple backtest results.

        Args:
            results: List of backtest results

        Returns:
            Dict containing summary statistics. When no run succeeded, only the
            run counts and an ``error`` message are returned.
        """
        valid_results = [r for r in results if r.get("success", True)]

        if not valid_results:
            return {
                "total_runs": len(results),
                "successful_runs": 0,
                "failed_runs": len(results),
                "error": "No valid results to summarize"
            }

        summary = {
            "total_runs": len(results),
            "successful_runs": len(valid_results),
            "failed_runs": len(results) - len(valid_results),
        }

        # Profit, balance, trading and performance statistics, in that order
        for metric in ("profit_ratio", "final_usd", "n_trades", "win_rate", "max_drawdown"):
            summary[metric] = self._describe([r[metric] for r in valid_results])

        # Best and worst performing run
        summary["best_run"] = max(valid_results, key=lambda x: x["profit_ratio"])
        summary["worst_run"] = min(valid_results, key=lambda x: x["profit_ratio"])

        return summary

    def save_comprehensive_results(self, results: List[Dict[str, Any]],
                                   base_filename: str,
                                   summary: Optional[Dict[str, Any]] = None) -> None:
        """
        Save comprehensive backtest results including summary, individual results, and action log.

        JSON artifacts are written under the ``results/`` directory, which is
        created if it does not exist. The detailed CSV is written through
        ``save_results``.

        Args:
            results: List of backtest results
            base_filename: Base filename (without extension)
            summary: Optional summary statistics (computed from results if None)

        Raises:
            Exception: Any error raised while writing is logged and re-raised.
        """
        def write_json(filename: str, payload: Dict[str, Any]) -> None:
            # default=str stringifies values json cannot encode (e.g. timestamps)
            with open(f"results/{filename}", 'w') as f:
                json.dump(payload, f, indent=2, default=str)

        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

            # open() does not create missing directories
            os.makedirs("results", exist_ok=True)

            # 1. Save summary report
            if summary is None:
                summary = self.get_summary_statistics(results)

            summary_data = {
                "session_info": {
                    "timestamp": timestamp,
                    "session_start": self.session_start_time.isoformat(),
                    "session_duration_seconds": (datetime.now() - self.session_start_time).total_seconds(),
                    "config": self._config_snapshot()
                },
                "summary_statistics": summary,
                "action_log_summary": {
                    "total_actions": len(self.action_log),
                    "action_types": sorted({action["action_type"] for action in self.action_log})
                }
            }

            summary_filename = f"{base_filename}_summary_{timestamp}.json"
            write_json(summary_filename, summary_data)
            logger.info(f"Summary saved to results/{summary_filename}")

            # 2. Save detailed results CSV
            detailed_filename = f"{base_filename}_detailed_{timestamp}.csv"
            self.save_results(results, detailed_filename)

            # 3. Save individual strategy results
            valid_results = [r for r in results if r.get("success", True)]
            strategy_filenames = []
            for i, result in enumerate(valid_results):
                strategy_filename = f"{base_filename}_strategy_{i+1}_{result['strategy_name']}_{timestamp}.json"
                strategy_filenames.append(strategy_filename)

                # Include trades and detailed info
                strategy_data = {
                    "strategy_info": {
                        "name": result['strategy_name'],
                        "params": result.get('strategy_params', {}),
                        "trader_params": result.get('trader_params', {})
                    },
                    "performance": {
                        "initial_usd": result['initial_usd'],
                        "final_usd": result['final_usd'],
                        "profit_ratio": result['profit_ratio'],
                        "n_trades": result['n_trades'],
                        "win_rate": result['win_rate'],
                        "max_drawdown": result['max_drawdown'],
                        "avg_trade": result['avg_trade'],
                        "total_fees_usd": result['total_fees_usd']
                    },
                    "execution": {
                        "backtest_duration_seconds": result.get('backtest_duration_seconds', 0),
                        "data_points_processed": result.get('data_points_processed', 0),
                        "warmup_complete": result.get('warmup_complete', False)
                    },
                    "trades": result.get('trades', [])
                }

                write_json(strategy_filename, strategy_data)
                logger.info(f"Strategy {i+1} details saved to results/{strategy_filename}")

            # 4. Save complete action log
            action_log_filename = f"{base_filename}_actions_{timestamp}.json"
            action_log_data = {
                "session_info": {
                    "timestamp": timestamp,
                    "session_start": self.session_start_time.isoformat(),
                    "total_actions": len(self.action_log)
                },
                "actions": self.action_log
            }

            write_json(action_log_filename, action_log_data)
            logger.info(f"Action log saved to results/{action_log_filename}")

            # 5. Create a master index file
            index_filename = f"{base_filename}_index_{timestamp}.json"
            # Absent when the summary is the "no valid results" form
            profit_stats = summary.get("profit_ratio") or {}
            index_data = {
                "session_info": {
                    "timestamp": timestamp,
                    "base_filename": base_filename,
                    "total_strategies": len(valid_results),
                    "session_duration_seconds": (datetime.now() - self.session_start_time).total_seconds()
                },
                "files": {
                    "summary": summary_filename,
                    "detailed_csv": detailed_filename,
                    "action_log": action_log_filename,
                    "individual_strategies": strategy_filenames
                },
                "quick_stats": {
                    "best_profit": profit_stats.get("max", 0),
                    "worst_profit": profit_stats.get("min", 0),
                    "avg_profit": profit_stats.get("mean", 0),
                    "total_successful_runs": summary.get("successful_runs", 0),
                    "total_failed_runs": summary.get("failed_runs", 0)
                }
            }

            write_json(index_filename, index_data)
            logger.info(f"Master index saved to results/{index_filename}")

            print("\n📊 Comprehensive results saved:")
            print(f"   📋 Summary: results/{summary_filename}")
            print(f"   📈 Detailed CSV: results/{detailed_filename}")
            print(f"   📝 Action Log: results/{action_log_filename}")
            print(f"   📁 Individual Strategies: {len(valid_results)} files")
            print(f"   🗂️ Master Index: results/{index_filename}")

        except Exception as e:
            logger.error(f"Error saving comprehensive results: {e}")
            raise

    def save_results(self, results: List[Dict[str, Any]], filename: str) -> None:
        """
        Save backtest results to file.

        Only successful results are written; each becomes one row, with strategy
        and trader parameters flattened into ``strategy_*`` / ``trader_*`` columns.
        The frame is handed to ``Storage.save_data``.

        Args:
            results: List of backtest results
            filename: Output filename

        Raises:
            Exception: Any error raised while saving is logged and re-raised.
        """
        try:
            # Convert results to DataFrame for easy saving
            df_data = []
            for result in results:
                if not result.get("success", True):
                    continue

                row = {
                    "strategy_name": result.get("strategy_name", ""),
                    "profit_ratio": result.get("profit_ratio", 0),
                    "final_usd": result.get("final_usd", 0),
                    "n_trades": result.get("n_trades", 0),
                    "win_rate": result.get("win_rate", 0),
                    "max_drawdown": result.get("max_drawdown", 0),
                    "avg_trade": result.get("avg_trade", 0),
                    "total_fees_usd": result.get("total_fees_usd", 0),
                    "backtest_duration_seconds": result.get("backtest_duration_seconds", 0),
                    "data_points_processed": result.get("data_points_processed", 0)
                }

                # Add strategy parameters
                for key, value in result.get("strategy_params", {}).items():
                    row[f"strategy_{key}"] = value

                # Add trader parameters
                for key, value in result.get("trader_params", {}).items():
                    row[f"trader_{key}"] = value

                df_data.append(row)

            # Save to CSV
            df = pd.DataFrame(df_data)
            self.storage.save_data(df, filename)

            logger.info(f"Results saved to {filename}: {len(df_data)} rows")

        except Exception as e:
            logger.error(f"Error saving results to {filename}: {e}")
            raise

    def __repr__(self) -> str:
        """String representation of the backtester."""
        return (f"IncBacktester(data_file={self.config.data_file}, "
                f"date_range={self.config.start_date} to {self.config.end_date}, "
                f"initial_usd=${self.config.initial_usd})")