"""Backtest the meta-supertrend strategy across multiple timeframes and stop-loss
percentages on 1-minute BTC/USD data, parallelized across timeframes with a
process pool, and write per-(timeframe, stop_loss_pct) summary statistics to CSV."""

import csv
import logging
import concurrent.futures
import os
from collections import defaultdict

import pandas as pd
import numpy as np
import psutil

from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple
from cycle_detector import CycleDetector

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)


def get_optimal_workers():
    """Determine the number of worker processes based on system resources."""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024 ** 3)
    # Heuristic: use 75% of the cores, but cap based on available memory,
    # assuming each worker needs ~2 GB for large datasets.
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)


def load_data(file_path, start_date, stop_date):
    """Load 1-minute OHLCV data with optimized dtypes and date filtering."""
    # Downcast price/volume columns to float32 to reduce memory usage
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }
    # Read data with the original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)
    # Convert the Unix-seconds timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()
    return data.set_index('timestamp')


def process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd):
    """Process a single month for a given timeframe with all stop-loss values."""
    month_df = month_df.copy().reset_index(drop=True)

    # Only calculate trends once per month-timeframe combination
    trend_detector = TrendDetectorSimple(month_df, verbose=False)
    analysis_results = trend_detector.detect_trends()

    # Run the backtest for each stop_loss_pct
    results_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct
        )

        # Summarize the trades
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')

        # Calculate the max drawdown of the cumulative profit curve
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown

        results_rows.append({
            "timeframe": rule_name,
            "month": str(month_df['timestamp'].iloc[0].to_period('M')),
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if trade.get('type') == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio
        })
    return results_rows
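
# Illustrative sketch (not called by the pipeline): the same running-peak drawdown
# logic used in process_month_timeframe, isolated on a plain list of per-trade
# profit percentages. The sample values in the docstring are hypothetical.
def _max_drawdown_example(profit_pcts):
    """Return the largest peak-to-trough drop of the cumulative profit curve.

    e.g. _max_drawdown_example([0.02, -0.03, 0.01]) -> 0.03
    """
    cumulative = peak = max_dd = 0.0
    for pct in profit_pcts:
        cumulative += pct
        peak = max(peak, cumulative)
        max_dd = max(max_dd, peak - cumulative)
    return max_dd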
def process_timeframe(timeframe_info):
    """Process an entire timeframe: resample, split into months and backtest each."""
    rule, rule_name, data_1min, stop_loss_pcts, initial_usd = timeframe_info

    # Resample data if needed
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()

    df = df.reset_index()
    df['month'] = df['timestamp'].dt.to_period('M')

    results_rows = []
    # Process each month
    for month, month_df in df.groupby('month'):
        if len(month_df) < 10:  # Skip very small months
            continue
        logging.info(f"Processing: timeframe={rule_name}, month={month}")
        try:
            month_results = process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd)
            # Accumulate rows for every month; aggregation happens in the parent process
            results_rows.extend(month_results)
        except Exception as e:
            logging.error(f"Error processing {rule_name}, month={month}: {str(e)}")

    return results_rows


def write_results_chunk(filename, fieldnames, rows, write_header=False, initial_usd=None):
    """Write a chunk of results to a CSV file.

    When write_header is True the file is (re)created and, if initial_usd is
    given, recorded as a leading comment line.
    """
    mode = 'w' if write_header else 'a'
    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            if initial_usd is not None:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()
        for row in rows:
            writer.writerow(row)


def aggregate_results(all_rows):
    """Aggregate results per stop_loss_pct and per rule (timeframe)."""
    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])
        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
        })
    return summary_rows
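
# Usage note for aggregate_results (hypothetical numbers, illustration only):
# two monthly rows for the key ("1h", 0.02) with n_trades 5 and 3 and win_rate
# 0.6 and 0.5 collapse into one summary row in which n_trades (8) and n_stop_loss
# are summed, while win_rate, max_drawdown, avg_trade and profit_ratio are
# averaged across months (win_rate = 0.55 here).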

if __name__ == "__main__":
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000
    # Resample rules mapped to display names. Note: the "T"/"H" offset aliases
    # are deprecated in recent pandas releases in favour of "min"/"h".
    timeframes = {
        # "1T": "1min",
        "15T": "15min",
        "1H": "1h",
        "6H": "6h",
        "1D": "1D",
    }
    stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10]

    # Load data once
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")

    # Set up the result file
    filename = f"backtest_results_{start_date}_{stop_date}_multi_timeframe_stoploss.csv"
    fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss",
                  "win_rate", "max_drawdown", "avg_trade", "profit_ratio"]

    # Initialize the output file with a header (rewritten by the summary write below)
    write_results_chunk(filename, fieldnames, [], write_header=True, initial_usd=initial_usd)

    # Prepare one task per timeframe
    tasks = [
        (rule, name, data_1min, stop_loss_pcts, initial_usd)
        for rule, name in timeframes.items()
    ]

    # Determine the worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")

    # Process timeframes in parallel
    all_results = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task): task[1] for task in tasks}
        for future in concurrent.futures.as_completed(futures):
            timeframe_name = futures[future]
            try:
                results = future.result()
                if results:
                    all_results.extend(results)
            except Exception as exc:
                logging.error(f"{timeframe_name} generated an exception: {exc}")

    # Aggregate per (timeframe, stop_loss_pct) and write only the summary rows
    summary_rows = aggregate_results(all_results)
    write_results_chunk(filename, fieldnames, summary_rows, write_header=True,
                        initial_usd=initial_usd)
    logging.info(f"Results written to {filename}")
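
# Input/output notes (derived from the configuration above):
# - The input CSV (./data/btcusd_1-min_data.csv here) is expected to carry the
#   columns Timestamp (Unix seconds), Open, High, Low, Close and Volume, as
#   assumed by load_data.
# - Only the aggregated per-(timeframe, stop_loss_pct) summary is written to the
#   output CSV; the per-month rows are kept in memory and discarded after
#   aggregation.
# - TrendDetectorSimple must be importable from trend_detector_simple when the
#   script is run.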