import pandas as pd
import numpy as np
from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple
from cycle_detector import CycleDetector
import csv
import logging
import concurrent.futures
import os
import psutil
import datetime
from charts import BacktestCharts
from collections import Counter

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)


def get_optimal_workers():
    """Determine the optimal number of worker processes based on system resources."""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)
    # Heuristic: use 75% of the cores, but cap based on available memory
    # (assume each worker needs ~2 GB for large datasets).
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)


def load_data(file_path, start_date, stop_date):
    """Load OHLCV data with optimized dtypes and date filtering."""
    # Use float32 to roughly halve memory usage versus the default float64
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }
    # Read data with the original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)
    # Convert the Unix timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()
    return data.set_index('timestamp')


def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd, debug=False):
    """Process the entire timeframe with all stop loss values (no monthly split)."""
    df = df.copy().reset_index(drop=True)
    trend_detector = TrendDetectorSimple(df, verbose=False)
    results_rows = []
    trade_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            min1_df,
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct,
            debug=debug
        )
        n_trades = results["n_trades"]
        trades = results.get('trades', [])

        # Basic per-combination statistics
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')

        # Maximum drawdown of the cumulative profit curve (in percentage points)
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown

        # Compound the per-trade returns to get the final account value
        final_usd = initial_usd
        for trade in trades:
            final_usd *= (1 + trade['profit_pct'])

        row = {
            "timeframe": rule_name,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if trade.get('type') == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio,
            "initial_usd": initial_usd,
            "final_usd": final_usd,
        }
        results_rows.append(row)

        for trade in trades:
            trade_rows.append({
                "timeframe": rule_name,
                "stop_loss_pct": stop_loss_pct,
                "entry_time": trade.get("entry_time"),
                "exit_time": trade.get("exit_time"),
                "entry_price": trade.get("entry"),
                "exit_price": trade.get("exit"),
                "profit_pct": trade.get("profit_pct"),
                "type": trade.get("type", ""),
            })

        logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")

        if debug:
            for trade in trades:
                if trade.get('type') == 'STOP':
                    print(trade)
            for trade in trades:
                if trade['profit_pct'] < -0.09:  # flag losses close to the -0.10 stop level
                    print("Large loss trade:", trade)

    return results_rows, trade_rows

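# Illustrative sketch (not used by the pipeline) of the result structure that
# process_timeframe_data assumes backtest_meta_supertrend returns, based on the
# keys consumed above. All field values here are hypothetical; only 'STOP' is a
# trade type confirmed elsewhere in this script.
_EXAMPLE_BACKTEST_RESULT = {
    "n_trades": 2,
    "trades": [
        {"entry_time": "2020-01-02 00:00", "exit_time": "2020-01-05 00:00",
         "entry": 7000.0, "exit": 7350.0, "profit_pct": 0.05, "type": ""},
        {"entry_time": "2020-01-06 00:00", "exit_time": "2020-01-07 00:00",
         "entry": 7400.0, "exit": 7252.0, "profit_pct": -0.02, "type": "STOP"},
    ],
}
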
trade.get("type", ""), }) logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}") if debug: for trade in trades: if trade['type'] == 'STOP': print(trade) for trade in trades: if trade['profit_pct'] < -0.09: # or whatever is close to -0.10 print("Large loss trade:", trade) return results_rows, trade_rows def process_timeframe(timeframe_info, debug=False): """Process an entire timeframe (no monthly split)""" rule, data_1min, stop_loss_pcts, initial_usd = timeframe_info if rule == "1T": df = data_1min.copy() else: df = data_1min.resample(rule).agg({ 'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum' }).dropna() df = df.reset_index() # --- Add this block to check alignment --- print("1-min data range:", data_1min.index.min(), "to", data_1min.index.max()) print(f"{rule} data range:", df['timestamp'].min(), "to", df['timestamp'].max()) # ----------------------------------------- results_rows, all_trade_rows = process_timeframe_data(data_1min, df, stop_loss_pcts, rule, initial_usd, debug=debug) return results_rows, all_trade_rows def write_results_chunk(filename, fieldnames, rows, write_header=False): """Write a chunk of results to a CSV file""" mode = 'w' if write_header else 'a' with open(filename, mode, newline="") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=fieldnames) if write_header: csvfile.write(f"# initial_usd: {initial_usd}\n") writer.writeheader() for row in rows: # Only keep keys that are in fieldnames filtered_row = {k: v for k, v in row.items() if k in fieldnames} writer.writerow(filtered_row) def aggregate_results(all_rows): """Aggregate results per stop_loss_pct and per rule (timeframe)""" from collections import defaultdict grouped = defaultdict(list) for row in all_rows: key = (row['timeframe'], row['stop_loss_pct']) grouped[key].append(row) summary_rows = [] for (rule, stop_loss_pct), rows in grouped.items(): n_months = len(rows) total_trades = sum(r['n_trades'] for r in rows) total_stop_loss = sum(r['n_stop_loss'] for r in rows) avg_win_rate = np.mean([r['win_rate'] for r in rows]) avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows]) avg_avg_trade = np.mean([r['avg_trade'] for r in rows]) avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows]) # Calculate final USD final_usd = np.mean([r.get('final_usd', initial_usd) for r in rows]) summary_rows.append({ "timeframe": rule, "stop_loss_pct": stop_loss_pct, "n_trades": total_trades, "n_stop_loss": total_stop_loss, "win_rate": avg_win_rate, "max_drawdown": avg_max_drawdown, "avg_trade": avg_avg_trade, "profit_ratio": avg_profit_ratio, "initial_usd": initial_usd, "final_usd": final_usd, }) return summary_rows def write_results_per_combination(results_rows, trade_rows, timestamp): results_dir = "results" os.makedirs(results_dir, exist_ok=True) for row in results_rows: timeframe = row["timeframe"] stop_loss_pct = row["stop_loss_pct"] filename = os.path.join( results_dir, f"{timestamp}_backtest_{timeframe}_{stop_loss_pct}.csv" ) fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"] write_results_chunk(filename, fieldnames, [row], write_header=not os.path.exists(filename)) for trade in trade_rows: timeframe = trade["timeframe"] stop_loss_pct = trade["stop_loss_pct"] trades_filename = os.path.join( results_dir, f"{timestamp}_trades_{timeframe}_{stop_loss_pct}.csv" ) trades_fieldnames = [ "timeframe", "stop_loss_pct", "entry_time", "exit_time", 
"entry_price", "exit_price", "profit_pct", "type" ] write_results_chunk(trades_filename, trades_fieldnames, [trade], write_header=not os.path.exists(trades_filename)) if __name__ == "__main__": # Configuration start_date = '2020-01-01' stop_date = '2025-05-15' initial_usd = 10000 debug = False # Set to True to enable debug prints # --- NEW: Prepare results folder and timestamp --- results_dir = "results" os.makedirs(results_dir, exist_ok=True) timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M") # --- END NEW --- # Replace the dictionary with a list of timeframe names timeframes = ["15min", "1h", "6h", "1D"] # timeframes = ["6h"] stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10] # stop_loss_pcts = [0.01] # Load data once data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date) logging.info(f"1min rows: {len(data_1min)}") # Prepare tasks tasks = [ (name, data_1min, stop_loss_pcts, initial_usd) for name in timeframes ] # Determine optimal worker count workers = get_optimal_workers() logging.info(f"Using {workers} workers for processing") # Process tasks with optimized concurrency with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor: futures = {executor.submit(process_timeframe, task, debug): task[1] for task in tasks} all_results_rows = [] for future in concurrent.futures.as_completed(futures): #try: results, trades = future.result() if results or trades: all_results_rows.extend(results) write_results_per_combination(results, trades, timestamp) #except Exception as exc: # logging.error(f"generated an exception: {exc}") # Write all results to a single CSV file combined_filename = os.path.join(results_dir, f"{timestamp}_backtest_combined.csv") combined_fieldnames = [ "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio", "final_usd" ] def format_row(row): # Format percentages and floats as in your example return { "timeframe": row["timeframe"], "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%", "n_trades": row["n_trades"], "n_stop_loss": row["n_stop_loss"], "win_rate": f"{row['win_rate']*100:.2f}%", "max_drawdown": f"{row['max_drawdown']*100:.2f}%", "avg_trade": f"{row['avg_trade']*100:.2f}%", "profit_ratio": f"{row['profit_ratio']*100:.2f}%", "final_usd": f"{row['final_usd']:.2f}", } with open(combined_filename, "w", newline="") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=combined_fieldnames, delimiter='\t') writer.writeheader() for row in all_results_rows: writer.writerow(format_row(row)) logging.info(f"Combined results written to {combined_filename}")