Cycles/main.py
Simon Moisy 125d4f7d52 Add BacktestCharts class for visualizing backtest results and update main.py for enhanced data processing
- Introduced BacktestCharts class in charts.py to plot profit ratio vs stop loss and average trade vs stop loss for different timeframes.
- Updated main.py to integrate new charting functionality and streamline data processing without monthly splits.
- Enhanced backtesting logic in TrendDetectorSimple to include transaction costs and improved stop loss handling using 1-minute data for accuracy.
- Added functionality to write results to individual CSV files for better organization and analysis.
2025-05-17 13:07:40 +08:00

301 lines
12 KiB
Python

import pandas as pd
import numpy as np
from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple
from cycle_detector import CycleDetector
import csv
import logging
import concurrent.futures
import os
import psutil
import datetime
from charts import BacktestCharts
from collections import Counter
# Set up logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("backtest.log"),
logging.StreamHandler()
]
)
def get_optimal_workers():
"""Determine optimal number of worker processes based on system resources"""
cpu_count = os.cpu_count() or 4
memory_gb = psutil.virtual_memory().total / (1024**3)
# Heuristic: Use 75% of cores, but cap based on available memory
# Assume each worker needs ~2GB for large datasets
workers_by_memory = max(1, int(memory_gb / 2))
workers_by_cpu = max(1, int(cpu_count * 0.75))
return min(workers_by_cpu, workers_by_memory)
def load_data(file_path, start_date, stop_date):
"""Load data with optimized dtypes and filtering"""
# Define optimized dtypes
dtypes = {
'Open': 'float32',
'High': 'float32',
'Low': 'float32',
'Close': 'float32',
'Volume': 'float32'
}
# Read data with original capitalized column names
data = pd.read_csv(file_path, dtype=dtypes)
# Convert timestamp to datetime
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
# Filter by date range
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
# Now convert column names to lowercase
data.columns = data.columns.str.lower()
return data.set_index('timestamp')
def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd, debug=False):
"""Process the entire timeframe with all stop loss values (no monthly split)"""
df = df.copy().reset_index(drop=True)
trend_detector = TrendDetectorSimple(df, verbose=False)
results_rows = []
trade_rows = []
for stop_loss_pct in stop_loss_pcts:
results = trend_detector.backtest_meta_supertrend(
min1_df,
initial_usd=initial_usd,
stop_loss_pct=stop_loss_pct,
debug=debug
)
n_trades = results["n_trades"]
trades = results.get('trades', [])
n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
total_profit = sum(trade['profit_pct'] for trade in trades)
total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
avg_trade = total_profit / n_trades if n_trades > 0 else 0
profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')
cumulative_profit = 0
max_drawdown = 0
peak = 0
for trade in trades:
cumulative_profit += trade['profit_pct']
if cumulative_profit > peak:
peak = cumulative_profit
drawdown = peak - cumulative_profit
if drawdown > max_drawdown:
max_drawdown = drawdown
final_usd = initial_usd
for trade in trades:
final_usd *= (1 + trade['profit_pct'])
row = {
"timeframe": rule_name,
"stop_loss_pct": stop_loss_pct,
"n_trades": n_trades,
"n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
"win_rate": win_rate,
"max_drawdown": max_drawdown,
"avg_trade": avg_trade,
"profit_ratio": profit_ratio,
"initial_usd": initial_usd,
"final_usd": final_usd,
}
results_rows.append(row)
for trade in trades:
trade_rows.append({
"timeframe": rule_name,
"stop_loss_pct": stop_loss_pct,
"entry_time": trade.get("entry_time"),
"exit_time": trade.get("exit_time"),
"entry_price": trade.get("entry"),
"exit_price": trade.get("exit"),
"profit_pct": trade.get("profit_pct"),
"type": trade.get("type", ""),
})
logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
if debug:
for trade in trades:
if trade['type'] == 'STOP':
print(trade)
for trade in trades:
if trade['profit_pct'] < -0.09: # or whatever is close to -0.10
print("Large loss trade:", trade)
return results_rows, trade_rows
def process_timeframe(timeframe_info, debug=False):
"""Process an entire timeframe (no monthly split)"""
rule, data_1min, stop_loss_pcts, initial_usd = timeframe_info
if rule == "1T":
df = data_1min.copy()
else:
df = data_1min.resample(rule).agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum'
}).dropna()
df = df.reset_index()
# --- Add this block to check alignment ---
print("1-min data range:", data_1min.index.min(), "to", data_1min.index.max())
print(f"{rule} data range:", df['timestamp'].min(), "to", df['timestamp'].max())
# -----------------------------------------
results_rows, all_trade_rows = process_timeframe_data(data_1min, df, stop_loss_pcts, rule, initial_usd, debug=debug)
return results_rows, all_trade_rows
def write_results_chunk(filename, fieldnames, rows, write_header=False):
"""Write a chunk of results to a CSV file"""
mode = 'w' if write_header else 'a'
with open(filename, mode, newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
if write_header:
csvfile.write(f"# initial_usd: {initial_usd}\n")
writer.writeheader()
for row in rows:
# Only keep keys that are in fieldnames
filtered_row = {k: v for k, v in row.items() if k in fieldnames}
writer.writerow(filtered_row)
def aggregate_results(all_rows):
"""Aggregate results per stop_loss_pct and per rule (timeframe)"""
from collections import defaultdict
grouped = defaultdict(list)
for row in all_rows:
key = (row['timeframe'], row['stop_loss_pct'])
grouped[key].append(row)
summary_rows = []
for (rule, stop_loss_pct), rows in grouped.items():
n_months = len(rows)
total_trades = sum(r['n_trades'] for r in rows)
total_stop_loss = sum(r['n_stop_loss'] for r in rows)
avg_win_rate = np.mean([r['win_rate'] for r in rows])
avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])
# Calculate final USD
final_usd = np.mean([r.get('final_usd', initial_usd) for r in rows])
summary_rows.append({
"timeframe": rule,
"stop_loss_pct": stop_loss_pct,
"n_trades": total_trades,
"n_stop_loss": total_stop_loss,
"win_rate": avg_win_rate,
"max_drawdown": avg_max_drawdown,
"avg_trade": avg_avg_trade,
"profit_ratio": avg_profit_ratio,
"initial_usd": initial_usd,
"final_usd": final_usd,
})
return summary_rows
def write_results_per_combination(results_rows, trade_rows, timestamp):
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)
for row in results_rows:
timeframe = row["timeframe"]
stop_loss_pct = row["stop_loss_pct"]
filename = os.path.join(
results_dir,
f"{timestamp}_backtest_{timeframe}_{stop_loss_pct}.csv"
)
fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"]
write_results_chunk(filename, fieldnames, [row], write_header=not os.path.exists(filename))
for trade in trade_rows:
timeframe = trade["timeframe"]
stop_loss_pct = trade["stop_loss_pct"]
trades_filename = os.path.join(
results_dir,
f"{timestamp}_trades_{timeframe}_{stop_loss_pct}.csv"
)
trades_fieldnames = [
"timeframe", "stop_loss_pct", "entry_time", "exit_time",
"entry_price", "exit_price", "profit_pct", "type"
]
write_results_chunk(trades_filename, trades_fieldnames, [trade], write_header=not os.path.exists(trades_filename))
if __name__ == "__main__":
# Configuration
start_date = '2020-01-01'
stop_date = '2025-05-15'
initial_usd = 10000
debug = False # Set to True to enable debug prints
# --- NEW: Prepare results folder and timestamp ---
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
# --- END NEW ---
# Replace the dictionary with a list of timeframe names
timeframes = ["15min", "1h", "6h", "1D"]
# timeframes = ["6h"]
stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10]
# stop_loss_pcts = [0.01]
# Load data once
data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
logging.info(f"1min rows: {len(data_1min)}")
# Prepare tasks
tasks = [
(name, data_1min, stop_loss_pcts, initial_usd)
for name in timeframes
]
# Determine optimal worker count
workers = get_optimal_workers()
logging.info(f"Using {workers} workers for processing")
# Process tasks with optimized concurrency
with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
futures = {executor.submit(process_timeframe, task, debug): task[1] for task in tasks}
all_results_rows = []
for future in concurrent.futures.as_completed(futures):
#try:
results, trades = future.result()
if results or trades:
all_results_rows.extend(results)
write_results_per_combination(results, trades, timestamp)
#except Exception as exc:
# logging.error(f"generated an exception: {exc}")
# Write all results to a single CSV file
combined_filename = os.path.join(results_dir, f"{timestamp}_backtest_combined.csv")
combined_fieldnames = [
"timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
"max_drawdown", "avg_trade", "profit_ratio", "final_usd"
]
def format_row(row):
# Format percentages and floats as in your example
return {
"timeframe": row["timeframe"],
"stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
"n_trades": row["n_trades"],
"n_stop_loss": row["n_stop_loss"],
"win_rate": f"{row['win_rate']*100:.2f}%",
"max_drawdown": f"{row['max_drawdown']*100:.2f}%",
"avg_trade": f"{row['avg_trade']*100:.2f}%",
"profit_ratio": f"{row['profit_ratio']*100:.2f}%",
"final_usd": f"{row['final_usd']:.2f}",
}
with open(combined_filename, "w", newline="") as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=combined_fieldnames, delimiter='\t')
writer.writeheader()
for row in all_results_rows:
writer.writerow(format_row(row))
logging.info(f"Combined results written to {combined_filename}")