import pandas as pd
import numpy as np
from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple
from cycle_detector import CycleDetector
import csv
import logging
import concurrent.futures
import os
import psutil

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)


def get_optimal_workers():
    """Determine optimal number of worker processes based on system resources"""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)

    # Heuristic: use 75% of cores, but cap based on available memory
    # Assume each worker needs ~2GB for large datasets
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)
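
# For example, on an 8-core machine with 32 GB of RAM this heuristic yields
# min(int(8 * 0.75), int(32 / 2)) = min(6, 16) = 6 worker processes.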


def load_data(file_path, start_date, stop_date):
    """Load data with optimized dtypes and filtering"""
    # Define optimized dtypes
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }

    # Read data with original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)

    # Convert timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')

    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]

    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()

    return data.set_index('timestamp')
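
# load_data assumes a 1-minute OHLCV CSV with a Unix-seconds 'Timestamp' column, e.g.
#   Timestamp,Open,High,Low,Close,Volume
#   1577836800,7195.2,7196.2,7178.6,7179.0,12.3
# (the exact schema is an assumption, inferred from the dtypes and unit='s' above).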


def process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd):
    """Process a single month for a given timeframe with all stop loss values"""
    month_df = month_df.copy().reset_index(drop=True)

    # Only calculate trends once per month-timeframe combination
    trend_detector = TrendDetectorSimple(month_df, verbose=False)
    analysis_results = trend_detector.detect_trends()

    # Calculate backtest for each stop_loss_pct
    results_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct
        )

        # Process results
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)

        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')

        # Calculate max drawdown
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown
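
        # e.g. for per-trade profits [+5, -3, +2, -6] the cumulative curve is
        # [5, 2, 4, -2], the running peak stays at 5, and max_drawdown ends up 7.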

        # Create row
        row = {
            "timeframe": rule_name,
            "month": str(month_df['timestamp'].iloc[0].to_period('M')),
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio
        }
        results_rows.append(row)

    return results_rows


def process_timeframe(timeframe_info):
    """Process an entire timeframe"""
    rule, rule_name, data_1min, stop_loss_pcts, initial_usd = timeframe_info

    # Resample data if needed
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()

    df = df.reset_index()
    df['month'] = df['timestamp'].dt.to_period('M')
    results_rows = []

    # Process each month
    for month, month_df in df.groupby('month'):
        if len(month_df) < 10:  # Skip very small months
            continue

        logging.info(f"Processing: timeframe={rule_name}, month={month}")

        try:
            month_results = process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd)
            results_rows.extend(month_results)

            # Return early once more than 100 rows have accumulated to limit memory
            # buildup; note that any remaining months for this timeframe are skipped.
            if len(results_rows) > 100:
                return results_rows
        except Exception as e:
            logging.error(f"Error processing {rule_name}, month={month}: {str(e)}")

    return results_rows
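
# process_timeframe relies on data_1min having a DatetimeIndex (set in load_data),
# which is what allows .resample(rule) to build the coarser OHLCV bars.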


def write_results_chunk(filename, fieldnames, rows, write_header=False, initial_usd=None):
    """Write a chunk of results to a CSV file"""
    mode = 'w' if write_header else 'a'

    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            # Record the starting capital as a comment line above the CSV header
            if initial_usd is not None:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()

        for row in rows:
            writer.writerow(row)
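
# Example call (values illustrative): write the header plus one summary row.
# write_results_chunk("out.csv", ["timeframe", "n_trades"],
#                     [{"timeframe": "1H", "n_trades": 42}],
#                     write_header=True, initial_usd=10000)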


def aggregate_results(all_rows):
    """Aggregate results per stop_loss_pct and per rule (timeframe)"""
    from collections import defaultdict

    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        n_months = len(rows)
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])

        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
        })
    return summary_rows
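
# Note: profit_ratio is float('inf') for months with no losing trades, so the
# np.mean above returns inf for any (timeframe, stop_loss_pct) group containing
# such a month; clipping or excluding those months is one possible refinement.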


if __name__ == "__main__":
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000

    timeframes = {
        # "1T": "1min",
        "15T": "15min",
        "1H": "1h",
        "6H": "6h",
        "1D": "1D",
    }
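    # NOTE: the 'T' and 'H' offset aliases above are deprecated in pandas >= 2.2;
    # '15min', '1h', '6h' and '1D' are the preferred spellings for resample().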

    stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10]

    # Load data once
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")

    # Set up result file
    filename = f"backtest_results_{start_date}_{stop_date}_multi_timeframe_stoploss.csv"
    fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio"]

    # Initialize output file with the header (the final summary write below reopens
    # the file in 'w' mode, so this mainly guarantees the file exists early on)
    write_results_chunk(filename, fieldnames, [], write_header=True, initial_usd=initial_usd)

    # Prepare tasks
    tasks = [
        (rule, name, data_1min, stop_loss_pcts, initial_usd)
        for rule, name in timeframes.items()
    ]
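
    # Each task tuple embeds the full data_1min frame; ProcessPoolExecutor pickles
    # the arguments for every submitted task, so memory use grows with the number
    # of timeframes. This matches the ~2 GB-per-worker assumption in
    # get_optimal_workers().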

    # Determine optimal worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")

    # Process tasks with optimized concurrency
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task): task[1] for task in tasks}

        # Collect all results
        all_results = []
        for future in concurrent.futures.as_completed(futures):
            timeframe_name = futures[future]
            try:
                results = future.result()
                if results:
                    # Per-month rows are only accumulated here; the CSV gets the summary below
                    all_results.extend(results)
            except Exception as exc:
                logging.error(f"{timeframe_name} generated an exception: {exc}")

    # Write summary rows
    summary_rows = aggregate_results(all_results)
    write_results_chunk(filename, fieldnames, summary_rows, write_header=True, initial_usd=initial_usd)  # Only write summary

    logging.info(f"Results written to {filename}")