Enhance backtesting functionality by adding date range parameters to load_data, improving ATR calculation, and refining trade logic with meta Supertrend signals. Update README with detailed usage instructions and requirements. Add CSV logging for trade results and performance metrics. Include ta library as a dependency in pyproject.toml.

This commit is contained in:
Simon Moisy
2025-08-12 10:33:17 +08:00
parent 56dca05a3e
commit 21b14d4fe4
4 changed files with 549 additions and 65 deletions

368
main.py
View File

@@ -1,12 +1,16 @@
import pandas as pd
import numpy as np
from ta.volatility import AverageTrueRange
import time
import csv
import math
import os
def load_data(since, until=None, *, path='../data/btcusd_1-min_data.csv'):
    """Load the 1-minute price CSV and keep only rows inside a date range.

    The 'Timestamp' column is parsed as Unix seconds and converted to pandas
    datetimes before filtering.

    Args:
        since: Inclusive lower bound; anything ``pd.Timestamp`` accepts.
        until: Inclusive upper bound; anything ``pd.Timestamp`` accepts.
            ``None`` (the default) leaves the range open-ended, which keeps
            the older single-argument call ``load_data(since)`` working.
        path: Location of the CSV file. Defaults to the path that was
            previously hard-coded in this function.

    Returns:
        pd.DataFrame: The filtered rows, with 'Timestamp' as datetimes.
    """
    df = pd.read_csv(path)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

    in_range = df['Timestamp'] >= pd.Timestamp(since)
    if until is not None:
        in_range &= df['Timestamp'] <= pd.Timestamp(until)
    return df[in_range]
def aggregate_data(df, timeframe):
@@ -38,10 +42,32 @@ def calculate_supertrend(df, period, multiplier):
Returns:
pd.Series: Supertrend values.
"""
# Ensure we have enough data for ATR calculation
if len(df) < period + 1:
print(f"Warning: Not enough data for ATR period {period}. Need at least {period + 1} rows, got {len(df)}")
return pd.Series([np.nan] * len(df), index=df.index)
high = df['High'].values
low = df['Low'].values
close = df['Close'].values
atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range().values
# Calculate True Range first
tr = np.zeros_like(close)
for i in range(1, len(close)):
tr[i] = max(
high[i] - low[i], # Current high - current low
abs(high[i] - close[i-1]), # Current high - previous close
abs(low[i] - close[i-1]) # Current low - previous close
)
# Calculate ATR: seed with the simple mean of the first `period` true ranges, then smooth recursively
atr = np.zeros_like(close)
atr[period] = np.mean(tr[1:period+1]) # First ATR value
for i in range(period+1, len(close)):
atr[i] = (atr[i-1] * (period-1) + tr[i]) / period # Exponential-like smoothing
# Fill initial values with the first valid ATR
atr[:period] = atr[period] if atr[period] > 0 else 0.001
hl2 = (high + low) / 2
upperband = hl2 + (multiplier * atr)
@@ -105,23 +131,38 @@ def precompute_1min_slice_indices(df_aggregated, df_1min):
indices.append((start_idx, end_idx))
return indices, sorted_1min
def backtest(df_aggregated, df_1min, stop_loss_pct, progress_step=1000):
def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=1000):
"""
Backtest trading strategy based on Supertrend indicators with trailing stop loss.
Buys when all three Supertrend columns are positive (>0),
sells when any is negative (<0), or when trailing stop loss is hit.
Args:
df_aggregated (pd.DataFrame): Aggregated OHLCV data with Supertrend columns.
df_1min (pd.DataFrame): 1-minute OHLCV data.
stop_loss_pct (float): Trailing stop loss percentage (e.g., 0.02 for 2%).
progress_step (int): Step interval for progress display.
Backtest trading strategy based on meta supertrend logic (all three supertrends agree).
Uses signal transitions and open prices for entry/exit to match original implementation.
"""
start_time = time.time()
required_st_cols = ["supertrend_12_3.0", "supertrend_10_1.0", "supertrend_11_2.0"]
for col in required_st_cols:
if col not in df_aggregated.columns:
raise ValueError(f"Missing required Supertrend column: {col}")
# Calculate the trend direction for each supertrend (1 or -1; 0 only appears in the meta trend when they disagree)
trends = []
for col in required_st_cols:
# Convert supertrend values to trend direction based on close price position
trend = np.where(df_aggregated['Close'] > df_aggregated[col], 1, -1)
trends.append(trend)
# Stack trends and calculate meta trend (all must agree)
trends_arr = np.stack(trends, axis=1)
meta_trend = np.where((trends_arr[:,0] == trends_arr[:,1]) & (trends_arr[:,1] == trends_arr[:,2]),
trends_arr[:,0], 0)
meta_trend_signal = meta_trend #incorrect: should be lagging as it introduces lookahead bias.
# Next step: modify the OHLCV predictor so that it does not use Supertrend, or any other
# feature that introduces lookahead bias, and have it predict the next close price.
#
# Old code, not that efficient:
# Add signal lagging to avoid lookahead bias
# meta_trend_signal = np.roll(meta_trend, 1)
# meta_trend_signal[0] = 0 # No signal for first bar
# Precompute 1-min slice indices for each aggregated bar
slice_indices, sorted_1min = precompute_1min_slice_indices(df_aggregated, df_1min)
df_1min_sorted = df_1min.iloc[sorted_1min].reset_index(drop=True)
@@ -130,74 +171,275 @@ def backtest(df_aggregated, df_1min, stop_loss_pct, progress_step=1000):
init_usd = 1000
usd = init_usd
coin = 0
highest_price = None
nb_stop_loss = 0
trade_log = []
equity_curve = []
trade_results = []
entry_price = None
entry_time = None
total_steps = len(df_aggregated) - 1
for i in range(1, len(df_aggregated)):
st_vals = [df_aggregated[col][i] for col in required_st_cols]
all_positive = all(val > 0 for val in st_vals)
any_negative = any(val < 0 for val in st_vals)
open_price = df_aggregated['Open'][i] # Use open price for entry/exit
close_price = df_aggregated['Close'][i]
timestamp = df_aggregated['Timestamp'][i]
# Get previous and current meta trend signals
prev_mt = meta_trend_signal[i-1] if i > 0 else 0
curr_mt = meta_trend_signal[i]
# Buy condition: all Supertrend values positive
if not in_position and all_positive:
in_position = True
coin = usd / close_price
usd = 0
highest_price = close_price
# If in position, update highest price and check stop loss on 1-min data
elif in_position:
# Update highest price if new high on aggregated bar
if close_price > highest_price:
highest_price = close_price
# Track equity at each bar
equity = usd + coin * close_price
equity_curve.append((timestamp, equity))
# Use precomputed indices for this bar
# Check stop loss if in position
if in_position:
start_idx, end_idx = slice_indices[i-1]
df_1min_slice = df_1min_sorted.iloc[start_idx:end_idx]
stop_triggered = False
for _, row in df_1min_slice.iterrows():
# Update highest price if new high in 1-min bar
if row['Close'] > highest_price:
highest_price = row['Close']
# Trailing stop loss condition on 1-min close
if row['Close'] < highest_price * (1 - stop_loss_pct):
in_position = False
usd = coin * row['Close']
coin = 0
# print(f"Stop loss triggered at {row['Close']:.2f} on {row['Timestamp']}")
nb_stop_loss += 1
highest_price = None
if not df_1min_slice.empty:
stop_loss_threshold = entry_price * (1 - stop_loss_pct)
below_stop = df_1min_slice['Low'] < stop_loss_threshold
if below_stop.any():
first_idx = below_stop.idxmax()
stop_row = df_1min_slice.loc[first_idx]
stop_triggered = True
break
# If stop loss was triggered, skip further checks for this bar
in_position = False
# More realistic stop loss fill logic
if stop_row['Open'] < stop_loss_threshold:
exit_price = stop_row['Open']
else:
exit_price = stop_loss_threshold
exit_time = stop_row['Timestamp']
gross_usd = coin * exit_price
fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False)
usd = gross_usd - fee
trade_pnl = (exit_price - entry_price) / entry_price if entry_price else 0
trade_results.append(trade_pnl)
trade_log.append({
'type': 'stop_loss',
'time': exit_time,
'price': exit_price,
'usd': usd,
'coin': 0,
'pnl': trade_pnl,
'fee': fee
})
coin = 0
nb_stop_loss += 1
entry_price = None
entry_time = None
if stop_triggered:
continue
# Sell condition: any Supertrend value negative (on aggregated bar close)
if any_negative:
in_position = False
usd = coin * close_price
coin = 0
highest_price = None
# Entry condition: signal changes TO bullish (prev != 1 and curr == 1)
if not in_position and prev_mt != 1 and curr_mt == 1:
in_position = True
fee = calculate_okx_taker_maker_fee(usd, is_maker=False)
usd_after_fee = usd - fee
coin = usd_after_fee / open_price # Use open price
entry_price = open_price
entry_time = timestamp
usd = 0
trade_log.append({
'type': 'buy',
'time': timestamp,
'price': open_price,
'usd': usd,
'coin': coin,
'fee': fee
})
# Exit condition: signal changes TO bearish (prev == 1 and curr == -1)
elif in_position and prev_mt == 1 and curr_mt == -1:
in_position = False
exit_price = open_price # Use open price
exit_time = timestamp
gross_usd = coin * open_price
fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False)
usd = gross_usd - fee
trade_pnl = (exit_price - entry_price) / entry_price if entry_price else 0
trade_results.append(trade_pnl)
trade_log.append({
'type': 'sell',
'time': exit_time,
'price': exit_price,
'usd': usd,
'coin': 0,
'pnl': trade_pnl,
'fee': fee
})
coin = 0
entry_price = None
entry_time = None
if i % progress_step == 0 or i == total_steps:
percent = (i / total_steps) * 100
print(f"Progress: {percent:.1f}% ({i}/{total_steps})")
print(f"\rTimeframe: {timeframe},\tProgress: {percent:.1f}%\tCurrent equity: {equity:.2f}\033[K", end='', flush=True)
print(f"Total profit: {usd - init_usd}")
print(f"Number of stop losses: {nb_stop_loss}")
# Force close any open position at the end
if in_position:
final_open_price = df_aggregated['Open'].iloc[-1] # Use open price for consistency
final_timestamp = df_aggregated['Timestamp'].iloc[-1]
gross_usd = coin * final_open_price
fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False)
usd = gross_usd - fee
trade_pnl = (final_open_price - entry_price) / entry_price if entry_price else 0
trade_results.append(trade_pnl)
trade_log.append({
'type': 'forced_close',
'time': final_timestamp,
'price': final_open_price,
'usd': usd,
'coin': 0,
'pnl': trade_pnl,
'fee': fee
})
coin = 0
in_position = False
entry_price = None
print()
print(f"Timeframe: {timeframe},\tTotal profit: {usd - init_usd},\tNumber of stop losses: {nb_stop_loss}")
# --- Performance Metrics ---
equity_arr = np.array([e[1] for e in equity_curve])
# Handle edge cases for empty or invalid equity data
if len(equity_arr) == 0:
print("Warning: No equity data available")
return None
returns = np.diff(equity_arr) / equity_arr[:-1]
# Filter out infinite and NaN returns
returns = returns[np.isfinite(returns)]
total_return = (equity_arr[-1] - equity_arr[0]) / equity_arr[0] if equity_arr[0] != 0 else 0
running_max = np.maximum.accumulate(equity_arr)
if equity_arr[-1] <= 0.01:
max_drawdown = -1.0
else:
drawdowns = (equity_arr - running_max) / running_max
max_drawdown = drawdowns.min() if len(drawdowns) > 0 and np.isfinite(drawdowns).any() else 0
if len(returns) > 1 and np.std(returns) > 1e-9:
sharpe = np.mean(returns) / np.std(returns) * math.sqrt(252)
else:
sharpe = 0
wins = [1 for r in trade_results if r > 0]
win_rate = len(wins) / len(trade_results) if trade_results else 0
num_trades = len(trade_results)
print(f"Performance Metrics:")
print(f" Total Return: {total_return*100:.2f}%")
print(f" Max Drawdown: {max_drawdown*100:.2f}%")
print(f" Sharpe Ratio: {sharpe:.2f}")
print(f" Win Rate: {win_rate*100:.2f}%")
print(f" Number of Trades: {num_trades}")
print(f" Final Equity: ${equity_arr[-1]:.2f}")
print(f" Initial Equity: ${equity_arr[0]:.2f}")
# --- Save Trade Log ---
log_dir = "backtest_logs"
os.makedirs(log_dir, exist_ok=True)
# Format stop_loss_pct for filename (e.g., 0.05 -> 0p05)
stop_loss_str = f"{stop_loss_pct:.2f}".replace('.', 'p')
log_path = os.path.join(log_dir, f"trade_log_{timeframe}_sl{stop_loss_str}.csv")
if trade_log:
all_keys = set()
for entry in trade_log:
all_keys.update(entry.keys())
all_keys = list(all_keys)
trade_log_filled = []
for entry in trade_log:
filled_entry = {k: entry.get(k, None) for k in all_keys}
trade_log_filled.append(filled_entry)
# Calculate total fees for this backtest
total_fees = sum(entry.get('fee', 0) for entry in trade_log)
# Write summary header row, then trade log header and rows
with open(log_path, 'w', newline='') as f:
writer = csv.writer(f)
summary_header = [
'elapsed_time_sec', 'total_return', 'max_drawdown', 'sharpe_ratio',
'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees'
]
summary_values = [
f"{time.time() - start_time:.2f}",
f"{total_return*100:.2f}%",
f"{max_drawdown*100:.2f}%",
f"{sharpe:.2f}",
f"{win_rate*100:.2f}%",
str(num_trades),
f"${equity_arr[-1]:.2f}",
f"${equity_arr[0]:.2f}",
str(nb_stop_loss),
f"${total_fees:.4f}"
]
writer.writerow(summary_header)
writer.writerow(summary_values)
writer.writerow([]) # Blank row for separation
dict_writer = csv.DictWriter(f, fieldnames=all_keys)
dict_writer.writeheader()
dict_writer.writerows(trade_log_filled)
print(f"Trade log saved to {log_path}")
else:
print("No trades to log.")
# Return summary metrics (excluding elapsed time)
return {
'timeframe': timeframe,
'stop_loss': stop_loss_pct,
'total_return': total_return,
'max_drawdown': max_drawdown,
'sharpe_ratio': sharpe,
'win_rate': win_rate,
'num_trades': num_trades,
'final_equity': equity_arr[-1],
'initial_equity': equity_arr[0],
'num_stop_losses': nb_stop_loss,
'total_fees': total_fees if trade_log else 0
}
if __name__ == "__main__":
    # Grid sweep: run the backtest for every (timeframe, stop loss) pair and
    # collect one summary row per run in a single CSV file.
    timeframes = ["5min", "15min", "30min", "1h", "4h", "1d"]
    # timeframes = ["5min", "15min", "1h", "4h", "1d"]
    # timeframes = ["30min"]
    stoplosses = [0.03, 0.05, 0.1]

    # The 1-minute data is loaded once and shared by every run.
    df_1min = load_data('2021-11-01', '2024-10-16')

    # Prepare summary CSV
    summary_csv_path = "backtest_summary.csv"
    summary_header = [
        'timeframe', 'stop_loss', 'total_return', 'max_drawdown', 'sharpe_ratio',
        'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees'
    ]
    with open(summary_csv_path, 'w', newline='') as summary_file:
        writer = csv.DictWriter(summary_file, fieldnames=summary_header)
        writer.writeheader()
        for timeframe in timeframes:
            # Aggregation and indicators depend only on the timeframe, so they
            # are computed once here and reused for every stop-loss value.
            df_aggregated = aggregate_data(df_1min, timeframe)
            df_aggregated = add_supertrend_indicators(df_aggregated)
            for stop_loss_pct in stoplosses:
                summary = backtest(timeframe, df_aggregated, df_1min, stop_loss_pct=stop_loss_pct)
                # backtest returns None when it produced no equity data.
                if summary is None:
                    continue
                # Format values for CSV (e.g., floats as rounded strings)
                writer.writerow({
                    'timeframe': summary['timeframe'],
                    'stop_loss': summary['stop_loss'],
                    'total_return': f"{summary['total_return']*100:.2f}%",
                    'max_drawdown': f"{summary['max_drawdown']*100:.2f}%",
                    'sharpe_ratio': f"{summary['sharpe_ratio']:.2f}",
                    'win_rate': f"{summary['win_rate']*100:.2f}%",
                    'num_trades': summary['num_trades'],
                    'final_equity': f"${summary['final_equity']:.2f}",
                    'initial_equity': f"${summary['initial_equity']:.2f}",
                    'num_stop_losses': summary['num_stop_losses'],
                    'total_fees': f"${summary['total_fees']:.4f}",
                })