From a25499e016788076c4a429c8195a6c95cba29572 Mon Sep 17 00:00:00 2001 From: Simon Moisy Date: Mon, 18 Aug 2025 09:57:01 +0800 Subject: [PATCH] Refactor backtesting logic to include slippage estimation, enhancing trade execution realism. Update load_data function to accept a CSV file parameter. Improve summary output with slippage metrics and adjust main script for new slippage configuration. Correct typos in project documentation. --- .cursor/rules/project.mdc | 4 +- main.py | 163 ++++++++++++++++++++++++++++++++------ 2 files changed, 139 insertions(+), 28 deletions(-) diff --git a/.cursor/rules/project.mdc b/.cursor/rules/project.mdc index 28ce870..c5f004d 100644 --- a/.cursor/rules/project.mdc +++ b/.cursor/rules/project.mdc @@ -10,10 +10,10 @@ Unify the project structure and interraction with tools and console ### System tools - **ALWAYS** use UV for package management -- **ALWAYS** use windows PowerShell command for terminal +- **ALWAYS** use Arch linux compatible command for terminal ### Coding patterns -- **ALWYAS** check the arguments and methods before use to avoid errors with whron parameters or names +- **ALWAYS** check the arguments and methods before use to avoid errors with wrong parameters or names - If in doubt, check [CONTEXT.md](mdc:CONTEXT.md) file and [architecture.md](mdc:docs/architecture.md) - **PREFER** ORM pattern for databases with SQLAclhemy. 
- **DO NOT USE** emoji in code and comments diff --git a/main.py b/main.py index 0f4b660..8f0fe91 100644 --- a/main.py +++ b/main.py @@ -7,8 +7,8 @@ import math import os -def load_data(since, until): - df = pd.read_csv('../data/btcusd_1-min_data.csv') +def load_data(since, until, csv_file): + df = pd.read_csv(csv_file) df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s') df = df[(df['Timestamp'] >= pd.Timestamp(since)) & (df['Timestamp'] <= pd.Timestamp(until))] return df @@ -131,7 +131,45 @@ def precompute_1min_slice_indices(df_aggregated, df_1min): indices.append((start_idx, end_idx)) return indices, sorted_1min -def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=1000): +def estimate_slippage_rate(trade_usd_size, minute_row, base_slippage_rate=0.0003, impact_threshold_pct=0.10, impact_slope=0.0010): + """ + Estimate total slippage rate (decimal) using a hybrid model: + - Base slippage: fixed base_slippage_rate (e.g., 0.0003 = 3 bps) + - Extra slippage: if trade size (USD) > impact_threshold_pct * 1-min USD volume, + add impact_slope * (trade_size/threshold - 1) + + Args: + trade_usd_size (float): Trade notional in USD before slippage. + minute_row (pd.Series|None): 1-min bar with 'Volume' and a price ('Close' preferred, fallback 'Open'). + base_slippage_rate (float): Base slippage in decimal. + impact_threshold_pct (float): Threshold as fraction of 1-min volume (e.g., 0.10 = 10%). + impact_slope (float): Rate added per 1x over threshold (decimal). + + Returns: + float: total slippage rate (>= base_slippage_rate). 
+ """ + if minute_row is None: + return float(base_slippage_rate) + try: + minute_base_vol = float(minute_row.get('Volume', 0.0) or 0.0) + minute_price = float(minute_row.get('Close', minute_row.get('Open', 0.0)) or 0.0) + minute_quote_vol = minute_base_vol * minute_price + except Exception: + minute_quote_vol = 0.0 + + if minute_quote_vol <= 0 or impact_threshold_pct <= 0: + return float(base_slippage_rate) + + threshold_quote = minute_quote_vol * impact_threshold_pct + if trade_usd_size <= threshold_quote: + return float(base_slippage_rate) + + over_ratio = (trade_usd_size / threshold_quote) - 1.0 + extra_slippage = max(0.0, impact_slope * over_ratio) + return float(base_slippage_rate + extra_slippage) + +def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=1000, + base_slippage_rate=0.0003, impact_threshold_pct=0.10, impact_slope=0.0010): """ Backtest trading strategy based on meta supertrend logic (all three supertrends agree). Uses signal transitions and open prices for entry/exit to match original implementation. 
@@ -166,6 +204,7 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 # Precompute 1-min slice indices for each aggregated bar slice_indices, sorted_1min = precompute_1min_slice_indices(df_aggregated, df_1min) df_1min_sorted = df_1min.iloc[sorted_1min].reset_index(drop=True) + one_min_timestamps_sorted = df_1min_sorted['Timestamp'].values in_position = False init_usd = 1000 @@ -177,6 +216,8 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 trade_results = [] entry_price = None entry_time = None + total_slippage_usd = 0.0 + total_traded_usd = 0.0 total_steps = len(df_aggregated) - 1 for i in range(1, len(df_aggregated)): @@ -208,22 +249,29 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 stop_triggered = True in_position = False - # More realistic stop loss fill logic + # More realistic stop loss fill logic with slippage if stop_row['Open'] < stop_loss_threshold: - exit_price = stop_row['Open'] + base_exit_price = stop_row['Open'] else: - exit_price = stop_loss_threshold - + base_exit_price = stop_loss_threshold + trade_usd_size = float(coin * base_exit_price) + slip_rate = estimate_slippage_rate(trade_usd_size, stop_row, base_slippage_rate, impact_threshold_pct, impact_slope) + exit_price = base_exit_price * (1.0 - slip_rate) + exit_time = stop_row['Timestamp'] gross_usd = coin * exit_price fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False) usd = gross_usd - fee trade_pnl = (exit_price - entry_price) / entry_price if entry_price else 0 + total_slippage_usd += trade_usd_size * slip_rate + total_traded_usd += trade_usd_size trade_results.append(trade_pnl) trade_log.append({ 'type': 'stop_loss', 'time': exit_time, - 'price': exit_price, + 'base_price': base_exit_price, + 'effective_price': exit_price, + 'slippage_rate': slip_rate, 'usd': usd, 'coin': 0, 'pnl': trade_pnl, @@ -242,14 +290,28 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, 
progress_step=100 in_position = True fee = calculate_okx_taker_maker_fee(usd, is_maker=False) usd_after_fee = usd - fee - coin = usd_after_fee / open_price # Use open price - entry_price = open_price + # Slippage on buy increases price + try: + ts64 = np.datetime64(timestamp) + idx_min = int(np.searchsorted(one_min_timestamps_sorted, ts64, side='left')) + minute_row = df_1min_sorted.iloc[idx_min] if 0 <= idx_min < len(df_1min_sorted) else None + except Exception: + minute_row = None + trade_usd_size = float(usd_after_fee) + slip_rate = estimate_slippage_rate(trade_usd_size, minute_row, base_slippage_rate, impact_threshold_pct, impact_slope) + effective_entry_price = open_price * (1.0 + slip_rate) + coin = usd_after_fee / effective_entry_price + entry_price = effective_entry_price entry_time = timestamp usd = 0 + total_slippage_usd += trade_usd_size * slip_rate + total_traded_usd += trade_usd_size trade_log.append({ 'type': 'buy', 'time': timestamp, - 'price': open_price, + 'base_price': open_price, + 'effective_price': effective_entry_price, + 'slippage_rate': slip_rate, 'usd': usd, 'coin': coin, 'fee': fee @@ -258,17 +320,31 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 # Exit condition: signal changes TO bearish (prev == 1 and curr == -1) elif in_position and prev_mt == 1 and curr_mt == -1: in_position = False - exit_price = open_price # Use open price + # Slippage on sell reduces price + try: + ts64 = np.datetime64(timestamp) + idx_min = int(np.searchsorted(one_min_timestamps_sorted, ts64, side='left')) + minute_row = df_1min_sorted.iloc[idx_min] if 0 <= idx_min < len(df_1min_sorted) else None + except Exception: + minute_row = None + base_exit_price = open_price + trade_usd_size = float(coin * base_exit_price) + slip_rate = estimate_slippage_rate(trade_usd_size, minute_row, base_slippage_rate, impact_threshold_pct, impact_slope) + exit_price = base_exit_price * (1.0 - slip_rate) exit_time = timestamp - gross_usd = coin * 
open_price + gross_usd = coin * exit_price fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False) usd = gross_usd - fee trade_pnl = (exit_price - entry_price) / entry_price if entry_price else 0 + total_slippage_usd += trade_usd_size * slip_rate + total_traded_usd += trade_usd_size trade_results.append(trade_pnl) trade_log.append({ 'type': 'sell', 'time': exit_time, - 'price': exit_price, + 'base_price': base_exit_price, + 'effective_price': exit_price, + 'slippage_rate': slip_rate, 'usd': usd, 'coin': 0, 'pnl': trade_pnl, @@ -286,15 +362,29 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 if in_position: final_open_price = df_aggregated['Open'].iloc[-1] # Use open price for consistency final_timestamp = df_aggregated['Timestamp'].iloc[-1] - gross_usd = coin * final_open_price + try: + ts64 = np.datetime64(final_timestamp) + idx_min = int(np.searchsorted(one_min_timestamps_sorted, ts64, side='left')) + minute_row = df_1min_sorted.iloc[idx_min] if 0 <= idx_min < len(df_1min_sorted) else None + except Exception: + minute_row = None + base_exit_price = final_open_price + trade_usd_size = float(coin * base_exit_price) + slip_rate = estimate_slippage_rate(trade_usd_size, minute_row, base_slippage_rate, impact_threshold_pct, impact_slope) + final_effective_price = base_exit_price * (1.0 - slip_rate) + gross_usd = coin * final_effective_price fee = calculate_okx_taker_maker_fee(gross_usd, is_maker=False) usd = gross_usd - fee - trade_pnl = (final_open_price - entry_price) / entry_price if entry_price else 0 + trade_pnl = (final_effective_price - entry_price) / entry_price if entry_price else 0 + total_slippage_usd += trade_usd_size * slip_rate + total_traded_usd += trade_usd_size trade_results.append(trade_pnl) trade_log.append({ 'type': 'forced_close', 'time': final_timestamp, - 'price': final_open_price, + 'base_price': base_exit_price, + 'effective_price': final_effective_price, + 'slippage_rate': slip_rate, 'usd': usd, 
'coin': 0, 'pnl': trade_pnl, @@ -365,7 +455,8 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 writer = csv.writer(f) summary_header = [ 'elapsed_time_sec', 'total_return', 'max_drawdown', 'sharpe_ratio', - 'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees' + 'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees', + 'total_slippage_usd', 'avg_slippage_bps' ] summary_values = [ f"{time.time() - start_time:.2f}", @@ -377,7 +468,9 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 f"${equity_arr[-1]:.2f}", f"${equity_arr[0]:.2f}", str(nb_stop_loss), - f"${total_fees:.4f}" + f"${total_fees:.4f}", + f"${total_slippage_usd:.4f}", + f"{(total_slippage_usd / total_traded_usd * 10000.0) if total_traded_usd > 0 else 0:.2f}" ] writer.writerow(summary_header) writer.writerow(summary_values) @@ -402,22 +495,30 @@ def backtest(timeframe, df_aggregated, df_1min, stop_loss_pct, progress_step=100 'final_equity': equity_arr[-1], 'initial_equity': equity_arr[0], 'num_stop_losses': nb_stop_loss, - 'total_fees': total_fees if trade_log else 0 + 'total_fees': total_fees if trade_log else 0, + 'total_slippage_usd': total_slippage_usd, + 'avg_slippage_bps': (total_slippage_usd / total_traded_usd * 10000.0) if total_traded_usd > 0 else 0.0 } if __name__ == "__main__": - timeframes = ["5min", "15min", "30min", "1h", "4h", "1d"] + timeframes = ["5min", "15min", "30min", "1h", "4h", "1d", "2d"] # timeframes = ["5min", "15min", "1h", "4h", "1d"] # timeframes = ["30min"] - stoplosses = [0.03, 0.05, 0.1] + stoplosses = [0.1, 0.2, 0.3, 0.4, 0.5] + # Slippage configuration (OKX Spot): base in bps, plus volume-impact model + slippage_base_bps = 10 # 10 bps base slippage (realistic, conservative) + impact_threshold_pct = 0.10 # e.g., start impact beyond 10% of 1-min volume + impact_slope = 0.0010 # incremental slippage per 1x over threshold - df_1min = 
load_data('2021-11-01', '2024-10-16') + # df_1min = load_data('2021-11-01', '2024-10-16', '../data/btcusd_1-min_data.csv') + df_1min = load_data('2021-11-01', '2025-08-19', '../data/btcusd_okx_1-min_data.csv') # Prepare summary CSV summary_csv_path = "backtest_summary.csv" summary_header = [ 'timeframe', 'stop_loss', 'total_return', 'max_drawdown', 'sharpe_ratio', - 'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees' + 'win_rate', 'num_trades', 'final_equity', 'initial_equity', 'num_stop_losses', 'total_fees', + 'total_slippage_usd', 'avg_slippage_bps' ] with open(summary_csv_path, 'w', newline='') as summary_file: writer = csv.DictWriter(summary_file, fieldnames=summary_header) @@ -426,7 +527,15 @@ if __name__ == "__main__": df_aggregated = aggregate_data(df_1min, timeframe) df_aggregated = add_supertrend_indicators(df_aggregated) for stop_loss_pct in stoplosses: - summary = backtest(timeframe, df_aggregated, df_1min, stop_loss_pct=stop_loss_pct) + summary = backtest( + timeframe, + df_aggregated, + df_1min, + stop_loss_pct=stop_loss_pct, + base_slippage_rate=slippage_base_bps / 10000.0, + impact_threshold_pct=impact_threshold_pct, + impact_slope=impact_slope + ) if summary is not None: # Format values for CSV (e.g., floats as rounded strings) summary_row = { @@ -440,6 +549,8 @@ if __name__ == "__main__": 'final_equity': f"${summary['final_equity']:.2f}", 'initial_equity': f"${summary['initial_equity']:.2f}", 'num_stop_losses': summary['num_stop_losses'], - 'total_fees': f"${summary['total_fees']:.4f}" + 'total_fees': f"${summary['total_fees']:.4f}", + 'total_slippage_usd': f"${summary['total_slippage_usd']:.4f}", + 'avg_slippage_bps': f"{summary['avg_slippage_bps']:.2f}" } writer.writerow(summary_row)