- Added GSheetBatchPusher class to handle background updates to Google Sheets.
- Refactored write_results_per_combination function to write results directly to Google Sheets instead of CSV files.
- Updated process_timeframe function to handle a single stop loss percentage per task.
- Introduced a global queue for batching results and trades for efficient updates (see the sketch just below).
- Enhanced error handling for Google Sheets API quota limits.
- Adjusted main execution flow to start the batch pusher and ensure all results are pushed after processing.
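In outline, worker processes enqueue (results, trades) tuples and a single daemon thread drains the queue and pushes one batch per interval. A minimal sketch of that producer/consumer pattern for orientation only; the names here (work_queue, pusher) are illustrative, the real class and field layouts are in the script below:

    import queue
    import threading
    import time

    work_queue = queue.Queue()

    def pusher(interval=60):
        # Drain everything queued so far, push it as one batch, then wait for the next tick.
        while True:
            batch = []
            while True:
                try:
                    batch.append(work_queue.get_nowait())
                except queue.Empty:
                    break
            if batch:
                print(f"pushing {len(batch)} queued result sets")  # stand-in for the Sheets call
            time.sleep(interval)

    threading.Thread(target=pusher, daemon=True).start()
    work_queue.put((["result row"], ["trade row"]))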
import pandas as pd
import numpy as np
from trend_detector_simple import TrendDetectorSimple
import csv
import logging
import concurrent.futures
import os
import psutil
import datetime
import gspread
from google.oauth2.service_account import Credentials
from collections import defaultdict
import threading
import queue
import time
import math

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)

# Global queue for batching Google Sheets updates
results_queue = queue.Queue()

# Background thread that pushes queued updates to Google Sheets at a fixed interval
class GSheetBatchPusher(threading.Thread):
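    """Daemon thread that drains the results queue and writes batches to Google Sheets.

    Worker processes put (results_rows, trade_rows) tuples on the queue; every
    `interval` seconds this thread drains whatever has accumulated and pushes it in
    one call to write_results_per_combination_gsheet. A final push runs on stop().
    """
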
    def __init__(self, queue, timestamp, spreadsheet_name, interval=60):
        super().__init__(daemon=True)
        self.queue = queue
        self.timestamp = timestamp
        self.spreadsheet_name = spreadsheet_name
        self.interval = interval
        self._stop_event = threading.Event()

    def run(self):
        while not self._stop_event.is_set():
            self.push_all()
            # Wait on the stop event instead of sleeping so stop() is honoured promptly
            self._stop_event.wait(self.interval)
        # Final push on stop
        self.push_all()

    def stop(self):
        self._stop_event.set()

    def push_all(self):
        batch_results = []
        batch_trades = []
        while True:
            try:
                results, trades = self.queue.get_nowait()
                batch_results.extend(results)
                batch_trades.extend(trades)
            except queue.Empty:
                break

        if batch_results or batch_trades:
            write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)

def get_optimal_workers():
    """Determine optimal number of worker processes based on system resources"""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)
    # Heuristic: use 75% of cores, but cap based on available memory
    # Assume each worker needs ~2GB for large datasets
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)

def load_data(file_path, start_date, stop_date):
    """Load data with optimized dtypes and filtering"""
    # Define optimized dtypes
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }

    # Read data with original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)

    # Convert Unix-second timestamps to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')

    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]

    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()

    return data.set_index('timestamp')

def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd, debug=False):
    """Process the entire timeframe with all stop loss values (no monthly split)"""
    df = df.copy().reset_index(drop=True)
    trend_detector = TrendDetectorSimple(df, verbose=False)

    results_rows = []
    trade_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            min1_df,
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct,
            debug=debug
        )
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')

        # Max drawdown on the cumulative profit curve (in profit-pct terms)
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown

        # Compound the per-trade returns to get the final USD balance
        final_usd = initial_usd
        for trade in trades:
            final_usd *= (1 + trade['profit_pct'])

        row = {
            "timeframe": rule_name,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio,
            "initial_usd": initial_usd,
            "final_usd": final_usd,
        }
        results_rows.append(row)

        for trade in trades:
            trade_rows.append({
                "timeframe": rule_name,
                "stop_loss_pct": stop_loss_pct,
                "entry_time": trade.get("entry_time"),
                "exit_time": trade.get("exit_time"),
                "entry_price": trade.get("entry"),
                "exit_price": trade.get("exit"),
                "profit_pct": trade.get("profit_pct"),
                "type": trade.get("type", ""),
            })

        logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
        if debug:
            for trade in trades:
                if trade.get('type') == 'STOP':
                    print(trade)
            for trade in trades:
                if trade['profit_pct'] < -0.09:  # losses at or near the -10% mark
                    print("Large loss trade:", trade)
    return results_rows, trade_rows

def process_timeframe(timeframe_info, debug=False):
    """Process a single (timeframe, stop_loss_pct) combination (no monthly split)"""
    rule, data_1min, stop_loss_pct, initial_usd = timeframe_info
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()
    df = df.reset_index()
    # Only process one stop loss
    results_rows, all_trade_rows = process_timeframe_data(data_1min, df, [stop_loss_pct], rule, initial_usd, debug=debug)
    return results_rows, all_trade_rows

def write_results_chunk(filename, fieldnames, rows, write_header=False, initial_usd=None):
    """Write a chunk of results to a CSV file"""
    mode = 'w' if write_header else 'a'

    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            # initial_usd is passed in explicitly rather than read from a module-level global
            if initial_usd is not None:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()

        for row in rows:
            # Only keep keys that are in fieldnames
            filtered_row = {k: v for k, v in row.items() if k in fieldnames}
            writer.writerow(filtered_row)

def aggregate_results(all_rows, initial_usd=10000):
    """Aggregate results per stop_loss_pct and per rule (timeframe)"""
    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])

        # Calculate final USD (initial_usd is the fallback when a row lacks final_usd)
        final_usd = np.mean([r.get('final_usd', initial_usd) for r in rows])

        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
            "initial_usd": initial_usd,
            "final_usd": final_usd,
        })
    return summary_rows

def write_results_per_combination_gsheet(results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
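    """Push result and trade rows to the named Google Sheets spreadsheet.

    Results go to a "Results" worksheet; trades are split into one worksheet per
    (timeframe, stop-loss) combination. The service account in
    credentials/service_account.json needs edit access to the spreadsheet.
    On a 429 quota error the whole batch is re-queued so the batch pusher can
    retry it on a later interval.
    """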
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]
    creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
    gc = gspread.authorize(creds)
    sh = gc.open(spreadsheet_name)

    try:
        worksheet = sh.worksheet("Results")
    except gspread.exceptions.WorksheetNotFound:
        worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")

    # Clear the worksheet before writing new results
    worksheet.clear()

    # Field names matching the result rows produced by process_timeframe_data
    fieldnames = [
        "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
        "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
    ]

    def to_native(val):
        # Convert numpy scalars/arrays, datetimes, inf and NaN into values the Sheets API accepts
        if isinstance(val, (np.generic, np.ndarray)):
            val = val.item()
        if hasattr(val, 'isoformat'):
            return val.isoformat()
        # Handle inf, -inf, nan
        if isinstance(val, float):
            if math.isinf(val):
                return "∞" if val > 0 else "-∞"
            if math.isnan(val):
                return ""
        return val

    # Write header if sheet is empty (it always is after clear())
    if len(worksheet.get_all_values()) == 0:
        worksheet.append_row(fieldnames)

    # Rows are appended one API call at a time, which counts against the Sheets quota
    for row in results_rows:
        values = [to_native(row.get(field, "")) for field in fieldnames]
        worksheet.append_row(values)

    trades_fieldnames = [
        "entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
    ]
    trades_by_combo = defaultdict(list)

    for trade in trade_rows:
        tf = trade.get("timeframe")
        sl = trade.get("stop_loss_pct")
        trades_by_combo[(tf, sl)].append(trade)

    for (tf, sl), trades in trades_by_combo.items():
        sl_percent = int(round(sl * 100))
        sheet_name = f"Trades_{tf}_ST{sl_percent}%"

        try:
            trades_ws = sh.worksheet(sheet_name)
        except gspread.exceptions.WorksheetNotFound:
            trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")

        # Clear the trades worksheet before writing new trades
        trades_ws.clear()

        if len(trades_ws.get_all_values()) == 0:
            trades_ws.append_row(trades_fieldnames)

        for trade in trades:
            trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
            try:
                trades_ws.append_row(trade_row)
            except gspread.exceptions.APIError as e:
                if '429' in str(e):
                    logging.warning(f"Google Sheets API quota exceeded (429). Will retry on the next batch push. Sheet: {sheet_name}")
                    # Re-queue the failed batch for retry
                    results_queue.put((results_rows, trade_rows))
                    return  # Stop pushing this batch; it will be retried on the next interval
                else:
                    raise

if __name__ == "__main__":
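    # Overall flow: load the 1-minute data once, fan out (timeframe, stop-loss)
    # combinations to worker processes, queue each finished result for the
    # background Sheets pusher, then write a combined tab-delimited summary file.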
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000
    debug = False

    results_dir = "results"
    os.makedirs(results_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")

    timeframes = ["15min", "30min", "1h", "6h", "1D"]
    stop_loss_pcts = [0.02, 0.03, 0.05]

    # Load data once
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")

    # Prepare tasks
    tasks = [
        (name, data_1min, stop_loss_pct, initial_usd)
        for name in timeframes
        for stop_loss_pct in stop_loss_pcts
    ]

    # Determine optimal worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")

    # Start the background batch pusher
    spreadsheet_name = "GlimBit Backtest Results"
    batch_pusher = GSheetBatchPusher(results_queue, timestamp, spreadsheet_name, interval=65)
    batch_pusher.start()

    # Process tasks with optimized concurrency
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task, debug): task for task in tasks}
        all_results_rows = []
        for future in concurrent.futures.as_completed(futures):
            results, trades = future.result()
            if results or trades:
                all_results_rows.extend(results)
                results_queue.put((results, trades))  # Enqueue for batch update

    # After all tasks, flush any remaining updates
    batch_pusher.stop()
    batch_pusher.join()

    # Ensure all batches are pushed, even after 429 errors
    while not results_queue.empty():
        logging.info("Waiting for Google Sheets quota to reset. Retrying batch push in 65 seconds...")
        time.sleep(65)
        batch_pusher.push_all()

    # Write all results to a single tab-delimited summary file
    combined_filename = os.path.join(results_dir, f"{timestamp}_backtest_combined.csv")
    combined_fieldnames = [
        "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
        "max_drawdown", "avg_trade", "profit_ratio", "final_usd"
    ]

    def format_row(row):
        # Format percentages and dollar amounts for the summary file
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate']*100:.2f}%",
            "max_drawdown": f"{row['max_drawdown']*100:.2f}%",
            "avg_trade": f"{row['avg_trade']*100:.2f}%",
            "profit_ratio": f"{row['profit_ratio']*100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
        }

    with open(combined_filename, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=combined_fieldnames, delimiter='\t')
        writer.writeheader()
        for row in all_results_rows:
            writer.writerow(format_row(row))

    logging.info(f"Combined results written to {combined_filename}")