Cycles/main.py

import pandas as pd
import numpy as np
from trend_detector_simple import TrendDetectorSimple
import csv
import logging
import concurrent.futures
import os
import psutil
import datetime
import gspread
from google.oauth2.service_account import Credentials
from collections import defaultdict
import threading
import queue
import time
import math

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)

# Global queue for batching Google Sheets updates
results_queue = queue.Queue()

# Background thread function to push updates every minute
class GSheetBatchPusher(threading.Thread):
    def __init__(self, queue, timestamp, spreadsheet_name, interval=60):
        super().__init__(daemon=True)
        self.queue = queue
        self.timestamp = timestamp
        self.spreadsheet_name = spreadsheet_name
        self.interval = interval
        self._stop_event = threading.Event()

    def run(self):
        while not self._stop_event.is_set():
            self.push_all()
            time.sleep(self.interval)
        # Final push on stop
        self.push_all()

    def stop(self):
        self._stop_event.set()

    def push_all(self):
        batch_results = []
        batch_trades = []
        while True:
            try:
                results, trades = self.queue.get_nowait()
                batch_results.extend(results)
                batch_trades.extend(trades)
            except queue.Empty:
                break
            
        if batch_results or batch_trades:
            write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)

def get_optimal_workers():
    """Determine optimal number of worker processes based on system resources"""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)
    # Heuristic: Use 75% of cores, but cap based on available memory
    # Assume each worker needs ~2GB for large datasets
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)

def load_data(file_path, start_date, stop_date):
    """Load data with optimized dtypes and filtering"""
    # Define optimized dtypes
    dtypes = {
        'Open': 'float32',
        'High': 'float32', 
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }
    
    # Read data with original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)
    
    # Convert timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
    
    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
    
    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()
    
    return data.set_index('timestamp')

def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd, debug=False):
    """Process the entire timeframe with all stop loss values (no monthly split)"""
    df = df.copy().reset_index(drop=True)
    trend_detector = TrendDetectorSimple(df, verbose=False)

    results_rows = []
    trade_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            min1_df,
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct,
            debug=debug
        )
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown
        final_usd = initial_usd
        for trade in trades:
            final_usd *= (1 + trade['profit_pct'])
        row = {
            "timeframe": rule_name,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio,
            "initial_usd": initial_usd,
            "final_usd": final_usd,
        }
        results_rows.append(row)
        for trade in trades:
            trade_rows.append({
                "timeframe": rule_name,
                "stop_loss_pct": stop_loss_pct,
                "entry_time": trade.get("entry_time"),
                "exit_time": trade.get("exit_time"),
                "entry_price": trade.get("entry"),
                "exit_price": trade.get("exit"),
                "profit_pct": trade.get("profit_pct"),
                "type": trade.get("type", ""),
            })
        logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
        if debug:
            for trade in trades:
                if trade['type'] == 'STOP':
                    print(trade)
            for trade in trades:
                if trade['profit_pct'] < -0.09:  # or whatever is close to -0.10
                    print("Large loss trade:", trade)
    return results_rows, trade_rows

def process_timeframe(timeframe_info, debug=False):
    """Process a single (timeframe, stop_loss_pct) combination (no monthly split)"""
    rule, data_1min, stop_loss_pct, initial_usd = timeframe_info
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()
    df = df.reset_index()
    # Only process one stop loss
    results_rows, all_trade_rows = process_timeframe_data(data_1min, df, [stop_loss_pct], rule, initial_usd, debug=debug)
    return results_rows, all_trade_rows

def write_results_chunk(filename, fieldnames, rows, write_header=False):
    """Write a chunk of results to a CSV file"""
    mode = 'w' if write_header else 'a'
    
    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()
        
        for row in rows:
            # Only keep keys that are in fieldnames
            filtered_row = {k: v for k, v in row.items() if k in fieldnames}
            writer.writerow(filtered_row)

def aggregate_results(all_rows):
    """Aggregate results per stop_loss_pct and per rule (timeframe)"""
    from collections import defaultdict

    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        n_months = len(rows)
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])

        # Calculate final USD
        final_usd = np.mean([r.get('final_usd', initial_usd) for r in rows])

        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
            "initial_usd": initial_usd,
            "final_usd": final_usd,
        })
    return summary_rows

def write_results_per_combination_gsheet(results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive"
    ]
    creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
    gc = gspread.authorize(creds)
    sh = gc.open(spreadsheet_name)
    
    try:
        worksheet = sh.worksheet("Results")
    except gspread.exceptions.WorksheetNotFound:
        worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")

    # Clear the worksheet before writing new results
    worksheet.clear()

    # Updated fieldnames to match your data rows
    fieldnames = [
        "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
        "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
    ]

    def to_native(val):
        if isinstance(val, (np.generic, np.ndarray)):
            val = val.item()
        if hasattr(val, 'isoformat'):
            return val.isoformat()
        # Handle inf, -inf, nan
        if isinstance(val, float):
            if math.isinf(val):
                return "∞" if val > 0 else "-∞"
            if math.isnan(val):
                return ""
        return val

    # Write header if sheet is empty
    if len(worksheet.get_all_values()) == 0:
        worksheet.append_row(fieldnames)

    for row in results_rows:
        values = [to_native(row.get(field, "")) for field in fieldnames]
        worksheet.append_row(values)
    
    trades_fieldnames = [
        "entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
    ]
    trades_by_combo = defaultdict(list)

    for trade in trade_rows:
        tf = trade.get("timeframe")
        sl = trade.get("stop_loss_pct")
        trades_by_combo[(tf, sl)].append(trade)
    
    for (tf, sl), trades in trades_by_combo.items():
        sl_percent = int(round(sl * 100))
        sheet_name = f"Trades_{tf}_ST{sl_percent}%"

        try:
            trades_ws = sh.worksheet(sheet_name)
        except gspread.exceptions.WorksheetNotFound:
            trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")
        
        # Clear the trades worksheet before writing new trades
        trades_ws.clear()

        if len(trades_ws.get_all_values()) == 0:
            trades_ws.append_row(trades_fieldnames)
        
        for trade in trades:
            trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
            try:
                trades_ws.append_row(trade_row)
            except gspread.exceptions.APIError as e:
                if '429' in str(e):
                    logging.warning(f"Google Sheets API quota exceeded (429). Please wait one minute. Will retry on next batch push. Sheet: {sheet_name}")
                    # Re-queue the failed batch for retry
                    results_queue.put((results_rows, trade_rows))
                    return  # Stop pushing for this batch, will retry next interval
                else:
                    raise

if __name__ == "__main__":
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000
    debug = False
    
    results_dir = "results"
    os.makedirs(results_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")

    timeframes = ["15min", "30min", "1h", "6h", "1D"]
    stop_loss_pcts = [0.02, 0.03, 0.05]

    # Load data once
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")
    
    # Prepare tasks
    tasks = [
        (name, data_1min, stop_loss_pct, initial_usd)
        for name in timeframes
        for stop_loss_pct in stop_loss_pcts
    ]
    
    # Determine optimal worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")

    # Start the background batch pusher
    spreadsheet_name = "GlimBit Backtest Results"
    batch_pusher = GSheetBatchPusher(results_queue, timestamp, spreadsheet_name, interval=65)
    batch_pusher.start()
    
    # Process tasks with optimized concurrency
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task, debug): task for task in tasks}
        all_results_rows = []
        for future in concurrent.futures.as_completed(futures):
            results, trades = future.result()
            if results or trades:
                all_results_rows.extend(results)
                results_queue.put((results, trades))  # Enqueue for batch update

    # After all tasks, flush any remaining updates
    batch_pusher.stop()
    batch_pusher.join()

    # Ensure all batches are pushed, even after 429 errors
    while not results_queue.empty():
        logging.info("Waiting for Google Sheets quota to reset. Retrying batch push in 60 seconds...")
        time.sleep(65)
        batch_pusher.push_all()

    # Write all results to a single CSV file
    combined_filename = os.path.join(results_dir, f"{timestamp}_backtest_combined.csv")
    combined_fieldnames = [
        "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
        "max_drawdown", "avg_trade", "profit_ratio", "final_usd"
    ]

    def format_row(row):
        # Format percentages and floats as in your example
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate']*100:.2f}%",
            "max_drawdown": f"{row['max_drawdown']*100:.2f}%",
            "avg_trade": f"{row['avg_trade']*100:.2f}%",
            "profit_ratio": f"{row['profit_ratio']*100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
        }

    with open(combined_filename, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=combined_fieldnames, delimiter='\t')
        writer.writeheader()
        for row in all_results_rows:
            writer.writerow(format_row(row))

    logging.info(f"Combined results written to {combined_filename}")