# Cycles/main.py

import concurrent.futures
import csv
import logging
import os

import numpy as np
import pandas as pd
import psutil

from cycle_detector import CycleDetector
from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler(),
    ],
)


def get_optimal_workers():
    """Determine the optimal number of worker processes based on system resources."""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)
    # Heuristic: use 75% of the cores, but cap the count by available memory,
    # assuming each worker needs roughly 2 GB for large datasets.
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)
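
# Worked example of the heuristic above (hypothetical hardware, not a measured
# result): on a machine with 8 cores and 8 GB of RAM,
#   workers_by_cpu    = max(1, int(8 * 0.75)) = 6
#   workers_by_memory = max(1, int(8 / 2))    = 4
# so get_optimal_workers() returns min(6, 4) = 4; memory is the binding cap.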


def load_data(file_path, start_date, stop_date):
    """Load data with optimized dtypes and date filtering."""
    # Define optimized dtypes
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32',
    }
    # Read data with the original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)
    # Convert the Unix-seconds timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()
    return data.set_index('timestamp')
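
# Example of the input format load_data() expects (column names and the Unix-
# seconds timestamp unit follow the read/convert calls above; the sample values
# below are made up for illustration):
#
#   Timestamp,Open,High,Low,Close,Volume
#   1577836800,7195.2,7196.3,7193.1,7194.8,0.52
#
# A call such as load_data('./data/btcusd_1-min_data.csv', '2020-01-01',
# '2025-05-15') then returns a lowercase-named OHLCV frame indexed by timestamp.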


def process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd):
    """Process a single month for a given timeframe with all stop-loss values."""
    month_df = month_df.copy().reset_index(drop=True)
    # Only calculate trends once per month-timeframe combination
    trend_detector = TrendDetectorSimple(month_df, verbose=False)
    analysis_results = trend_detector.detect_trends()
    # Run the backtest for each stop_loss_pct
    results_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct
        )
        # Summarise the trades returned by the backtest
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)
        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')
        # Calculate max drawdown on the cumulative per-trade profit curve
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown
        # Create the result row for this stop-loss setting
        row = {
            "timeframe": rule_name,
            "month": str(month_df['timestamp'].iloc[0].to_period('M')),
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio,
        }
        results_rows.append(row)
    return results_rows
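
# Worked example of the drawdown loop above (illustrative profit_pct values):
# trades of +5, -3, +4, -6 give cumulative profits 5, 2, 6, 0 and running peaks
# 5, 5, 6, 6, so the per-trade drawdowns are 0, 3, 0, 6 and max_drawdown = 6.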


def process_timeframe(timeframe_info):
    """Process an entire timeframe."""
    rule, rule_name, data_1min, stop_loss_pcts, initial_usd = timeframe_info
    # Resample data if needed
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()
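    # Sketch of what the aggregation above does (illustrative description): for
    # each 15-minute (or 1h/6h/1D) bucket, 'open' keeps the first 1-minute open,
    # 'high'/'low' take the max/min across the bucket, 'close' keeps the last
    # close, and 'volume' is summed; empty buckets are removed by .dropna().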
    df = df.reset_index()
    df['month'] = df['timestamp'].dt.to_period('M')
    results_rows = []
    # Process each month
    for month, month_df in df.groupby('month'):
        if len(month_df) < 10:  # Skip very small months
            continue
        logging.info(f"Processing: timeframe={rule_name}, month={month}")
        try:
            month_results = process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd)
            results_rows.extend(month_results)
        except Exception as e:
            logging.error(f"Error processing {rule_name}, month={month}: {str(e)}")
    return results_rows


def write_results_chunk(filename, fieldnames, rows, write_header=False, initial_usd=None):
    """Write a chunk of results to a CSV file."""
    mode = 'w' if write_header else 'a'
    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            if initial_usd is not None:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()
        for row in rows:
            writer.writerow(row)
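
# Minimal usage sketch for write_results_chunk() (the filename and values are
# hypothetical): the first call truncates the file and writes the
# "# initial_usd" comment line plus the CSV header; later calls append rows.
#
#   write_results_chunk("out.csv", ["a", "b"], [], write_header=True, initial_usd=10000)
#   write_results_chunk("out.csv", ["a", "b"], [{"a": 1, "b": 2}])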


def aggregate_results(all_rows):
    """Aggregate results per stop_loss_pct and per rule (timeframe)."""
    from collections import defaultdict
    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)
    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        n_months = len(rows)
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])
        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
        })
    return summary_rows
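
# Example of the grouping above (illustrative numbers): two monthly rows for the
# key ("1h", 0.02) with win rates 0.40 and 0.60 and 10 trades each collapse into
# one summary row with n_trades = 20 and win_rate = 0.50 (a simple, unweighted
# mean across months).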


if __name__ == "__main__":
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000
    timeframes = {
        # "1T": "1min",
        "15T": "15min",
        "1H": "1h",
        "6H": "6h",
        "1D": "1D",
    }
    stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10]
    # Load the 1-minute data once and share it across the timeframe tasks
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")
    # Set up the result file
    filename = f"backtest_results_{start_date}_{stop_date}_multi_timeframe_stoploss.csv"
    fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio"]
    # Initialize the output file with its header
    write_results_chunk(filename, fieldnames, [], write_header=True, initial_usd=initial_usd)
    # Prepare one task per timeframe
    tasks = [
        (rule, name, data_1min, stop_loss_pcts, initial_usd)
        for rule, name in timeframes.items()
    ]
    # Determine the optimal worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")
    # Process the timeframes in parallel
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task): task[1] for task in tasks}
        # Collect all per-month results
        all_results = []
        for future in concurrent.futures.as_completed(futures):
            timeframe_name = futures[future]
            try:
                results = future.result()
                if results:
                    # Per-month rows are only accumulated here; the summary is written below
                    all_results.extend(results)
            except Exception as exc:
                logging.error(f"{timeframe_name} generated an exception: {exc}")
    # Write the aggregated summary rows only
    summary_rows = aggregate_results(all_results)
    write_results_chunk(filename, fieldnames, summary_rows, write_header=True, initial_usd=initial_usd)
    logging.info(f"Results written to {filename}")