import pandas as pd
import numpy as np
from trend_detector_macd import TrendDetectorMACD
from trend_detector_simple import TrendDetectorSimple
from cycle_detector import CycleDetector
import csv
import logging
import concurrent.futures
import os
import psutil

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("backtest.log"),
        logging.StreamHandler()
    ]
)


def get_optimal_workers():
    """Determine optimal number of worker processes based on system resources"""
    cpu_count = os.cpu_count() or 4
    memory_gb = psutil.virtual_memory().total / (1024**3)

    # Heuristic: use 75% of cores, but cap based on available memory
    # Assume each worker needs ~2GB for large datasets
    workers_by_memory = max(1, int(memory_gb / 2))
    workers_by_cpu = max(1, int(cpu_count * 0.75))
    return min(workers_by_cpu, workers_by_memory)
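
# For example, on an 8-core machine with 32 GB of RAM this heuristic yields
# min(int(8 * 0.75), int(32 / 2)) = min(6, 16) = 6 worker processes.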


def load_data(file_path, start_date, stop_date):
    """Load data with optimized dtypes and filtering"""
    # Define optimized dtypes
    dtypes = {
        'Open': 'float32',
        'High': 'float32',
        'Low': 'float32',
        'Close': 'float32',
        'Volume': 'float32'
    }

    # Read data with original capitalized column names
    data = pd.read_csv(file_path, dtype=dtypes)

    # Convert timestamp to datetime
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')

    # Filter by date range
    data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]

    # Now convert column names to lowercase
    data.columns = data.columns.str.lower()

    return data.set_index('timestamp')
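
# load_data assumes a 1-minute OHLCV CSV with a Unix-seconds 'Timestamp' column, e.g.
#   Timestamp,Open,High,Low,Close,Volume
#   1577836800,7195.2,7196.2,7178.6,7179.0,12.3
# (the exact schema is an assumption, inferred from the dtypes and unit='s' above).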


def process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd):
    """Process a single month for a given timeframe with all stop loss values"""
    month_df = month_df.copy().reset_index(drop=True)

    # Only calculate trends once per month-timeframe combination
    trend_detector = TrendDetectorSimple(month_df, verbose=False)
    analysis_results = trend_detector.detect_trends()

    # Calculate backtest for each stop_loss_pct
    results_rows = []
    for stop_loss_pct in stop_loss_pcts:
        results = trend_detector.backtest_meta_supertrend(
            initial_usd=initial_usd,
            stop_loss_pct=stop_loss_pct
        )

        # Process results
        n_trades = results["n_trades"]
        trades = results.get('trades', [])
        n_winning_trades = sum(1 for trade in trades if trade['profit_pct'] > 0)
        total_profit = sum(trade['profit_pct'] for trade in trades)
        total_loss = sum(-trade['profit_pct'] for trade in trades if trade['profit_pct'] < 0)

        win_rate = n_winning_trades / n_trades if n_trades > 0 else 0
        avg_trade = total_profit / n_trades if n_trades > 0 else 0
        profit_ratio = total_profit / total_loss if total_loss > 0 else float('inf')

        # Calculate max drawdown
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
                peak = cumulative_profit
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown
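
        # e.g. for per-trade profits [+5, -3, +2, -6] the cumulative curve is
        # [5, 2, 4, -2], the running peak stays at 5, and max_drawdown ends up 7.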

        # Create row
        row = {
            "timeframe": rule_name,
            "month": str(month_df['timestamp'].iloc[0].to_period('M')),
            "stop_loss_pct": stop_loss_pct,
            "n_trades": n_trades,
            "n_stop_loss": sum(1 for trade in trades if 'type' in trade and trade['type'] == 'STOP'),
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "profit_ratio": profit_ratio
        }
        results_rows.append(row)

    return results_rows


def process_timeframe(timeframe_info):
    """Process an entire timeframe"""
    rule, rule_name, data_1min, stop_loss_pcts, initial_usd = timeframe_info

    # Resample data if needed
    if rule == "1T":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()

    df = df.reset_index()
    df['month'] = df['timestamp'].dt.to_period('M')
    results_rows = []

    # Process each month
    for month, month_df in df.groupby('month'):
        if len(month_df) < 10:  # Skip very small months
            continue

        logging.info(f"Processing: timeframe={rule_name}, month={month}")

        try:
            month_results = process_month_timeframe(month_df, stop_loss_pcts, rule_name, initial_usd)
            results_rows.extend(month_results)

            # Return early once more than 100 rows have accumulated to limit memory
            # buildup; note that any remaining months for this timeframe are skipped.
            if len(results_rows) > 100:
                return results_rows
        except Exception as e:
            logging.error(f"Error processing {rule_name}, month={month}: {str(e)}")

    return results_rows
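
# process_timeframe relies on data_1min having a DatetimeIndex (set in load_data),
# which is what allows .resample(rule) to build the coarser OHLCV bars.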


def write_results_chunk(filename, fieldnames, rows, write_header=False, initial_usd=None):
    """Write a chunk of results to a CSV file"""
    mode = 'w' if write_header else 'a'

    with open(filename, mode, newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            # Record the starting capital as a comment line above the CSV header
            if initial_usd is not None:
                csvfile.write(f"# initial_usd: {initial_usd}\n")
            writer.writeheader()

        for row in rows:
            writer.writerow(row)
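
# Example call (values illustrative): write the header plus one summary row.
# write_results_chunk("out.csv", ["timeframe", "n_trades"],
#                     [{"timeframe": "1H", "n_trades": 42}],
#                     write_header=True, initial_usd=10000)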


def aggregate_results(all_rows):
    """Aggregate results per stop_loss_pct and per rule (timeframe)"""
    from collections import defaultdict

    grouped = defaultdict(list)
    for row in all_rows:
        key = (row['timeframe'], row['stop_loss_pct'])
        grouped[key].append(row)

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
        n_months = len(rows)
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
        avg_max_drawdown = np.mean([r['max_drawdown'] for r in rows])
        avg_avg_trade = np.mean([r['avg_trade'] for r in rows])
        avg_profit_ratio = np.mean([r['profit_ratio'] for r in rows])

        summary_rows.append({
            "timeframe": rule,
            "stop_loss_pct": stop_loss_pct,
            "n_trades": total_trades,
            "n_stop_loss": total_stop_loss,
            "win_rate": avg_win_rate,
            "max_drawdown": avg_max_drawdown,
            "avg_trade": avg_avg_trade,
            "profit_ratio": avg_profit_ratio,
        })
    return summary_rows
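
# Note: profit_ratio is float('inf') for months with no losing trades, so the
# np.mean above returns inf for any (timeframe, stop_loss_pct) group containing
# such a month; clipping or excluding those months is one possible refinement.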


if __name__ == "__main__":
    # Configuration
    start_date = '2020-01-01'
    stop_date = '2025-05-15'
    initial_usd = 10000

    timeframes = {
        # "1T": "1min",
        "15T": "15min",
        "1H": "1h",
        "6H": "6h",
        "1D": "1D",
    }
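    # NOTE: the 'T' and 'H' offset aliases above are deprecated in pandas >= 2.2;
    # '15min', '1h', '6h' and '1D' are the preferred spellings for resample().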

    stop_loss_pcts = [0.01, 0.02, 0.03, 0.05, 0.07, 0.10]

    # Load data once
    data_1min = load_data('./data/btcusd_1-min_data.csv', start_date, stop_date)
    logging.info(f"1min rows: {len(data_1min)}")

    # Set up result file
    filename = f"backtest_results_{start_date}_{stop_date}_multi_timeframe_stoploss.csv"
    fieldnames = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio"]

    # Initialize output file with the header (the final summary write below reopens
    # the file in 'w' mode, so this mainly guarantees the file exists early on)
    write_results_chunk(filename, fieldnames, [], write_header=True, initial_usd=initial_usd)

    # Prepare tasks
    tasks = [
        (rule, name, data_1min, stop_loss_pcts, initial_usd)
        for rule, name in timeframes.items()
    ]
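
    # Each task tuple embeds the full data_1min frame; ProcessPoolExecutor pickles
    # the arguments for every submitted task, so memory use grows with the number
    # of timeframes. This matches the ~2 GB-per-worker assumption in
    # get_optimal_workers().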

    # Determine optimal worker count
    workers = get_optimal_workers()
    logging.info(f"Using {workers} workers for processing")

    # Process tasks with optimized concurrency
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(process_timeframe, task): task[1] for task in tasks}

        # Collect all results
        all_results = []
        for future in concurrent.futures.as_completed(futures):
            timeframe_name = futures[future]
            try:
                results = future.result()
                if results:
                    # Per-month rows are only accumulated here; the CSV gets the summary below
                    all_results.extend(results)
            except Exception as exc:
                logging.error(f"{timeframe_name} generated an exception: {exc}")

    # Write summary rows
    summary_rows = aggregate_results(all_results)
    write_results_chunk(filename, fieldnames, summary_rows, write_header=True, initial_usd=initial_usd)  # Only write summary

    logging.info(f"Results written to {filename}")