Implement Incremental Trading Framework

- Introduced a framework for incremental trading strategies, with modules for strategy execution, backtesting, and data processing.
- Added the core components `IncTrader` and `IncBacktester`, plus several trading strategies (e.g., `MetaTrendStrategy`, `BBRSStrategy`, `RandomStrategy`) for real-time trading and backtesting; a minimal real-time usage sketch follows this list.
- Implemented a backtesting engine with configuration management, parallel execution, and result analysis.
- Developed an incremental indicators framework for real-time data processing in constant memory.
- Documented usage examples and the overall architecture to keep the codebase maintainable and easy to pick up.
- Kept compatibility with existing strategies, with attention to performance and scalability.
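For the real-time path, a minimal sketch of driving `IncTrader` directly is shown below. It assumes only the trader interface that the backtester exercises (`process_data_point`, `finalize`, `get_results`); the import paths and the candle source are illustrative.

```python
# Sketch: feeding live candles to IncTrader. The trader interface matches the
# calls made by IncBacktester in this commit; candle_feed() is a hypothetical
# streaming source and the import paths are assumed from the package layout.
from IncrementalTrader.trader import IncTrader
from IncrementalTrader.strategies import MetaTrendStrategy

strategy = MetaTrendStrategy("metatrend", params={"timeframe": "15min"})
trader = IncTrader(
    strategy=strategy,
    initial_usd=10000,
    params={"stop_loss_pct": 0.02, "take_profit_pct": 0.04},
)

for timestamp, candle in candle_feed():  # hypothetical real-time OHLCV source
    trader.process_data_point(timestamp, {
        "open": candle["open"],
        "high": candle["high"],
        "low": candle["low"],
        "close": candle["close"],
        "volume": candle["volume"],
    })

trader.finalize()
results = trader.get_results()
print(f"{results['final_usd']:.2f} USD, {results['n_trades']} trades")
```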
Author: Vasily.onl
Date: 2025-05-28 16:29:48 +08:00
Parent: 8055f46328
Commit: c9ae507bb7
21 changed files with 5898 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
"""
Incremental Backtesting Framework
This module provides comprehensive backtesting capabilities for incremental trading strategies.
It includes configuration management, data loading, parallel execution, and result analysis.
Components:
- IncBacktester: Main backtesting engine
- BacktestConfig: Configuration management for backtests
- OptimizationConfig: Configuration for parameter optimization
- DataLoader: Data loading and validation utilities
- SystemUtils: System resource management
- ResultsSaver: Result saving and reporting utilities
Example:
from IncrementalTrader.backtester import IncBacktester, BacktestConfig
from IncrementalTrader.strategies import MetaTrendStrategy
# Configure backtest
config = BacktestConfig(
data_file="btc_1min_2023.csv",
start_date="2023-01-01",
end_date="2023-12-31",
initial_usd=10000
)
# Run single strategy
strategy = MetaTrendStrategy("metatrend")
backtester = IncBacktester(config)
results = backtester.run_single_strategy(strategy)
# Parameter optimization
param_grid = {"timeframe": ["5min", "15min", "30min"]}
results = backtester.optimize_parameters(MetaTrendStrategy, param_grid)
"""
from .backtester import IncBacktester
from .config import BacktestConfig, OptimizationConfig
from .utils import DataLoader, SystemUtils, ResultsSaver
__all__ = [
"IncBacktester",
"BacktestConfig",
"OptimizationConfig",
"DataLoader",
"SystemUtils",
"ResultsSaver",
]
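The package docstring above covers the single-strategy and optimization paths. As a complement, here is a minimal sketch of comparing several strategies on the same data via `run_multiple_strategies` (defined in backtester.py below); strategy constructor arguments are assumed to follow the name/params convention used elsewhere in this commit.

```python
from IncrementalTrader.backtester import IncBacktester, BacktestConfig
from IncrementalTrader.strategies import MetaTrendStrategy, BBRSStrategy, RandomStrategy

config = BacktestConfig(
    data_file="btc_1min_2023.csv",
    start_date="2023-01-01",
    end_date="2023-06-30",
    initial_usd=10000,
)
backtester = IncBacktester(config)

strategies = [
    MetaTrendStrategy("metatrend"),
    BBRSStrategy("bbrsi"),
    RandomStrategy("random", params={"timeframe": "15min"}),
]
results = backtester.run_multiple_strategies(strategies)

# Failed runs carry success=False; rank the rest by profit ratio.
ok = [r for r in results if r.get("success", True)]
for r in sorted(ok, key=lambda r: r["profit_ratio"], reverse=True):
    print(f"{r['strategy_name']}: {r['profit_ratio'] * 100:.2f}% over {r['n_trades']} trades")

backtester.save_results(ok, "strategy_comparison.csv")
```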

View File

@@ -0,0 +1,524 @@
"""
Incremental Backtester for testing incremental strategies.
This module provides the IncBacktester class that orchestrates multiple IncTraders
for parallel testing, handles data loading and feeding, and supports multiprocessing
for parameter optimization.
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Optional, Any, Callable, Union, Tuple
import logging
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from itertools import product
import multiprocessing as mp
from datetime import datetime
# Use try/except for imports to handle both relative and absolute import scenarios
try:
from ..trader.trader import IncTrader
from ..strategies.base import IncStrategyBase
from .config import BacktestConfig, OptimizationConfig
from .utils import DataLoader, SystemUtils, ResultsSaver
except ImportError:
# Fallback for direct execution
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from trader.trader import IncTrader
from strategies.base import IncStrategyBase
from config import BacktestConfig, OptimizationConfig
from utils import DataLoader, SystemUtils, ResultsSaver
logger = logging.getLogger(__name__)
def _worker_function(args: Tuple[type, Dict, Dict, BacktestConfig]) -> Dict[str, Any]:
"""
Worker function for multiprocessing parameter optimization.
This function must be at module level to be picklable for multiprocessing.
Args:
args: Tuple containing (strategy_class, strategy_params, trader_params, config)
Returns:
Dict containing backtest results
"""
try:
strategy_class, strategy_params, trader_params, config = args
# Create new backtester instance for this worker
worker_backtester = IncBacktester(config)
# Create strategy instance
strategy = strategy_class(params=strategy_params)
# Run backtest
result = worker_backtester.run_single_strategy(strategy, trader_params)
result["success"] = True
return result
except Exception as e:
logger.error(f"Worker error for {strategy_params}, {trader_params}: {e}")
return {
"strategy_params": strategy_params,
"trader_params": trader_params,
"error": str(e),
"success": False
}
class IncBacktester:
"""
Incremental backtester for testing incremental strategies.
This class orchestrates multiple IncTraders for parallel testing:
- Loads data using the integrated DataLoader
- Creates multiple IncTrader instances with different parameters
- Feeds data sequentially to all traders
- Collects and aggregates results
- Supports multiprocessing for parallel execution
- Uses SystemUtils for optimal worker count determination
The backtester can run multiple strategies simultaneously or test
parameter combinations across multiple CPU cores.
Example:
# Single strategy backtest
config = BacktestConfig(
data_file="btc_1min_2023.csv",
start_date="2023-01-01",
end_date="2023-12-31",
initial_usd=10000
)
strategy = RandomStrategy("random", params={"timeframe": "15min"})
backtester = IncBacktester(config)
results = backtester.run_single_strategy(strategy)
# Multiple strategies
strategies = [strategy1, strategy2, strategy3]
results = backtester.run_multiple_strategies(strategies)
# Parameter optimization
param_grid = {
"timeframe": ["5min", "15min", "30min"],
"stop_loss_pct": [0.01, 0.02, 0.03]
}
results = backtester.optimize_parameters(strategy_class, param_grid)
"""
def __init__(self, config: BacktestConfig):
"""
Initialize the incremental backtester.
Args:
config: Backtesting configuration
"""
self.config = config
# Initialize utilities
self.data_loader = DataLoader(config.data_dir)
self.system_utils = SystemUtils()
self.results_saver = ResultsSaver(config.results_dir)
# State management
self.data = None
self.results_cache = {}
# Track all actions performed during backtesting
self.action_log = []
self.session_start_time = datetime.now()
logger.info(f"IncBacktester initialized: {config.data_file}, "
f"{config.start_date} to {config.end_date}")
self._log_action("backtester_initialized", {
"config": config.to_dict(),
"session_start": self.session_start_time.isoformat(),
"system_info": self.system_utils.get_system_info()
})
def _log_action(self, action_type: str, details: Dict[str, Any]) -> None:
"""Log an action performed during backtesting."""
self.action_log.append({
"timestamp": datetime.now().isoformat(),
"action_type": action_type,
"details": details
})
def load_data(self) -> pd.DataFrame:
"""
Load and prepare data for backtesting.
Returns:
pd.DataFrame: Loaded OHLCV data with DatetimeIndex
"""
if self.data is None:
logger.info(f"Loading data from {self.config.data_file}...")
start_time = time.time()
self.data = self.data_loader.load_data(
self.config.data_file,
self.config.start_date,
self.config.end_date
)
load_time = time.time() - start_time
logger.info(f"Data loaded: {len(self.data)} rows in {load_time:.2f}s")
# Validate data
if self.data.empty:
raise ValueError(f"No data loaded for the specified date range")
if not self.data_loader.validate_data(self.data):
raise ValueError("Data validation failed")
self._log_action("data_loaded", {
"file": self.config.data_file,
"rows": len(self.data),
"load_time_seconds": load_time,
"date_range": f"{self.config.start_date} to {self.config.end_date}",
"columns": list(self.data.columns)
})
return self.data
def run_single_strategy(self, strategy: IncStrategyBase,
trader_params: Optional[Dict] = None) -> Dict[str, Any]:
"""
Run backtest for a single strategy.
Args:
strategy: Incremental strategy instance
trader_params: Additional trader parameters
Returns:
Dict containing backtest results
"""
data = self.load_data()
# Merge trader parameters
final_trader_params = {
"stop_loss_pct": self.config.stop_loss_pct,
"take_profit_pct": self.config.take_profit_pct
}
if trader_params:
final_trader_params.update(trader_params)
# Create trader
trader = IncTrader(
strategy=strategy,
initial_usd=self.config.initial_usd,
params=final_trader_params
)
# Run backtest
logger.info(f"Starting backtest for {strategy.name}...")
start_time = time.time()
self._log_action("single_strategy_backtest_started", {
"strategy_name": strategy.name,
"strategy_params": strategy.params,
"trader_params": final_trader_params,
"data_points": len(data)
})
for timestamp, row in data.iterrows():
ohlcv_data = {
'open': row['open'],
'high': row['high'],
'low': row['low'],
'close': row['close'],
'volume': row['volume']
}
trader.process_data_point(timestamp, ohlcv_data)
# Finalize and get results
trader.finalize()
results = trader.get_results()
backtest_time = time.time() - start_time
results["backtest_duration_seconds"] = backtest_time
results["data_points"] = len(data)
results["config"] = self.config.to_dict()
logger.info(f"Backtest completed for {strategy.name} in {backtest_time:.2f}s: "
f"${results['final_usd']:.2f} ({results['profit_ratio']*100:.2f}%), "
f"{results['n_trades']} trades")
self._log_action("single_strategy_backtest_completed", {
"strategy_name": strategy.name,
"backtest_duration_seconds": backtest_time,
"final_usd": results['final_usd'],
"profit_ratio": results['profit_ratio'],
"n_trades": results['n_trades'],
"win_rate": results['win_rate']
})
return results
def run_multiple_strategies(self, strategies: List[IncStrategyBase],
trader_params: Optional[Dict] = None) -> List[Dict[str, Any]]:
"""
Run backtest for multiple strategies simultaneously.
Args:
strategies: List of incremental strategy instances
trader_params: Additional trader parameters
Returns:
List of backtest results for each strategy
"""
self._log_action("multiple_strategies_backtest_started", {
"strategy_count": len(strategies),
"strategy_names": [s.name for s in strategies]
})
results = []
for strategy in strategies:
try:
result = self.run_single_strategy(strategy, trader_params)
results.append(result)
except Exception as e:
logger.error(f"Error running strategy {strategy.name}: {e}")
# Add error result
error_result = {
"strategy_name": strategy.name,
"error": str(e),
"success": False
}
results.append(error_result)
self._log_action("strategy_error", {
"strategy_name": strategy.name,
"error": str(e)
})
self._log_action("multiple_strategies_backtest_completed", {
"total_strategies": len(strategies),
"successful_strategies": len([r for r in results if r.get("success", True)]),
"failed_strategies": len([r for r in results if not r.get("success", True)])
})
return results
def optimize_parameters(self, strategy_class: type, param_grid: Dict[str, List],
trader_param_grid: Optional[Dict[str, List]] = None,
max_workers: Optional[int] = None) -> List[Dict[str, Any]]:
"""
Optimize strategy parameters using grid search with multiprocessing.
Args:
strategy_class: Strategy class to instantiate
param_grid: Grid of strategy parameters to test
trader_param_grid: Grid of trader parameters to test
max_workers: Maximum number of worker processes (uses SystemUtils if None)
Returns:
List of results for each parameter combination
"""
# Generate parameter combinations
strategy_combinations = list(self._generate_param_combinations(param_grid))
trader_combinations = list(self._generate_param_combinations(trader_param_grid or {}))
# If no trader param grid, use default
if not trader_combinations:
trader_combinations = [{}]
# Create all combinations
all_combinations = []
for strategy_params in strategy_combinations:
for trader_params in trader_combinations:
all_combinations.append((strategy_params, trader_params))
logger.info(f"Starting parameter optimization: {len(all_combinations)} combinations")
# Determine number of workers using SystemUtils
if max_workers is None:
max_workers = self.system_utils.get_optimal_workers()
else:
max_workers = min(max_workers, len(all_combinations))
self._log_action("parameter_optimization_started", {
"strategy_class": strategy_class.__name__,
"total_combinations": len(all_combinations),
"max_workers": max_workers,
"strategy_param_grid": param_grid,
"trader_param_grid": trader_param_grid or {}
})
# Run optimization
if max_workers == 1 or len(all_combinations) == 1:
# Single-threaded execution
results = []
for strategy_params, trader_params in all_combinations:
result = self._run_single_combination(strategy_class, strategy_params, trader_params)
results.append(result)
else:
# Parallel execution across worker processes
results = self._run_parallel_optimization(
strategy_class, all_combinations, max_workers
)
# Sort results by profit ratio
valid_results = [r for r in results if r.get("success", True)]
valid_results.sort(key=lambda x: x.get("profit_ratio", -float('inf')), reverse=True)
logger.info(f"Parameter optimization completed: {len(valid_results)} successful runs")
self._log_action("parameter_optimization_completed", {
"total_runs": len(results),
"successful_runs": len(valid_results),
"failed_runs": len(results) - len(valid_results),
"best_profit_ratio": valid_results[0]["profit_ratio"] if valid_results else None,
"worst_profit_ratio": valid_results[-1]["profit_ratio"] if valid_results else None
})
return results
def _generate_param_combinations(self, param_grid: Dict[str, List]) -> List[Dict]:
"""Generate all parameter combinations from grid."""
if not param_grid:
return [{}]
keys = list(param_grid.keys())
values = list(param_grid.values())
combinations = []
for combination in product(*values):
param_dict = dict(zip(keys, combination))
combinations.append(param_dict)
return combinations
def _run_single_combination(self, strategy_class: type, strategy_params: Dict,
trader_params: Dict) -> Dict[str, Any]:
"""Run backtest for a single parameter combination."""
try:
# Create strategy instance
strategy = strategy_class(params=strategy_params)
# Run backtest
result = self.run_single_strategy(strategy, trader_params)
result["success"] = True
return result
except Exception as e:
logger.error(f"Error in parameter combination {strategy_params}, {trader_params}: {e}")
return {
"strategy_params": strategy_params,
"trader_params": trader_params,
"error": str(e),
"success": False
}
def _run_parallel_optimization(self, strategy_class: type, combinations: List,
max_workers: int) -> List[Dict[str, Any]]:
"""Run parameter optimization in parallel."""
results = []
# Prepare arguments for worker function
worker_args = []
for strategy_params, trader_params in combinations:
args = (strategy_class, strategy_params, trader_params, self.config)
worker_args.append(args)
# Execute in parallel
with ProcessPoolExecutor(max_workers=max_workers) as executor:
# Submit all jobs
future_to_params = {
executor.submit(_worker_function, args): args[1:3] # strategy_params, trader_params
for args in worker_args
}
# Collect results as they complete
for future in as_completed(future_to_params):
combo = future_to_params[future]
try:
result = future.result()
results.append(result)
if result.get("success", True):
logger.info(f"Completed: {combo[0]} -> "
f"${result.get('final_usd', 0):.2f} "
f"({result.get('profit_ratio', 0)*100:.2f}%)")
except Exception as e:
logger.error(f"Worker error for {combo}: {e}")
results.append({
"strategy_params": combo[0],
"trader_params": combo[1],
"error": str(e),
"success": False
})
return results
def get_summary_statistics(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Calculate summary statistics across multiple backtest results.
Args:
results: List of backtest results
Returns:
Dict containing summary statistics
"""
return self.results_saver._calculate_summary_statistics(results)
def save_results(self, results: List[Dict[str, Any]], filename: str) -> None:
"""
Save backtest results to CSV file.
Args:
results: List of backtest results
filename: Output filename
"""
self.results_saver.save_results_csv(results, filename)
def save_comprehensive_results(self, results: List[Dict[str, Any]],
base_filename: str,
summary: Optional[Dict[str, Any]] = None) -> None:
"""
Save comprehensive backtest results including summary, individual results, and action log.
Args:
results: List of backtest results
base_filename: Base filename (without extension)
summary: Optional summary statistics
"""
self.results_saver.save_comprehensive_results(
results=results,
base_filename=base_filename,
summary=summary,
action_log=self.action_log,
session_start_time=self.session_start_time
)
def get_action_log(self) -> List[Dict[str, Any]]:
"""Get the complete action log for this session."""
return self.action_log.copy()
def reset_session(self) -> None:
"""Reset the backtester session (clear cache and logs)."""
self.data = None
self.results_cache.clear()
self.action_log.clear()
self.session_start_time = datetime.now()
logger.info("Backtester session reset")
self._log_action("session_reset", {
"reset_time": self.session_start_time.isoformat()
})
def __repr__(self) -> str:
"""String representation of the backtester."""
return (f"IncBacktester(data_file={self.config.data_file}, "
f"date_range={self.config.start_date} to {self.config.end_date}, "
f"initial_usd=${self.config.initial_usd})")

View File

@@ -0,0 +1,207 @@
"""
Backtester Configuration
This module provides configuration classes and utilities for backtesting
incremental trading strategies.
"""
import os
import pandas as pd
from dataclasses import dataclass
from typing import Optional, Dict, Any, List
import logging
logger = logging.getLogger(__name__)
@dataclass
class BacktestConfig:
"""
Configuration for backtesting runs.
This class encapsulates all configuration parameters needed for running
backtests, including data settings, trading parameters, and performance options.
Attributes:
data_file: Path to the data file (relative to data directory)
start_date: Start date for backtesting (YYYY-MM-DD format)
end_date: End date for backtesting (YYYY-MM-DD format)
initial_usd: Initial USD balance for trading
timeframe: Data timeframe (e.g., "1min", "5min", "15min")
stop_loss_pct: Default stop loss percentage (0.0 to disable)
take_profit_pct: Default take profit percentage (0.0 to disable)
max_workers: Maximum number of worker processes for parallel execution
chunk_size: Chunk size for data processing
data_dir: Directory containing data files
results_dir: Directory for saving results
Example:
config = BacktestConfig(
data_file="btc_1min_2023.csv",
start_date="2023-01-01",
end_date="2023-12-31",
initial_usd=10000,
stop_loss_pct=0.02
)
"""
data_file: str
start_date: str
end_date: str
initial_usd: float = 10000
timeframe: str = "1min"
# Risk management parameters
stop_loss_pct: float = 0.0
take_profit_pct: float = 0.0
# Performance settings
max_workers: Optional[int] = None
chunk_size: int = 1000
# Directory settings
data_dir: str = "data"
results_dir: str = "results"
def __post_init__(self):
"""Validate configuration after initialization."""
self._validate_config()
self._ensure_directories()
def _validate_config(self):
"""Validate configuration parameters."""
# Validate dates
try:
start_dt = pd.to_datetime(self.start_date)
end_dt = pd.to_datetime(self.end_date)
if start_dt >= end_dt:
raise ValueError("start_date must be before end_date")
except Exception as e:
raise ValueError(f"Invalid date format: {e}")
# Validate financial parameters
if self.initial_usd <= 0:
raise ValueError("initial_usd must be positive")
if not (0 <= self.stop_loss_pct <= 1):
raise ValueError("stop_loss_pct must be between 0 and 1")
if not (0 <= self.take_profit_pct <= 1):
raise ValueError("take_profit_pct must be between 0 and 1")
# Validate performance parameters
if self.max_workers is not None and self.max_workers <= 0:
raise ValueError("max_workers must be positive")
if self.chunk_size <= 0:
raise ValueError("chunk_size must be positive")
def _ensure_directories(self):
"""Ensure required directories exist."""
os.makedirs(self.data_dir, exist_ok=True)
os.makedirs(self.results_dir, exist_ok=True)
def get_data_path(self) -> str:
"""Get full path to data file."""
return os.path.join(self.data_dir, self.data_file)
def get_results_path(self, filename: str) -> str:
"""Get full path for results file."""
return os.path.join(self.results_dir, filename)
def to_dict(self) -> Dict[str, Any]:
"""Convert configuration to dictionary."""
return {
"data_file": self.data_file,
"start_date": self.start_date,
"end_date": self.end_date,
"initial_usd": self.initial_usd,
"timeframe": self.timeframe,
"stop_loss_pct": self.stop_loss_pct,
"take_profit_pct": self.take_profit_pct,
"max_workers": self.max_workers,
"chunk_size": self.chunk_size,
"data_dir": self.data_dir,
"results_dir": self.results_dir
}
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> 'BacktestConfig':
"""Create configuration from dictionary."""
return cls(**config_dict)
def copy(self, **kwargs) -> 'BacktestConfig':
"""Create a copy of the configuration with optional parameter overrides."""
config_dict = self.to_dict()
config_dict.update(kwargs)
return self.from_dict(config_dict)
def __repr__(self) -> str:
"""String representation of the configuration."""
return (f"BacktestConfig(data_file={self.data_file}, "
f"date_range={self.start_date} to {self.end_date}, "
f"initial_usd=${self.initial_usd})")
class OptimizationConfig:
"""
Configuration for parameter optimization runs.
This class provides additional configuration options specifically for
parameter optimization and grid search operations.
"""
def __init__(self,
base_config: BacktestConfig,
strategy_param_grid: Dict[str, List],
trader_param_grid: Optional[Dict[str, List]] = None,
max_workers: Optional[int] = None,
save_individual_results: bool = True,
save_detailed_logs: bool = False):
"""
Initialize optimization configuration.
Args:
base_config: Base backtesting configuration
strategy_param_grid: Grid of strategy parameters to test
trader_param_grid: Grid of trader parameters to test
max_workers: Maximum number of worker processes
save_individual_results: Whether to save individual strategy results
save_detailed_logs: Whether to save detailed action logs
"""
self.base_config = base_config
self.strategy_param_grid = strategy_param_grid
self.trader_param_grid = trader_param_grid or {}
self.max_workers = max_workers
self.save_individual_results = save_individual_results
self.save_detailed_logs = save_detailed_logs
def get_total_combinations(self) -> int:
"""Calculate total number of parameter combinations."""
from itertools import product
# Calculate strategy combinations
strategy_values = list(self.strategy_param_grid.values())
strategy_combinations = len(list(product(*strategy_values))) if strategy_values else 1
# Calculate trader combinations
trader_values = list(self.trader_param_grid.values())
trader_combinations = len(list(product(*trader_values))) if trader_values else 1
return strategy_combinations * trader_combinations
def to_dict(self) -> Dict[str, Any]:
"""Convert optimization configuration to dictionary."""
return {
"base_config": self.base_config.to_dict(),
"strategy_param_grid": self.strategy_param_grid,
"trader_param_grid": self.trader_param_grid,
"max_workers": self.max_workers,
"save_individual_results": self.save_individual_results,
"save_detailed_logs": self.save_detailed_logs,
"total_combinations": self.get_total_combinations()
}
def __repr__(self) -> str:
"""String representation of the optimization configuration."""
return (f"OptimizationConfig(combinations={self.get_total_combinations()}, "
f"max_workers={self.max_workers})")

View File

@@ -0,0 +1,480 @@
"""
Backtester Utilities
This module provides utility functions for data loading, system resource management,
and result saving for the incremental backtesting framework.
"""
import os
import json
import pandas as pd
import numpy as np
import psutil
from typing import Dict, List, Any, Optional
import logging
from datetime import datetime
logger = logging.getLogger(__name__)
class DataLoader:
"""
Data loading utilities for backtesting.
This class handles loading and preprocessing of market data from various formats
including CSV and JSON files.
"""
def __init__(self, data_dir: str = "data"):
"""
Initialize data loader.
Args:
data_dir: Directory containing data files
"""
self.data_dir = data_dir
os.makedirs(self.data_dir, exist_ok=True)
def load_data(self, file_path: str, start_date: str, end_date: str) -> pd.DataFrame:
"""
Load data with optimized dtypes and filtering, supporting CSV and JSON input.
Args:
file_path: Path to the data file (relative to data_dir)
start_date: Start date for filtering (YYYY-MM-DD format)
end_date: End date for filtering (YYYY-MM-DD format)
Returns:
pd.DataFrame: Loaded OHLCV data with DatetimeIndex
"""
full_path = os.path.join(self.data_dir, file_path)
if not os.path.exists(full_path):
raise FileNotFoundError(f"Data file not found: {full_path}")
# Determine file type
_, ext = os.path.splitext(file_path)
ext = ext.lower()
try:
if ext == ".json":
return self._load_json_data(full_path, start_date, end_date)
else:
return self._load_csv_data(full_path, start_date, end_date)
except Exception as e:
logger.error(f"Error loading data from {file_path}: {e}")
# Return an empty DataFrame with a DatetimeIndex
return pd.DataFrame(index=pd.to_datetime([]))
def _load_json_data(self, file_path: str, start_date: str, end_date: str) -> pd.DataFrame:
"""Load data from JSON file."""
with open(file_path, 'r') as f:
raw = json.load(f)
data = pd.DataFrame(raw["Data"])
# Convert columns to lowercase
data.columns = data.columns.str.lower()
# Convert timestamp to datetime
data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
# Filter by date range
data = data[(data["timestamp"] >= start_date) & (data["timestamp"] <= end_date)]
logger.info(f"JSON data loaded: {len(data)} rows for {start_date} to {end_date}")
return data.set_index("timestamp")
def _load_csv_data(self, file_path: str, start_date: str, end_date: str) -> pd.DataFrame:
"""Load data from CSV file."""
# Define optimized dtypes
dtypes = {
'Open': 'float32',
'High': 'float32',
'Low': 'float32',
'Close': 'float32',
'Volume': 'float32'
}
# Read data with original capitalized column names
data = pd.read_csv(file_path, dtype=dtypes)
# Handle timestamp column
if 'Timestamp' in data.columns:
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
# Filter by date range
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= end_date)]
# Convert column names to lowercase
data.columns = data.columns.str.lower()
logger.info(f"CSV data loaded: {len(data)} rows for {start_date} to {end_date}")
return data.set_index('timestamp')
else:
# Attempt to use the first column if 'Timestamp' is not present
data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= end_date)]
data.columns = data.columns.str.lower()
logger.info(f"CSV data loaded (first column as timestamp): {len(data)} rows for {start_date} to {end_date}")
return data.set_index('timestamp')
def validate_data(self, data: pd.DataFrame) -> bool:
"""
Validate loaded data for required columns and basic integrity.
Args:
data: DataFrame to validate
Returns:
bool: True if data is valid
"""
if data.empty:
logger.error("Data is empty")
return False
required_columns = ['open', 'high', 'low', 'close', 'volume']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
logger.error(f"Missing required columns: {missing_columns}")
return False
# Check for NaN values
if data[required_columns].isnull().any().any():
logger.warning("Data contains NaN values")
# Check for negative prices
price_columns = ['open', 'high', 'low', 'close']
if (data[price_columns] <= 0).any().any():
logger.warning("Data contains non-positive prices")
# Check OHLC consistency
if not ((data['low'] <= data['open']) &
(data['low'] <= data['close']) &
(data['high'] >= data['open']) &
(data['high'] >= data['close'])).all():
logger.warning("Data contains OHLC inconsistencies")
return True
class SystemUtils:
"""
System resource management utilities.
This class provides methods for determining optimal system resource usage
for parallel processing and performance optimization.
"""
def __init__(self):
"""Initialize system utilities."""
pass
def get_optimal_workers(self) -> int:
"""
Determine optimal number of worker processes based on system resources.
Returns:
int: Optimal number of worker processes
"""
cpu_count = os.cpu_count() or 4
memory_gb = psutil.virtual_memory().total / (1024**3)
# Heuristic: Use 75% of cores, but cap based on available memory
# Assume each worker needs ~2GB for large datasets
workers_by_memory = max(1, int(memory_gb / 2))
workers_by_cpu = max(1, int(cpu_count * 0.75))
optimal_workers = min(workers_by_cpu, workers_by_memory)
logger.info(f"System resources: {cpu_count} CPUs, {memory_gb:.1f}GB RAM")
logger.info(f"Using {optimal_workers} workers for processing")
return optimal_workers
def get_system_info(self) -> Dict[str, Any]:
"""
Get comprehensive system information.
Returns:
Dict containing system information
"""
memory = psutil.virtual_memory()
return {
"cpu_count": os.cpu_count(),
"memory_total_gb": memory.total / (1024**3),
"memory_available_gb": memory.available / (1024**3),
"memory_percent": memory.percent,
"optimal_workers": self.get_optimal_workers()
}
class ResultsSaver:
"""
Results saving utilities for backtesting.
This class handles saving backtest results in various formats including
CSV, JSON, and comprehensive reports.
"""
def __init__(self, results_dir: str = "results"):
"""
Initialize results saver.
Args:
results_dir: Directory for saving results
"""
self.results_dir = results_dir
os.makedirs(self.results_dir, exist_ok=True)
def save_results_csv(self, results: List[Dict[str, Any]], filename: str) -> None:
"""
Save backtest results to CSV file.
Args:
results: List of backtest results
filename: Output filename
"""
try:
# Convert results to DataFrame for easy saving
df_data = []
for result in results:
if result.get("success", True):
row = {
"strategy_name": result.get("strategy_name", ""),
"profit_ratio": result.get("profit_ratio", 0),
"final_usd": result.get("final_usd", 0),
"n_trades": result.get("n_trades", 0),
"win_rate": result.get("win_rate", 0),
"max_drawdown": result.get("max_drawdown", 0),
"avg_trade": result.get("avg_trade", 0),
"total_fees_usd": result.get("total_fees_usd", 0),
"backtest_duration_seconds": result.get("backtest_duration_seconds", 0),
"data_points_processed": result.get("data_points_processed", 0)
}
# Add strategy parameters
strategy_params = result.get("strategy_params", {})
for key, value in strategy_params.items():
row[f"strategy_{key}"] = value
# Add trader parameters
trader_params = result.get("trader_params", {})
for key, value in trader_params.items():
row[f"trader_{key}"] = value
df_data.append(row)
# Save to CSV
df = pd.DataFrame(df_data)
full_path = os.path.join(self.results_dir, filename)
df.to_csv(full_path, index=False)
logger.info(f"Results saved to {full_path}: {len(df_data)} rows")
except Exception as e:
logger.error(f"Error saving results to {filename}: {e}")
raise
def save_comprehensive_results(self, results: List[Dict[str, Any]],
base_filename: str,
summary: Optional[Dict[str, Any]] = None,
action_log: Optional[List[Dict[str, Any]]] = None,
session_start_time: Optional[datetime] = None) -> None:
"""
Save comprehensive backtest results including summary, individual results, and logs.
Args:
results: List of backtest results
base_filename: Base filename (without extension)
summary: Optional summary statistics
action_log: Optional action log
session_start_time: Optional session start time
"""
try:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
session_start = session_start_time or datetime.now()
# 1. Save summary report
if summary is None:
summary = self._calculate_summary_statistics(results)
summary_data = {
"session_info": {
"timestamp": timestamp,
"session_start": session_start.isoformat(),
"session_duration_seconds": (datetime.now() - session_start).total_seconds()
},
"summary_statistics": summary,
"action_log_summary": {
"total_actions": len(action_log) if action_log else 0,
"action_types": list(set(action["action_type"] for action in action_log)) if action_log else []
}
}
summary_filename = f"{base_filename}_summary_{timestamp}.json"
self._save_json(summary_data, summary_filename)
# 2. Save detailed results CSV
self.save_results_csv(results, f"{base_filename}_detailed_{timestamp}.csv")
# 3. Save individual strategy results
valid_results = [r for r in results if r.get("success", True)]
for i, result in enumerate(valid_results):
strategy_filename = f"{base_filename}_strategy_{i+1}_{result['strategy_name']}_{timestamp}.json"
strategy_data = self._format_strategy_result(result)
self._save_json(strategy_data, strategy_filename)
# 4. Save action log if provided
if action_log:
action_log_filename = f"{base_filename}_actions_{timestamp}.json"
action_log_data = {
"session_info": {
"timestamp": timestamp,
"session_start": session_start.isoformat(),
"total_actions": len(action_log)
},
"actions": action_log
}
self._save_json(action_log_data, action_log_filename)
# 5. Create master index file
index_filename = f"{base_filename}_index_{timestamp}.json"
index_data = self._create_index_file(base_filename, timestamp, valid_results, summary)
self._save_json(index_data, index_filename)
# Print summary
print(f"\n📊 Comprehensive results saved:")
print(f" 📋 Summary: {self.results_dir}/{summary_filename}")
print(f" 📈 Detailed CSV: {self.results_dir}/{base_filename}_detailed_{timestamp}.csv")
if action_log:
print(f" 📝 Action Log: {self.results_dir}/{action_log_filename}")
print(f" 📁 Individual Strategies: {len(valid_results)} files")
print(f" 🗂️ Master Index: {self.results_dir}/{index_filename}")
except Exception as e:
logger.error(f"Error saving comprehensive results: {e}")
raise
def _save_json(self, data: Dict[str, Any], filename: str) -> None:
"""Save data to JSON file."""
full_path = os.path.join(self.results_dir, filename)
with open(full_path, 'w') as f:
json.dump(data, f, indent=2, default=str)
logger.info(f"JSON saved: {full_path}")
def _calculate_summary_statistics(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Calculate summary statistics from results."""
valid_results = [r for r in results if r.get("success", True)]
if not valid_results:
return {
"total_runs": len(results),
"successful_runs": 0,
"failed_runs": len(results),
"error": "No valid results to summarize"
}
# Extract metrics
profit_ratios = [r["profit_ratio"] for r in valid_results]
final_balances = [r["final_usd"] for r in valid_results]
n_trades_list = [r["n_trades"] for r in valid_results]
win_rates = [r["win_rate"] for r in valid_results]
max_drawdowns = [r["max_drawdown"] for r in valid_results]
return {
"total_runs": len(results),
"successful_runs": len(valid_results),
"failed_runs": len(results) - len(valid_results),
"profit_ratio": {
"mean": np.mean(profit_ratios),
"std": np.std(profit_ratios),
"min": np.min(profit_ratios),
"max": np.max(profit_ratios),
"median": np.median(profit_ratios)
},
"final_usd": {
"mean": np.mean(final_balances),
"std": np.std(final_balances),
"min": np.min(final_balances),
"max": np.max(final_balances),
"median": np.median(final_balances)
},
"n_trades": {
"mean": np.mean(n_trades_list),
"std": np.std(n_trades_list),
"min": np.min(n_trades_list),
"max": np.max(n_trades_list),
"median": np.median(n_trades_list)
},
"win_rate": {
"mean": np.mean(win_rates),
"std": np.std(win_rates),
"min": np.min(win_rates),
"max": np.max(win_rates),
"median": np.median(win_rates)
},
"max_drawdown": {
"mean": np.mean(max_drawdowns),
"std": np.std(max_drawdowns),
"min": np.min(max_drawdowns),
"max": np.max(max_drawdowns),
"median": np.median(max_drawdowns)
},
"best_run": max(valid_results, key=lambda x: x["profit_ratio"]),
"worst_run": min(valid_results, key=lambda x: x["profit_ratio"])
}
def _format_strategy_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""Format individual strategy result for saving."""
return {
"strategy_info": {
"name": result['strategy_name'],
"params": result.get('strategy_params', {}),
"trader_params": result.get('trader_params', {})
},
"performance": {
"initial_usd": result['initial_usd'],
"final_usd": result['final_usd'],
"profit_ratio": result['profit_ratio'],
"n_trades": result['n_trades'],
"win_rate": result['win_rate'],
"max_drawdown": result['max_drawdown'],
"avg_trade": result['avg_trade'],
"total_fees_usd": result['total_fees_usd']
},
"execution": {
"backtest_duration_seconds": result.get('backtest_duration_seconds', 0),
"data_points_processed": result.get('data_points_processed', 0),
"warmup_complete": result.get('warmup_complete', False)
},
"trades": result.get('trades', [])
}
def _create_index_file(self, base_filename: str, timestamp: str,
valid_results: List[Dict[str, Any]],
summary: Dict[str, Any]) -> Dict[str, Any]:
"""Create master index file."""
return {
"session_info": {
"timestamp": timestamp,
"base_filename": base_filename,
"total_strategies": len(valid_results)
},
"files": {
"summary": f"{base_filename}_summary_{timestamp}.json",
"detailed_csv": f"{base_filename}_detailed_{timestamp}.csv",
"individual_strategies": [
f"{base_filename}_strategy_{i+1}_{result['strategy_name']}_{timestamp}.json"
for i, result in enumerate(valid_results)
]
},
"quick_stats": {
"best_profit": summary.get("profit_ratio", {}).get("max", 0) if summary.get("profit_ratio") else 0,
"worst_profit": summary.get("profit_ratio", {}).get("min", 0) if summary.get("profit_ratio") else 0,
"avg_profit": summary.get("profit_ratio", {}).get("mean", 0) if summary.get("profit_ratio") else 0,
"total_successful_runs": summary.get("successful_runs", 0),
"total_failed_runs": summary.get("failed_runs", 0)
}
}