Merge branch 'xgboost'
# Conflicts: # .gitignore # README.md # cycles/backtest.py # main.py # pyproject.toml # uv.lock
This commit is contained in:
@@ -1,70 +1,30 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import logging
|
||||
from scipy.signal import find_peaks
|
||||
from matplotlib.patches import Rectangle
|
||||
from scipy import stats
|
||||
import concurrent.futures
|
||||
from functools import partial
|
||||
from functools import lru_cache
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Color configuration
|
||||
# Plot colors
|
||||
DARK_BG_COLOR = '#181C27'
|
||||
LEGEND_BG_COLOR = '#333333'
|
||||
TITLE_COLOR = 'white'
|
||||
AXIS_LABEL_COLOR = 'white'
|
||||
|
||||
# Candlestick colors
|
||||
CANDLE_UP_COLOR = '#089981' # Green
|
||||
CANDLE_DOWN_COLOR = '#F23645' # Red
|
||||
|
||||
# Marker colors
|
||||
MIN_COLOR = 'red'
|
||||
MAX_COLOR = 'green'
|
||||
|
||||
# Line style colors
|
||||
MIN_LINE_STYLE = 'g--' # Green dashed
|
||||
MAX_LINE_STYLE = 'r--' # Red dashed
|
||||
SMA7_LINE_STYLE = 'y-' # Yellow solid
|
||||
SMA15_LINE_STYLE = 'm-' # Magenta solid
|
||||
|
||||
# SuperTrend colors
|
||||
ST_COLOR_UP = 'g-'
|
||||
ST_COLOR_DOWN = 'r-'
|
||||
|
||||
# Cache the calculation results by function parameters
|
||||
@lru_cache(maxsize=32)
|
||||
def cached_supertrend_calculation(period, multiplier, data_tuple):
|
||||
# Convert tuple back to numpy arrays
|
||||
high = np.array(data_tuple[0])
|
||||
low = np.array(data_tuple[1])
|
||||
close = np.array(data_tuple[2])
|
||||
|
||||
# Calculate TR and ATR using vectorized operations
|
||||
tr = np.zeros_like(close)
|
||||
tr[0] = high[0] - low[0]
|
||||
hc_range = np.abs(high[1:] - close[:-1])
|
||||
lc_range = np.abs(low[1:] - close[:-1])
|
||||
hl_range = high[1:] - low[1:]
|
||||
tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
|
||||
|
||||
# Use numpy's exponential moving average
|
||||
atr = np.zeros_like(tr)
|
||||
atr[0] = tr[0]
|
||||
multiplier_ema = 2.0 / (period + 1)
|
||||
for i in range(1, len(tr)):
|
||||
atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
|
||||
|
||||
# Calculate bands
|
||||
upper_band = np.zeros_like(close)
|
||||
lower_band = np.zeros_like(close)
|
||||
for i in range(len(close)):
|
||||
hl_avg = (high[i] + low[i]) / 2
|
||||
upper_band[i] = hl_avg + (multiplier * atr[i])
|
||||
lower_band[i] = hl_avg - (multiplier * atr[i])
|
||||
|
||||
final_upper = np.zeros_like(close)
|
||||
final_lower = np.zeros_like(close)
|
||||
supertrend = np.zeros_like(close)
|
||||
@@ -105,232 +65,151 @@ def cached_supertrend_calculation(period, multiplier, data_tuple):
|
||||
'lower_band': final_lower
|
||||
}
|
||||
|
||||
def calculate_supertrend_external(data, period, multiplier):
|
||||
# Convert DataFrame columns to hashable tuples
|
||||
def calculate_supertrend_external(data, period, multiplier, close_column='close'):
|
||||
"""
|
||||
External function to calculate SuperTrend with configurable close column
|
||||
|
||||
Parameters:
|
||||
- data: DataFrame with OHLC data
|
||||
- period: int, period for ATR calculation
|
||||
- multiplier: float, multiplier for ATR
|
||||
- close_column: str, name of the column to use as close price (default: 'close')
|
||||
"""
|
||||
high_tuple = tuple(data['high'])
|
||||
low_tuple = tuple(data['low'])
|
||||
close_tuple = tuple(data['close'])
|
||||
|
||||
# Call the cached function
|
||||
close_tuple = tuple(data[close_column])
|
||||
return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))
|
||||
|
||||
|
||||
class Supertrends:
|
||||
def __init__(self, data, verbose=False, display=False):
|
||||
def __init__(self, data, close_column='close', verbose=False, display=False):
|
||||
"""
|
||||
Initialize the TrendDetectorSimple class.
|
||||
Initialize Supertrends calculator
|
||||
|
||||
Parameters:
|
||||
- data: pandas DataFrame containing price data
|
||||
- verbose: boolean, whether to display detailed logging information
|
||||
- display: boolean, whether to enable display/plotting features
|
||||
- data: pandas DataFrame with OHLC data or list of prices
|
||||
- close_column: str, name of the column to use as close price (default: 'close')
|
||||
- verbose: bool, enable verbose logging
|
||||
- display: bool, display mode (currently unused)
|
||||
"""
|
||||
|
||||
self.close_column = close_column
|
||||
self.data = data
|
||||
self.verbose = verbose
|
||||
self.display = display
|
||||
|
||||
# Only define display-related variables if display is True
|
||||
if self.display:
|
||||
# Plot style configuration
|
||||
self.plot_style = 'dark_background'
|
||||
self.bg_color = DARK_BG_COLOR
|
||||
self.plot_size = (12, 8)
|
||||
|
||||
# Candlestick configuration
|
||||
self.candle_width = 0.6
|
||||
self.candle_up_color = CANDLE_UP_COLOR
|
||||
self.candle_down_color = CANDLE_DOWN_COLOR
|
||||
self.candle_alpha = 0.8
|
||||
self.wick_width = 1
|
||||
|
||||
# Marker configuration
|
||||
self.min_marker = '^'
|
||||
self.min_color = MIN_COLOR
|
||||
self.min_size = 100
|
||||
self.max_marker = 'v'
|
||||
self.max_color = MAX_COLOR
|
||||
self.max_size = 100
|
||||
self.marker_zorder = 100
|
||||
|
||||
# Line configuration
|
||||
self.line_width = 1
|
||||
self.min_line_style = MIN_LINE_STYLE
|
||||
self.max_line_style = MAX_LINE_STYLE
|
||||
self.sma7_line_style = SMA7_LINE_STYLE
|
||||
self.sma15_line_style = SMA15_LINE_STYLE
|
||||
|
||||
# Text configuration
|
||||
self.title_size = 14
|
||||
self.title_color = TITLE_COLOR
|
||||
self.axis_label_size = 12
|
||||
self.axis_label_color = AXIS_LABEL_COLOR
|
||||
|
||||
# Legend configuration
|
||||
self.legend_loc = 'best'
|
||||
self.legend_bg_color = LEGEND_BG_COLOR
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
self.logger = logging.getLogger('TrendDetectorSimple')
|
||||
|
||||
# Convert data to pandas DataFrame if it's not already
|
||||
if not isinstance(self.data, pd.DataFrame):
|
||||
if isinstance(self.data, list):
|
||||
self.data = pd.DataFrame({'close': self.data})
|
||||
self.data = pd.DataFrame({self.close_column: self.data})
|
||||
else:
|
||||
raise ValueError("Data must be a pandas DataFrame or a list")
|
||||
|
||||
# Validate that required columns exist
|
||||
required_columns = ['high', 'low', self.close_column]
|
||||
missing_columns = [col for col in required_columns if col not in self.data.columns]
|
||||
if missing_columns:
|
||||
raise ValueError(f"Missing required columns: {missing_columns}")
|
||||
|
||||
def calculate_tr(self):
|
||||
"""Calculate True Range using the configured close column"""
|
||||
df = self.data.copy()
|
||||
high = df['high'].values
|
||||
low = df['low'].values
|
||||
close = df[self.close_column].values
|
||||
tr = np.zeros_like(close)
|
||||
tr[0] = high[0] - low[0]
|
||||
for i in range(1, len(close)):
|
||||
hl_range = high[i] - low[i]
|
||||
hc_range = abs(high[i] - close[i-1])
|
||||
lc_range = abs(low[i] - close[i-1])
|
||||
tr[i] = max(hl_range, hc_range, lc_range)
|
||||
return tr
|
||||
|
||||
def calculate_atr(self, period=14):
|
||||
"""Calculate Average True Range"""
|
||||
tr = self.calculate_tr()
|
||||
atr = np.zeros_like(tr)
|
||||
atr[0] = tr[0]
|
||||
multiplier = 2.0 / (period + 1)
|
||||
for i in range(1, len(tr)):
|
||||
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
|
||||
return atr
|
||||
|
||||
def calculate_supertrend(self, period=10, multiplier=3.0):
|
||||
"""
|
||||
Calculate True Range (TR) for the price data.
|
||||
Calculate SuperTrend indicator for the price data using the configured close column.
|
||||
SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
|
||||
|
||||
True Range is the greatest of:
|
||||
1. Current high - current low
|
||||
2. |Current high - previous close|
|
||||
3. |Current low - previous close|
|
||||
Parameters:
|
||||
- period: int, the period for the ATR calculation (default: 10)
|
||||
- multiplier: float, the multiplier for the ATR (default: 3.0)
|
||||
|
||||
Returns:
|
||||
- Numpy array of TR values
|
||||
- Dictionary containing SuperTrend values, trend direction, and upper/lower bands
|
||||
"""
|
||||
df = self.data.copy()
|
||||
high = df['high'].values
|
||||
low = df['low'].values
|
||||
close = df['close'].values
|
||||
|
||||
tr = np.zeros_like(close)
|
||||
tr[0] = high[0] - low[0] # First TR is just the first day's range
|
||||
|
||||
close = df[self.close_column].values
|
||||
atr = self.calculate_atr(period)
|
||||
upper_band = np.zeros_like(close)
|
||||
lower_band = np.zeros_like(close)
|
||||
for i in range(len(close)):
|
||||
hl_avg = (high[i] + low[i]) / 2
|
||||
upper_band[i] = hl_avg + (multiplier * atr[i])
|
||||
lower_band[i] = hl_avg - (multiplier * atr[i])
|
||||
final_upper = np.zeros_like(close)
|
||||
final_lower = np.zeros_like(close)
|
||||
supertrend = np.zeros_like(close)
|
||||
trend = np.zeros_like(close)
|
||||
final_upper[0] = upper_band[0]
|
||||
final_lower[0] = lower_band[0]
|
||||
if close[0] <= upper_band[0]:
|
||||
supertrend[0] = upper_band[0]
|
||||
trend[0] = -1
|
||||
else:
|
||||
supertrend[0] = lower_band[0]
|
||||
trend[0] = 1
|
||||
for i in range(1, len(close)):
|
||||
# Current high - current low
|
||||
hl_range = high[i] - low[i]
|
||||
# |Current high - previous close|
|
||||
hc_range = abs(high[i] - close[i-1])
|
||||
# |Current low - previous close|
|
||||
lc_range = abs(low[i] - close[i-1])
|
||||
|
||||
# TR is the maximum of these three values
|
||||
tr[i] = max(hl_range, hc_range, lc_range)
|
||||
|
||||
return tr
|
||||
|
||||
def calculate_atr(self, period=14):
|
||||
"""
|
||||
Calculate Average True Range (ATR) for the price data.
|
||||
|
||||
ATR is the exponential moving average of the True Range over a specified period.
|
||||
|
||||
Parameters:
|
||||
- period: int, the period for the ATR calculation (default: 14)
|
||||
|
||||
Returns:
|
||||
- Numpy array of ATR values
|
||||
"""
|
||||
|
||||
tr = self.calculate_tr()
|
||||
atr = np.zeros_like(tr)
|
||||
|
||||
# First ATR value is just the first TR
|
||||
atr[0] = tr[0]
|
||||
|
||||
# Calculate exponential moving average (EMA) of TR
|
||||
multiplier = 2.0 / (period + 1)
|
||||
|
||||
for i in range(1, len(tr)):
|
||||
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
|
||||
|
||||
return atr
|
||||
|
||||
def detect_trends(self):
|
||||
"""
|
||||
Detect trends by identifying local minima and maxima in the price data
|
||||
using scipy.signal.find_peaks.
|
||||
|
||||
Parameters:
|
||||
- prominence: float, required prominence of peaks (relative to the price range)
|
||||
- width: int, required width of peaks in data points
|
||||
|
||||
Returns:
|
||||
- DataFrame with columns for timestamps, prices, and trend indicators
|
||||
- Dictionary containing analysis results including linear regression, SMAs, and SuperTrend indicators
|
||||
"""
|
||||
df = self.data
|
||||
# close_prices = df['close'].values
|
||||
|
||||
# max_peaks, _ = find_peaks(close_prices)
|
||||
# min_peaks, _ = find_peaks(-close_prices)
|
||||
|
||||
# df['is_min'] = False
|
||||
# df['is_max'] = False
|
||||
|
||||
# for peak in max_peaks:
|
||||
# df.at[peak, 'is_max'] = True
|
||||
# for peak in min_peaks:
|
||||
# df.at[peak, 'is_min'] = True
|
||||
|
||||
# result = df[['timestamp', 'close', 'is_min', 'is_max']].copy()
|
||||
|
||||
# Perform linear regression on min_peaks and max_peaks
|
||||
# min_prices = df['close'].iloc[min_peaks].values
|
||||
# max_prices = df['close'].iloc[max_peaks].values
|
||||
|
||||
# Linear regression for min peaks if we have at least 2 points
|
||||
# min_slope, min_intercept, min_r_value, _, _ = stats.linregress(min_peaks, min_prices)
|
||||
# Linear regression for max peaks if we have at least 2 points
|
||||
# max_slope, max_intercept, max_r_value, _, _ = stats.linregress(max_peaks, max_prices)
|
||||
if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
|
||||
final_upper[i] = upper_band[i]
|
||||
else:
|
||||
final_upper[i] = final_upper[i-1]
|
||||
if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
|
||||
final_lower[i] = lower_band[i]
|
||||
else:
|
||||
final_lower[i] = final_lower[i-1]
|
||||
if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
|
||||
supertrend[i] = final_upper[i]
|
||||
trend[i] = -1
|
||||
elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
|
||||
supertrend[i] = final_lower[i]
|
||||
trend[i] = 1
|
||||
elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
|
||||
supertrend[i] = final_lower[i]
|
||||
trend[i] = 1
|
||||
elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
|
||||
supertrend[i] = final_upper[i]
|
||||
trend[i] = -1
|
||||
supertrend_results = {
|
||||
'supertrend': supertrend,
|
||||
'trend': trend,
|
||||
'upper_band': final_upper,
|
||||
'lower_band': final_lower
|
||||
}
|
||||
return supertrend_results
|
||||
|
||||
# Calculate Simple Moving Averages (SMA) for 7 and 15 periods
|
||||
# sma_7 = pd.Series(close_prices).rolling(window=7, min_periods=1).mean().values
|
||||
# sma_15 = pd.Series(close_prices).rolling(window=15, min_periods=1).mean().values
|
||||
|
||||
analysis_results = {}
|
||||
# analysis_results['linear_regression'] = {
|
||||
# 'min': {
|
||||
# 'slope': min_slope,
|
||||
# 'intercept': min_intercept,
|
||||
# 'r_squared': min_r_value ** 2
|
||||
# },
|
||||
# 'max': {
|
||||
# 'slope': max_slope,
|
||||
# 'intercept': max_intercept,
|
||||
# 'r_squared': max_r_value ** 2
|
||||
# }
|
||||
# }
|
||||
# analysis_results['sma'] = {
|
||||
# '7': sma_7,
|
||||
# '15': sma_15
|
||||
# }
|
||||
|
||||
# Calculate SuperTrend indicators
|
||||
supertrend_results_list = self._calculate_supertrend_indicators()
|
||||
analysis_results['supertrend'] = supertrend_results_list
|
||||
|
||||
return analysis_results
|
||||
|
||||
def calculate_supertrend_indicators(self):
|
||||
"""
|
||||
Calculate SuperTrend indicators with different parameter sets in parallel.
|
||||
Returns:
|
||||
- list, the SuperTrend results
|
||||
"""
|
||||
supertrend_params = [
|
||||
{"period": 12, "multiplier": 3.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
||||
{"period": 10, "multiplier": 1.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
||||
{"period": 11, "multiplier": 2.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN}
|
||||
{"period": 12, "multiplier": 3.0},
|
||||
{"period": 10, "multiplier": 1.0},
|
||||
{"period": 11, "multiplier": 2.0}
|
||||
]
|
||||
data = self.data.copy()
|
||||
|
||||
# For just 3 calculations, direct calculation might be faster than process pool
|
||||
results = []
|
||||
for p in supertrend_params:
|
||||
result = calculate_supertrend_external(data, p["period"], p["multiplier"])
|
||||
results.append(result)
|
||||
|
||||
supertrend_results_list = []
|
||||
for params, result in zip(supertrend_params, results):
|
||||
supertrend_results_list.append({
|
||||
result = self.calculate_supertrend(period=p["period"], multiplier=p["multiplier"])
|
||||
results.append({
|
||||
"results": result,
|
||||
"params": params
|
||||
"params": p
|
||||
})
|
||||
return supertrend_results_list
|
||||
return results
|
||||
|
||||
@@ -1,167 +1,332 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
from cycles.supertrend import Supertrends
|
||||
from cycles.market_fees import MarketFees
|
||||
|
||||
class Backtest:
|
||||
def __init__(self, initial_usd, df, min1_df, init_strategy_fields) -> None:
|
||||
self.initial_usd = initial_usd
|
||||
self.usd = initial_usd
|
||||
self.max_balance = initial_usd
|
||||
self.coin = 0
|
||||
self.position = 0
|
||||
self.entry_price = 0
|
||||
self.entry_time = None
|
||||
self.current_trade_min1_start_idx = None
|
||||
self.current_min1_end_idx = None
|
||||
self.price_open = None
|
||||
self.price_close = None
|
||||
self.current_date = None
|
||||
self.strategies = {}
|
||||
self.df = df
|
||||
self.min1_df = min1_df
|
||||
|
||||
self.trade_log = []
|
||||
self.drawdowns = []
|
||||
self.trades = []
|
||||
|
||||
self = init_strategy_fields(self)
|
||||
|
||||
def run(self, entry_strategy, exit_strategy, debug=False):
|
||||
@staticmethod
|
||||
def run(min1_df, df, initial_usd, stop_loss_pct, progress_callback=None, verbose=False):
|
||||
"""
|
||||
Runs the backtest using provided entry and exit strategy functions.
|
||||
|
||||
The method iterates over the main DataFrame (self.df), simulating trades based on the entry and exit strategies.
|
||||
It tracks balances, drawdowns, and logs each trade, including fees. At the end, it returns a dictionary of performance statistics.
|
||||
|
||||
Backtest a simple strategy using the meta supertrend (all three supertrends agree).
|
||||
Buys when meta supertrend is positive, sells when negative, applies a percentage stop loss.
|
||||
|
||||
Parameters:
|
||||
- entry_strategy: function, determines when to enter a trade. Should accept (self, i) and return True to enter.
|
||||
- exit_strategy: function, determines when to exit a trade. Should accept (self, i) and return (exit_reason, sell_price) or (None, None) to hold.
|
||||
- debug: bool, whether to print debug info (default: False)
|
||||
|
||||
Returns:
|
||||
- dict with keys: initial_usd, final_usd, n_trades, win_rate, max_drawdown, avg_trade, trade_log, trades, total_fees_usd, and optionally first_trade and last_trade.
|
||||
- min1_df: pandas DataFrame, 1-minute timeframe data for more accurate stop loss checking (optional)
|
||||
- df: pandas DataFrame, main timeframe data for signals
|
||||
- initial_usd: float, starting USD amount
|
||||
- stop_loss_pct: float, stop loss as a fraction (e.g. 0.05 for 5%)
|
||||
- progress_callback: callable, optional callback function to report progress (current_step)
|
||||
- verbose: bool, enable debug logging for stop loss checks
|
||||
"""
|
||||
_df = df.copy().reset_index()
|
||||
|
||||
# Ensure we have a timestamp column regardless of original index name
|
||||
if 'timestamp' not in _df.columns:
|
||||
# If reset_index() created a column with the original index name, rename it
|
||||
if len(_df.columns) > 0 and _df.columns[0] not in ['open', 'high', 'low', 'close', 'volume', 'predicted_close_price']:
|
||||
_df = _df.rename(columns={_df.columns[0]: 'timestamp'})
|
||||
else:
|
||||
raise ValueError("Unable to identify timestamp column in DataFrame")
|
||||
|
||||
_df['timestamp'] = pd.to_datetime(_df['timestamp'])
|
||||
|
||||
supertrends = Supertrends(_df, verbose=False, close_column='predicted_close_price')
|
||||
|
||||
for i in range(1, len(self.df)):
|
||||
self.price_open = self.df['open'].iloc[i]
|
||||
self.price_close = self.df['close'].iloc[i]
|
||||
supertrend_results_list = supertrends.calculate_supertrend_indicators()
|
||||
trends = [st['results']['trend'] for st in supertrend_results_list]
|
||||
trends_arr = np.stack(trends, axis=1)
|
||||
meta_trend = np.where((trends_arr[:,0] == trends_arr[:,1]) & (trends_arr[:,1] == trends_arr[:,2]),
|
||||
trends_arr[:,0], 0)
|
||||
# Shift meta_trend by one to avoid lookahead bias
|
||||
meta_trend_signal = np.roll(meta_trend, 1)
|
||||
meta_trend_signal[0] = 0 # or np.nan, but 0 means 'no signal' for first bar
|
||||
|
||||
position = 0 # 0 = no position, 1 = long
|
||||
entry_price = 0
|
||||
usd = initial_usd
|
||||
coin = 0
|
||||
trade_log = []
|
||||
max_balance = initial_usd
|
||||
drawdowns = []
|
||||
trades = []
|
||||
entry_time = None
|
||||
stop_loss_count = 0 # Track number of stop losses
|
||||
|
||||
# Ensure min1_df has proper DatetimeIndex
|
||||
if min1_df is not None and not min1_df.empty:
|
||||
min1_df.index = pd.to_datetime(min1_df.index)
|
||||
|
||||
for i in range(1, len(_df)):
|
||||
# Report progress if callback is provided
|
||||
if progress_callback:
|
||||
# Update more frequently for better responsiveness
|
||||
update_frequency = max(1, len(_df) // 50) # Update every 2% of dataset (50 updates total)
|
||||
if i % update_frequency == 0 or i == len(_df) - 1: # Always update on last iteration
|
||||
if verbose: # Only print in verbose mode to avoid spam
|
||||
print(f"DEBUG: Progress callback called with i={i}, total={len(_df)-1}")
|
||||
progress_callback(i)
|
||||
|
||||
price_open = _df['open'].iloc[i]
|
||||
price_close = _df['close'].iloc[i]
|
||||
date = _df['timestamp'].iloc[i]
|
||||
prev_mt = meta_trend_signal[i-1]
|
||||
curr_mt = meta_trend_signal[i]
|
||||
|
||||
self.current_date = self.df['timestamp'].iloc[i]
|
||||
# Check stop loss if in position
|
||||
if position == 1:
|
||||
stop_loss_result = Backtest.check_stop_loss(
|
||||
min1_df,
|
||||
entry_time,
|
||||
date,
|
||||
entry_price,
|
||||
stop_loss_pct,
|
||||
coin,
|
||||
verbose=verbose
|
||||
)
|
||||
if stop_loss_result is not None:
|
||||
trade_log_entry, position, coin, entry_price, usd = stop_loss_result
|
||||
trade_log.append(trade_log_entry)
|
||||
stop_loss_count += 1
|
||||
continue
|
||||
|
||||
# check if we are in buy/sell position
|
||||
if self.position == 0:
|
||||
if entry_strategy(self, i):
|
||||
self.handle_entry()
|
||||
elif self.position == 1:
|
||||
exit_test_results, sell_price = exit_strategy(self, i)
|
||||
|
||||
if exit_test_results is not None:
|
||||
self.handle_exit(exit_test_results, sell_price)
|
||||
# Entry: only if not in position and signal changes to 1
|
||||
if position == 0 and prev_mt != 1 and curr_mt == 1:
|
||||
entry_result = Backtest.handle_entry(usd, price_open, date)
|
||||
coin, entry_price, entry_time, usd, position, trade_log_entry = entry_result
|
||||
trade_log.append(trade_log_entry)
|
||||
|
||||
# Exit: only if in position and signal changes from 1 to -1
|
||||
elif position == 1 and prev_mt == 1 and curr_mt == -1:
|
||||
exit_result = Backtest.handle_exit(coin, price_open, entry_price, entry_time, date)
|
||||
usd, coin, position, entry_price, trade_log_entry = exit_result
|
||||
trade_log.append(trade_log_entry)
|
||||
|
||||
# Track drawdown
|
||||
balance = self.usd if self.position == 0 else self.coin * self.price_close
|
||||
balance = usd if position == 0 else coin * price_close
|
||||
if balance > max_balance:
|
||||
max_balance = balance
|
||||
drawdown = (max_balance - balance) / max_balance
|
||||
drawdowns.append(drawdown)
|
||||
|
||||
if balance > self.max_balance:
|
||||
self.max_balance = balance
|
||||
|
||||
drawdown = (self.max_balance - balance) / self.max_balance
|
||||
self.drawdowns.append(drawdown)
|
||||
# Report completion if callback is provided
|
||||
if progress_callback:
|
||||
progress_callback(len(_df) - 1)
|
||||
|
||||
# If still in position at end, sell at last close
|
||||
if self.position == 1:
|
||||
self.handle_exit("EOD", None)
|
||||
|
||||
if position == 1:
|
||||
exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
|
||||
usd, coin, position, entry_price, trade_log_entry = exit_result
|
||||
trade_log.append(trade_log_entry)
|
||||
|
||||
# Calculate statistics
|
||||
final_balance = self.usd
|
||||
n_trades = len(self.trade_log)
|
||||
wins = [1 for t in self.trade_log if t['exit'] is not None and t['exit'] > t['entry']]
|
||||
final_balance = usd
|
||||
n_trades = len(trade_log)
|
||||
wins = [1 for t in trade_log if t['exit'] is not None and t['exit'] > t['entry']]
|
||||
win_rate = len(wins) / n_trades if n_trades > 0 else 0
|
||||
max_drawdown = max(self.drawdowns) if self.drawdowns else 0
|
||||
avg_trade = np.mean([t['exit']/t['entry']-1 for t in self.trade_log if t['exit'] is not None]) if self.trade_log else 0
|
||||
max_drawdown = max(drawdowns) if drawdowns else 0
|
||||
avg_trade = np.mean([t['exit']/t['entry']-1 for t in trade_log if t['exit'] is not None]) if trade_log else 0
|
||||
|
||||
trades = []
|
||||
total_fees_usd = 0.0
|
||||
|
||||
for trade in self.trade_log:
|
||||
for trade in trade_log:
|
||||
if trade['exit'] is not None:
|
||||
profit_pct = (trade['exit'] - trade['entry']) / trade['entry']
|
||||
else:
|
||||
profit_pct = 0.0
|
||||
|
||||
# Validate fee_usd field
|
||||
if 'fee_usd' not in trade:
|
||||
raise ValueError(f"Trade missing required field 'fee_usd': {trade}")
|
||||
fee_usd = trade['fee_usd']
|
||||
if fee_usd is None:
|
||||
raise ValueError(f"Trade fee_usd is None: {trade}")
|
||||
|
||||
# Validate trade type field
|
||||
if 'type' not in trade:
|
||||
raise ValueError(f"Trade missing required field 'type': {trade}")
|
||||
trade_type = trade['type']
|
||||
if trade_type is None:
|
||||
raise ValueError(f"Trade type is None: {trade}")
|
||||
|
||||
trades.append({
|
||||
'entry_time': trade['entry_time'],
|
||||
'exit_time': trade['exit_time'],
|
||||
'entry': trade['entry'],
|
||||
'exit': trade['exit'],
|
||||
'profit_pct': profit_pct,
|
||||
'type': trade['type'],
|
||||
'fee_usd': trade['fee_usd']
|
||||
'type': trade_type,
|
||||
'fee_usd': fee_usd
|
||||
})
|
||||
fee_usd = trade.get('fee_usd')
|
||||
total_fees_usd += fee_usd
|
||||
|
||||
results = {
|
||||
"initial_usd": self.initial_usd,
|
||||
"initial_usd": initial_usd,
|
||||
"final_usd": final_balance,
|
||||
"n_trades": n_trades,
|
||||
"n_stop_loss": stop_loss_count, # Add stop loss count
|
||||
"win_rate": win_rate,
|
||||
"max_drawdown": max_drawdown,
|
||||
"avg_trade": avg_trade,
|
||||
"trade_log": self.trade_log,
|
||||
"trade_log": trade_log,
|
||||
"trades": trades,
|
||||
"total_fees_usd": total_fees_usd,
|
||||
}
|
||||
if n_trades > 0:
|
||||
results["first_trade"] = {
|
||||
"entry_time": self.trade_log[0]['entry_time'],
|
||||
"entry": self.trade_log[0]['entry']
|
||||
"entry_time": trade_log[0]['entry_time'],
|
||||
"entry": trade_log[0]['entry']
|
||||
}
|
||||
results["last_trade"] = {
|
||||
"exit_time": self.trade_log[-1]['exit_time'],
|
||||
"exit": self.trade_log[-1]['exit']
|
||||
"exit_time": trade_log[-1]['exit_time'],
|
||||
"exit": trade_log[-1]['exit']
|
||||
}
|
||||
return results
|
||||
|
||||
def handle_entry(self):
|
||||
entry_fee = MarketFees.calculate_okx_taker_maker_fee(self.usd, is_maker=False)
|
||||
usd_after_fee = self.usd - entry_fee
|
||||
|
||||
self.coin = usd_after_fee / self.price_open
|
||||
self.entry_price = self.price_open
|
||||
self.entry_time = self.current_date
|
||||
self.usd = 0
|
||||
self.position = 1
|
||||
@staticmethod
|
||||
def check_stop_loss(min1_df, entry_time, current_time, entry_price, stop_loss_pct, coin, verbose=False):
|
||||
"""
|
||||
Check if stop loss should be triggered based on 1-minute data
|
||||
|
||||
Args:
|
||||
min1_df: 1-minute DataFrame with DatetimeIndex
|
||||
entry_time: Entry timestamp
|
||||
current_time: Current timestamp
|
||||
entry_price: Entry price
|
||||
stop_loss_pct: Stop loss percentage (e.g. 0.05 for 5%)
|
||||
coin: Current coin position
|
||||
verbose: Enable debug logging
|
||||
|
||||
Returns:
|
||||
Tuple of (trade_log_entry, position, coin, entry_price, usd) if stop loss triggered, None otherwise
|
||||
"""
|
||||
if min1_df is None or min1_df.empty:
|
||||
if verbose:
|
||||
print("Warning: No 1-minute data available for stop loss checking")
|
||||
return None
|
||||
|
||||
stop_price = entry_price * (1 - stop_loss_pct)
|
||||
|
||||
try:
|
||||
# Ensure min1_df has a DatetimeIndex
|
||||
if not isinstance(min1_df.index, pd.DatetimeIndex):
|
||||
if verbose:
|
||||
print("Warning: min1_df does not have DatetimeIndex")
|
||||
return None
|
||||
|
||||
# Convert entry_time and current_time to pandas Timestamps for comparison
|
||||
entry_ts = pd.to_datetime(entry_time)
|
||||
current_ts = pd.to_datetime(current_time)
|
||||
|
||||
if verbose:
|
||||
print(f"Checking stop loss from {entry_ts} to {current_ts}, stop_price: {stop_price:.2f}")
|
||||
|
||||
# Handle edge case where entry and current time are the same (1-minute timeframe)
|
||||
if entry_ts == current_ts:
|
||||
if verbose:
|
||||
print("Entry and current time are the same, no range to check")
|
||||
return None
|
||||
|
||||
# Find the range of 1-minute data to check (exclusive of entry time, inclusive of current time)
|
||||
# We start from the candle AFTER entry to avoid checking the entry candle itself
|
||||
start_check_time = entry_ts + pd.Timedelta(minutes=1)
|
||||
|
||||
# Get the slice of data to check for stop loss
|
||||
mask = (min1_df.index > entry_ts) & (min1_df.index <= current_ts)
|
||||
min1_slice = min1_df.loc[mask]
|
||||
|
||||
if len(min1_slice) == 0:
|
||||
if verbose:
|
||||
print(f"No 1-minute data found between {start_check_time} and {current_ts}")
|
||||
return None
|
||||
|
||||
if verbose:
|
||||
print(f"Checking {len(min1_slice)} candles for stop loss")
|
||||
|
||||
# Check if any low price in the slice hits the stop loss
|
||||
stop_triggered = (min1_slice['low'] <= stop_price).any()
|
||||
|
||||
if stop_triggered:
|
||||
# Find the exact candle where stop loss was triggered
|
||||
stop_candle = min1_slice[min1_slice['low'] <= stop_price].iloc[0]
|
||||
|
||||
if verbose:
|
||||
print(f"Stop loss triggered at {stop_candle.name}, low: {stop_candle['low']:.2f}")
|
||||
|
||||
# More realistic fill: if open < stop, fill at open, else at stop
|
||||
if stop_candle['open'] < stop_price:
|
||||
sell_price = stop_candle['open']
|
||||
if verbose:
|
||||
print(f"Filled at open price: {sell_price:.2f}")
|
||||
else:
|
||||
sell_price = stop_price
|
||||
if verbose:
|
||||
print(f"Filled at stop price: {sell_price:.2f}")
|
||||
|
||||
btc_to_sell = coin
|
||||
usd_gross = btc_to_sell * sell_price
|
||||
exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
|
||||
usd_after_stop = usd_gross - exit_fee
|
||||
|
||||
trade_log_entry = {
|
||||
'type': 'STOP',
|
||||
'entry': entry_price,
|
||||
'exit': sell_price,
|
||||
'entry_time': entry_time,
|
||||
'exit_time': stop_candle.name,
|
||||
'fee_usd': exit_fee
|
||||
}
|
||||
# After stop loss, reset position and entry, return USD balance
|
||||
return trade_log_entry, 0, 0, 0, usd_after_stop
|
||||
elif verbose:
|
||||
print(f"No stop loss triggered, min low in range: {min1_slice['low'].min():.2f}")
|
||||
|
||||
except Exception as e:
|
||||
# In case of any error, don't trigger stop loss but log the issue
|
||||
error_msg = f"Warning: Stop loss check failed: {e}"
|
||||
print(error_msg)
|
||||
if verbose:
|
||||
import traceback
|
||||
print(traceback.format_exc())
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def handle_entry(usd, price_open, date):
|
||||
entry_fee = MarketFees.calculate_okx_taker_maker_fee(usd, is_maker=False)
|
||||
usd_after_fee = usd - entry_fee
|
||||
coin = usd_after_fee / price_open
|
||||
entry_price = price_open
|
||||
entry_time = date
|
||||
usd = 0
|
||||
position = 1
|
||||
trade_log_entry = {
|
||||
'type': 'BUY',
|
||||
'entry': self.entry_price,
|
||||
'entry': entry_price,
|
||||
'exit': None,
|
||||
'entry_time': self.entry_time,
|
||||
'entry_time': entry_time,
|
||||
'exit_time': None,
|
||||
'fee_usd': entry_fee
|
||||
}
|
||||
self.trade_log.append(trade_log_entry)
|
||||
return coin, entry_price, entry_time, usd, position, trade_log_entry
|
||||
|
||||
def handle_exit(self, exit_reason, sell_price):
|
||||
btc_to_sell = self.coin
|
||||
exit_price = sell_price if sell_price is not None else self.price_open
|
||||
usd_gross = btc_to_sell * exit_price
|
||||
@staticmethod
|
||||
def handle_exit(coin, price_open, entry_price, entry_time, date):
|
||||
btc_to_sell = coin
|
||||
usd_gross = btc_to_sell * price_open
|
||||
exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
|
||||
|
||||
self.usd = usd_gross - exit_fee
|
||||
|
||||
exit_log_entry = {
|
||||
'type': exit_reason,
|
||||
'entry': self.entry_price,
|
||||
'exit': exit_price,
|
||||
'entry_time': self.entry_time,
|
||||
'exit_time': self.current_date,
|
||||
usd = usd_gross - exit_fee
|
||||
trade_log_entry = {
|
||||
'type': 'SELL',
|
||||
'entry': entry_price,
|
||||
'exit': price_open,
|
||||
'entry_time': entry_time,
|
||||
'exit_time': date,
|
||||
'fee_usd': exit_fee
|
||||
}
|
||||
self.coin = 0
|
||||
self.position = 0
|
||||
self.entry_price = 0
|
||||
|
||||
self.trade_log.append(exit_log_entry)
|
||||
|
||||
coin = 0
|
||||
position = 0
|
||||
entry_price = 0
|
||||
return usd, coin, position, entry_price, trade_log_entry
|
||||
152
cycles/utils/data_loader.py
Normal file
152
cycles/utils/data_loader.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
from typing import Union, Optional
|
||||
import logging
|
||||
|
||||
from .storage_utils import (
|
||||
_parse_timestamp_column,
|
||||
_filter_by_date_range,
|
||||
_normalize_column_names,
|
||||
TimestampParsingError,
|
||||
DataLoadingError
|
||||
)
|
||||
|
||||
|
||||
class DataLoader:
|
||||
"""Handles loading and preprocessing of data from various file formats"""
|
||||
|
||||
def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
|
||||
"""Initialize data loader
|
||||
|
||||
Args:
|
||||
data_dir: Directory containing data files
|
||||
logging_instance: Optional logging instance
|
||||
"""
|
||||
self.data_dir = data_dir
|
||||
self.logging = logging_instance
|
||||
|
||||
def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
|
||||
stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
|
||||
"""Load data with optimized dtypes and filtering, supporting CSV and JSON input
|
||||
|
||||
Args:
|
||||
file_path: path to the data file
|
||||
start_date: start date (string or datetime-like)
|
||||
stop_date: stop date (string or datetime-like)
|
||||
|
||||
Returns:
|
||||
pandas DataFrame with timestamp index
|
||||
|
||||
Raises:
|
||||
DataLoadingError: If data loading fails
|
||||
"""
|
||||
try:
|
||||
# Convert string dates to pandas datetime objects for proper comparison
|
||||
start_date = pd.to_datetime(start_date)
|
||||
stop_date = pd.to_datetime(stop_date)
|
||||
|
||||
# Determine file type
|
||||
_, ext = os.path.splitext(file_path)
|
||||
ext = ext.lower()
|
||||
|
||||
if ext == ".json":
|
||||
return self._load_json_data(file_path, start_date, stop_date)
|
||||
else:
|
||||
return self._load_csv_data(file_path, start_date, stop_date)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error loading data from {file_path}: {e}"
|
||||
if self.logging is not None:
|
||||
self.logging.error(error_msg)
|
||||
# Return an empty DataFrame with a DatetimeIndex
|
||||
return pd.DataFrame(index=pd.to_datetime([]))
|
||||
|
||||
def _load_json_data(self, file_path: str, start_date: pd.Timestamp,
|
||||
stop_date: pd.Timestamp) -> pd.DataFrame:
|
||||
"""Load and process JSON data file
|
||||
|
||||
Args:
|
||||
file_path: Path to JSON file
|
||||
start_date: Start date for filtering
|
||||
stop_date: Stop date for filtering
|
||||
|
||||
Returns:
|
||||
Processed DataFrame with timestamp index
|
||||
"""
|
||||
with open(os.path.join(self.data_dir, file_path), 'r') as f:
|
||||
raw = json.load(f)
|
||||
|
||||
data = pd.DataFrame(raw["Data"])
|
||||
data = _normalize_column_names(data)
|
||||
|
||||
# Convert timestamp to datetime
|
||||
data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
|
||||
|
||||
# Filter by date range
|
||||
data = _filter_by_date_range(data, "timestamp", start_date, stop_date)
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
|
||||
return data.set_index("timestamp")
|
||||
|
||||
def _load_csv_data(self, file_path: str, start_date: pd.Timestamp,
|
||||
stop_date: pd.Timestamp) -> pd.DataFrame:
|
||||
"""Load and process CSV data file
|
||||
|
||||
Args:
|
||||
file_path: Path to CSV file
|
||||
start_date: Start date for filtering
|
||||
stop_date: Stop date for filtering
|
||||
|
||||
Returns:
|
||||
Processed DataFrame with timestamp index
|
||||
"""
|
||||
# Define optimized dtypes
|
||||
dtypes = {
|
||||
'Open': 'float32',
|
||||
'High': 'float32',
|
||||
'Low': 'float32',
|
||||
'Close': 'float32',
|
||||
'Volume': 'float32'
|
||||
}
|
||||
|
||||
# Read data with original capitalized column names
|
||||
data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
|
||||
|
||||
return self._process_csv_timestamps(data, start_date, stop_date, file_path)
|
||||
|
||||
def _process_csv_timestamps(self, data: pd.DataFrame, start_date: pd.Timestamp,
|
||||
stop_date: pd.Timestamp, file_path: str) -> pd.DataFrame:
|
||||
"""Process timestamps in CSV data and filter by date range
|
||||
|
||||
Args:
|
||||
data: DataFrame with CSV data
|
||||
start_date: Start date for filtering
|
||||
stop_date: Stop date for filtering
|
||||
file_path: Original file path for logging
|
||||
|
||||
Returns:
|
||||
Processed DataFrame with timestamp index
|
||||
"""
|
||||
if 'Timestamp' in data.columns:
|
||||
data = _parse_timestamp_column(data, 'Timestamp')
|
||||
data = _filter_by_date_range(data, 'Timestamp', start_date, stop_date)
|
||||
data = _normalize_column_names(data)
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
|
||||
return data.set_index('timestamp')
|
||||
else:
|
||||
# Attempt to use the first column if 'Timestamp' is not present
|
||||
data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
|
||||
data = _parse_timestamp_column(data, 'timestamp')
|
||||
data = _filter_by_date_range(data, 'timestamp', start_date, stop_date)
|
||||
data = _normalize_column_names(data)
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
|
||||
|
||||
return data.set_index('timestamp')
|
||||
106
cycles/utils/data_saver.py
Normal file
106
cycles/utils/data_saver.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from .storage_utils import DataSavingError
|
||||
|
||||
|
||||
class DataSaver:
|
||||
"""Handles saving data to various file formats"""
|
||||
|
||||
def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
|
||||
"""Initialize data saver
|
||||
|
||||
Args:
|
||||
data_dir: Directory for saving data files
|
||||
logging_instance: Optional logging instance
|
||||
"""
|
||||
self.data_dir = data_dir
|
||||
self.logging = logging_instance
|
||||
|
||||
def save_data(self, data: pd.DataFrame, file_path: str) -> None:
|
||||
"""Save processed data to a CSV file.
|
||||
If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
|
||||
(seconds since epoch) before saving. The index is saved as a column named 'timestamp'.
|
||||
|
||||
Args:
|
||||
data: DataFrame to save
|
||||
file_path: path to the data file relative to the data_dir
|
||||
|
||||
Raises:
|
||||
DataSavingError: If saving fails
|
||||
"""
|
||||
try:
|
||||
data_to_save = data.copy()
|
||||
data_to_save = self._prepare_data_for_saving(data_to_save)
|
||||
|
||||
# Save to CSV, ensuring the 'timestamp' column (if created) is written
|
||||
full_path = os.path.join(self.data_dir, file_path)
|
||||
data_to_save.to_csv(full_path, index=False)
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to save data to {file_path}: {e}"
|
||||
if self.logging is not None:
|
||||
self.logging.error(error_msg)
|
||||
raise DataSavingError(error_msg) from e
|
||||
|
||||
def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Prepare DataFrame for saving by handling different index types
|
||||
|
||||
Args:
|
||||
data: DataFrame to prepare
|
||||
|
||||
Returns:
|
||||
DataFrame ready for saving
|
||||
"""
|
||||
if isinstance(data.index, pd.DatetimeIndex):
|
||||
return self._convert_datetime_index_to_timestamp(data)
|
||||
elif pd.api.types.is_numeric_dtype(data.index.dtype):
|
||||
return self._convert_numeric_index_to_timestamp(data)
|
||||
else:
|
||||
# For other index types, save with the current index
|
||||
return data
|
||||
|
||||
def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Convert DatetimeIndex to Unix timestamp column
|
||||
|
||||
Args:
|
||||
data: DataFrame with DatetimeIndex
|
||||
|
||||
Returns:
|
||||
DataFrame with timestamp column
|
||||
"""
|
||||
# Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
|
||||
data['timestamp'] = data.index.astype('int64') / 1e9
|
||||
data.reset_index(drop=True, inplace=True)
|
||||
|
||||
# Ensure 'timestamp' is the first column if other columns exist
|
||||
if 'timestamp' in data.columns and len(data.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
|
||||
data = data[cols]
|
||||
|
||||
return data
|
||||
|
||||
def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Convert numeric index to timestamp column
|
||||
|
||||
Args:
|
||||
data: DataFrame with numeric index
|
||||
|
||||
Returns:
|
||||
DataFrame with timestamp column
|
||||
"""
|
||||
# If index is already numeric (e.g. float Unix timestamps from a previous save/load cycle)
|
||||
data['timestamp'] = data.index
|
||||
data.reset_index(drop=True, inplace=True)
|
||||
|
||||
# Ensure 'timestamp' is the first column if other columns exist
|
||||
if 'timestamp' in data.columns and len(data.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
|
||||
data = data[cols]
|
||||
|
||||
return data
|
||||
@@ -1,128 +0,0 @@
|
||||
import threading
|
||||
import time
|
||||
import queue
|
||||
from google.oauth2.service_account import Credentials
|
||||
import gspread
|
||||
import math
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
class GSheetBatchPusher(threading.Thread):
|
||||
|
||||
def __init__(self, queue, timestamp, spreadsheet_name, interval=60, logging=None):
|
||||
super().__init__(daemon=True)
|
||||
self.queue = queue
|
||||
self.timestamp = timestamp
|
||||
self.spreadsheet_name = spreadsheet_name
|
||||
self.interval = interval
|
||||
self._stop_event = threading.Event()
|
||||
self.logging = logging
|
||||
|
||||
def run(self):
|
||||
while not self._stop_event.is_set():
|
||||
self.push_all()
|
||||
time.sleep(self.interval)
|
||||
# Final push on stop
|
||||
self.push_all()
|
||||
|
||||
def stop(self):
|
||||
self._stop_event.set()
|
||||
|
||||
def push_all(self):
|
||||
batch_results = []
|
||||
batch_trades = []
|
||||
while True:
|
||||
try:
|
||||
results, trades = self.queue.get_nowait()
|
||||
batch_results.extend(results)
|
||||
batch_trades.extend(trades)
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
if batch_results or batch_trades:
|
||||
self.write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)
|
||||
|
||||
|
||||
def write_results_per_combination_gsheet(self, results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
|
||||
scopes = [
|
||||
"https://www.googleapis.com/auth/spreadsheets",
|
||||
"https://www.googleapis.com/auth/drive"
|
||||
]
|
||||
creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
|
||||
gc = gspread.authorize(creds)
|
||||
sh = gc.open(spreadsheet_name)
|
||||
|
||||
try:
|
||||
worksheet = sh.worksheet("Results")
|
||||
except gspread.exceptions.WorksheetNotFound:
|
||||
worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")
|
||||
|
||||
# Clear the worksheet before writing new results
|
||||
worksheet.clear()
|
||||
|
||||
# Updated fieldnames to match your data rows
|
||||
fieldnames = [
|
||||
"timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
|
||||
"max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
|
||||
]
|
||||
|
||||
def to_native(val):
|
||||
if isinstance(val, (np.generic, np.ndarray)):
|
||||
val = val.item()
|
||||
if hasattr(val, 'isoformat'):
|
||||
return val.isoformat()
|
||||
# Handle inf, -inf, nan
|
||||
if isinstance(val, float):
|
||||
if math.isinf(val):
|
||||
return "∞" if val > 0 else "-∞"
|
||||
if math.isnan(val):
|
||||
return ""
|
||||
return val
|
||||
|
||||
# Write header if sheet is empty
|
||||
if len(worksheet.get_all_values()) == 0:
|
||||
worksheet.append_row(fieldnames)
|
||||
|
||||
for row in results_rows:
|
||||
values = [to_native(row.get(field, "")) for field in fieldnames]
|
||||
worksheet.append_row(values)
|
||||
|
||||
trades_fieldnames = [
|
||||
"entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
|
||||
]
|
||||
trades_by_combo = defaultdict(list)
|
||||
|
||||
for trade in trade_rows:
|
||||
tf = trade.get("timeframe")
|
||||
sl = trade.get("stop_loss_pct")
|
||||
trades_by_combo[(tf, sl)].append(trade)
|
||||
|
||||
for (tf, sl), trades in trades_by_combo.items():
|
||||
sl_percent = int(round(sl * 100))
|
||||
sheet_name = f"Trades_{tf}_ST{sl_percent}%"
|
||||
|
||||
try:
|
||||
trades_ws = sh.worksheet(sheet_name)
|
||||
except gspread.exceptions.WorksheetNotFound:
|
||||
trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")
|
||||
|
||||
# Clear the trades worksheet before writing new trades
|
||||
trades_ws.clear()
|
||||
|
||||
if len(trades_ws.get_all_values()) == 0:
|
||||
trades_ws.append_row(trades_fieldnames)
|
||||
|
||||
for trade in trades:
|
||||
trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
|
||||
try:
|
||||
trades_ws.append_row(trade_row)
|
||||
except gspread.exceptions.APIError as e:
|
||||
if '429' in str(e):
|
||||
if self.logging is not None:
|
||||
self.logging.warning(f"Google Sheets API quota exceeded (429). Please wait one minute. Will retry on next batch push. Sheet: {sheet_name}")
|
||||
# Re-queue the failed batch for retry
|
||||
self.queue.put((results_rows, trade_rows))
|
||||
return # Stop pushing for this batch, will retry next interval
|
||||
else:
|
||||
raise
|
||||
233
cycles/utils/progress_manager.py
Normal file
233
cycles/utils/progress_manager.py
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Progress Manager for tracking multiple parallel backtest tasks
|
||||
"""
|
||||
|
||||
import threading
|
||||
import time
|
||||
import sys
|
||||
from typing import Dict, Optional, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskProgress:
|
||||
"""Represents progress information for a single task"""
|
||||
task_id: str
|
||||
name: str
|
||||
current: int
|
||||
total: int
|
||||
start_time: float
|
||||
last_update: float
|
||||
|
||||
@property
|
||||
def percentage(self) -> float:
|
||||
"""Calculate completion percentage"""
|
||||
if self.total == 0:
|
||||
return 0.0
|
||||
return (self.current / self.total) * 100
|
||||
|
||||
@property
|
||||
def elapsed_time(self) -> float:
|
||||
"""Calculate elapsed time in seconds"""
|
||||
return time.time() - self.start_time
|
||||
|
||||
@property
|
||||
def eta(self) -> Optional[float]:
|
||||
"""Estimate time to completion in seconds"""
|
||||
if self.current == 0 or self.percentage >= 100:
|
||||
return None
|
||||
|
||||
elapsed = self.elapsed_time
|
||||
rate = self.current / elapsed
|
||||
remaining = self.total - self.current
|
||||
return remaining / rate if rate > 0 else None
|
||||
|
||||
|
||||
class ProgressManager:
|
||||
"""Manages progress tracking for multiple parallel tasks"""
|
||||
|
||||
def __init__(self, update_interval: float = 1.0, display_width: int = 50):
|
||||
"""
|
||||
Initialize progress manager
|
||||
|
||||
Args:
|
||||
update_interval: How often to update display (seconds)
|
||||
display_width: Width of progress bar in characters
|
||||
"""
|
||||
self.tasks: Dict[str, TaskProgress] = {}
|
||||
self.update_interval = update_interval
|
||||
self.display_width = display_width
|
||||
self.lock = threading.Lock()
|
||||
self.display_thread: Optional[threading.Thread] = None
|
||||
self.running = False
|
||||
self.last_display_height = 0
|
||||
|
||||
def start_task(self, task_id: str, name: str, total: int) -> None:
|
||||
"""
|
||||
Start tracking a new task
|
||||
|
||||
Args:
|
||||
task_id: Unique identifier for the task
|
||||
name: Human-readable name for the task
|
||||
total: Total number of steps in the task
|
||||
"""
|
||||
with self.lock:
|
||||
self.tasks[task_id] = TaskProgress(
|
||||
task_id=task_id,
|
||||
name=name,
|
||||
current=0,
|
||||
total=total,
|
||||
start_time=time.time(),
|
||||
last_update=time.time()
|
||||
)
|
||||
|
||||
def update_progress(self, task_id: str, current: int) -> None:
|
||||
"""
|
||||
Update progress for a specific task
|
||||
|
||||
Args:
|
||||
task_id: Task identifier
|
||||
current: Current progress value
|
||||
"""
|
||||
with self.lock:
|
||||
if task_id in self.tasks:
|
||||
self.tasks[task_id].current = current
|
||||
self.tasks[task_id].last_update = time.time()
|
||||
|
||||
def complete_task(self, task_id: str) -> None:
|
||||
"""
|
||||
Mark a task as completed
|
||||
|
||||
Args:
|
||||
task_id: Task identifier
|
||||
"""
|
||||
with self.lock:
|
||||
if task_id in self.tasks:
|
||||
task = self.tasks[task_id]
|
||||
task.current = task.total
|
||||
task.last_update = time.time()
|
||||
|
||||
def start_display(self) -> None:
|
||||
"""Start the progress display thread"""
|
||||
if not self.running:
|
||||
self.running = True
|
||||
self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
|
||||
self.display_thread.start()
|
||||
|
||||
def stop_display(self) -> None:
|
||||
"""Stop the progress display thread"""
|
||||
self.running = False
|
||||
if self.display_thread:
|
||||
self.display_thread.join(timeout=1.0)
|
||||
self._clear_display()
|
||||
|
||||
def _display_loop(self) -> None:
|
||||
"""Main loop for updating the progress display"""
|
||||
while self.running:
|
||||
self._update_display()
|
||||
time.sleep(self.update_interval)
|
||||
|
||||
def _update_display(self) -> None:
|
||||
"""Update the console display with current progress"""
|
||||
with self.lock:
|
||||
if not self.tasks:
|
||||
return
|
||||
|
||||
# Clear previous display
|
||||
self._clear_display()
|
||||
|
||||
# Build display lines
|
||||
lines = []
|
||||
for task in sorted(self.tasks.values(), key=lambda t: t.task_id):
|
||||
line = self._format_progress_line(task)
|
||||
lines.append(line)
|
||||
|
||||
# Print all lines
|
||||
for line in lines:
|
||||
print(line, flush=True)
|
||||
|
||||
self.last_display_height = len(lines)
|
||||
|
||||
def _clear_display(self) -> None:
|
||||
"""Clear the previous progress display"""
|
||||
if self.last_display_height > 0:
|
||||
# Move cursor up and clear lines
|
||||
for _ in range(self.last_display_height):
|
||||
sys.stdout.write('\033[F') # Move cursor up one line
|
||||
sys.stdout.write('\033[K') # Clear line
|
||||
sys.stdout.flush()
|
||||
|
||||
def _format_progress_line(self, task: TaskProgress) -> str:
|
||||
"""
|
||||
Format a single progress line for display
|
||||
|
||||
Args:
|
||||
task: TaskProgress instance
|
||||
|
||||
Returns:
|
||||
Formatted progress string
|
||||
"""
|
||||
# Progress bar
|
||||
filled_width = int(task.percentage / 100 * self.display_width)
|
||||
bar = '█' * filled_width + '░' * (self.display_width - filled_width)
|
||||
|
||||
# Time information
|
||||
elapsed_str = self._format_time(task.elapsed_time)
|
||||
eta_str = self._format_time(task.eta) if task.eta else "N/A"
|
||||
|
||||
# Format line
|
||||
line = (f"{task.name:<25} │{bar}│ "
|
||||
f"{task.percentage:5.1f}% "
|
||||
f"({task.current:,}/{task.total:,}) "
|
||||
f"⏱ {elapsed_str} ETA: {eta_str}")
|
||||
|
||||
return line
|
||||
|
||||
def _format_time(self, seconds: float) -> str:
|
||||
"""
|
||||
Format time duration for display
|
||||
|
||||
Args:
|
||||
seconds: Time in seconds
|
||||
|
||||
Returns:
|
||||
Formatted time string
|
||||
"""
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
elif seconds < 3600:
|
||||
minutes = seconds / 60
|
||||
return f"{minutes:.1f}m"
|
||||
else:
|
||||
hours = seconds / 3600
|
||||
return f"{hours:.1f}h"
|
||||
|
||||
def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
|
||||
"""
|
||||
Get a progress callback function for a specific task
|
||||
|
||||
Args:
|
||||
task_id: Task identifier
|
||||
|
||||
Returns:
|
||||
Callback function that updates progress for this task
|
||||
"""
|
||||
def callback(current: int) -> None:
|
||||
self.update_progress(task_id, current)
|
||||
|
||||
return callback
|
||||
|
||||
def all_tasks_completed(self) -> bool:
|
||||
"""Check if all tasks are completed"""
|
||||
with self.lock:
|
||||
return all(task.current >= task.total for task in self.tasks.values())
|
||||
|
||||
def get_summary(self) -> str:
|
||||
"""Get a summary of all tasks"""
|
||||
with self.lock:
|
||||
total_tasks = len(self.tasks)
|
||||
completed_tasks = sum(1 for task in self.tasks.values()
|
||||
if task.current >= task.total)
|
||||
|
||||
return f"Tasks: {completed_tasks}/{total_tasks} completed"
|
||||
179
cycles/utils/result_formatter.py
Normal file
179
cycles/utils/result_formatter.py
Normal file
@@ -0,0 +1,179 @@
|
||||
import os
|
||||
import csv
|
||||
from typing import Dict, List, Optional, Any
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
|
||||
from .storage_utils import DataSavingError
|
||||
|
||||
|
||||
class ResultFormatter:
|
||||
"""Handles formatting and writing of backtest results to CSV files"""
|
||||
|
||||
def __init__(self, results_dir: str, logging_instance: Optional[logging.Logger] = None):
|
||||
"""Initialize result formatter
|
||||
|
||||
Args:
|
||||
results_dir: Directory for saving result files
|
||||
logging_instance: Optional logging instance
|
||||
"""
|
||||
self.results_dir = results_dir
|
||||
self.logging = logging_instance
|
||||
|
||||
def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
|
||||
"""Format a row for a combined results CSV file
|
||||
|
||||
Args:
|
||||
row: Dictionary containing row data
|
||||
|
||||
Returns:
|
||||
Dictionary with formatted values
|
||||
"""
|
||||
return {
|
||||
"timeframe": row["timeframe"],
|
||||
"stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
|
||||
"n_trades": row["n_trades"],
|
||||
"n_stop_loss": row["n_stop_loss"],
|
||||
"win_rate": f"{row['win_rate']*100:.2f}%",
|
||||
"max_drawdown": f"{row['max_drawdown']*100:.2f}%",
|
||||
"avg_trade": f"{row['avg_trade']*100:.2f}%",
|
||||
"profit_ratio": f"{row['profit_ratio']*100:.2f}%",
|
||||
"final_usd": f"{row['final_usd']:.2f}",
|
||||
"total_fees_usd": f"{row['total_fees_usd']:.2f}",
|
||||
}
|
||||
|
||||
def write_results_chunk(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], write_header: bool = False,
|
||||
initial_usd: Optional[float] = None) -> None:
|
||||
"""Write a chunk of results to a CSV file
|
||||
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of rows
|
||||
write_header: whether to write the header
|
||||
initial_usd: initial USD value for header comment
|
||||
|
||||
Raises:
|
||||
DataSavingError: If writing fails
|
||||
"""
|
||||
try:
|
||||
mode = 'w' if write_header else 'a'
|
||||
|
||||
with open(filename, mode, newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
if write_header:
|
||||
if initial_usd is not None:
|
||||
csvfile.write(f"# initial_usd: {initial_usd}\n")
|
||||
writer.writeheader()
|
||||
|
||||
for row in rows:
|
||||
# Only keep keys that are in fieldnames
|
||||
filtered_row = {k: v for k, v in row.items() if k in fieldnames}
|
||||
writer.writerow(filtered_row)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to write results chunk to {filename}: {e}"
|
||||
if self.logging is not None:
|
||||
self.logging.error(error_msg)
|
||||
raise DataSavingError(error_msg) from e
|
||||
|
||||
def write_backtest_results(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
|
||||
"""Write combined backtest results to a CSV file
|
||||
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of result dictionaries
|
||||
metadata_lines: optional list of strings to write as header comments
|
||||
|
||||
Returns:
|
||||
Full path to the written file
|
||||
|
||||
Raises:
|
||||
DataSavingError: If writing fails
|
||||
"""
|
||||
try:
|
||||
fname = os.path.join(self.results_dir, filename)
|
||||
with open(fname, "w", newline="") as csvfile:
|
||||
if metadata_lines:
|
||||
for line in metadata_lines:
|
||||
csvfile.write(f"{line}\n")
|
||||
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
|
||||
writer.writeheader()
|
||||
|
||||
for row in rows:
|
||||
writer.writerow(self.format_row(row))
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Combined results written to {fname}")
|
||||
|
||||
return fname
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to write backtest results to {filename}: {e}"
|
||||
if self.logging is not None:
|
||||
self.logging.error(error_msg)
|
||||
raise DataSavingError(error_msg) from e
|
||||
|
||||
def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
|
||||
"""Write trades to separate CSV files grouped by timeframe and stop loss
|
||||
|
||||
Args:
|
||||
all_trade_rows: list of trade dictionaries
|
||||
trades_fieldnames: list of trade fieldnames
|
||||
|
||||
Raises:
|
||||
DataSavingError: If writing fails
|
||||
"""
|
||||
try:
|
||||
trades_by_combo = self._group_trades_by_combination(all_trade_rows)
|
||||
|
||||
for (tf, sl), trades in trades_by_combo.items():
|
||||
self._write_single_trade_file(tf, sl, trades, trades_fieldnames)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to write trades: {e}"
|
||||
if self.logging is not None:
|
||||
self.logging.error(error_msg)
|
||||
raise DataSavingError(error_msg) from e
|
||||
|
||||
def _group_trades_by_combination(self, all_trade_rows: List[Dict]) -> Dict:
|
||||
"""Group trades by timeframe and stop loss combination
|
||||
|
||||
Args:
|
||||
all_trade_rows: List of trade dictionaries
|
||||
|
||||
Returns:
|
||||
Dictionary grouped by (timeframe, stop_loss_pct) tuples
|
||||
"""
|
||||
trades_by_combo = defaultdict(list)
|
||||
for trade in all_trade_rows:
|
||||
tf = trade.get("timeframe")
|
||||
sl = trade.get("stop_loss_pct")
|
||||
trades_by_combo[(tf, sl)].append(trade)
|
||||
return trades_by_combo
|
||||
|
||||
def _write_single_trade_file(self, timeframe: str, stop_loss_pct: float,
|
||||
trades: List[Dict], trades_fieldnames: List[str]) -> None:
|
||||
"""Write trades for a single timeframe/stop-loss combination
|
||||
|
||||
Args:
|
||||
timeframe: Timeframe identifier
|
||||
stop_loss_pct: Stop loss percentage
|
||||
trades: List of trades for this combination
|
||||
trades_fieldnames: List of field names for trades
|
||||
"""
|
||||
sl_percent = int(round(stop_loss_pct * 100))
|
||||
trades_filename = os.path.join(self.results_dir, f"trades_{timeframe}_ST{sl_percent}pct.csv")
|
||||
|
||||
with open(trades_filename, "w", newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
|
||||
writer.writeheader()
|
||||
for trade in trades:
|
||||
writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Trades written to {trades_filename}")
|
||||
@@ -1,17 +1,32 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import csv
|
||||
from collections import defaultdict
|
||||
from typing import Optional, Union, Dict, Any, List
|
||||
import logging
|
||||
|
||||
from .data_loader import DataLoader
|
||||
from .data_saver import DataSaver
|
||||
from .result_formatter import ResultFormatter
|
||||
from .storage_utils import DataLoadingError, DataSavingError
|
||||
|
||||
RESULTS_DIR = "../results"
|
||||
DATA_DIR = "../data"
|
||||
|
||||
RESULTS_DIR = "results"
|
||||
DATA_DIR = "data"
|
||||
|
||||
class Storage:
|
||||
|
||||
"""Storage class for storing and loading results and data"""
|
||||
"""Unified storage interface for data and results operations
|
||||
|
||||
Acts as a coordinator for DataLoader, DataSaver, and ResultFormatter components,
|
||||
maintaining backward compatibility while providing a clean separation of concerns.
|
||||
"""
|
||||
|
||||
def __init__(self, logging=None, results_dir=RESULTS_DIR, data_dir=DATA_DIR):
|
||||
|
||||
"""Initialize storage with component instances
|
||||
|
||||
Args:
|
||||
logging: Optional logging instance
|
||||
results_dir: Directory for results files
|
||||
data_dir: Directory for data files
|
||||
"""
|
||||
self.results_dir = results_dir
|
||||
self.data_dir = data_dir
|
||||
self.logging = logging
|
||||
@@ -20,196 +35,89 @@ class Storage:
|
||||
os.makedirs(self.results_dir, exist_ok=True)
|
||||
os.makedirs(self.data_dir, exist_ok=True)
|
||||
|
||||
def load_data(self, file_path, start_date, stop_date):
|
||||
# Initialize component instances
|
||||
self.data_loader = DataLoader(data_dir, logging)
|
||||
self.data_saver = DataSaver(data_dir, logging)
|
||||
self.result_formatter = ResultFormatter(results_dir, logging)
|
||||
|
||||
def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
|
||||
stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
|
||||
"""Load data with optimized dtypes and filtering, supporting CSV and JSON input
|
||||
|
||||
Args:
|
||||
file_path: path to the data file
|
||||
start_date: start date
|
||||
stop_date: stop date
|
||||
start_date: start date (string or datetime-like)
|
||||
stop_date: stop date (string or datetime-like)
|
||||
|
||||
Returns:
|
||||
pandas DataFrame
|
||||
pandas DataFrame with timestamp index
|
||||
|
||||
Raises:
|
||||
DataLoadingError: If data loading fails
|
||||
"""
|
||||
# Determine file type
|
||||
_, ext = os.path.splitext(file_path)
|
||||
ext = ext.lower()
|
||||
try:
|
||||
if ext == ".json":
|
||||
with open(os.path.join(self.data_dir, file_path), 'r') as f:
|
||||
raw = json.load(f)
|
||||
data = pd.DataFrame(raw["Data"])
|
||||
# Convert columns to lowercase
|
||||
data.columns = data.columns.str.lower()
|
||||
# Convert timestamp to datetime
|
||||
data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
|
||||
# Filter by date range
|
||||
data = data[(data["timestamp"] >= start_date) & (data["timestamp"] <= stop_date)]
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
return data.set_index("timestamp")
|
||||
else:
|
||||
# Define optimized dtypes
|
||||
dtypes = {
|
||||
'Open': 'float32',
|
||||
'High': 'float32',
|
||||
'Low': 'float32',
|
||||
'Close': 'float32',
|
||||
'Volume': 'float32'
|
||||
}
|
||||
# Read data with original capitalized column names
|
||||
data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
|
||||
|
||||
|
||||
# Convert timestamp to datetime
|
||||
if 'Timestamp' in data.columns:
|
||||
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
|
||||
# Filter by date range
|
||||
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
|
||||
# Now convert column names to lowercase
|
||||
data.columns = data.columns.str.lower()
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
return data.set_index('timestamp')
|
||||
else: # Attempt to use the first column if 'Timestamp' is not present
|
||||
data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
|
||||
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
|
||||
data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= stop_date)]
|
||||
data.columns = data.columns.str.lower() # Ensure all other columns are lower
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
|
||||
return data.set_index('timestamp')
|
||||
except Exception as e:
|
||||
if self.logging is not None:
|
||||
self.logging.error(f"Error loading data from {file_path}: {e}")
|
||||
# Return an empty DataFrame with a DatetimeIndex
|
||||
return pd.DataFrame(index=pd.to_datetime([]))
|
||||
|
||||
def save_data(self, data: pd.DataFrame, file_path: str):
|
||||
"""Save processed data to a CSV file.
|
||||
If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
|
||||
(seconds since epoch) before saving. The index is saved as a column named 'timestamp'.
|
||||
return self.data_loader.load_data(file_path, start_date, stop_date)
|
||||
|
||||
def save_data(self, data: pd.DataFrame, file_path: str) -> None:
|
||||
"""Save processed data to a CSV file
|
||||
|
||||
Args:
|
||||
data (pd.DataFrame): data to save.
|
||||
file_path (str): path to the data file relative to the data_dir.
|
||||
data: DataFrame to save
|
||||
file_path: path to the data file relative to the data_dir
|
||||
|
||||
Raises:
|
||||
DataSavingError: If saving fails
|
||||
"""
|
||||
data_to_save = data.copy()
|
||||
self.data_saver.save_data(data, file_path)
|
||||
|
||||
if isinstance(data_to_save.index, pd.DatetimeIndex):
|
||||
# Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
|
||||
# and make it a column named 'timestamp'.
|
||||
data_to_save['timestamp'] = data_to_save.index.astype('int64') / 1e9
|
||||
# Reset index so 'timestamp' column is saved and old DatetimeIndex is not saved as a column.
|
||||
# We want the 'timestamp' column to be the first one.
|
||||
data_to_save.reset_index(drop=True, inplace=True)
|
||||
# Ensure 'timestamp' is the first column if other columns exist
|
||||
if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
|
||||
data_to_save = data_to_save[cols]
|
||||
elif pd.api.types.is_numeric_dtype(data_to_save.index.dtype):
|
||||
# If index is already numeric (e.g. float Unix timestamps from a previous save/load cycle),
|
||||
# make it a column named 'timestamp'.
|
||||
data_to_save['timestamp'] = data_to_save.index
|
||||
data_to_save.reset_index(drop=True, inplace=True)
|
||||
if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
|
||||
data_to_save = data_to_save[cols]
|
||||
else:
|
||||
# For other index types, or if no index that we want to specifically handle,
|
||||
# save with the current index. pandas to_csv will handle it.
|
||||
# This branch might be removed if we strictly expect either DatetimeIndex or a numeric one from previous save.
|
||||
pass # data_to_save remains as is, to_csv will write its index if index=True
|
||||
|
||||
# Save to CSV, ensuring the 'timestamp' column (if created) is written, and not the DataFrame's active index.
|
||||
full_path = os.path.join(self.data_dir, file_path)
|
||||
data_to_save.to_csv(full_path, index=False) # index=False because timestamp is now a column
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
|
||||
|
||||
|
||||
def format_row(self, row):
|
||||
def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
|
||||
"""Format a row for a combined results CSV file
|
||||
|
||||
Args:
|
||||
row: row to format
|
||||
row: Dictionary containing row data
|
||||
|
||||
Returns:
|
||||
formatted row
|
||||
Dictionary with formatted values
|
||||
"""
|
||||
return self.result_formatter.format_row(row)
|
||||
|
||||
return {
|
||||
"timeframe": row["timeframe"],
|
||||
"stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
|
||||
"n_trades": row["n_trades"],
|
||||
"n_stop_loss": row["n_stop_loss"],
|
||||
"win_rate": f"{row['win_rate']*100:.2f}%",
|
||||
"max_drawdown": f"{row['max_drawdown']*100:.2f}%",
|
||||
"avg_trade": f"{row['avg_trade']*100:.2f}%",
|
||||
"profit_ratio": f"{row['profit_ratio']*100:.2f}%",
|
||||
"final_usd": f"{row['final_usd']:.2f}",
|
||||
"total_fees_usd": f"{row['total_fees_usd']:.2f}",
|
||||
}
|
||||
|
||||
def write_results_chunk(self, filename, fieldnames, rows, write_header=False, initial_usd=None):
|
||||
def write_results_chunk(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], write_header: bool = False,
|
||||
initial_usd: Optional[float] = None) -> None:
|
||||
"""Write a chunk of results to a CSV file
|
||||
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of rows
|
||||
write_header: whether to write the header
|
||||
initial_usd: initial USD
|
||||
initial_usd: initial USD value for header comment
|
||||
"""
|
||||
mode = 'w' if write_header else 'a'
|
||||
self.result_formatter.write_results_chunk(
|
||||
filename, fieldnames, rows, write_header, initial_usd
|
||||
)
|
||||
|
||||
def write_backtest_results(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
|
||||
"""Write combined backtest results to a CSV file
|
||||
|
||||
with open(filename, mode, newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
if write_header:
|
||||
csvfile.write(f"# initial_usd: {initial_usd}\n")
|
||||
writer.writeheader()
|
||||
|
||||
for row in rows:
|
||||
# Only keep keys that are in fieldnames
|
||||
filtered_row = {k: v for k, v in row.items() if k in fieldnames}
|
||||
writer.writerow(filtered_row)
|
||||
|
||||
def write_backtest_results(self, filename, fieldnames, rows, metadata_lines=None):
|
||||
"""Write a combined results to a CSV file
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of rows
|
||||
rows: list of result dictionaries
|
||||
metadata_lines: optional list of strings to write as header comments
|
||||
|
||||
Returns:
|
||||
Full path to the written file
|
||||
"""
|
||||
fname = os.path.join(self.results_dir, filename)
|
||||
with open(fname, "w", newline="") as csvfile:
|
||||
if metadata_lines:
|
||||
for line in metadata_lines:
|
||||
csvfile.write(f"{line}\n")
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
|
||||
writer.writeheader()
|
||||
for row in rows:
|
||||
writer.writerow(self.format_row(row))
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Combined results written to {fname}")
|
||||
|
||||
def write_trades(self, all_trade_rows, trades_fieldnames):
|
||||
"""Write trades to a CSV file
|
||||
return self.result_formatter.write_backtest_results(
|
||||
filename, fieldnames, rows, metadata_lines
|
||||
)
|
||||
|
||||
def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
|
||||
"""Write trades to separate CSV files grouped by timeframe and stop loss
|
||||
|
||||
Args:
|
||||
all_trade_rows: list of trade rows
|
||||
all_trade_rows: list of trade dictionaries
|
||||
trades_fieldnames: list of trade fieldnames
|
||||
logging: logging object
|
||||
"""
|
||||
|
||||
trades_by_combo = defaultdict(list)
|
||||
for trade in all_trade_rows:
|
||||
tf = trade.get("timeframe")
|
||||
sl = trade.get("stop_loss_pct")
|
||||
trades_by_combo[(tf, sl)].append(trade)
|
||||
|
||||
for (tf, sl), trades in trades_by_combo.items():
|
||||
sl_percent = int(round(sl * 100))
|
||||
trades_filename = os.path.join(self.results_dir, f"trades_{tf}_ST{sl_percent}pct.csv")
|
||||
with open(trades_filename, "w", newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
|
||||
writer.writeheader()
|
||||
for trade in trades:
|
||||
writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Trades written to {trades_filename}")
|
||||
self.result_formatter.write_trades(all_trade_rows, trades_fieldnames)
|
||||
73
cycles/utils/storage_utils.py
Normal file
73
cycles/utils/storage_utils.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class TimestampParsingError(Exception):
|
||||
"""Custom exception for timestamp parsing errors"""
|
||||
pass
|
||||
|
||||
|
||||
class DataLoadingError(Exception):
|
||||
"""Custom exception for data loading errors"""
|
||||
pass
|
||||
|
||||
|
||||
class DataSavingError(Exception):
|
||||
"""Custom exception for data saving errors"""
|
||||
pass
|
||||
|
||||
|
||||
def _parse_timestamp_column(data: pd.DataFrame, column_name: str) -> pd.DataFrame:
|
||||
"""Parse timestamp column handling both Unix timestamps and datetime strings
|
||||
|
||||
Args:
|
||||
data: DataFrame containing the timestamp column
|
||||
column_name: Name of the timestamp column
|
||||
|
||||
Returns:
|
||||
DataFrame with parsed timestamp column
|
||||
|
||||
Raises:
|
||||
TimestampParsingError: If timestamp parsing fails
|
||||
"""
|
||||
try:
|
||||
sample_timestamp = str(data[column_name].iloc[0])
|
||||
try:
|
||||
# Check if it's a Unix timestamp (numeric)
|
||||
float(sample_timestamp)
|
||||
# It's a Unix timestamp, convert using unit='s'
|
||||
data[column_name] = pd.to_datetime(data[column_name], unit='s')
|
||||
except ValueError:
|
||||
# It's already in datetime string format, convert without unit
|
||||
data[column_name] = pd.to_datetime(data[column_name])
|
||||
return data
|
||||
except Exception as e:
|
||||
raise TimestampParsingError(f"Failed to parse timestamp column '{column_name}': {e}")
|
||||
|
||||
|
||||
def _filter_by_date_range(data: pd.DataFrame, timestamp_col: str,
|
||||
start_date: pd.Timestamp, stop_date: pd.Timestamp) -> pd.DataFrame:
|
||||
"""Filter DataFrame by date range
|
||||
|
||||
Args:
|
||||
data: DataFrame to filter
|
||||
timestamp_col: Name of timestamp column
|
||||
start_date: Start date for filtering
|
||||
stop_date: Stop date for filtering
|
||||
|
||||
Returns:
|
||||
Filtered DataFrame
|
||||
"""
|
||||
return data[(data[timestamp_col] >= start_date) & (data[timestamp_col] <= stop_date)]
|
||||
|
||||
|
||||
def _normalize_column_names(data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Convert all column names to lowercase
|
||||
|
||||
Args:
|
||||
data: DataFrame to normalize
|
||||
|
||||
Returns:
|
||||
DataFrame with lowercase column names
|
||||
"""
|
||||
data.columns = data.columns.str.lower()
|
||||
return data
|
||||
@@ -10,10 +10,12 @@ class SystemUtils:
|
||||
"""Determine optimal number of worker processes based on system resources"""
|
||||
cpu_count = os.cpu_count() or 4
|
||||
memory_gb = psutil.virtual_memory().total / (1024**3)
|
||||
# Heuristic: Use 75% of cores, but cap based on available memory
|
||||
# Assume each worker needs ~2GB for large datasets
|
||||
workers_by_memory = max(1, int(memory_gb / 2))
|
||||
workers_by_cpu = max(1, int(cpu_count * 0.75))
|
||||
|
||||
# OPTIMIZATION: More aggressive worker allocation for better performance
|
||||
workers_by_memory = max(1, int(memory_gb / 2)) # 2GB per worker
|
||||
workers_by_cpu = max(1, int(cpu_count * 0.8)) # Use 80% of CPU cores
|
||||
optimal_workers = min(workers_by_cpu, workers_by_memory, 8) # Cap at 8 workers
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
|
||||
return min(workers_by_cpu, workers_by_memory)
|
||||
self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
|
||||
return optimal_workers
|
||||
Reference in New Issue
Block a user