Merge branch 'xgboost'

# Conflicts:
#	.gitignore
#	README.md
#	cycles/backtest.py
#	main.py
#	pyproject.toml
#	uv.lock
2025-07-11 09:04:49 +08:00
39 changed files with 6311 additions and 1332 deletions

View File

@@ -1,70 +1,30 @@
import pandas as pd
import numpy as np
import logging
from functools import lru_cache

# Plot colors
DARK_BG_COLOR = '#181C27'
LEGEND_BG_COLOR = '#333333'
TITLE_COLOR = 'white'
AXIS_LABEL_COLOR = 'white'
# Candlestick colors
CANDLE_UP_COLOR = '#089981'  # Green
CANDLE_DOWN_COLOR = '#F23645'  # Red
# Marker colors
MIN_COLOR = 'red'
MAX_COLOR = 'green'
# Line style colors
MIN_LINE_STYLE = 'g--'  # Green dashed
MAX_LINE_STYLE = 'r--'  # Red dashed
SMA7_LINE_STYLE = 'y-'  # Yellow solid
SMA15_LINE_STYLE = 'm-'  # Magenta solid
# SuperTrend colors
ST_COLOR_UP = 'g-'
ST_COLOR_DOWN = 'r-'

# Cache the calculation results by function parameters
@lru_cache(maxsize=32)
def cached_supertrend_calculation(period, multiplier, data_tuple):
    # Convert tuples back to numpy arrays
    high = np.array(data_tuple[0])
    low = np.array(data_tuple[1])
    close = np.array(data_tuple[2])
    # Calculate TR using vectorized operations
    tr = np.zeros_like(close)
    tr[0] = high[0] - low[0]
    hc_range = np.abs(high[1:] - close[:-1])
    lc_range = np.abs(low[1:] - close[:-1])
    hl_range = high[1:] - low[1:]
    tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
    # ATR as an exponential moving average of TR
    atr = np.zeros_like(tr)
    atr[0] = tr[0]
    multiplier_ema = 2.0 / (period + 1)
    for i in range(1, len(tr)):
        atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
    # Calculate bands
    upper_band = np.zeros_like(close)
    lower_band = np.zeros_like(close)
    for i in range(len(close)):
        hl_avg = (high[i] + low[i]) / 2
        upper_band[i] = hl_avg + (multiplier * atr[i])
        lower_band[i] = hl_avg - (multiplier * atr[i])
    final_upper = np.zeros_like(close)
    final_lower = np.zeros_like(close)
    supertrend = np.zeros_like(close)
@@ -105,232 +65,151 @@ def cached_supertrend_calculation(period, multiplier, data_tuple):
        'lower_band': final_lower
    }

def calculate_supertrend_external(data, period, multiplier, close_column='close'):
    """
    External function to calculate SuperTrend with a configurable close column
    Parameters:
    - data: DataFrame with OHLC data
    - period: int, period for ATR calculation
    - multiplier: float, multiplier for ATR
    - close_column: str, name of the column to use as close price (default: 'close')
    """
    # Convert DataFrame columns to hashable tuples so lru_cache can key on them
    high_tuple = tuple(data['high'])
    low_tuple = tuple(data['low'])
    close_tuple = tuple(data[close_column])
    return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))

class Supertrends:
    def __init__(self, data, close_column='close', verbose=False, display=False):
        """
        Initialize the Supertrends calculator.
        Parameters:
        - data: pandas DataFrame with OHLC data, or a list of prices
        - close_column: str, name of the column to use as close price (default: 'close')
        - verbose: bool, enable verbose logging
        - display: bool, display mode (currently unused)
        """
        self.close_column = close_column
        self.data = data
        self.verbose = verbose
        self.display = display
        # Only define display-related variables if display is True
        if self.display:
            # Plot style configuration
            self.plot_style = 'dark_background'
            self.bg_color = DARK_BG_COLOR
            self.plot_size = (12, 8)
            # Candlestick configuration
            self.candle_width = 0.6
            self.candle_up_color = CANDLE_UP_COLOR
            self.candle_down_color = CANDLE_DOWN_COLOR
            self.candle_alpha = 0.8
            self.wick_width = 1
            # Marker configuration
            self.min_marker = '^'
            self.min_color = MIN_COLOR
            self.min_size = 100
            self.max_marker = 'v'
            self.max_color = MAX_COLOR
            self.max_size = 100
            self.marker_zorder = 100
            # Line configuration
            self.line_width = 1
            self.min_line_style = MIN_LINE_STYLE
            self.max_line_style = MAX_LINE_STYLE
            self.sma7_line_style = SMA7_LINE_STYLE
            self.sma15_line_style = SMA15_LINE_STYLE
            # Text configuration
            self.title_size = 14
            self.title_color = TITLE_COLOR
            self.axis_label_size = 12
            self.axis_label_color = AXIS_LABEL_COLOR
            # Legend configuration
            self.legend_loc = 'best'
            self.legend_bg_color = LEGEND_BG_COLOR
        # Configure logging
        logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
                            format='%(asctime)s - %(levelname)s - %(message)s')
        self.logger = logging.getLogger('TrendDetectorSimple')
        # Convert data to a pandas DataFrame if it is not one already
        if not isinstance(self.data, pd.DataFrame):
            if isinstance(self.data, list):
                self.data = pd.DataFrame({self.close_column: self.data})
            else:
                raise ValueError("Data must be a pandas DataFrame or a list")
        # Validate that required columns exist
        required_columns = ['high', 'low', self.close_column]
        missing_columns = [col for col in required_columns if col not in self.data.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

    def calculate_tr(self):
        """Calculate True Range using the configured close column"""
        df = self.data.copy()
        high = df['high'].values
        low = df['low'].values
        close = df[self.close_column].values
        tr = np.zeros_like(close)
        tr[0] = high[0] - low[0]
        for i in range(1, len(close)):
            hl_range = high[i] - low[i]
            hc_range = abs(high[i] - close[i-1])
            lc_range = abs(low[i] - close[i-1])
            tr[i] = max(hl_range, hc_range, lc_range)
        return tr

    def calculate_atr(self, period=14):
        """Calculate Average True Range (EMA of the True Range)"""
        tr = self.calculate_tr()
        atr = np.zeros_like(tr)
        atr[0] = tr[0]
        multiplier = 2.0 / (period + 1)
        for i in range(1, len(tr)):
            atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
        return atr

    def calculate_supertrend(self, period=10, multiplier=3.0):
        """
        Calculate the SuperTrend indicator for the price data using the configured close column.
        SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
        Parameters:
        - period: int, the period for the ATR calculation (default: 10)
        - multiplier: float, the multiplier for the ATR (default: 3.0)
        Returns:
        - Dictionary containing SuperTrend values, trend direction, and upper/lower bands
        """
        df = self.data.copy()
        high = df['high'].values
        low = df['low'].values
        close = df[self.close_column].values
        atr = self.calculate_atr(period)
        upper_band = np.zeros_like(close)
        lower_band = np.zeros_like(close)
        for i in range(len(close)):
            hl_avg = (high[i] + low[i]) / 2
            upper_band[i] = hl_avg + (multiplier * atr[i])
            lower_band[i] = hl_avg - (multiplier * atr[i])
        final_upper = np.zeros_like(close)
        final_lower = np.zeros_like(close)
        supertrend = np.zeros_like(close)
        trend = np.zeros_like(close)
        final_upper[0] = upper_band[0]
        final_lower[0] = lower_band[0]
        if close[0] <= upper_band[0]:
            supertrend[0] = upper_band[0]
            trend[0] = -1
        else:
            supertrend[0] = lower_band[0]
            trend[0] = 1
        for i in range(1, len(close)):
            if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
                final_upper[i] = upper_band[i]
            else:
                final_upper[i] = final_upper[i-1]
            if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
                final_lower[i] = lower_band[i]
            else:
                final_lower[i] = final_lower[i-1]
            if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
                supertrend[i] = final_upper[i]
                trend[i] = -1
            elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
                supertrend[i] = final_lower[i]
                trend[i] = 1
            elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
                supertrend[i] = final_lower[i]
                trend[i] = 1
            elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
                supertrend[i] = final_upper[i]
                trend[i] = -1
        supertrend_results = {
            'supertrend': supertrend,
            'trend': trend,
            'upper_band': final_upper,
            'lower_band': final_lower
        }
        return supertrend_results

    def calculate_supertrend_indicators(self):
        """
        Calculate SuperTrend indicators for several parameter sets.
        Returns:
        - list of dicts, each holding the SuperTrend results and the parameters used
        """
        supertrend_params = [
            {"period": 12, "multiplier": 3.0},
            {"period": 10, "multiplier": 1.0},
            {"period": 11, "multiplier": 2.0}
        ]
        # For just three calculations, direct calculation is faster than a process pool
        results = []
        for p in supertrend_params:
            result = self.calculate_supertrend(period=p["period"], multiplier=p["multiplier"])
            results.append({
                "results": result,
                "params": p
            })
        return results
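A quick usage sketch (editor's illustration, not part of the commit): the class only needs high/low/close columns, and the synthetic data below is made up. Note that lru_cache only helps the external function, which converts columns to hashable tuples before calling the cached calculation.

# Editor's sketch: exercise Supertrends on synthetic OHLC data.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 100 + rng.normal(0, 1, 200).cumsum()
df = pd.DataFrame({
    'high': close + rng.uniform(0, 1, 200),
    'low': close - rng.uniform(0, 1, 200),
    'close': close,
})
st = Supertrends(df, close_column='close')
for item in st.calculate_supertrend_indicators():
    p, r = item['params'], item['results']
    print(p['period'], p['multiplier'], r['trend'][-5:])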

View File

@@ -1,167 +1,332 @@
import pandas as pd
import numpy as np
import time
from cycles.supertrend import Supertrends
from cycles.market_fees import MarketFees
class Backtest:
    @staticmethod
    def run(min1_df, df, initial_usd, stop_loss_pct, progress_callback=None, verbose=False):
        """
        Backtest a simple strategy using the meta supertrend (all three supertrends agree).
        Buys when the meta supertrend turns positive, sells when it turns negative, and applies a percentage stop loss.
        Parameters:
        - min1_df: pandas DataFrame, 1-minute timeframe data for more accurate stop loss checking (optional)
        - df: pandas DataFrame, main timeframe data for signals
        - initial_usd: float, starting USD amount
        - stop_loss_pct: float, stop loss as a fraction (e.g. 0.05 for 5%)
        - progress_callback: callable, optional callback to report progress (current_step)
        - verbose: bool, enable debug logging for stop loss checks
        """
        _df = df.copy().reset_index()
        # Ensure we have a timestamp column regardless of the original index name
        if 'timestamp' not in _df.columns:
            # If reset_index() created a column with the original index name, rename it
            if len(_df.columns) > 0 and _df.columns[0] not in ['open', 'high', 'low', 'close', 'volume', 'predicted_close_price']:
                _df = _df.rename(columns={_df.columns[0]: 'timestamp'})
            else:
                raise ValueError("Unable to identify timestamp column in DataFrame")
        _df['timestamp'] = pd.to_datetime(_df['timestamp'])
        supertrends = Supertrends(_df, verbose=False, close_column='predicted_close_price')
        supertrend_results_list = supertrends.calculate_supertrend_indicators()
        trends = [st['results']['trend'] for st in supertrend_results_list]
        trends_arr = np.stack(trends, axis=1)
        meta_trend = np.where((trends_arr[:, 0] == trends_arr[:, 1]) & (trends_arr[:, 1] == trends_arr[:, 2]),
                              trends_arr[:, 0], 0)
        # Shift meta_trend by one to avoid lookahead bias
        meta_trend_signal = np.roll(meta_trend, 1)
        meta_trend_signal[0] = 0  # 0 means 'no signal' for the first bar
        position = 0  # 0 = no position, 1 = long
        entry_price = 0
        usd = initial_usd
        coin = 0
        trade_log = []
        max_balance = initial_usd
        drawdowns = []
        trades = []
        entry_time = None
        stop_loss_count = 0  # Track the number of stop losses
        # Ensure min1_df has a proper DatetimeIndex
        if min1_df is not None and not min1_df.empty:
            min1_df.index = pd.to_datetime(min1_df.index)
        for i in range(1, len(_df)):
            # Report progress if a callback is provided
            if progress_callback:
                # Update every ~2% of the dataset (about 50 updates total) for responsiveness
                update_frequency = max(1, len(_df) // 50)
                if i % update_frequency == 0 or i == len(_df) - 1:  # Always update on the last iteration
                    if verbose:  # Only print in verbose mode to avoid spam
                        print(f"DEBUG: Progress callback called with i={i}, total={len(_df)-1}")
                    progress_callback(i)
            price_open = _df['open'].iloc[i]
            price_close = _df['close'].iloc[i]
            date = _df['timestamp'].iloc[i]
            prev_mt = meta_trend_signal[i-1]
            curr_mt = meta_trend_signal[i]
            # Check stop loss if in position
            if position == 1:
                stop_loss_result = Backtest.check_stop_loss(
                    min1_df,
                    entry_time,
                    date,
                    entry_price,
                    stop_loss_pct,
                    coin,
                    verbose=verbose
                )
                if stop_loss_result is not None:
                    trade_log_entry, position, coin, entry_price, usd = stop_loss_result
                    trade_log.append(trade_log_entry)
                    stop_loss_count += 1
                    continue
            # Entry: only if not in position and the signal changes to 1
            if position == 0 and prev_mt != 1 and curr_mt == 1:
                entry_result = Backtest.handle_entry(usd, price_open, date)
                coin, entry_price, entry_time, usd, position, trade_log_entry = entry_result
                trade_log.append(trade_log_entry)
            # Exit: only if in position and the signal changes from 1 to -1
            elif position == 1 and prev_mt == 1 and curr_mt == -1:
                exit_result = Backtest.handle_exit(coin, price_open, entry_price, entry_time, date)
                usd, coin, position, entry_price, trade_log_entry = exit_result
                trade_log.append(trade_log_entry)
            # Track drawdown
            balance = usd if position == 0 else coin * price_close
            if balance > max_balance:
                max_balance = balance
            drawdown = (max_balance - balance) / max_balance
            drawdowns.append(drawdown)
        # Report completion if a callback is provided
        if progress_callback:
            progress_callback(len(_df) - 1)
        # If still in position at the end, sell at the last close
        if position == 1:
            exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
            usd, coin, position, entry_price, trade_log_entry = exit_result
            trade_log.append(trade_log_entry)
        # Calculate statistics
        final_balance = usd
        n_trades = len(trade_log)
        wins = [1 for t in trade_log if t['exit'] is not None and t['exit'] > t['entry']]
        win_rate = len(wins) / n_trades if n_trades > 0 else 0
        max_drawdown = max(drawdowns) if drawdowns else 0
        avg_trade = np.mean([t['exit']/t['entry'] - 1 for t in trade_log if t['exit'] is not None]) if trade_log else 0
        trades = []
        total_fees_usd = 0.0
        for trade in trade_log:
            if trade['exit'] is not None:
                profit_pct = (trade['exit'] - trade['entry']) / trade['entry']
            else:
                profit_pct = 0.0
            # Validate the fee_usd field
            if 'fee_usd' not in trade:
                raise ValueError(f"Trade missing required field 'fee_usd': {trade}")
            fee_usd = trade['fee_usd']
            if fee_usd is None:
                raise ValueError(f"Trade fee_usd is None: {trade}")
            # Validate the trade type field
            if 'type' not in trade:
                raise ValueError(f"Trade missing required field 'type': {trade}")
            trade_type = trade['type']
            if trade_type is None:
                raise ValueError(f"Trade type is None: {trade}")
            trades.append({
                'entry_time': trade['entry_time'],
                'exit_time': trade['exit_time'],
                'entry': trade['entry'],
                'exit': trade['exit'],
                'profit_pct': profit_pct,
                'type': trade_type,
                'fee_usd': fee_usd
            })
            total_fees_usd += fee_usd
        results = {
            "initial_usd": initial_usd,
            "final_usd": final_balance,
            "n_trades": n_trades,
            "n_stop_loss": stop_loss_count,
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "trade_log": trade_log,
            "trades": trades,
            "total_fees_usd": total_fees_usd,
        }
        if n_trades > 0:
            results["first_trade"] = {
                "entry_time": trade_log[0]['entry_time'],
                "entry": trade_log[0]['entry']
            }
            results["last_trade"] = {
                "exit_time": trade_log[-1]['exit_time'],
                "exit": trade_log[-1]['exit']
            }
        return results

    @staticmethod
    def check_stop_loss(min1_df, entry_time, current_time, entry_price, stop_loss_pct, coin, verbose=False):
        """
        Check whether the stop loss should be triggered based on 1-minute data
        Args:
            min1_df: 1-minute DataFrame with DatetimeIndex
            entry_time: Entry timestamp
            current_time: Current timestamp
            entry_price: Entry price
            stop_loss_pct: Stop loss percentage (e.g. 0.05 for 5%)
            coin: Current coin position
            verbose: Enable debug logging
        Returns:
            Tuple of (trade_log_entry, position, coin, entry_price, usd) if the stop loss triggered, None otherwise
        """
        if min1_df is None or min1_df.empty:
            if verbose:
                print("Warning: No 1-minute data available for stop loss checking")
            return None
        stop_price = entry_price * (1 - stop_loss_pct)
        try:
            # Ensure min1_df has a DatetimeIndex
            if not isinstance(min1_df.index, pd.DatetimeIndex):
                if verbose:
                    print("Warning: min1_df does not have a DatetimeIndex")
                return None
            # Convert entry_time and current_time to pandas Timestamps for comparison
            entry_ts = pd.to_datetime(entry_time)
            current_ts = pd.to_datetime(current_time)
            if verbose:
                print(f"Checking stop loss from {entry_ts} to {current_ts}, stop_price: {stop_price:.2f}")
            # Handle the edge case where entry and current time are the same (1-minute timeframe)
            if entry_ts == current_ts:
                if verbose:
                    print("Entry and current time are the same, no range to check")
                return None
            # Range of 1-minute data to check: exclusive of the entry time, inclusive of the current time.
            # We start from the candle AFTER entry to avoid checking the entry candle itself.
            start_check_time = entry_ts + pd.Timedelta(minutes=1)
            # Get the slice of data to check for the stop loss
            mask = (min1_df.index > entry_ts) & (min1_df.index <= current_ts)
            min1_slice = min1_df.loc[mask]
            if len(min1_slice) == 0:
                if verbose:
                    print(f"No 1-minute data found between {start_check_time} and {current_ts}")
                return None
            if verbose:
                print(f"Checking {len(min1_slice)} candles for stop loss")
            # Check if any low price in the slice hits the stop loss
            stop_triggered = (min1_slice['low'] <= stop_price).any()
            if stop_triggered:
                # Find the exact candle where the stop loss was triggered
                stop_candle = min1_slice[min1_slice['low'] <= stop_price].iloc[0]
                if verbose:
                    print(f"Stop loss triggered at {stop_candle.name}, low: {stop_candle['low']:.2f}")
                # More realistic fill: if the candle opens below the stop, fill at the open, else at the stop
                if stop_candle['open'] < stop_price:
                    sell_price = stop_candle['open']
                    if verbose:
                        print(f"Filled at open price: {sell_price:.2f}")
                else:
                    sell_price = stop_price
                    if verbose:
                        print(f"Filled at stop price: {sell_price:.2f}")
                btc_to_sell = coin
                usd_gross = btc_to_sell * sell_price
                exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
                usd_after_stop = usd_gross - exit_fee
                trade_log_entry = {
                    'type': 'STOP',
                    'entry': entry_price,
                    'exit': sell_price,
                    'entry_time': entry_time,
                    'exit_time': stop_candle.name,
                    'fee_usd': exit_fee
                }
                # After a stop loss, reset position and entry, and return the USD balance
                return trade_log_entry, 0, 0, 0, usd_after_stop
            elif verbose:
                print(f"No stop loss triggered, min low in range: {min1_slice['low'].min():.2f}")
        except Exception as e:
            # In case of any error, don't trigger the stop loss but log the issue
            error_msg = f"Warning: Stop loss check failed: {e}"
            print(error_msg)
            if verbose:
                import traceback
                print(traceback.format_exc())
            return None
        return None

    @staticmethod
    def handle_entry(usd, price_open, date):
        entry_fee = MarketFees.calculate_okx_taker_maker_fee(usd, is_maker=False)
        usd_after_fee = usd - entry_fee
        coin = usd_after_fee / price_open
        entry_price = price_open
        entry_time = date
        usd = 0
        position = 1
        trade_log_entry = {
            'type': 'BUY',
            'entry': entry_price,
            'exit': None,
            'entry_time': entry_time,
            'exit_time': None,
            'fee_usd': entry_fee
        }
        return coin, entry_price, entry_time, usd, position, trade_log_entry

    @staticmethod
    def handle_exit(coin, price_open, entry_price, entry_time, date):
        btc_to_sell = coin
        usd_gross = btc_to_sell * price_open
        exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
        usd = usd_gross - exit_fee
        trade_log_entry = {
            'type': 'SELL',
            'entry': entry_price,
            'exit': price_open,
            'entry_time': entry_time,
            'exit_time': date,
            'fee_usd': exit_fee
        }
        coin = 0
        position = 0
        entry_price = 0
        return usd, coin, position, entry_price, trade_log_entry
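The one-bar shift via np.roll is what prevents lookahead bias: bar i trades on the meta-trend computed through bar i-1. A small self-contained demonstration (editor's sketch, values invented):

# Editor's sketch: meta-trend agreement and the one-bar signal delay.
import numpy as np

trends_arr = np.array([
    [1, 1, 1],     # all three supertrends agree long
    [1, -1, 1],    # disagreement, meta-trend is 0
    [-1, -1, -1],  # all agree short
])
meta_trend = np.where(
    (trends_arr[:, 0] == trends_arr[:, 1]) & (trends_arr[:, 1] == trends_arr[:, 2]),
    trends_arr[:, 0], 0)
signal = np.roll(meta_trend, 1)
signal[0] = 0  # the first bar has no prior information
print(meta_trend)  # [ 1  0 -1]
print(signal)      # [ 0  1  0]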

cycles/utils/data_loader.py (new file, 152 lines)
View File

@@ -0,0 +1,152 @@
import os
import json
import pandas as pd
from typing import Union, Optional
import logging
from .storage_utils import (
    _parse_timestamp_column,
    _filter_by_date_range,
    _normalize_column_names,
    TimestampParsingError,
    DataLoadingError
)

class DataLoader:
    """Handles loading and preprocessing of data from various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data loader
        Args:
            data_dir: Directory containing data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
                  stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input
        Args:
            file_path: path to the data file
            start_date: start date (string or datetime-like)
            stop_date: stop date (string or datetime-like)
        Returns:
            pandas DataFrame with timestamp index
        Raises:
            DataLoadingError: If data loading fails
        """
        try:
            # Convert string dates to pandas datetime objects for proper comparison
            start_date = pd.to_datetime(start_date)
            stop_date = pd.to_datetime(stop_date)
            # Determine file type
            _, ext = os.path.splitext(file_path)
            ext = ext.lower()
            if ext == ".json":
                return self._load_json_data(file_path, start_date, stop_date)
            else:
                return self._load_csv_data(file_path, start_date, stop_date)
        except Exception as e:
            error_msg = f"Error loading data from {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            # Return an empty DataFrame with a DatetimeIndex
            return pd.DataFrame(index=pd.to_datetime([]))

    def _load_json_data(self, file_path: str, start_date: pd.Timestamp,
                        stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a JSON data file
        Args:
            file_path: Path to JSON file
            start_date: Start date for filtering
            stop_date: Stop date for filtering
        Returns:
            Processed DataFrame with timestamp index
        """
        with open(os.path.join(self.data_dir, file_path), 'r') as f:
            raw = json.load(f)
        data = pd.DataFrame(raw["Data"])
        data = _normalize_column_names(data)
        # Convert timestamp to datetime
        data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
        # Filter by date range
        data = _filter_by_date_range(data, "timestamp", start_date, stop_date)
        if self.logging is not None:
            self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
        return data.set_index("timestamp")

    def _load_csv_data(self, file_path: str, start_date: pd.Timestamp,
                       stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a CSV data file
        Args:
            file_path: Path to CSV file
            start_date: Start date for filtering
            stop_date: Stop date for filtering
        Returns:
            Processed DataFrame with timestamp index
        """
        # Define optimized dtypes
        dtypes = {
            'Open': 'float32',
            'High': 'float32',
            'Low': 'float32',
            'Close': 'float32',
            'Volume': 'float32'
        }
        # Read data with the original capitalized column names
        data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
        return self._process_csv_timestamps(data, start_date, stop_date, file_path)

    def _process_csv_timestamps(self, data: pd.DataFrame, start_date: pd.Timestamp,
                                stop_date: pd.Timestamp, file_path: str) -> pd.DataFrame:
        """Process timestamps in CSV data and filter by date range
        Args:
            data: DataFrame with CSV data
            start_date: Start date for filtering
            stop_date: Stop date for filtering
            file_path: Original file path for logging
        Returns:
            Processed DataFrame with timestamp index
        """
        if 'Timestamp' in data.columns:
            data = _parse_timestamp_column(data, 'Timestamp')
            data = _filter_by_date_range(data, 'Timestamp', start_date, stop_date)
            data = _normalize_column_names(data)
            if self.logging is not None:
                self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
            return data.set_index('timestamp')
        else:
            # Attempt to use the first column if 'Timestamp' is not present
            data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
            data = _parse_timestamp_column(data, 'timestamp')
            data = _filter_by_date_range(data, 'timestamp', start_date, stop_date)
            data = _normalize_column_names(data)
            if self.logging is not None:
                self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
            return data.set_index('timestamp')
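A short usage sketch (editor's illustration; the file name and date range below are hypothetical, and the CSV is assumed to live under data/ with a Unix-timestamp column):

loader = DataLoader(data_dir="data")
df = loader.load_data("BTCUSDT_1m.csv", "2024-01-01", "2024-06-30")
print(df.index.min(), df.index.max(), list(df.columns))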

cycles/utils/data_saver.py (new file, 106 lines)
View File

@@ -0,0 +1,106 @@
import os
import pandas as pd
from typing import Optional
import logging
from .storage_utils import DataSavingError

class DataSaver:
    """Handles saving data to various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver
        Args:
            data_dir: Directory for saving data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.
        If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
        (seconds since epoch) before saving. The index is saved as a column named 'timestamp'.
        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir
        Raises:
            DataSavingError: If saving fails
        """
        try:
            data_to_save = data.copy()
            data_to_save = self._prepare_data_for_saving(data_to_save)
            # Save to CSV, ensuring the 'timestamp' column (if created) is written
            full_path = os.path.join(self.data_dir, file_path)
            data_to_save.to_csv(full_path, index=False)
            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
        except Exception as e:
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare a DataFrame for saving by handling different index types
        Args:
            data: DataFrame to prepare
        Returns:
            DataFrame ready for saving
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        elif pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        else:
            # For other index types, save with the current index
            return data

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a DatetimeIndex to a Unix timestamp column
        Args:
            data: DataFrame with DatetimeIndex
        Returns:
            DataFrame with timestamp column
        """
        # Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
        data['timestamp'] = data.index.astype('int64') / 1e9
        data.reset_index(drop=True, inplace=True)
        # Ensure 'timestamp' is the first column if other columns exist
        if 'timestamp' in data.columns and len(data.columns) > 1:
            cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
            data = data[cols]
        return data

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a numeric index to a timestamp column
        Args:
            data: DataFrame with numeric index
        Returns:
            DataFrame with timestamp column
        """
        # The index is already numeric (e.g. float Unix timestamps from a previous save/load cycle)
        data['timestamp'] = data.index
        data.reset_index(drop=True, inplace=True)
        # Ensure 'timestamp' is the first column if other columns exist
        if 'timestamp' in data.columns and len(data.columns) > 1:
            cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
            data = data[cols]
        return data
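A round-trip sketch (editor's illustration, file name invented): a DatetimeIndex is written out as a float Unix-timestamp column named 'timestamp', which DataLoader's timestamp parsing can read back in:

import pandas as pd

df = pd.DataFrame(
    {'close': [1.0, 2.0]},
    index=pd.to_datetime(['2024-01-01 00:00', '2024-01-01 00:01']),
)
saver = DataSaver(data_dir="data")
saver.save_data(df, "roundtrip_example.csv")  # writes data/roundtrip_example.csv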

View File

@@ -1,128 +0,0 @@
import threading
import time
import queue
from google.oauth2.service_account import Credentials
import gspread
import math
import numpy as np
from collections import defaultdict

class GSheetBatchPusher(threading.Thread):
    def __init__(self, queue, timestamp, spreadsheet_name, interval=60, logging=None):
        super().__init__(daemon=True)
        self.queue = queue
        self.timestamp = timestamp
        self.spreadsheet_name = spreadsheet_name
        self.interval = interval
        self._stop_event = threading.Event()
        self.logging = logging

    def run(self):
        while not self._stop_event.is_set():
            self.push_all()
            time.sleep(self.interval)
        # Final push on stop
        self.push_all()

    def stop(self):
        self._stop_event.set()

    def push_all(self):
        batch_results = []
        batch_trades = []
        while True:
            try:
                results, trades = self.queue.get_nowait()
                batch_results.extend(results)
                batch_trades.extend(trades)
            except queue.Empty:
                break
        if batch_results or batch_trades:
            self.write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)

    def write_results_per_combination_gsheet(self, results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
        scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive"
        ]
        creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
        gc = gspread.authorize(creds)
        sh = gc.open(spreadsheet_name)
        try:
            worksheet = sh.worksheet("Results")
        except gspread.exceptions.WorksheetNotFound:
            worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")
        # Clear the worksheet before writing new results
        worksheet.clear()
        # Field names must match the keys of the result rows
        fieldnames = [
            "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
            "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
        ]

        def to_native(val):
            if isinstance(val, (np.generic, np.ndarray)):
                val = val.item()
            if hasattr(val, 'isoformat'):
                return val.isoformat()
            # Handle inf, -inf, nan
            if isinstance(val, float):
                if math.isinf(val):
                    return "∞" if val > 0 else "-∞"
                if math.isnan(val):
                    return ""
            return val

        # Write the header if the sheet is empty
        if len(worksheet.get_all_values()) == 0:
            worksheet.append_row(fieldnames)
        for row in results_rows:
            values = [to_native(row.get(field, "")) for field in fieldnames]
            worksheet.append_row(values)
        trades_fieldnames = [
            "entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
        ]
        trades_by_combo = defaultdict(list)
        for trade in trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        for (tf, sl), trades in trades_by_combo.items():
            sl_percent = int(round(sl * 100))
            sheet_name = f"Trades_{tf}_ST{sl_percent}%"
            try:
                trades_ws = sh.worksheet(sheet_name)
            except gspread.exceptions.WorksheetNotFound:
                trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")
            # Clear the trades worksheet before writing new trades
            trades_ws.clear()
            if len(trades_ws.get_all_values()) == 0:
                trades_ws.append_row(trades_fieldnames)
            for trade in trades:
                trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
                try:
                    trades_ws.append_row(trade_row)
                except gspread.exceptions.APIError as e:
                    if '429' in str(e):
                        if self.logging is not None:
                            self.logging.warning(f"Google Sheets API quota exceeded (429). Please wait one minute. Will retry on next batch push. Sheet: {sheet_name}")
                        # Re-queue the failed batch for retry
                        self.queue.put((results_rows, trade_rows))
                        return  # Stop pushing this batch; it will be retried on the next interval
                    else:
                        raise
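For context, a sketch of how this pusher (deleted by this commit) was driven (editor's illustration; it assumes a valid credentials/service_account.json and a spreadsheet shared with the service account):

import queue

q = queue.Queue()
pusher = GSheetBatchPusher(q, timestamp="2025-07-11",
                           spreadsheet_name="GlimBit Backtest Results", interval=60)
pusher.start()
# Producers enqueue (results_rows, trade_rows) tuples; keys follow the fieldnames above.
q.put(([{"timeframe": "1h", "stop_loss_pct": 0.05, "n_trades": 3}], []))
pusher.stop()   # run() does a final push_all() after the stop event is set
pusher.join()   # may wait up to `interval` seconds for the sleep to elapse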

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Progress Manager for tracking multiple parallel backtest tasks
"""
import threading
import time
import sys
from typing import Dict, Optional, Callable
from dataclasses import dataclass

@dataclass
class TaskProgress:
    """Represents progress information for a single task"""
    task_id: str
    name: str
    current: int
    total: int
    start_time: float
    last_update: float

    @property
    def percentage(self) -> float:
        """Calculate completion percentage"""
        if self.total == 0:
            return 0.0
        return (self.current / self.total) * 100

    @property
    def elapsed_time(self) -> float:
        """Calculate elapsed time in seconds"""
        return time.time() - self.start_time

    @property
    def eta(self) -> Optional[float]:
        """Estimate time to completion in seconds"""
        if self.current == 0 or self.percentage >= 100:
            return None
        elapsed = self.elapsed_time
        rate = self.current / elapsed
        remaining = self.total - self.current
        return remaining / rate if rate > 0 else None
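The ETA assumes a constant processing rate. A worked example (editor's sketch): with 250 of 1,000 steps done in 50 s, the rate is 250/50 = 5 steps/s, so the remaining 750 steps take about 150 s:

import time

tp = TaskProgress(task_id="t1", name="demo", current=250, total=1000,
                  start_time=time.time() - 50, last_update=time.time())
print(round(tp.percentage, 1))  # 25.0
print(round(tp.eta or 0))       # ~150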
class ProgressManager:
    """Manages progress tracking for multiple parallel tasks"""

    def __init__(self, update_interval: float = 1.0, display_width: int = 50):
        """
        Initialize progress manager
        Args:
            update_interval: How often to update the display (seconds)
            display_width: Width of the progress bar in characters
        """
        self.tasks: Dict[str, TaskProgress] = {}
        self.update_interval = update_interval
        self.display_width = display_width
        self.lock = threading.Lock()
        self.display_thread: Optional[threading.Thread] = None
        self.running = False
        self.last_display_height = 0

    def start_task(self, task_id: str, name: str, total: int) -> None:
        """
        Start tracking a new task
        Args:
            task_id: Unique identifier for the task
            name: Human-readable name for the task
            total: Total number of steps in the task
        """
        with self.lock:
            self.tasks[task_id] = TaskProgress(
                task_id=task_id,
                name=name,
                current=0,
                total=total,
                start_time=time.time(),
                last_update=time.time()
            )

    def update_progress(self, task_id: str, current: int) -> None:
        """
        Update progress for a specific task
        Args:
            task_id: Task identifier
            current: Current progress value
        """
        with self.lock:
            if task_id in self.tasks:
                self.tasks[task_id].current = current
                self.tasks[task_id].last_update = time.time()

    def complete_task(self, task_id: str) -> None:
        """
        Mark a task as completed
        Args:
            task_id: Task identifier
        """
        with self.lock:
            if task_id in self.tasks:
                task = self.tasks[task_id]
                task.current = task.total
                task.last_update = time.time()

    def start_display(self) -> None:
        """Start the progress display thread"""
        if not self.running:
            self.running = True
            self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
            self.display_thread.start()

    def stop_display(self) -> None:
        """Stop the progress display thread"""
        self.running = False
        if self.display_thread:
            self.display_thread.join(timeout=1.0)
        self._clear_display()

    def _display_loop(self) -> None:
        """Main loop for updating the progress display"""
        while self.running:
            self._update_display()
            time.sleep(self.update_interval)

    def _update_display(self) -> None:
        """Update the console display with current progress"""
        with self.lock:
            if not self.tasks:
                return
            # Clear the previous display
            self._clear_display()
            # Build display lines
            lines = []
            for task in sorted(self.tasks.values(), key=lambda t: t.task_id):
                line = self._format_progress_line(task)
                lines.append(line)
            # Print all lines
            for line in lines:
                print(line, flush=True)
            self.last_display_height = len(lines)

    def _clear_display(self) -> None:
        """Clear the previous progress display"""
        if self.last_display_height > 0:
            # Move the cursor up and clear lines
            for _ in range(self.last_display_height):
                sys.stdout.write('\033[F')  # Move cursor up one line
                sys.stdout.write('\033[K')  # Clear line
            sys.stdout.flush()

    def _format_progress_line(self, task: TaskProgress) -> str:
        """
        Format a single progress line for display
        Args:
            task: TaskProgress instance
        Returns:
            Formatted progress string
        """
        # Progress bar (filled/empty block characters)
        filled_width = int(task.percentage / 100 * self.display_width)
        bar = '█' * filled_width + '░' * (self.display_width - filled_width)
        # Time information
        elapsed_str = self._format_time(task.elapsed_time)
        eta_str = self._format_time(task.eta) if task.eta else "N/A"
        # Format line
        line = (f"{task.name:<25} {bar} "
                f"{task.percentage:5.1f}% "
                f"({task.current:,}/{task.total:,}) "
                f"{elapsed_str} ETA: {eta_str}")
        return line

    def _format_time(self, seconds: float) -> str:
        """
        Format a time duration for display
        Args:
            seconds: Time in seconds
        Returns:
            Formatted time string
        """
        if seconds < 60:
            return f"{seconds:.0f}s"
        elif seconds < 3600:
            minutes = seconds / 60
            return f"{minutes:.1f}m"
        else:
            hours = seconds / 3600
            return f"{hours:.1f}h"

    def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
        """
        Get a progress callback function for a specific task
        Args:
            task_id: Task identifier
        Returns:
            Callback function that updates progress for this task
        """
        def callback(current: int) -> None:
            self.update_progress(task_id, current)
        return callback

    def all_tasks_completed(self) -> bool:
        """Check if all tasks are completed"""
        with self.lock:
            return all(task.current >= task.total for task in self.tasks.values())

    def get_summary(self) -> str:
        """Get a summary of all tasks"""
        with self.lock:
            total_tasks = len(self.tasks)
            completed_tasks = sum(1 for task in self.tasks.values()
                                  if task.current >= task.total)
            return f"Tasks: {completed_tasks}/{total_tasks} completed"

View File

@@ -0,0 +1,179 @@
import os
import csv
from typing import Dict, List, Optional, Any
from collections import defaultdict
import logging
from .storage_utils import DataSavingError

class ResultFormatter:
    """Handles formatting and writing of backtest results to CSV files"""

    def __init__(self, results_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize result formatter
        Args:
            results_dir: Directory for saving result files
            logging_instance: Optional logging instance
        """
        self.results_dir = results_dir
        self.logging = logging_instance

    def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
        """Format a row for a combined results CSV file
        Args:
            row: Dictionary containing row data
        Returns:
            Dictionary with formatted values
        """
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate']*100:.2f}%",
            "max_drawdown": f"{row['max_drawdown']*100:.2f}%",
            "avg_trade": f"{row['avg_trade']*100:.2f}%",
            "profit_ratio": f"{row['profit_ratio']*100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
            "total_fees_usd": f"{row['total_fees_usd']:.2f}",
        }

    def write_results_chunk(self, filename: str, fieldnames: List[str],
                            rows: List[Dict], write_header: bool = False,
                            initial_usd: Optional[float] = None) -> None:
        """Write a chunk of results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: whether to write the header
            initial_usd: initial USD value for header comment
        Raises:
            DataSavingError: If writing fails
        """
        try:
            mode = 'w' if write_header else 'a'
            with open(filename, mode, newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                if write_header:
                    if initial_usd is not None:
                        csvfile.write(f"# initial_usd: {initial_usd}\n")
                    writer.writeheader()
                for row in rows:
                    # Only keep keys that are in fieldnames
                    filtered_row = {k: v for k, v in row.items() if k in fieldnames}
                    writer.writerow(filtered_row)
        except Exception as e:
            error_msg = f"Failed to write results chunk to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_backtest_results(self, filename: str, fieldnames: List[str],
                               rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
        """Write combined backtest results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of result dictionaries
            metadata_lines: optional list of strings to write as header comments
        Returns:
            Full path to the written file
        Raises:
            DataSavingError: If writing fails
        """
        try:
            fname = os.path.join(self.results_dir, filename)
            with open(fname, "w", newline="") as csvfile:
                if metadata_lines:
                    for line in metadata_lines:
                        csvfile.write(f"{line}\n")
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
                writer.writeheader()
                for row in rows:
                    writer.writerow(self.format_row(row))
            if self.logging is not None:
                self.logging.info(f"Combined results written to {fname}")
            return fname
        except Exception as e:
            error_msg = f"Failed to write backtest results to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades to separate CSV files grouped by timeframe and stop loss
        Args:
            all_trade_rows: list of trade dictionaries
            trades_fieldnames: list of trade fieldnames
        Raises:
            DataSavingError: If writing fails
        """
        try:
            trades_by_combo = self._group_trades_by_combination(all_trade_rows)
            for (tf, sl), trades in trades_by_combo.items():
                self._write_single_trade_file(tf, sl, trades, trades_fieldnames)
        except Exception as e:
            error_msg = f"Failed to write trades: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _group_trades_by_combination(self, all_trade_rows: List[Dict]) -> Dict:
        """Group trades by timeframe and stop loss combination
        Args:
            all_trade_rows: List of trade dictionaries
        Returns:
            Dictionary grouped by (timeframe, stop_loss_pct) tuples
        """
        trades_by_combo = defaultdict(list)
        for trade in all_trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        return trades_by_combo

    def _write_single_trade_file(self, timeframe: str, stop_loss_pct: float,
                                 trades: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades for a single timeframe/stop-loss combination
        Args:
            timeframe: Timeframe identifier
            stop_loss_pct: Stop loss percentage
            trades: List of trades for this combination
            trades_fieldnames: List of field names for trades
        """
        sl_percent = int(round(stop_loss_pct * 100))
        trades_filename = os.path.join(self.results_dir, f"trades_{timeframe}_ST{sl_percent}pct.csv")
        with open(trades_filename, "w", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
            writer.writeheader()
            for trade in trades:
                writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
        if self.logging is not None:
            self.logging.info(f"Trades written to {trades_filename}")

View File

@@ -1,17 +1,32 @@
import os
import pandas as pd
from typing import Optional, Union, Dict, Any, List
import logging
from .data_loader import DataLoader
from .data_saver import DataSaver
from .result_formatter import ResultFormatter
from .storage_utils import DataLoadingError, DataSavingError

RESULTS_DIR = "results"
DATA_DIR = "data"
class Storage:
    """Unified storage interface for data and results operations

    Acts as a coordinator for the DataLoader, DataSaver, and ResultFormatter components,
    maintaining backward compatibility while providing a clean separation of concerns.
    """
    def __init__(self, logging=None, results_dir=RESULTS_DIR, data_dir=DATA_DIR):
        """Initialize storage with component instances
        Args:
            logging: Optional logging instance
            results_dir: Directory for results files
            data_dir: Directory for data files
        """
        self.results_dir = results_dir
        self.data_dir = data_dir
        self.logging = logging
@@ -20,196 +35,89 @@ class Storage:
        os.makedirs(self.results_dir, exist_ok=True)
        os.makedirs(self.data_dir, exist_ok=True)
        # Initialize component instances
        self.data_loader = DataLoader(data_dir, logging)
        self.data_saver = DataSaver(data_dir, logging)
        self.result_formatter = ResultFormatter(results_dir, logging)

    def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
                  stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input
        Args:
            file_path: path to the data file
            start_date: start date (string or datetime-like)
            stop_date: stop date (string or datetime-like)
        Returns:
            pandas DataFrame with timestamp index
        Raises:
            DataLoadingError: If data loading fails
        """
        return self.data_loader.load_data(file_path, start_date, stop_date)

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file
        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir
        Raises:
            DataSavingError: If saving fails
        """
        self.data_saver.save_data(data, file_path)

    def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
        """Format a row for a combined results CSV file
        Args:
            row: Dictionary containing row data
        Returns:
            Dictionary with formatted values
        """
        return self.result_formatter.format_row(row)

    def write_results_chunk(self, filename: str, fieldnames: List[str],
                            rows: List[Dict], write_header: bool = False,
                            initial_usd: Optional[float] = None) -> None:
        """Write a chunk of results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: whether to write the header
            initial_usd: initial USD value for header comment
        """
        self.result_formatter.write_results_chunk(
            filename, fieldnames, rows, write_header, initial_usd
        )

    def write_backtest_results(self, filename: str, fieldnames: List[str],
                               rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
        """Write combined backtest results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of result dictionaries
            metadata_lines: optional list of strings to write as header comments
        Returns:
            Full path to the written file
        """
        return self.result_formatter.write_backtest_results(
            filename, fieldnames, rows, metadata_lines
        )

    def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades to separate CSV files grouped by timeframe and stop loss
        Args:
            all_trade_rows: list of trade dictionaries
            trades_fieldnames: list of trade fieldnames
        """
        self.result_formatter.write_trades(all_trade_rows, trades_fieldnames)
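A facade usage sketch (editor's illustration; the file names and dates are hypothetical). Storage only coordinates; the actual work happens in the components it instantiates:

storage = Storage(results_dir="results", data_dir="data")
df = storage.load_data("BTCUSDT_1h.csv", "2024-01-01", "2024-03-31")
storage.save_data(df, "BTCUSDT_1h_filtered.csv")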

View File

@@ -0,0 +1,73 @@
import pandas as pd

class TimestampParsingError(Exception):
    """Custom exception for timestamp parsing errors"""
    pass

class DataLoadingError(Exception):
    """Custom exception for data loading errors"""
    pass

class DataSavingError(Exception):
    """Custom exception for data saving errors"""
    pass

def _parse_timestamp_column(data: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Parse a timestamp column, handling both Unix timestamps and datetime strings
    Args:
        data: DataFrame containing the timestamp column
        column_name: Name of the timestamp column
    Returns:
        DataFrame with parsed timestamp column
    Raises:
        TimestampParsingError: If timestamp parsing fails
    """
    try:
        sample_timestamp = str(data[column_name].iloc[0])
        try:
            # Check if it's a Unix timestamp (numeric)
            float(sample_timestamp)
            # It's a Unix timestamp, convert using unit='s'
            data[column_name] = pd.to_datetime(data[column_name], unit='s')
        except ValueError:
            # It's already a datetime string, convert without a unit
            data[column_name] = pd.to_datetime(data[column_name])
        return data
    except Exception as e:
        raise TimestampParsingError(f"Failed to parse timestamp column '{column_name}': {e}")

def _filter_by_date_range(data: pd.DataFrame, timestamp_col: str,
                          start_date: pd.Timestamp, stop_date: pd.Timestamp) -> pd.DataFrame:
    """Filter a DataFrame by date range
    Args:
        data: DataFrame to filter
        timestamp_col: Name of the timestamp column
        start_date: Start date for filtering
        stop_date: Stop date for filtering
    Returns:
        Filtered DataFrame
    """
    return data[(data[timestamp_col] >= start_date) & (data[timestamp_col] <= stop_date)]

def _normalize_column_names(data: pd.DataFrame) -> pd.DataFrame:
    """Convert all column names to lowercase
    Args:
        data: DataFrame to normalize
    Returns:
        DataFrame with lowercase column names
    """
    data.columns = data.columns.str.lower()
    return data
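A quick demonstration of the dual-format parsing (editor's sketch): the function keys off the first value in the column, so a numeric sample is treated as Unix seconds and anything else as a datetime string:

import pandas as pd

unix_df = pd.DataFrame({'ts': [1704067200, 1704067260]})
str_df = pd.DataFrame({'ts': ['2024-01-01 00:00:00', '2024-01-01 00:01:00']})
print(_parse_timestamp_column(unix_df, 'ts')['ts'].iloc[0])  # 2024-01-01 00:00:00
print(_parse_timestamp_column(str_df, 'ts')['ts'].iloc[0])   # 2024-01-01 00:00:00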

View File

@@ -10,10 +10,12 @@ class SystemUtils:
"""Determine optimal number of worker processes based on system resources"""
cpu_count = os.cpu_count() or 4
memory_gb = psutil.virtual_memory().total / (1024**3)
# Heuristic: Use 75% of cores, but cap based on available memory
# Assume each worker needs ~2GB for large datasets
workers_by_memory = max(1, int(memory_gb / 2))
workers_by_cpu = max(1, int(cpu_count * 0.75))
# OPTIMIZATION: More aggressive worker allocation for better performance
workers_by_memory = max(1, int(memory_gb / 2)) # 2GB per worker
workers_by_cpu = max(1, int(cpu_count * 0.8)) # Use 80% of CPU cores
optimal_workers = min(workers_by_cpu, workers_by_memory, 8) # Cap at 8 workers
if self.logging is not None:
self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
return min(workers_by_cpu, workers_by_memory)
self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
return optimal_workers
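A worked example of the heuristic (editor's sketch; the machine specs are assumed):

cpu_count, memory_gb = 16, 32.0
workers_by_memory = max(1, int(memory_gb / 2))  # 16
workers_by_cpu = max(1, int(cpu_count * 0.8))   # 12
optimal_workers = min(workers_by_cpu, workers_by_memory, 8)
print(optimal_workers)  # 8 (the hard cap wins on this machine)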