Merge branch 'xgboost'

# Conflicts:
#	.gitignore
#	README.md
#	cycles/backtest.py
#	main.py
#	pyproject.toml
#	uv.lock
2025-07-11 09:04:49 +08:00
39 changed files with 6311 additions and 1332 deletions

View File

@@ -1,70 +1,30 @@
import pandas as pd
import numpy as np
import logging
from functools import lru_cache

# Plot colors
DARK_BG_COLOR = '#181C27'
LEGEND_BG_COLOR = '#333333'
TITLE_COLOR = 'white'
AXIS_LABEL_COLOR = 'white'
# Candlestick colors
CANDLE_UP_COLOR = '#089981'  # Green
CANDLE_DOWN_COLOR = '#F23645'  # Red
# Marker colors
MIN_COLOR = 'red'
MAX_COLOR = 'green'
# Line style colors
MIN_LINE_STYLE = 'g--'  # Green dashed
MAX_LINE_STYLE = 'r--'  # Red dashed
SMA7_LINE_STYLE = 'y-'  # Yellow solid
SMA15_LINE_STYLE = 'm-'  # Magenta solid
# SuperTrend colors
ST_COLOR_UP = 'g-'
ST_COLOR_DOWN = 'r-'

# Cache the calculation results by function parameters
@lru_cache(maxsize=32)
def cached_supertrend_calculation(period, multiplier, data_tuple):
    # Convert tuples back to numpy arrays
    high = np.array(data_tuple[0])
    low = np.array(data_tuple[1])
    close = np.array(data_tuple[2])
    # Calculate TR using vectorized operations
    tr = np.zeros_like(close)
    tr[0] = high[0] - low[0]
    hc_range = np.abs(high[1:] - close[:-1])
    lc_range = np.abs(low[1:] - close[:-1])
    hl_range = high[1:] - low[1:]
    tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
    # ATR as an exponential moving average of TR
    atr = np.zeros_like(tr)
    atr[0] = tr[0]
    multiplier_ema = 2.0 / (period + 1)
    for i in range(1, len(tr)):
        atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
    # Calculate bands
    upper_band = np.zeros_like(close)
    lower_band = np.zeros_like(close)
    for i in range(len(close)):
        hl_avg = (high[i] + low[i]) / 2
        upper_band[i] = hl_avg + (multiplier * atr[i])
        lower_band[i] = hl_avg - (multiplier * atr[i])
    final_upper = np.zeros_like(close)
    final_lower = np.zeros_like(close)
    supertrend = np.zeros_like(close)
@@ -105,232 +65,151 @@ def cached_supertrend_calculation(period, multiplier, data_tuple):
        'lower_band': final_lower
    }

def calculate_supertrend_external(data, period, multiplier, close_column='close'):
    """
    External function to calculate SuperTrend with a configurable close column
    Parameters:
    - data: DataFrame with OHLC data
    - period: int, period for ATR calculation
    - multiplier: float, multiplier for ATR
    - close_column: str, name of the column to use as close price (default: 'close')
    """
    # Convert DataFrame columns to hashable tuples so lru_cache can key on them
    high_tuple = tuple(data['high'])
    low_tuple = tuple(data['low'])
    close_tuple = tuple(data[close_column])
    return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))

class Supertrends:
    def __init__(self, data, close_column='close', verbose=False, display=False):
        """
        Initialize the Supertrends calculator.
        Parameters:
        - data: pandas DataFrame with OHLC data, or a list of prices
        - close_column: str, name of the column to use as close price (default: 'close')
        - verbose: bool, enable verbose logging
        - display: bool, display mode (currently unused)
        """
        self.close_column = close_column
        self.data = data
        self.verbose = verbose
        self.display = display
        # Only define display-related variables if display is True
        if self.display:
            # Plot style configuration
            self.plot_style = 'dark_background'
            self.bg_color = DARK_BG_COLOR
            self.plot_size = (12, 8)
            # Candlestick configuration
            self.candle_width = 0.6
            self.candle_up_color = CANDLE_UP_COLOR
            self.candle_down_color = CANDLE_DOWN_COLOR
            self.candle_alpha = 0.8
            self.wick_width = 1
            # Marker configuration
            self.min_marker = '^'
            self.min_color = MIN_COLOR
            self.min_size = 100
            self.max_marker = 'v'
            self.max_color = MAX_COLOR
            self.max_size = 100
            self.marker_zorder = 100
            # Line configuration
            self.line_width = 1
            self.min_line_style = MIN_LINE_STYLE
            self.max_line_style = MAX_LINE_STYLE
            self.sma7_line_style = SMA7_LINE_STYLE
            self.sma15_line_style = SMA15_LINE_STYLE
            # Text configuration
            self.title_size = 14
            self.title_color = TITLE_COLOR
            self.axis_label_size = 12
            self.axis_label_color = AXIS_LABEL_COLOR
            # Legend configuration
            self.legend_loc = 'best'
            self.legend_bg_color = LEGEND_BG_COLOR
        # Configure logging
        logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
                            format='%(asctime)s - %(levelname)s - %(message)s')
        self.logger = logging.getLogger('TrendDetectorSimple')
        # Convert data to a pandas DataFrame if it is not one already
        if not isinstance(self.data, pd.DataFrame):
            if isinstance(self.data, list):
                self.data = pd.DataFrame({self.close_column: self.data})
            else:
                raise ValueError("Data must be a pandas DataFrame or a list")
        # Validate that required columns exist
        required_columns = ['high', 'low', self.close_column]
        missing_columns = [col for col in required_columns if col not in self.data.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

    def calculate_tr(self):
        """Calculate True Range using the configured close column"""
        df = self.data.copy()
        high = df['high'].values
        low = df['low'].values
        close = df[self.close_column].values
        tr = np.zeros_like(close)
        tr[0] = high[0] - low[0]
        for i in range(1, len(close)):
            hl_range = high[i] - low[i]
            hc_range = abs(high[i] - close[i-1])
            lc_range = abs(low[i] - close[i-1])
            tr[i] = max(hl_range, hc_range, lc_range)
        return tr

    def calculate_atr(self, period=14):
        """Calculate Average True Range (EMA of the True Range)"""
        tr = self.calculate_tr()
        atr = np.zeros_like(tr)
        atr[0] = tr[0]
        multiplier = 2.0 / (period + 1)
        for i in range(1, len(tr)):
            atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
        return atr

    def calculate_supertrend(self, period=10, multiplier=3.0):
        """
        Calculate the SuperTrend indicator for the price data using the configured close column.
        SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
        Parameters:
        - period: int, the period for the ATR calculation (default: 10)
        - multiplier: float, the multiplier for the ATR (default: 3.0)
        Returns:
        - Dictionary containing SuperTrend values, trend direction, and upper/lower bands
        """
        df = self.data.copy()
        high = df['high'].values
        low = df['low'].values
        close = df[self.close_column].values
        atr = self.calculate_atr(period)
        upper_band = np.zeros_like(close)
        lower_band = np.zeros_like(close)
        for i in range(len(close)):
            hl_avg = (high[i] + low[i]) / 2
            upper_band[i] = hl_avg + (multiplier * atr[i])
            lower_band[i] = hl_avg - (multiplier * atr[i])
        final_upper = np.zeros_like(close)
        final_lower = np.zeros_like(close)
        supertrend = np.zeros_like(close)
        trend = np.zeros_like(close)
        final_upper[0] = upper_band[0]
        final_lower[0] = lower_band[0]
        if close[0] <= upper_band[0]:
            supertrend[0] = upper_band[0]
            trend[0] = -1
        else:
            supertrend[0] = lower_band[0]
            trend[0] = 1
        for i in range(1, len(close)):
            if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
                final_upper[i] = upper_band[i]
            else:
                final_upper[i] = final_upper[i-1]
            if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
                final_lower[i] = lower_band[i]
            else:
                final_lower[i] = final_lower[i-1]
            if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
                supertrend[i] = final_upper[i]
                trend[i] = -1
            elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
                supertrend[i] = final_lower[i]
                trend[i] = 1
            elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
                supertrend[i] = final_lower[i]
                trend[i] = 1
            elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
                supertrend[i] = final_upper[i]
                trend[i] = -1
        supertrend_results = {
            'supertrend': supertrend,
            'trend': trend,
            'upper_band': final_upper,
            'lower_band': final_lower
        }
        return supertrend_results

    def calculate_supertrend_indicators(self):
        """
        Calculate SuperTrend indicators for several parameter sets.
        Returns:
        - list of dicts, each holding the SuperTrend results and the parameters used
        """
        supertrend_params = [
            {"period": 12, "multiplier": 3.0},
            {"period": 10, "multiplier": 1.0},
            {"period": 11, "multiplier": 2.0}
        ]
        # For just three calculations, direct calculation is faster than a process pool
        results = []
        for p in supertrend_params:
            result = self.calculate_supertrend(period=p["period"], multiplier=p["multiplier"])
            results.append({
                "results": result,
                "params": p
            })
        return results
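A quick usage sketch (editor's illustration, not part of the commit): the class only needs high/low/close columns, and the synthetic data below is made up. Note that lru_cache only helps the external function, which converts columns to hashable tuples before calling the cached calculation.

# Editor's sketch: exercise Supertrends on synthetic OHLC data.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 100 + rng.normal(0, 1, 200).cumsum()
df = pd.DataFrame({
    'high': close + rng.uniform(0, 1, 200),
    'low': close - rng.uniform(0, 1, 200),
    'close': close,
})
st = Supertrends(df, close_column='close')
for item in st.calculate_supertrend_indicators():
    p, r = item['params'], item['results']
    print(p['period'], p['multiplier'], r['trend'][-5:])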

View File

@@ -1,167 +1,332 @@
import pandas as pd
import numpy as np
import time
from cycles.supertrend import Supertrends
from cycles.market_fees import MarketFees
class Backtest:
    @staticmethod
    def run(min1_df, df, initial_usd, stop_loss_pct, progress_callback=None, verbose=False):
        """
        Backtest a simple strategy using the meta supertrend (all three supertrends agree).
        Buys when the meta supertrend turns positive, sells when it turns negative, and applies a percentage stop loss.
        Parameters:
        - min1_df: pandas DataFrame, 1-minute timeframe data for more accurate stop loss checking (optional)
        - df: pandas DataFrame, main timeframe data for signals
        - initial_usd: float, starting USD amount
        - stop_loss_pct: float, stop loss as a fraction (e.g. 0.05 for 5%)
        - progress_callback: callable, optional callback to report progress (current_step)
        - verbose: bool, enable debug logging for stop loss checks
        """
        _df = df.copy().reset_index()
        # Ensure we have a timestamp column regardless of the original index name
        if 'timestamp' not in _df.columns:
            # If reset_index() created a column with the original index name, rename it
            if len(_df.columns) > 0 and _df.columns[0] not in ['open', 'high', 'low', 'close', 'volume', 'predicted_close_price']:
                _df = _df.rename(columns={_df.columns[0]: 'timestamp'})
            else:
                raise ValueError("Unable to identify timestamp column in DataFrame")
        _df['timestamp'] = pd.to_datetime(_df['timestamp'])
        supertrends = Supertrends(_df, verbose=False, close_column='predicted_close_price')
        supertrend_results_list = supertrends.calculate_supertrend_indicators()
        trends = [st['results']['trend'] for st in supertrend_results_list]
        trends_arr = np.stack(trends, axis=1)
        meta_trend = np.where((trends_arr[:, 0] == trends_arr[:, 1]) & (trends_arr[:, 1] == trends_arr[:, 2]),
                              trends_arr[:, 0], 0)
        # Shift meta_trend by one to avoid lookahead bias
        meta_trend_signal = np.roll(meta_trend, 1)
        meta_trend_signal[0] = 0  # 0 means 'no signal' for the first bar
        position = 0  # 0 = no position, 1 = long
        entry_price = 0
        usd = initial_usd
        coin = 0
        trade_log = []
        max_balance = initial_usd
        drawdowns = []
        trades = []
        entry_time = None
        stop_loss_count = 0  # Track the number of stop losses
        # Ensure min1_df has a proper DatetimeIndex
        if min1_df is not None and not min1_df.empty:
            min1_df.index = pd.to_datetime(min1_df.index)
        for i in range(1, len(_df)):
            # Report progress if a callback is provided
            if progress_callback:
                # Update every ~2% of the dataset (about 50 updates total) for responsiveness
                update_frequency = max(1, len(_df) // 50)
                if i % update_frequency == 0 or i == len(_df) - 1:  # Always update on the last iteration
                    if verbose:  # Only print in verbose mode to avoid spam
                        print(f"DEBUG: Progress callback called with i={i}, total={len(_df)-1}")
                    progress_callback(i)
            price_open = _df['open'].iloc[i]
            price_close = _df['close'].iloc[i]
            date = _df['timestamp'].iloc[i]
            prev_mt = meta_trend_signal[i-1]
            curr_mt = meta_trend_signal[i]
            # Check stop loss if in position
            if position == 1:
                stop_loss_result = Backtest.check_stop_loss(
                    min1_df,
                    entry_time,
                    date,
                    entry_price,
                    stop_loss_pct,
                    coin,
                    verbose=verbose
                )
                if stop_loss_result is not None:
                    trade_log_entry, position, coin, entry_price, usd = stop_loss_result
                    trade_log.append(trade_log_entry)
                    stop_loss_count += 1
                    continue
            # Entry: only if not in position and the signal changes to 1
            if position == 0 and prev_mt != 1 and curr_mt == 1:
                entry_result = Backtest.handle_entry(usd, price_open, date)
                coin, entry_price, entry_time, usd, position, trade_log_entry = entry_result
                trade_log.append(trade_log_entry)
            # Exit: only if in position and the signal changes from 1 to -1
            elif position == 1 and prev_mt == 1 and curr_mt == -1:
                exit_result = Backtest.handle_exit(coin, price_open, entry_price, entry_time, date)
                usd, coin, position, entry_price, trade_log_entry = exit_result
                trade_log.append(trade_log_entry)
            # Track drawdown
            balance = usd if position == 0 else coin * price_close
            if balance > max_balance:
                max_balance = balance
            drawdown = (max_balance - balance) / max_balance
            drawdowns.append(drawdown)
        # Report completion if a callback is provided
        if progress_callback:
            progress_callback(len(_df) - 1)
        # If still in position at the end, sell at the last close
        if position == 1:
            exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
            usd, coin, position, entry_price, trade_log_entry = exit_result
            trade_log.append(trade_log_entry)
        # Calculate statistics
        final_balance = usd
        n_trades = len(trade_log)
        wins = [1 for t in trade_log if t['exit'] is not None and t['exit'] > t['entry']]
        win_rate = len(wins) / n_trades if n_trades > 0 else 0
        max_drawdown = max(drawdowns) if drawdowns else 0
        avg_trade = np.mean([t['exit']/t['entry'] - 1 for t in trade_log if t['exit'] is not None]) if trade_log else 0
        trades = []
        total_fees_usd = 0.0
        for trade in trade_log:
            if trade['exit'] is not None:
                profit_pct = (trade['exit'] - trade['entry']) / trade['entry']
            else:
                profit_pct = 0.0
            # Validate the fee_usd field
            if 'fee_usd' not in trade:
                raise ValueError(f"Trade missing required field 'fee_usd': {trade}")
            fee_usd = trade['fee_usd']
            if fee_usd is None:
                raise ValueError(f"Trade fee_usd is None: {trade}")
            # Validate the trade type field
            if 'type' not in trade:
                raise ValueError(f"Trade missing required field 'type': {trade}")
            trade_type = trade['type']
            if trade_type is None:
                raise ValueError(f"Trade type is None: {trade}")
            trades.append({
                'entry_time': trade['entry_time'],
                'exit_time': trade['exit_time'],
                'entry': trade['entry'],
                'exit': trade['exit'],
                'profit_pct': profit_pct,
                'type': trade_type,
                'fee_usd': fee_usd
            })
            total_fees_usd += fee_usd
        results = {
            "initial_usd": initial_usd,
            "final_usd": final_balance,
            "n_trades": n_trades,
            "n_stop_loss": stop_loss_count,
            "win_rate": win_rate,
            "max_drawdown": max_drawdown,
            "avg_trade": avg_trade,
            "trade_log": trade_log,
            "trades": trades,
            "total_fees_usd": total_fees_usd,
        }
        if n_trades > 0:
            results["first_trade"] = {
                "entry_time": trade_log[0]['entry_time'],
                "entry": trade_log[0]['entry']
            }
            results["last_trade"] = {
                "exit_time": trade_log[-1]['exit_time'],
                "exit": trade_log[-1]['exit']
            }
        return results

    @staticmethod
    def check_stop_loss(min1_df, entry_time, current_time, entry_price, stop_loss_pct, coin, verbose=False):
        """
        Check whether the stop loss should be triggered based on 1-minute data
        Args:
            min1_df: 1-minute DataFrame with DatetimeIndex
            entry_time: Entry timestamp
            current_time: Current timestamp
            entry_price: Entry price
            stop_loss_pct: Stop loss percentage (e.g. 0.05 for 5%)
            coin: Current coin position
            verbose: Enable debug logging
        Returns:
            Tuple of (trade_log_entry, position, coin, entry_price, usd) if the stop loss triggered, None otherwise
        """
        if min1_df is None or min1_df.empty:
            if verbose:
                print("Warning: No 1-minute data available for stop loss checking")
            return None
        stop_price = entry_price * (1 - stop_loss_pct)
        try:
            # Ensure min1_df has a DatetimeIndex
            if not isinstance(min1_df.index, pd.DatetimeIndex):
                if verbose:
                    print("Warning: min1_df does not have a DatetimeIndex")
                return None
            # Convert entry_time and current_time to pandas Timestamps for comparison
            entry_ts = pd.to_datetime(entry_time)
            current_ts = pd.to_datetime(current_time)
            if verbose:
                print(f"Checking stop loss from {entry_ts} to {current_ts}, stop_price: {stop_price:.2f}")
            # Handle the edge case where entry and current time are the same (1-minute timeframe)
            if entry_ts == current_ts:
                if verbose:
                    print("Entry and current time are the same, no range to check")
                return None
            # Range of 1-minute data to check: exclusive of the entry time, inclusive of the current time.
            # We start from the candle AFTER entry to avoid checking the entry candle itself.
            start_check_time = entry_ts + pd.Timedelta(minutes=1)
            # Get the slice of data to check for the stop loss
            mask = (min1_df.index > entry_ts) & (min1_df.index <= current_ts)
            min1_slice = min1_df.loc[mask]
            if len(min1_slice) == 0:
                if verbose:
                    print(f"No 1-minute data found between {start_check_time} and {current_ts}")
                return None
            if verbose:
                print(f"Checking {len(min1_slice)} candles for stop loss")
            # Check if any low price in the slice hits the stop loss
            stop_triggered = (min1_slice['low'] <= stop_price).any()
            if stop_triggered:
                # Find the exact candle where the stop loss was triggered
                stop_candle = min1_slice[min1_slice['low'] <= stop_price].iloc[0]
                if verbose:
                    print(f"Stop loss triggered at {stop_candle.name}, low: {stop_candle['low']:.2f}")
                # More realistic fill: if the candle opens below the stop, fill at the open, else at the stop
                if stop_candle['open'] < stop_price:
                    sell_price = stop_candle['open']
                    if verbose:
                        print(f"Filled at open price: {sell_price:.2f}")
                else:
                    sell_price = stop_price
                    if verbose:
                        print(f"Filled at stop price: {sell_price:.2f}")
                btc_to_sell = coin
                usd_gross = btc_to_sell * sell_price
                exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
                usd_after_stop = usd_gross - exit_fee
                trade_log_entry = {
                    'type': 'STOP',
                    'entry': entry_price,
                    'exit': sell_price,
                    'entry_time': entry_time,
                    'exit_time': stop_candle.name,
                    'fee_usd': exit_fee
                }
                # After a stop loss, reset position and entry, and return the USD balance
                return trade_log_entry, 0, 0, 0, usd_after_stop
            elif verbose:
                print(f"No stop loss triggered, min low in range: {min1_slice['low'].min():.2f}")
        except Exception as e:
            # In case of any error, don't trigger the stop loss but log the issue
            error_msg = f"Warning: Stop loss check failed: {e}"
            print(error_msg)
            if verbose:
                import traceback
                print(traceback.format_exc())
            return None
        return None

    @staticmethod
    def handle_entry(usd, price_open, date):
        entry_fee = MarketFees.calculate_okx_taker_maker_fee(usd, is_maker=False)
        usd_after_fee = usd - entry_fee
        coin = usd_after_fee / price_open
        entry_price = price_open
        entry_time = date
        usd = 0
        position = 1
        trade_log_entry = {
            'type': 'BUY',
            'entry': entry_price,
            'exit': None,
            'entry_time': entry_time,
            'exit_time': None,
            'fee_usd': entry_fee
        }
        return coin, entry_price, entry_time, usd, position, trade_log_entry

    @staticmethod
    def handle_exit(coin, price_open, entry_price, entry_time, date):
        btc_to_sell = coin
        usd_gross = btc_to_sell * price_open
        exit_fee = MarketFees.calculate_okx_taker_maker_fee(usd_gross, is_maker=False)
        usd = usd_gross - exit_fee
        trade_log_entry = {
            'type': 'SELL',
            'entry': entry_price,
            'exit': price_open,
            'entry_time': entry_time,
            'exit_time': date,
            'fee_usd': exit_fee
        }
        coin = 0
        position = 0
        entry_price = 0
        return usd, coin, position, entry_price, trade_log_entry
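The one-bar shift via np.roll is what prevents lookahead bias: bar i trades on the meta-trend computed through bar i-1. A small self-contained demonstration (editor's sketch, values invented):

# Editor's sketch: meta-trend agreement and the one-bar signal delay.
import numpy as np

trends_arr = np.array([
    [1, 1, 1],     # all three supertrends agree long
    [1, -1, 1],    # disagreement, meta-trend is 0
    [-1, -1, -1],  # all agree short
])
meta_trend = np.where(
    (trends_arr[:, 0] == trends_arr[:, 1]) & (trends_arr[:, 1] == trends_arr[:, 2]),
    trends_arr[:, 0], 0)
signal = np.roll(meta_trend, 1)
signal[0] = 0  # the first bar has no prior information
print(meta_trend)  # [ 1  0 -1]
print(signal)      # [ 0  1  0]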

cycles/utils/data_loader.py (new file, 152 lines)
View File

@@ -0,0 +1,152 @@
import os
import json
import pandas as pd
from typing import Union, Optional
import logging
from .storage_utils import (
    _parse_timestamp_column,
    _filter_by_date_range,
    _normalize_column_names,
    TimestampParsingError,
    DataLoadingError
)

class DataLoader:
    """Handles loading and preprocessing of data from various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data loader
        Args:
            data_dir: Directory containing data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
                  stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input
        Args:
            file_path: path to the data file
            start_date: start date (string or datetime-like)
            stop_date: stop date (string or datetime-like)
        Returns:
            pandas DataFrame with timestamp index
        Raises:
            DataLoadingError: If data loading fails
        """
        try:
            # Convert string dates to pandas datetime objects for proper comparison
            start_date = pd.to_datetime(start_date)
            stop_date = pd.to_datetime(stop_date)
            # Determine file type
            _, ext = os.path.splitext(file_path)
            ext = ext.lower()
            if ext == ".json":
                return self._load_json_data(file_path, start_date, stop_date)
            else:
                return self._load_csv_data(file_path, start_date, stop_date)
        except Exception as e:
            error_msg = f"Error loading data from {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            # Return an empty DataFrame with a DatetimeIndex
            return pd.DataFrame(index=pd.to_datetime([]))

    def _load_json_data(self, file_path: str, start_date: pd.Timestamp,
                        stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a JSON data file
        Args:
            file_path: Path to JSON file
            start_date: Start date for filtering
            stop_date: Stop date for filtering
        Returns:
            Processed DataFrame with timestamp index
        """
        with open(os.path.join(self.data_dir, file_path), 'r') as f:
            raw = json.load(f)
        data = pd.DataFrame(raw["Data"])
        data = _normalize_column_names(data)
        # Convert timestamp to datetime
        data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
        # Filter by date range
        data = _filter_by_date_range(data, "timestamp", start_date, stop_date)
        if self.logging is not None:
            self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
        return data.set_index("timestamp")

    def _load_csv_data(self, file_path: str, start_date: pd.Timestamp,
                       stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a CSV data file
        Args:
            file_path: Path to CSV file
            start_date: Start date for filtering
            stop_date: Stop date for filtering
        Returns:
            Processed DataFrame with timestamp index
        """
        # Define optimized dtypes
        dtypes = {
            'Open': 'float32',
            'High': 'float32',
            'Low': 'float32',
            'Close': 'float32',
            'Volume': 'float32'
        }
        # Read data with the original capitalized column names
        data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
        return self._process_csv_timestamps(data, start_date, stop_date, file_path)

    def _process_csv_timestamps(self, data: pd.DataFrame, start_date: pd.Timestamp,
                                stop_date: pd.Timestamp, file_path: str) -> pd.DataFrame:
        """Process timestamps in CSV data and filter by date range
        Args:
            data: DataFrame with CSV data
            start_date: Start date for filtering
            stop_date: Stop date for filtering
            file_path: Original file path for logging
        Returns:
            Processed DataFrame with timestamp index
        """
        if 'Timestamp' in data.columns:
            data = _parse_timestamp_column(data, 'Timestamp')
            data = _filter_by_date_range(data, 'Timestamp', start_date, stop_date)
            data = _normalize_column_names(data)
            if self.logging is not None:
                self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
            return data.set_index('timestamp')
        else:
            # Attempt to use the first column if 'Timestamp' is not present
            data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
            data = _parse_timestamp_column(data, 'timestamp')
            data = _filter_by_date_range(data, 'timestamp', start_date, stop_date)
            data = _normalize_column_names(data)
            if self.logging is not None:
                self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
            return data.set_index('timestamp')
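A short usage sketch (editor's illustration; the file name and date range below are hypothetical, and the CSV is assumed to live under data/ with a Unix-timestamp column):

loader = DataLoader(data_dir="data")
df = loader.load_data("BTCUSDT_1m.csv", "2024-01-01", "2024-06-30")
print(df.index.min(), df.index.max(), list(df.columns))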

cycles/utils/data_saver.py (new file, 106 lines)
View File

@@ -0,0 +1,106 @@
import os
import pandas as pd
from typing import Optional
import logging
from .storage_utils import DataSavingError

class DataSaver:
    """Handles saving data to various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver
        Args:
            data_dir: Directory for saving data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.
        If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
        (seconds since epoch) before saving. The index is saved as a column named 'timestamp'.
        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir
        Raises:
            DataSavingError: If saving fails
        """
        try:
            data_to_save = data.copy()
            data_to_save = self._prepare_data_for_saving(data_to_save)
            # Save to CSV, ensuring the 'timestamp' column (if created) is written
            full_path = os.path.join(self.data_dir, file_path)
            data_to_save.to_csv(full_path, index=False)
            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
        except Exception as e:
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare a DataFrame for saving by handling different index types
        Args:
            data: DataFrame to prepare
        Returns:
            DataFrame ready for saving
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        elif pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        else:
            # For other index types, save with the current index
            return data

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a DatetimeIndex to a Unix timestamp column
        Args:
            data: DataFrame with DatetimeIndex
        Returns:
            DataFrame with timestamp column
        """
        # Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
        data['timestamp'] = data.index.astype('int64') / 1e9
        data.reset_index(drop=True, inplace=True)
        # Ensure 'timestamp' is the first column if other columns exist
        if 'timestamp' in data.columns and len(data.columns) > 1:
            cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
            data = data[cols]
        return data

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a numeric index to a timestamp column
        Args:
            data: DataFrame with numeric index
        Returns:
            DataFrame with timestamp column
        """
        # The index is already numeric (e.g. float Unix timestamps from a previous save/load cycle)
        data['timestamp'] = data.index
        data.reset_index(drop=True, inplace=True)
        # Ensure 'timestamp' is the first column if other columns exist
        if 'timestamp' in data.columns and len(data.columns) > 1:
            cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
            data = data[cols]
        return data
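A round-trip sketch (editor's illustration, file name invented): a DatetimeIndex is written out as a float Unix-timestamp column named 'timestamp', which DataLoader's timestamp parsing can read back in:

import pandas as pd

df = pd.DataFrame(
    {'close': [1.0, 2.0]},
    index=pd.to_datetime(['2024-01-01 00:00', '2024-01-01 00:01']),
)
saver = DataSaver(data_dir="data")
saver.save_data(df, "roundtrip_example.csv")  # writes data/roundtrip_example.csv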

View File

@@ -1,128 +0,0 @@
import threading
import time
import queue
from google.oauth2.service_account import Credentials
import gspread
import math
import numpy as np
from collections import defaultdict

class GSheetBatchPusher(threading.Thread):
    def __init__(self, queue, timestamp, spreadsheet_name, interval=60, logging=None):
        super().__init__(daemon=True)
        self.queue = queue
        self.timestamp = timestamp
        self.spreadsheet_name = spreadsheet_name
        self.interval = interval
        self._stop_event = threading.Event()
        self.logging = logging

    def run(self):
        while not self._stop_event.is_set():
            self.push_all()
            time.sleep(self.interval)
        # Final push on stop
        self.push_all()

    def stop(self):
        self._stop_event.set()

    def push_all(self):
        batch_results = []
        batch_trades = []
        while True:
            try:
                results, trades = self.queue.get_nowait()
                batch_results.extend(results)
                batch_trades.extend(trades)
            except queue.Empty:
                break
        if batch_results or batch_trades:
            self.write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)

    def write_results_per_combination_gsheet(self, results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
        scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive"
        ]
        creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
        gc = gspread.authorize(creds)
        sh = gc.open(spreadsheet_name)
        try:
            worksheet = sh.worksheet("Results")
        except gspread.exceptions.WorksheetNotFound:
            worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")
        # Clear the worksheet before writing new results
        worksheet.clear()
        # Field names must match the keys of the result rows
        fieldnames = [
            "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
            "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
        ]

        def to_native(val):
            if isinstance(val, (np.generic, np.ndarray)):
                val = val.item()
            if hasattr(val, 'isoformat'):
                return val.isoformat()
            # Handle inf, -inf, nan
            if isinstance(val, float):
                if math.isinf(val):
                    return "∞" if val > 0 else "-∞"
                if math.isnan(val):
                    return ""
            return val

        # Write the header if the sheet is empty
        if len(worksheet.get_all_values()) == 0:
            worksheet.append_row(fieldnames)
        for row in results_rows:
            values = [to_native(row.get(field, "")) for field in fieldnames]
            worksheet.append_row(values)
        trades_fieldnames = [
            "entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
        ]
        trades_by_combo = defaultdict(list)
        for trade in trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        for (tf, sl), trades in trades_by_combo.items():
            sl_percent = int(round(sl * 100))
            sheet_name = f"Trades_{tf}_ST{sl_percent}%"
            try:
                trades_ws = sh.worksheet(sheet_name)
            except gspread.exceptions.WorksheetNotFound:
                trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")
            # Clear the trades worksheet before writing new trades
            trades_ws.clear()
            if len(trades_ws.get_all_values()) == 0:
                trades_ws.append_row(trades_fieldnames)
            for trade in trades:
                trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
                try:
                    trades_ws.append_row(trade_row)
                except gspread.exceptions.APIError as e:
                    if '429' in str(e):
                        if self.logging is not None:
                            self.logging.warning(f"Google Sheets API quota exceeded (429). Please wait one minute. Will retry on next batch push. Sheet: {sheet_name}")
                        # Re-queue the failed batch for retry
                        self.queue.put((results_rows, trade_rows))
                        return  # Stop pushing this batch; it will be retried on the next interval
                    else:
                        raise
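For context, a sketch of how this pusher (deleted by this commit) was driven (editor's illustration; it assumes a valid credentials/service_account.json and a spreadsheet shared with the service account):

import queue

q = queue.Queue()
pusher = GSheetBatchPusher(q, timestamp="2025-07-11",
                           spreadsheet_name="GlimBit Backtest Results", interval=60)
pusher.start()
# Producers enqueue (results_rows, trade_rows) tuples; keys follow the fieldnames above.
q.put(([{"timeframe": "1h", "stop_loss_pct": 0.05, "n_trades": 3}], []))
pusher.stop()   # run() does a final push_all() after the stop event is set
pusher.join()   # may wait up to `interval` seconds for the sleep to elapse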

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Progress Manager for tracking multiple parallel backtest tasks
"""
import threading
import time
import sys
from typing import Dict, Optional, Callable
from dataclasses import dataclass

@dataclass
class TaskProgress:
    """Represents progress information for a single task"""
    task_id: str
    name: str
    current: int
    total: int
    start_time: float
    last_update: float

    @property
    def percentage(self) -> float:
        """Calculate completion percentage"""
        if self.total == 0:
            return 0.0
        return (self.current / self.total) * 100

    @property
    def elapsed_time(self) -> float:
        """Calculate elapsed time in seconds"""
        return time.time() - self.start_time

    @property
    def eta(self) -> Optional[float]:
        """Estimate time to completion in seconds"""
        if self.current == 0 or self.percentage >= 100:
            return None
        elapsed = self.elapsed_time
        rate = self.current / elapsed
        remaining = self.total - self.current
        return remaining / rate if rate > 0 else None
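The ETA assumes a constant processing rate. A worked example (editor's sketch): with 250 of 1,000 steps done in 50 s, the rate is 250/50 = 5 steps/s, so the remaining 750 steps take about 150 s:

import time

tp = TaskProgress(task_id="t1", name="demo", current=250, total=1000,
                  start_time=time.time() - 50, last_update=time.time())
print(round(tp.percentage, 1))  # 25.0
print(round(tp.eta or 0))       # ~150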
class ProgressManager:
    """Manages progress tracking for multiple parallel tasks"""

    def __init__(self, update_interval: float = 1.0, display_width: int = 50):
        """
        Initialize progress manager
        Args:
            update_interval: How often to update the display (seconds)
            display_width: Width of the progress bar in characters
        """
        self.tasks: Dict[str, TaskProgress] = {}
        self.update_interval = update_interval
        self.display_width = display_width
        self.lock = threading.Lock()
        self.display_thread: Optional[threading.Thread] = None
        self.running = False
        self.last_display_height = 0

    def start_task(self, task_id: str, name: str, total: int) -> None:
        """
        Start tracking a new task
        Args:
            task_id: Unique identifier for the task
            name: Human-readable name for the task
            total: Total number of steps in the task
        """
        with self.lock:
            self.tasks[task_id] = TaskProgress(
                task_id=task_id,
                name=name,
                current=0,
                total=total,
                start_time=time.time(),
                last_update=time.time()
            )

    def update_progress(self, task_id: str, current: int) -> None:
        """
        Update progress for a specific task
        Args:
            task_id: Task identifier
            current: Current progress value
        """
        with self.lock:
            if task_id in self.tasks:
                self.tasks[task_id].current = current
                self.tasks[task_id].last_update = time.time()

    def complete_task(self, task_id: str) -> None:
        """
        Mark a task as completed
        Args:
            task_id: Task identifier
        """
        with self.lock:
            if task_id in self.tasks:
                task = self.tasks[task_id]
                task.current = task.total
                task.last_update = time.time()

    def start_display(self) -> None:
        """Start the progress display thread"""
        if not self.running:
            self.running = True
            self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
            self.display_thread.start()

    def stop_display(self) -> None:
        """Stop the progress display thread"""
        self.running = False
        if self.display_thread:
            self.display_thread.join(timeout=1.0)
        self._clear_display()

    def _display_loop(self) -> None:
        """Main loop for updating the progress display"""
        while self.running:
            self._update_display()
            time.sleep(self.update_interval)

    def _update_display(self) -> None:
        """Update the console display with current progress"""
        with self.lock:
            if not self.tasks:
                return
            # Clear the previous display
            self._clear_display()
            # Build display lines
            lines = []
            for task in sorted(self.tasks.values(), key=lambda t: t.task_id):
                line = self._format_progress_line(task)
                lines.append(line)
            # Print all lines
            for line in lines:
                print(line, flush=True)
            self.last_display_height = len(lines)

    def _clear_display(self) -> None:
        """Clear the previous progress display"""
        if self.last_display_height > 0:
            # Move the cursor up and clear lines
            for _ in range(self.last_display_height):
                sys.stdout.write('\033[F')  # Move cursor up one line
                sys.stdout.write('\033[K')  # Clear line
            sys.stdout.flush()

    def _format_progress_line(self, task: TaskProgress) -> str:
        """
        Format a single progress line for display
        Args:
            task: TaskProgress instance
        Returns:
            Formatted progress string
        """
        # Progress bar (filled/empty block characters)
        filled_width = int(task.percentage / 100 * self.display_width)
        bar = '█' * filled_width + '░' * (self.display_width - filled_width)
        # Time information
        elapsed_str = self._format_time(task.elapsed_time)
        eta_str = self._format_time(task.eta) if task.eta else "N/A"
        # Format line
        line = (f"{task.name:<25} {bar} "
                f"{task.percentage:5.1f}% "
                f"({task.current:,}/{task.total:,}) "
                f"{elapsed_str} ETA: {eta_str}")
        return line

    def _format_time(self, seconds: float) -> str:
        """
        Format a time duration for display
        Args:
            seconds: Time in seconds
        Returns:
            Formatted time string
        """
        if seconds < 60:
            return f"{seconds:.0f}s"
        elif seconds < 3600:
            minutes = seconds / 60
            return f"{minutes:.1f}m"
        else:
            hours = seconds / 3600
            return f"{hours:.1f}h"

    def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
        """
        Get a progress callback function for a specific task
        Args:
            task_id: Task identifier
        Returns:
            Callback function that updates progress for this task
        """
        def callback(current: int) -> None:
            self.update_progress(task_id, current)
        return callback

    def all_tasks_completed(self) -> bool:
        """Check if all tasks are completed"""
        with self.lock:
            return all(task.current >= task.total for task in self.tasks.values())

    def get_summary(self) -> str:
        """Get a summary of all tasks"""
        with self.lock:
            total_tasks = len(self.tasks)
            completed_tasks = sum(1 for task in self.tasks.values()
                                  if task.current >= task.total)
            return f"Tasks: {completed_tasks}/{total_tasks} completed"

View File

@@ -0,0 +1,179 @@
import os
import csv
from typing import Dict, List, Optional, Any
from collections import defaultdict
import logging
from .storage_utils import DataSavingError

class ResultFormatter:
    """Handles formatting and writing of backtest results to CSV files"""

    def __init__(self, results_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize result formatter
        Args:
            results_dir: Directory for saving result files
            logging_instance: Optional logging instance
        """
        self.results_dir = results_dir
        self.logging = logging_instance

    def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
        """Format a row for a combined results CSV file
        Args:
            row: Dictionary containing row data
        Returns:
            Dictionary with formatted values
        """
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate']*100:.2f}%",
            "max_drawdown": f"{row['max_drawdown']*100:.2f}%",
            "avg_trade": f"{row['avg_trade']*100:.2f}%",
            "profit_ratio": f"{row['profit_ratio']*100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
            "total_fees_usd": f"{row['total_fees_usd']:.2f}",
        }

    def write_results_chunk(self, filename: str, fieldnames: List[str],
                            rows: List[Dict], write_header: bool = False,
                            initial_usd: Optional[float] = None) -> None:
        """Write a chunk of results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: whether to write the header
            initial_usd: initial USD value for header comment
        Raises:
            DataSavingError: If writing fails
        """
        try:
            mode = 'w' if write_header else 'a'
            with open(filename, mode, newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                if write_header:
                    if initial_usd is not None:
                        csvfile.write(f"# initial_usd: {initial_usd}\n")
                    writer.writeheader()
                for row in rows:
                    # Only keep keys that are in fieldnames
                    filtered_row = {k: v for k, v in row.items() if k in fieldnames}
                    writer.writerow(filtered_row)
        except Exception as e:
            error_msg = f"Failed to write results chunk to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_backtest_results(self, filename: str, fieldnames: List[str],
                               rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
        """Write combined backtest results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of result dictionaries
            metadata_lines: optional list of strings to write as header comments
        Returns:
            Full path to the written file
        Raises:
            DataSavingError: If writing fails
        """
        try:
            fname = os.path.join(self.results_dir, filename)
            with open(fname, "w", newline="") as csvfile:
                if metadata_lines:
                    for line in metadata_lines:
                        csvfile.write(f"{line}\n")
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
                writer.writeheader()
                for row in rows:
                    writer.writerow(self.format_row(row))
            if self.logging is not None:
                self.logging.info(f"Combined results written to {fname}")
            return fname
        except Exception as e:
            error_msg = f"Failed to write backtest results to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades to separate CSV files grouped by timeframe and stop loss
        Args:
            all_trade_rows: list of trade dictionaries
            trades_fieldnames: list of trade fieldnames
        Raises:
            DataSavingError: If writing fails
        """
        try:
            trades_by_combo = self._group_trades_by_combination(all_trade_rows)
            for (tf, sl), trades in trades_by_combo.items():
                self._write_single_trade_file(tf, sl, trades, trades_fieldnames)
        except Exception as e:
            error_msg = f"Failed to write trades: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _group_trades_by_combination(self, all_trade_rows: List[Dict]) -> Dict:
        """Group trades by timeframe and stop loss combination
        Args:
            all_trade_rows: List of trade dictionaries
        Returns:
            Dictionary grouped by (timeframe, stop_loss_pct) tuples
        """
        trades_by_combo = defaultdict(list)
        for trade in all_trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        return trades_by_combo

    def _write_single_trade_file(self, timeframe: str, stop_loss_pct: float,
                                 trades: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades for a single timeframe/stop-loss combination
        Args:
            timeframe: Timeframe identifier
            stop_loss_pct: Stop loss percentage
            trades: List of trades for this combination
            trades_fieldnames: List of field names for trades
        """
        sl_percent = int(round(stop_loss_pct * 100))
        trades_filename = os.path.join(self.results_dir, f"trades_{timeframe}_ST{sl_percent}pct.csv")
        with open(trades_filename, "w", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
            writer.writeheader()
            for trade in trades:
                writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
        if self.logging is not None:
            self.logging.info(f"Trades written to {trades_filename}")

View File

@@ -1,17 +1,32 @@
import os
import pandas as pd
from typing import Optional, Union, Dict, Any, List
import logging
from .data_loader import DataLoader
from .data_saver import DataSaver
from .result_formatter import ResultFormatter
from .storage_utils import DataLoadingError, DataSavingError

RESULTS_DIR = "results"
DATA_DIR = "data"
class Storage:
    """Unified storage interface for data and results operations

    Acts as a coordinator for the DataLoader, DataSaver, and ResultFormatter components,
    maintaining backward compatibility while providing a clean separation of concerns.
    """
    def __init__(self, logging=None, results_dir=RESULTS_DIR, data_dir=DATA_DIR):
        """Initialize storage with component instances
        Args:
            logging: Optional logging instance
            results_dir: Directory for results files
            data_dir: Directory for data files
        """
        self.results_dir = results_dir
        self.data_dir = data_dir
        self.logging = logging
@@ -20,196 +35,89 @@ class Storage:
        os.makedirs(self.results_dir, exist_ok=True)
        os.makedirs(self.data_dir, exist_ok=True)
        # Initialize component instances
        self.data_loader = DataLoader(data_dir, logging)
        self.data_saver = DataSaver(data_dir, logging)
        self.result_formatter = ResultFormatter(results_dir, logging)

    def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
                  stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input
        Args:
            file_path: path to the data file
            start_date: start date (string or datetime-like)
            stop_date: stop date (string or datetime-like)
        Returns:
            pandas DataFrame with timestamp index
        Raises:
            DataLoadingError: If data loading fails
        """
        return self.data_loader.load_data(file_path, start_date, stop_date)

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file
        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir
        Raises:
            DataSavingError: If saving fails
        """
        self.data_saver.save_data(data, file_path)

    def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
        """Format a row for a combined results CSV file
        Args:
            row: Dictionary containing row data
        Returns:
            Dictionary with formatted values
        """
        return self.result_formatter.format_row(row)

    def write_results_chunk(self, filename: str, fieldnames: List[str],
                            rows: List[Dict], write_header: bool = False,
                            initial_usd: Optional[float] = None) -> None:
        """Write a chunk of results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: whether to write the header
            initial_usd: initial USD value for header comment
        """
        self.result_formatter.write_results_chunk(
            filename, fieldnames, rows, write_header, initial_usd
        )

    def write_backtest_results(self, filename: str, fieldnames: List[str],
                               rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
        """Write combined backtest results to a CSV file
        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of result dictionaries
            metadata_lines: optional list of strings to write as header comments
        Returns:
            Full path to the written file
        """
        return self.result_formatter.write_backtest_results(
            filename, fieldnames, rows, metadata_lines
        )

    def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades to separate CSV files grouped by timeframe and stop loss
        Args:
            all_trade_rows: list of trade dictionaries
            trades_fieldnames: list of trade fieldnames
        """
        self.result_formatter.write_trades(all_trade_rows, trades_fieldnames)
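A facade usage sketch (editor's illustration; the file names and dates are hypothetical). Storage only coordinates; the actual work happens in the components it instantiates:

storage = Storage(results_dir="results", data_dir="data")
df = storage.load_data("BTCUSDT_1h.csv", "2024-01-01", "2024-03-31")
storage.save_data(df, "BTCUSDT_1h_filtered.csv")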

View File

@@ -0,0 +1,73 @@
import pandas as pd

class TimestampParsingError(Exception):
    """Custom exception for timestamp parsing errors"""
    pass

class DataLoadingError(Exception):
    """Custom exception for data loading errors"""
    pass

class DataSavingError(Exception):
    """Custom exception for data saving errors"""
    pass

def _parse_timestamp_column(data: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Parse a timestamp column, handling both Unix timestamps and datetime strings
    Args:
        data: DataFrame containing the timestamp column
        column_name: Name of the timestamp column
    Returns:
        DataFrame with parsed timestamp column
    Raises:
        TimestampParsingError: If timestamp parsing fails
    """
    try:
        sample_timestamp = str(data[column_name].iloc[0])
        try:
            # Check if it's a Unix timestamp (numeric)
            float(sample_timestamp)
            # It's a Unix timestamp, convert using unit='s'
            data[column_name] = pd.to_datetime(data[column_name], unit='s')
        except ValueError:
            # It's already a datetime string, convert without a unit
            data[column_name] = pd.to_datetime(data[column_name])
        return data
    except Exception as e:
        raise TimestampParsingError(f"Failed to parse timestamp column '{column_name}': {e}")

def _filter_by_date_range(data: pd.DataFrame, timestamp_col: str,
                          start_date: pd.Timestamp, stop_date: pd.Timestamp) -> pd.DataFrame:
    """Filter a DataFrame by date range
    Args:
        data: DataFrame to filter
        timestamp_col: Name of the timestamp column
        start_date: Start date for filtering
        stop_date: Stop date for filtering
    Returns:
        Filtered DataFrame
    """
    return data[(data[timestamp_col] >= start_date) & (data[timestamp_col] <= stop_date)]

def _normalize_column_names(data: pd.DataFrame) -> pd.DataFrame:
    """Convert all column names to lowercase
    Args:
        data: DataFrame to normalize
    Returns:
        DataFrame with lowercase column names
    """
    data.columns = data.columns.str.lower()
    return data
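A quick demonstration of the dual-format parsing (editor's sketch): the function keys off the first value in the column, so a numeric sample is treated as Unix seconds and anything else as a datetime string:

import pandas as pd

unix_df = pd.DataFrame({'ts': [1704067200, 1704067260]})
str_df = pd.DataFrame({'ts': ['2024-01-01 00:00:00', '2024-01-01 00:01:00']})
print(_parse_timestamp_column(unix_df, 'ts')['ts'].iloc[0])  # 2024-01-01 00:00:00
print(_parse_timestamp_column(str_df, 'ts')['ts'].iloc[0])   # 2024-01-01 00:00:00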

View File

@@ -10,10 +10,12 @@ class SystemUtils:
"""Determine optimal number of worker processes based on system resources"""
cpu_count = os.cpu_count() or 4
memory_gb = psutil.virtual_memory().total / (1024**3)
# Heuristic: Use 75% of cores, but cap based on available memory
# Assume each worker needs ~2GB for large datasets
workers_by_memory = max(1, int(memory_gb / 2))
workers_by_cpu = max(1, int(cpu_count * 0.75))
# OPTIMIZATION: More aggressive worker allocation for better performance
workers_by_memory = max(1, int(memory_gb / 2)) # 2GB per worker
workers_by_cpu = max(1, int(cpu_count * 0.8)) # Use 80% of CPU cores
optimal_workers = min(workers_by_cpu, workers_by_memory, 8) # Cap at 8 workers
if self.logging is not None:
self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
return min(workers_by_cpu, workers_by_memory)
self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
return optimal_workers
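A worked example of the heuristic (editor's sketch; the machine specs are assumed):

cpu_count, memory_gb = 16, 32.0
workers_by_memory = max(1, int(memory_gb / 2))  # 16
workers_by_cpu = max(1, int(cpu_count * 0.8))   # 12
optimal_workers = min(workers_by_cpu, workers_by_memory, 8)
print(optimal_workers)  # 8 (the hard cap wins on this machine)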