Merge branch 'xgboost'
# Conflicts:
#	.gitignore
#	README.md
#	cycles/backtest.py
#	main.py
#	pyproject.toml
#	uv.lock
This commit is contained in:
152
cycles/utils/data_loader.py
Normal file
152
cycles/utils/data_loader.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
from typing import Union, Optional
|
||||
import logging
|
||||
|
||||
from .storage_utils import (
|
||||
_parse_timestamp_column,
|
||||
_filter_by_date_range,
|
||||
_normalize_column_names,
|
||||
TimestampParsingError,
|
||||
DataLoadingError
|
||||
)
|
||||
|
||||
|
||||
class DataLoader:
    """Handles loading and preprocessing of data from various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data loader

        Args:
            data_dir: Directory containing data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
                  stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Load data with optimized dtypes and filtering, supporting CSV and JSON input

        Args:
            file_path: path to the data file, relative to ``data_dir``
            start_date: start date (string or datetime-like)
            stop_date: stop date (string or datetime-like)

        Returns:
            pandas DataFrame with timestamp index. On any failure the error
            is logged (when a logger is configured) and an EMPTY DataFrame
            with a DatetimeIndex is returned; this method never raises.
        """
        try:
            # Convert string dates to pandas datetime objects for proper comparison
            start_date = pd.to_datetime(start_date)
            stop_date = pd.to_datetime(stop_date)

            # Dispatch on file extension; anything that is not JSON is read as CSV
            _, ext = os.path.splitext(file_path)
            if ext.lower() == ".json":
                return self._load_json_data(file_path, start_date, stop_date)
            return self._load_csv_data(file_path, start_date, stop_date)

        except Exception as e:
            # Best-effort contract: swallow the error so callers can treat an
            # empty frame as "no data" without try/except at every call site.
            error_msg = f"Error loading data from {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            # Return an empty DataFrame with a DatetimeIndex
            return pd.DataFrame(index=pd.to_datetime([]))

    def _load_json_data(self, file_path: str, start_date: pd.Timestamp,
                        stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a JSON data file.

        The file is expected to hold a top-level "Data" key containing a list
        of records with Unix-second timestamps.

        Args:
            file_path: Path to JSON file
            start_date: Start date for filtering
            stop_date: Stop date for filtering

        Returns:
            Processed DataFrame with timestamp index
        """
        with open(os.path.join(self.data_dir, file_path), 'r') as f:
            raw = json.load(f)

        data = pd.DataFrame(raw["Data"])
        data = _normalize_column_names(data)

        # Timestamps in the JSON payload are Unix seconds
        data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")

        data = _filter_by_date_range(data, "timestamp", start_date, stop_date)

        if self.logging is not None:
            self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")

        return data.set_index("timestamp")

    def _load_csv_data(self, file_path: str, start_date: pd.Timestamp,
                       stop_date: pd.Timestamp) -> pd.DataFrame:
        """Load and process a CSV data file.

        Args:
            file_path: Path to CSV file
            start_date: Start date for filtering
            stop_date: Stop date for filtering

        Returns:
            Processed DataFrame with timestamp index
        """
        # float32 halves memory versus the float64 default for OHLCV columns
        dtypes = {
            'Open': 'float32',
            'High': 'float32',
            'Low': 'float32',
            'Close': 'float32',
            'Volume': 'float32'
        }

        # Read data with original capitalized column names
        data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)

        return self._process_csv_timestamps(data, start_date, stop_date, file_path)

    def _process_csv_timestamps(self, data: pd.DataFrame, start_date: pd.Timestamp,
                                stop_date: pd.Timestamp, file_path: str) -> pd.DataFrame:
        """Parse/filter the timestamp column and index the frame by it.

        Uses the 'Timestamp' column when present; otherwise falls back to
        treating the first column as the timestamp source.

        Args:
            data: DataFrame with CSV data
            start_date: Start date for filtering
            stop_date: Stop date for filtering
            file_path: Original file path for logging

        Returns:
            Processed DataFrame with timestamp index
        """
        if 'Timestamp' in data.columns:
            ts_col = 'Timestamp'
            log_note = ""
        else:
            # Attempt to use the first column if 'Timestamp' is not present
            data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
            ts_col = 'timestamp'
            log_note = " (using first column as timestamp)"

        # Shared processing pipeline (previously duplicated in both branches)
        data = _parse_timestamp_column(data, ts_col)
        data = _filter_by_date_range(data, ts_col, start_date, stop_date)
        data = _normalize_column_names(data)

        if self.logging is not None:
            self.logging.info(f"Data loaded from {file_path}{log_note} for date range {start_date} to {stop_date}")

        return data.set_index('timestamp')
106
cycles/utils/data_saver.py
Normal file
106
cycles/utils/data_saver.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
from .storage_utils import DataSavingError
|
||||
|
||||
|
||||
class DataSaver:
    """Handles saving data to various file formats"""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver

        Args:
            data_dir: Directory for saving data files
            logging_instance: Optional logging instance
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.

        If the DataFrame has a DatetimeIndex, it's converted to float Unix
        timestamps (seconds since epoch) before saving. The index is saved as
        a column named 'timestamp'. The caller's DataFrame is never mutated
        (a copy is made first).

        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir

        Raises:
            DataSavingError: If saving fails
        """
        try:
            data_to_save = data.copy()
            data_to_save = self._prepare_data_for_saving(data_to_save)

            # Save to CSV, ensuring the 'timestamp' column (if created) is written
            full_path = os.path.join(self.data_dir, file_path)
            data_to_save.to_csv(full_path, index=False)

            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")

        except Exception as e:
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare DataFrame for saving by handling different index types.

        Note: mutates the frame it is given; callers pass in a private copy.

        Args:
            data: DataFrame to prepare

        Returns:
            DataFrame ready for saving
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        elif pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        else:
            # For other index types, save with the current index
            return data

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert DatetimeIndex to a float Unix-timestamp column.

        Args:
            data: DataFrame with DatetimeIndex

        Returns:
            DataFrame with 'timestamp' as the first column
        """
        # int64 nanoseconds -> float seconds since epoch
        data['timestamp'] = data.index.astype('int64') / 1e9
        data.reset_index(drop=True, inplace=True)
        return self._move_timestamp_first(data)

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert an already-numeric index to a timestamp column.

        Covers e.g. float Unix timestamps from a previous save/load cycle.

        Args:
            data: DataFrame with numeric index

        Returns:
            DataFrame with 'timestamp' as the first column
        """
        data['timestamp'] = data.index
        data.reset_index(drop=True, inplace=True)
        return self._move_timestamp_first(data)

    @staticmethod
    def _move_timestamp_first(data: pd.DataFrame) -> pd.DataFrame:
        """Reorder columns so 'timestamp' comes first (shared by both converters;
        this logic was previously duplicated verbatim in each).

        Args:
            data: DataFrame that may contain a 'timestamp' column

        Returns:
            DataFrame with 'timestamp' first when applicable
        """
        if 'timestamp' in data.columns and len(data.columns) > 1:
            cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']
            data = data[cols]
        return data
@@ -1,128 +0,0 @@
|
||||
import threading
|
||||
import time
|
||||
import queue
|
||||
from google.oauth2.service_account import Credentials
|
||||
import gspread
|
||||
import math
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
class GSheetBatchPusher(threading.Thread):
    """Daemon thread that drains (results, trades) tuples from a queue and
    pushes them to a Google Sheets spreadsheet in periodic batches.

    NOTE(review): relies on module-level imports of `threading`, `time`,
    `queue`, `gspread`, `Credentials`, `np`, `math` and `defaultdict` from
    the enclosing file.
    """

    def __init__(self, queue, timestamp, spreadsheet_name, interval=60, logging=None):
        """Initialize the pusher.

        Args:
            queue: queue.Queue yielding (results_rows, trade_rows) tuples.
                (Parameter name shadows the `queue` module inside this method only.)
            timestamp: run timestamp forwarded to the sheet writer
            spreadsheet_name: name of the target Google Sheets document
            interval: seconds between batch pushes
            logging: optional logger (shadows the `logging` name if imported)
        """
        super().__init__(daemon=True)
        self.queue = queue
        self.timestamp = timestamp
        self.spreadsheet_name = spreadsheet_name
        self.interval = interval
        self._stop_event = threading.Event()
        self.logging = logging

    def run(self):
        """Push queued batches every `interval` seconds until stopped."""
        while not self._stop_event.is_set():
            self.push_all()
            time.sleep(self.interval)
        # Final push on stop
        self.push_all()

    def stop(self):
        """Signal the thread to exit after one final push."""
        self._stop_event.set()

    def push_all(self):
        """Drain everything currently in the queue and write it in one batch."""
        batch_results = []
        batch_trades = []
        # Non-blocking drain: collect until the queue is momentarily empty.
        while True:
            try:
                results, trades = self.queue.get_nowait()
                batch_results.extend(results)
                batch_trades.extend(trades)
            except queue.Empty:
                break

        if batch_results or batch_trades:
            self.write_results_per_combination_gsheet(batch_results, batch_trades, self.timestamp, self.spreadsheet_name)


    def write_results_per_combination_gsheet(self, results_rows, trade_rows, timestamp, spreadsheet_name="GlimBit Backtest Results"):
        """Write summary rows to a "Results" worksheet and trades to one
        worksheet per (timeframe, stop-loss) combination.

        On an HTTP 429 (quota) error the whole batch is re-queued for retry
        on the next interval. Other API errors propagate.

        Args:
            results_rows: list of result dicts (see `fieldnames` below)
            trade_rows: list of trade dicts with 'timeframe'/'stop_loss_pct' keys
            timestamp: run timestamp (currently unused in the body — TODO confirm)
            spreadsheet_name: target spreadsheet document name
        """
        scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive"
        ]
        creds = Credentials.from_service_account_file('credentials/service_account.json', scopes=scopes)
        gc = gspread.authorize(creds)
        sh = gc.open(spreadsheet_name)

        # Get or create the summary worksheet.
        try:
            worksheet = sh.worksheet("Results")
        except gspread.exceptions.WorksheetNotFound:
            worksheet = sh.add_worksheet(title="Results", rows="1000", cols="20")

        # Clear the worksheet before writing new results
        worksheet.clear()

        # Updated fieldnames to match your data rows
        fieldnames = [
            "timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate",
            "max_drawdown", "avg_trade", "profit_ratio", "initial_usd", "final_usd"
        ]

        def to_native(val):
            # Convert numpy scalars/arrays and datetimes to JSON-safe values
            # the Sheets API accepts.
            if isinstance(val, (np.generic, np.ndarray)):
                val = val.item()
            if hasattr(val, 'isoformat'):
                return val.isoformat()
            # Handle inf, -inf, nan
            if isinstance(val, float):
                if math.isinf(val):
                    return "∞" if val > 0 else "-∞"
                if math.isnan(val):
                    return ""
            return val

        # Write header if sheet is empty
        if len(worksheet.get_all_values()) == 0:
            worksheet.append_row(fieldnames)

        for row in results_rows:
            values = [to_native(row.get(field, "")) for field in fieldnames]
            worksheet.append_row(values)

        trades_fieldnames = [
            "entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type"
        ]
        trades_by_combo = defaultdict(list)

        # Group trades so each (timeframe, stop-loss) pair gets its own sheet.
        for trade in trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)

        for (tf, sl), trades in trades_by_combo.items():
            sl_percent = int(round(sl * 100))
            sheet_name = f"Trades_{tf}_ST{sl_percent}%"

            try:
                trades_ws = sh.worksheet(sheet_name)
            except gspread.exceptions.WorksheetNotFound:
                trades_ws = sh.add_worksheet(title=sheet_name, rows="1000", cols="20")

            # Clear the trades worksheet before writing new trades
            trades_ws.clear()

            if len(trades_ws.get_all_values()) == 0:
                trades_ws.append_row(trades_fieldnames)

            for trade in trades:
                trade_row = [to_native(trade.get(field, "")) for field in trades_fieldnames]
                try:
                    trades_ws.append_row(trade_row)
                except gspread.exceptions.APIError as e:
                    if '429' in str(e):
                        if self.logging is not None:
                            self.logging.warning(f"Google Sheets API quota exceeded (429). Please wait one minute. Will retry on next batch push. Sheet: {sheet_name}")
                        # Re-queue the failed batch for retry
                        self.queue.put((results_rows, trade_rows))
                        return # Stop pushing for this batch, will retry next interval
                    else:
                        raise
233
cycles/utils/progress_manager.py
Normal file
233
cycles/utils/progress_manager.py
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Progress Manager for tracking multiple parallel backtest tasks
|
||||
"""
|
||||
|
||||
import threading
|
||||
import time
|
||||
import sys
|
||||
from typing import Dict, Optional, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TaskProgress:
    """Progress snapshot for one tracked task."""
    task_id: str      # unique identifier
    name: str         # human-readable label
    current: int      # steps completed so far
    total: int        # total steps expected
    start_time: float # epoch seconds when tracking began
    last_update: float  # epoch seconds of the most recent update

    @property
    def percentage(self) -> float:
        """Completion as a value in [0, 100]; 0.0 when total is zero."""
        if not self.total:
            return 0.0
        return (self.current / self.total) * 100

    @property
    def elapsed_time(self) -> float:
        """Seconds since tracking started."""
        return time.time() - self.start_time

    @property
    def eta(self) -> Optional[float]:
        """Estimated seconds remaining, or None when not computable
        (nothing done yet, already finished, or a non-positive rate)."""
        if self.current == 0 or self.percentage >= 100:
            return None

        rate = self.current / self.elapsed_time
        if rate <= 0:
            return None
        return (self.total - self.current) / rate
|
||||
|
||||
class ProgressManager:
    """Tracks and renders progress for multiple parallel tasks."""

    def __init__(self, update_interval: float = 1.0, display_width: int = 50):
        """Create a manager.

        Args:
            update_interval: seconds between display refreshes
            display_width: progress-bar width in characters
        """
        self.tasks: Dict[str, TaskProgress] = {}
        self.update_interval = update_interval
        self.display_width = display_width
        self.lock = threading.Lock()
        self.display_thread: Optional[threading.Thread] = None
        self.running = False
        self.last_display_height = 0

    def start_task(self, task_id: str, name: str, total: int) -> None:
        """Register a new task (progress 0) and start its clock.

        Args:
            task_id: unique task identifier
            name: human-readable task name
            total: total number of steps
        """
        now = time.time()
        with self.lock:
            self.tasks[task_id] = TaskProgress(
                task_id=task_id,
                name=name,
                current=0,
                total=total,
                start_time=now,
                last_update=now,
            )

    def update_progress(self, task_id: str, current: int) -> None:
        """Set the current step count of a known task; unknown ids are ignored.

        Args:
            task_id: task identifier
            current: new progress value
        """
        with self.lock:
            task = self.tasks.get(task_id)
            if task is not None:
                task.current = current
                task.last_update = time.time()

    def complete_task(self, task_id: str) -> None:
        """Force a known task to 100% (current = total).

        Args:
            task_id: task identifier
        """
        with self.lock:
            task = self.tasks.get(task_id)
            if task is not None:
                task.current = task.total
                task.last_update = time.time()

    def start_display(self) -> None:
        """Spawn the daemon display thread, once."""
        if self.running:
            return
        self.running = True
        self.display_thread = threading.Thread(target=self._display_loop, daemon=True)
        self.display_thread.start()

    def stop_display(self) -> None:
        """Stop the display thread and erase any leftover output."""
        self.running = False
        if self.display_thread:
            self.display_thread.join(timeout=1.0)
        self._clear_display()

    def _display_loop(self) -> None:
        """Redraw the console every `update_interval` seconds until stopped."""
        while self.running:
            self._update_display()
            time.sleep(self.update_interval)

    def _update_display(self) -> None:
        """Erase the previous frame and print one line per task, sorted by id."""
        with self.lock:
            if not self.tasks:
                return

            self._clear_display()
            rendered = [
                self._format_progress_line(task)
                for task in sorted(self.tasks.values(), key=lambda t: t.task_id)
            ]
            for line in rendered:
                print(line, flush=True)
            self.last_display_height = len(rendered)

    def _clear_display(self) -> None:
        """Erase the previously printed block via ANSI cursor controls."""
        if self.last_display_height <= 0:
            return
        for _ in range(self.last_display_height):
            sys.stdout.write('\033[F')  # cursor up one line
            sys.stdout.write('\033[K')  # erase that line
        sys.stdout.flush()

    def _format_progress_line(self, task: TaskProgress) -> str:
        """Render one task as `name │bar│ pct (cur/total) elapsed ETA`.

        Args:
            task: TaskProgress instance

        Returns:
            Formatted progress string
        """
        filled = int(task.percentage / 100 * self.display_width)
        bar = '█' * filled + '░' * (self.display_width - filled)

        elapsed_str = self._format_time(task.elapsed_time)
        eta_str = self._format_time(task.eta) if task.eta else "N/A"

        return (f"{task.name:<25} │{bar}│ "
                f"{task.percentage:5.1f}% "
                f"({task.current:,}/{task.total:,}) "
                f"⏱ {elapsed_str} ETA: {eta_str}")

    def _format_time(self, seconds: float) -> str:
        """Format a duration as `Ns`, `N.Nm`, or `N.Nh`.

        Args:
            seconds: Time in seconds

        Returns:
            Formatted time string
        """
        if seconds < 60:
            return f"{seconds:.0f}s"
        if seconds < 3600:
            return f"{seconds / 60:.1f}m"
        return f"{seconds / 3600:.1f}h"

    def get_task_progress_callback(self, task_id: str) -> Callable[[int], None]:
        """Return a closure `cb(current)` bound to one task id.

        Args:
            task_id: task identifier

        Returns:
            Callback that updates this task's progress
        """
        def callback(current: int) -> None:
            self.update_progress(task_id, current)

        return callback

    def all_tasks_completed(self) -> bool:
        """True when every registered task has reached its total
        (vacuously True with no tasks)."""
        with self.lock:
            return all(task.current >= task.total for task in self.tasks.values())

    def get_summary(self) -> str:
        """One-line `Tasks: X/Y completed` summary."""
        with self.lock:
            done = sum(1 for task in self.tasks.values()
                       if task.current >= task.total)
            return f"Tasks: {done}/{len(self.tasks)} completed"
179
cycles/utils/result_formatter.py
Normal file
179
cycles/utils/result_formatter.py
Normal file
@@ -0,0 +1,179 @@
|
||||
import os
|
||||
import csv
|
||||
from typing import Dict, List, Optional, Any
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
|
||||
from .storage_utils import DataSavingError
|
||||
|
||||
|
||||
class ResultFormatter:
    """Handles formatting and writing of backtest results to CSV files"""

    def __init__(self, results_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize result formatter

        Args:
            results_dir: Directory for saving result files
            logging_instance: Optional logging instance
        """
        self.results_dir = results_dir
        self.logging = logging_instance

    def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
        """Format a row for a combined results CSV file.

        Percentages are rendered as 'NN.NN%' and dollar amounts with two
        decimals. Raises KeyError if a required key is missing from `row`.

        Args:
            row: Dictionary containing row data

        Returns:
            Dictionary with formatted values
        """
        return {
            "timeframe": row["timeframe"],
            "stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
            "n_trades": row["n_trades"],
            "n_stop_loss": row["n_stop_loss"],
            "win_rate": f"{row['win_rate']*100:.2f}%",
            "max_drawdown": f"{row['max_drawdown']*100:.2f}%",
            "avg_trade": f"{row['avg_trade']*100:.2f}%",
            "profit_ratio": f"{row['profit_ratio']*100:.2f}%",
            "final_usd": f"{row['final_usd']:.2f}",
            "total_fees_usd": f"{row['total_fees_usd']:.2f}",
        }

    def write_results_chunk(self, filename: str, fieldnames: List[str],
                            rows: List[Dict], write_header: bool = False,
                            initial_usd: Optional[float] = None) -> None:
        """Write a chunk of results to a CSV file.

        Note: `filename` is used as given — NOT joined with `results_dir` —
        unlike `write_backtest_results`.

        Args:
            filename: filename to write to
            fieldnames: list of fieldnames
            rows: list of rows
            write_header: True truncates the file and writes the header;
                False appends to the existing file
            initial_usd: initial USD value written as a '#' comment before the header

        Raises:
            DataSavingError: If writing fails
        """
        try:
            mode = 'w' if write_header else 'a'

            with open(filename, mode, newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                if write_header:
                    if initial_usd is not None:
                        csvfile.write(f"# initial_usd: {initial_usd}\n")
                    writer.writeheader()

                for row in rows:
                    # Only keep keys that are in fieldnames
                    filtered_row = {k: v for k, v in row.items() if k in fieldnames}
                    writer.writerow(filtered_row)

        except Exception as e:
            # BUG FIX: message previously contained a literal "(unknown)"
            # placeholder instead of the actual filename.
            error_msg = f"Failed to write results chunk to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_backtest_results(self, filename: str, fieldnames: List[str],
                               rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
        """Write combined backtest results to a tab-delimited CSV file in `results_dir`.

        Args:
            filename: filename to write to (joined with `results_dir`)
            fieldnames: list of fieldnames
            rows: list of result dictionaries
            metadata_lines: optional list of strings to write as header comments

        Returns:
            Full path to the written file

        Raises:
            DataSavingError: If writing fails
        """
        try:
            fname = os.path.join(self.results_dir, filename)
            with open(fname, "w", newline="") as csvfile:
                if metadata_lines:
                    for line in metadata_lines:
                        csvfile.write(f"{line}\n")

                writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
                writer.writeheader()

                for row in rows:
                    writer.writerow(self.format_row(row))

            if self.logging is not None:
                self.logging.info(f"Combined results written to {fname}")

            return fname

        except Exception as e:
            # BUG FIX: message previously contained a literal "(unknown)"
            # placeholder instead of the actual filename.
            error_msg = f"Failed to write backtest results to {filename}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades to separate CSV files grouped by timeframe and stop loss.

        Args:
            all_trade_rows: list of trade dictionaries
            trades_fieldnames: list of trade fieldnames

        Raises:
            DataSavingError: If writing fails
        """
        try:
            trades_by_combo = self._group_trades_by_combination(all_trade_rows)

            for (tf, sl), trades in trades_by_combo.items():
                self._write_single_trade_file(tf, sl, trades, trades_fieldnames)

        except Exception as e:
            error_msg = f"Failed to write trades: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _group_trades_by_combination(self, all_trade_rows: List[Dict]) -> Dict:
        """Group trades by timeframe and stop loss combination.

        Args:
            all_trade_rows: List of trade dictionaries

        Returns:
            Dictionary keyed by (timeframe, stop_loss_pct) tuples
        """
        trades_by_combo = defaultdict(list)
        for trade in all_trade_rows:
            tf = trade.get("timeframe")
            sl = trade.get("stop_loss_pct")
            trades_by_combo[(tf, sl)].append(trade)
        return trades_by_combo

    def _write_single_trade_file(self, timeframe: str, stop_loss_pct: float,
                                 trades: List[Dict], trades_fieldnames: List[str]) -> None:
        """Write trades for a single timeframe/stop-loss combination.

        Args:
            timeframe: Timeframe identifier
            stop_loss_pct: Stop loss percentage (fraction, e.g. 0.05)
            trades: List of trades for this combination
            trades_fieldnames: List of field names for trades
        """
        sl_percent = int(round(stop_loss_pct * 100))
        trades_filename = os.path.join(self.results_dir, f"trades_{timeframe}_ST{sl_percent}pct.csv")

        with open(trades_filename, "w", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
            writer.writeheader()
            for trade in trades:
                # Missing fields are written as empty strings
                writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})

        if self.logging is not None:
            self.logging.info(f"Trades written to {trades_filename}")
@@ -1,17 +1,32 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import csv
|
||||
from collections import defaultdict
|
||||
from typing import Optional, Union, Dict, Any, List
|
||||
import logging
|
||||
|
||||
from .data_loader import DataLoader
|
||||
from .data_saver import DataSaver
|
||||
from .result_formatter import ResultFormatter
|
||||
from .storage_utils import DataLoadingError, DataSavingError
|
||||
|
||||
RESULTS_DIR = "../results"
|
||||
DATA_DIR = "../data"
|
||||
|
||||
RESULTS_DIR = "results"
|
||||
DATA_DIR = "data"
|
||||
|
||||
class Storage:
|
||||
|
||||
"""Storage class for storing and loading results and data"""
|
||||
"""Unified storage interface for data and results operations
|
||||
|
||||
Acts as a coordinator for DataLoader, DataSaver, and ResultFormatter components,
|
||||
maintaining backward compatibility while providing a clean separation of concerns.
|
||||
"""
|
||||
|
||||
def __init__(self, logging=None, results_dir=RESULTS_DIR, data_dir=DATA_DIR):
|
||||
|
||||
"""Initialize storage with component instances
|
||||
|
||||
Args:
|
||||
logging: Optional logging instance
|
||||
results_dir: Directory for results files
|
||||
data_dir: Directory for data files
|
||||
"""
|
||||
self.results_dir = results_dir
|
||||
self.data_dir = data_dir
|
||||
self.logging = logging
|
||||
@@ -20,196 +35,89 @@ class Storage:
|
||||
os.makedirs(self.results_dir, exist_ok=True)
|
||||
os.makedirs(self.data_dir, exist_ok=True)
|
||||
|
||||
def load_data(self, file_path, start_date, stop_date):
|
||||
# Initialize component instances
|
||||
self.data_loader = DataLoader(data_dir, logging)
|
||||
self.data_saver = DataSaver(data_dir, logging)
|
||||
self.result_formatter = ResultFormatter(results_dir, logging)
|
||||
|
||||
def load_data(self, file_path: str, start_date: Union[str, pd.Timestamp],
|
||||
stop_date: Union[str, pd.Timestamp]) -> pd.DataFrame:
|
||||
"""Load data with optimized dtypes and filtering, supporting CSV and JSON input
|
||||
|
||||
Args:
|
||||
file_path: path to the data file
|
||||
start_date: start date
|
||||
stop_date: stop date
|
||||
start_date: start date (string or datetime-like)
|
||||
stop_date: stop date (string or datetime-like)
|
||||
|
||||
Returns:
|
||||
pandas DataFrame
|
||||
pandas DataFrame with timestamp index
|
||||
|
||||
Raises:
|
||||
DataLoadingError: If data loading fails
|
||||
"""
|
||||
# Determine file type
|
||||
_, ext = os.path.splitext(file_path)
|
||||
ext = ext.lower()
|
||||
try:
|
||||
if ext == ".json":
|
||||
with open(os.path.join(self.data_dir, file_path), 'r') as f:
|
||||
raw = json.load(f)
|
||||
data = pd.DataFrame(raw["Data"])
|
||||
# Convert columns to lowercase
|
||||
data.columns = data.columns.str.lower()
|
||||
# Convert timestamp to datetime
|
||||
data["timestamp"] = pd.to_datetime(data["timestamp"], unit="s")
|
||||
# Filter by date range
|
||||
data = data[(data["timestamp"] >= start_date) & (data["timestamp"] <= stop_date)]
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
return data.set_index("timestamp")
|
||||
else:
|
||||
# Define optimized dtypes
|
||||
dtypes = {
|
||||
'Open': 'float32',
|
||||
'High': 'float32',
|
||||
'Low': 'float32',
|
||||
'Close': 'float32',
|
||||
'Volume': 'float32'
|
||||
}
|
||||
# Read data with original capitalized column names
|
||||
data = pd.read_csv(os.path.join(self.data_dir, file_path), dtype=dtypes)
|
||||
|
||||
|
||||
# Convert timestamp to datetime
|
||||
if 'Timestamp' in data.columns:
|
||||
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
|
||||
# Filter by date range
|
||||
data = data[(data['Timestamp'] >= start_date) & (data['Timestamp'] <= stop_date)]
|
||||
# Now convert column names to lowercase
|
||||
data.columns = data.columns.str.lower()
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} for date range {start_date} to {stop_date}")
|
||||
return data.set_index('timestamp')
|
||||
else: # Attempt to use the first column if 'Timestamp' is not present
|
||||
data.rename(columns={data.columns[0]: 'timestamp'}, inplace=True)
|
||||
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
|
||||
data = data[(data['timestamp'] >= start_date) & (data['timestamp'] <= stop_date)]
|
||||
data.columns = data.columns.str.lower() # Ensure all other columns are lower
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data loaded from {file_path} (using first column as timestamp) for date range {start_date} to {stop_date}")
|
||||
return data.set_index('timestamp')
|
||||
except Exception as e:
|
||||
if self.logging is not None:
|
||||
self.logging.error(f"Error loading data from {file_path}: {e}")
|
||||
# Return an empty DataFrame with a DatetimeIndex
|
||||
return pd.DataFrame(index=pd.to_datetime([]))
|
||||
|
||||
def save_data(self, data: pd.DataFrame, file_path: str):
|
||||
"""Save processed data to a CSV file.
|
||||
If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps
|
||||
(seconds since epoch) before saving. The index is saved as a column named 'timestamp'.
|
||||
return self.data_loader.load_data(file_path, start_date, stop_date)
|
||||
|
||||
def save_data(self, data: pd.DataFrame, file_path: str) -> None:
|
||||
"""Save processed data to a CSV file
|
||||
|
||||
Args:
|
||||
data (pd.DataFrame): data to save.
|
||||
file_path (str): path to the data file relative to the data_dir.
|
||||
data: DataFrame to save
|
||||
file_path: path to the data file relative to the data_dir
|
||||
|
||||
Raises:
|
||||
DataSavingError: If saving fails
|
||||
"""
|
||||
data_to_save = data.copy()
|
||||
self.data_saver.save_data(data, file_path)
|
||||
|
||||
if isinstance(data_to_save.index, pd.DatetimeIndex):
|
||||
# Convert DatetimeIndex to Unix timestamp (float seconds since epoch)
|
||||
# and make it a column named 'timestamp'.
|
||||
data_to_save['timestamp'] = data_to_save.index.astype('int64') / 1e9
|
||||
# Reset index so 'timestamp' column is saved and old DatetimeIndex is not saved as a column.
|
||||
# We want the 'timestamp' column to be the first one.
|
||||
data_to_save.reset_index(drop=True, inplace=True)
|
||||
# Ensure 'timestamp' is the first column if other columns exist
|
||||
if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
|
||||
data_to_save = data_to_save[cols]
|
||||
elif pd.api.types.is_numeric_dtype(data_to_save.index.dtype):
|
||||
# If index is already numeric (e.g. float Unix timestamps from a previous save/load cycle),
|
||||
# make it a column named 'timestamp'.
|
||||
data_to_save['timestamp'] = data_to_save.index
|
||||
data_to_save.reset_index(drop=True, inplace=True)
|
||||
if 'timestamp' in data_to_save.columns and len(data_to_save.columns) > 1:
|
||||
cols = ['timestamp'] + [col for col in data_to_save.columns if col != 'timestamp']
|
||||
data_to_save = data_to_save[cols]
|
||||
else:
|
||||
# For other index types, or if no index that we want to specifically handle,
|
||||
# save with the current index. pandas to_csv will handle it.
|
||||
# This branch might be removed if we strictly expect either DatetimeIndex or a numeric one from previous save.
|
||||
pass # data_to_save remains as is, to_csv will write its index if index=True
|
||||
|
||||
# Save to CSV, ensuring the 'timestamp' column (if created) is written, and not the DataFrame's active index.
|
||||
full_path = os.path.join(self.data_dir, file_path)
|
||||
data_to_save.to_csv(full_path, index=False) # index=False because timestamp is now a column
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
|
||||
|
||||
|
||||
def format_row(self, row):
|
||||
def format_row(self, row: Dict[str, Any]) -> Dict[str, str]:
|
||||
"""Format a row for a combined results CSV file
|
||||
|
||||
Args:
|
||||
row: row to format
|
||||
row: Dictionary containing row data
|
||||
|
||||
Returns:
|
||||
formatted row
|
||||
Dictionary with formatted values
|
||||
"""
|
||||
return self.result_formatter.format_row(row)
|
||||
|
||||
return {
|
||||
"timeframe": row["timeframe"],
|
||||
"stop_loss_pct": f"{row['stop_loss_pct']*100:.2f}%",
|
||||
"n_trades": row["n_trades"],
|
||||
"n_stop_loss": row["n_stop_loss"],
|
||||
"win_rate": f"{row['win_rate']*100:.2f}%",
|
||||
"max_drawdown": f"{row['max_drawdown']*100:.2f}%",
|
||||
"avg_trade": f"{row['avg_trade']*100:.2f}%",
|
||||
"profit_ratio": f"{row['profit_ratio']*100:.2f}%",
|
||||
"final_usd": f"{row['final_usd']:.2f}",
|
||||
"total_fees_usd": f"{row['total_fees_usd']:.2f}",
|
||||
}
|
||||
|
||||
def write_results_chunk(self, filename, fieldnames, rows, write_header=False, initial_usd=None):
|
||||
def write_results_chunk(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], write_header: bool = False,
|
||||
initial_usd: Optional[float] = None) -> None:
|
||||
"""Write a chunk of results to a CSV file
|
||||
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of rows
|
||||
write_header: whether to write the header
|
||||
initial_usd: initial USD
|
||||
initial_usd: initial USD value for header comment
|
||||
"""
|
||||
mode = 'w' if write_header else 'a'
|
||||
self.result_formatter.write_results_chunk(
|
||||
filename, fieldnames, rows, write_header, initial_usd
|
||||
)
|
||||
|
||||
def write_backtest_results(self, filename: str, fieldnames: List[str],
|
||||
rows: List[Dict], metadata_lines: Optional[List[str]] = None) -> str:
|
||||
"""Write combined backtest results to a CSV file
|
||||
|
||||
with open(filename, mode, newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
if write_header:
|
||||
csvfile.write(f"# initial_usd: {initial_usd}\n")
|
||||
writer.writeheader()
|
||||
|
||||
for row in rows:
|
||||
# Only keep keys that are in fieldnames
|
||||
filtered_row = {k: v for k, v in row.items() if k in fieldnames}
|
||||
writer.writerow(filtered_row)
|
||||
|
||||
def write_backtest_results(self, filename, fieldnames, rows, metadata_lines=None):
|
||||
"""Write a combined results to a CSV file
|
||||
Args:
|
||||
filename: filename to write to
|
||||
fieldnames: list of fieldnames
|
||||
rows: list of rows
|
||||
rows: list of result dictionaries
|
||||
metadata_lines: optional list of strings to write as header comments
|
||||
|
||||
Returns:
|
||||
Full path to the written file
|
||||
"""
|
||||
fname = os.path.join(self.results_dir, filename)
|
||||
with open(fname, "w", newline="") as csvfile:
|
||||
if metadata_lines:
|
||||
for line in metadata_lines:
|
||||
csvfile.write(f"{line}\n")
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
|
||||
writer.writeheader()
|
||||
for row in rows:
|
||||
writer.writerow(self.format_row(row))
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Combined results written to {fname}")
|
||||
|
||||
def write_trades(self, all_trade_rows, trades_fieldnames):
|
||||
"""Write trades to a CSV file
|
||||
return self.result_formatter.write_backtest_results(
|
||||
filename, fieldnames, rows, metadata_lines
|
||||
)
|
||||
|
||||
def write_trades(self, all_trade_rows: List[Dict], trades_fieldnames: List[str]) -> None:
|
||||
"""Write trades to separate CSV files grouped by timeframe and stop loss
|
||||
|
||||
Args:
|
||||
all_trade_rows: list of trade rows
|
||||
all_trade_rows: list of trade dictionaries
|
||||
trades_fieldnames: list of trade fieldnames
|
||||
logging: logging object
|
||||
"""
|
||||
|
||||
trades_by_combo = defaultdict(list)
|
||||
for trade in all_trade_rows:
|
||||
tf = trade.get("timeframe")
|
||||
sl = trade.get("stop_loss_pct")
|
||||
trades_by_combo[(tf, sl)].append(trade)
|
||||
|
||||
for (tf, sl), trades in trades_by_combo.items():
|
||||
sl_percent = int(round(sl * 100))
|
||||
trades_filename = os.path.join(self.results_dir, f"trades_{tf}_ST{sl_percent}pct.csv")
|
||||
with open(trades_filename, "w", newline="") as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=trades_fieldnames)
|
||||
writer.writeheader()
|
||||
for trade in trades:
|
||||
writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Trades written to {trades_filename}")
|
||||
self.result_formatter.write_trades(all_trade_rows, trades_fieldnames)
|
||||
73
cycles/utils/storage_utils.py
Normal file
73
cycles/utils/storage_utils.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class TimestampParsingError(Exception):
|
||||
"""Custom exception for timestamp parsing errors"""
|
||||
pass
|
||||
|
||||
|
||||
class DataLoadingError(Exception):
|
||||
"""Custom exception for data loading errors"""
|
||||
pass
|
||||
|
||||
|
||||
class DataSavingError(Exception):
|
||||
"""Custom exception for data saving errors"""
|
||||
pass
|
||||
|
||||
|
||||
def _parse_timestamp_column(data: pd.DataFrame, column_name: str) -> pd.DataFrame:
|
||||
"""Parse timestamp column handling both Unix timestamps and datetime strings
|
||||
|
||||
Args:
|
||||
data: DataFrame containing the timestamp column
|
||||
column_name: Name of the timestamp column
|
||||
|
||||
Returns:
|
||||
DataFrame with parsed timestamp column
|
||||
|
||||
Raises:
|
||||
TimestampParsingError: If timestamp parsing fails
|
||||
"""
|
||||
try:
|
||||
sample_timestamp = str(data[column_name].iloc[0])
|
||||
try:
|
||||
# Check if it's a Unix timestamp (numeric)
|
||||
float(sample_timestamp)
|
||||
# It's a Unix timestamp, convert using unit='s'
|
||||
data[column_name] = pd.to_datetime(data[column_name], unit='s')
|
||||
except ValueError:
|
||||
# It's already in datetime string format, convert without unit
|
||||
data[column_name] = pd.to_datetime(data[column_name])
|
||||
return data
|
||||
except Exception as e:
|
||||
raise TimestampParsingError(f"Failed to parse timestamp column '{column_name}': {e}")
|
||||
|
||||
|
||||
def _filter_by_date_range(data: pd.DataFrame, timestamp_col: str,
|
||||
start_date: pd.Timestamp, stop_date: pd.Timestamp) -> pd.DataFrame:
|
||||
"""Filter DataFrame by date range
|
||||
|
||||
Args:
|
||||
data: DataFrame to filter
|
||||
timestamp_col: Name of timestamp column
|
||||
start_date: Start date for filtering
|
||||
stop_date: Stop date for filtering
|
||||
|
||||
Returns:
|
||||
Filtered DataFrame
|
||||
"""
|
||||
return data[(data[timestamp_col] >= start_date) & (data[timestamp_col] <= stop_date)]
|
||||
|
||||
|
||||
def _normalize_column_names(data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Convert all column names to lowercase
|
||||
|
||||
Args:
|
||||
data: DataFrame to normalize
|
||||
|
||||
Returns:
|
||||
DataFrame with lowercase column names
|
||||
"""
|
||||
data.columns = data.columns.str.lower()
|
||||
return data
|
||||
@@ -10,10 +10,12 @@ class SystemUtils:
|
||||
"""Determine optimal number of worker processes based on system resources"""
|
||||
cpu_count = os.cpu_count() or 4
|
||||
memory_gb = psutil.virtual_memory().total / (1024**3)
|
||||
# Heuristic: Use 75% of cores, but cap based on available memory
|
||||
# Assume each worker needs ~2GB for large datasets
|
||||
workers_by_memory = max(1, int(memory_gb / 2))
|
||||
workers_by_cpu = max(1, int(cpu_count * 0.75))
|
||||
|
||||
# OPTIMIZATION: More aggressive worker allocation for better performance
|
||||
workers_by_memory = max(1, int(memory_gb / 2)) # 2GB per worker
|
||||
workers_by_cpu = max(1, int(cpu_count * 0.8)) # Use 80% of CPU cores
|
||||
optimal_workers = min(workers_by_cpu, workers_by_memory, 8) # Cap at 8 workers
|
||||
|
||||
if self.logging is not None:
|
||||
self.logging.info(f"Using {min(workers_by_cpu, workers_by_memory)} workers for processing")
|
||||
return min(workers_by_cpu, workers_by_memory)
|
||||
self.logging.info(f"Using {optimal_workers} workers for processing (CPU-based: {workers_by_cpu}, Memory-based: {workers_by_memory})")
|
||||
return optimal_workers
|
||||
Reference in New Issue
Block a user