Cycles/cycles/utils/data_saver.py

import os
import pandas as pd
from typing import Optional
import logging

from .storage_utils import DataSavingError


class DataSaver:
    """Save pandas DataFrames as CSV files under a base data directory.

    A DatetimeIndex or numeric index is turned into a leading ``timestamp``
    column before writing; the CSV itself is always written without an index.
    """

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver

        Args:
            data_dir: Directory for saving data files
            logging_instance: Optional logging instance; when None nothing is logged
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.

        If the DataFrame has a DatetimeIndex, it is converted to float Unix
        timestamps (seconds since epoch) and written as the first column, named
        'timestamp'. A numeric index is written as the 'timestamp' column as-is.
        An existing 'timestamp' column is replaced by the index values in both
        cases. The caller's DataFrame is never modified.

        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir

        Raises:
            DataSavingError: If preparing or writing the data fails
        """
        try:
            # The preparation helpers build a new frame instead of mutating
            # their argument, so no defensive copy of `data` is required.
            data_to_save = self._prepare_data_for_saving(data)

            # index=False: the index (if meaningful) already lives in the
            # 'timestamp' column created above.
            full_path = os.path.join(self.data_dir, file_path)
            data_to_save.to_csv(full_path, index=False)

            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")

        except Exception as e:
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare DataFrame for saving by handling different index types

        Args:
            data: DataFrame to prepare

        Returns:
            DataFrame ready for saving. For datetime and numeric indexes this is
            a new frame with a leading 'timestamp' column; otherwise it is the
            input object itself, unchanged.
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        if pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        # Any other index type gets no 'timestamp' column.
        # NOTE(review): save_data writes with index=False, so such an index is
        # not written to the CSV at all - confirm that this is intended.
        return data

    @staticmethod
    def _with_timestamp_column(data: pd.DataFrame, timestamps) -> pd.DataFrame:
        """Return a copy of data with a fresh RangeIndex and 'timestamp' first

        Args:
            data: DataFrame whose columns are kept (in their original order)
            timestamps: One value per row for the new 'timestamp' column

        Returns:
            New DataFrame; the input is left untouched
        """
        result = data.reset_index(drop=True)
        # An existing 'timestamp' column is superseded by the index values.
        if 'timestamp' in result.columns:
            result = result.drop(columns='timestamp')
        # Positional insert rather than label-based reselection: selecting by
        # label would multiply columns when the frame has duplicate names.
        result.insert(0, 'timestamp', timestamps)
        return result

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert DatetimeIndex to Unix timestamp column

        Args:
            data: DataFrame with DatetimeIndex

        Returns:
            New DataFrame with a float 'timestamp' column (seconds since epoch)
            as its first column; NaT entries become NaN
        """
        index = data.index
        # Subtracting the epoch and dividing by one second is independent of
        # the index's storage resolution (casting to int64 and dividing by 1e9
        # is only right for nanosecond data) and maps NaT to NaN instead of a
        # huge negative number. Passing the index's tz keeps the subtraction
        # valid for both naive and tz-aware indexes.
        epoch = pd.Timestamp(0, tz=index.tz)
        seconds = ((index - epoch) / pd.Timedelta(seconds=1)).to_numpy()
        return self._with_timestamp_column(data, seconds)

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert numeric index to timestamp column

        Args:
            data: DataFrame with numeric index

        Returns:
            New DataFrame with the index values as its first column 'timestamp'
        """
        # Index is already numeric (e.g. float Unix timestamps from a previous
        # save/load cycle), so its values are carried over unchanged.
        return self._with_timestamp_column(data, data.index.to_numpy())
Implement backtesting framework with modular architecture for data loading, processing, and result management. Introduced BacktestRunner, ConfigManager, and ResultProcessor classes for improved maintainability and error handling. Updated main execution script to utilize new components and added comprehensive logging. Enhanced README with detailed project overview and usage instructions. 2025-06-25 13:08:07 +08:00			`import os`
			`import pandas as pd`
			`from typing import Optional`
			`import logging`

			`from .storage_utils import DataSavingError`


			`class DataSaver:`
			`"""Handles saving data to various file formats"""`

			`def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):`
			`"""Initialize data saver`

			`Args:`
			`data_dir: Directory for saving data files`
			`logging_instance: Optional logging instance`
			`"""`
			`self.data_dir = data_dir`
			`self.logging = logging_instance`

			`def save_data(self, data: pd.DataFrame, file_path: str) -> None:`
			`"""Save processed data to a CSV file.`
			`If the DataFrame has a DatetimeIndex, it's converted to float Unix timestamps`
			`(seconds since epoch) before saving. The index is saved as a column named 'timestamp'.`

			`Args:`
			`data: DataFrame to save`
			`file_path: path to the data file relative to the data_dir`

			`Raises:`
			`DataSavingError: If saving fails`
			`"""`
			`try:`
			`data_to_save = data.copy()`
			`data_to_save = self._prepare_data_for_saving(data_to_save)`

			`# Save to CSV, ensuring the 'timestamp' column (if created) is written`
			`full_path = os.path.join(self.data_dir, file_path)`
			`data_to_save.to_csv(full_path, index=False)`

			`if self.logging is not None:`
			`self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")`

			`except Exception as e:`
			`error_msg = f"Failed to save data to {file_path}: {e}"`
			`if self.logging is not None:`
			`self.logging.error(error_msg)`
			`raise DataSavingError(error_msg) from e`

			`def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:`
			`"""Prepare DataFrame for saving by handling different index types`

			`Args:`
			`data: DataFrame to prepare`

			`Returns:`
			`DataFrame ready for saving`
			`"""`
			`if isinstance(data.index, pd.DatetimeIndex):`
			`return self._convert_datetime_index_to_timestamp(data)`
			`elif pd.api.types.is_numeric_dtype(data.index.dtype):`
			`return self._convert_numeric_index_to_timestamp(data)`
			`else:`
			`# For other index types, save with the current index`
			`return data`

			`def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:`
			`"""Convert DatetimeIndex to Unix timestamp column`

			`Args:`
			`data: DataFrame with DatetimeIndex`

			`Returns:`
			`DataFrame with timestamp column`
			`"""`
			`# Convert DatetimeIndex to Unix timestamp (float seconds since epoch)`
			`data['timestamp'] = data.index.astype('int64') / 1e9`
			`data.reset_index(drop=True, inplace=True)`

			`# Ensure 'timestamp' is the first column if other columns exist`
			`if 'timestamp' in data.columns and len(data.columns) > 1:`
			`cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']`
			`data = data[cols]`

			`return data`

			`def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:`
			`"""Convert numeric index to timestamp column`

			`Args:`
			`data: DataFrame with numeric index`

			`Returns:`
			`DataFrame with timestamp column`
			`"""`
			`# If index is already numeric (e.g. float Unix timestamps from a previous save/load cycle)`
			`data['timestamp'] = data.index`
			`data.reset_index(drop=True, inplace=True)`

			`# Ensure 'timestamp' is the first column if other columns exist`
			`if 'timestamp' in data.columns and len(data.columns) > 1:`
			`cols = ['timestamp'] + [col for col in data.columns if col != 'timestamp']`
			`data = data[cols]`

			`return data`