import logging
import os
from typing import Optional

import pandas as pd

from .storage_utils import DataSavingError


class DataSaver:
    """Saves DataFrames as CSV files underneath a base data directory."""

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver

        Args:
            data_dir: Directory for saving data files
            logging_instance: Optional logging instance; when None nothing is logged
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.

        How the index is handled depends on its type:

        * DatetimeIndex: converted to float Unix timestamps (seconds since
          epoch) and written as a leading column named 'timestamp'.
        * Numeric index (this includes a default RangeIndex): copied as-is
          into a leading column named 'timestamp'.
        * Any other index: not written at all, because the CSV is produced
          with ``index=False``.

        The caller's DataFrame is never modified.

        Args:
            data: DataFrame to save
            file_path: path to the data file relative to the data_dir

        Raises:
            DataSavingError: If preparing or writing the data fails
        """
        try:
            # The prepare step builds a new frame whenever it has to change
            # anything, so no defensive copy of `data` is needed here.
            data_to_save = self._prepare_data_for_saving(data)

            # The index itself is never written; a 'timestamp' column (if
            # one was created above) carries the index values instead.
            full_path = os.path.join(self.data_dir, file_path)
            data_to_save.to_csv(full_path, index=False)

            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
        except Exception as e:
            # Boundary handler: every failure is reported as DataSavingError,
            # with the original exception chained as the cause.
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare DataFrame for saving by handling different index types

        Args:
            data: DataFrame to prepare (left unmodified)

        Returns:
            DataFrame ready for saving. For datetime and numeric indexes this
            is a new frame with a leading 'timestamp' column; for every other
            index type it is `data` itself.
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        if pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        # Other index types are passed through untouched. NOTE: save_data
        # writes with index=False, so such an index does not reach the CSV.
        return data

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert DatetimeIndex to Unix timestamp column

        Args:
            data: DataFrame with DatetimeIndex (left unmodified)

        Returns:
            New DataFrame with a default integer index and a leading
            'timestamp' column of float seconds since the Unix epoch.
            NaT entries become NaN.
        """
        index = data.index
        if index.tz is not None:
            # Express aware timestamps in UTC and drop the tz so they can be
            # subtracted from the naive epoch below.
            index = index.tz_convert(None)

        # Subtracting the epoch and dividing by one second is independent of
        # the index's storage resolution and maps NaT to NaN. Casting to
        # int64 and dividing by 1e9 assumes nanoseconds and turns NaT into a
        # huge negative number.
        seconds = (index - pd.Timestamp(0)) / pd.Timedelta(seconds=1)
        return self._with_timestamp_column(data, seconds)

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert numeric index to timestamp column

        Args:
            data: DataFrame with numeric index (left unmodified)

        Returns:
            New DataFrame with a default integer index and a leading
            'timestamp' column holding the original index values.
        """
        # The index is already numeric (e.g. float Unix timestamps from a
        # previous save/load cycle), so its values are used unchanged.
        return self._with_timestamp_column(data, data.index)

    @staticmethod
    def _with_timestamp_column(data: pd.DataFrame, timestamps: pd.Index) -> pd.DataFrame:
        """Return a copy of `data` with `timestamps` as its first column.

        Args:
            data: Source DataFrame (left unmodified)
            timestamps: One value per row of `data`, in row order

        Returns:
            New DataFrame with a default integer index whose first column is
            'timestamp'. An existing 'timestamp' column is replaced.
        """
        # reset_index(drop=True) returns a new frame, so the column
        # assignment below never touches the caller's data.
        result = data.reset_index(drop=True)
        result['timestamp'] = timestamps
        ordered = ['timestamp'] + [col for col in result.columns if col != 'timestamp']
        return result[ordered]