Cycles/cycles/utils/data_saver.py

106 lines
3.9 KiB
Python
Raw Normal View History

import os
import pandas as pd
from typing import Optional
import logging
from .storage_utils import DataSavingError
class DataSaver:
    """Handles saving DataFrames as CSV files below a base directory.

    A DatetimeIndex or numeric index is moved into a leading ``timestamp``
    column before the frame is written; any other index type is left alone.
    """

    def __init__(self, data_dir: str, logging_instance: Optional[logging.Logger] = None):
        """Initialize data saver.

        Args:
            data_dir: Directory for saving data files.
            logging_instance: Optional logger; when ``None`` nothing is logged.
        """
        self.data_dir = data_dir
        self.logging = logging_instance

    def save_data(self, data: pd.DataFrame, file_path: str) -> None:
        """Save processed data to a CSV file.

        If the DataFrame has a DatetimeIndex, it is converted to float Unix
        timestamps (seconds since epoch) and written as the first column,
        named ``timestamp``. A numeric index is written to the same column
        unchanged. The caller's DataFrame is never modified.

        Args:
            data: DataFrame to save.
            file_path: Path to the data file, relative to ``data_dir``.

        Raises:
            DataSavingError: If preparing or writing the data fails; the
                original exception is chained as ``__cause__``.
        """
        try:
            # The prepare helpers build a new frame whenever they change
            # anything, so no defensive copy of ``data`` is needed here.
            data_to_save = self._prepare_data_for_saving(data)
            full_path = os.path.join(self.data_dir, file_path)
            # index=False: the index has already been moved into 'timestamp'
            # where applicable.
            data_to_save.to_csv(full_path, index=False)
            if self.logging is not None:
                self.logging.info(f"Data saved to {full_path} with Unix timestamp column.")
        except Exception as e:
            error_msg = f"Failed to save data to {file_path}: {e}"
            if self.logging is not None:
                self.logging.error(error_msg)
            raise DataSavingError(error_msg) from e

    def _prepare_data_for_saving(self, data: pd.DataFrame) -> pd.DataFrame:
        """Prepare a DataFrame for saving by handling different index types.

        Args:
            data: DataFrame to prepare (not modified).

        Returns:
            A new DataFrame with a leading ``timestamp`` column when the index
            is a DatetimeIndex or numeric; otherwise ``data`` itself.
        """
        if isinstance(data.index, pd.DatetimeIndex):
            return self._convert_datetime_index_to_timestamp(data)
        if pd.api.types.is_numeric_dtype(data.index.dtype):
            return self._convert_numeric_index_to_timestamp(data)
        # NOTE(review): other index types are returned untouched, and
        # save_data() writes with index=False, so such an index is not
        # written to the CSV. Confirm that dropping it is intended.
        return data

    def _convert_datetime_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a DatetimeIndex to a Unix timestamp column.

        Args:
            data: DataFrame with a DatetimeIndex (not modified).

        Returns:
            New DataFrame with a default integer index and a leading float
            ``timestamp`` column holding seconds since 1970-01-01 UTC.
            ``NaT`` entries become ``NaN``.
        """
        index = data.index
        if index.tz is not None:
            # Express tz-aware stamps as naive UTC so they can be measured
            # against a naive epoch.
            index = index.tz_convert(None)
        # Dividing the offset from the epoch by one second is independent of
        # the index's storage resolution (ns/us/ms/s) and maps NaT to NaN;
        # casting to int64 and dividing by 1e9 is only right for nanoseconds.
        seconds = (index - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
        return self._with_timestamp_column(data, seconds)

    def _convert_numeric_index_to_timestamp(self, data: pd.DataFrame) -> pd.DataFrame:
        """Convert a numeric index to a timestamp column.

        The index values are copied as-is (e.g. float Unix timestamps from a
        previous save/load cycle); no unit conversion is applied.

        Args:
            data: DataFrame with a numeric index (not modified).

        Returns:
            New DataFrame with a default integer index and a leading
            ``timestamp`` column holding the former index values.
        """
        return self._with_timestamp_column(data, data.index)

    @staticmethod
    def _with_timestamp_column(data: pd.DataFrame, timestamps) -> pd.DataFrame:
        """Return a copy of ``data`` with ``timestamps`` as its first column.

        The index is replaced by a default integer index. An existing
        ``timestamp`` column is replaced by the new values.
        """
        result = data.reset_index(drop=True)
        if 'timestamp' in result.columns:
            result = result.drop(columns='timestamp')
        # An Index is inserted positionally, so values stay in row order.
        result.insert(0, 'timestamp', timestamps)
        return result