73 lines
2.2 KiB
Python
73 lines
2.2 KiB
Python
|
|
import pandas as pd
|
||
|
|
|
||
|
|
|
||
|
|
class TimestampParsingError(Exception):
    """Raised when a timestamp column cannot be parsed."""
|
||
|
|
|
||
|
|
|
||
|
|
class DataLoadingError(Exception):
    """Raised when data cannot be loaded."""
|
||
|
|
|
||
|
|
|
||
|
|
class DataSavingError(Exception):
    """Raised when data cannot be saved."""
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_timestamp_column(data: pd.DataFrame, column_name: str) -> pd.DataFrame:
|
||
|
|
"""Parse timestamp column handling both Unix timestamps and datetime strings
|
||
|
|
|
||
|
|
Args:
|
||
|
|
data: DataFrame containing the timestamp column
|
||
|
|
column_name: Name of the timestamp column
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
DataFrame with parsed timestamp column
|
||
|
|
|
||
|
|
Raises:
|
||
|
|
TimestampParsingError: If timestamp parsing fails
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
sample_timestamp = str(data[column_name].iloc[0])
|
||
|
|
try:
|
||
|
|
# Check if it's a Unix timestamp (numeric)
|
||
|
|
float(sample_timestamp)
|
||
|
|
# It's a Unix timestamp, convert using unit='s'
|
||
|
|
data[column_name] = pd.to_datetime(data[column_name], unit='s')
|
||
|
|
except ValueError:
|
||
|
|
# It's already in datetime string format, convert without unit
|
||
|
|
data[column_name] = pd.to_datetime(data[column_name])
|
||
|
|
return data
|
||
|
|
except Exception as e:
|
||
|
|
raise TimestampParsingError(f"Failed to parse timestamp column '{column_name}': {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def _filter_by_date_range(data: pd.DataFrame, timestamp_col: str,
|
||
|
|
start_date: pd.Timestamp, stop_date: pd.Timestamp) -> pd.DataFrame:
|
||
|
|
"""Filter DataFrame by date range
|
||
|
|
|
||
|
|
Args:
|
||
|
|
data: DataFrame to filter
|
||
|
|
timestamp_col: Name of timestamp column
|
||
|
|
start_date: Start date for filtering
|
||
|
|
stop_date: Stop date for filtering
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Filtered DataFrame
|
||
|
|
"""
|
||
|
|
return data[(data[timestamp_col] >= start_date) & (data[timestamp_col] <= stop_date)]
|
||
|
|
|
||
|
|
|
||
|
|
def _normalize_column_names(data: pd.DataFrame) -> pd.DataFrame:
|
||
|
|
"""Convert all column names to lowercase
|
||
|
|
|
||
|
|
Args:
|
||
|
|
data: DataFrame to normalize
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
DataFrame with lowercase column names
|
||
|
|
"""
|
||
|
|
data.columns = data.columns.str.lower()
|
||
|
|
return data
|