from typing import List

import numpy as np
import pandas as pd

from .config import PreprocessConfig


def add_basic_time_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add basic time features such as hour-of-day.

    The input frame is not modified; a copy is returned.

    Args:
        df: Frame containing a ``'Timestamp'`` column.

    Returns:
        A copy of ``df`` where ``'Timestamp'`` has been parsed with
        ``pd.to_datetime`` and a new ``'hour'`` column holds the hour
        component of each timestamp.

    Raises:
        KeyError: If ``df`` has no ``'Timestamp'`` column.
    """
    df = df.copy()
    # errors='coerce' turns unparseable values into NaT instead of raising,
    # so the corresponding 'hour' entries come out as missing values.
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
    df['hour'] = df['Timestamp'].dt.hour
    return df


def downcast_numeric_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Downcast numeric columns to save memory.

    Every column that ``pd.to_numeric`` can convert is replaced by its
    ``downcast='float'`` result. Columns that cannot be converted are left
    untouched. Datetime and timedelta columns are skipped explicitly.

    The input frame is not modified; a copy is returned.

    Args:
        df: Frame whose columns should be downcast where possible.

    Returns:
        A copy of ``df`` with convertible columns downcast.
    """
    df = df.copy()
    is_datetime = pd.api.types.is_datetime64_any_dtype
    is_timedelta = pd.api.types.is_timedelta64_dtype
    for col, dtype in df.dtypes.items():
        # pd.to_numeric does not raise on datetime-like data: it reinterprets
        # the values as int64 nanoseconds, which would silently destroy
        # columns such as 'Timestamp'. Leave those columns alone.
        if is_datetime(dtype) or is_timedelta(dtype):
            continue
        try:
            df[col] = pd.to_numeric(df[col], downcast='float')
        except (ValueError, TypeError):
            # Non-numeric column (e.g. free-text strings): keep it as-is.
            continue
    return df


def handle_nans(df: pd.DataFrame, cfg: PreprocessConfig) -> pd.DataFrame:
    """Impute NaNs (mean) or drop rows, based on config.

    The input frame is not modified; a copy is returned.

    Args:
        df: Frame that may contain missing values.
        cfg: Configuration object; only ``cfg.impute_nans`` is read here.

    Returns:
        If ``cfg.impute_nans`` is truthy, a copy of ``df`` in which missing
        values of each numeric column are replaced by that column's mean
        (non-numeric columns are left unchanged). Otherwise, a copy with
        every row containing any missing value dropped and the index reset
        to a fresh ``0..n-1`` range.
    """
    df = df.copy()
    if cfg.impute_nans:
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            # A column that is entirely NaN has a NaN mean, so it stays NaN.
            df[col] = df[col].fillna(df[col].mean())
    else:
        df = df.dropna().reset_index(drop=True)
    return df