"""Configuration dataclasses for a pipeline run.

Each dataclass groups the settings for one stage; ``RunConfig`` bundles them
together. All list-valued defaults are built with ``default_factory`` so every
instance receives its own fresh list.
"""

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class DataConfig:
    """Configuration for data loading and basic filtering."""

    # Required: the only field in this module without a default.
    csv_path: str
    # NOTE(review): presumably lower/upper date bounds for filtering rows;
    # inclusivity and expected format are decided by the consumer — confirm.
    min_date: str = "2017-06-01"
    max_date: Optional[str] = None
    # NOTE(review): presumably drops rows whose volume is zero — confirm
    # against the loader.
    drop_volume_zero: bool = True


@dataclass
class FeatureConfig:
    """Configuration for feature engineering."""

    # Column names; defaults to the five names listed below.
    ohlcv_cols: List[str] = field(
        default_factory=lambda: ["Open", "High", "Low", "Close", "Volume"]
    )
    # NOTE(review): presumably the number of lagged copies per feature —
    # confirm against the feature builder.
    lags: int = 3
    # NOTE(review): presumably rolling-window lengths; units are not
    # established here — confirm.
    window_sizes: List[int] = field(default_factory=lambda: [5, 15, 30])


@dataclass
class PreprocessConfig:
    """Configuration for preprocessing and NaN handling."""

    impute_nans: bool = True


@dataclass
class PruningConfig:
    """Configuration for feature pruning and CV."""

    do_walk_forward_cv: bool = True
    n_splits: int = 5
    auto_prune: bool = True
    # NOTE(review): presumably the number of features kept after pruning —
    # confirm against the pruning step.
    top_k: int = 150
    # NOTE(review): presumably features already known to contribute little;
    # how they are used is decided by the consumer — confirm.
    known_low_features: List[str] = field(
        default_factory=lambda: [
            "supertrend_12_3.0",
            "supertrend_10_1.0",
            "supertrend_11_2.0",
            "supertrend_trend_12_3.0",
            "supertrend_trend_10_1.0",
            "supertrend_trend_11_2.0",
            "hour",
        ]
    )


@dataclass
class OutputConfig:
    """Configuration for outputs and artifacts."""

    # Paths are stored as plain strings; the relative ones are resolved by
    # whatever code consumes them.
    charts_dir: str = "charts"
    results_csv: str = "../data/cumulative_feature_results.csv"
    model_output_path: str = "../data/xgboost_model_all_features.json"


@dataclass
class RunConfig:
    """Top-level configuration grouping for a pipeline run.

    ``data`` is required because ``DataConfig`` has a required ``csv_path``;
    every other section defaults to a freshly constructed instance of its
    config class.
    """

    data: DataConfig
    features: FeatureConfig = field(default_factory=FeatureConfig)
    preprocess: PreprocessConfig = field(default_factory=PreprocessConfig)
    pruning: PruningConfig = field(default_factory=PruningConfig)
    output: OutputConfig = field(default_factory=OutputConfig)