# Listing metadata: 66 lines, 1.8 KiB, Python

from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class DataConfig:
    """Configuration for data loading and basic filtering.

    Attributes:
        csv_path: Path of the input CSV file. Required; it has no default.
        min_date: Lower date bound, kept as a string. Defaults to
            ``"2017-06-01"``.
        max_date: Optional upper date bound, kept as a string. Defaults to
            ``None``. NOTE(review): presumably ``None`` means "no upper
            bound" -- confirm against the loader.
        drop_volume_zero: Defaults to ``True``. NOTE(review): presumably
            rows with zero volume are dropped when set -- confirm against
            the loader.
    """

    csv_path: str
    min_date: str = "2017-06-01"
    max_date: Optional[str] = None
    drop_volume_zero: bool = True
@dataclass
class FeatureConfig:
    """Configuration for feature engineering.

    The list-valued fields use ``default_factory`` so that every instance
    receives its own list rather than sharing one mutable default.

    Attributes:
        ohlcv_cols: Column names, by default
            ``["Open", "High", "Low", "Close", "Volume"]``.
        lags: Integer defaulting to ``3``. NOTE(review): presumably the
            number of lagged steps to generate -- confirm with the feature
            builder.
        window_sizes: Integers defaulting to ``[5, 15, 30]``.
            NOTE(review): presumably rolling-window lengths; units are not
            visible here.
    """

    ohlcv_cols: List[str] = field(
        default_factory=lambda: ["Open", "High", "Low", "Close", "Volume"]
    )
    lags: int = 3
    window_sizes: List[int] = field(default_factory=lambda: [5, 15, 30])
@dataclass
class PreprocessConfig:
    """Configuration for preprocessing and NaN handling.

    Attributes:
        impute_nans: Defaults to ``True``. NOTE(review): presumably
            enables NaN imputation; the imputation strategy is not
            visible here.
    """

    impute_nans: bool = True
@dataclass
class PruningConfig:
    """Configuration for feature pruning and CV.

    Attributes:
        do_walk_forward_cv: Defaults to ``True``. NOTE(review): presumably
            toggles walk-forward cross-validation.
        n_splits: Integer defaulting to ``5``. NOTE(review): presumably
            the number of CV splits.
        auto_prune: Defaults to ``True``. NOTE(review): presumably toggles
            automatic feature pruning.
        top_k: Integer defaulting to ``150``. NOTE(review): presumably the
            number of features kept after pruning -- confirm with the
            consumer.
        known_low_features: Feature names, by default six supertrend
            variants plus ``"hour"``. Built through ``default_factory``
            so each instance owns a separate list. NOTE(review): presumably
            features already known to rank low.
    """

    do_walk_forward_cv: bool = True
    n_splits: int = 5
    auto_prune: bool = True
    top_k: int = 150
    known_low_features: List[str] = field(
        default_factory=lambda: [
            "supertrend_12_3.0",
            "supertrend_10_1.0",
            "supertrend_11_2.0",
            "supertrend_trend_12_3.0",
            "supertrend_trend_10_1.0",
            "supertrend_trend_11_2.0",
            "hour",
        ]
    )
@dataclass
class OutputConfig:
    """Configuration for outputs and artifacts.

    All values are plain strings. How relative paths are resolved depends
    on the code that consumes them, which is not visible here.

    Attributes:
        charts_dir: Defaults to ``"charts"``.
        results_csv: Defaults to
            ``"../data/cumulative_feature_results.csv"``.
        model_output_path: Defaults to
            ``"../data/xgboost_model_all_features.json"``.
    """

    charts_dir: str = "charts"
    results_csv: str = "../data/cumulative_feature_results.csv"
    model_output_path: str = "../data/xgboost_model_all_features.json"
@dataclass
class RunConfig:
    """Top-level configuration grouping for a pipeline run.

    Only ``data`` must be supplied. Every other section is created through
    ``default_factory``, so each ``RunConfig`` gets its own fresh
    sub-config instance populated with that section's defaults.

    Attributes:
        data: Data loading and filtering settings (required).
        features: Feature engineering settings.
        preprocess: Preprocessing and NaN handling settings.
        pruning: Feature pruning and CV settings.
        output: Output and artifact settings.
    """

    data: DataConfig
    features: FeatureConfig = field(default_factory=FeatureConfig)
    preprocess: PreprocessConfig = field(default_factory=PreprocessConfig)
    pruning: PruningConfig = field(default_factory=PruningConfig)
    output: OutputConfig = field(default_factory=OutputConfig)