diff --git a/.vscode/launch.json b/.vscode/launch.json index 653a133..9cfb4e3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,12 +10,125 @@ "args": [ "--btc", "${workspaceFolder}/../data/btcusd_1-min_data.csv", "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv", - // "--rules", "20min,21min,22min,23min,24min,25min,26min,27min,28min,29min,30min,31min,32min,33min,34min,35min,36min,37min,38min,39min,40min,41min,42min,43min,44min,45min,46min,47min,48min,49min,50min,51min,52min,53min,54min,55min,56min,57min,58min,59min,60min", - "--rules", "39min", + "--rules", "20min,21min,22min,23min,24min,25min,26min,27min,28min,29min,30min,31min,32min,33min,34min,35min,36min,37min,38min,39min,40min,41min,42min,43min,44min,45min,46min,47min,48min,49min,50min,51min,52min,53min,54min,55min,56min,57min,58min,59min,60min", "--states", "3", - "--cv_since", "2023-01-01", "--horizon", "60", - "--folder_save_path", "models" + "--cv_since", "2023-01-01", + "--cv_splits", "8", + "--cv_test_bars", "500", + "--cv_gap_bars", "24", + "--cv_seed", "7", + "--cv_method", "random", + "--feature_selection", "mutual_info", + "--n_features", "10" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "justMyCode": true, + "env": { + "PYTHONUNBUFFERED": "1" + } + }, + { + "name": "Run ETH/BTC - Expanding Window CV", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "args": [ + "--btc", "${workspaceFolder}/../data/btcusd_1-min_data.csv", + "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv", + "--rules", "30min,45min,1H", + "--states", "3", + "--horizon", "60", + "--cv_since", "2023-01-01", + "--cv_splits", "5", + "--cv_test_bars", "1000", + "--cv_gap_bars", "24", + "--cv_seed", "42", + "--cv_method", "expanding", + "--feature_selection", "rfe", + "--n_features", "12" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "justMyCode": true, + "env": { + "PYTHONUNBUFFERED": "1" + } + }, + { + "name": "Run ETH/BTC - Rolling Window CV", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "args": [ + "--btc", "${workspaceFolder}/../data/btcusd_1-min_data.csv", + "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv", + "--rules", "30min,1H,2H", + "--states", "4", + "--horizon", "120", + "--cv_since", "2023-01-01", + "--cv_splits", "6", + "--cv_test_bars", "800", + "--cv_gap_bars", "12", + "--cv_seed", "123", + "--cv_method", "rolling", + "--feature_selection", "random_forest", + "--n_features", "15" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "justMyCode": true, + "env": { + "PYTHONUNBUFFERED": "1" + } + }, + { + "name": "Run ETH/BTC - Quick Test", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "args": [ + "--btc", "${workspaceFolder}/../data/btcusd_1-min_data.csv", + "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv", + "--rules", "30min,1H", + "--states", "3", + "--horizon", "60", + "--cv_since", "2024-01-01", + "--cv_splits", "3", + "--cv_test_bars", "200", + "--cv_gap_bars", "12", + "--cv_seed", "7", + "--cv_method", "random", + "--feature_selection", "mutual_info", + "--n_features", "8" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}", + "justMyCode": true, + "env": { + "PYTHONUNBUFFERED": "1" + } + }, + { + "name": "Run ETH/BTC - No Feature Selection", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/main.py", + "args": [ + "--btc", 
"${workspaceFolder}/../data/btcusd_1-min_data.csv", + "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv", + "--rules", "30min,45min,1H", + "--states", "3", + "--horizon", "60", + "--cv_since", "2023-01-01", + "--cv_splits", "5", + "--cv_test_bars", "500", + "--cv_gap_bars", "24", + "--cv_seed", "7", + "--cv_method", "random", + "--feature_selection", "none", + "--n_features", "0" ], "console": "integratedTerminal", "cwd": "${workspaceFolder}", @@ -25,4 +138,4 @@ } } ] -} +} \ No newline at end of file diff --git a/main.py b/main.py index a7ecb75..80bae37 100644 --- a/main.py +++ b/main.py @@ -1,19 +1,19 @@ #!/usr/bin/env python3 from __future__ import annotations - import argparse from dataclasses import dataclass from pathlib import Path - import numpy as np import pandas as pd from hmmlearn.hmm import GaussianHMM from sklearn.preprocessing import StandardScaler -import joblib - - -# ============================== CLI ========================================== +from sklearn.feature_selection import SelectKBest, mutual_info_regression, RFE +from sklearn.linear_model import LinearRegression +from sklearn.ensemble import RandomForestRegressor +import warnings +warnings.filterwarnings('ignore') +# ------------------------------- CLI ----------------------------------------- @dataclass class CLI: btc_csv: Path @@ -21,400 +21,487 @@ class CLI: resample_rules: list[str] n_states: int horizon_min: int - folder_save_path: str | None # CV params cv_splits: int cv_test_bars: int cv_gap_bars: int cv_seed: int cv_since: str | None # restrict sampling to recent era - + # Feature selection params + feature_selection_method: str # 'mutual_info', 'rfe', 'random_forest', 'none' + n_features: int # number of features to select + # Enhanced CV params + cv_method: str # 'random', 'expanding', 'rolling' def parse_args() -> CLI: - p = argparse.ArgumentParser(description="BTC/ETH regime modeling with properly embargoed time splits") + p = argparse.ArgumentParser(description="BTC/ETH regime modeling with robust CV and feature selection") p.add_argument("--btc", type=Path, default=Path("btcusd_1-min_data.csv")) p.add_argument("--eth", type=Path, default=Path("ethusd_1min_ohlc.csv")) p.add_argument("--rules", default="30min,45min,1H", help="Comma-separated pandas offsets") p.add_argument("--states", type=int, default=3) - p.add_argument("--horizon", type=int, default=60, help="Forward horizon in minutes for the target") - p.add_argument("--folder_save_path", default=None, help="Folder path to save fitted HMM models (optional)") + p.add_argument("--horizon", type=int, default=60) # randomized CV controls p.add_argument("--cv_splits", type=int, default=8, help="number of random test windows") p.add_argument("--cv_test_bars", type=int, default=500, help="length of each test window in bars") - p.add_argument("--cv_gap_bars", type=int, default=24, help="extra embargo bars beyond the minimum computed gap") + p.add_argument("--cv_gap_bars", type=int, default=24, help="embargo gap before test window") p.add_argument("--cv_seed", type=int, default=7, help="rng seed for reproducibility") p.add_argument("--cv_since", default=None, help="only sample test starts at/after this date (e.g. 
2023-01-01)") + + # Feature selection + p.add_argument("--feature_selection", default="mutual_info", + choices=['mutual_info', 'rfe', 'random_forest', 'none'], + help="Feature selection method") + p.add_argument("--n_features", type=int, default=10, help="Number of features to select") + + # Enhanced CV method + p.add_argument("--cv_method", default="random", choices=['random', 'expanding', 'rolling'], + help="Cross-validation method") + a = p.parse_args() - rules = [r.strip() for r in a.rules.split(",") if r.strip()] - return CLI( - btc_csv=a.btc, - eth_csv=a.eth, - resample_rules=rules, - n_states=a.states, - horizon_min=a.horizon, - folder_save_path=a.folder_save_path, - cv_splits=a.cv_splits, - cv_test_bars=a.cv_test_bars, - cv_gap_bars=a.cv_gap_bars, - cv_seed=a.cv_seed, - cv_since=a.cv_since, - ) - - -# ============================ IO / CLEAN ===================================== + return CLI(a.btc, a.eth, rules, a.states, a.horizon, a.cv_splits, a.cv_test_bars, + a.cv_gap_bars, a.cv_seed, a.cv_since, a.feature_selection, a.n_features, a.cv_method) +# ------------------------------ IO / CLEAN ----------------------------------- def _norm_headers(df: pd.DataFrame) -> pd.DataFrame: df = df.rename(columns={c: c.strip().lower() for c in df.columns}) - if "unix" in df.columns: - df = df.rename(columns={"unix": "timestamp"}) - if "date" in df.columns: - df = df.rename(columns={"date": "timestamp"}) + if "unix" in df.columns: df = df.rename(columns={"unix": "timestamp"}) + if "date" in df.columns: df = df.rename(columns={"date": "timestamp"}) return df - def _load_bitstamp_csv(path: Path, prefix: str) -> pd.DataFrame: df = pd.read_csv(path) df = _norm_headers(df) - if "timestamp" not in df.columns: - raise ValueError(f"Missing timestamp in {path}") + if "timestamp" not in df.columns: raise ValueError(f"Missing timestamp in {path}") df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True, errors="coerce") df = df.dropna(subset=["timestamp"]).set_index("timestamp").sort_index() - for c in ("open", "high", "low", "close", "volume"): - if c in df.columns: - df[c] = pd.to_numeric(df[c], errors="coerce", downcast="float") - df = df[["open", "high", "low", "close", "volume"]].dropna() + for c in ("open","high","low","close","volume"): + if c in df.columns: df[c] = pd.to_numeric(df[c], errors="coerce", downcast="float") + df = df[["open","high","low","close","volume"]].dropna() return df.add_prefix(prefix + "_") - def _align_minutely(btc: pd.DataFrame, eth: pd.DataFrame) -> pd.DataFrame: idx = btc.index.intersection(eth.index) df = btc.reindex(idx).join(eth.reindex(idx), how="inner") return df.ffill(limit=60).dropna() - -# ======================= FEATURES / TARGET =================================== - +# --------------------------- FEATURES (enhanced) ----------------------- def build_features(df: pd.DataFrame, rule: str, horizon_min: int) -> pd.DataFrame: df = df.copy() - - # base returns df["btc_ret"] = np.log(df["btc_close"]).diff() df["eth_ret"] = np.log(df["eth_close"]).diff() - df["ratio"] = df["eth_close"] / df["btc_close"] + df["ratio"] = df["eth_close"]/df["btc_close"] df["ratio_ret"] = np.log(df["ratio"]).diff() - - # volatility (minutes) - for win in (15, 30, 60, 120, 240, 360): + + # Volatility features + for win in (15,30,60,120,240,360): df[f"rv_{win}m"] = df["ratio_ret"].rolling(win, min_periods=win).std() - - # trend vs long MA (minutes) - for win in (60, 240, 1440): + + # Trend features + for win in (60,240,1440): ma = df["ratio"].rolling(win, min_periods=win).mean() - 
df[f"trend_{win}m"] = df["ratio"] / (ma + 1e-12) - 1.0 - - # rolling correlation (minutes) - for win in (60, 120, 240): + df[f"trend_{win}m"] = df["ratio"]/(ma+1e-12)-1.0 + + # Correlation features + for win in (60,120,240): df[f"corr_{win}m"] = df["btc_ret"].rolling(win, min_periods=win).corr(df["eth_ret"]) - - # beta-like measure over 120m + + # Beta and risk features cov_120 = df["eth_ret"].rolling(120, min_periods=120).cov(df["btc_ret"]) var_120 = df["btc_ret"].rolling(120, min_periods=120).var() - df["beta_2h"] = cov_120 / (var_120 + 1e-12) - - # divergence and volume structure + df["beta_2h"] = cov_120/(var_120+1e-12) + std_b = df["btc_ret"].rolling(120, min_periods=120).std() std_e = df["eth_ret"].rolling(120, min_periods=120).std() - df["divergence_2h"] = np.abs(df["btc_ret"] / (std_b + 1e-12) - df["eth_ret"] / (std_e + 1e-12)) - df["volratio"] = np.log((df["eth_volume"] + 1e-9) / (df["btc_volume"] + 1e-9)) - df["vol_sum"] = np.log(df["eth_volume"] + df["btc_volume"] + 1e-9) - df["vol_diff"] = (df["eth_volume"] - df["btc_volume"]) / (df["eth_volume"] + df["btc_volume"] + 1e-9) - - # convenience aliases - df["rv_2h"] = df.get("rv_120m", df["ratio_ret"].rolling(120, min_periods=120).std()) - df["corr_2h"] = df.get("corr_120m", df["btc_ret"].rolling(120, min_periods=120).corr(df["eth_ret"])) - df["ratio_trend"] = df.get( - "trend_1440m", - df["ratio"] / (df["ratio"].rolling(1440, min_periods=1440).mean() + 1e-12) - 1.0, - ) - - # aggregate to rule - agg = {"btc_close": "last", "eth_close": "last", "ratio": "last", "ratio_ret": "sum"} + df["divergence_2h"] = np.abs(df["btc_ret"]/(std_b+1e-12) - df["eth_ret"]/(std_e+1e-12)) + + # Volume features + df["volratio"] = np.log((df["eth_volume"]+1e-9)/(df["btc_volume"]+1e-9)) + df["vol_sum"] = np.log(df["eth_volume"]+df["btc_volume"]+1e-9) + df["vol_diff"] = (df["eth_volume"]-df["btc_volume"])/(df["eth_volume"]+df["btc_volume"]+1e-9) + + # Additional momentum features + df["momentum_1h"] = df["ratio_ret"].rolling(60).sum() + df["momentum_4h"] = df["ratio_ret"].rolling(240).sum() + + # Mean reversion features + for win in (60, 120, 240): + rolling_mean = df["ratio_ret"].rolling(win).mean() + rolling_std = df["ratio_ret"].rolling(win).std() + df[f"zscore_{win}m"] = (df["ratio_ret"] - rolling_mean) / (rolling_std + 1e-12) + + # Price position features + for win in (240, 1440): + high = df["ratio"].rolling(win).max() + low = df["ratio"].rolling(win).min() + df[f"position_{win}m"] = (df["ratio"] - low) / (high - low + 1e-12) + + # Aggregate to target timeframe + agg = {"btc_close":"last","eth_close":"last","ratio":"last","ratio_ret":"sum"} for c in df.columns: - if c not in agg: - agg[c] = "mean" - + if c not in agg: agg[c] = "mean" + g = df.resample(rule).agg(agg).dropna() - - step_min = max(1, int(pd.Timedelta(rule).total_seconds() // 60)) - ahead = max(1, int(round(horizon_min / step_min))) + step_min = max(1, int(pd.Timedelta(rule).total_seconds()//60)) + ahead = max(1, int(round(horizon_min/step_min))) g["fut_ret"] = g["ratio_ret"].shift(-ahead) - + return g.dropna() - -def feature_matrix(g: pd.DataFrame) -> tuple[np.ndarray, np.ndarray, list[str]]: - ban = {"fut_ret", "btc_close", "eth_close", "ratio"} - keep = ("rv_", "corr_", "trend_", "beta_", "divergence_", "vol") - feats: list[str] = [] - - if "ratio_ret" in g.columns: +def feature_matrix(g: pd.DataFrame) -> tuple[np.ndarray,np.ndarray,list[str]]: + ban = {"fut_ret","btc_close","eth_close","ratio"} + keep = 
("rv_","corr_","trend_","beta_","divergence_","vol","momentum_","zscore_","position_") + feats = [] + + if "ratio_ret" in g.columns: feats.append("ratio_ret") - feats += [ - c for c in g.columns - if c not in ban and c != "ratio_ret" and any(c.startswith(p) for p in keep) - ] - - if not feats: - feats = ["ratio_ret", "rv_30m", "rv_2h", "corr_2h", "ratio_trend", "volratio"] - + feats += [c for c in g.columns if c not in ban and c!="ratio_ret" and any(c.startswith(p) for p in keep)] + + if not feats: + feats = ["ratio_ret","rv_30m","rv_2h","corr_2h","ratio_trend","volratio","momentum_1h","zscore_60m"] + X = g[feats].astype(np.float32).values y = g["fut_ret"].astype(np.float32).values + return X, y, feats +# ------------------------- Enhanced Feature Selection ------------------------ +def select_features(X_train: np.ndarray, y_train: np.ndarray, + X_test: np.ndarray, feature_names: list[str], + method: str, n_features: int) -> tuple[np.ndarray, np.ndarray, list[str]]: + """ + Apply feature selection to training and test data + """ + if method == "none" or n_features >= len(feature_names): + return X_train, X_test, feature_names + + if n_features <= 0: + n_features = max(1, len(feature_names) // 2) + + try: + if method == "mutual_info": + selector = SelectKBest(score_func=mutual_info_regression, k=n_features) + X_train_selected = selector.fit_transform(X_train, y_train) + X_test_selected = selector.transform(X_test) + selected_indices = selector.get_support(indices=True) + selected_features = [feature_names[i] for i in selected_indices] + + elif method == "rfe": + # Use linear regression as base estimator for RFE + estimator = LinearRegression() + selector = RFE(estimator, n_features_to_select=n_features, step=1) + X_train_selected = selector.fit_transform(X_train, y_train) + X_test_selected = selector.transform(X_test) + selected_indices = selector.get_support(indices=True) + selected_features = [feature_names[i] for i in selected_indices] + + elif method == "random_forest": + # Use feature importance from random forest + rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1) + rf.fit(X_train, y_train) + importances = rf.feature_importances_ + selected_indices = np.argsort(importances)[-n_features:] + X_train_selected = X_train[:, selected_indices] + X_test_selected = X_test[:, selected_indices] + selected_features = [feature_names[i] for i in selected_indices] + + else: + return X_train, X_test, feature_names + + print(f" Selected {len(selected_features)} features: {selected_features}") + return X_train_selected, X_test_selected, selected_features + + except Exception as e: + print(f" Feature selection failed: {e}, using all features") + return X_train, X_test, feature_names -# ====================== OVERLAP-/LEAKAGE-AWARE UTILITIES ===================== - -def max_lookback_minutes() -> int: - # From feature construction: the maximum rolling window is 1440 minutes. - return 1440 - - -def bars_from_minutes(rule: str, minutes: int) -> int: - step_min = max(1, int(pd.Timedelta(rule).total_seconds() // 60)) - return int(np.ceil(minutes / step_min)) - - +# ------------------------- Robust Cross-Validation Methods ------------------- def sample_random_splits( g: pd.DataFrame, - rule: str, n_splits: int, test_bars: int, - gap_bars_extra: int, + gap_bars: int, seed: int, - since: str | None, - horizon_min: int, + since: str | None ): - """ - Random test windows with an embargo that guarantees disjoint information sets. 
-    Embargo (in bars) = max(gap_bars_extra, ceil((max_lookback + horizon_min)/rule_minutes)).
-    Train uses only data strictly before (test_start - embargo).
-    """
+    """Random test windows; train ends gap_bars before each test window"""
     rng = np.random.default_rng(seed)
     idx = g.index
+    
     if since is not None:
         idx = idx[idx >= pd.Timestamp(since, tz="UTC")]
-
-    if len(idx) <= test_bars:
-        return
-
-    # Compute minimal embargo based on lookback + horizon
-    gap_min_bars = bars_from_minutes(rule, max_lookback_minutes() + horizon_min)
-    embargo_bars = int(max(gap_bars_extra, gap_min_bars))
-
-    # Valid start indices ensure full test window fits
+    
     valid = np.arange(len(idx) - test_bars)
     if len(valid) <= 0:
-        return
-
+        raise ValueError("Not enough data for requested test window")
+    
     starts = rng.choice(valid, size=min(n_splits, len(valid)), replace=False)
-    starts = np.sort(starts)
-    for s in starts:
+    for s in np.sort(starts):
         test_start = idx[s]
-        test_end = idx[s + test_bars - 1]
-
-        # Train: strictly before test_start - embargo_bars
-        left_end_pos = s - embargo_bars - 1
-        if left_end_pos < 0:
-            # No room for non-overlapping training information
-            continue
-
-        embargo_end = idx[left_end_pos]
-        train = g.loc[:embargo_end]
-        test = g.loc[test_start:test_end]
+        test_end = idx[s + test_bars - 1]
+        # Train ends gap_bars before the test window; 'since' only restricts
+        # test starts, so earlier history stays available for training
+        embargo_end = idx[s - gap_bars - 1] if s - gap_bars - 1 >= 0 else None
+        train = g.loc[:embargo_end] if embargo_end is not None else g.iloc[0:0]
+        test = g.loc[test_start:test_end]
         if len(train) == 0 or len(test) < test_bars:
             continue
-        yield train, test, (test_start, test_end), embargo_bars
+        yield train, test, (test_start, test_end)

-
-# ============================ MODEL / FIT ====================================
-
-def fit_and_predict_train_test(
-    train: pd.DataFrame,
-    test: pd.DataFrame,
-    n_states: int,
-    full_save_path: str | None = None,
+def sample_expanding_window_splits(
+    g: pd.DataFrame,
+    n_splits: int,
+    test_bars: int,
+    gap_bars: int,
+    since: str | None
 ):
+    """Expanding window CV - the training set grows over time"""
+    idx = g.index
+    if since is not None:
+        idx = idx[idx >= pd.Timestamp(since, tz="UTC")]
+        if len(idx) == 0:
+            raise ValueError("No data at/after --cv_since")
+        g = g.loc[idx[0]:]  # keep .iloc positions aligned with the filtered index
+    
+    total_bars = len(idx)
+    min_train_size = test_bars * 2  # Minimum training set size
+    
+    # Number of test windows that fit after the minimum train set and embargo
+    available_splits = (total_bars - min_train_size - gap_bars) // test_bars
+    n_splits = min(n_splits, available_splits)
+    
+    if n_splits <= 0:
+        raise ValueError("Not enough data for expanding window splits")
+    
+    for i in range(n_splits):
+        test_start_idx = min_train_size + gap_bars + (i * test_bars)
+        test_end_idx = test_start_idx + test_bars - 1
+        
+        if test_end_idx >= total_bars:
+            break
+        
+        train_end_idx = test_start_idx - gap_bars - 1
+        train = g.iloc[:train_end_idx + 1]
+        test = g.iloc[test_start_idx:test_end_idx + 1]
+        
+        if len(train) < min_train_size or len(test) < test_bars:
+            continue
+        
+        yield train, test, (idx[test_start_idx], idx[test_end_idx])
+
+def sample_rolling_window_splits(
+    g: pd.DataFrame,
+    n_splits: int,
+    test_bars: int,
+    gap_bars: int,
+    since: str | None
+):
+    """Rolling window CV - fixed training window size"""
+    idx = g.index
+    if since is not None:
+        idx = idx[idx >= pd.Timestamp(since, tz="UTC")]
+        if len(idx) == 0:
+            raise ValueError("No data at/after --cv_since")
+        g = g.loc[idx[0]:]  # keep .iloc positions aligned with the filtered index
+    
+    total_bars = len(idx)
+    train_bars = test_bars * 3  # Fixed training window size
+    
+    # Number of test windows that fit after one train window and the embargo
+    available_splits = (total_bars - train_bars - gap_bars) // test_bars
+    n_splits = min(n_splits, available_splits)
+    
+    if n_splits <= 0:
+        raise ValueError("Not enough data for rolling window splits")
+    
+    for i in range(n_splits):
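+        # Slide the fixed-size train window and its test window forward by one
+        # test-window length per split, with gap_bars embargoed between them.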
+ train_start_idx = i * test_bars + train_end_idx = train_start_idx + train_bars - 1 + test_start_idx = train_end_idx + gap_bars + 1 + test_end_idx = test_start_idx + test_bars - 1 + + if test_end_idx >= total_bars: + break + + train = g.iloc[train_start_idx:train_end_idx + 1] + test = g.iloc[test_start_idx:test_end_idx + 1] + + if len(train) < train_bars or len(test) < test_bars: + continue + + yield train, test, (idx[test_start_idx], idx[test_end_idx]) + +def get_cv_splits(g: pd.DataFrame, cv_method: str, n_splits: int, test_bars: int, + gap_bars: int, seed: int, since: str | None): + """Dispatch to appropriate CV method""" + if cv_method == "random": + return sample_random_splits(g, n_splits, test_bars, gap_bars, seed, since) + elif cv_method == "expanding": + return sample_expanding_window_splits(g, n_splits, test_bars, gap_bars, since) + elif cv_method == "rolling": + return sample_rolling_window_splits(g, n_splits, test_bars, gap_bars, since) + else: + raise ValueError(f"Unknown CV method: {cv_method}") + +# ------------------------------ Model / Fit ----------------------------------- +def fit_and_predict_train_test(train: pd.DataFrame, test: pd.DataFrame, + n_states: int, feature_selection_method: str, + n_features: int): Xtr, ytr, feats = feature_matrix(train) Xte, yte, _ = feature_matrix(test) - + + # Apply feature selection + Xtr_sel, Xte_sel, selected_feats = select_features( + Xtr, ytr, Xte, feats, feature_selection_method, n_features + ) + scaler = StandardScaler() - Xtr_s = scaler.fit_transform(Xtr) - Xte_s = scaler.transform(Xte) - + Xtr_s = scaler.fit_transform(Xtr_sel) + Xte_s = scaler.transform(Xte_sel) + hmm = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=300, random_state=7) hmm.fit(Xtr_s) - + st_tr = hmm.predict(Xtr_s) st_te = hmm.predict(Xte_s) - - # Map HMM states to stances using state-wise mean of future returns in TRAIN + means = {s: float(np.nanmean(ytr[st_tr == s])) for s in range(n_states)} small = np.nanpercentile(np.abs(list(means.values())), 30) state_to_stance = {s: (1 if m > +small else (-1 if m < -small else 0)) for s, m in means.items()} - preds = np.vectorize(state_to_stance.get)(st_te).astype(np.int8) + + return preds, yte, state_to_stance, selected_feats, hmm - if full_save_path: - Path(full_save_path).parent.mkdir(parents=True, exist_ok=True) - joblib.dump( - {"hmm": hmm, "scaler": scaler, "features": feats, "state_to_stance": state_to_stance}, - full_save_path, - ) - print(f"Model saved: {full_save_path}") - - return preds, yte, state_to_stance, feats - - -# ============================= METRICS ======================================= - -def metrics_nonoverlap(y: np.ndarray, preds: np.ndarray, rule: str, horizon_min: int) -> dict[str, float]: - """ - Score only every 'ahead'-th point to remove overlap of forward windows. - Adjust annualization for reduced sampling frequency. 
- """ - T = min(len(y), len(preds)) - if T == 0: - return {"hit_rate": np.nan, "ann_sharpe": np.nan, "n_points": 0} - - y = y[:T] - preds = preds[:T] - - step_min = max(1, int(pd.Timedelta(rule).total_seconds() // 60)) - ahead = max(1, int(round(horizon_min / step_min))) - - # Use the last index of each non-overlapping forward window - idx = np.arange(ahead - 1, T, ahead) - if len(idx) == 0: - return {"hit_rate": np.nan, "ann_sharpe": np.nan, "n_points": 0} - - y_s = y[idx] - p_s = preds[idx] - - pnl = p_s * y_s - hit = float((np.sign(p_s) == np.sign(y_s)).mean()) - - bars_per_day = int(round(24 * 60 / step_min)) - # We only take one observation per 'ahead' bars - eff_obs_per_day = bars_per_day / ahead - ann = np.sqrt(365 * max(eff_obs_per_day, 1e-12)) - +def metrics(y: np.ndarray, preds: np.ndarray, rule: str) -> dict[str,float]: + T = min(len(y), len(preds)); y, preds = y[:T], preds[:T] + pnl = preds * y + hit = (np.sign(preds) == np.sign(y)).mean() if T else np.nan + bars_per_day = int(round(24 * 60 / max(1, int(pd.Timedelta(rule).total_seconds() // 60)))) + ann = np.sqrt(365 * bars_per_day) sharpe = float(np.nanmean(pnl) / (np.nanstd(pnl) + 1e-12) * ann) - return {"hit_rate": hit, "ann_sharpe": sharpe, "n_points": int(len(idx))} + # Additional metrics + positive_returns = (pnl > 0).sum() + total_trades = len(pnl) + win_rate = positive_returns / total_trades if total_trades > 0 else 0 + profit_factor = abs(pnl[pnl > 0].sum() / (pnl[pnl < 0].sum() + 1e-12)) + + return { + "hit_rate": float(hit), + "ann_sharpe": sharpe, + "n_points": int(T), + "win_rate": win_rate, + "profit_factor": profit_factor, + "total_return": float(pnl.sum()) + } -# ============================== RUNNER ======================================= - -def run_rule_mc( - minute: pd.DataFrame, - rule: str, - n_states: int, - horizon_min: int, - cv: object, - folder_save_path: str | None, -) -> dict: +# ------------------------------ Runner --------------------------------------- +def run_rule_mc(minute: pd.DataFrame, rule: str, n_states: int, + horizon_min: int, cv, feature_selection_method: str, + n_features: int) -> dict: g = build_features(minute, rule, horizon_min) rows = [] + feature_importance = {} - for train, test, (ts, te), embargo_bars in sample_random_splits( - g=g, - rule=rule, - n_splits=cv.cv_splits, - test_bars=cv.cv_test_bars, - gap_bars_extra=cv.cv_gap_bars, - seed=cv.cv_seed, - since=cv.cv_since, - horizon_min=horizon_min, - ): - full_save_path = None - if folder_save_path: - full_save_path = f"{folder_save_path}/hmm_btc_eth_{rule}_{horizon_min}.joblib" - - preds, ytest, state_map, feats = fit_and_predict_train_test( - train, test, n_states, full_save_path + for i, (train, test, (ts, te)) in enumerate(get_cv_splits(g, cv.cv_method, cv.cv_splits, + cv.cv_test_bars, cv.cv_gap_bars, + cv.cv_seed, cv.cv_since)): + print(f" Split {i+1}: Train {len(train)} bars, Test {len(test)} bars") + + preds, ytest, state_map, feats, hmm = fit_and_predict_train_test( + train, test, n_states, feature_selection_method, n_features ) - m = metrics_nonoverlap(ytest, preds, rule, horizon_min) - rows.append( - { - "hit_rate": m["hit_rate"], - "ann_sharpe": m["ann_sharpe"], - "n_points": m["n_points"], - "test_span": (ts, te), - "embargo_bars": embargo_bars, - } - ) - + + m = metrics(ytest, preds, rule) + rows.append({ + "hit_rate": m["hit_rate"], + "ann_sharpe": m["ann_sharpe"], + "n_points": m["n_points"], + "test_span": (ts, te), + "win_rate": m["win_rate"], + "profit_factor": m["profit_factor"], + "total_return": 
m["total_return"] + }) + + # Track feature usage + for feat in feats: + feature_importance[feat] = feature_importance.get(feat, 0) + 1 + if not rows: return { - "rule": rule, - "hit_mean": np.nan, - "hit_std": np.nan, - "sharpe_mean": np.nan, - "sharpe_std": np.nan, - "splits": 0, + "rule": rule, "hit_mean": np.nan, "sharpe_mean": np.nan, + "splits": 0, "hit_std": np.nan, "sharpe_std": np.nan, + "win_rate_mean": np.nan, "profit_factor_mean": np.nan } - + hits = np.array([r["hit_rate"] for r in rows], dtype=float) sharpes = np.array([r["ann_sharpe"] for r in rows], dtype=float) - + win_rates = np.array([r["win_rate"] for r in rows], dtype=float) + profit_factors = np.array([r["profit_factor"] for r in rows], dtype=float) + + # Sort features by importance + sorted_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True) + top_features = [f[0] for f in sorted_features[:5]] # Top 5 most frequently selected features + return { "rule": rule, "hit_mean": float(np.nanmean(hits)), "hit_std": float(np.nanstd(hits)), "sharpe_mean": float(np.nanmean(sharpes)), "sharpe_std": float(np.nanstd(sharpes)), + "win_rate_mean": float(np.nanmean(win_rates)), + "profit_factor_mean": float(np.nanmean(profit_factors)), "splits": len(rows), + "top_features": top_features } - +# ------------------------------ MAIN ----------------------------------------- def main(args: CLI) -> None: + print("Loading data...") btc = _load_bitstamp_csv(args.btc_csv, "btc") eth = _load_bitstamp_csv(args.eth_csv, "eth") minute = _align_minutely(btc, eth) + print(f"Aligned data: {len(minute)} minutes") - class CV: - pass - + class CV: pass cv = CV() cv.cv_splits = args.cv_splits cv.cv_test_bars = args.cv_test_bars cv.cv_gap_bars = args.cv_gap_bars cv.cv_seed = args.cv_seed cv.cv_since = args.cv_since + cv.cv_method = args.cv_method + + results = [] + for rule in args.resample_rules: + print(f"\nProcessing rule: {rule}") + result = run_rule_mc(minute, rule, args.n_states, args.horizon_min, cv, + args.feature_selection_method, args.n_features) + results.append(result) - results = [ - run_rule_mc(minute, rule, args.n_states, args.horizon_min, cv, args.folder_save_path) - for rule in args.resample_rules - ] df = pd.DataFrame(results).sort_values(by="sharpe_mean", ascending=False) - - print("# Randomized time-split comparison (embargo = max(user_gap, ceil((lookback+horizon)/rule)))") - print( - f"States={args.n_states} HorizonMin={args.horizon_min} Splits={args.cv_splits} " - f"TestBars={args.cv_test_bars} ExtraGapBars={args.cv_gap_bars} Since={args.cv_since}" - ) + + print("\n" + "="*80) + print("ENHANCED RESULTS: Randomized time-split comparison with Feature Selection") + print("="*80) + print(f"States={args.n_states} | HorizonMin={args.horizon_min} | Splits={args.cv_splits}") + print(f"TestBars={args.cv_test_bars} | GapBars={args.cv_gap_bars} | Since={args.cv_since}") + print(f"CV Method={args.cv_method} | Feature Selection={args.feature_selection_method} | N Features={args.n_features}") + print("="*80) + if not df.empty: df["hit"] = df["hit_mean"].round(4).astype(str) + " ± " + df["hit_std"].round(4).astype(str) df["sharpe"] = df["sharpe_mean"].round(4).astype(str) + " ± " + df["sharpe_std"].round(4).astype(str) - print(df[["rule", "splits", "hit", "sharpe"]].to_string(index=False)) + df["win_rate"] = (df["win_rate_mean"] * 100).round(2).astype(str) + "%" + df["profit_factor"] = df["profit_factor_mean"].round(3).astype(str) + + display_cols = ["rule", "splits", "hit", "sharpe", "win_rate", "profit_factor"] + 
print(df[display_cols].to_string(index=False)) + + # Show top features for best performing rule + best_rule = df.iloc[0] + print(f"\nTop features for best rule '{best_rule['rule']}':") + for i, feat in enumerate(best_rule.get('top_features', [])[:5]): + print(f" {i+1}. {feat}") else: print("No valid splits found") - if __name__ == "__main__": args = parse_args() - main(args) + main(args) \ No newline at end of file
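
Usage sketch (not part of the diff): a minimal way to drive the new pipeline programmatically, constructing the CLI dataclass directly instead of going through argparse. The CSV paths are placeholders; the field names and order follow the CLI dataclass in main.py above.

# illustrative_run.py -- hypothetical driver script, paths are assumptions
from pathlib import Path
from main import CLI, main

args = CLI(
    btc_csv=Path("../data/btcusd_1-min_data.csv"),  # placeholder path
    eth_csv=Path("../data/ethusd_1min_ohlc.csv"),   # placeholder path
    resample_rules=["30min", "1H"],
    n_states=3,
    horizon_min=60,
    cv_splits=5,
    cv_test_bars=500,
    cv_gap_bars=24,
    cv_seed=7,
    cv_since="2023-01-01",
    feature_selection_method="mutual_info",
    n_features=10,
    cv_method="expanding",
)
main(args)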