From a771909eef18f2e205067aa3705283594f928b1f Mon Sep 17 00:00:00 2001
From: Simon Moisy
Date: Thu, 23 Oct 2025 10:35:22 +0800
Subject: [PATCH] Enhance BTC-ETH regime modeling: improve CLI arguments, add
 model saving, and refine metrics calculation; update run_grid.sh for better
 parameter handling and logging; adjust the VSCode launch configuration for
 the new flags and paths.

---
 .vscode/launch.json |   8 +-
 main.py             | 378 ++++++++++++++++++++++++++++++++------------
 run_grid.sh         |  97 ++++++------
 3 files changed, 337 insertions(+), 146 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index dc534a0..653a133 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -10,10 +10,12 @@
             "args": [
                 "--btc", "${workspaceFolder}/../data/btcusd_1-min_data.csv",
                 "--eth", "${workspaceFolder}/../data/ethusd_1min_ohlc.csv",
-                "--rules", "20min,21min,22min,23min,24min,25min,26min,27min,28min,29min,30min,31min,32min,33min,34min,35min,36min,37min,38min,39min,40min,41min,42min,43min,44min,45min,46min,47min,48min,49min,50min,51min,52min,53min,54min,55min,56min,57min,58min,59min,60min",
+                // "--rules", "20min,21min,22min,23min,24min,25min,26min,27min,28min,29min,30min,31min,32min,33min,34min,35min,36min,37min,38min,39min,40min,41min,42min,43min,44min,45min,46min,47min,48min,49min,50min,51min,52min,53min,54min,55min,56min,57min,58min,59min,60min",
+                "--rules", "39min",
                 "--states", "3",
-                "--split", "2023-01-01",
-                "--horizon", "60"
+                "--cv_since", "2023-01-01",
+                "--horizon", "60",
+                "--folder_save_path", "models"
             ],
             "console": "integratedTerminal",
             "cwd": "${workspaceFolder}",
diff --git a/main.py b/main.py
index 61be5dc..a7ecb75 100644
--- a/main.py
+++ b/main.py
@@ -1,14 +1,19 @@
 #!/usr/bin/env python3
 from __future__ import annotations
+
 import argparse
 from dataclasses import dataclass
 from pathlib import Path
+
 import numpy as np
 import pandas as pd
 from hmmlearn.hmm import GaussianHMM
 from sklearn.preprocessing import StandardScaler
+import joblib
+
+
+# ============================== CLI ==========================================
 
-# ------------------------------- CLI -----------------------------------------
 @dataclass
 class CLI:
     btc_csv: Path
@@ -16,6 +21,7 @@ class CLI:
     resample_rules: list[str]
     n_states: int
     horizon_min: int
+    folder_save_path: str | None
     # CV params
     cv_splits: int
     cv_test_bars: int
@@ -23,188 +29,348 @@
     cv_seed: int
     cv_since: str | None  # restrict sampling to recent era
 
+
 def parse_args() -> CLI:
-    p = argparse.ArgumentParser(description="BTC/ETH regime modeling with randomized time splits")
+    p = argparse.ArgumentParser(description="BTC/ETH regime modeling with properly embargoed time splits")
     p.add_argument("--btc", type=Path, default=Path("btcusd_1-min_data.csv"))
     p.add_argument("--eth", type=Path, default=Path("ethusd_1min_ohlc.csv"))
     p.add_argument("--rules", default="30min,45min,1H", help="Comma-separated pandas offsets")
     p.add_argument("--states", type=int, default=3)
-    p.add_argument("--horizon", type=int, default=60)
+    p.add_argument("--horizon", type=int, default=60, help="Forward horizon in minutes for the target")
+    p.add_argument("--folder_save_path", default=None, help="Folder path to save fitted HMM models (optional)")
     # randomized CV controls
     p.add_argument("--cv_splits", type=int, default=8, help="number of random test windows")
     p.add_argument("--cv_test_bars", type=int, default=500, help="length of each test window in bars")
-    p.add_argument("--cv_gap_bars", type=int, default=24, help="embargo gap before test window")
+    p.add_argument("--cv_gap_bars", type=int, default=24, help="extra embargo bars beyond the minimum computed gap")
     p.add_argument("--cv_seed", type=int, default=7, help="rng seed for reproducibility")
     p.add_argument("--cv_since", default=None, help="only sample test starts at/after this date (e.g. 2023-01-01)")
     a = p.parse_args()
-    rules = [r.strip() for r in a.rules.split(",") if r.strip()]
-    return CLI(a.btc, a.eth, rules, a.states, a.horizon, a.cv_splits, a.cv_test_bars, a.cv_gap_bars, a.cv_seed, a.cv_since)
-# ------------------------------ IO / CLEAN -----------------------------------
+    rules = [r.strip() for r in a.rules.split(",") if r.strip()]
+    return CLI(
+        btc_csv=a.btc,
+        eth_csv=a.eth,
+        resample_rules=rules,
+        n_states=a.states,
+        horizon_min=a.horizon,
+        folder_save_path=a.folder_save_path,
+        cv_splits=a.cv_splits,
+        cv_test_bars=a.cv_test_bars,
+        cv_gap_bars=a.cv_gap_bars,
+        cv_seed=a.cv_seed,
+        cv_since=a.cv_since,
+    )
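+
+# Example invocation (paths illustrative; mirrors .vscode/launch.json):
+#   python main.py --btc ../data/btcusd_1-min_data.csv --eth ../data/ethusd_1min_ohlc.csv \
+#     --rules 39min --states 3 --horizon 60 --cv_since 2023-01-01 --folder_save_path models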
window") + p.add_argument("--cv_gap_bars", type=int, default=24, help="extra embargo bars beyond the minimum computed gap") p.add_argument("--cv_seed", type=int, default=7, help="rng seed for reproducibility") p.add_argument("--cv_since", default=None, help="only sample test starts at/after this date (e.g. 2023-01-01)") a = p.parse_args() - rules = [r.strip() for r in a.rules.split(",") if r.strip()] - return CLI(a.btc, a.eth, rules, a.states, a.horizon, a.cv_splits, a.cv_test_bars, a.cv_gap_bars, a.cv_seed, a.cv_since) -# ------------------------------ IO / CLEAN ----------------------------------- + rules = [r.strip() for r in a.rules.split(",") if r.strip()] + return CLI( + btc_csv=a.btc, + eth_csv=a.eth, + resample_rules=rules, + n_states=a.states, + horizon_min=a.horizon, + folder_save_path=a.folder_save_path, + cv_splits=a.cv_splits, + cv_test_bars=a.cv_test_bars, + cv_gap_bars=a.cv_gap_bars, + cv_seed=a.cv_seed, + cv_since=a.cv_since, + ) + + +# ============================ IO / CLEAN ===================================== + def _norm_headers(df: pd.DataFrame) -> pd.DataFrame: df = df.rename(columns={c: c.strip().lower() for c in df.columns}) - if "unix" in df.columns: df = df.rename(columns={"unix": "timestamp"}) - if "date" in df.columns: df = df.rename(columns={"date": "timestamp"}) + if "unix" in df.columns: + df = df.rename(columns={"unix": "timestamp"}) + if "date" in df.columns: + df = df.rename(columns={"date": "timestamp"}) return df + def _load_bitstamp_csv(path: Path, prefix: str) -> pd.DataFrame: df = pd.read_csv(path) df = _norm_headers(df) - if "timestamp" not in df.columns: raise ValueError(f"Missing timestamp in {path}") + if "timestamp" not in df.columns: + raise ValueError(f"Missing timestamp in {path}") df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True, errors="coerce") df = df.dropna(subset=["timestamp"]).set_index("timestamp").sort_index() - for c in ("open","high","low","close","volume"): - if c in df.columns: df[c] = pd.to_numeric(df[c], errors="coerce", downcast="float") - df = df[["open","high","low","close","volume"]].dropna() + for c in ("open", "high", "low", "close", "volume"): + if c in df.columns: + df[c] = pd.to_numeric(df[c], errors="coerce", downcast="float") + df = df[["open", "high", "low", "close", "volume"]].dropna() return df.add_prefix(prefix + "_") + def _align_minutely(btc: pd.DataFrame, eth: pd.DataFrame) -> pd.DataFrame: idx = btc.index.intersection(eth.index) df = btc.reindex(idx).join(eth.reindex(idx), how="inner") return df.ffill(limit=60).dropna() -# --------------------------- FEATURES (same as before) ----------------------- + +# ======================= FEATURES / TARGET =================================== + def build_features(df: pd.DataFrame, rule: str, horizon_min: int) -> pd.DataFrame: df = df.copy() + + # base returns df["btc_ret"] = np.log(df["btc_close"]).diff() df["eth_ret"] = np.log(df["eth_close"]).diff() - df["ratio"] = df["eth_close"]/df["btc_close"] + df["ratio"] = df["eth_close"] / df["btc_close"] df["ratio_ret"] = np.log(df["ratio"]).diff() - - for win in (15,30,60,120,240,360): + + # volatility (minutes) + for win in (15, 30, 60, 120, 240, 360): df[f"rv_{win}m"] = df["ratio_ret"].rolling(win, min_periods=win).std() - - for win in (60,240,1440): + + # trend vs long MA (minutes) + for win in (60, 240, 1440): ma = df["ratio"].rolling(win, min_periods=win).mean() - df[f"trend_{win}m"] = df["ratio"]/(ma+1e-12)-1.0 - - for win in (60,120,240): + df[f"trend_{win}m"] = df["ratio"] / (ma + 1e-12) - 1.0 + + # 
 
-def feature_matrix(g: pd.DataFrame) -> tuple[np.ndarray,np.ndarray,list[str]]:
-    ban = {"fut_ret","btc_close","eth_close","ratio"}
-    keep = ("rv_","corr_","trend_","beta_","divergence_","vol")
-    feats = []
-
-    if "ratio_ret" in g.columns:
+
+def feature_matrix(g: pd.DataFrame) -> tuple[np.ndarray, np.ndarray, list[str]]:
+    ban = {"fut_ret", "btc_close", "eth_close", "ratio"}
+    keep = ("rv_", "corr_", "trend_", "beta_", "divergence_", "vol")
+    feats: list[str] = []
+
+    if "ratio_ret" in g.columns:
         feats.append("ratio_ret")
-    feats += [c for c in g.columns if c not in ban and c!="ratio_ret" and any(c.startswith(p) for p in keep)]
-
-    if not feats: feats = ["ratio_ret","rv_30m","rv_2h","corr_2h","ratio_trend","volratio"]
-
+    feats += [
+        c for c in g.columns
+        if c not in ban and c != "ratio_ret" and any(c.startswith(p) for p in keep)
+    ]
+
+    if not feats:
+        feats = ["ratio_ret", "rv_30m", "rv_2h", "corr_2h", "ratio_trend", "volratio"]
+
     X = g[feats].astype(np.float32).values
     y = g["fut_ret"].astype(np.float32).values
-
-    return X,y,feats
+    return X, y, feats
+
+
+# ====================== OVERLAP-/LEAKAGE-AWARE UTILITIES =====================
+
+def max_lookback_minutes() -> int:
+    # From feature construction: the maximum rolling window is 1440 minutes.
+    return 1440
+
+
+def bars_from_minutes(rule: str, minutes: int) -> int:
+    step_min = max(1, int(pd.Timedelta(rule).total_seconds() // 60))
+    return int(np.ceil(minutes / step_min))
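+
+# Worked example (numbers illustrative): for rule="39min" and horizon_min=60,
+# bars_from_minutes("39min", 1440 + 60) = ceil(1500 / 39) = 39 bars, so with the
+# default --cv_gap_bars=24 the effective embargo is max(24, 39) = 39 bars.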
+
+
-# ------------------------- Randomized time splits -----------------------------
 def sample_random_splits(
     g: pd.DataFrame,
+    rule: str,
     n_splits: int,
     test_bars: int,
-    gap_bars: int,
+    gap_bars_extra: int,
     seed: int,
-    since: str | None
+    since: str | None,
+    horizon_min: int,
 ):
+    """
+    Random test windows with an embargo that guarantees disjoint information sets.
+    Embargo (in bars) = max(gap_bars_extra, ceil((max_lookback + horizon_min) / rule_minutes)).
+    Train uses only data strictly before (test_start - embargo); 'since' restricts
+    test-window starts to the recent era while keeping all earlier data for training.
+    """
     rng = np.random.default_rng(seed)
     idx = g.index
-    if since is not None: idx = idx[idx >= pd.Timestamp(since, tz="UTC")]
-    # valid start indices ensure full test window fits
+
+    if len(idx) <= test_bars:
+        return
+
+    # Compute minimal embargo based on lookback + horizon
+    gap_min_bars = bars_from_minutes(rule, max_lookback_minutes() + horizon_min)
+    embargo_bars = int(max(gap_bars_extra, gap_min_bars))
+
+    # Valid start indices ensure the full test window fits; optionally restrict
+    # test starts to at/after 'since'
     valid = np.arange(len(idx) - test_bars)
-    if len(valid) <= 0:
-        raise ValueError("Not enough data for requested test window")
-
+    if since is not None:
+        valid = valid[idx[valid] >= pd.Timestamp(since, tz="UTC")]
+    if len(valid) == 0:
+        return
+
     starts = rng.choice(valid, size=min(n_splits, len(valid)), replace=False)
+    starts = np.sort(starts)
 
-    for s in np.sort(starts):
+    for s in starts:
         test_start = idx[s]
-        test_end = idx[s + test_bars - 1]
-        # train uses all data strictly before (test_start - gap)
-        embargo_end = idx[max(0, s - gap_bars - 1)] if s - gap_bars - 1 >= 0 else None
-        train = g.loc[:embargo_end] if embargo_end is not None else g.iloc[0:0]
-        test = g.loc[test_start:test_end]
+        test_end = idx[s + test_bars - 1]
 
-        if len(train) == 0 or len(test) < test_bars:  # skip degenerate
+        # Train: strictly before test_start - embargo_bars
+        left_end_pos = s - embargo_bars - 1
+        if left_end_pos < 0:
+            # No room for non-overlapping training information
             continue
-        yield train, test, (test_start, test_end)
+        embargo_end = idx[left_end_pos]
+
+        train = g.loc[:embargo_end]
+        test = g.loc[test_start:test_end]
 
+        if len(train) == 0 or len(test) < test_bars:
+            continue
+
+        yield train, test, (test_start, test_end), embargo_bars
+
+
+# ============================ MODEL / FIT ====================================
 
-# ------------------------------ Model / Fit -----------------------------------
-def fit_and_predict_train_test(train: pd.DataFrame, test: pd.DataFrame, n_states: int):
+def fit_and_predict_train_test(
+    train: pd.DataFrame,
+    test: pd.DataFrame,
+    n_states: int,
+    full_save_path: str | None = None,
+):
     Xtr, ytr, feats = feature_matrix(train)
     Xte, yte, _ = feature_matrix(test)
+
     scaler = StandardScaler()
     Xtr_s = scaler.fit_transform(Xtr)
     Xte_s = scaler.transform(Xte)
+
     hmm = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=300, random_state=7)
     hmm.fit(Xtr_s)
+
     st_tr = hmm.predict(Xtr_s)
     st_te = hmm.predict(Xte_s)
+
+    # Map HMM states to stances using the state-wise mean of future returns in TRAIN
     means = {s: float(np.nanmean(ytr[st_tr == s])) for s in range(n_states)}
     small = np.nanpercentile(np.abs(list(means.values())), 30)
     state_to_stance = {s: (1 if m > +small else (-1 if m < -small else 0)) for s, m in means.items()}
+
     preds = np.vectorize(state_to_stance.get)(st_te).astype(np.int8)
+
+    if full_save_path:
+        Path(full_save_path).parent.mkdir(parents=True, exist_ok=True)
+        joblib.dump(
+            {"hmm": hmm, "scaler": scaler, "features": feats, "state_to_stance": state_to_stance},
+            full_save_path,
+        )
+        print(f"Model saved: {full_save_path}")
+
     return preds, yte, state_to_stance, feats
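+
+# Minimal inference sketch for a saved bundle (file name illustrative; 'g_new'
+# is assumed to come from build_features on fresh data):
+#   bundle = joblib.load("models/hmm_btc_eth_39min_60_202401010000.joblib")
+#   X_new = g_new[bundle["features"]].astype(np.float32).values
+#   states = bundle["hmm"].predict(bundle["scaler"].transform(X_new))
+#   stances = np.vectorize(bundle["state_to_stance"].get)(states)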
 
-def metrics(y: np.ndarray, preds: np.ndarray, rule: str) -> dict[str,float]:
-    T = min(len(y), len(preds)); y, preds = y[:T], preds[:T]
-    pnl = preds * y
-    hit = (np.sign(preds) == np.sign(y)).mean() if T else np.nan
-    bars_per_day = int(round(24 * 60 / max(1, int(pd.Timedelta(rule).total_seconds() // 60))))
-    ann = np.sqrt(365 * bars_per_day)
-    sharpe = float(np.nanmean(pnl) / (np.nanstd(pnl) + 1e-12) * ann)
-    return {"hit_rate": float(hit), "ann_sharpe": sharpe, "n_points": int(T)}
+
+# ============================= METRICS =======================================
+
+def metrics_nonoverlap(y: np.ndarray, preds: np.ndarray, rule: str, horizon_min: int) -> dict[str, float]:
+    """
+    Score only every 'ahead'-th point to remove overlap of forward windows.
+    Adjust annualization for the reduced sampling frequency.
+    """
+    T = min(len(y), len(preds))
+    if T == 0:
+        return {"hit_rate": np.nan, "ann_sharpe": np.nan, "n_points": 0}
+
+    y = y[:T]
+    preds = preds[:T]
+
+    step_min = max(1, int(pd.Timedelta(rule).total_seconds() // 60))
+    ahead = max(1, int(round(horizon_min / step_min)))
+
+    # Use the last index of each non-overlapping forward window
+    idx = np.arange(ahead - 1, T, ahead)
+    if len(idx) == 0:
+        return {"hit_rate": np.nan, "ann_sharpe": np.nan, "n_points": 0}
+
+    y_s = y[idx]
+    p_s = preds[idx]
+
+    pnl = p_s * y_s
+    hit = float((np.sign(p_s) == np.sign(y_s)).mean())
+
+    bars_per_day = int(round(24 * 60 / step_min))
+    # We only keep one observation per 'ahead' bars
+    eff_obs_per_day = bars_per_day / ahead
+    ann = np.sqrt(365 * max(eff_obs_per_day, 1e-12))
+
+    sharpe = float(np.nanmean(pnl) / (np.nanstd(pnl) + 1e-12) * ann)
+    return {"hit_rate": hit, "ann_sharpe": sharpe, "n_points": int(len(idx))}
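+
+# Worked example (numbers illustrative): rule="39min", horizon_min=60 gives
+# ahead=2, so with T=10 the scored indices are [1, 3, 5, 7, 9]; bars_per_day=37
+# and the annualization factor becomes sqrt(365 * 37 / 2) instead of
+# sqrt(365 * 37).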
"sharpe_std": np.nan} - - hits = np.array([r["hit_rate"] for r in rows], dtype=float) + return { + "rule": rule, + "hit_mean": np.nan, + "hit_std": np.nan, + "sharpe_mean": np.nan, + "sharpe_std": np.nan, + "splits": 0, + } + + hits = np.array([r["hit_rate"] for r in rows], dtype=float) sharpes = np.array([r["ann_sharpe"] for r in rows], dtype=float) - + return { "rule": rule, "hit_mean": float(np.nanmean(hits)), @@ -214,27 +380,41 @@ def run_rule_mc(minute: pd.DataFrame, rule: str, n_states: int, horizon_min: int "splits": len(rows), } -# ------------------------------ MAIN ----------------------------------------- + def main(args: CLI) -> None: btc = _load_bitstamp_csv(args.btc_csv, "btc") eth = _load_bitstamp_csv(args.eth_csv, "eth") minute = _align_minutely(btc, eth) - class CV: pass - cv = CV(); cv.cv_splits=args.cv_splits; cv.cv_test_bars=args.cv_test_bars; cv.cv_gap_bars=args.cv_gap_bars - cv.cv_seed=args.cv_seed; cv.cv_since=args.cv_since + class CV: + pass - results = [run_rule_mc(minute, rule, args.n_states, args.horizon_min, cv) for rule in args.resample_rules] + cv = CV() + cv.cv_splits = args.cv_splits + cv.cv_test_bars = args.cv_test_bars + cv.cv_gap_bars = args.cv_gap_bars + cv.cv_seed = args.cv_seed + cv.cv_since = args.cv_since + + results = [ + run_rule_mc(minute, rule, args.n_states, args.horizon_min, cv, args.folder_save_path) + for rule in args.resample_rules + ] df = pd.DataFrame(results).sort_values(by="sharpe_mean", ascending=False) - print("# Randomized time-split comparison") - print(f"States={args.n_states} HorizonMin={args.horizon_min} Splits={args.cv_splits} TestBars={args.cv_test_bars} GapBars={args.cv_gap_bars} Since={args.cv_since}") + + print("# Randomized time-split comparison (embargo = max(user_gap, ceil((lookback+horizon)/rule)))") + print( + f"States={args.n_states} HorizonMin={args.horizon_min} Splits={args.cv_splits} " + f"TestBars={args.cv_test_bars} ExtraGapBars={args.cv_gap_bars} Since={args.cv_since}" + ) if not df.empty: - df["hit"] = df["hit_mean"].round(4).astype(str) + " ± " + df["hit_std"].round(4).astype(str) - df["sharpe"]= df["sharpe_mean"].round(4).astype(str) + " ± " + df["sharpe_std"].round(4).astype(str) - print(df[["rule","splits","hit","sharpe"]].to_string(index=False)) + df["hit"] = df["hit_mean"].round(4).astype(str) + " ± " + df["hit_std"].round(4).astype(str) + df["sharpe"] = df["sharpe_mean"].round(4).astype(str) + " ± " + df["sharpe_std"].round(4).astype(str) + print(df[["rule", "splits", "hit", "sharpe"]].to_string(index=False)) else: print("No valid splits found") + if __name__ == "__main__": args = parse_args() main(args) diff --git a/run_grid.sh b/run_grid.sh index 79163d3..a190668 100755 --- a/run_grid.sh +++ b/run_grid.sh @@ -1,55 +1,64 @@ #!/usr/bin/env bash -# run_grid.sh — loop over timeframes and 2–6 bars-ahead horizons set -euo pipefail -# Paths (edit if different) -PROJ_DIR="${PROJ_DIR:-$HOME/Documents/Work/TCP/BTC_ETH_regime_predictor}" -DATA_DIR="${DATA_DIR:-$PROJ_DIR/../data}" -PY="${PY:-$PROJ_DIR/.venv/bin/python}" +# Required CSVs (must exist in ../data) +BTC_CSV="../data/btcusd_1-min_data.csv" +ETH_CSV="../data/ethusd_1min_ohlc.csv" -BTC_CSV="${BTC_CSV:-$DATA_DIR/btcusd_1-min_data.csv}" -ETH_CSV="${ETH_CSV:-$DATA_DIR/ethusd_1min_ohlc.csv}" -SPLIT_DATE="${SPLIT_DATE:-2023-01-01}" -N_STATES="${N_STATES:-3}" +# Grid parameters +RULE_SETS="30min,45min,1H" +HORIZONS="60 120" +N_STATES_LIST="3 4" -# Timeframes to test: 20–60 min inclusive -readarray -t RULES < <(seq 20 60 | awk '{printf "%dmin\n",$1}') +CV_SPLITS=8 
diff --git a/run_grid.sh b/run_grid.sh
index 79163d3..a190668 100755
--- a/run_grid.sh
+++ b/run_grid.sh
@@ -1,55 +1,64 @@
 #!/usr/bin/env bash
-# run_grid.sh — loop over timeframes and 2–6 bars-ahead horizons
 set -euo pipefail
 
-# Paths (edit if different)
-PROJ_DIR="${PROJ_DIR:-$HOME/Documents/Work/TCP/BTC_ETH_regime_predictor}"
-DATA_DIR="${DATA_DIR:-$PROJ_DIR/../data}"
-PY="${PY:-$PROJ_DIR/.venv/bin/python}"
+# Required CSVs (must exist in ../data)
+BTC_CSV="../data/btcusd_1-min_data.csv"
+ETH_CSV="../data/ethusd_1min_ohlc.csv"
 
-BTC_CSV="${BTC_CSV:-$DATA_DIR/btcusd_1-min_data.csv}"
-ETH_CSV="${ETH_CSV:-$DATA_DIR/ethusd_1min_ohlc.csv}"
-SPLIT_DATE="${SPLIT_DATE:-2023-01-01}"
-N_STATES="${N_STATES:-3}"
+# Grid parameters
+RULE_SETS="30min,45min,1H"
+HORIZONS="60 120"
+N_STATES_LIST="3 4"
 
-# Timeframes to test: 20–60 min inclusive
-readarray -t RULES < <(seq 20 60 | awk '{printf "%dmin\n",$1}')
+CV_SPLITS=8
+CV_TEST_BARS=500
+CV_GAP_BARS=24
+CV_SEED=7
+CV_SINCE=""
+FOLDER_SAVE_PATH=""
 
-# Convert a pandas-like offset to minutes: supports Nmin, NH, ND
-to_minutes() {
-    local r="$1"
-    if [[ "$r" =~ ^([0-9]+)min$ ]]; then
-        echo "${BASH_REMATCH[1]}"
-    elif [[ "$r" =~ ^([0-9]+)H$ ]]; then
-        echo $(( ${BASH_REMATCH[1]} * 60 ))
-    elif [[ "$r" =~ ^([0-9]+)D$ ]]; then
-        echo $(( ${BASH_REMATCH[1]} * 1440 ))
-    else
-        echo "Unsupported rule: $r" >&2
-        exit 2
-    fi
+PYRUN="uv run python"
+
+STAMP="$(date +%Y%m%d_%H%M%S)"
+mkdir -p logs
+LOG_FILE="logs/grid_${STAMP}.log"
+
+{
+  echo "# GRID START $(date -Is)"
+  echo "BTC_CSV=$BTC_CSV"
+  echo "ETH_CSV=$ETH_CSV"
+  echo
+} | tee -a "$LOG_FILE"
+
+run_job() {
+  local rules="$1" horizon="$2" n_states="$3"
+
+  # PYRUN is intentionally unquoted so "uv run python" splits into words
+  cmd=( $PYRUN main.py
+    --btc "$BTC_CSV"
+    --eth "$ETH_CSV"
+    --rules "$rules"
+    --states "$n_states"
+    --horizon "$horizon"
+    --cv_splits "$CV_SPLITS"
+    --cv_test_bars "$CV_TEST_BARS"
+    --cv_gap_bars "$CV_GAP_BARS"
+    --cv_seed "$CV_SEED"
+  )
+  [[ -n "$CV_SINCE" ]] && cmd+=( --cv_since "$CV_SINCE" )
+  [[ -n "$FOLDER_SAVE_PATH" ]] && cmd+=( --folder_save_path "$FOLDER_SAVE_PATH" )
+
+  echo "----------------------------------------------------------------" | tee -a "$LOG_FILE"
+  echo "# $(date -Is) rules=${rules} horizon=${horizon} states=${n_states}" | tee -a "$LOG_FILE"
+  "${cmd[@]}" 2>&1 | tee -a "$LOG_FILE"
 }
 
-# Logs
-OUT_DIR="${OUT_DIR:-$PROJ_DIR/run_logs}"
-mkdir -p "$OUT_DIR"
-TS="$(date +"%Y%m%d_%H%M%S")"
-LOG="$OUT_DIR/grid_${TS}.log"
-
-# Run grid
-for BARS in 2 3 4 5 6; do
-  for RULE in "${RULES[@]}"; do
-    BAR_MIN=$(to_minutes "$RULE")
-    HORIZON=$(( BARS * BAR_MIN ))
-    echo "Running rule='$RULE' bars_ahead=$BARS horizon_min=$HORIZON" | tee -a "$LOG"
-    "$PY" "$PROJ_DIR/main.py" \
-      --btc "$BTC_CSV" \
-      --eth "$ETH_CSV" \
-      --rules "$RULE" \
-      --states "$N_STATES" \
-      --split "$SPLIT_DATE" \
-      --horizon "$HORIZON" | tee -a "$LOG"
-    echo | tee -a "$LOG"
+# Split RULE_SETS on newlines so each newline-separated entry runs as one job
+IFS=$'\n' read -r -d '' -a RULE_GROUPS < <(printf '%s\0' "$RULE_SETS")
+for rules in "${RULE_GROUPS[@]}"; do
+  for horizon in $HORIZONS; do
+    for n_states in $N_STATES_LIST; do
+      run_job "$rules" "$horizon" "$n_states"
+    done
   done
 done
+echo "# GRID END $(date -Is)" | tee -a "$LOG_FILE"
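+
+# Example: widen the sweep by editing the grid variables above, e.g.
+#   RULE_SETS="39min"        # newline-separated entries run as separate jobs
+#   HORIZONS="60 120 240"
+#   N_STATES_LIST="2 3 4 5"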