Add daily model training scripts and terminal UI for live trading

- Introduced `train_daily.sh` for automating daily model retraining, including data download and model training steps.
- Added `install_cron.sh` for setting up a cron job to run the daily training script.
- Created `setup_schedule.sh` for configuring systemd timers for daily training tasks.
- Implemented a terminal UI using Rich for real-time monitoring of trading performance, including metrics display and log handling.
- Updated `pyproject.toml` to include the `rich` dependency for UI functionality.
- Enhanced `.gitignore` to exclude model and log files.
- Added database support for trade persistence and metrics calculation.
- Updated README with installation and usage instructions for the new features.
2026-01-18 11:08:57 +08:00
parent 35992ee374
commit b5550f4ff4
27 changed files with 3582 additions and 113 deletions
--- a/research/regime_detection.py
+++ b/research/regime_detection.py
@@ -3,7 +3,16 @@ Regime Detection Research Script with Walk-Forward Training.

 Tests multiple holding horizons to find optimal parameters
 without look-ahead bias.
+
+Usage:
+    uv run python research/regime_detection.py [options]
+    
+Options:
+    --days DAYS        Number of days of data (default: 90)
+    --start DATE       Start date (YYYY-MM-DD), overrides --days
+    --end DATE         End date (YYYY-MM-DD), defaults to now
 """
+import argparse
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -25,18 +34,36 @@ TRAIN_RATIO = 0.7  # 70% train, 30% test
 PROFIT_THRESHOLD = 0.005  # 0.5% profit target
 Z_WINDOW = 24
 FEE_RATE = 0.001  # 0.1% round-trip fee
+DEFAULT_DAYS = 90  # Default lookback period in days


-def load_data():
-    """Load and align BTC/ETH data."""
+def load_data(days: int = DEFAULT_DAYS, start_date: str = None, end_date: str = None):
+    """
+    Load and align BTC/ETH data.
+    
+    Args:
+        days: Number of days of historical data (default: 90)
+        start_date: Optional start date (YYYY-MM-DD), overrides days
+        end_date: Optional end date (YYYY-MM-DD), defaults to now
+        
+    Returns:
+        Tuple of (df_btc, df_eth) DataFrames
+    """
    dm = DataManager()
    
    df_btc = dm.load_data("okx", "BTC-USDT", "1h", MarketType.SPOT)
    df_eth = dm.load_data("okx", "ETH-USDT", "1h", MarketType.SPOT)
    
-    # Filter to Oct-Dec 2025
-    start = pd.Timestamp("2025-10-01", tz="UTC")
-    end = pd.Timestamp("2025-12-31", tz="UTC")
+    # Determine date range
+    if end_date:
+        end = pd.Timestamp(end_date, tz="UTC")
+    else:
+        end = pd.Timestamp.now(tz="UTC")
+    
+    if start_date:
+        start = pd.Timestamp(start_date, tz="UTC")
+    else:
+        start = end - pd.Timedelta(days=days)
    
    df_btc = df_btc[(df_btc.index >= start) & (df_btc.index <= end)]
    df_eth = df_eth[(df_eth.index >= start) & (df_eth.index <= end)]
@@ -46,7 +73,7 @@ def load_data():
    df_btc = df_btc.loc[common]
    df_eth = df_eth.loc[common]
    
-    logger.info(f"Loaded {len(common)} aligned hourly bars")
+    logger.info(f"Loaded {len(common)} aligned hourly bars from {start} to {end}")
    return df_btc, df_eth


@@ -168,7 +195,10 @@ def calculate_mae(features, predictions, test_idx, horizon):


 def calculate_net_profit(features, predictions, test_idx, horizon):
-    """Calculate estimated net profit including fees."""
+    """
+    Calculate estimated net profit including fees.
+    Enforces 'one trade at a time' to avoid inflating returns with overlapping signals.
+    """
    test_features = features.loc[test_idx]
    spread = test_features['spread']
    z_score = test_features['z_score']
@@ -176,7 +206,14 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
    total_pnl = 0.0
    n_trades = 0
    
+    # Track when we are free to trade again
+    next_trade_idx = 0
+    
    for i, (idx, pred) in enumerate(zip(test_idx, predictions)):
+        # Skip if we are still in a trade
+        if i < next_trade_idx:
+            continue
+            
        if pred != 1:
            continue
        
@@ -203,6 +240,10 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
        net_pnl = pnl - FEE_RATE
        total_pnl += net_pnl
        n_trades += 1
+        
+        # Set next available trade index (simple non-overlapping logic)
+        # We assume we hold for 'horizon' bars
+        next_trade_idx = i + horizon
    
    return total_pnl, n_trades

@@ -295,10 +336,54 @@ def test_horizons(features, horizons):
    return results


+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Regime detection research - test multiple horizons"
+    )
+    parser.add_argument(
+        "--days",
+        type=int,
+        default=DEFAULT_DAYS,
+        help=f"Number of days of data (default: {DEFAULT_DAYS})"
+    )
+    parser.add_argument(
+        "--start",
+        type=str,
+        default=None,
+        help="Start date (YYYY-MM-DD), overrides --days"
+    )
+    parser.add_argument(
+        "--end",
+        type=str,
+        default=None,
+        help="End date (YYYY-MM-DD), defaults to now"
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="research/horizon_optimization_results.csv",
+        help="Output CSV path"
+    )
+    parser.add_argument(
+        "--output-horizon",
+        type=str,
+        default=None,
+        help="Path to save the best horizon (integer) to a file"
+    )
+    return parser.parse_args()
+
+
 def main():
    """Main research function."""
-    # Load data
-    df_btc, df_eth = load_data()
+    args = parse_args()
+    
+    # Load data with dynamic date range
+    df_btc, df_eth = load_data(
+        days=args.days,
+        start_date=args.start,
+        end_date=args.end
+    )
    cq_df = load_cryptoquant_data()
    
    # Calculate features
@@ -312,7 +397,7 @@ def main():
    
    if not results:
        print("No valid results!")
-        return
+        return None
    
    # Find best by different metrics
    results_df = pd.DataFrame(results)
@@ -331,9 +416,15 @@ def main():
    print(f"Lowest MAE:     {lowest_mae['horizon']:.0f}h (MAE={lowest_mae['avg_mae']:.2f}%)")
    
    # Save results
-    output_path = "research/horizon_optimization_results.csv"
-    results_df.to_csv(output_path, index=False)
-    print(f"\nResults saved to {output_path}")
+    results_df.to_csv(args.output, index=False)
+    print(f"\nResults saved to {args.output}")
+    
+    # Save best horizon if requested
+    if args.output_horizon:
+        best_h = int(best_pnl['horizon'])
+        with open(args.output_horizon, 'w') as f:
+            f.write(str(best_h))
+        print(f"Best horizon {best_h}h saved to {args.output_horizon}")
    
    return results_df