Remove deprecated training scripts and systemd service files
- Deleted `install_cron.sh`, `setup_schedule.sh`, and `train_daily.sh` as part of the transition to a new scheduling mechanism.
- Removed the associated systemd service and timer files for daily model training.
- Updated `live_regime_strategy.py` and `main.py` to reflect changes in model-training and scheduling logic.
- Adjusted `regime_strategy.py` to align with the new target-calculation methods and updated optimal parameters.
- Enhanced `regime_detection.py` to incorporate path-dependent labeling for target calculations.
This commit is contained in:
@@ -30,7 +30,7 @@ class RegimeReversionStrategy(BaseStrategy):
|
||||
|
||||
# Optimal parameters from walk-forward research (2025-10 to 2025-12)
|
||||
# Research: research/horizon_optimization_results.csv
|
||||
OPTIMAL_HORIZON = 102 # 4.25 days - best Net PnL (+232%)
|
||||
OPTIMAL_HORIZON = 54 # Updated from 102h based on corrected labeling
|
||||
OPTIMAL_Z_WINDOW = 24 # 24h rolling window for spread Z-score
|
||||
OPTIMAL_TRAIN_RATIO = 0.7 # 70% train / 30% test split
|
||||
OPTIMAL_PROFIT_TARGET = 0.005 # 0.5% profit threshold for target definition
|
||||
@@ -321,21 +321,64 @@ class RegimeReversionStrategy(BaseStrategy):
|
||||
train_features: DataFrame containing features for training period only
|
||||
"""
|
||||
threshold = self.profit_target
|
||||
stop_loss_pct = self.stop_loss
|
||||
horizon = self.horizon
|
||||
z_thresh = self.z_entry_threshold
|
||||
|
||||
# Define targets using ONLY training data
|
||||
# For Short Spread (Z > threshold): Did spread drop below target within horizon?
|
||||
future_min = train_features['spread'].rolling(window=horizon).min().shift(-horizon)
|
||||
target_short = train_features['spread'] * (1 - threshold)
|
||||
success_short = (train_features['z_score'] > z_thresh) & (future_min < target_short)
|
||||
|
||||
# For Long Spread (Z < -threshold): Did spread rise above target within horizon?
|
||||
future_max = train_features['spread'].rolling(window=horizon).max().shift(-horizon)
|
||||
target_long = train_features['spread'] * (1 + threshold)
|
||||
success_long = (train_features['z_score'] < -z_thresh) & (future_max > target_long)
|
||||
# Calculate targets path-dependently (checking SL before TP)
|
||||
spread = train_features['spread'].values
|
||||
z_score = train_features['z_score'].values
|
||||
n = len(spread)
|
||||
|
||||
targets = np.select([success_short, success_long], [1, 1], default=0)
|
||||
targets = np.zeros(n, dtype=int)
|
||||
|
||||
# Only iterate relevant rows for efficiency
|
||||
candidates = np.where((z_score > z_thresh) | (z_score < -z_thresh))[0]
|
||||
|
||||
for i in candidates:
|
||||
if i + horizon >= n:
|
||||
continue
|
||||
|
||||
entry_price = spread[i]
|
||||
future_prices = spread[i+1 : i+1+horizon]
|
||||
|
||||
if z_score[i] > z_thresh: # Short
|
||||
target_price = entry_price * (1 - threshold)
|
||||
stop_price = entry_price * (1 + stop_loss_pct)
|
||||
|
||||
hit_tp = future_prices <= target_price
|
||||
hit_sl = future_prices >= stop_price
|
||||
|
||||
if not np.any(hit_tp):
|
||||
targets[i] = 0
|
||||
elif not np.any(hit_sl):
|
||||
targets[i] = 1
|
||||
else:
|
||||
first_tp_idx = np.argmax(hit_tp)
|
||||
first_sl_idx = np.argmax(hit_sl)
|
||||
if first_tp_idx < first_sl_idx:
|
||||
targets[i] = 1
|
||||
else:
|
||||
targets[i] = 0
|
||||
|
||||
else: # Long
|
||||
target_price = entry_price * (1 + threshold)
|
||||
stop_price = entry_price * (1 - stop_loss_pct)
|
||||
|
||||
hit_tp = future_prices >= target_price
|
||||
hit_sl = future_prices <= stop_price
|
||||
|
||||
if not np.any(hit_tp):
|
||||
targets[i] = 0
|
||||
elif not np.any(hit_sl):
|
||||
targets[i] = 1
|
||||
else:
|
||||
first_tp_idx = np.argmax(hit_tp)
|
||||
first_sl_idx = np.argmax(hit_sl)
|
||||
if first_tp_idx < first_sl_idx:
|
||||
targets[i] = 1
|
||||
else:
|
||||
targets[i] = 0
|
||||
|
||||
# Build model
|
||||
model = RandomForestClassifier(
|
||||
@@ -351,10 +394,9 @@ class RegimeReversionStrategy(BaseStrategy):
|
||||
X_train = train_features[cols].fillna(0)
|
||||
X_train = X_train.replace([np.inf, -np.inf], 0)
|
||||
|
||||
# Remove rows with NaN targets (from rolling window at end of training period)
|
||||
valid_mask = ~np.isnan(targets) & ~np.isinf(targets)
|
||||
# Also check for rows where future data doesn't exist (shift created NaNs)
|
||||
valid_mask = valid_mask & (future_min.notna().values) & (future_max.notna().values)
|
||||
# Use rows where we had enough data to look ahead
|
||||
valid_mask = np.zeros(n, dtype=bool)
|
||||
valid_mask[:n-horizon] = True
|
||||
|
||||
X_train_clean = X_train[valid_mask]
|
||||
targets_clean = targets[valid_mask]
|
||||
|
||||
Reference in New Issue
Block a user