Remove deprecated training scripts and systemd service files

- Deleted `install_cron.sh`, `setup_schedule.sh`, and `train_daily.sh` as part of the transition to a new scheduling mechanism.
- Removed associated systemd service and timer files for daily model training.
- Updated `live_regime_strategy.py` and `main.py` to reflect changes in model training and scheduling logic.
- Adjusted `regime_strategy.py` to align with new target calculation methods and updated optimal parameters.
- Enhanced `regime_detection.py` to incorporate path-dependent labeling for target calculations.
2026-01-18 14:35:46 +08:00
parent b5550f4ff4
commit 582a43cd4a
10 changed files with 285 additions and 638 deletions
--- a/research/regime_detection.py
+++ b/research/regime_detection.py
@@ -32,6 +32,7 @@ logger = get_logger(__name__)
 # Configuration
 TRAIN_RATIO = 0.7  # 70% train, 30% test
 PROFIT_THRESHOLD = 0.005  # 0.5% profit target
+STOP_LOSS_PCT = 0.06  # 6% stop loss
 Z_WINDOW = 24
 FEE_RATE = 0.001  # 0.1% round-trip fee
 DEFAULT_DAYS = 90  # Default lookback period in days
@@ -139,26 +140,74 @@ def calculate_features(df_btc, df_eth, cq_df=None):


 def calculate_targets(features, horizon):
-    """Calculate target labels for a given horizon."""
-    spread = features['spread']
-    z_score = features['z_score']
+    """
+    Calculate target labels for a given horizon.
    
-    # For Short (Z > 1): Did spread drop below target?
-    future_min = spread.rolling(window=horizon).min().shift(-horizon)
-    target_short = spread * (1 - PROFIT_THRESHOLD)
-    success_short = (z_score > 1.0) & (future_min < target_short)
+    Uses path-dependent labeling: Success is hitting Profit Target BEFORE Stop Loss.
+    """
+    spread = features['spread'].values
+    z_score = features['z_score'].values
+    n = len(spread)
    
-    # For Long (Z < -1): Did spread rise above target?
-    future_max = spread.rolling(window=horizon).max().shift(-horizon)
-    target_long = spread * (1 + PROFIT_THRESHOLD)
-    success_long = (z_score < -1.0) & (future_max > target_long)
-    
-    targets = np.select([success_short, success_long], [1, 1], default=0)
+    targets = np.zeros(n, dtype=int)
    
    # Create valid mask (rows with complete future data)
-    valid_mask = future_min.notna() & future_max.notna()
+    valid_mask = np.zeros(n, dtype=bool)
+    valid_mask[:n-horizon] = True
    
-    return targets, valid_mask, future_min, future_max
+    # Only iterate relevant rows for efficiency
+    candidates = np.where((z_score > 1.0) | (z_score < -1.0))[0]
+    
+    for i in candidates:
+        if i + horizon >= n:
+            continue
+            
+        entry_price = spread[i]
+        future_prices = spread[i+1 : i+1+horizon]
+        
+        if z_score[i] > 1.0: # Short
+            target_price = entry_price * (1 - PROFIT_THRESHOLD)
+            stop_price = entry_price * (1 + STOP_LOSS_PCT)
+            
+            # Identify first hit indices
+            hit_tp = future_prices <= target_price
+            hit_sl = future_prices >= stop_price
+            
+            if not np.any(hit_tp):
+                targets[i] = 0 # Target never hit
+            elif not np.any(hit_sl):
+                targets[i] = 1 # Target hit, SL never hit
+            else:
+                first_tp_idx = np.argmax(hit_tp)
+                first_sl_idx = np.argmax(hit_sl)
+                
+                # Success if TP hit before SL
+                if first_tp_idx < first_sl_idx:
+                    targets[i] = 1
+                else:
+                    targets[i] = 0
+                    
+        else: # Long
+            target_price = entry_price * (1 + PROFIT_THRESHOLD)
+            stop_price = entry_price * (1 - STOP_LOSS_PCT)
+            
+            hit_tp = future_prices >= target_price
+            hit_sl = future_prices <= stop_price
+            
+            if not np.any(hit_tp):
+                targets[i] = 0
+            elif not np.any(hit_sl):
+                targets[i] = 1
+            else:
+                first_tp_idx = np.argmax(hit_tp)
+                first_sl_idx = np.argmax(hit_sl)
+                
+                if first_tp_idx < first_sl_idx:
+                    targets[i] = 1
+                else:
+                    targets[i] = 0
+    
+    return targets, pd.Series(valid_mask, index=features.index), None, None


 def calculate_mae(features, predictions, test_idx, horizon):
@@ -197,7 +246,7 @@ def calculate_mae(features, predictions, test_idx, horizon):
 def calculate_net_profit(features, predictions, test_idx, horizon):
    """
    Calculate estimated net profit including fees.
-    Enforces 'one trade at a time' to avoid inflating returns with overlapping signals.
+    Enforces 'one trade at a time' and simulates SL/TP exits.
    """
    test_features = features.loc[test_idx]
    spread = test_features['spread']
@@ -209,6 +258,9 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
    # Track when we are free to trade again
    next_trade_idx = 0
    
+    # Pre-calculate indices for speed
+    all_indices = features.index
+    
    for i, (idx, pred) in enumerate(zip(test_idx, predictions)):
        # Skip if we are still in a trade
        if i < next_trade_idx:
@@ -221,29 +273,76 @@ def calculate_net_profit(features, predictions, test_idx, horizon):
        z = z_score.loc[idx]
        
        # Get future spread values
-        future_idx = features.index.get_loc(idx)
-        future_end = min(future_idx + horizon, len(features))
-        future_spreads = features['spread'].iloc[future_idx:future_end]
+        current_loc = features.index.get_loc(idx)
+        future_end_loc = min(current_loc + horizon, len(features))
+        future_spreads = features['spread'].iloc[current_loc+1 : future_end_loc]
        
-        if len(future_spreads) < 2:
+        if len(future_spreads) < 1:
            continue
        
-        # Calculate PnL based on direction
-        if z > 1.0:  # Short trade - profit if spread drops
-            exit_spread = future_spreads.iloc[-1]  # Exit at horizon
-            pnl = (entry_spread - exit_spread) / entry_spread
-        else:  # Long trade - profit if spread rises
-            exit_spread = future_spreads.iloc[-1]
-            pnl = (exit_spread - entry_spread) / entry_spread
+        pnl = 0.0
+        trade_duration = len(future_spreads)
+        
+        if z > 1.0:  # Short trade
+            tp_price = entry_spread * (1 - PROFIT_THRESHOLD)
+            sl_price = entry_spread * (1 + STOP_LOSS_PCT)
+            
+            hit_tp = future_spreads <= tp_price
+            hit_sl = future_spreads >= sl_price
+            
+            # Check what happened first
+            first_tp = np.argmax(hit_tp.values) if hit_tp.any() else 99999
+            first_sl = np.argmax(hit_sl.values) if hit_sl.any() else 99999
+            
+            if first_sl < first_tp and first_sl < 99999:
+                # Stopped out
+                exit_price = future_spreads.iloc[first_sl] # Approx SL price
+                # Use exact SL price for realistic simulation? Or close
+                # Let's use the close price of the bar where it crossed
+                pnl = (entry_spread - exit_price) / entry_spread
+                trade_duration = first_sl + 1
+            elif first_tp < first_sl and first_tp < 99999:
+                # Take profit
+                exit_price = future_spreads.iloc[first_tp]
+                pnl = (entry_spread - exit_price) / entry_spread
+                trade_duration = first_tp + 1
+            else:
+                # Held to horizon
+                exit_price = future_spreads.iloc[-1]
+                pnl = (entry_spread - exit_price) / entry_spread
+                
+        else:  # Long trade
+            tp_price = entry_spread * (1 + PROFIT_THRESHOLD)
+            sl_price = entry_spread * (1 - STOP_LOSS_PCT)
+            
+            hit_tp = future_spreads >= tp_price
+            hit_sl = future_spreads <= sl_price
+            
+            first_tp = np.argmax(hit_tp.values) if hit_tp.any() else 99999
+            first_sl = np.argmax(hit_sl.values) if hit_sl.any() else 99999
+            
+            if first_sl < first_tp and first_sl < 99999:
+                # Stopped out
+                exit_price = future_spreads.iloc[first_sl]
+                pnl = (exit_price - entry_spread) / entry_spread
+                trade_duration = first_sl + 1
+            elif first_tp < first_sl and first_tp < 99999:
+                # Take profit
+                exit_price = future_spreads.iloc[first_tp]
+                pnl = (exit_price - entry_spread) / entry_spread
+                trade_duration = first_tp + 1
+            else:
+                # Held to horizon
+                exit_price = future_spreads.iloc[-1]
+                pnl = (exit_price - entry_spread) / entry_spread
        
        # Subtract fees
        net_pnl = pnl - FEE_RATE
        total_pnl += net_pnl
        n_trades += 1
        
-        # Set next available trade index (simple non-overlapping logic)
-        # We assume we hold for 'horizon' bars
-        next_trade_idx = i + horizon
+        # Set next available trade index
+        next_trade_idx = i + trade_duration
    
    return total_pnl, n_trades

@@ -321,7 +420,7 @@ def test_horizons(features, horizons):
    
    print("\n" + "=" * 80)
    print("WALK-FORWARD HORIZON OPTIMIZATION")
-    print(f"Train Ratio: {TRAIN_RATIO*100:.0f}% | Profit Target: {PROFIT_THRESHOLD*100:.1f}% | Fee Rate: {FEE_RATE*100:.2f}%")
+    print(f"Train Ratio: {TRAIN_RATIO*100:.0f}% | Profit Target: {PROFIT_THRESHOLD*100:.1f}% | Stop Loss: {STOP_LOSS_PCT*100:.1f}% | Fee Rate: {FEE_RATE*100:.2f}%")
    print("=" * 80)
    
    for h in horizons: