Add check_symbols.py for ETH perpetuals filtering and enhance backtester with size handling

- Introduced `check_symbols.py` to load and filter ETH perpetual markets from the OKX exchange using CCXT.
- Updated the backtester to normalize signals to a 5-tuple format, incorporating size management for trades.
- Enhanced portfolio functions to support variable size and leverage adjustments based on initial capital.
- Added a new method in `CryptoQuantClient` for chunked historical data fetching to avoid API limits.
- Improved market symbol normalization in `market.py` to handle slash-separated, dash-separated, and bare base symbols.
- Updated regime strategy parameters based on recent research findings for optimal performance.
2026-01-14 09:46:51 +08:00
parent 10bb371054
commit 1e4cb87da3
8 changed files with 617 additions and 111 deletions
--- a/engine/backtester.py
+++ b/engine/backtester.py
@@ -132,9 +132,22 @@ class Backtester:
            **strategy_params
        )
        
-        # Normalize signals to 4-tuple format
+        # Normalize signals to 5-tuple format
        signals = self._normalize_signals(signals, close_price, market_config)
-        long_entries, long_exits, short_entries, short_exits = signals
+        long_entries, long_exits, short_entries, short_exits, size = signals
+        
+        # Default size if None
+        if size is None:
+            size = 1.0
+            
+        # Convert leverage multiplier to raw value (USD) for vbt
+        # This works around "SizeType.Percent reversal" error
+        # Effectively "Fixed Fractional" sizing based on Initial Capital
+        # (Does not compound, but safe for backtesting)
+        if isinstance(size, pd.Series):
+            size = size * init_cash
+        else:
+            size = size * init_cash
        
        # Process liquidations - inject forced exits at liquidation points
        liquidation_events: list[LiquidationEvent] = []
@@ -164,7 +177,8 @@ class Backtester:
            long_entries, long_exits,
            short_entries, short_exits,
            init_cash, effective_fees, slippage, timeframe,
-            sl_stop, tp_stop, sl_trail, effective_leverage
+            sl_stop, tp_stop, sl_trail, effective_leverage,
+            size=size
        )
        
        # Calculate adjusted returns accounting for liquidation losses
@@ -242,39 +256,45 @@ class Backtester:
        market_config
    ) -> tuple:
        """
-        Normalize strategy signals to 4-tuple format.
+        Normalize strategy signals to 5-tuple format.
        
-        Handles backward compatibility with 2-tuple (long-only) returns.
+        Returns:
+            (long_entries, long_exits, short_entries, short_exits, size)
        """
+        # Default size is None (will be treated as 1.0 or default later)
+        size = None
+        
        if len(signals) == 2:
            long_entries, long_exits = signals
            short_entries = BaseStrategy.create_empty_signals(long_entries)
            short_exits = BaseStrategy.create_empty_signals(long_entries)
-            return long_entries, long_exits, short_entries, short_exits
+            return long_entries, long_exits, short_entries, short_exits, size
        
        if len(signals) == 4:
            long_entries, long_exits, short_entries, short_exits = signals
+        elif len(signals) == 5:
+            long_entries, long_exits, short_entries, short_exits, size = signals
+        else:
+            raise ValueError(
+                f"Strategy must return 2, 4, or 5 signal arrays, got {len(signals)}"
+            )
            
-            # Warn and clear short signals on spot markets
-            if not market_config.supports_short:
-                has_shorts = (
-                    short_entries.any().any() 
-                    if hasattr(short_entries, 'any') 
-                    else short_entries.any()
+        # Warn and clear short signals on spot markets
+        if not market_config.supports_short:
+            has_shorts = (
+                short_entries.any().any() 
+                if hasattr(short_entries, 'any') 
+                else short_entries.any()
+            )
+            if has_shorts:
+                logger.warning(
+                    "Short signals detected but market type is SPOT. "
+                    "Short signals will be ignored."
                )
-                if has_shorts:
-                    logger.warning(
-                        "Short signals detected but market type is SPOT. "
-                        "Short signals will be ignored."
-                    )
-                    short_entries = BaseStrategy.create_empty_signals(long_entries)
-                    short_exits = BaseStrategy.create_empty_signals(long_entries)
-                    
-            return long_entries, long_exits, short_entries, short_exits
-        
-        raise ValueError(
-            f"Strategy must return 2 or 4 signal arrays, got {len(signals)}"
-        )
+                short_entries = BaseStrategy.create_empty_signals(long_entries)
+                short_exits = BaseStrategy.create_empty_signals(long_entries)
+                
+        return long_entries, long_exits, short_entries, short_exits, size
    
    def _run_portfolio(
        self,
@@ -289,7 +309,8 @@ class Backtester:
        sl_stop: float | None,
        tp_stop: float | None,
        sl_trail: bool,
-        leverage: int
+        leverage: int,
+        size: pd.Series | float = 1.0
    ) -> vbt.Portfolio:
        """Select and run appropriate portfolio simulation."""
        has_shorts = (
@@ -304,14 +325,18 @@ class Backtester:
                long_entries, long_exits,
                short_entries, short_exits,
                init_cash, fees, slippage, freq,
-                sl_stop, tp_stop, sl_trail, leverage
+                sl_stop, tp_stop, sl_trail, leverage,
+                size=size
            )
        
        return run_long_only_portfolio(
            close,
            long_entries, long_exits,
            init_cash, fees, slippage, freq,
-            sl_stop, tp_stop, sl_trail, leverage
+            sl_stop, tp_stop, sl_trail, leverage,
+            # Long-only doesn't support variable size in current implementation
+            # without modification, but we can add it if needed.
+            # For now, only regime strategy uses it, which is Long/Short.
        )
    
    def run_wfa(
--- a/engine/cryptoquant.py
+++ b/engine/cryptoquant.py
@@ -133,12 +133,57 @@ class CryptoQuantClient:
                
        return combined_df

+    def fetch_history_chunked(
+        self, 
+        symbols: list[str], 
+        metrics: dict, 
+        start_date: str, 
+        end_date: str, 
+        chunk_months: int = 3
+    ) -> pd.DataFrame:
+        """
+        Fetch historical data in chunks to avoid API limits.
+        """
+        start_dt = datetime.strptime(start_date, "%Y%m%d")
+        end_dt = datetime.strptime(end_date, "%Y%m%d")
+        
+        all_data = []
+        
+        current = start_dt
+        while current < end_dt:
+            next_chunk = current + timedelta(days=chunk_months * 30)
+            if next_chunk > end_dt:
+                next_chunk = end_dt
+            
+            s_str = current.strftime("%Y%m%d")
+            e_str = next_chunk.strftime("%Y%m%d")
+            
+            logger.info(f"Processing chunk: {s_str} to {e_str}")
+            chunk_df = self.fetch_multi_metrics(symbols, metrics, s_str, e_str)
+            
+            if not chunk_df.empty:
+                all_data.append(chunk_df)
+            
+            current = next_chunk + timedelta(days=1)
+            time.sleep(1) # Be nice to API
+            
+        if not all_data:
+            return pd.DataFrame()
+            
+        # Combine all chunks
+        full_df = pd.concat(all_data)
+        # Remove duplicates if any overlap
+        full_df = full_df[~full_df.index.duplicated(keep='first')]
+        full_df.sort_index(inplace=True)
+        
+        return full_df
+
 if __name__ == "__main__":
    cq = CryptoQuantClient()
    
-    # 3 Months Data (Oct 1 2025 - Dec 31 2025)
-    start = "20251001"
-    end = "20251231"
+    # 12 Months Data (Jan 1 2025 - Jan 14 2026)
+    start = "20250101"
+    end = "20260114"
    
    metrics = {
        "reserves": "exchange-flows/exchange-reserve",
@@ -147,7 +192,7 @@ if __name__ == "__main__":
    }
    
    print(f"Fetching training data from {start} to {end}...")
-    df = cq.fetch_multi_metrics(["btc", "eth"], metrics, start, end)
+    df = cq.fetch_history_chunked(["btc", "eth"], metrics, start, end)
    
    output_file = "data/cq_training_data.csv"
    os.makedirs("data", exist_ok=True)
--- a/engine/market.py
+++ b/engine/market.py
@@ -94,8 +94,20 @@ def get_ccxt_symbol(symbol: str, market_type: MarketType) -> str:
    """
    if market_type == MarketType.PERPETUAL:
        # OKX perpetual format: BTC/USDT:USDT
-        quote = symbol.split('/')[1] if '/' in symbol else 'USDT'
-        return f"{symbol}:{quote}"
+        if '/' in symbol:
+            base, quote = symbol.split('/')
+            return f"{symbol}:{quote}"
+        elif '-' in symbol:
+            base, quote = symbol.split('-')
+            return f"{base}/{quote}:{quote}"
+        else:
+            # Assume base is symbol, quote is USDT default
+            return f"{symbol}/USDT:USDT"
+            
+    # For spot, normalize dash to slash for CCXT
+    if '-' in symbol:
+        return symbol.replace('-', '/')
+        
    return symbol


--- a/engine/portfolio.py
+++ b/engine/portfolio.py
@@ -74,75 +74,35 @@ def run_long_short_portfolio(
    sl_stop: float | None,
    tp_stop: float | None,
    sl_trail: bool,
-    leverage: int
+    leverage: int,
+    size: pd.Series | float = 1.0,
+    size_type: str = 'value'  # Changed to 'value' to support reversals/sizing
 ) -> vbt.Portfolio:
    """
    Run a portfolio supporting both long and short positions.
    
-    Runs two separate portfolios (long and short) and combines results.
-    Each gets half the capital.
-    
-    Args:
-        close: Close price series
-        long_entries: Long entry signals
-        long_exits: Long exit signals
-        short_entries: Short entry signals
-        short_exits: Short exit signals
-        init_cash: Initial capital
-        fees: Transaction fee percentage
-        slippage: Slippage percentage
-        freq: Data frequency string
-        sl_stop: Stop loss percentage
-        tp_stop: Take profit percentage
-        sl_trail: Enable trailing stop loss
-        leverage: Leverage multiplier
-        
-    Returns:
-        VectorBT Portfolio object (long portfolio, short stats logged)
+    Uses VectorBT's native support for short_entries/short_exits
+    to simulate a single unified portfolio.
    """
    effective_cash = init_cash * leverage
-    half_cash = effective_cash / 2
    
-    # Run long-only portfolio
-    long_pf = vbt.Portfolio.from_signals(
+    # If size is passed as value (USD), we don't scale it by leverage here
+    # The backtester has already scaled it by init_cash.
+    # If using 'value', vbt treats it as "Amount of CASH to use for the trade"
+    
+    return vbt.Portfolio.from_signals(
        close=close,
        entries=long_entries,
        exits=long_exits,
-        direction='longonly',
-        init_cash=half_cash,
+        short_entries=short_entries,
+        short_exits=short_exits,
+        init_cash=effective_cash,
        fees=fees,
        slippage=slippage,
        freq=freq,
        sl_stop=sl_stop,
        tp_stop=tp_stop,
        sl_trail=sl_trail,
-        size=1.0,
-        size_type='percent',
+        size=size,
+        size_type=size_type,
    )
-    
-    # Run short-only portfolio
-    short_pf = vbt.Portfolio.from_signals(
-        close=close,
-        entries=short_entries,
-        exits=short_exits,
-        direction='shortonly',
-        init_cash=half_cash,
-        fees=fees,
-        slippage=slippage,
-        freq=freq,
-        sl_stop=sl_stop,
-        tp_stop=tp_stop,
-        sl_trail=sl_trail,
-        size=1.0,
-        size_type='percent',
-    )
-    
-    # Log both portfolio stats
-    # TODO: Implement proper portfolio combination
-    logger.info(
-        "Long portfolio: %.2f%% return, Short portfolio: %.2f%% return",
-        long_pf.total_return().mean() * 100,
-        short_pf.total_return().mean() * 100
-    )
-    
-    return long_pf