Add check_symbols.py for ETH perpetuals filtering and enhance backtester with size handling

- Introduced `check_symbols.py` to load and filter ETH perpetual markets from the OKX exchange using CCXT.
- Updated the backtester to normalize signals to a 5-tuple format, incorporating size management for trades.
- Enhanced portfolio functions to support variable size and leverage adjustments based on initial capital.
- Added a new method in `CryptoQuantClient` for chunked historical data fetching to avoid API limits.
- Improved market symbol normalization in `market.py` to handle different formats.
- Updated regime strategy parameters based on recent research findings for optimal performance.
2026-01-14 09:46:51 +08:00
parent 10bb371054
commit 1e4cb87da3
8 changed files with 617 additions and 111 deletions
--- a/engine/cryptoquant.py
+++ b/engine/cryptoquant.py
@@ -133,12 +133,57 @@ class CryptoQuantClient:
                
        return combined_df

+    def fetch_history_chunked(
+        self,
+        symbols: list[str],
+        metrics: dict,
+        start_date: str,
+        end_date: str,
+        chunk_months: int = 3
+    ) -> pd.DataFrame:
+        """
+        Fetch historical data in chunks to avoid API limits.
+
+        Walks the range from start_date to end_date (both ``YYYYMMDD``) in
+        windows of ``chunk_months * 30`` days, calling ``fetch_multi_metrics``
+        once per window. Returns the chunks concatenated, de-duplicated on the
+        index (first row wins) and sorted; empty DataFrame if no chunk had data.
+
+        Raises:
+            ValueError: If ``chunk_months`` is negative or a date is malformed.
+        """
+        if chunk_months < 0:
+            # A negative step moves ``current`` backwards and never terminates.
+            raise ValueError("chunk_months must not be negative")
+        start_dt = datetime.strptime(start_date, "%Y%m%d")
+        end_dt = datetime.strptime(end_date, "%Y%m%d")
+        step = timedelta(days=chunk_months * 30)
+        all_data = []
+        current = start_dt
+        # ``<=`` so a final one-day remainder (or start == end) is still requested.
+        while current <= end_dt:
+            next_chunk = min(current + step, end_dt)
+            s_str = current.strftime("%Y%m%d")
+            e_str = next_chunk.strftime("%Y%m%d")
+            logger.info(f"Processing chunk: {s_str} to {e_str}")
+            chunk_df = self.fetch_multi_metrics(symbols, metrics, s_str, e_str)
+            if not chunk_df.empty:
+                all_data.append(chunk_df)
+            current = next_chunk + timedelta(days=1)
+            if current <= end_dt:
+                time.sleep(1)  # Be nice to API; skip the wait after the last chunk
+        if not all_data:
+            return pd.DataFrame()
+        full_df = pd.concat(all_data)
+        full_df = full_df[~full_df.index.duplicated(keep='first')]
+        return full_df.sort_index()
+
 if __name__ == "__main__":
    cq = CryptoQuantClient()
    
-    # 3 Months Data (Oct 1 2025 - Dec 31 2025)
-    start = "20251001"
-    end = "20251231"
+    # 12 Months Data (Jan 1 2025 - Jan 14 2026)
+    start = "20250101"
+    end = "20260114"
    
    metrics = {
        "reserves": "exchange-flows/exchange-reserve",
@@ -147,7 +192,7 @@ if __name__ == "__main__":
    }
    
    print(f"Fetching training data from {start} to {end}...")
-    df = cq.fetch_multi_metrics(["btc", "eth"], metrics, start, end)
+    df = cq.fetch_history_chunked(["btc", "eth"], metrics, start, end)
    
    output_file = "data/cq_training_data.csv"
    os.makedirs("data", exist_ok=True)