Add check_symbols.py for ETH perpetuals filtering and enhance backtester with size handling

- Introduced `check_symbols.py` to load and filter ETH perpetual markets from the OKX exchange using CCXT.
- Updated the backtester to normalize signals to a 5-tuple format, incorporating size management for trades.
- Enhanced portfolio functions to support variable size and leverage adjustments based on initial capital.
- Added a new method in `CryptoQuantClient` for chunked historical data fetching to avoid API limits.
- Improved market symbol normalization in `market.py` to handle different formats.
- Updated regime strategy parameters based on recent research findings for optimal performance.
This commit is contained in:
2026-01-14 09:46:51 +08:00
parent 10bb371054
commit 1e4cb87da3
8 changed files with 617 additions and 111 deletions

View File

@@ -133,12 +133,57 @@ class CryptoQuantClient:
return combined_df
def fetch_history_chunked(
    self,
    symbols: list[str],
    metrics: dict,
    start_date: str,
    end_date: str,
    chunk_months: int = 3
) -> pd.DataFrame:
    """
    Fetch historical data in date-range chunks to avoid API limits.

    The range ``start_date``..``end_date`` is split into consecutive windows
    of ``chunk_months * 30`` days. Each window is requested through
    ``self.fetch_multi_metrics`` and the non-empty results are concatenated,
    de-duplicated on the index (first occurrence wins) and sorted by index.

    Args:
        symbols: Symbols forwarded unchanged to ``fetch_multi_metrics``.
        metrics: Metric mapping forwarded unchanged to ``fetch_multi_metrics``.
        start_date: First day to request, formatted ``YYYYMMDD``.
        end_date: Last day to request, formatted ``YYYYMMDD``.
        chunk_months: Window size in 30-day "months". ``0`` degenerates to
            one request per day.

    Returns:
        The combined frame, or an empty ``pd.DataFrame`` when no chunk
        returned data (including when ``start_date`` is after ``end_date``).

    Raises:
        ValueError: If a date does not match ``%Y%m%d`` or ``chunk_months``
            is negative.
    """
    # A negative window would move `current` backwards and never terminate.
    if chunk_months < 0:
        raise ValueError(f"chunk_months must be non-negative, got {chunk_months}")

    start_dt = datetime.strptime(start_date, "%Y%m%d")
    end_dt = datetime.strptime(end_date, "%Y%m%d")
    chunk_span = timedelta(days=chunk_months * 30)
    one_day = timedelta(days=1)

    all_data = []
    current = start_dt

    # `<=` (not `<`): each window is requested as [current, next_chunk] and
    # the following window starts the day after, so when a window ends on
    # end_dt - 1 the final day still needs its own request. The same applies
    # when start_date == end_date.
    # NOTE(review): assumes fetch_multi_metrics treats both bounds as
    # inclusive - confirm against the API.
    while current <= end_dt:
        next_chunk = min(current + chunk_span, end_dt)

        s_str = current.strftime("%Y%m%d")
        e_str = next_chunk.strftime("%Y%m%d")

        logger.info(f"Processing chunk: {s_str} to {e_str}")
        chunk_df = self.fetch_multi_metrics(symbols, metrics, s_str, e_str)

        if not chunk_df.empty:
            all_data.append(chunk_df)

        current = next_chunk + one_day

        # Be nice to the API between requests; no need to wait after the
        # last one.
        if current <= end_dt:
            time.sleep(1)

    if not all_data:
        return pd.DataFrame()

    # Combine all chunks, dropping rows whose index repeats across chunks.
    full_df = pd.concat(all_data)
    full_df = full_df[~full_df.index.duplicated(keep='first')]
    return full_df.sort_index()
if __name__ == "__main__":
cq = CryptoQuantClient()
# 3 Months Data (Oct 1 2025 - Dec 31 2025)
start = "20251001"
end = "20251231"
# 12 Months Data (Jan 1 2025 - Jan 14 2026)
start = "20250101"
end = "20260114"
metrics = {
"reserves": "exchange-flows/exchange-reserve",
@@ -147,7 +192,7 @@ if __name__ == "__main__":
}
print(f"Fetching training data from {start} to {end}...")
df = cq.fetch_multi_metrics(["btc", "eth"], metrics, start, end)
df = cq.fetch_history_chunked(["btc", "eth"], metrics, start, end)
output_file = "data/cq_training_data.csv"
os.makedirs("data", exist_ok=True)