Files
lowkey_backtest/engine/cryptoquant.py
Simon Moisy e6d69ed04d Add CryptoQuant client and regime detection analysis
- Introduced `CryptoQuantClient` for fetching data from the CryptoQuant API.
- Added `regime_detection.py` for advanced regime detection analysis using machine learning.
- Updated dependencies in `pyproject.toml` and `uv.lock` to include `scikit-learn`, `matplotlib`, `plotly`, `requests`, and `python-dotenv`.
- Enhanced `.gitignore` to exclude `regime_results.html` and CSV files.
- Created an interactive HTML plot for regime detection results and saved it as `regime_results.html`.
2026-01-13 16:13:57 +08:00

157 lines
5.6 KiB
Python

import os
import sys
import time
import requests
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
# Load env vars from a .env file (CryptoQuantClient reads CRYPTOQUANT_API_KEY via os.getenv)
load_dotenv()
# Fix path for direct execution: append the parent of this file's directory to sys.path
# so that the `engine` package import below resolves when this file is run as a script
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from engine.logging_config import get_logger
# Logger obtained from the project's logging configuration, keyed by this module's name
logger = get_logger(__name__)
class CryptoQuantClient:
"""
Client for fetching data from CryptoQuant API.
"""
BASE_URL = "https://api.cryptoquant.com/v1"
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.getenv("CRYPTOQUANT_API_KEY")
if not self.api_key:
raise ValueError("CryptoQuant API Key not found. Set CRYPTOQUANT_API_KEY env var.")
self.headers = {
"Authorization": f"Bearer {self.api_key}"
}
def fetch_metric(
self,
metric_path: str,
symbol: str,
start_date: str,
end_date: str,
exchange: str | None = "all_exchange",
window: str = "day"
) -> pd.DataFrame:
"""
Fetch a specific metric from CryptoQuant.
"""
url = f"{self.BASE_URL}/{metric_path}"
params = {
"window": window,
"from": start_date,
"to": end_date,
"limit": 100000
}
if exchange:
params["exchange"] = exchange
logger.info(f"Fetching {metric_path} for {symbol} ({start_date}-{end_date})...")
try:
response = requests.get(url, headers=self.headers, params=params)
response.raise_for_status()
data = response.json()
if 'result' in data and 'data' in data['result']:
df = pd.DataFrame(data['result']['data'])
if not df.empty:
if 'date' in df.columns:
df['timestamp'] = pd.to_datetime(df['date'])
df.set_index('timestamp', inplace=True)
df.sort_index(inplace=True)
return df
return pd.DataFrame()
except Exception as e:
logger.error(f"Error fetching CQ data {metric_path}: {e}")
if 'response' in locals() and hasattr(response, 'text'):
logger.error(f"Response: {response.text}")
return pd.DataFrame()
def fetch_multi_metrics(self, symbols: list[str], metrics: dict, start_date: str, end_date: str):
"""
Fetch multiple metrics for multiple symbols and combine them.
"""
combined_df = pd.DataFrame()
for symbol in symbols:
asset = symbol.lower()
for metric_name, api_path in metrics.items():
full_path = f"{asset}/{api_path}"
# Some metrics (like funding rates) might need specific exchange vs all_exchange
# Defaulting to all_exchange is usually safe for flows, but check specific logic if needed
exchange_param = "all_exchange"
if "funding-rates" in api_path:
# For funding rates, 'all_exchange' might not be valid or might be aggregated
# Let's try 'binance' as a proxy for market sentiment if all fails,
# or keep 'all_exchange' if supported.
# Based on testing, 'all_exchange' is standard for flows.
pass
df = self.fetch_metric(full_path, asset, start_date, end_date, exchange=exchange_param)
if not df.empty:
target_col = None
# Heuristic to find the value column
candidates = ['funding_rate', 'reserve', 'inflow_total', 'outflow_total', 'open_interest', 'ratio', 'value']
for col in df.columns:
if col in candidates:
target_col = col
break
if not target_col:
# Fallback: take first numeric col that isn't date
for col in df.columns:
if col not in ['date', 'datetime', 'timestamp_str', 'block_height']:
target_col = col
break
if target_col:
col_name = f"{asset}_{metric_name}"
subset = df[[target_col]].rename(columns={target_col: col_name})
if combined_df.empty:
combined_df = subset
else:
combined_df = combined_df.join(subset, how='outer')
time.sleep(0.2)
return combined_df
if __name__ == "__main__":
    # Script entry point: download a fixed BTC/ETH sample and store it as CSV.
    client = CryptoQuantClient()

    # 3 Months Data (Oct 1 2025 - Dec 31 2025)
    start, end = "20251001", "20251231"

    # Column label -> endpoint path below the asset segment.
    metrics = {
        "reserves": "exchange-flows/exchange-reserve",
        "inflow": "exchange-flows/inflow",
        "funding": "market-data/funding-rates",
    }

    print(f"Fetching training data from {start} to {end}...")
    training_df = client.fetch_multi_metrics(["btc", "eth"], metrics, start, end)

    # Make sure the target directory exists before writing the CSV.
    output_file = "data/cq_training_data.csv"
    os.makedirs("data", exist_ok=True)
    training_df.to_csv(output_file)

    row_count = len(training_df)
    print(f"\nSaved {row_count} rows to {output_file}")
    print(training_df.head())