"""
Data status and symbol information endpoints.
"""
import logging
from pathlib import Path
from typing import Optional

import pandas as pd
from fastapi import APIRouter

from api.models.schemas import DataStatusResponse, SymbolInfo

logger = logging.getLogger(__name__)

router = APIRouter()

# Base path for CCXT data
DATA_BASE = Path(__file__).parent.parent.parent / "data" / "ccxt"

# Date format used for the start/end dates reported per symbol.
# ISO-style dates also compare correctly as plain strings, which the
# aggregation in _summarize_symbol_dir relies on.
_DATE_FORMAT = "%Y-%m-%d"


def _subdirectories(path: Path) -> list[Path]:
    """Return the immediate sub-directories of *path* in sorted order."""
    return sorted(child for child in path.iterdir() if child.is_dir())


def _summarize_csv(csv_file: Path) -> tuple[int, Optional[str], Optional[str]]:
    """
    Summarize a single timeframe CSV file.

    Returns a ``(row_count, start_date, end_date)`` tuple. The dates are
    formatted with ``_DATE_FORMAT`` and are ``None`` when the file has no
    rows or its ``timestamp`` column could not be interpreted as dates.
    An unreadable file yields ``(0, None, None)``.
    """
    try:
        # Only the timestamp column is needed for the row count and the
        # date range, so the other columns are not loaded.
        df = pd.read_csv(
            csv_file,
            usecols=["timestamp"],
            parse_dates=["timestamp"],
        )
    except Exception:
        # Best-effort scan: one broken file must not take down the whole
        # inventory endpoint, but it should leave a trace in the logs.
        logger.warning(
            "Could not read data file %s; skipping its statistics",
            csv_file,
            exc_info=True,
        )
        return 0, None, None

    if df.empty:
        return 0, None, None

    timestamps = df["timestamp"]
    try:
        first = timestamps.min().strftime(_DATE_FORMAT)
        last = timestamps.max().strftime(_DATE_FORMAT)
    except (AttributeError, TypeError, ValueError):
        # The column did not parse as dates (or contains only NaT), so
        # min()/max() are not timestamps that can be formatted.
        logger.warning(
            "No usable timestamps in data file %s",
            csv_file,
            exc_info=True,
        )
        return len(df), None, None

    return len(df), first, last


def _summarize_symbol_dir(
    symbol_dir: Path, exchange: str, market_type: str
) -> Optional[SymbolInfo]:
    """
    Build the SymbolInfo for one symbol directory.

    Every ``*.csv`` file in the directory is one timeframe. Statistics are
    aggregated over all timeframes: the earliest start date, the latest end
    date and the total number of rows. Returns ``None`` when the directory
    contains no CSV files.
    """
    timeframes = []
    start_date = None
    end_date = None
    row_count = 0

    # Sorted so the result does not depend on filesystem listing order.
    for csv_file in sorted(symbol_dir.glob("*.csv")):
        # A timeframe is listed even when its file turns out to be unreadable.
        timeframes.append(csv_file.stem)

        rows, first, last = _summarize_csv(csv_file)
        row_count += rows
        if first is not None and (start_date is None or first < start_date):
            start_date = first
        if last is not None and (end_date is None or last > end_date):
            end_date = last

    if not timeframes:
        return None

    return SymbolInfo(
        symbol=symbol_dir.name,
        exchange=exchange,
        market_type=market_type,
        timeframes=sorted(timeframes),
        start_date=start_date,
        end_date=end_date,
        row_count=row_count,
    )


def _scan_available_data() -> list[SymbolInfo]:
    """
    Scan the data directory for available symbols and timeframes.

    Returns list of SymbolInfo with date ranges and row counts, ordered by
    exchange, market type and symbol. Returns an empty list when the data
    directory does not exist.
    """
    symbols: list[SymbolInfo] = []

    # is_dir() rather than exists(): a stray file at this path must not
    # make iterdir() raise.
    if not DATA_BASE.is_dir():
        return symbols

    # Structure: data/ccxt/{exchange}/{market_type}/{symbol}/{timeframe}.csv
    for exchange_dir in _subdirectories(DATA_BASE):
        for market_dir in _subdirectories(exchange_dir):
            for symbol_dir in _subdirectories(market_dir):
                info = _summarize_symbol_dir(
                    symbol_dir,
                    exchange=exchange_dir.name,
                    market_type=market_dir.name,
                )
                if info is not None:
                    symbols.append(info)

    return symbols


# NOTE(review): both endpoints below are ``async def`` but the scan performs
# blocking file I/O, which holds up the event loop while it runs.


@router.get("/symbols", response_model=DataStatusResponse)
async def get_symbols():
    """
    Get list of available symbols with their data ranges.

    Scans the local data directory for downloaded OHLCV data.
    """
    symbols = _scan_available_data()
    return DataStatusResponse(symbols=symbols)


@router.get("/data/status", response_model=DataStatusResponse)
async def get_data_status():
    """
    Get detailed data inventory status.

    Alias for /symbols with additional metadata.
    """
    # NOTE: currently this returns exactly the same payload as /symbols;
    # no additional metadata is attached here.
    symbols = _scan_available_data()
    return DataStatusResponse(symbols=symbols)