""" OHLCV data structure and validation utilities. This module provides standardized OHLCV (Open, High, Low, Close, Volume) data structures and validation functions for financial market data. """ from dataclasses import dataclass from datetime import datetime, timezone from decimal import Decimal from typing import Dict, Any, Optional class DataValidationError(Exception): """Exception raised when OHLCV data validation fails.""" pass @dataclass class OHLCVData: """OHLCV (Open, High, Low, Close, Volume) data structure.""" symbol: str timeframe: str timestamp: datetime open: Decimal high: Decimal low: Decimal close: Decimal volume: Decimal trades_count: Optional[int] = None def __post_init__(self): """Validate OHLCV data after initialization.""" if not self.timestamp.tzinfo: self.timestamp = self.timestamp.replace(tzinfo=timezone.utc) # Validate price data if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]): raise DataValidationError("All OHLCV prices must be numeric") if not isinstance(self.volume, (Decimal, float, int)): raise DataValidationError("Volume must be numeric") # Convert to Decimal for precision self.open = Decimal(str(self.open)) self.high = Decimal(str(self.high)) self.low = Decimal(str(self.low)) self.close = Decimal(str(self.close)) self.volume = Decimal(str(self.volume)) # Validate price relationships if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high): raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}") def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData: """ Validate and convert raw OHLCV data to standardized format. Args: data: Raw OHLCV data dictionary symbol: Trading symbol timeframe: Timeframe (e.g., '1m', '5m', '1h') Returns: Validated OHLCVData object Raises: DataValidationError: If data validation fails """ required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume'] # Check required fields for field in required_fields: if field not in data: raise DataValidationError(f"Missing required field: {field}") try: # Parse timestamp timestamp = data['timestamp'] if isinstance(timestamp, (int, float)): # Assume Unix timestamp in milliseconds timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc) elif isinstance(timestamp, str): timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00')) elif not isinstance(timestamp, datetime): raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}") return OHLCVData( symbol=symbol, timeframe=timeframe, timestamp=timestamp, open=Decimal(str(data['open'])), high=Decimal(str(data['high'])), low=Decimal(str(data['low'])), close=Decimal(str(data['close'])), volume=Decimal(str(data['volume'])), trades_count=data.get('trades_count') ) except (ValueError, TypeError, KeyError) as e: raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") except Exception as e: # Catch any other exceptions (like Decimal InvalidOperation) raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")