105 lines
3.7 KiB
Python
105 lines
3.7 KiB
Python
|
|
"""
|
||
|
|
OHLCV data structure and validation utilities.

This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
structures and validation functions for financial market data.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from decimal import Decimal
|
||
|
|
from typing import Dict, Any, Optional
|
||
|
|
|
||
|
|
|
||
|
|
class DataValidationError(Exception):
    """Signal that a piece of OHLCV data did not pass validation."""
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class OHLCVData:
    """OHLCV (Open, High, Low, Close, Volume) data structure.

    Prices and volume may be passed as ``Decimal``, ``float`` or ``int``;
    ``__post_init__`` converts them to ``Decimal`` (via ``str``) and checks
    them. A timestamp without ``tzinfo`` gets ``timezone.utc`` attached.

    Raises:
        DataValidationError: If a price or the volume is not numeric (``bool``
            is rejected), is NaN or infinite, or if ``open``/``close`` do not
            lie within ``[low, high]``.
    """

    symbol: str
    timeframe: str
    timestamp: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal
    trades_count: Optional[int] = None

    @staticmethod
    def _is_numeric(value) -> bool:
        """Return True for Decimal/float/int values, excluding bool."""
        # bool is a subclass of int, but str(True) == "True" cannot be parsed
        # by Decimal, so it must be rejected here rather than fail later.
        return isinstance(value, (Decimal, float, int)) and not isinstance(value, bool)

    def __post_init__(self):
        """Validate OHLCV data after initialization."""
        if not self.timestamp.tzinfo:
            self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)

        # Validate price data
        prices = (self.open, self.high, self.low, self.close)
        if not all(self._is_numeric(price) for price in prices):
            raise DataValidationError("All OHLCV prices must be numeric")

        if not self._is_numeric(self.volume):
            raise DataValidationError("Volume must be numeric")

        # Convert to Decimal for precision; going through str() avoids
        # carrying over the binary representation error of floats.
        self.open = Decimal(str(self.open))
        self.high = Decimal(str(self.high))
        self.low = Decimal(str(self.low))
        self.close = Decimal(str(self.close))
        self.volume = Decimal(str(self.volume))

        # Reject NaN / Infinity explicitly: an ordered comparison against a
        # Decimal NaN raises decimal.InvalidOperation, and infinities would
        # otherwise be able to satisfy the relationship check below.
        for name in ("open", "high", "low", "close", "volume"):
            if not getattr(self, name).is_finite():
                raise DataValidationError(
                    f"Invalid OHLCV data: {name} must be a finite number for {self.symbol}"
                )

        # Validate price relationships
        if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
            raise DataValidationError(
                f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}"
            )
|
||
|
|
|
||
|
|
|
||
|
|
def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
    """
    Validate and convert raw OHLCV data to standardized format.

    The 'timestamp' entry may be a number (treated as a Unix timestamp in
    milliseconds, UTC), an ISO-8601 string (a 'Z' is rewritten to '+00:00'
    before parsing), or a datetime instance. Price and volume entries are
    converted with ``Decimal(str(value))``.

    Args:
        data: Raw OHLCV data dictionary with the keys 'timestamp', 'open',
            'high', 'low', 'close' and 'volume'; 'trades_count' is optional
        symbol: Trading symbol
        timeframe: Timeframe (e.g., '1m', '5m', '1h')

    Returns:
        Validated OHLCVData object

    Raises:
        DataValidationError: If a required field is missing, or if parsing or
            validation fails. Failures after the required-field check are
            reported as "Invalid OHLCV data for <symbol>: <cause>" with the
            original exception attached as ``__cause__``.
    """
    required_fields = ('timestamp', 'open', 'high', 'low', 'close', 'volume')

    # Check required fields. The membership test itself raises TypeError when
    # data is not a container (e.g. None); report that as a validation error
    # instead of leaking the TypeError to the caller.
    try:
        missing = [field for field in required_fields if field not in data]
    except TypeError as e:
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") from e
    if missing:
        raise DataValidationError(f"Missing required field: {missing[0]}")

    try:
        # Parse timestamp
        timestamp = data['timestamp']
        # bool is an int subclass; exclude it so True/False is not silently
        # read as an epoch value and instead falls through to the error below.
        if isinstance(timestamp, (int, float)) and not isinstance(timestamp, bool):
            # Assume Unix timestamp in milliseconds
            timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
        elif isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        elif not isinstance(timestamp, datetime):
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")

        return OHLCVData(
            symbol=symbol,
            timeframe=timeframe,
            timestamp=timestamp,
            open=Decimal(str(data['open'])),
            high=Decimal(str(data['high'])),
            low=Decimal(str(data['low'])),
            close=Decimal(str(data['close'])),
            volume=Decimal(str(data['volume'])),
            trades_count=data.get('trades_count'),
        )
    except Exception as e:
        # Deliberately broad: this is the validation boundary. Besides
        # ValueError/TypeError/KeyError, Decimal raises
        # decimal.InvalidOperation and fromtimestamp can raise
        # OverflowError/OSError; all of them mean "bad input" here.
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") from e
|