- Extracted `OHLCVData` and its validation logic into a new `common/ohlcv_data.py` module, promoting better organization and reusability.
- Updated `BaseDataCollector` to use the new `validate_ohlcv_data` function for data validation, improving code clarity and maintainability.
- Refactored imports in `data/__init__.py` to reflect the new structure, ensuring consistent access to common data types and exceptions.
- Removed redundant data validation logic from `BaseDataCollector`, streamlining its responsibilities.
- Added unit tests for `OHLCVData` and the validation functions to ensure correctness and reliability.

These changes improve the architecture of the data module, aligning with project standards for maintainability and performance.
105 lines
3.7 KiB
Python
105 lines
3.7 KiB
Python
"""
|
|
OHLCV data structure and validation utilities.
|
|
|
|
This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
|
|
structures and validation functions for financial market data.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from decimal import Decimal
|
|
from typing import Dict, Any, Optional
|
|
|
|
|
|
class DataValidationError(Exception):
    """Signal that a piece of OHLCV data failed validation."""
|
|
|
|
|
|
@dataclass
class OHLCVData:
    """OHLCV (Open, High, Low, Close, Volume) data structure.

    Prices and volume may be supplied as ``Decimal``, ``float`` or ``int``;
    ``__post_init__`` converts them to ``Decimal`` and validates the candle.

    Raises:
        DataValidationError: On construction, if the timestamp is not a
            ``datetime``, if any price or the volume is non-numeric
            (``bool`` included) or non-finite (NaN/Infinity), or if open or
            close fall outside the ``[low, high]`` range.
    """

    symbol: str
    timeframe: str
    timestamp: datetime  # naive values are stamped as UTC in __post_init__
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal
    trades_count: Optional[int] = None

    @staticmethod
    def _is_numeric(value: object) -> bool:
        """Return True for Decimal/float/int values, excluding bool."""
        # bool is a subclass of int, but str(True) is not a valid Decimal
        # literal; reject it here so it cannot fail later with
        # decimal.InvalidOperation.
        return isinstance(value, (Decimal, float, int)) and not isinstance(value, bool)

    def __post_init__(self):
        """Normalize and validate OHLCV data after initialization."""
        if not isinstance(self.timestamp, datetime):
            raise DataValidationError(
                f"Timestamp must be a datetime, got {type(self.timestamp).__name__}"
            )
        if self.timestamp.tzinfo is None:
            self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)

        # Validate price data
        prices = (self.open, self.high, self.low, self.close)
        if not all(self._is_numeric(price) for price in prices):
            raise DataValidationError("All OHLCV prices must be numeric")

        if not self._is_numeric(self.volume):
            raise DataValidationError("Volume must be numeric")

        # Convert to Decimal for precision. Going through str() keeps the
        # short repr of a float (0.1 -> Decimal('0.1')) instead of its full
        # binary expansion.
        for name in ("open", "high", "low", "close", "volume"):
            value = Decimal(str(getattr(self, name)))
            # Ordering comparisons against NaN raise decimal.InvalidOperation
            # and infinities are never valid market data, so reject both
            # before the relationship check below.
            if not value.is_finite():
                raise DataValidationError(
                    f"Non-finite {name} value for {self.symbol}: {value}"
                )
            setattr(self, name, value)

        # Validate price relationships: open and close must lie within
        # [low, high] (which also implies low <= high).
        if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
            raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
|
|
|
|
|
|
def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
    """
    Validate and convert raw OHLCV data to standardized format.

    Args:
        data: Raw OHLCV data dictionary. Must contain 'timestamp', 'open',
            'high', 'low', 'close' and 'volume'; 'trades_count' is optional.
            'timestamp' may be a Unix timestamp in milliseconds (int/float),
            an ISO-8601 string (a 'Z' suffix is read as UTC) or a datetime.
        symbol: Trading symbol
        timeframe: Timeframe (e.g., '1m', '5m', '1h')

    Returns:
        Validated OHLCVData object

    Raises:
        DataValidationError: If a required field is missing or any value
            cannot be parsed or validated. The triggering exception is
            attached as ``__cause__``.
    """
    required_fields = ('timestamp', 'open', 'high', 'low', 'close', 'volume')

    # Check required fields
    try:
        missing = [field for field in required_fields if field not in data]
    except TypeError as e:
        # ``data`` does not support membership tests at all (e.g. None).
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") from e
    if missing:
        raise DataValidationError(f"Missing required field: {missing[0]}")

    try:
        # Parse timestamp
        timestamp = data['timestamp']
        if isinstance(timestamp, bool):
            # bool is an int subclass; never treat True/False as epoch ms.
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
        elif isinstance(timestamp, (int, float)):
            # Assume Unix timestamp in milliseconds
            timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
        elif isinstance(timestamp, str):
            # Older Python versions' fromisoformat() rejects a trailing 'Z'.
            timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        elif not isinstance(timestamp, datetime):
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")

        return OHLCVData(
            symbol=symbol,
            timeframe=timeframe,
            timestamp=timestamp,
            open=Decimal(str(data['open'])),
            high=Decimal(str(data['high'])),
            low=Decimal(str(data['low'])),
            close=Decimal(str(data['close'])),
            volume=Decimal(str(data['volume'])),
            trades_count=data.get('trades_count'),
        )
    except Exception as e:
        # Covers ValueError/TypeError/KeyError, decimal.InvalidOperation and
        # out-of-range timestamps. A DataValidationError raised above or by
        # OHLCVData is deliberately re-wrapped too, so the message always
        # carries the symbol (existing behavior); chaining keeps the cause.
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}") from e