Refactor data module to enhance modularity and maintainability
- Extracted `OHLCVData` and validation logic into a new `common/ohlcv_data.py` module, promoting better organization and reusability.
- Updated `BaseDataCollector` to use the new `validate_ohlcv_data` function, improving data validation and code clarity.
- Refactored imports in `data/__init__.py` to reflect the new structure, ensuring consistent access to common data types and exceptions.
- Removed redundant data validation logic from `BaseDataCollector`, streamlining its responsibilities.
- Added unit tests for `OHLCVData` and the validation functions to ensure correctness and reliability.

These changes improve the architecture of the data module, aligning with project standards for maintainability and performance.
This commit is contained in:
parent
3db8fb1c41
commit
33f2110f19
@@ -6,9 +6,10 @@ processing and validating the data, and storing it in the database.
 """
 
 from .base_collector import (
-    BaseDataCollector, DataCollectorError, DataValidationError,
-    CollectorStatus, OHLCVData
+    BaseDataCollector, DataCollectorError
 )
+from .collector.collector_state_telemetry import CollectorStatus
+from .common.ohlcv_data import OHLCVData, DataValidationError
 from .common.data_types import DataType, MarketDataPoint
 from .collector_manager import CollectorManager, ManagerStatus, CollectorConfig
@@ -18,43 +18,7 @@ from .collector.collector_state_telemetry import CollectorStatus, CollectorState
 from .collector.collector_connection_manager import ConnectionManager
 from .collector.collector_callback_dispatcher import CallbackDispatcher
 from .common.data_types import DataType, MarketDataPoint
-
-
-@dataclass
-class OHLCVData:
-    """OHLCV (Open, High, Low, Close, Volume) data structure."""
-    symbol: str
-    timeframe: str
-    timestamp: datetime
-    open: Decimal
-    high: Decimal
-    low: Decimal
-    close: Decimal
-    volume: Decimal
-    trades_count: Optional[int] = None
-
-    def __post_init__(self):
-        """Validate OHLCV data after initialization."""
-        if not self.timestamp.tzinfo:
-            self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)
-
-        # Validate price data
-        if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
-            raise DataValidationError("All OHLCV prices must be numeric")
-
-        if not isinstance(self.volume, (Decimal, float, int)):
-            raise DataValidationError("Volume must be numeric")
-
-        # Convert to Decimal for precision
-        self.open = Decimal(str(self.open))
-        self.high = Decimal(str(self.high))
-        self.low = Decimal(str(self.low))
-        self.close = Decimal(str(self.close))
-        self.volume = Decimal(str(self.volume))
-
-        # Validate price relationships
-        if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
-            raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")
+from .common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data
 
 
 class DataCollectorError(Exception):
@@ -62,11 +26,6 @@ class DataCollectorError(Exception):
     pass
 
 
-class DataValidationError(DataCollectorError):
-    """Exception raised when data validation fails."""
-    pass
-
-
 class ConnectionError(DataCollectorError):
     """Exception raised when connection to data source fails."""
     pass
@@ -493,7 +452,17 @@ class BaseDataCollector(ABC):
 
         Returns:
             Dictionary containing status information
         """
-        return self._state_telemetry.get_status()
+        status = self._state_telemetry.get_status()
+
+        # Add BaseDataCollector specific information
+        status.update({
+            'symbols': list(self.symbols),
+            'data_types': [dt.value for dt in self.data_types],
+            'timeframes': self.timeframes,
+            'auto_restart': self.auto_restart
+        })
+
+        return status
 
     def get_health_status(self) -> Dict[str, Any]:
         """
@@ -553,38 +522,7 @@ class BaseDataCollector(ABC):
 
         Raises:
             DataValidationError: If data validation fails
         """
-        required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
-
-        # Check required fields
-        for field in required_fields:
-            if field not in data:
-                raise DataValidationError(f"Missing required field: {field}")
-
-        try:
-            # Parse timestamp
-            timestamp = data['timestamp']
-            if isinstance(timestamp, (int, float)):
-                # Assume Unix timestamp in milliseconds
-                timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
-            elif isinstance(timestamp, str):
-                timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
-            elif not isinstance(timestamp, datetime):
-                raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")
-
-            return OHLCVData(
-                symbol=symbol,
-                timeframe=timeframe,
-                timestamp=timestamp,
-                open=Decimal(str(data['open'])),
-                high=Decimal(str(data['high'])),
-                low=Decimal(str(data['low'])),
-                close=Decimal(str(data['close'])),
-                volume=Decimal(str(data['volume'])),
-                trades_count=data.get('trades_count')
-            )
-
-        except (ValueError, TypeError, KeyError) as e:
-            raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
+        return validate_ohlcv_data(data, symbol, timeframe)
 
     def __repr__(self) -> str:
         """String representation of the collector."""
105 data/common/ohlcv_data.py Normal file
@@ -0,0 +1,105 @@
"""
OHLCV data structure and validation utilities.

This module provides standardized OHLCV (Open, High, Low, Close, Volume) data
structures and validation functions for financial market data.
"""

from dataclasses import dataclass
from datetime import datetime, timezone
from decimal import Decimal
from typing import Dict, Any, Optional


class DataValidationError(Exception):
    """Exception raised when OHLCV data validation fails."""
    pass


@dataclass
class OHLCVData:
    """OHLCV (Open, High, Low, Close, Volume) data structure."""
    symbol: str
    timeframe: str
    timestamp: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal
    trades_count: Optional[int] = None

    def __post_init__(self):
        """Validate OHLCV data after initialization."""
        if not self.timestamp.tzinfo:
            self.timestamp = self.timestamp.replace(tzinfo=timezone.utc)

        # Validate price data
        if not all(isinstance(price, (Decimal, float, int)) for price in [self.open, self.high, self.low, self.close]):
            raise DataValidationError("All OHLCV prices must be numeric")

        if not isinstance(self.volume, (Decimal, float, int)):
            raise DataValidationError("Volume must be numeric")

        # Convert to Decimal for precision
        self.open = Decimal(str(self.open))
        self.high = Decimal(str(self.high))
        self.low = Decimal(str(self.low))
        self.close = Decimal(str(self.close))
        self.volume = Decimal(str(self.volume))

        # Validate price relationships
        if not (self.low <= self.open <= self.high and self.low <= self.close <= self.high):
            raise DataValidationError(f"Invalid OHLCV data: prices don't match expected relationships for {self.symbol}")


def validate_ohlcv_data(data: Dict[str, Any], symbol: str, timeframe: str) -> OHLCVData:
    """
    Validate and convert raw OHLCV data to standardized format.

    Args:
        data: Raw OHLCV data dictionary
        symbol: Trading symbol
        timeframe: Timeframe (e.g., '1m', '5m', '1h')

    Returns:
        Validated OHLCVData object

    Raises:
        DataValidationError: If data validation fails
    """
    required_fields = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    # Check required fields
    for field in required_fields:
        if field not in data:
            raise DataValidationError(f"Missing required field: {field}")

    try:
        # Parse timestamp
        timestamp = data['timestamp']
        if isinstance(timestamp, (int, float)):
            # Assume Unix timestamp in milliseconds
            timestamp = datetime.fromtimestamp(timestamp / 1000, tz=timezone.utc)
        elif isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        elif not isinstance(timestamp, datetime):
            raise DataValidationError(f"Invalid timestamp format: {type(timestamp)}")

        return OHLCVData(
            symbol=symbol,
            timeframe=timeframe,
            timestamp=timestamp,
            open=Decimal(str(data['open'])),
            high=Decimal(str(data['high'])),
            low=Decimal(str(data['low'])),
            close=Decimal(str(data['close'])),
            volume=Decimal(str(data['volume'])),
            trades_count=data.get('trades_count')
        )

    except (ValueError, TypeError, KeyError) as e:
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
    except Exception as e:
        # Catch any other exceptions (like decimal.InvalidOperation)
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {e}")
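For reference, a minimal usage sketch of the new helper (the payload values are illustrative):

```python
from data.common.ohlcv_data import DataValidationError, validate_ohlcv_data

raw = {
    'timestamp': 1717060800000,  # Unix milliseconds; parsed as UTC
    'open': '68000.1', 'high': '68120.0', 'low': '67950.5', 'close': '68100.0',
    'volume': '12.5',
}

try:
    candle = validate_ohlcv_data(raw, symbol='BTC-USDT', timeframe='5m')
    print(candle.close)  # Decimal('68100.0'); prices are coerced to Decimal
except DataValidationError as exc:
    print(f"Rejected candle: {exc}")
```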
@@ -1,89 +0,0 @@
# Simplified Crypto Trading Bot Platform: Product Requirements Document

## Executive Summary

This simplified PRD addresses the need for a rapid-deployment crypto trading bot platform designed for internal testing and strategy development. The platform eliminates microservices complexity in favor of a monolithic architecture that can be functional within 1-2 weeks while supporting approximately 10 concurrent bots. The system focuses on core functionality including data collection, strategy execution, backtesting, and visualization without requiring advanced monitoring or orchestration tools.

## System Architecture Overview

The platform follows a streamlined monolithic design that consolidates all components within a single application boundary. This approach enables rapid development while maintaining clear separation between functional modules for future scalability. The architecture consists of six core components working together: Data Collection Module for exchange connectivity, Strategy Engine for unified signal generation, Bot Manager for concurrent bot orchestration, PostgreSQL database for data persistence, Backtesting Engine for historical simulation, and Dashboard for visualization and control.

## Simplified Technical Stack

### Core Technologies

The platform utilizes a Python-based technology stack optimized for rapid development. The backend employs Python 3.10+ with FastAPI or Flask for API services, PostgreSQL 14+ with TimescaleDB extension for time-series optimization, and Redis for real-time pub/sub messaging. The frontend leverages Dash with Plotly for interactive visualization and bot control interfaces.

### Database Design

The database schema emphasizes simplicity while supporting essential trading operations. Core tables include raw_market_data for exchange data storage, candles for OHLCV aggregation, strategies for algorithm definitions, bots for instance management, signals for trading decisions, trades for execution records, and bot_portfolio for performance tracking.

## Development Methodology

### Two-Week Implementation Timeline

The development follows a structured three-phase approach designed for rapid deployment. Phase 1 (Days 1-5) establishes foundational components including database setup, data collection implementation, and basic visualization. Phase 2 (Days 6-10) completes core functionality with backtesting engine development, trading logic implementation, and dashboard enhancement. Phase 3 (Days 11-14) focuses on system refinement, comprehensive testing, and deployment preparation.

### Strategy Implementation Example

The platform supports multiple trading strategies through a unified interface design. A simple moving average crossover strategy demonstrates the system's capability to generate buy and sell signals based on technical indicators. This example strategy shows how the system processes market data, calculates moving averages, generates trading signals, and tracks portfolio performance over time. The visualization includes price movements, moving average lines, signal markers, and portfolio value progression.

## Backtesting and Performance Analysis

### Strategy Validation Framework

The backtesting engine enables comprehensive strategy testing using historical market data. The system calculates key performance metrics including total returns, Sharpe ratios, maximum drawdown, and win/loss ratios to evaluate strategy effectiveness.

### Portfolio Management

The platform tracks portfolio allocation and performance throughout strategy execution. Real-time monitoring capabilities show the distribution between cryptocurrency holdings and cash reserves.

## Simplified Data Flow

### Real-Time Processing

The data collection module connects to exchange APIs to retrieve market information including order books, trades, and candlestick data. Raw data is stored in PostgreSQL while processed information is published through Redis channels for real-time distribution to active trading bots.

### Signal Generation and Execution

Strategies subscribe to relevant data streams and generate trading signals based on configured algorithms. The bot manager validates signals against portfolio constraints and executes simulated or live trades according to bot configurations.

## Future Scalability Considerations

### Microservices Migration Path

While implementing a monolithic architecture for rapid deployment, the system design maintains clear component boundaries that facilitate future extraction into microservices. API-first design principles ensure internal components communicate through well-defined interfaces that can be externalized as needed.

### Authentication and Multi-User Support

The current single-user design can be extended to support multiple users through role-based access control implementation. The database schema accommodates user management tables and permission structures without requiring significant architectural changes.

### Advanced Monitoring Integration

The simplified monitoring approach can be enhanced with Prometheus and Grafana integration when scaling requirements justify the additional complexity. Current basic monitoring provides foundation metrics that can be extended to comprehensive observability systems.

## Technical Implementation Details

### Time Series Data Management

The platform implements proper time aggregation aligned with exchange standards to ensure accurate candle formation. Timestamp alignment follows the right-aligned methodology, where a 5-minute candle covering 09:00:00-09:05:00 receives the 09:05:00 timestamp.

### Performance Optimization

Database indexing on timestamp and symbol fields ensures efficient time-series queries. Connection pooling prevents database connection leaks while prepared statements optimize query execution. Memory management includes proper cleanup of data objects after processing to maintain system stability.

## Success Metrics and Validation

### Development Milestones

Platform success is measured through specific deliverables including core functionality completion within 14 days, system stability maintenance at 99% uptime during internal testing, successful backtesting of at least 3 different strategies, and concurrent operation of 2+ bots for 72+ hours.

### Strategy Testing Capabilities

The system enables comprehensive strategy validation through historical simulation, real-time testing with virtual portfolios, and performance comparison across multiple algorithms. Backtesting results provide insights into strategy effectiveness before live deployment.

## Conclusion

This simplified crypto trading bot platform balances rapid development requirements with future scalability needs. The monolithic architecture enables deployment within 1-2 weeks while maintaining architectural flexibility for future enhancements. Clear component separation, comprehensive database design, and strategic technology choices create a foundation that supports both immediate testing objectives and long-term platform evolution.

The platform's focus on essential functionality without unnecessary complexity ensures teams can begin strategy testing quickly while building toward more sophisticated implementations as requirements expand. This approach maximizes development velocity while preserving options for future architectural evolution and feature enhancement.
@@ -1,608 +0,0 @@
# Simplified Crypto Trading Bot Platform: Product Requirements Document (PRD)

**Version:** 1.0
**Date:** May 30, 2025
**Author:** Vasily
**Status:** Draft

## Executive Summary

This PRD outlines the development of a simplified crypto trading bot platform that enables strategy testing, development, and execution without the complexity of microservices and advanced monitoring. The goal is to create a functional system within 1-2 weeks that allows for strategy testing while establishing a foundation that can scale in the future. The platform addresses key requirements including data collection, strategy execution, visualization, and backtesting capabilities in a monolithic architecture optimized for internal use.

## Current Requirements & Constraints

- **Speed to Deployment**: System must be functional within 1-2 weeks
- **Scale**: Support for 5-10 concurrent trading bots
- **Architecture**: Monolithic application instead of microservices
- **User Access**: Internal use only initially (no multi-user authentication)
- **Infrastructure**: Simplified deployment without Kubernetes/Docker Swarm
- **Monitoring**: Basic logging for modules

## System Architecture

### High-Level Architecture

The platform will follow a monolithic architecture pattern to enable rapid development while providing clear separation between components:

### Data Flow Architecture

```
OKX Exchange API (WebSocket)
        ↓
Data Collector → OHLCV Aggregator → PostgreSQL (market_data)
        ↓                  ↓
[Optional] Raw Trade Storage   Redis Pub/Sub → Strategy Engine (JSON configs)
        ↓                                ↓
Files/Database (raw_trades)    Signal Generation → Bot Manager
                                         ↓
                     PostgreSQL (signals, trades, bot_performance)
                                         ↓
                     Dashboard (REST API) ← PostgreSQL (historical data)
                                         ↑
                     Real-time Updates ← Redis Channels
```

**Data Processing Priority**:
1. **Real-time**: Raw data → OHLCV candles → Redis → Bots (primary flow)
2. **Historical**: OHLCV data from PostgreSQL for backtesting and charts
3. **Advanced Analysis**: Raw trade data (if stored) for detailed backtesting

### Redis Channel Design

```python
# Real-time market data distribution
MARKET_DATA_CHANNEL = "market:{symbol}"      # OHLCV updates
BOT_SIGNALS_CHANNEL = "signals:{bot_id}"     # Trading decisions
BOT_STATUS_CHANNEL = "status:{bot_id}"       # Bot lifecycle events
SYSTEM_EVENTS_CHANNEL = "system:events"      # Global notifications
```
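As a sketch of how these channels might be wired up with the standard `redis` client (the candle payload shape is an assumption, not part of the spec):

```python
import json
import redis

r = redis.Redis()

# Collector side: publish a completed candle to the symbol channel
candle = {'symbol': 'BTC-USDT', 'timeframe': '1m', 'close': '68100.0'}
r.publish(f"market:{candle['symbol']}", json.dumps(candle))

# Bot side: subscribe and react to candle updates
pubsub = r.pubsub()
pubsub.subscribe('market:BTC-USDT')
for message in pubsub.listen():
    if message['type'] == 'message':
        update = json.loads(message['data'])
        # hand the update off to the strategy engine here
        print(update['close'])
```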
### Configuration Strategy

**PostgreSQL for**: Market data, bot instances, trades, signals, performance metrics
**JSON files for**: Strategy parameters, bot configurations (rapid testing and parameter tuning)

```json
// config/strategies/ema_crossover.json
{
  "strategy_name": "EMA_Crossover",
  "parameters": {
    "fast_period": 12,
    "slow_period": 26,
    "risk_percentage": 0.02
  }
}

// config/bots/bot_001.json
{
  "bot_id": "bot_001",
  "strategy_file": "ema_crossover.json",
  "symbol": "BTC-USDT",
  "virtual_balance": 10000,
  "enabled": true
}
```
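A minimal loader for this layout could look like the following (paths and key names taken from the examples above; the helper itself is illustrative, not part of the spec):

```python
import json
from pathlib import Path

CONFIG_DIR = Path('config')

def load_bot_config(bot_file: str) -> dict:
    """Load a bot config and inline its referenced strategy parameters."""
    bot = json.loads((CONFIG_DIR / 'bots' / bot_file).read_text())
    strategy = json.loads((CONFIG_DIR / 'strategies' / bot['strategy_file']).read_text())
    bot['strategy'] = strategy
    return bot

config = load_bot_config('bot_001.json')
print(config['strategy']['parameters']['fast_period'])  # 12
```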
### Error Handling Strategy

**Bot Crash Recovery**:
- Monitor bot processes every 30 seconds
- Auto-restart crashed bots if status = 'active'
- Log all crashes with stack traces
- Maximum 3 restart attempts per hour

**Exchange Connection Issues**:
- Retry with exponential backoff (1s, 2s, 4s, 8s, max 60s), as sketched below
- Switch to backup WebSocket connection if available
- Log connection quality metrics

**Database Errors**:
- Continue operation with in-memory cache for up to 5 minutes
- Queue operations for retry when connection restored
- Alert on prolonged database disconnection

**Application Restart Recovery**:
- Read bot states from database on startup
- Restore active bots to 'active' status
- Resume data collection for all monitored symbols
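A sketch of the reconnect policy described above (the `connect` callable stands in for the real WebSocket client):

```python
import time

def connect_with_backoff(connect, max_delay: float = 60.0) -> None:
    """Retry with exponential backoff: 1s, 2s, 4s, 8s, ... capped at max_delay."""
    delay = 1.0
    while True:
        try:
            connect()
            return  # connected successfully
        except ConnectionError as exc:
            print(f"connect failed ({exc}); retrying in {delay:.0f}s")
            time.sleep(delay)
            delay = min(delay * 2, max_delay)
```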
### Component Details and Functional Requirements

1. **Data Collection Module**
   - Connect to exchange APIs (OKX initially) via WebSocket
   - Aggregate real-time trades into OHLCV candles (1m, 5m, 15m, 1h, 4h, 1d)
   - Store OHLCV data in PostgreSQL for bot operations and backtesting
   - Send real-time candle updates through Redis
   - Optional: Store raw trade data for advanced backtesting

**FR-001: Unified Data Provider Interface**
- Support multiple exchanges through standardized adapters
- Real-time OHLCV aggregation with WebSocket connections
- Primary focus on candle data, raw data storage optional
- Data validation and error handling mechanisms

**FR-002: Market Data Processing**
- OHLCV aggregation with configurable timeframes (1m base, higher timeframes derived)
- Technical indicator calculation (SMA, EMA, RSI, MACD, Bollinger Bands) on OHLCV data
- Data normalization across different exchanges
- Time alignment following exchange standards (right-aligned candles)

2. **Strategy Engine**
   - Provide unified interface for all trading strategies
   - Support multiple strategy types with common parameter structure
   - Generate trading signals based on market data
   - Log strategy performance and signals
   - Implement each strategy as a class

**FR-003: Strategy Framework**
- Base strategy class with standardized interface
- Support for multiple strategy types
- Parameter configuration and optimization tools (JSON for the parameters)
- Signal generation with confidence scoring

**FR-004: Signal Processing**
- Real-time signal calculation and validation
- Signal persistence for analysis and debugging
- Multi-timeframe analysis capabilities
- Custom indicator development support

3. **Bot Manager**
   - Create and manage up to 10 concurrent trading bots
   - Configure bot parameters and associated strategies
   - Start/stop individual bots
   - Track bot status and performance

**FR-005: Bot Lifecycle Management**
- Bot creation with strategy and parameter selection
- Start/stop/pause functionality with state persistence
- Configuration management
- Resource allocation and monitoring (in future)

**FR-006: Portfolio Management**
- Position tracking and balance management
- Risk management controls (stop-loss, take-profit, position sizing)
- Multi-bot coordination and conflict resolution (in future)
- Real-time portfolio valuation (in future)

4. **Trading Execution**
   - Simulate or execute trades based on configuration
   - Store trade information in the database

**FR-007: Order Management**
- Order placement with multiple order types (market, limit, stop)
- Order tracking and status monitoring (in future)
- Execution confirmation and reconciliation (in future)
- Fee calculation and tracking (in future)

**FR-008: Risk Controls**
- Pre-trade risk validation
- Position limits and exposure controls (in future)
- Emergency stop mechanisms (in future)
- Compliance monitoring and reporting (in future)

5. **Database (PostgreSQL)**
   - Store market data, bot configurations, and trading history
   - Optimized schema for time-series data without complexity
   - Support for data querying and aggregation

   **Database (JSON)**
   - Store strategy parameters and bot configuration in JSON initially, for simplicity of editing and testing

6. **Backtesting Engine**
   - Run simulations on historical data using vectorized operations for speed
   - Calculate performance metrics
   - Support multiple timeframes and strategy parameter testing
   - Generate comparison reports between strategies

**FR-009: Historical Simulation**
- Strategy backtesting on historical market data
- Performance metric calculation (Sharpe ratio, drawdown, win rate, total return)
- Parameter optimization through grid search (limited combinations for speed) (in future)
- Side-by-side strategy comparison with statistical significance

**FR-010: Simulation Engine** (see the sketch after this list)
- Vectorized signal calculation using pandas operations
- Realistic fee modeling (0.1% per trade for OKX)
- Look-ahead bias prevention with proper timestamp handling
- Configurable test periods (1 day to 24 months)
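To make FR-010 concrete, a minimal vectorized sketch (assuming a long/flat `signal` series aligned with the candles; the function name and shapes are illustrative):

```python
import pandas as pd

FEE_RATE = 0.001  # 0.1% per trade, matching the OKX fee model above

def quick_backtest(candles: pd.DataFrame, signal: pd.Series) -> pd.Series:
    """Return an equity curve for a long/flat strategy (signal: 1 = long, 0 = flat)."""
    returns = candles['close'].pct_change().fillna(0.0)
    position = signal.shift(1).fillna(0.0)                  # act on the previous bar: no look-ahead
    trades = position.diff().abs().fillna(position.abs())   # entries/exits incur fees
    strategy_returns = position * returns - trades * FEE_RATE
    return (1.0 + strategy_returns).cumprod()
```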
7. **Dashboard & Visualization**
   - Display real-time market data and bot status
   - Show portfolio value progression over time
   - Visualize trade history with buy/sell markers on price charts
   - Provide simple bot control interface (start/stop/configure)

**FR-011: Dashboard Interface**
- Real-time bot monitoring with status indicators
- Portfolio performance charts (total value, cash vs crypto allocation)
- Trade history table with P&L per trade
- Simple bot configuration forms for JSON parameter editing

**FR-012: Data Visualization**
- Interactive price charts with strategy signal overlays
- Portfolio value progression charts
- Performance comparison tables (multiple bots side-by-side)
- Fee tracking and total cost analysis

### Non-Functional Requirements

1. Performance Requirements

**NFR-001: Latency**
- Market data processing: <100ms from exchange to database
- Signal generation: <500ms for standard strategies
- API response time: <200ms for 95% of requests
- Dashboard updates: <2 seconds for real-time data

**NFR-002: Scalability**
- Database queries scalable to 1M+ records per table
- Horizontal scaling capability for all services (in future)

2. Reliability Requirements

**NFR-003: Availability**
- System uptime: 99.5% excluding planned maintenance
- Data collection: 99.9% uptime during market hours
- Automatic failover for critical services
- Graceful degradation during partial outages

**NFR-004: Data Integrity**
- Zero data loss for executed trades
- Transactional consistency for all financial operations
- Regular database backups with point-in-time recovery
- Data validation and error correction mechanisms

3. Security Requirements

**NFR-005: Authentication & Authorization** (in future)

**NFR-006: Data Protection**
- End-to-end encryption for sensitive data (in future)
- Secure storage of API keys and credentials
- Regular security audits and penetration testing (in future)
- Compliance with financial data protection regulations (in future)

## Technical Implementation

### Database Schema

The database schema separates frequently-accessed OHLCV data from raw tick data to optimize performance and storage.
```sql
-- OHLCV Market Data (primary table for bot operations)
CREATE TABLE market_data (
    id SERIAL PRIMARY KEY,
    exchange VARCHAR(50) NOT NULL DEFAULT 'okx',
    symbol VARCHAR(20) NOT NULL,
    timeframe VARCHAR(5) NOT NULL, -- 1m, 5m, 15m, 1h, 4h, 1d
    timestamp TIMESTAMPTZ NOT NULL,
    open DECIMAL(18,8) NOT NULL,
    high DECIMAL(18,8) NOT NULL,
    low DECIMAL(18,8) NOT NULL,
    close DECIMAL(18,8) NOT NULL,
    volume DECIMAL(18,8) NOT NULL,
    trades_count INTEGER, -- number of trades in this candle
    created_at TIMESTAMPTZ DEFAULT NOW(),
    UNIQUE(exchange, symbol, timeframe, timestamp)
);
CREATE INDEX idx_market_data_lookup ON market_data(symbol, timeframe, timestamp);
-- Partial-index predicates must be immutable, so a rolling NOW()-based filter
-- is not allowed; a plain descending timestamp index serves recent lookups.
CREATE INDEX idx_market_data_recent ON market_data(timestamp DESC);

-- Raw Trade Data (optional, for detailed backtesting only)
CREATE TABLE raw_trades (
    id SERIAL,
    exchange VARCHAR(50) NOT NULL DEFAULT 'okx',
    symbol VARCHAR(20) NOT NULL,
    timestamp TIMESTAMPTZ NOT NULL,
    type VARCHAR(10) NOT NULL, -- trade, order, balance, tick, books
    data JSONB NOT NULL, -- response from the exchange
    created_at TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id, timestamp) -- the partition key must be part of the primary key
) PARTITION BY RANGE (timestamp);
CREATE INDEX idx_raw_trades_symbol_time ON raw_trades(symbol, timestamp);

-- Monthly partitions for raw data (if using raw data)
-- CREATE TABLE raw_trades_y2024m01 PARTITION OF raw_trades
--     FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

-- Bot Management (simplified)
CREATE TABLE bots (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    strategy_name VARCHAR(50) NOT NULL,
    symbol VARCHAR(20) NOT NULL,
    timeframe VARCHAR(5) NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'inactive', -- active, inactive, error
    config_file VARCHAR(200), -- path to JSON config
    virtual_balance DECIMAL(18,8) DEFAULT 10000,
    current_balance DECIMAL(18,8) DEFAULT 10000,
    last_heartbeat TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Trading Signals (for analysis and debugging)
CREATE TABLE signals (
    id SERIAL PRIMARY KEY,
    bot_id INTEGER REFERENCES bots(id),
    timestamp TIMESTAMPTZ NOT NULL,
    signal_type VARCHAR(10) NOT NULL, -- buy, sell, hold
    price DECIMAL(18,8),
    confidence DECIMAL(5,4),
    indicators JSONB, -- technical indicator values
    created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_signals_bot_time ON signals(bot_id, timestamp);

-- Trade Execution Records
CREATE TABLE trades (
    id SERIAL PRIMARY KEY,
    bot_id INTEGER REFERENCES bots(id),
    signal_id INTEGER REFERENCES signals(id),
    timestamp TIMESTAMPTZ NOT NULL,
    side VARCHAR(5) NOT NULL, -- buy, sell
    price DECIMAL(18,8) NOT NULL,
    quantity DECIMAL(18,8) NOT NULL,
    fees DECIMAL(18,8) DEFAULT 0,
    pnl DECIMAL(18,8), -- profit/loss for this trade
    balance_after DECIMAL(18,8), -- portfolio balance after trade
    created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_trades_bot_time ON trades(bot_id, timestamp);

-- Performance Snapshots (for plotting portfolio over time)
CREATE TABLE bot_performance (
    id SERIAL PRIMARY KEY,
    bot_id INTEGER REFERENCES bots(id),
    timestamp TIMESTAMPTZ NOT NULL,
    total_value DECIMAL(18,8) NOT NULL, -- current portfolio value
    cash_balance DECIMAL(18,8) NOT NULL,
    crypto_balance DECIMAL(18,8) NOT NULL,
    total_trades INTEGER DEFAULT 0,
    winning_trades INTEGER DEFAULT 0,
    total_fees DECIMAL(18,8) DEFAULT 0,
    created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_bot_performance_bot_time ON bot_performance(bot_id, timestamp);
```
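Because of the `UNIQUE(exchange, symbol, timeframe, timestamp)` constraint, candle writes can be made idempotent with an upsert; a sketch using psycopg2 (connection setup omitted, the helper name is illustrative):

```python
UPSERT_CANDLE = """
    INSERT INTO market_data
        (exchange, symbol, timeframe, timestamp, open, high, low, close, volume)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (exchange, symbol, timeframe, timestamp)
    DO UPDATE SET high = EXCLUDED.high, low = EXCLUDED.low,
                  close = EXCLUDED.close, volume = EXCLUDED.volume;
"""

def store_candle(conn, candle: tuple) -> None:
    """Insert or update one candle row; `conn` is a psycopg2 connection."""
    with conn.cursor() as cur:
        cur.execute(UPSERT_CANDLE, candle)
    conn.commit()
```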
**Data Storage Strategy**:
- **OHLCV Data**: Primary source for bot operations, kept indefinitely, optimized indexes
- **Raw Trade Data**: Optional table, only if detailed backtesting is needed; can be partitioned monthly
- **Alternative for Raw Data**: Store in compressed files (Parquet/CSV) instead of the database for cost efficiency

**MVP Approach**: Start with OHLCV data only; add raw data storage later if advanced backtesting requires it.

### Technology Stack

The platform will be built using the following technologies:

- **Backend Framework**: Python 3.10+ with Dash (includes built-in Flask server for REST API endpoints)
- **Database**: PostgreSQL 14+ (with TimescaleDB extension for time-series optimization)
- **Real-time Messaging**: Redis (for pub/sub messaging between components)
- **Frontend**: Dash with Plotly (for visualization and control interface) and Mantine UI components
- **Configuration**: JSON files for strategy parameters and bot configurations
- **Deployment**: Docker container setup for development and production

### API Design

**Dash Callbacks**: Real-time updates and user interactions
**REST Endpoints**: Historical data queries for backtesting and analysis

```python
# Built-in Flask routes for historical data
@app.server.route('/api/bot/<bot_id>/trades')
@app.server.route('/api/market/<symbol>/history')
@app.server.route('/api/backtest/results/<test_id>')
```
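One of these endpoints, fleshed out as a hypothetical handler (the `query_db` helper wrapping a pooled connection is assumed, not part of the spec):

```python
from flask import jsonify

@app.server.route('/api/bot/<int:bot_id>/trades')
def get_bot_trades(bot_id: int):
    rows = query_db(  # assumed helper: runs a query on a pooled connection
        "SELECT timestamp, side, price, quantity, pnl"
        " FROM trades WHERE bot_id = %s"
        " ORDER BY timestamp DESC LIMIT 100",
        (bot_id,),
    )
    return jsonify(rows)
```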
### Data Flow

The data flow follows a simple pattern to ensure efficient processing:

1. **Market Data Collection**:
   - Collector fetches data from exchange APIs
   - Raw data is stored in PostgreSQL
   - Processed data (e.g., OHLCV candles) is calculated and stored
   - Real-time updates are published to Redis channels

2. **Signal Generation**:
   - Bots subscribe to relevant data channels and generate signals based on the strategy
   - Signals are stored in the database and published to Redis

3. **Trade Execution**:
   - Bot manager receives signals from strategies
   - Validates signals against bot parameters and portfolio
   - Simulates or executes trades based on configuration
   - Stores trade information in the database

4. **Visualization**:
   - Dashboard subscribes to real-time data and trading updates
   - Queries historical data for charts and performance metrics
   - Provides interface for bot management and configuration

## Development Roadmap

### Phase 1: Foundation (Days 1-5)

**Objective**: Establish core system components and data flow

1. **Day 1-2**: Database Setup and Data Collection
   - Set up PostgreSQL with initial schema
   - Implement OKX API connector
   - Create data storage and processing logic

2. **Day 3-4**: Strategy Engine and Bot Manager
   - Develop strategy interface and 1-2 example strategies
   - Create bot manager with basic controls
   - Implement Redis for real-time messaging

3. **Day 5**: Basic Visualization
   - Set up Dash/Plotly for simple charts
   - Create basic dashboard layout
   - Connect to real-time data sources
   - Create mockup strategies and bots

### Phase 2: Core Functionality (Days 6-10)

**Objective**: Complete essential features for strategy testing

1. **Day 6-7**: Backtesting Engine
   - Get historical data from the database or file (BTC/USDT data is already available in CSV format)
   - Create performance calculation metrics
   - Develop strategy comparison tools

2. **Day 8-9**: Trading Logic
   - Implement virtual trading capability
   - Create trade execution logic
   - Develop portfolio tracking

3. **Day 10**: Dashboard Enhancement
   - Improve visualization components
   - Add bot control interface
   - Implement real-time performance monitoring

### Phase 3: Refinement (Days 11-14)

**Objective**: Polish the system and prepare for ongoing use

1. **Day 11-12**: Testing and Debugging
   - Comprehensive system testing
   - Fix identified issues
   - Performance optimization

2. **Day 13-14**: Documentation and Deployment
   - Create user documentation
   - Prepare deployment process
   - Set up basic monitoring

## Technical Considerations

### Scalability Path

While the initial system is designed as a monolithic application for rapid development, several considerations ensure future scalability:

1. **Module Separation**: Clear boundaries between components enable future extraction into microservices
2. **Database Design**: Schema supports partitioning and sharding for larger data volumes
3. **Message Queue**: Redis implementation paves the way for more robust messaging (Kafka/RabbitMQ)
4. **API-First Design**: Internal components communicate through well-defined interfaces

### Time Aggregation

Special attention is given to time aggregation to ensure consistency with exchanges:
```python
import pandas as pd

def aggregate_candles(trades, timeframe, alignment='right'):
    """
    Aggregate trade data into OHLCV candles with consistent timestamp alignment.

    Parameters:
    - trades: List of trade dictionaries with timestamp (ms), price, and amount
    - timeframe: String representing the timeframe (e.g., '1m', '5m', '1h')
    - alignment: String indicating timestamp alignment ('right' or 'left')

    Returns:
    - DataFrame with one OHLCV row per candle
    """
    # Convert timeframe to pandas offset
    if timeframe.endswith('m'):
        offset = pd.Timedelta(minutes=int(timeframe[:-1]))
    elif timeframe.endswith('h'):
        offset = pd.Timedelta(hours=int(timeframe[:-1]))
    elif timeframe.endswith('d'):
        offset = pd.Timedelta(days=int(timeframe[:-1]))
    else:
        raise ValueError(f"Unsupported timeframe: {timeframe}")

    # Create DataFrame from trades, sorted chronologically so that
    # 'first'/'last' map to open/close correctly
    df = pd.DataFrame(trades)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df.sort_values('timestamp')

    # Right-aligned candles carry the close time (exchange convention);
    # left-aligned candles carry the open time
    if alignment == 'right':
        df['candle_time'] = df['timestamp'].dt.ceil(offset)
    else:
        df['candle_time'] = df['timestamp'].dt.floor(offset)

    # Aggregate to OHLCV
    candles = df.groupby('candle_time').agg({
        'price': ['first', 'max', 'min', 'last'],
        'amount': 'sum'
    }).reset_index()

    # Rename columns
    candles.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    return candles
```
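For example, feeding two trades into the aggregator (timestamps in Unix milliseconds; values are illustrative):

```python
trades = [
    {'timestamp': 1717060265000, 'price': 68000.1, 'amount': 0.02},
    {'timestamp': 1717060299000, 'price': 68010.0, 'amount': 0.01},
]

candles = aggregate_candles(trades, '5m', alignment='right')
print(candles)  # one row: OHLC from the two prices, volume 0.03
```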
### Performance Optimization

For the initial release, several performance optimizations are implemented:

1. **Database Indexing**: Proper indexes on timestamp and symbol fields
2. **Query Optimization**: Prepared statements and efficient query patterns
3. **Connection Pooling**: Database connection management to prevent leaks
4. **Data Aggregation**: Pre-calculation of common time intervals
5. **Memory Management**: Proper cleanup of data objects after processing

## User Interface

The initial user interface focuses on functionality over aesthetics, providing essential controls and visualizations in a minimalistic design.

1. **Market Data View**
   - Real-time price charts for monitored symbols
   - Order book visualization
   - Recent trades list

2. **Bot Management**
   - Create/configure bot interface
   - Start/stop controls
   - Status indicators

3. **Strategy Dashboard**
   - Strategy selection and configuration
   - Signal visualization
   - Performance metrics

4. **Backtesting Interface**
   - Historical data selection
   - Strategy parameter configuration
   - Results visualization

## Risk Management & Mitigation

### Technical Risks

**Risk:** Exchange API rate limiting affecting data collection
**Mitigation:** Implement intelligent rate limiting, multiple API keys, and fallback data sources

**Risk:** Database performance degradation with large datasets
**Mitigation:** Implement data partitioning, archival strategies, and query optimization (in future)

**Risk:** System downtime during market volatility
**Mitigation:** Design redundant systems, implement circuit breakers and emergency procedures (in future)

### Business Risks

**Risk:** Regulatory changes affecting crypto trading
**Mitigation:** Implement compliance monitoring, maintain regulatory awareness, design for adaptability

**Risk:** Competition from established trading platforms
**Mitigation:** Focus on unique value propositions, rapid feature development, strong user experience

### User Risks

**Risk:** User losses due to platform errors
**Mitigation:** Comprehensive testing, simulation modes, risk warnings, and liability disclaimers

## Future Expansion

While keeping the initial implementation simple, the design accommodates future enhancements:

1. **Authentication System**: Add multi-user support with role-based access
2. **Advanced Strategies**: Support for machine learning and AI-based strategies
3. **Multi-Exchange Support**: Expand beyond OKX to other exchanges
4. **Microservices Migration**: Extract components into separate services
5. **Advanced Monitoring**: Integration with Prometheus/Grafana
6. **Cloud Deployment**: Support for AWS/GCP/Azure deployment

## Success Metrics

The platform's success will be measured by these key metrics:

1. **Development Timeline**: Complete core functionality within 14 days
2. **System Stability**: Maintain 99% uptime during internal testing; the system should monitor itself and restart (fully or per module) when needed
3. **Strategy Testing**: Successfully backtest at least 3 different strategies
4. **Bot Performance**: Run at least 2 bots concurrently for 72+ hours
@@ -1,165 +0,0 @@
## Architecture Components

### 1. Data Collector
**Responsibility**: Unified data collection from multiple exchanges
```python
class DataCollector:
    def __init__(self):
        self.providers = {}  # Registry of data providers

    def register_provider(self, name: str, provider: DataProvider):
        """Register a new data provider"""

    def start_collection(self, symbols: List[str]):
        """Start collecting data for specified symbols"""

    def process_raw_data(self, raw_data: dict):
        """Process raw data into OHLCV format"""

    def send_signal_to_bots(self, processed_data: dict):
        """Send Redis signal to active bots"""
```

### 2. Strategy Engine
**Responsibility**: Unified interface for all trading strategies
```python
class BaseStrategy:
    def __init__(self, parameters: dict):
        self.parameters = parameters

    def process_data(self, data: pd.DataFrame) -> Signal:
        """Process market data and generate signals"""
        raise NotImplementedError

    def get_indicators(self) -> dict:
        """Return calculated indicators for plotting"""
        return {}
```
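A hypothetical concrete strategy built on this interface, using the EMA crossover parameters from the JSON config example (the minimal `Signal` shape here is assumed; the real type lives elsewhere):

```python
from dataclasses import dataclass
import pandas as pd

@dataclass
class Signal:  # minimal assumed shape for illustration
    signal_type: str   # 'buy' / 'sell' / 'hold'
    confidence: float

class EMACrossoverStrategy(BaseStrategy):
    """Illustrative subclass; assumes `data` has a 'close' column and >= 2 rows."""
    def process_data(self, data: pd.DataFrame) -> Signal:
        fast = data['close'].ewm(span=self.parameters['fast_period'], adjust=False).mean()
        slow = data['close'].ewm(span=self.parameters['slow_period'], adjust=False).mean()
        crossed_up = fast.iloc[-1] > slow.iloc[-1] and fast.iloc[-2] <= slow.iloc[-2]
        crossed_down = fast.iloc[-1] < slow.iloc[-1] and fast.iloc[-2] >= slow.iloc[-2]
        if crossed_up:
            return Signal('buy', 1.0)
        if crossed_down:
            return Signal('sell', 1.0)
        return Signal('hold', 0.0)
```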
### 3. Bot Manager
**Responsibility**: Orchestrate bot execution and state management
```python
class BotManager:
    def __init__(self):
        self.active_bots = {}

    def start_bot(self, bot_id: int):
        """Start a bot instance"""

    def stop_bot(self, bot_id: int):
        """Stop a bot instance"""

    def process_signal(self, bot_id: int, signal: Signal):
        """Process signal and make trading decision"""

    def update_bot_state(self, bot_id: int, state: dict):
        """Update bot state in database"""
```

## Communication Architecture

### Redis Pub/Sub Patterns
```python
# Real-time market data
MARKET_DATA_CHANNEL = "market_data:{symbol}"

# Bot-specific signals
BOT_SIGNAL_CHANNEL = "bot_signals:{bot_id}"

# Trade updates
TRADE_UPDATE_CHANNEL = "trade_updates:{bot_id}"

# System events
SYSTEM_EVENT_CHANNEL = "system_events"
```

### WebSocket Communication
```python
# Frontend real-time updates
WS_BOT_STATUS = "/ws/bot/{bot_id}/status"
WS_MARKET_DATA = "/ws/market/{symbol}"
WS_PORTFOLIO = "/ws/portfolio/{bot_id}"
```

## Time Aggregation Strategy

### Candlestick Alignment
- **Use RIGHT-ALIGNED timestamps** (industry standard)
- A 5-minute candle with timestamp 09:05:00 represents data from 09:00:01 to 09:05:00
- Timestamp = close time of the candle
- Aligns with major exchanges (Binance, OKX, Coinbase)

### Aggregation Logic
```python
def aggregate_to_timeframe(ticks: List[dict], timeframe: str) -> dict:
    """
    Aggregate tick data to the specified timeframe.
    timeframe: '1m', '5m', '15m', '1h', '4h', '1d'
    """
    # Convert timeframe to seconds (helper sketched below)
    interval_seconds = parse_timeframe(timeframe)

    # Group ticks by time intervals (right-aligned); group_by_interval
    # is an assumed helper yielding per-interval tick groups
    for group in group_by_interval(ticks, interval_seconds):
        candle = {
            'timestamp': group.end_time,  # Right-aligned: close time
            'open': group.first_price,
            'high': group.max_price,
            'low': group.min_price,
            'close': group.last_price,
            'volume': group.total_volume
        }
        yield candle
```
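The `parse_timeframe` helper assumed above could be as simple as:

```python
def parse_timeframe(timeframe: str) -> int:
    """Convert a timeframe string ('1m', '5m', '15m', '1h', '4h', '1d') to seconds."""
    units = {'m': 60, 'h': 3600, 'd': 86400}
    return int(timeframe[:-1]) * units[timeframe[-1]]

assert parse_timeframe('5m') == 300
assert parse_timeframe('4h') == 14400
```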
## Backtesting Optimization

### Parallel Processing Strategy
```python
import multiprocessing as mp
from joblib import Parallel, delayed
import numba

@numba.jit(nopython=True)
def calculate_signals_vectorized(prices, parameters):
    """Vectorized signal calculation using Numba (strategy-specific body omitted)"""
    # High-performance signal calculation
    return signals

def backtest_strategy_batch(data_batch, strategy_params):
    """Backtest a batch of data in parallel"""
    # Process batch of signals
    signals = calculate_signals_vectorized(data_batch, strategy_params)

    # Simulate trades incrementally (assumed helper)
    portfolio = simulate_trades(signals, data_batch)
    return portfolio

# Parallel backtesting
def run_parallel_backtest(data, strategy_params, n_jobs=4):
    data_batches = split_data_into_batches(data, n_jobs)

    results = Parallel(n_jobs=n_jobs)(
        delayed(backtest_strategy_batch)(batch, strategy_params)
        for batch in data_batches
    )

    return combine_results(results)
```

### Optimization Techniques
1. **Vectorized Operations**: Use NumPy/Pandas for bulk calculations
2. **Numba JIT**: Compile critical loops for C-like performance
3. **Batch Processing**: Process signals in batches, simulate trades incrementally
4. **Memory Management**: Use efficient data structures (arrays vs lists)
5. **Parallel Execution**: Utilize multiple CPU cores for independent calculations

## Key Design Principles

1. **Data Separation**: Raw and processed data stored separately for audit trail
2. **Signal Tracking**: All signals recorded (executed or not) for analysis
3. **Real-time State**: Bot states updated in real-time for monitoring
4. **Audit Trail**: Complete record of all trading activities
5. **Scalability**: Architecture supports multiple bots and strategies
6. **Modularity**: Clear separation between data collection, strategy execution, and trading
7. **Fault Tolerance**: Redis for reliable message delivery, database transactions for consistency
36 docs/decisions/ADR-003-base-collector-refactoring.md Normal file
@@ -0,0 +1,36 @@
# ADR-003: BaseDataCollector Refactoring and Component Extraction

## Status
Accepted

## Context
The `BaseDataCollector` class was initially monolithic, handling connection management, state and telemetry, and callback dispatching directly. This led to a less modular, harder-to-test, and less maintainable codebase. Additionally, `OHLCVData` and its associated validation, although broadly applicable, were tightly coupled within the `data` module, leading to potential import complexities and naming conflicts.

## Decision
To improve modularity, maintainability, testability, and reusability, we decided to refactor `BaseDataCollector` by extracting its core responsibilities into dedicated, smaller, and focused components. We also decided to relocate `OHLCVData` to a more common and accessible location.

### Extracted Components:
1. **`CollectorStateAndTelemetry`**: Responsible for managing collector status, health, statistics, and logging.
2. **`ConnectionManager`**: Responsible for handling the WebSocket connection lifecycle (connect, disconnect, reconnect) and related error management.
3. **`CallbackDispatcher`**: Responsible for managing and dispatching data callbacks to registered listeners.

### OHLCVData Relocation:
- The `OHLCVData` class and the `validate_ohlcv_data` function, along with the `DataValidationError` exception, were moved from `data/ohlcv_data.py` to `data/common/ohlcv_data.py`.

## Consequences
**Positive:**
- **Improved Modularity**: `BaseDataCollector` is now leaner and focuses solely on orchestrating the new components.
- **Enhanced Testability**: Each extracted component can be unit-tested in isolation, reducing test complexity and improving test coverage.
- **Increased Maintainability**: Changes to connection logic, state management, or callback handling are isolated to their respective components, minimizing impact on other parts of the system.
- **Greater Reusability**: `CollectorStateAndTelemetry`, `ConnectionManager`, and `CallbackDispatcher` can potentially be reused in other contexts or for different types of collectors.
- **Clearer Separation of Concerns**: Each component has a single, well-defined responsibility.
- **Centralized `OHLCVData`**: Moving `OHLCVData` to `data/common` provides a more intuitive and accessible location for a common data structure, resolving potential import conflicts and improving code organization.

**Negative:**
- **Increased File Count**: More files are introduced, potentially increasing initial navigation overhead (mitigated by clear naming and directory structure).
- **Refactoring Overhead**: Required updating existing code to use the new components and adjusting imports across multiple files.

## Alternatives Considered
- **Keeping Monolithic `BaseDataCollector`**: Rejected due to the drawbacks of tightly coupled code (poor testability and maintainability).
- **Partial Extraction**: Considered extracting only one or two components, but decided against it to achieve maximum modularity benefits.
- **Different `OHLCVData` Location**: Considered `utils/data_types.py` or `data/models.py`, but `data/common/ohlcv_data.py` was deemed most appropriate given its nature as a common data structure within the `data` module.
@@ -62,15 +62,45 @@ For exchange-specific documentation, see [Exchange Implementations (`./exchanges
 
 ### 1. `BaseDataCollector`
 
-An abstract base class that defines the common interface for all exchange collectors.
+An abstract base class that defines the common interface for all exchange collectors. It now orchestrates specialized components for connection management, state and telemetry, and callback dispatching.
 
 **Key Responsibilities:**
-- Standardized `start`, `stop`, `restart` methods
-- Built-in health monitoring with heartbeat and data silence detection
-- Automatic reconnect and restart logic
-- Asynchronous message handling
+- Standardized `start`, `stop`, `restart` methods.
+- Orchestrates connection handling via `ConnectionManager`.
+- Delegates state, health, and statistics management to `CollectorStateAndTelemetry`.
+- Utilizes `CallbackDispatcher` for managing and notifying data subscribers.
+- Defines abstract methods for exchange-specific implementations (e.g., `_actual_connect`, `_actual_disconnect`, `_subscribe_channels`, `_process_message`).
 
-### 2. `CollectorManager`
+### 2. `CollectorStateAndTelemetry`
+
+Manages the operational state, health, and performance statistics of a data collector.
+
+**Key Responsibilities:**
+- Tracks `CollectorStatus` (e.g., `RUNNING`, `STOPPED`, `ERROR`).
+- Monitors health metrics like heartbeat and data silence.
+- Collects and provides operational statistics (e.g., messages processed, errors).
+- Provides centralized logging functionality for the collector.
+
+### 3. `ConnectionManager`
+
+Handles the WebSocket connection lifecycle and resilience for a data collector.
+
+**Key Responsibilities:**
+- Establishes and terminates WebSocket connections.
+- Manages automatic reconnection attempts with exponential backoff.
+- Handles connection-related errors and ensures robust connectivity.
+- Tracks WebSocket connection state and statistics.
+
+### 4. `CallbackDispatcher`
+
+Manages and dispatches real-time data to registered callbacks.
+
+**Key Responsibilities:**
+- Registers and unregisters data callbacks for different `DataType`s.
+- Notifies all subscribed listeners when new data points are received.
+- Ensures efficient and reliable distribution of processed market data.
+
+### 5. `CollectorManager`
 
 A singleton class that manages all active data collectors in the system.
 
@@ -80,7 +110,7 @@ A singleton class that manages all active data collectors in the system.
 - Global health monitoring
 - Coordination of restart policies
 
-### 3. Exchange-Specific Collectors
+### 6. Exchange-Specific Collectors
 
 Concrete implementations of `BaseDataCollector` for each exchange (e.g., `OKXCollector`).
@@ -6,8 +6,8 @@
 
 1. **Base Collector**
    - Inherit from `BaseDataCollector`
-   - Implement required abstract methods
-   - Handle connection lifecycle
+   - Implement exchange-specific abstract methods (e.g., `_actual_connect`, `_actual_disconnect`, `_subscribe_channels`, `_process_message`)
+   - Leverage `ConnectionManager`, `CollectorStateAndTelemetry`, and `CallbackDispatcher` through the inherited `BaseDataCollector` functionalities
 
 2. **WebSocket Client**
    - Implement exchange-specific WebSocket handling
@ -897,13 +897,13 @@ The OKX collector consists of three main components working together:

### `OKXCollector`

- **Main class**: `OKXCollector(BaseDataCollector)`
- **Responsibilities**:
  - Manages WebSocket connection state
  - Subscribes to required data channels
  - Dispatches raw messages to the data processor
  - Stores standardized data in the database
  - Provides health and status monitoring
- **Main class**: `OKXCollector(BaseDataCollector)`
- **Responsibilities**:
  - Implements exchange-specific connection and subscription logic (delegating to `ConnectionManager` for core connection handling).
  - Processes and standardizes raw OKX WebSocket messages (delegating to `OKXDataProcessor`).
  - Interacts with `CollectorStateAndTelemetry` for status, health, and logging.
  - Uses `CallbackDispatcher` to notify subscribers of processed data.
  - Stores standardized data in the database.
### `OKXWebSocketClient`

@ -915,12 +915,12 @@ The OKX collector consists of three main components working together:

### `OKXDataProcessor`

- **New in v2.0**: `OKXDataProcessor`
- **Responsibilities**:
  - Validates incoming raw data from WebSocket
  - Transforms data into standardized `StandardizedTrade` and `OHLCVCandle` formats
  - Aggregates trades into OHLCV candles
  - Invokes callbacks for processed trades and completed candles
- **New in v2.0**: `OKXDataProcessor`
- **Responsibilities**:
  - Validates incoming raw data from WebSocket.
  - Transforms data into standardized `MarketDataPoint` and `OHLCVData` formats (using the moved `OHLCVData`).
  - Aggregates trades into OHLCV candles.
  - Invokes callbacks for processed trades and completed candles.
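Trade-to-candle aggregation is the core of this component. A simplified sketch using the relocated `OHLCVData`; the real processor consumes streaming OKX payloads, whereas here `trades` is assumed to be a non-empty list of `(price, size)` pairs already bucketed into one candle interval:

```python
from datetime import datetime, timezone
from decimal import Decimal

from data.common.ohlcv_data import OHLCVData


def aggregate_trades(trades, symbol: str, timeframe: str) -> OHLCVData:
    """Fold one interval's trades into a candle; a sketch, not the
    actual OKXDataProcessor implementation."""
    prices = [Decimal(str(price)) for price, _ in trades]
    return OHLCVData(
        symbol=symbol,
        timeframe=timeframe,
        timestamp=datetime.now(timezone.utc),  # real code uses the bucket start
        open=prices[0],
        high=max(prices),
        low=min(prices),
        close=prices[-1],
        volume=sum(Decimal(str(size)) for _, size in trades),
        trades_count=len(trades),
    )
```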
## Configuration

@ -932,12 +932,12 @@ Configuration options for the `OKXCollector` class:
|-------------------------|------------------|----------------------|-------------------------------------------------------------------------------|
| `symbol` | `str` | - | Trading symbol (e.g., `BTC-USDT`) |
| `data_types` | `List[DataType]` | `[TRADE, ORDERBOOK]` | List of data types to collect |
| `auto_restart` | `bool` | `True` | Automatically restart on failures |
| `health_check_interval` | `float` | `30.0` | Seconds between health checks |
| `auto_restart` | `bool` | `True` | Automatically restart on failures (managed by `BaseDataCollector` via `ConnectionManager`) |
| `health_check_interval` | `float` | `30.0` | Seconds between health checks (managed by `BaseDataCollector` via `CollectorStateAndTelemetry`) |
| `store_raw_data` | `bool` | `True` | Store raw WebSocket data for debugging |
| `force_update_candles` | `bool` | `False` | If `True`, update existing candles; if `False`, keep existing ones unchanged |
| `logger` | `Logger` | `None` | Logger instance for conditional logging |
| `log_errors_only` | `bool` | `False` | If `True` and logger provided, only log error-level messages |
| `logger` | `Logger` | `None` | Logger instance for conditional logging (managed by `BaseDataCollector` via `CollectorStateAndTelemetry`) |
| `log_errors_only` | `bool` | `False` | If `True` and logger provided, only log error-level messages (managed by `BaseDataCollector` via `CollectorStateAndTelemetry`) |
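Put together, a construction sketch using the options above. Keyword names follow the table; whether every option is a direct constructor parameter of `OKXCollector` is an assumption:

```python
from utils.logger import get_logger
from data.common.data_types import DataType
from data.exchanges.okx.collector import OKXCollector

collector = OKXCollector(
    symbol="BTC-USDT",
    data_types=[DataType.TRADE, DataType.ORDERBOOK],  # the documented default
    auto_restart=True,               # reconnects via ConnectionManager
    health_check_interval=30.0,      # checked via CollectorStateAndTelemetry
    store_raw_data=True,
    force_update_candles=False,
    logger=get_logger("okx_collector"),
    log_errors_only=False,
)
```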
### Health & Status Monitoring

@ -962,4 +962,4 @@ Example output:
}
```
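The example output above is truncated by the hunk boundary; the accessors themselves are the two methods that task 4.4 below delegates to `CollectorStateAndTelemetry`:

```python
def health_snapshot(collector) -> tuple:
    """Hedged sketch: both calls exist per task 4.4 in this diff, but
    their return shapes are cut off by the hunk above."""
    return collector.get_status(), collector.get_health_status()
```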
## Database Integration
## Database Integration
@ -26,10 +26,13 @@ This architecture allows for high scalability and fault tolerance.

- **Location**: `data/exchanges/okx/collector.py`
- **Responsibilities**:
  - Connects to the OKX WebSocket API
  - Subscribes to real-time data channels
  - Processes and standardizes incoming data
  - Stores data in the database
  - Inherits from `BaseDataCollector` and implements exchange-specific data collection logic.
  - Utilizes `ConnectionManager` for robust WebSocket connection management.
  - Leverages `CollectorStateAndTelemetry` for internal status, health, and logging.
  - Uses `CallbackDispatcher` to notify registered consumers of processed data.
  - Subscribes to real-time data channels specific to OKX.
  - Processes and standardizes incoming OKX data before dispatching.
  - Stores processed data in the database.

## Configuration
@ -18,7 +18,7 @@

## Tasks

- [ ] 0.0 Create `data/collector` directory
- [x] 0.0 Create `data/collector` directory
- [x] 1.0 Extract `CollectorStateAndTelemetry` Class
  - [x] 1.1 Create `data/collector/collector_state_telemetry.py`.
  - [x] 1.2 Move `CollectorStatus` enum to `data/collector/collector_state_telemetry.py`.
@ -44,23 +44,23 @@
  - [x] 3.5 Add necessary imports to both `data/base_collector.py` and `data/collector/collector_callback_dispatcher.py`.
  - [x] 3.6 Create `tests/data/collector/test_collector_callback_dispatcher.py` and add initial tests for the new class.

- [ ] 4.0 Refactor `BaseDataCollector` to use new components
  - [ ] 4.1 Update `BaseDataCollector.__init__` to instantiate and use `CollectorStateAndTelemetry`, `ConnectionManager`, and `CallbackDispatcher` instances.
  - [ ] 4.2 Replace direct access to moved attributes/methods with calls to the new component instances (e.g., `self.logger.info` becomes `self._state_telemetry.log_info`).
  - [ ] 4.3 Modify `start`, `stop`, `restart`, `_message_loop`, `_health_monitor` to interact with the new components, delegating responsibilities appropriately.
  - [ ] 4.4 Update `get_status` and `get_health_status` in `BaseDataCollector` to delegate to `CollectorStateAndTelemetry`.
  - [ ] 4.5 Review and update abstract methods and their calls as needed, ensuring they interact correctly with the new components.
  - [ ] 4.6 Ensure all existing tests for `BaseDataCollector` still pass after refactoring.
  - [ ] 4.7 Update `data/exchanges/okx/collector.py` to use the new `CollectorStateAndTelemetry` and `ConnectionManager` classes for logging, status updates, and connection handling.
  - [ ] 4.8 Update `data/collector_manager.py` to interact with the new `CollectorStateAndTelemetry` class for health checks and status retrieval from `BaseDataCollector` instances.
- [x] 4.0 Refactor `BaseDataCollector` to use new components
  - [x] 4.1 Update `BaseDataCollector.__init__` to instantiate and use `CollectorStateAndTelemetry`, `ConnectionManager`, and `CallbackDispatcher` instances.
  - [x] 4.2 Replace direct access to moved attributes/methods with calls to the new component instances (e.g., `self.logger.info` becomes `self._state_telemetry.log_info`).
  - [x] 4.3 Modify `start`, `stop`, `restart`, `_message_loop`, `_health_monitor` to interact with the new components, delegating responsibilities appropriately.
  - [x] 4.4 Update `get_status` and `get_health_status` in `BaseDataCollector` to delegate to `CollectorStateAndTelemetry`.
  - [x] 4.5 Review and update abstract methods and their calls as needed, ensuring they interact correctly with the new components.
  - [x] 4.6 Ensure all existing tests for `BaseDataCollector` still pass after refactoring.
  - [x] 4.7 Update `data/exchanges/okx/collector.py` to use the new `CollectorStateAndTelemetry` and `ConnectionManager` classes for logging, status updates, and connection handling.
  - [x] 4.8 Update `data/collector_manager.py` to interact with the new `CollectorStateAndTelemetry` class for health checks and status retrieval from `BaseDataCollector` instances.

- [ ] 5.0 Review and potentially extract `OHLCVData` and related validation
  - [ ] 5.1 Analyze if `OHLCVData` and `validate_ohlcv_data` are frequently used outside of `data/base_collector.py`.
  - [ ] 5.2 If analysis indicates external usage or clear separation benefits, move `OHLCVData` class and `DataValidationError` to a new `data/ohlcv_data.py` file.
  - [ ] 5.3 Update imports in `data/base_collector.py` and any other affected files.
  - [ ] 5.4 If `OHLCVData` is extracted, create `tests/data/test_ohlcv_data.py` with tests for its structure and validation logic.
- [x] 5.0 Review and potentially extract `OHLCVData` and related validation
  - [x] 5.1 Analyze if `OHLCVData` and `validate_ohlcv_data` are frequently used outside of `data/base_collector.py`.
  - [x] 5.2 If analysis indicates external usage or clear separation benefits, move `OHLCVData` class and `DataValidationError` to a new `data/ohlcv_data.py` file.
  - [x] 5.3 Update imports in `data/base_collector.py` and any other affected files.
  - [x] 5.4 If `OHLCVData` is extracted, create `tests/data/test_ohlcv_data.py` with tests for its structure and validation logic.

- [ ] 6.0 Update Module Imports
  - [ ] 6.1 Update imports in `data/__init__.py` to reflect the new locations of `CollectorStatus`, `DataCollectorError`, `DataValidationError`, `DataType`, `MarketDataPoint`, and `OHLCVData` (if moved).
  - [ ] 6.2 Update imports in `data/common/data_types.py` for `DataType` and `MarketDataPoint`.
  - [ ] 6.3 Review and update imports in all test files (`tests/test_refactored_okx.py`, `tests/test_real_storage.py`, `tests/test_okx_collector.py`, `tests/test_exchange_factory.py`, `tests/test_data_collection_aggregation.py`, `tests/test_collector_manager.py`, `tests/test_base_collector.py`, `tests/database/test_database_operations.py`) and scripts (`scripts/production_clean.py`) that import directly from `data.base_collector`.
- [x] 6.0 Update Module Imports
  - [x] 6.1 Update imports in `data/__init__.py` to reflect the new locations of `CollectorStatus`, `DataCollectorError`, `DataValidationError`, `DataType`, `MarketDataPoint`, and `OHLCVData` (if moved).
  - [x] 6.2 Update imports in `data/common/data_types.py` for `DataType` and `MarketDataPoint`.
  - [x] 6.3 Review and update imports in all test files (`tests/test_refactored_okx.py`, `tests/test_real_storage.py`, `tests/test_okx_collector.py`, `tests/test_exchange_factory.py`, `tests/test_data_collection_aggregation.py`, `tests/test_collector_manager.py`, `tests/test_base_collector.py`, `tests/database/test_database_operations.py`) and scripts (`scripts/production_clean.py`) that import directly from `data.base_collector`.
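Task 4.2's migration is easiest to see side by side. A hypothetical fragment (the class name is invented; the real change lives in `data/base_collector.py`):

```python
class MigrationSketch:
    """Illustrates the delegation described in tasks 4.1-4.2."""

    def _on_message(self) -> None:
        # Before the refactor:
        #     self.logger.info("message received")
        #     self._stats['messages_received'] += 1
        # After, both concerns route through the extracted component:
        self._state_telemetry.log_info("message received")
        self._state_telemetry.increment_messages_received()
```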
@ -23,16 +23,26 @@ class TestDataCollector(BaseDataCollector):
        self.subscribed = False
        self.messages = []

    async def connect(self) -> bool:
    async def _actual_connect(self) -> bool:
        """Implementation of actual connection logic for testing."""
        await asyncio.sleep(0.01)  # Simulate connection delay
        self.connected = True
        return True

    async def disconnect(self) -> None:
    async def _actual_disconnect(self) -> None:
        """Implementation of actual disconnection logic for testing."""
        await asyncio.sleep(0.01)  # Simulate disconnection delay
        self.connected = False
        self.subscribed = False

    async def connect(self) -> bool:
        """Connect using the connection manager."""
        return await self._connection_manager.connect(self._actual_connect)

    async def disconnect(self) -> None:
        """Disconnect using the connection manager."""
        await self._connection_manager.disconnect(self._actual_disconnect)

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        if not self.connected:
            return False
@ -44,7 +54,7 @@ class TestDataCollector(BaseDataCollector):
        return True

    async def _process_message(self, message) -> MarketDataPoint:
        self._stats['messages_received'] += 1
        self._state_telemetry.increment_messages_received()
        return MarketDataPoint(
            exchange=self.exchange_name,
            symbol=message.get('symbol', 'BTC-USDT'),
@ -58,8 +68,7 @@ class TestDataCollector(BaseDataCollector):
            if self.messages:
                message = self.messages.pop(0)
                data_point = await self._process_message(message)
                self._stats['messages_processed'] += 1
                self._stats['last_message_time'] = datetime.now(timezone.utc)
                # Note: increment_messages_processed() is called in _notify_callbacks()
                await self._notify_callbacks(data_point)
            else:
                await asyncio.sleep(0.1)  # Wait for messages
@ -83,7 +92,7 @@ class TestBaseDataCollector:
        assert collector.symbols == {"BTC-USDT", "ETH-USDT"}
        assert collector.data_types == [DataType.TICKER]
        assert collector.status == CollectorStatus.STOPPED
        assert not collector._running
        assert not collector._state_telemetry._running

    @pytest.mark.asyncio
    async def test_start_stop_cycle(self, collector):
@ -94,7 +103,7 @@ class TestBaseDataCollector:
        assert collector.status == CollectorStatus.RUNNING
        assert collector.connected
        assert collector.subscribed
        assert collector._running
        assert collector._state_telemetry._running

        # Wait a bit for the message loop to start
        await asyncio.sleep(0.1)
@ -102,7 +111,7 @@ class TestBaseDataCollector:
        # Test stop
        await collector.stop()
        assert collector.status == CollectorStatus.STOPPED
        assert not collector._running
        assert not collector._state_telemetry._running
        assert not collector.connected
        assert not collector.subscribed

@ -131,8 +140,8 @@ class TestBaseDataCollector:
        assert len(received_data) == 1
        assert received_data[0].symbol == "BTC-USDT"
        assert received_data[0].data_type == DataType.TICKER
        assert collector._stats['messages_received'] == 1
        assert collector._stats['messages_processed'] == 1
        assert collector._state_telemetry._stats['messages_received'] == 1
        assert collector._state_telemetry._stats['messages_processed'] == 1

    def test_symbol_management(self, collector):
        """Test adding and removing symbols."""
@ -160,12 +169,12 @@ class TestBaseDataCollector:
        # Add callbacks
        collector.add_data_callback(DataType.TICKER, callback1)
        collector.add_data_callback(DataType.TICKER, callback2)
        assert len(collector._data_callbacks[DataType.TICKER]) == 2
        assert len(collector._callback_dispatcher._data_callbacks[DataType.TICKER]) == 2

        # Remove callback
        collector.remove_data_callback(DataType.TICKER, callback1)
        assert len(collector._data_callbacks[DataType.TICKER]) == 1
        assert callback2 in collector._data_callbacks[DataType.TICKER]
        assert len(collector._callback_dispatcher._data_callbacks[DataType.TICKER]) == 1
        assert callback2 in collector._callback_dispatcher._data_callbacks[DataType.TICKER]

    def test_get_status(self, collector):
        """Test status reporting."""
@ -302,11 +311,11 @@ async def test_connection_error_handling():
            self.connect_attempts = 0
            self.should_fail = True

        async def connect(self) -> bool:
        async def _actual_connect(self) -> bool:
            self.connect_attempts += 1
            if self.should_fail and self.connect_attempts < 3:
                return False  # Fail first 2 attempts
            return await super().connect()
            return await super()._actual_connect()

    collector = FailingCollector()

@ -316,8 +325,8 @@ async def test_connection_error_handling():
    assert collector.status == CollectorStatus.ERROR

    # Reset for retry and allow success
    collector._reconnect_attempts = 0
    collector.status = CollectorStatus.STOPPED
    collector._connection_manager._reconnect_attempts = 0
    collector._state_telemetry.update_status(CollectorStatus.STOPPED)
    collector.connect_attempts = 0  # Reset connection attempts
    collector.should_fail = False  # Allow connection to succeed
@ -7,6 +7,7 @@ import pytest
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock

from utils.logger import get_logger
from data.collector_manager import CollectorManager, ManagerStatus, CollectorConfig
from data.base_collector import BaseDataCollector, DataType, CollectorStatus

@ -22,7 +23,8 @@ class MockDataCollector(BaseDataCollector):
        self.should_fail_subscribe = False
        self.fail_count = 0

    async def connect(self) -> bool:
    async def _actual_connect(self) -> bool:
        """Implementation of actual connection logic for testing."""
        if self.should_fail_connect and self.fail_count < 2:
            self.fail_count += 1
            return False
@ -30,10 +32,19 @@
        self.connected = True
        return True

    async def disconnect(self) -> None:
    async def _actual_disconnect(self) -> None:
        """Implementation of actual disconnection logic for testing."""
        await asyncio.sleep(0.01)
        self.connected = False
        self.subscribed = False

    async def connect(self) -> bool:
        """Connect using the connection manager."""
        return await self._connection_manager.connect(self._actual_connect)

    async def disconnect(self) -> None:
        """Disconnect using the connection manager."""
        await self._connection_manager.disconnect(self._actual_disconnect)

    async def subscribe_to_data(self, symbols: list, data_types: list) -> bool:
        if self.should_fail_subscribe:
@ -62,7 +73,8 @@ class TestCollectorManager:
    @pytest.fixture
    def manager(self):
        """Create a test manager instance."""
        return CollectorManager("test_manager", global_health_check_interval=1.0)
        test_logger = get_logger("test_manager_logger")
        return CollectorManager("test_manager", global_health_check_interval=1.0, logger=test_logger)

    @pytest.fixture
    def mock_collector(self):
tests/data/test_ohlcv_data.py (new file, 230 lines)
@ -0,0 +1,230 @@
"""
Unit tests for the OHLCVData module.
"""

import pytest
from datetime import datetime, timezone
from decimal import Decimal

from data.common.ohlcv_data import OHLCVData, DataValidationError, validate_ohlcv_data


class TestOHLCVData:
    """Test cases for OHLCVData validation."""

    def test_valid_ohlcv_data(self):
        """Test creating valid OHLCV data."""
        ohlcv = OHLCVData(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=datetime.now(timezone.utc),
            open=Decimal("50000"),
            high=Decimal("50100"),
            low=Decimal("49900"),
            close=Decimal("50050"),
            volume=Decimal("1.5"),
            trades_count=100
        )

        assert ohlcv.symbol == "BTC-USDT"
        assert ohlcv.timeframe == "1m"
        assert isinstance(ohlcv.open, Decimal)
        assert ohlcv.trades_count == 100

    def test_invalid_ohlcv_relationships(self):
        """Test OHLCV validation for invalid price relationships."""
        with pytest.raises(DataValidationError):
            OHLCVData(
                symbol="BTC-USDT",
                timeframe="1m",
                timestamp=datetime.now(timezone.utc),
                open=Decimal("50000"),
                high=Decimal("49000"),  # High is less than open
                low=Decimal("49900"),
                close=Decimal("50050"),
                volume=Decimal("1.5")
            )

    def test_ohlcv_decimal_conversion(self):
        """Test automatic conversion to Decimal."""
        ohlcv = OHLCVData(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=datetime.now(timezone.utc),
            open=50000.0,  # float
            high=50100,    # int
            low=49900,     # int
            close=50050.0, # float
            volume=1.5     # float
        )

        assert isinstance(ohlcv.open, Decimal)
        assert isinstance(ohlcv.high, Decimal)
        assert isinstance(ohlcv.low, Decimal)
        assert isinstance(ohlcv.close, Decimal)
        assert isinstance(ohlcv.volume, Decimal)

    def test_timezone_handling(self):
        """Test that naive datetimes get UTC timezone."""
        naive_timestamp = datetime(2023, 1, 1, 12, 0, 0)

        ohlcv = OHLCVData(
            symbol="BTC-USDT",
            timeframe="1m",
            timestamp=naive_timestamp,
            open=50000,
            high=50100,
            low=49900,
            close=50050,
            volume=1.5
        )

        assert ohlcv.timestamp.tzinfo == timezone.utc

    def test_invalid_price_types(self):
        """Test validation fails for invalid price types."""
        with pytest.raises(DataValidationError, match="All OHLCV prices must be numeric"):
            OHLCVData(
                symbol="BTC-USDT",
                timeframe="1m",
                timestamp=datetime.now(timezone.utc),
                open="invalid",  # Invalid type
                high=50100,
                low=49900,
                close=50050,
                volume=1.5
            )

    def test_invalid_volume_type(self):
        """Test validation fails for invalid volume type."""
        with pytest.raises(DataValidationError, match="Volume must be numeric"):
            OHLCVData(
                symbol="BTC-USDT",
                timeframe="1m",
                timestamp=datetime.now(timezone.utc),
                open=50000,
                high=50100,
                low=49900,
                close=50050,
                volume="invalid"  # Invalid type
            )


class TestValidateOhlcvData:
    """Test cases for validate_ohlcv_data function."""

    def test_validate_success(self):
        """Test successful OHLCV data validation."""
        raw_data = {
            "timestamp": 1609459200000,  # Unix timestamp in ms
            "open": "50000",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5",
            "trades_count": 100
        }

        ohlcv = validate_ohlcv_data(raw_data, "BTC-USDT", "1m")

        assert ohlcv.symbol == "BTC-USDT"
        assert ohlcv.timeframe == "1m"
        assert ohlcv.trades_count == 100
        assert isinstance(ohlcv.open, Decimal)
        assert ohlcv.open == Decimal("50000")

    def test_validate_missing_field(self):
        """Test validation with missing required field."""
        raw_data = {
            "timestamp": 1609459200000,
            "open": "50000",
            "high": "50100",
            # Missing 'low' field
            "close": "50050",
            "volume": "1.5"
        }

        with pytest.raises(DataValidationError, match="Missing required field: low"):
            validate_ohlcv_data(raw_data, "BTC-USDT", "1m")

    def test_validate_invalid_timestamp_string(self):
        """Test validation with invalid timestamp string."""
        raw_data = {
            "timestamp": "invalid_timestamp",
            "open": "50000",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5"
        }

        with pytest.raises(DataValidationError):
            validate_ohlcv_data(raw_data, "BTC-USDT", "1m")

    def test_validate_timestamp_formats(self):
        """Test validation with different timestamp formats."""
        base_data = {
            "open": "50000",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5"
        }

        # Unix timestamp in milliseconds
        data1 = {**base_data, "timestamp": 1609459200000}
        ohlcv1 = validate_ohlcv_data(data1, "BTC-USDT", "1m")
        assert isinstance(ohlcv1.timestamp, datetime)

        # Unix timestamp in seconds (float)
        data2 = {**base_data, "timestamp": 1609459200.5}
        ohlcv2 = validate_ohlcv_data(data2, "BTC-USDT", "1m")
        assert isinstance(ohlcv2.timestamp, datetime)

        # ISO format string
        data3 = {**base_data, "timestamp": "2021-01-01T00:00:00Z"}
        ohlcv3 = validate_ohlcv_data(data3, "BTC-USDT", "1m")
        assert isinstance(ohlcv3.timestamp, datetime)

        # Already a datetime object
        data4 = {**base_data, "timestamp": datetime.now(timezone.utc)}
        ohlcv4 = validate_ohlcv_data(data4, "BTC-USDT", "1m")
        assert isinstance(ohlcv4.timestamp, datetime)

    def test_validate_invalid_numeric_data(self):
        """Test validation with invalid numeric price data."""
        raw_data = {
            "timestamp": 1609459200000,
            "open": "invalid_number",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5"
        }

        with pytest.raises(DataValidationError, match="Invalid OHLCV data for BTC-USDT"):
            validate_ohlcv_data(raw_data, "BTC-USDT", "1m")

    def test_validate_with_optional_fields(self):
        """Test validation works correctly with optional fields."""
        raw_data = {
            "timestamp": 1609459200000,
            "open": "50000",
            "high": "50100",
            "low": "49900",
            "close": "50050",
            "volume": "1.5"
            # No trades_count
        }

        ohlcv = validate_ohlcv_data(raw_data, "BTC-USDT", "1m")
        assert ohlcv.trades_count is None

        # With trades_count
        raw_data["trades_count"] = 250
        ohlcv = validate_ohlcv_data(raw_data, "BTC-USDT", "1m")
        assert ohlcv.trades_count == 250


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
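The tests above pin down `validate_ohlcv_data`'s contract tightly enough to reconstruct it. A hedged sketch; the real implementation in `data/common/ohlcv_data.py` may differ in details such as the exact milliseconds-vs-seconds heuristic:

```python
from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation

from data.common.ohlcv_data import OHLCVData, DataValidationError

REQUIRED = ("timestamp", "open", "high", "low", "close", "volume")


def validate_ohlcv_data(raw: dict, symbol: str, timeframe: str) -> OHLCVData:
    """Reconstruction of the validator's behavior as exercised by the tests."""
    for field in REQUIRED:
        if field not in raw:
            raise DataValidationError(f"Missing required field: {field}")

    ts = raw["timestamp"]
    if isinstance(ts, datetime):
        timestamp = ts
    elif isinstance(ts, (int, float)):
        # Heuristic: values above 1e12 are Unix milliseconds, not seconds.
        seconds = ts / 1000 if ts > 1e12 else ts
        timestamp = datetime.fromtimestamp(seconds, tz=timezone.utc)
    else:
        try:
            timestamp = datetime.fromisoformat(str(ts).replace("Z", "+00:00"))
        except ValueError as exc:
            raise DataValidationError(f"Invalid timestamp: {ts}") from exc

    try:
        return OHLCVData(
            symbol=symbol,
            timeframe=timeframe,
            timestamp=timestamp,
            open=Decimal(str(raw["open"])),
            high=Decimal(str(raw["high"])),
            low=Decimal(str(raw["low"])),
            close=Decimal(str(raw["close"])),
            volume=Decimal(str(raw["volume"])),
            trades_count=raw.get("trades_count"),
        )
    except (InvalidOperation, DataValidationError) as exc:
        # Matches the "Invalid OHLCV data for BTC-USDT" assertion above.
        raise DataValidationError(f"Invalid OHLCV data for {symbol}: {exc}") from exc
```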