- Introduced a comprehensive data collection framework, including `CollectorServiceConfig`, `BaseDataCollector`, and `CollectorManager`, enhancing modularity and maintainability. - Developed `CollectorFactory` for streamlined collector creation, promoting separation of concerns and improved configuration handling. - Enhanced `DataCollectionService` to utilize the new architecture, ensuring robust error handling and logging practices. - Added `TaskManager` for efficient management of asynchronous tasks, improving performance and resource management. - Implemented health monitoring and auto-recovery features in `CollectorManager`, ensuring reliable operation of data collectors. - Updated imports across the codebase to reflect the new structure, ensuring consistent access to components. These changes significantly improve the architecture and maintainability of the data collection service, aligning with project standards for modularity, performance, and error handling.
160 lines
6.3 KiB
Python
160 lines
6.3 KiB
Python
"""Repository for raw_trades table operations."""
|
|
|
|
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional, List

from sqlalchemy import desc, text

from data.collector.base_collector import MarketDataPoint

from ..models import RawTrade
from .base_repository import BaseRepository, DatabaseOperationError
|
|
|
|
|
|
class RawTradeRepository(BaseRepository):
    """Repository for raw_trades table operations."""

    def insert_market_data_point(self, data_point: MarketDataPoint) -> bool:
        """
        Insert a market data point into raw_trades table using the ORM.

        Args:
            data_point: Collected market data point; its `data_type` is an
                enum whose `.value` is stored as the row's data_type.

        Returns:
            True if the row was committed.

        Raises:
            DatabaseOperationError: If the insert fails for any reason.
        """
        try:
            with self.get_session() as session:
                new_trade = RawTrade(
                    exchange=data_point.exchange,
                    symbol=data_point.symbol,
                    timestamp=data_point.timestamp,
                    data_type=data_point.data_type.value,
                    raw_data=data_point.data
                )
                session.add(new_trade)
                session.commit()

            self.log_debug(f"Stored raw {data_point.data_type.value} data for {data_point.symbol}")
            return True

        except Exception as e:
            self.log_error(f"Error storing raw data for {data_point.symbol}: {e}")
            raise DatabaseOperationError(f"Failed to store raw data: {e}") from e

    def insert_raw_websocket_data(self,
                                  exchange: str,
                                  symbol: str,
                                  data_type: str,
                                  raw_data: Dict[str, Any],
                                  timestamp: Optional[datetime] = None) -> bool:
        """
        Insert raw WebSocket data for debugging purposes using the ORM.

        Args:
            exchange: Exchange identifier (e.g. "okx").
            symbol: Trading pair symbol.
            data_type: Kind of payload being stored.
            raw_data: The raw message payload.
            timestamp: Event time; defaults to the current UTC time when omitted.

        Returns:
            True if the row was committed.

        Raises:
            DatabaseOperationError: If the insert fails for any reason.
        """
        try:
            with self.get_session() as session:
                new_trade = RawTrade(
                    exchange=exchange,
                    symbol=symbol,
                    # BUGFIX: was datetime.now(datetime.timezone.utc) — `datetime`
                    # is the class here, so that raised AttributeError at runtime.
                    timestamp=timestamp or datetime.now(timezone.utc),
                    data_type=data_type,
                    raw_data=raw_data
                )
                session.add(new_trade)
                session.commit()

            self.log_debug(f"Stored raw WebSocket data: {data_type} for {symbol}")
            return True

        except Exception as e:
            self.log_error(f"Error storing raw WebSocket data for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to store raw WebSocket data: {e}") from e

    def get_raw_trades(self,
                       symbol: str,
                       data_type: str,
                       start_time: datetime,
                       end_time: datetime,
                       exchange: str = "okx",
                       limit: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        Retrieve raw trades from the database using the ORM.

        Args:
            symbol: Trading pair symbol to filter on.
            data_type: Data type to filter on.
            start_time: Inclusive lower bound on timestamp.
            end_time: Inclusive upper bound on timestamp.
            exchange: Exchange identifier; defaults to "okx".
            limit: Optional maximum number of rows to return.

        Returns:
            List of row dicts ordered by ascending timestamp.

        Raises:
            DatabaseOperationError: If the query fails.
        """
        try:
            with self.get_session() as session:
                query = (
                    session.query(RawTrade)
                    .filter(
                        RawTrade.exchange == exchange,
                        RawTrade.symbol == symbol,
                        RawTrade.data_type == data_type,
                        RawTrade.timestamp >= start_time,
                        RawTrade.timestamp <= end_time
                    )
                    .order_by(RawTrade.timestamp.asc())
                )

                # Explicit None check so that an explicit limit of 0 is honored
                # (the previous truthiness test silently ignored limit=0).
                if limit is not None:
                    query = query.limit(limit)

                results = query.all()

                trades = [
                    {
                        "id": r.id, "exchange": r.exchange, "symbol": r.symbol,
                        "timestamp": r.timestamp, "data_type": r.data_type,
                        "raw_data": r.raw_data, "created_at": r.created_at
                    } for r in results
                ]

            self.log_info(f"Retrieved {len(trades)} raw trades for {symbol} {data_type}")
            return trades

        except Exception as e:
            self.log_error(f"Error retrieving raw trades for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to retrieve raw trades: {e}") from e

    def cleanup_old_raw_data(self, days_to_keep: int = 7) -> int:
        """
        Clean up old raw data to prevent table bloat.

        Args:
            days_to_keep: Number of days to retain data.

        Returns:
            Number of records deleted.

        Raises:
            DatabaseOperationError: If the delete fails.
        """
        try:
            # BUGFIX: was datetime.now(datetime.timezone.utc), which raised
            # AttributeError because `datetime` is the imported class.
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)

            with self.get_session() as session:
                # Raw SQL: created_at (insert time) governs retention, not the
                # market-event timestamp column.
                result = session.execute(
                    text("DELETE FROM raw_trades WHERE created_at < :cutoff_date"),
                    {"cutoff_date": cutoff_date}
                )
                deleted_count = result.rowcount
                session.commit()

            self.log_info(f"Cleaned up {deleted_count} old raw data records")
            return deleted_count

        except Exception as e:
            self.log_error(f"Failed to cleanup raw data: {e}")
            raise DatabaseOperationError(f"Failed to cleanup raw data: {e}") from e

    def get_raw_data_stats(self) -> Dict[str, Any]:
        """
        Get statistics about raw data storage.

        Returns:
            Dict with record counts, distinct symbol/data-type counts,
            oldest/newest created_at, and the PostgreSQL-reported table size;
            or a status dict when the table is empty.

        Raises:
            DatabaseOperationError: If the stats query fails.
        """
        try:
            with self.get_session() as session:
                # pg_size_pretty / pg_total_relation_size are PostgreSQL-only.
                result = session.execute(text("""
                    SELECT
                        COUNT(*) as total_records,
                        COUNT(DISTINCT symbol) as unique_symbols,
                        COUNT(DISTINCT data_type) as data_types,
                        MIN(created_at) as oldest_record,
                        MAX(created_at) as newest_record,
                        pg_size_pretty(pg_total_relation_size('raw_trades')) as table_size
                    FROM raw_trades
                """)).fetchone()

                if result:
                    return dict(result._mapping)

                return {"status": "No data available"}

        except Exception as e:
            self.log_error(f"Failed to get raw data stats: {e}")
            raise DatabaseOperationError(f"Failed to get raw data stats: {e}") from e