TCPDashboard/database/repositories/raw_trade_repository.py

"""Repository for raw_trades table operations."""
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from sqlalchemy import desc, text
from ..models import RawTrade
from data.collector.base_collector import MarketDataPoint
from .base_repository import BaseRepository, DatabaseOperationError
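# Schema sketch for the raw_trades table, inferred from the columns this
# repository reads and writes below (the authoritative definition is the
# RawTrade model in ..models). Column types here are illustrative assumptions
# only, based on the PostgreSQL functions used further down:
#
#     CREATE TABLE raw_trades (
#         id         BIGSERIAL PRIMARY KEY,
#         exchange   TEXT        NOT NULL,
#         symbol     TEXT        NOT NULL,
#         timestamp  TIMESTAMPTZ NOT NULL,
#         data_type  TEXT        NOT NULL,
#         raw_data   JSONB       NOT NULL,
#         created_at TIMESTAMPTZ DEFAULT now()
#     );
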
class RawTradeRepository(BaseRepository):
    """Repository for raw_trades table operations."""

    def insert_market_data_point(self, data_point: MarketDataPoint) -> bool:
        """Insert a market data point into the raw_trades table using the ORM."""
        try:
            with self.get_session() as session:
                new_trade = RawTrade(
                    exchange=data_point.exchange,
                    symbol=data_point.symbol,
                    timestamp=data_point.timestamp,
                    data_type=data_point.data_type.value,
                    raw_data=data_point.data,
                )
                session.add(new_trade)
                session.commit()
                self.log_debug(
                    f"Stored raw {data_point.data_type.value} data for {data_point.symbol}"
                )
                return True
        except Exception as e:
            self.log_error(f"Error storing raw data for {data_point.symbol}: {e}")
            raise DatabaseOperationError(f"Failed to store raw data: {e}")

    def insert_raw_websocket_data(
        self,
        exchange: str,
        symbol: str,
        data_type: str,
        raw_data: Dict[str, Any],
        timestamp: Optional[datetime] = None,
    ) -> bool:
        """Insert raw WebSocket data for debugging purposes using the ORM."""
        try:
            with self.get_session() as session:
                new_trade = RawTrade(
                    exchange=exchange,
                    symbol=symbol,
                    timestamp=timestamp or datetime.now(timezone.utc),
                    data_type=data_type,
                    raw_data=raw_data,
                )
                session.add(new_trade)
                session.commit()
                self.log_debug(f"Stored raw WebSocket data: {data_type} for {symbol}")
                return True
        except Exception as e:
            self.log_error(f"Error storing raw WebSocket data for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to store raw WebSocket data: {e}")

    def get_raw_trades(
        self,
        symbol: str,
        data_type: str,
        start_time: datetime,
        end_time: datetime,
        exchange: str = "okx",
        limit: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Retrieve raw trades from the database using the ORM."""
        try:
            with self.get_session() as session:
                query = (
                    session.query(RawTrade)
                    .filter(
                        RawTrade.exchange == exchange,
                        RawTrade.symbol == symbol,
                        RawTrade.data_type == data_type,
                        RawTrade.timestamp >= start_time,
                        RawTrade.timestamp <= end_time,
                    )
                    .order_by(RawTrade.timestamp.asc())
                )
                if limit is not None:
                    query = query.limit(limit)
                results = query.all()
                trades = [
                    {
                        "id": r.id,
                        "exchange": r.exchange,
                        "symbol": r.symbol,
                        "timestamp": r.timestamp,
                        "data_type": r.data_type,
                        "raw_data": r.raw_data,
                        "created_at": r.created_at,
                    }
                    for r in results
                ]
                self.log_info(f"Retrieved {len(trades)} raw trades for {symbol} {data_type}")
                return trades
        except Exception as e:
            self.log_error(f"Error retrieving raw trades for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to retrieve raw trades: {e}")

    def cleanup_old_raw_data(self, days_to_keep: int = 7) -> int:
        """
        Clean up old raw data to prevent table bloat.

        Args:
            days_to_keep: Number of days to retain data.

        Returns:
            Number of records deleted.
        """
        try:
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
            with self.get_session() as session:
                result = session.execute(
                    text("DELETE FROM raw_trades WHERE created_at < :cutoff_date"),
                    {"cutoff_date": cutoff_date},
                )
                deleted_count = result.rowcount
                session.commit()
                self.log_info(f"Cleaned up {deleted_count} old raw data records")
                return deleted_count
        except Exception as e:
            self.log_error(f"Failed to cleanup raw data: {e}")
            raise DatabaseOperationError(f"Failed to cleanup raw data: {e}")

    def get_raw_data_stats(self) -> Dict[str, Any]:
        """Get statistics about raw data storage (uses PostgreSQL-specific functions)."""
        try:
            with self.get_session() as session:
                result = session.execute(text("""
                    SELECT
                        COUNT(*) AS total_records,
                        COUNT(DISTINCT symbol) AS unique_symbols,
                        COUNT(DISTINCT data_type) AS data_types,
                        MIN(created_at) AS oldest_record,
                        MAX(created_at) AS newest_record,
                        pg_size_pretty(pg_total_relation_size('raw_trades')) AS table_size
                    FROM raw_trades
                """)).fetchone()
                if result:
                    return dict(result._mapping)
                return {"status": "No data available"}
        except Exception as e:
            self.log_error(f"Failed to get raw data stats: {e}")
            raise DatabaseOperationError(f"Failed to get raw data stats: {e}")