2025-06-06 21:54:45 +08:00
|
|
|
"""Repository for raw_trades table operations."""
|
|
|
|
|
|
2025-06-06 23:51:21 +08:00
|
|
|
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional, List

from sqlalchemy import desc, text

from ..models import RawTrade
from data.base_collector import MarketDataPoint
from .base_repository import BaseRepository, DatabaseOperationError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RawTradeRepository(BaseRepository):
    """Repository for raw_trades table operations.

    Every public method opens its own session through ``self.get_session()``
    and converts any failure into ``DatabaseOperationError`` after logging it,
    so callers only need to handle that one exception type.
    """

    def insert_market_data_point(self, data_point: MarketDataPoint) -> bool:
        """Insert a market data point into raw_trades table using the ORM.

        Args:
            data_point: Source of the row. Its ``exchange``, ``symbol``,
                ``timestamp``, ``data_type.value`` and ``data`` attributes
                are copied onto a new ``RawTrade``.

        Returns:
            True once the row has been added and committed.

        Raises:
            DatabaseOperationError: If building or committing the row fails.
        """
        try:
            with self.get_session() as session:
                new_trade = RawTrade(
                    exchange=data_point.exchange,
                    symbol=data_point.symbol,
                    timestamp=data_point.timestamp,
                    data_type=data_point.data_type.value,
                    raw_data=data_point.data,
                )
                session.add(new_trade)
                session.commit()

                self.log_debug(f"Stored raw {data_point.data_type.value} data for {data_point.symbol}")
                return True

        except Exception as e:
            self.log_error(f"Error storing raw data for {data_point.symbol}: {e}")
            # Chain the original exception so the root cause stays in the traceback.
            raise DatabaseOperationError(f"Failed to store raw data: {e}") from e

    def insert_raw_websocket_data(self,
                                  exchange: str,
                                  symbol: str,
                                  data_type: str,
                                  raw_data: Dict[str, Any],
                                  timestamp: Optional[datetime] = None) -> bool:
        """Insert raw WebSocket data for debugging purposes using the ORM.

        Args:
            exchange: Value stored in the row's ``exchange`` column.
            symbol: Value stored in the row's ``symbol`` column.
            data_type: Value stored in the row's ``data_type`` column.
            raw_data: Payload stored in the row's ``raw_data`` column.
            timestamp: Timestamp for the row. When omitted, the current
                timezone-aware UTC time is used.

        Returns:
            True once the row has been added and committed.

        Raises:
            DatabaseOperationError: If building or committing the row fails.
        """
        try:
            with self.get_session() as session:
                # ``datetime`` here is the class (``from datetime import datetime``),
                # so the UTC tzinfo must come from ``timezone.utc``;
                # ``datetime.timezone`` does not exist on the class.
                effective_timestamp = (
                    timestamp if timestamp is not None else datetime.now(timezone.utc)
                )
                new_trade = RawTrade(
                    exchange=exchange,
                    symbol=symbol,
                    timestamp=effective_timestamp,
                    data_type=data_type,
                    raw_data=raw_data,
                )
                session.add(new_trade)
                session.commit()

                self.log_debug(f"Stored raw WebSocket data: {data_type} for {symbol}")
                return True

        except Exception as e:
            self.log_error(f"Error storing raw WebSocket data for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to store raw WebSocket data: {e}") from e

    def get_raw_trades(self,
                       symbol: str,
                       data_type: str,
                       start_time: datetime,
                       end_time: datetime,
                       exchange: str = "okx",
                       limit: Optional[int] = None) -> List[Dict[str, Any]]:
        """Retrieve raw trades from the database using the ORM.

        Rows are matched on exchange, symbol and data type, restricted to
        ``start_time <= timestamp <= end_time`` (both bounds inclusive) and
        returned in ascending timestamp order.

        Args:
            symbol: Symbol to match.
            data_type: Data type to match.
            start_time: Inclusive lower bound on the row timestamp.
            end_time: Inclusive upper bound on the row timestamp.
            exchange: Exchange to match; defaults to ``"okx"``.
            limit: Maximum number of rows to return. ``None`` or ``0``
                means no limit is applied.

        Returns:
            One dict per row with the keys ``id``, ``exchange``, ``symbol``,
            ``timestamp``, ``data_type``, ``raw_data`` and ``created_at``.

        Raises:
            DatabaseOperationError: If the query fails.
        """
        try:
            with self.get_session() as session:
                query = (
                    session.query(RawTrade)
                    .filter(
                        RawTrade.exchange == exchange,
                        RawTrade.symbol == symbol,
                        RawTrade.data_type == data_type,
                        RawTrade.timestamp >= start_time,
                        RawTrade.timestamp <= end_time,
                    )
                    .order_by(RawTrade.timestamp.asc())
                )

                # Truthiness check kept on purpose: a limit of 0 is treated
                # the same as None (no limit), as existing callers may rely on.
                if limit:
                    query = query.limit(limit)

                results = query.all()

                # Copy the column values into plain dicts while the session
                # is still open, so callers never touch ORM instances.
                trades = [
                    {
                        "id": r.id, "exchange": r.exchange, "symbol": r.symbol,
                        "timestamp": r.timestamp, "data_type": r.data_type,
                        "raw_data": r.raw_data, "created_at": r.created_at,
                    }
                    for r in results
                ]

                self.log_info(f"Retrieved {len(trades)} raw trades for {symbol} {data_type}")
                return trades

        except Exception as e:
            self.log_error(f"Error retrieving raw trades for {symbol}: {e}")
            raise DatabaseOperationError(f"Failed to retrieve raw trades: {e}") from e

    def cleanup_old_raw_data(self, days_to_keep: int = 7) -> int:
        """Clean up old raw data to prevent table bloat.

        Deletes every ``raw_trades`` row whose ``created_at`` is older than
        ``days_to_keep`` days before the current UTC time.

        Args:
            days_to_keep: Number of days to retain data.

        Returns:
            Number of records deleted.

        Raises:
            DatabaseOperationError: If the delete statement or commit fails.
        """
        try:
            # ``timezone.utc`` (not ``datetime.timezone.utc``): ``datetime`` is
            # the class here, and the old spelling raised AttributeError on
            # every call, so no cleanup ever ran.
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)

            with self.get_session() as session:
                result = session.execute(
                    text("DELETE FROM raw_trades WHERE created_at < :cutoff_date"),
                    {"cutoff_date": cutoff_date},
                )
                # Read the affected-row count before committing.
                deleted_count = result.rowcount
                session.commit()

                self.log_info(f"Cleaned up {deleted_count} old raw data records")
                return deleted_count

        except Exception as e:
            self.log_error(f"Failed to cleanup raw data: {e}")
            raise DatabaseOperationError(f"Failed to cleanup raw data: {e}") from e

    def get_raw_data_stats(self) -> Dict[str, Any]:
        """Get statistics about raw data storage.

        Returns:
            A dict with the keys ``total_records``, ``unique_symbols``,
            ``data_types``, ``oldest_record``, ``newest_record`` and
            ``table_size`` taken from a single aggregate query, or
            ``{"status": "No data available"}`` if the query yields no row.

        Raises:
            DatabaseOperationError: If the statistics query fails.
        """
        try:
            with self.get_session() as session:
                # NOTE(review): pg_size_pretty / pg_total_relation_size are
                # PostgreSQL functions, so this query assumes a PostgreSQL backend.
                result = session.execute(text("""
                    SELECT
                        COUNT(*) as total_records,
                        COUNT(DISTINCT symbol) as unique_symbols,
                        COUNT(DISTINCT data_type) as data_types,
                        MIN(created_at) as oldest_record,
                        MAX(created_at) as newest_record,
                        pg_size_pretty(pg_total_relation_size('raw_trades')) as table_size
                    FROM raw_trades
                """)).fetchone()

                if result:
                    return dict(result._mapping)

                return {"status": "No data available"}

        except Exception as e:
            self.log_error(f"Failed to get raw data stats: {e}")
            raise DatabaseOperationError(f"Failed to get raw data stats: {e}") from e
|