Refactor raw trade management and enhance database operations

- Removed the `RawDataManager` class and integrated its functionality directly into the `RawTradeRepository`, streamlining the management of raw trade data.
- Implemented the `cleanup_old_raw_data` method to delete outdated records, preventing table bloat and improving performance.
- Added the `get_raw_data_stats` method to retrieve statistics about raw data storage, enhancing data management capabilities.
- Updated documentation to reflect the new methods and their usage, ensuring clarity for future developers.

These changes improve the maintainability and efficiency of the database operations related to raw trade data.
This commit is contained in:
Vasily.onl
2025-06-06 23:51:21 +08:00
parent b30c16bc33
commit 1466223b85
4 changed files with 82 additions and 435 deletions

View File

@@ -1,9 +1,9 @@
"""Repository for raw_trades table operations."""
from datetime import datetime
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from sqlalchemy import desc
from sqlalchemy import desc, text
from ..models import RawTrade
from data.base_collector import MarketDataPoint
@@ -106,4 +106,55 @@ class RawTradeRepository(BaseRepository):
except Exception as e:
self.log_error(f"Error retrieving raw trades for {symbol}: {e}")
raise DatabaseOperationError(f"Failed to retrieve raw trades: {e}")
raise DatabaseOperationError(f"Failed to retrieve raw trades: {e}")
def cleanup_old_raw_data(self, days_to_keep: int = 7) -> int:
    """
    Delete raw trade rows that fall outside the retention window.

    Runs a single parameterized ``DELETE`` against ``raw_trades`` for every
    row whose ``created_at`` is earlier than the current UTC time minus
    ``days_to_keep`` days, then commits the session.

    Args:
        days_to_keep: Number of days of data to retain.

    Returns:
        Number of records deleted, as reported by the statement's
        ``rowcount``.

    Raises:
        DatabaseOperationError: If computing the cutoff, executing the
            delete, or committing fails.
    """
    # ``datetime`` in this module is the class (``from datetime import
    # datetime``), which has no ``timezone`` attribute, so ``timezone``
    # must come from the ``datetime`` module itself.
    from datetime import timezone

    try:
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
        # NOTE(review): the comparison assumes ``created_at`` is stored as a
        # UTC / timezone-aware timestamp -- confirm against the table schema.
        with self.get_session() as session:
            result = session.execute(
                text("DELETE FROM raw_trades WHERE created_at < :cutoff_date"),
                {"cutoff_date": cutoff_date},
            )
            # Capture the count before committing so it can be logged and returned.
            deleted_count = result.rowcount
            session.commit()
            self.log_info(f"Cleaned up {deleted_count} old raw data records")
            return deleted_count
    except Exception as e:
        self.log_error(f"Failed to cleanup raw data: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise DatabaseOperationError(f"Failed to cleanup raw data: {e}") from e
def get_raw_data_stats(self) -> Dict[str, Any]:
    """
    Collect summary statistics for the ``raw_trades`` table.

    Runs one aggregate query returning the total row count, the number of
    distinct symbols and data types, the oldest and newest ``created_at``
    values, and the table size formatted by ``pg_size_pretty``.

    Returns:
        A dict keyed by the query's column aliases, or
        ``{"status": "No data available"}`` when no row comes back.

    Raises:
        DatabaseOperationError: If the query fails.
    """
    # The SQL text is kept exactly as written; only the Python around it is restructured.
    stats_query = text("""
SELECT
COUNT(*) as total_records,
COUNT(DISTINCT symbol) as unique_symbols,
COUNT(DISTINCT data_type) as data_types,
MIN(created_at) as oldest_record,
MAX(created_at) as newest_record,
pg_size_pretty(pg_total_relation_size('raw_trades')) as table_size
FROM raw_trades
""")
    try:
        with self.get_session() as session:
            stats_row = session.execute(stats_query).fetchone()
            # Guard clause: no usable row means there is nothing to report.
            if not stats_row:
                return {"status": "No data available"}
            return dict(stats_row._mapping)
    except Exception as e:
        self.log_error(f"Failed to get raw data stats: {e}")
        raise DatabaseOperationError(f"Failed to get raw data stats: {e}")