data ingestion
This commit is contained in:
@@ -5,6 +5,7 @@ from contextlib import contextmanager
|
||||
from typing import Optional
|
||||
|
||||
from ..connection import get_db_manager
|
||||
from utils.logger import get_logger
|
||||
|
||||
|
||||
class DatabaseOperationError(Exception):
|
||||
@@ -17,24 +18,24 @@ class BaseRepository:
|
||||
|
||||
def __init__(self, logger: Optional[logging.Logger] = None):
    """Initialize repository with optional logger.

    Args:
        logger: Logger to use for this repository. When ``None``, a logger
            is obtained from ``get_logger`` using the concrete class name.
    """
    # Single assignment: fall back to a class-named logger only when the
    # caller did not supply one.
    self.logger = get_logger(self.__class__.__name__) if logger is None else logger

    # Obtain the database manager and initialize it before first use.
    self._db_manager = get_db_manager()
    self._db_manager.initialize()
|
||||
|
||||
def log_info(self, message: str) -> None:
    """Log ``message`` at INFO level through ``self.logger``.

    Args:
        message: Text to log.
    """
    # Emit exactly once per call.
    self.logger.info(message)
|
||||
|
||||
def log_debug(self, message: str) -> None:
    """Log ``message`` at DEBUG level through ``self.logger``.

    Args:
        message: Text to log.
    """
    # Emit exactly once per call.
    self.logger.debug(message)
|
||||
|
||||
def log_error(self, message: str) -> None:
    """Log ``message`` at ERROR level through ``self.logger``.

    Args:
        message: Text to log.
    """
    # Emit exactly once per call.
    self.logger.error(message)
|
||||
|
||||
@contextmanager
|
||||
def get_session(self):
|
||||
|
||||
@@ -10,6 +10,7 @@ from sqlalchemy.dialects.postgresql import insert
|
||||
from ..models import MarketData
|
||||
from data.common.data_types import OHLCVCandle
|
||||
from .base_repository import BaseRepository, DatabaseOperationError
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class MarketDataRepository(BaseRepository):
|
||||
@@ -68,6 +69,63 @@ class MarketDataRepository(BaseRepository):
|
||||
self.log_error(f"Error storing candle {candle.symbol} {candle.timeframe}: {e}")
|
||||
raise DatabaseOperationError(f"Failed to store candle: {e}")
|
||||
|
||||
def upsert_candles_batch(self, candles: List[OHLCVCandle], force_update: bool = False, batch_size: int = 1000) -> int:
    """
    Insert or update multiple candles in the market_data table in batches.

    Each batch is sent as one multi-row PostgreSQL ``INSERT ... ON CONFLICT``
    statement and committed before the next batch is built.

    Args:
        candles: Candles to persist. Each candle's ``end_time`` is written
            to the ``timestamp`` column.
        force_update: When True, rows conflicting on
            (exchange, symbol, timeframe, timestamp) have their OHLCV and
            ``trades_count`` columns overwritten. When False, conflicting
            rows are left untouched.
        batch_size: Maximum number of candles per statement. Must be >= 1.

    Returns:
        The number of candles submitted, i.e. the summed size of every
        executed batch. Candles skipped by the conflict clause are still
        counted.

    Raises:
        DatabaseOperationError: If ``batch_size`` is not positive or if
            building/executing any batch fails. The underlying exception is
            attached as ``__cause__``.
    """
    # Loop-invariant values, computed once instead of per batch.
    conflict_columns = ['exchange', 'symbol', 'timeframe', 'timestamp']
    action = "Updated" if force_update else "Stored"

    total_processed = 0
    try:
        # A negative step would make range() empty and silently store
        # nothing; reject it the same way a zero step is rejected.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        for start in tqdm(range(0, len(candles), batch_size), desc="Inserting candles in batches"):
            batch = candles[start:start + batch_size]

            values = [
                {
                    'exchange': candle.exchange,
                    'symbol': candle.symbol,
                    'timeframe': candle.timeframe,
                    'timestamp': candle.end_time,
                    'open': candle.open,
                    'high': candle.high,
                    'low': candle.low,
                    'close': candle.close,
                    'volume': candle.volume,
                    'trades_count': candle.trade_count,
                }
                for candle in batch
            ]

            with self.get_session() as session:
                stmt = insert(MarketData).values(values)

                if force_update:
                    # ``excluded`` refers to the row proposed for insertion.
                    final_stmt = stmt.on_conflict_do_update(
                        index_elements=conflict_columns,
                        set_={
                            'open': stmt.excluded.open,
                            'high': stmt.excluded.high,
                            'low': stmt.excluded.low,
                            'close': stmt.excluded.close,
                            'volume': stmt.excluded.volume,
                            'trades_count': stmt.excluded.trades_count,
                        },
                    )
                else:
                    final_stmt = stmt.on_conflict_do_nothing(
                        index_elements=conflict_columns
                    )

                session.execute(final_stmt)
                session.commit()
                total_processed += len(batch)
                self.log_debug(f"{action} {len(batch)} candles in batch. Total processed: {total_processed}")

        return total_processed

    except Exception as e:
        self.log_error(f"Error storing candles in batch: {e}")
        raise DatabaseOperationError(f"Failed to store candles in batch: {e}") from e
|
||||
|
||||
def get_candles(self,
|
||||
symbol: str,
|
||||
timeframe: str,
|
||||
@@ -77,6 +135,7 @@ class MarketDataRepository(BaseRepository):
|
||||
"""
|
||||
Retrieve candles from the database using the ORM.
|
||||
"""
|
||||
self.log_debug(f"DB: get_candles called with: symbol={symbol}, timeframe={timeframe}, start_time={start_time}, end_time={end_time}, exchange={exchange}")
|
||||
try:
|
||||
with self.get_session() as session:
|
||||
query = (
|
||||
@@ -102,7 +161,7 @@ class MarketDataRepository(BaseRepository):
|
||||
} for r in results
|
||||
]
|
||||
|
||||
self.log_debug(f"Retrieved {len(candles)} candles for {symbol} {timeframe}")
|
||||
self.log_debug(f"DB: Retrieved {len(candles)} candles for {symbol} {timeframe} from {start_time} to {end_time}")
|
||||
return candles
|
||||
|
||||
except Exception as e:
|
||||
@@ -195,4 +254,20 @@ class MarketDataRepository(BaseRepository):
|
||||
|
||||
except Exception as e:
|
||||
self.log_error(f"Error retrieving candles as DataFrame: {e}")
|
||||
raise DatabaseOperationError(f"Failed to retrieve candles as DataFrame: {e}")
|
||||
raise DatabaseOperationError(f"Failed to retrieve candles as DataFrame: {e}")
|
||||
|
||||
def delete_candles_before_timestamp(self, timestamp: datetime) -> int:
    """
    Delete candles from the market_data table that are older than the specified timestamp.

    Args:
        timestamp: Cutoff. Rows whose ``MarketData.timestamp`` is strictly
            less than this value are deleted.

    Returns:
        The row count reported by the bulk delete.

    Raises:
        DatabaseOperationError: If the delete or commit fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        with self.get_session() as session:
            deleted_count = (
                session.query(MarketData)
                .filter(MarketData.timestamp < timestamp)
                # Bulk delete: do not reconcile objects already loaded in
                # the session.
                .delete(synchronize_session=False)
            )
            session.commit()
            # Logged at WARNING because the operation is destructive.
            self.logger.warning(f"Deleted {deleted_count} candles older than {timestamp}")
            return deleted_count
    except Exception as e:
        self.log_error(f"Error deleting candles older than {timestamp}: {e}")
        raise DatabaseOperationError(f"Failed to delete candles: {e}") from e
|
||||
Reference in New Issue
Block a user