data ingestion

This commit is contained in:
Vasily.onl
2025-06-13 16:49:29 +08:00
parent f09864d61b
commit 622fda9d2e
6 changed files with 408 additions and 10 deletions

View File

@@ -5,6 +5,7 @@ from contextlib import contextmanager
from typing import Optional
from ..connection import get_db_manager
from utils.logger import get_logger
class DatabaseOperationError(Exception):
@@ -17,24 +18,24 @@ class BaseRepository:
def __init__(self, logger: Optional[logging.Logger] = None):
    """
    Initialize repository with optional logger.

    Args:
        logger: Logger to use for this repository. When omitted (None), a
            logger named after the concrete repository class is obtained
            from get_logger().
    """
    # Single assignment: fall back to a class-named logger only when the
    # caller did not supply one.
    self.logger = logger if logger is not None else get_logger(self.__class__.__name__)
    self._db_manager = get_db_manager()
    # NOTE(review): initialize() runs on every repository construction;
    # presumably it is idempotent on the shared manager - confirm.
    self._db_manager.initialize()
def log_info(self, message: str) -> None:
    """
    Log info message if logger is available.

    Args:
        message: Text to emit at INFO level.
    """
    # Emit exactly once, and only when a logger is present, so a repository
    # without a logger neither duplicates output nor raises AttributeError.
    if self.logger:
        self.logger.info(message)
def log_debug(self, message: str) -> None:
    """
    Log debug message if logger is available.

    Args:
        message: Text to emit at DEBUG level.
    """
    # Emit exactly once, and only when a logger is present, so a repository
    # without a logger neither duplicates output nor raises AttributeError.
    if self.logger:
        self.logger.debug(message)
def log_error(self, message: str) -> None:
    """
    Log error message if logger is available.

    Args:
        message: Text to emit at ERROR level.
    """
    # Emit exactly once, and only when a logger is present, so a repository
    # without a logger neither duplicates output nor raises AttributeError.
    if self.logger:
        self.logger.error(message)
@contextmanager
def get_session(self):

View File

@@ -10,6 +10,7 @@ from sqlalchemy.dialects.postgresql import insert
from ..models import MarketData
from data.common.data_types import OHLCVCandle
from .base_repository import BaseRepository, DatabaseOperationError
from tqdm import tqdm
class MarketDataRepository(BaseRepository):
@@ -68,6 +69,63 @@ class MarketDataRepository(BaseRepository):
self.log_error(f"Error storing candle {candle.symbol} {candle.timeframe}: {e}")
raise DatabaseOperationError(f"Failed to store candle: {e}")
def upsert_candles_batch(self, candles: List[OHLCVCandle], force_update: bool = False, batch_size: int = 1000) -> int:
    """
    Insert or update multiple candles in the market_data table in batches.

    Args:
        candles: Candles to persist. Each row is written with the candle's
            ``end_time`` as its ``timestamp`` column.
        force_update: When True, a row that conflicts on
            (exchange, symbol, timeframe, timestamp) has its OHLCV and
            trades_count columns overwritten with the incoming values.
            When False, conflicting rows are left untouched.
        batch_size: Maximum number of candles sent per INSERT statement.
            Must be positive.

    Returns:
        The number of candles submitted to the database. With
        ``force_update=False`` this also counts candles that the conflict
        clause skipped, so it is not a count of newly inserted rows.

    Raises:
        DatabaseOperationError: If ``batch_size`` is not positive or if any
            batch fails. Each batch calls ``session.commit()`` on its own
            session, so batches committed before the failure are presumably
            retained - verify against get_session().
    """
    # A non-positive step would either make range() raise an opaque error (0)
    # or silently iterate nothing (negative); reject it explicitly instead.
    if batch_size <= 0:
        raise DatabaseOperationError(
            f"Failed to store candles in batch: batch_size must be positive, got {batch_size}"
        )
    # Nothing to write: skip the progress bar and any session work.
    if not candles:
        return 0

    # Loop-invariant pieces of the statement, built once.
    conflict_columns = ['exchange', 'symbol', 'timeframe', 'timestamp']
    action = "Updated" if force_update else "Stored"

    total_processed = 0
    try:
        for i in tqdm(range(0, len(candles), batch_size), desc="Inserting candles in batches"):
            batch = candles[i:i + batch_size]

            values = [
                {
                    'exchange': candle.exchange,
                    'symbol': candle.symbol,
                    'timeframe': candle.timeframe,
                    'timestamp': candle.end_time,
                    'open': candle.open,
                    'high': candle.high,
                    'low': candle.low,
                    'close': candle.close,
                    'volume': candle.volume,
                    'trades_count': candle.trade_count,
                }
                for candle in batch
            ]

            with self.get_session() as session:
                stmt = insert(MarketData).values(values)

                if force_update:
                    # ``excluded`` refers to the row that was proposed for insertion.
                    final_stmt = stmt.on_conflict_do_update(
                        index_elements=conflict_columns,
                        set_={
                            'open': stmt.excluded.open,
                            'high': stmt.excluded.high,
                            'low': stmt.excluded.low,
                            'close': stmt.excluded.close,
                            'volume': stmt.excluded.volume,
                            'trades_count': stmt.excluded.trades_count,
                        },
                    )
                else:
                    final_stmt = stmt.on_conflict_do_nothing(
                        index_elements=conflict_columns
                    )

                session.execute(final_stmt)
                session.commit()

                total_processed += len(batch)
                self.log_debug(f"{action} {len(batch)} candles in batch. Total processed: {total_processed}")

        return total_processed

    except Exception as e:
        self.log_error(f"Error storing candles in batch: {e}")
        # Chain explicitly so the original database error stays attached as the cause.
        raise DatabaseOperationError(f"Failed to store candles in batch: {e}") from e
def get_candles(self,
symbol: str,
timeframe: str,
@@ -77,6 +135,7 @@ class MarketDataRepository(BaseRepository):
"""
Retrieve candles from the database using the ORM.
"""
self.log_debug(f"DB: get_candles called with: symbol={symbol}, timeframe={timeframe}, start_time={start_time}, end_time={end_time}, exchange={exchange}")
try:
with self.get_session() as session:
query = (
@@ -102,7 +161,7 @@ class MarketDataRepository(BaseRepository):
} for r in results
]
self.log_debug(f"Retrieved {len(candles)} candles for {symbol} {timeframe}")
self.log_debug(f"DB: Retrieved {len(candles)} candles for {symbol} {timeframe} from {start_time} to {end_time}")
return candles
except Exception as e:
@@ -195,4 +254,20 @@ class MarketDataRepository(BaseRepository):
except Exception as e:
self.log_error(f"Error retrieving candles as DataFrame: {e}")
raise DatabaseOperationError(f"Failed to retrieve candles as DataFrame: {e}")
raise DatabaseOperationError(f"Failed to retrieve candles as DataFrame: {e}")
def delete_candles_before_timestamp(self, timestamp: datetime) -> int:
    """
    Delete candles from the market_data table that are older than the specified timestamp.

    Args:
        timestamp: Cutoff; rows whose ``timestamp`` is strictly earlier than
            this value are deleted. Rows exactly at the cutoff are kept.

    Returns:
        The number of rows reported as deleted by the bulk delete.

    Raises:
        DatabaseOperationError: If the delete or commit fails; the original
            exception is attached as the cause.
    """
    try:
        with self.get_session() as session:
            # Bulk delete issued directly in SQL; synchronize_session=False
            # skips reconciling objects already loaded in the session.
            deleted_count = (
                session.query(MarketData)
                .filter(MarketData.timestamp < timestamp)
                .delete(synchronize_session=False)
            )
            session.commit()
            # Logged at WARNING because this removes data permanently.
            self.logger.warning(f"Deleted {deleted_count} candles older than {timestamp}")
            return deleted_count
    except Exception as e:
        self.log_error(f"Error deleting candles older than {timestamp}: {e}")
        raise DatabaseOperationError(f"Failed to delete candles: {e}") from e