"""Storage utilities to reconstruct an in-memory orderbook from a SQLite DB.
|
|
|
|
This module defines lightweight data structures for orderbook levels, trades,
|
|
and a `Storage` facade that can hydrate a `Book` incrementally from rows stored
|
|
in a SQLite file produced by an external data collector.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import List, Dict, Optional, Iterator, Tuple
|
|
import time
|
|
import logging
|
|
|
|
from models import OrderbookLevel, Trade, BookSnapshot, Book, MetricCalculator, Metric
|
|
from repositories.sqlite_repository import SQLiteOrderflowRepository
|
|
from parsers.orderbook_parser import OrderbookParser
|
|
|
|
class Storage:
    """High-level facade to read historical orderflow into a `Book`.

    Attributes:
        instrument: Symbol/instrument name (e.g., "BTC-USDT").
        book: In-memory orderbook that maintains the current state and tracks timestamps.
    """

    def __init__(self, instrument: str) -> None:
        """Initialize storage for a single instrument with an empty book.

        Args:
            instrument: Symbol/instrument name (e.g., "BTC-USDT").
        """
        self.instrument = instrument
        self.book = Book()
        # Pre-allocate shared float objects for common price points
        # (0.1 .. 100000.0 in 0.5 increments) so parsed levels reuse
        # the same objects instead of allocating new floats per row.
        self._price_cache = {float(p / 10): float(p / 10) for p in range(1, 1000001, 5)}
        # Debug flag forwarded to the parser.
        self._debug = False
        self._parser = OrderbookParser(price_cache=self._price_cache, debug=self._debug)

    def build_booktick_from_db(self, db_path: Path) -> None:
        """Hydrate the in-memory `book` from a SQLite database and calculate metrics.

        Builds a Book instance with sequential snapshots and calculates OBI/CVD
        metrics, persisting them to the `metrics` table of the same database.

        Args:
            db_path: Path to the SQLite database file.
        """
        # Start from a fresh book so repeated calls do not accumulate state.
        self.book = Book()

        metrics_repo = SQLiteOrderflowRepository(db_path)
        with metrics_repo.connect() as conn:
            # Ensure the output table exists before the first batch insert.
            if not metrics_repo.table_exists(conn, "metrics"):
                metrics_repo.create_metrics_table(conn)

            trades = metrics_repo.load_trades(conn)

            total_rows = metrics_repo.count_rows(conn, "book")
            if total_rows == 0:
                logging.info(f"No orderbook data found in {db_path}")
                return

            rows_iter = metrics_repo.iterate_book_rows(conn)
            self._create_snapshots_and_metrics(rows_iter, trades, total_rows, conn)

        logging.info(f"Processed {len(self.book.snapshots)} snapshots with metrics from {db_path}")

    @staticmethod
    def _group_trades_by_timestamp(trades: List[Trade]) -> Dict[int, List[Trade]]:
        """Index trades by integer timestamp for O(1) per-snapshot attachment.

        `_snapshot_from_row` expects a mapping keyed by timestamp; this builds
        it once so the hot row loop avoids scanning the full trade list.

        NOTE(review): assumes `Trade` exposes an int-compatible `timestamp`
        attribute — confirm against the models module.

        Args:
            trades: Flat list of trades loaded from the database.

        Returns:
            Mapping of integer timestamp to the trades that occurred at it.
        """
        grouped: Dict[int, List[Trade]] = {}
        for trade in trades:
            grouped.setdefault(int(trade.timestamp), []).append(trade)
        return grouped

    def _create_snapshots_and_metrics(self, rows_iter: Iterator[Tuple[int, str, str, int]], trades: List[Trade], total_rows: int, conn) -> None:
        """Create BookSnapshot instances and calculate metrics, storing them in database.

        Args:
            rows_iter: Iterator yielding (id, bids_text, asks_text, timestamp)
            trades: List of trades
            total_rows: Total number of rows in the book table
            conn: Database connection for storing metrics
        """
        # Running cumulative volume delta (CVD) across all snapshots.
        current_cvd = 0.0
        metrics_batch: List[Metric] = []
        batch_size = 1000  # Flush metrics to the DB in chunks for performance

        # BUGFIX: `_snapshot_from_row` expects trades keyed by timestamp,
        # not a flat list (list has no `.get`); group once up front.
        trades_by_timestamp = self._group_trades_by_timestamp(trades)

        # Single repository instance reused for every batch insert; the path
        # argument is irrelevant because we operate on the already-open `conn`.
        metrics_repo = SQLiteOrderflowRepository(Path("dummy"))

        # Log roughly every 5% of progress, at most once per second.
        log_every = max(1, total_rows // 20)

        processed = 0
        start_time = time.time()
        last_report_time = start_time
        last_obi: Optional[float] = None  # Survives batch flushes for logging

        for row_id, bids_text, asks_text, timestamp in rows_iter:
            snapshot = self._snapshot_from_row(row_id, bids_text, asks_text, timestamp, trades_by_timestamp)
            if snapshot is not None:
                # Calculate metrics for this snapshot.
                obi = MetricCalculator.calculate_obi(snapshot)
                # BUGFIX: the delta must come from this snapshot's own trades;
                # feeding the full trade list added the same global delta to
                # CVD on every row. (Confirm intended CVD semantics.)
                volume_delta = MetricCalculator.calculate_volume_delta(snapshot.trades)
                current_cvd = MetricCalculator.calculate_cvd(current_cvd, volume_delta)
                best_bid, best_ask = MetricCalculator.get_best_bid_ask(snapshot)
                last_obi = obi

                metrics_batch.append(Metric(
                    snapshot_id=row_id,
                    timestamp=int(timestamp),
                    obi=obi,
                    cvd=current_cvd,
                    best_bid=best_bid,
                    best_ask=best_ask
                ))

                # Add snapshot to book (for compatibility).
                self.book.add_snapshot(snapshot)

                # Flush the metrics batch once it reaches batch_size.
                if len(metrics_batch) >= batch_size:
                    metrics_repo.insert_metrics_batch(conn, metrics_batch)
                    conn.commit()
                    metrics_batch = []

            processed += 1

            # Report progress.
            current_time = time.time()
            if processed % log_every == 0 and current_time - last_report_time > 1.0:
                # BUGFIX: the original applied ':.3f' to the string 'N/A'
                # (ValueError) and read the OBI from a batch that is empty
                # right after a flush; format the tracked value conditionally.
                obi_text = f"{last_obi:.3f}" if last_obi is not None else "N/A"
                logging.info(
                    f"{processed / total_rows * 100:.1f}% - OBI: {obi_text} - "
                    f"CVD: {current_cvd:.1f} - {processed/(current_time-start_time):.1f} rows/sec"
                )
                last_report_time = current_time

        # Insert any remaining metrics.
        if metrics_batch:
            metrics_repo.insert_metrics_batch(conn, metrics_batch)
            conn.commit()

    def _create_snapshots_from_rows(self, rows_iter: Iterator[Tuple[int, str, str, int]], trades: List[Trade], total_rows: int) -> None:
        """Create BookSnapshot instances from database rows and add them to the book.

        Args:
            rows_iter: Iterator yielding (id, bids_text, asks_text, timestamp)
            trades: List of trades
            total_rows: Total number of rows in the book table
        """
        # Get reference to the book.
        book = self.book

        # BUGFIX: `_snapshot_from_row` expects trades keyed by timestamp,
        # not a flat list (list has no `.get`); group once up front.
        trades_by_timestamp = self._group_trades_by_timestamp(trades)

        # Log roughly every 5% of progress, at most once per second.
        log_every = max(1, total_rows // 20)

        processed = 0
        start_time = time.time()
        last_report_time = start_time

        for row_id, bids_text, asks_text, timestamp in rows_iter:
            snapshot = self._snapshot_from_row(row_id, bids_text, asks_text, timestamp, trades_by_timestamp)
            if snapshot is not None:
                book.add_snapshot(snapshot)

            processed += 1

            # Report progress.
            current_time = time.time()
            if processed % log_every == 0 and current_time - last_report_time > 1.0:
                logging.info(
                    f"{processed / total_rows * 100:.1f}% - asks {len(self.book.snapshots[-1].asks) if self.book.snapshots else 0} - "
                    f"bids {len(self.book.snapshots[-1].bids) if self.book.snapshots else 0} - "
                    f"{processed/(current_time-start_time):.1f} rows/sec"
                )
                last_report_time = current_time

    def _snapshot_from_row(
        self,
        row_id: int,
        bids_text: str,
        asks_text: str,
        timestamp: int,
        trades_by_timestamp: Dict[int, List[Trade]],
    ) -> Optional[BookSnapshot]:
        """Create a `BookSnapshot` from a single DB row and attached trades.

        Args:
            row_id: Primary key of the `book` row.
            bids_text: Serialized bid side as stored in the database.
            asks_text: Serialized ask side as stored in the database.
            timestamp: Row timestamp (coerced to int for trade lookup).
            trades_by_timestamp: Trades grouped by integer timestamp.

        Returns:
            The parsed snapshot, or None if either side is empty after parsing.
        """
        timestamp_int = int(timestamp)
        snapshot = BookSnapshot(
            id=row_id,
            timestamp=timestamp_int,
            bids={},
            asks={},
            trades=trades_by_timestamp.get(timestamp_int, []),
        )

        # Parse both sides in place into the snapshot's dicts.
        self._parser.parse_side(bids_text, snapshot.bids)
        self._parser.parse_side(asks_text, snapshot.asks)

        # A one-sided book is unusable for OBI/best-bid-ask; drop the row.
        if snapshot.bids and snapshot.asks:
            return snapshot
        return None

    def _parse_orderbook_side(self, text: str, side_dict: Dict[float, OrderbookLevel]) -> None:
        """Compatibility wrapper delegating to `OrderbookParser.parse_side`."""
        self._parser.parse_side(text, side_dict)
|
|
|
|
# NOTE: A helper previously defined here was superseded by repository
# methods for data access and has been removed; see
# repositories.sqlite_repository for the current implementations.