orderflow_backtest/repositories/sqlite_metrics_repository.py

from __future__ import annotations
from pathlib import Path
import sqlite3
import logging
from typing import List, Dict, Tuple
from .sqlite_repository import SQLiteOrderflowRepository
from models import Metric


class SQLiteMetricsRepository(SQLiteOrderflowRepository):
    """Write-enabled repository for storing and loading metrics data alongside orderflow data."""

    def create_metrics_table(self, conn: sqlite3.Connection) -> None:
        """Create the metrics table with the required indexes and foreign key constraint.

        Args:
            conn: Active SQLite database connection.
        """
        try:
            # Create metrics table following the PRD schema; timestamps are stored as
            # integers to match how Metric objects are written and read back.
            conn.execute("""
                CREATE TABLE IF NOT EXISTS metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    snapshot_id INTEGER NOT NULL,
                    timestamp INTEGER NOT NULL,
                    obi REAL NOT NULL,
                    cvd REAL NOT NULL,
                    best_bid REAL,
                    best_ask REAL,
                    FOREIGN KEY (snapshot_id) REFERENCES book(id)
                )
            """)
            # Create indexes for efficient querying
            conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_snapshot_id ON metrics(snapshot_id)")
            conn.commit()
            logging.info("Metrics table and indexes created successfully")
        except sqlite3.Error as e:
            logging.error(f"Error creating metrics table: {e}")
            raise

    def table_exists(self, conn: sqlite3.Connection, table_name: str) -> bool:
        """Check if a table exists in the database.

        Args:
            conn: Active SQLite database connection.
            table_name: Name of the table to check.

        Returns:
            True if the table exists, False otherwise.
        """
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table_name,)
            )
            return cursor.fetchone() is not None
        except sqlite3.Error as e:
            logging.error(f"Error checking if table {table_name} exists: {e}")
            return False

    def insert_metrics_batch(self, conn: sqlite3.Connection, metrics: List[Metric]) -> None:
        """Insert multiple metrics in a single batch operation for performance.

        The caller is responsible for committing the transaction.

        Args:
            conn: Active SQLite database connection.
            metrics: List of Metric objects to insert.
        """
        if not metrics:
            return
        try:
            # Prepare batch data following the existing batch-insert pattern
            batch_data = [
                (m.snapshot_id, m.timestamp, m.obi, m.cvd, m.best_bid, m.best_ask)
                for m in metrics
            ]
            # Use executemany for batch insertion
            conn.executemany(
                "INSERT INTO metrics (snapshot_id, timestamp, obi, cvd, best_bid, best_ask) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                batch_data
            )
            logging.debug(f"Inserted {len(metrics)} metrics records")
        except sqlite3.Error as e:
            logging.error(f"Error inserting metrics batch: {e}")
            raise

    def load_metrics_by_timerange(self, conn: sqlite3.Connection, start_timestamp: int, end_timestamp: int) -> List[Metric]:
        """Load metrics within a specified timestamp range.

        Args:
            conn: Active SQLite database connection.
            start_timestamp: Start of the time range (inclusive).
            end_timestamp: End of the time range (inclusive).

        Returns:
            List of Metric objects ordered by timestamp, or an empty list on error.
        """
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT snapshot_id, timestamp, obi, cvd, best_bid, best_ask "
                "FROM metrics WHERE timestamp >= ? AND timestamp <= ? "
                "ORDER BY timestamp ASC",
                (start_timestamp, end_timestamp)
            )
            metrics = []
            # Fetch in chunks to keep memory bounded on large time ranges
            for batch in iter(lambda: cursor.fetchmany(5000), []):
                for snapshot_id, timestamp, obi, cvd, best_bid, best_ask in batch:
                    metric = Metric(
                        snapshot_id=int(snapshot_id),
                        timestamp=int(timestamp),
                        obi=float(obi),
                        cvd=float(cvd),
                        best_bid=float(best_bid) if best_bid is not None else None,
                        best_ask=float(best_ask) if best_ask is not None else None,
                    )
                    metrics.append(metric)
            return metrics
        except sqlite3.Error as e:
            logging.error(f"Error loading metrics by timerange: {e}")
            return []
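

# --- Illustrative usage sketch, not part of the repository API ---
# A minimal round trip showing how the methods above fit together: create the
# table, batch-insert one Metric, commit, and read it back by time range.
# Assumptions (not confirmed by this module): the repository can be constructed
# without arguments and a raw sqlite3 connection is opened directly; in the real
# project the constructor arguments and connection handling come from
# SQLiteOrderflowRepository, and "orderflow.db" is a hypothetical database path.
if __name__ == "__main__":
    repo = SQLiteMetricsRepository()  # assumed constructor signature
    with sqlite3.connect("orderflow.db") as conn:  # assumed database path
        repo.create_metrics_table(conn)
        if repo.table_exists(conn, "metrics"):
            repo.insert_metrics_batch(
                conn,
                [Metric(snapshot_id=1, timestamp=1700000000, obi=0.12,
                        cvd=-3.5, best_bid=101.25, best_ask=101.50)],
            )
            conn.commit()  # insert_metrics_batch leaves committing to the caller
            rows = repo.load_metrics_by_timerange(conn, 1699999999, 1700000001)
            logging.info("Loaded %d metrics rows", len(rows))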