Add an interactive visualizer using Plotly and Dash, replacing the static matplotlib implementation. Introduce core modules for Dash app setup, custom components, and callback functions. Enhance the data processing utilities for Plotly format integration, and update the dependencies in pyproject.toml.

This commit is contained in:
2025-09-01 11:17:10 +08:00
parent fa6df78c1e
commit 36385af6f3
27 changed files with 1694 additions and 933 deletions

View File

@@ -1,132 +0,0 @@
from __future__ import annotations
from pathlib import Path
import sqlite3
import logging
from typing import List, Dict, Tuple
from .sqlite_repository import SQLiteOrderflowRepository
from models import Metric
class SQLiteMetricsRepository(SQLiteOrderflowRepository):
    """Write-enabled repository for storing and loading metrics data alongside orderflow data.

    Each method operates on the SQLite connection passed to it as ``conn``.
    """

    def create_metrics_table(self, conn: sqlite3.Connection) -> None:
        """Create the metrics table with proper indexes and foreign key constraints.

        Every statement uses ``IF NOT EXISTS``; the work is committed before returning.

        Args:
            conn: Active SQLite database connection.

        Raises:
            sqlite3.Error: Logged and re-raised if a statement or the commit fails.
        """
        try:
            # Create metrics table following PRD schema
            conn.execute("""
CREATE TABLE IF NOT EXISTS metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_id INTEGER NOT NULL,
timestamp TEXT NOT NULL,
obi REAL NOT NULL,
cvd REAL NOT NULL,
best_bid REAL,
best_ask REAL,
FOREIGN KEY (snapshot_id) REFERENCES book(id)
)
""")
            # Create indexes for efficient querying
            conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_snapshot_id ON metrics(snapshot_id)")
            conn.commit()
            logging.info("Metrics table and indexes created successfully")
        except sqlite3.Error as e:
            logging.error(f"Error creating metrics table: {e}")
            raise

    def table_exists(self, conn: sqlite3.Connection, table_name: str) -> bool:
        """Check if a table exists in the database.

        Args:
            conn: Active SQLite database connection.
            table_name: Name of the table to check.

        Returns:
            True if table exists, False otherwise. A SQLite error during the
            lookup is logged and also reported as False.
        """
        try:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table_name,)
            )
            return cursor.fetchone() is not None
        except sqlite3.Error as e:
            logging.error(f"Error checking if table {table_name} exists: {e}")
            return False

    def insert_metrics_batch(self, conn: sqlite3.Connection, metrics: List[Metric]) -> None:
        """Insert multiple metrics in a single batch operation for performance.

        This method does not commit; the caller decides when to commit ``conn``.

        Args:
            conn: Active SQLite database connection.
            metrics: List of Metric objects to insert. An empty list is a no-op.

        Raises:
            sqlite3.Error: Logged and re-raised if the insert fails.
        """
        if not metrics:
            return
        try:
            # Prepare batch data following existing batch pattern
            batch_data = [
                (m.snapshot_id, m.timestamp, m.obi, m.cvd, m.best_bid, m.best_ask)
                for m in metrics
            ]
            # Use executemany for batch insertion
            conn.executemany(
                "INSERT INTO metrics (snapshot_id, timestamp, obi, cvd, best_bid, best_ask) VALUES (?, ?, ?, ?, ?, ?)",
                batch_data
            )
            logging.debug(f"Inserted {len(metrics)} metrics records")
        except sqlite3.Error as e:
            logging.error(f"Error inserting metrics batch: {e}")
            raise

    def load_metrics_by_timerange(self, conn: sqlite3.Connection, start_timestamp: int, end_timestamp: int) -> List[Metric]:
        """Load metrics within a specified timestamp range.

        The bounds are compared numerically against the stored timestamps.

        Args:
            conn: Active SQLite database connection.
            start_timestamp: Start of the time range (inclusive).
            end_timestamp: End of the time range (inclusive).

        Returns:
            List of Metric objects ordered by numeric timestamp. A SQLite error
            is logged and reported as an empty list.
        """
        try:
            cursor = conn.cursor()
            # metrics.timestamp is declared TEXT, so a bare `timestamp >= ?`
            # makes SQLite compare strings: '9' sorts after '10', and bounds
            # with a different digit count than the stored values filter
            # wrongly. CAST forces numeric filtering and ordering. Trade-off:
            # the plain-column index idx_metrics_timestamp no longer serves
            # this query.
            cursor.execute(
                "SELECT snapshot_id, timestamp, obi, cvd, best_bid, best_ask "
                "FROM metrics "
                "WHERE CAST(timestamp AS INTEGER) >= ? AND CAST(timestamp AS INTEGER) <= ? "
                "ORDER BY CAST(timestamp AS INTEGER) ASC",
                (start_timestamp, end_timestamp)
            )
            metrics = []
            # fetchmany() returns [] once the cursor is exhausted, which ends the iteration.
            for batch in iter(lambda: cursor.fetchmany(5000), []):
                for snapshot_id, timestamp, obi, cvd, best_bid, best_ask in batch:
                    metric = Metric(
                        snapshot_id=int(snapshot_id),
                        timestamp=int(timestamp),
                        obi=float(obi),
                        cvd=float(cvd),
                        # best_bid / best_ask are nullable columns; keep NULL as None.
                        best_bid=float(best_bid) if best_bid is not None else None,
                        best_ask=float(best_ask) if best_ask is not None else None,
                    )
                    metrics.append(metric)
            return metrics
        except sqlite3.Error as e:
            logging.error(f"Error loading metrics by timerange: {e}")
            return []

View File

@@ -5,7 +5,7 @@ from typing import Dict, Iterator, List, Tuple
import sqlite3
import logging
from models import Trade
from models import Trade, Metric
class SQLiteOrderflowRepository:
@@ -13,31 +13,31 @@ class SQLiteOrderflowRepository:
def __init__(self, db_path: Path) -> None:
self.db_path = db_path
self.conn = None
def connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(str(self.db_path))
conn.execute("PRAGMA journal_mode = OFF")
conn.execute("PRAGMA synchronous = OFF")
conn.execute("PRAGMA cache_size = 100000")
conn.execute("PRAGMA temp_store = MEMORY")
conn.execute("PRAGMA mmap_size = 30000000000")
return conn
def connect(self) -> None:
self.conn = sqlite3.connect(str(self.db_path))
self.conn.execute("PRAGMA journal_mode = OFF")
self.conn.execute("PRAGMA synchronous = OFF")
self.conn.execute("PRAGMA cache_size = 100000")
self.conn.execute("PRAGMA temp_store = MEMORY")
self.conn.execute("PRAGMA mmap_size = 30000000000")
# NOTE(review): this span appears to be diff residue. It carries two signatures
# and two `row = ...` lines: one pair takes an explicit `conn` argument, the
# other reads the connection from `self.conn`. Presumably the `conn` pair is the
# pre-change side; confirm against the real file.
def count_rows(self, conn: sqlite3.Connection, table: str) -> int:
def count_rows(self, table: str) -> int:
# Only these table names may be interpolated into the SQL text below;
# anything else raises ValueError before a query is built.
allowed_tables = {"book", "trades"}
if table not in allowed_tables:
raise ValueError(f"Unsupported table name: {table}")
try:
row = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()
row = self.conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()
# A missing row or a NULL count is reported as 0.
return int(row[0]) if row and row[0] is not None else 0
except sqlite3.Error as e:
# SQLite failures are logged and reported as a zero count rather than raised.
logging.error(f"Error counting rows in table {table}: {e}")
return 0
def load_trades_by_timestamp(self, conn: sqlite3.Connection) -> Dict[int, List[Trade]]:
trades_by_timestamp: Dict[int, List[Trade]] = {}
def load_trades(self) -> Dict[int, List[Trade]]:
trades: List[Trade] = []
try:
cursor = conn.cursor()
cursor = self.conn.cursor()
cursor.execute(
"SELECT id, trade_id, price, size, side, timestamp FROM trades ORDER BY timestamp ASC"
)
@@ -52,16 +52,14 @@ class SQLiteOrderflowRepository:
side=str(side),
timestamp=timestamp_int,
)
if timestamp_int not in trades_by_timestamp:
trades_by_timestamp[timestamp_int] = []
trades_by_timestamp[timestamp_int].append(trade)
return trades_by_timestamp
trades.append(trade)
return trades
except sqlite3.Error as e:
logging.error(f"Error loading trades: {e}")
return {}
def iterate_book_rows(self, conn: sqlite3.Connection) -> Iterator[Tuple[int, str, str, int]]:
cursor = conn.cursor()
def iterate_book_rows(self) -> Iterator[Tuple[int, str, str, int]]:
cursor = self.conn.cursor()
cursor.execute("SELECT id, bids, asks, timestamp FROM book ORDER BY timestamp ASC")
while True:
rows = cursor.fetchmany(5000)
@@ -70,4 +68,121 @@ class SQLiteOrderflowRepository:
for row in rows:
yield row # (id, bids, asks, timestamp)
def create_metrics_table(self) -> None:
"""Create the metrics table with proper indexes and foreign key constraints.
Args:
conn: Active SQLite database connection.
"""
try:
# Create metrics table following PRD schema
self.conn.execute("""
CREATE TABLE IF NOT EXISTS metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_id INTEGER NOT NULL,
timestamp TEXT NOT NULL,
obi REAL NOT NULL,
cvd REAL NOT NULL,
best_bid REAL,
best_ask REAL,
FOREIGN KEY (snapshot_id) REFERENCES book(id)
)
""")
# Create indexes for efficient querying
self.conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)")
self.conn.execute("CREATE INDEX IF NOT EXISTS idx_metrics_snapshot_id ON metrics(snapshot_id)")
self.conn.commit()
logging.info("Metrics table and indexes created successfully")
except sqlite3.Error as e:
logging.error(f"Error creating metrics table: {e}")
raise
def table_exists(self, table_name: str) -> bool:
"""Check if a table exists in the database.
Args:
conn: Active SQLite database connection.
table_name: Name of the table to check.
Returns:
True if table exists, False otherwise.
"""
try:
cursor = self.conn.cursor()
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
(table_name,)
)
return cursor.fetchone() is not None
except sqlite3.Error as e:
logging.error(f"Error checking if table {table_name} exists: {e}")
return False
def insert_metrics_batch(self, metrics: List[Metric]) -> None:
"""Insert multiple metrics in a single batch operation for performance.
Args:
conn: Active SQLite database connection.
metrics: List of Metric objects to insert.
"""
if not metrics:
return
try:
# Prepare batch data following existing batch pattern
batch_data = [
(m.snapshot_id, m.timestamp, m.obi, m.cvd, m.best_bid, m.best_ask)
for m in metrics
]
# Use executemany for batch insertion
self.conn.executemany(
"INSERT INTO metrics (snapshot_id, timestamp, obi, cvd, best_bid, best_ask) VALUES (?, ?, ?, ?, ?, ?)",
batch_data
)
logging.debug(f"Inserted {len(metrics)} metrics records")
except sqlite3.Error as e:
logging.error(f"Error inserting metrics batch: {e}")
raise
def load_metrics_by_timerange(self, start_timestamp: int, end_timestamp: int) -> List[Metric]:
    """Load metrics within a specified timestamp range from ``self.conn``.

    The bounds are compared numerically against the stored timestamps.

    Args:
        start_timestamp: Start of the time range (inclusive).
        end_timestamp: End of the time range (inclusive).

    Returns:
        List of Metric objects ordered by numeric timestamp. A SQLite error
        is logged and reported as an empty list.
    """
    try:
        cursor = self.conn.cursor()
        # metrics.timestamp is declared TEXT, so a bare `timestamp >= ?` makes
        # SQLite compare strings: '9' sorts after '10', and bounds with a
        # different digit count than the stored values filter wrongly. CAST
        # forces numeric filtering and ordering. Trade-off: the plain-column
        # index idx_metrics_timestamp no longer serves this query.
        cursor.execute(
            "SELECT snapshot_id, timestamp, obi, cvd, best_bid, best_ask "
            "FROM metrics "
            "WHERE CAST(timestamp AS INTEGER) >= ? AND CAST(timestamp AS INTEGER) <= ? "
            "ORDER BY CAST(timestamp AS INTEGER) ASC",
            (start_timestamp, end_timestamp)
        )
        metrics = []
        # fetchmany() returns [] once the cursor is exhausted, which ends the iteration.
        for batch in iter(lambda: cursor.fetchmany(5000), []):
            for snapshot_id, timestamp, obi, cvd, best_bid, best_ask in batch:
                metric = Metric(
                    snapshot_id=int(snapshot_id),
                    timestamp=int(timestamp),
                    obi=float(obi),
                    cvd=float(cvd),
                    # best_bid / best_ask are nullable columns; keep NULL as None.
                    best_bid=float(best_bid) if best_bid is not None else None,
                    best_ask=float(best_ask) if best_ask is not None else None,
                )
                metrics.append(metric)
        return metrics
    except sqlite3.Error as e:
        logging.error(f"Error loading metrics by timerange: {e}")
        return []