# orderflow_backtest/interactive_visualizer.py

"""
Interactive visualizer using Plotly + Dash for orderflow analysis.
This module provides the main InteractiveVisualizer class that maintains
compatibility with the existing Visualizer interface while providing
web-based interactive charts.
"""
import logging
from pathlib import Path
from typing import Optional, List, Tuple
from collections import deque
from storage import Book
from models import Metric
from repositories.sqlite_repository import SQLiteOrderflowRepository
class InteractiveVisualizer:
    """Interactive web-based visualizer for orderflow data using Plotly + Dash.

    Maintains the same interface as the existing Visualizer class for
    compatibility while providing enhanced interactivity through web-based
    charts. Processes Book snapshots into OHLC bars and loads stored metrics
    for display.
    """

    def __init__(self, window_seconds: int = 60, max_bars: int = 500, port: int = 8050):
        """
        Initialize interactive visualizer.

        Args:
            window_seconds: OHLC aggregation window in seconds.
            max_bars: Maximum number of bars to display.
            port: Port for the Dash server.
        """
        self.window_seconds = window_seconds
        # NOTE(review): max_bars is stored but never enforced in this class —
        # presumably consumed by the Dash layer; verify against dash_app.
        self.max_bars = max_bars
        self.port = port
        self._db_path: Optional[Path] = None
        # Processed data storage: (bucket_ts, open, high, low, close, volume).
        self._ohlc_data: List[Tuple[int, float, float, float, float, float]] = []
        self._metrics_data: List[Metric] = []
        # Simple cache so repeated update_from_book() calls with the same
        # book/db pair skip reprocessing.
        self._cache_book_hash: Optional[int] = None
        self._cache_db_path_hash: Optional[int] = None
        # OHLC calculation state (matches existing visualizer pattern).
        self._current_bucket_ts: Optional[int] = None
        self._open = self._high = self._low = self._close = None
        self._volume: float = 0.0

    def set_db_path(self, db_path: Path) -> None:
        """Set database path for metrics loading."""
        self._db_path = db_path

    def update_from_book(self, book: Book) -> None:
        """Process book snapshots into OHLC data and load corresponding metrics."""
        if not book.snapshots:
            logging.warning("Book has no snapshots to visualize")
            return
        # Cheap fingerprint of the book/db pair to avoid reprocessing.
        # NOTE(review): this collides for a book that changed content but kept
        # its length and boundary timestamps — acceptable for a simple cache.
        book_hash = hash((len(book.snapshots), book.first_timestamp, book.last_timestamp))
        db_hash = hash(str(self._db_path)) if self._db_path else None
        if (self._cache_book_hash == book_hash and
                self._cache_db_path_hash == db_hash and
                self._ohlc_data):
            logging.info("Using cached data: %d OHLC bars, %d metrics",
                         len(self._ohlc_data), len(self._metrics_data))
            return
        # Clear previous data before rebuilding.
        self._ohlc_data.clear()
        self._metrics_data.clear()
        self._reset_ohlc_state()
        # Process snapshots into OHLC bars (reusing existing logic).
        self._process_snapshots_to_ohlc(book.snapshots)
        # Load stored metrics for the same (raw, un-normalized) time range.
        # (snapshots is non-empty — guarded by the early return above.)
        if self._db_path:
            start_ts = min(s.timestamp for s in book.snapshots)
            end_ts = max(s.timestamp for s in book.snapshots)
            self._metrics_data = self._load_stored_metrics(start_ts, end_ts)
        # Update cache only after a successful rebuild.
        self._cache_book_hash = book_hash
        self._cache_db_path_hash = db_hash
        logging.info("Processed %d OHLC bars and %d metrics",
                     len(self._ohlc_data), len(self._metrics_data))

    def show(self) -> None:
        """Launch the Dash server and display interactive charts with processed data."""
        # Imported lazily so the heavy Dash stack is only required when the
        # UI is actually launched.
        from dash_app import create_dash_app_with_data, create_dash_app

        # Create the Dash app, with real data when available.
        if self._ohlc_data:
            app = create_dash_app_with_data(
                ohlc_data=self._ohlc_data,
                metrics_data=self._metrics_data,
                debug=True,
                port=self.port,
            )
        else:
            app = create_dash_app(debug=True, port=self.port)
        # Log a summary of the data being served.
        logging.info("Launching interactive visualizer:")
        logging.info(" - OHLC bars: %d", len(self._ohlc_data))
        logging.info(" - Metrics points: %d", len(self._metrics_data))
        if self._ohlc_data:
            start_time = self._ohlc_data[0][0]
            end_time = self._ohlc_data[-1][0]
            logging.info(" - Time range: %s to %s", start_time, end_time)
        app.run(debug=True, port=self.port, host='127.0.0.1')

    def _reset_ohlc_state(self) -> None:
        """Reset OHLC calculation state."""
        self._current_bucket_ts = None
        self._open = self._high = self._low = self._close = None
        self._volume = 0.0

    def _bucket_start(self, ts: int) -> int:
        """Return the start timestamp (seconds) of the bucket containing *ts*."""
        normalized_ts = self._normalize_ts_seconds(ts)
        return normalized_ts - (normalized_ts % self.window_seconds)

    def _normalize_ts_seconds(self, ts: int) -> int:
        """Normalize a timestamp in s/ms/us to seconds, by magnitude heuristic."""
        its = int(ts)
        if its > 100_000_000_000_000:  # > 1e14 → microseconds
            return its // 1_000_000
        if its > 100_000_000_000:  # > 1e11 → milliseconds
            return its // 1_000
        return its

    def _process_snapshots_to_ohlc(self, snapshots) -> None:
        """Aggregate book snapshots into OHLC bars, appended to self._ohlc_data.

        Snapshots are processed in timestamp order; ones with an empty or
        malformed side of the book are skipped.
        """
        logging.info("Processing %d snapshots into OHLC bars", len(snapshots))
        valid_count = 0
        for snapshot in sorted(snapshots, key=lambda s: s.timestamp):
            if not snapshot.bids or not snapshot.asks:
                continue
            try:
                best_bid = max(snapshot.bids.keys())
                best_ask = min(snapshot.asks.keys())
            except (ValueError, TypeError):
                continue
            # FIX: count only snapshots that actually contribute a price;
            # the original incremented before the validity checks, so the
            # "valid snapshots" log line over-counted.
            valid_count += 1
            mid = (float(best_bid) + float(best_ask)) / 2.0
            ts = self._normalize_ts_seconds(int(snapshot.timestamp))
            bucket_ts = self._bucket_start(ts)
            # Volume contributed by trades observed in this snapshot.
            snapshot_volume = sum(trade.size for trade in snapshot.trades)
            if self._current_bucket_ts is None or bucket_ts != self._current_bucket_ts:
                # Entering a new bucket: flush the finished bar (a no-op when
                # no bar is open yet) and start a fresh one at the mid price.
                self._append_current_bar()
                self._current_bucket_ts = bucket_ts
                self._open = self._high = self._low = self._close = mid
                self._volume = snapshot_volume
            else:
                # Same bucket: extend the OHLC extremes and accumulate volume.
                if self._high is None or mid > self._high:
                    self._high = mid
                if self._low is None or mid < self._low:
                    self._low = mid
                self._close = mid
                self._volume += snapshot_volume
        # Finalize the last (possibly partial) bar.
        self._append_current_bar()
        logging.info("Created %d OHLC bars from %d valid snapshots",
                     len(self._ohlc_data), valid_count)

    def _append_current_bar(self) -> None:
        """Finalize the current OHLC bar and append it to self._ohlc_data.

        No-op when no bar is open (e.g. before the first snapshot).
        """
        if self._current_bucket_ts is None or self._open is None:
            return
        self._ohlc_data.append(
            (
                self._current_bucket_ts,
                float(self._open),
                # High/low/close fall back to open for a degenerate bar.
                float(self._high if self._high is not None else self._open),
                float(self._low if self._low is not None else self._open),
                float(self._close if self._close is not None else self._open),
                float(self._volume),
            )
        )

    def _load_stored_metrics(self, start_timestamp: int, end_timestamp: int) -> List[Metric]:
        """Load stored metrics from the database for the given time range.

        Returns an empty list when no db path is set or loading fails; errors
        are logged rather than raised so visualization degrades gracefully.
        """
        if not self._db_path:
            return []
        try:
            repo = SQLiteOrderflowRepository(self._db_path)
            with repo.connect() as conn:
                return repo.load_metrics_by_timerange(conn, start_timestamp, end_timestamp)
        except Exception as e:
            logging.error("Error loading metrics for visualization: %s", e)
            return []