"""
|
|
Interactive visualizer using Plotly + Dash for orderflow analysis.
|
|
|
|
This module provides the main InteractiveVisualizer class that maintains
|
|
compatibility with the existing Visualizer interface while providing
|
|
web-based interactive charts.
|
|
"""
import logging
from pathlib import Path
from typing import Optional, List, Tuple

from storage import Book
from models import Metric
from repositories.sqlite_repository import SQLiteOrderflowRepository
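
# A minimal usage sketch (illustrative): `book` and `db_path` below are
# placeholders for a populated storage.Book and a Path to an existing
# SQLite database; neither name is defined in this module.
#
#   viz = InteractiveVisualizer(window_seconds=60, max_bars=500, port=8050)
#   viz.set_db_path(db_path)      # enables loading of stored metrics
#   viz.update_from_book(book)    # aggregates snapshots into OHLC bars
#   viz.show()                    # blocks, serving Dash on 127.0.0.1:8050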


class InteractiveVisualizer:
    """Interactive web-based visualizer for orderflow data using Plotly + Dash.

    Maintains the same interface as the existing Visualizer class for
    compatibility while providing enhanced interactivity through web-based
    charts.

    Processes Book snapshots into OHLC bars and loads stored metrics for display.
    """

    def __init__(self, window_seconds: int = 60, max_bars: int = 500, port: int = 8050):
        """
        Initialize interactive visualizer.

        Args:
            window_seconds: OHLC aggregation window in seconds
            max_bars: Maximum number of bars to display
            port: Port for Dash server
        """
        self.window_seconds = window_seconds
        self.max_bars = max_bars
        self.port = port
        self._db_path: Optional[Path] = None

        # Processed data storage: each bar is
        # (bucket_ts, open, high, low, close, volume)
        self._ohlc_data: List[Tuple[int, float, float, float, float, float]] = []
        self._metrics_data: List[Metric] = []

        # Simple cache for performance
        self._cache_book_hash: Optional[int] = None
        self._cache_db_path_hash: Optional[int] = None

        # OHLC calculation state (matches existing visualizer pattern)
        self._current_bucket_ts: Optional[int] = None
        self._open = self._high = self._low = self._close = None
        self._volume: float = 0.0

    def set_db_path(self, db_path: Path) -> None:
        """Set database path for metrics loading."""
        self._db_path = db_path

    def update_from_book(self, book: Book) -> None:
        """Process book snapshots into OHLC data and load corresponding metrics."""
        if not book.snapshots:
            logging.warning("Book has no snapshots to visualize")
            return

        # Simple cache check to avoid reprocessing same data
        book_hash = hash((len(book.snapshots), book.first_timestamp, book.last_timestamp))
        db_hash = hash(str(self._db_path)) if self._db_path else None

        if (self._cache_book_hash == book_hash and
                self._cache_db_path_hash == db_hash and
                self._ohlc_data):
            logging.info(f"Using cached data: {len(self._ohlc_data)} OHLC bars, {len(self._metrics_data)} metrics")
            return

        # Clear previous data
        self._ohlc_data.clear()
        self._metrics_data.clear()
        self._reset_ohlc_state()

        # Process snapshots into OHLC bars (reusing existing logic)
        self._process_snapshots_to_ohlc(book.snapshots)

        # Load stored metrics for the same time range
        if self._db_path and book.snapshots:
            start_ts = min(s.timestamp for s in book.snapshots)
            end_ts = max(s.timestamp for s in book.snapshots)
            self._metrics_data = self._load_stored_metrics(start_ts, end_ts)

        # Update cache
        self._cache_book_hash = book_hash
        self._cache_db_path_hash = db_hash

        logging.info(f"Processed {len(self._ohlc_data)} OHLC bars and {len(self._metrics_data)} metrics")

    def show(self) -> None:
        """Launch Dash server and display interactive charts with processed data."""
        from dash_app import create_dash_app_with_data, create_dash_app

        # Create Dash app with real data
        if self._ohlc_data:
            app = create_dash_app_with_data(
                ohlc_data=self._ohlc_data,
                metrics_data=self._metrics_data,
                debug=True,
                port=self.port
            )
        else:
            app = create_dash_app(debug=True, port=self.port)

        # Log data summary
        logging.info("Launching interactive visualizer:")
        logging.info(f"  - OHLC bars: {len(self._ohlc_data)}")
        logging.info(f"  - Metrics points: {len(self._metrics_data)}")
        if self._ohlc_data:
            start_time = self._ohlc_data[0][0]
            end_time = self._ohlc_data[-1][0]
            logging.info(f"  - Time range: {start_time} to {end_time}")

        app.run(debug=True, port=self.port, host='127.0.0.1')

    def _reset_ohlc_state(self) -> None:
        """Reset OHLC calculation state."""
        self._current_bucket_ts = None
        self._open = self._high = self._low = self._close = None
        self._volume = 0.0

    def _bucket_start(self, ts: int) -> int:
        """Calculate bucket start timestamp (matches existing visualizer)."""
        normalized_ts = self._normalize_ts_seconds(ts)
        return normalized_ts - (normalized_ts % self.window_seconds)
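
    # Worked example (illustrative, doctest-style):
    #
    #   >>> viz = InteractiveVisualizer(window_seconds=60)
    #   >>> viz._bucket_start(1_700_000_125)   # 1_700_000_125 % 60 == 25
    #   1700000100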

    def _normalize_ts_seconds(self, ts: int) -> int:
        """Normalize timestamp to seconds (matches existing visualizer)."""
        its = int(ts)
        if its > 100_000_000_000_000:  # > 1e14 → microseconds
            return its // 1_000_000
        if its > 100_000_000_000:  # > 1e11 → milliseconds
            return its // 1_000
        return its
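
    # Threshold examples (illustrative): second-resolution inputs pass through
    # unchanged; millisecond and microsecond inputs are scaled down to seconds.
    #
    #   >>> viz = InteractiveVisualizer()
    #   >>> viz._normalize_ts_seconds(1_700_000_000)            # seconds
    #   1700000000
    #   >>> viz._normalize_ts_seconds(1_700_000_000_000)        # milliseconds
    #   1700000000
    #   >>> viz._normalize_ts_seconds(1_700_000_000_000_000)    # microseconds
    #   1700000000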

    def _process_snapshots_to_ohlc(self, snapshots) -> None:
        """Process book snapshots into OHLC bars (adapted from existing visualizer)."""
        logging.info(f"Processing {len(snapshots)} snapshots into OHLC bars")

        valid_count = 0
        for snapshot in sorted(snapshots, key=lambda s: s.timestamp):
            if not snapshot.bids or not snapshot.asks:
                continue

            try:
                best_bid = max(snapshot.bids.keys())
                best_ask = min(snapshot.asks.keys())
            except (ValueError, TypeError):
                continue

            valid_count += 1
            mid = (float(best_bid) + float(best_ask)) / 2.0
            ts_raw = int(snapshot.timestamp)
            ts = self._normalize_ts_seconds(ts_raw)
            bucket_ts = self._bucket_start(ts)

            # Calculate volume from trades in this snapshot
            snapshot_volume = sum(trade.size for trade in snapshot.trades)

            if self._current_bucket_ts is None:
                # First valid snapshot: open the initial bucket
                self._current_bucket_ts = bucket_ts
                self._open = self._high = self._low = self._close = mid
                self._volume = snapshot_volume
            elif bucket_ts != self._current_bucket_ts:
                # New bucket: close and store the previous bar
                self._append_current_bar()
                self._current_bucket_ts = bucket_ts
                self._open = self._high = self._low = self._close = mid
                self._volume = snapshot_volume
            else:
                # Update current bucket OHLC and accumulate volume
                if self._high is None or mid > self._high:
                    self._high = mid
                if self._low is None or mid < self._low:
                    self._low = mid
                self._close = mid
                self._volume += snapshot_volume

        # Finalize the last bar
        self._append_current_bar()

        logging.info(f"Created {len(self._ohlc_data)} OHLC bars from {valid_count} valid snapshots")

    def _append_current_bar(self) -> None:
        """Finalize current OHLC bar and add to data list."""
        if self._current_bucket_ts is None or self._open is None:
            return
        self._ohlc_data.append(
            (
                self._current_bucket_ts,
                float(self._open),
                float(self._high if self._high is not None else self._open),
                float(self._low if self._low is not None else self._open),
                float(self._close if self._close is not None else self._open),
                float(self._volume),
            )
        )

    def _load_stored_metrics(self, start_timestamp: int, end_timestamp: int) -> List[Metric]:
        """Load stored metrics from database for the given time range."""
        if not self._db_path:
            return []

        try:
            repo = SQLiteOrderflowRepository(self._db_path)
            with repo.connect() as conn:
                return repo.load_metrics_by_timerange(conn, start_timestamp, end_timestamp)
        except Exception as e:
            logging.error(f"Error loading metrics for visualization: {e}")
            return []