# orderflow_backtest/interactive_visualizer.py

"""
Interactive visualizer using Plotly + Dash for orderflow analysis.
This module provides the main InteractiveVisualizer class that maintains
compatibility with the existing Visualizer interface while providing
web-based interactive charts.
"""
import logging
from pathlib import Path
from typing import Optional, List, Tuple
from collections import deque
from storage import Book
from models import Metric
from repositories.sqlite_repository import SQLiteOrderflowRepository
class InteractiveVisualizer:
    """Interactive web-based visualizer for orderflow data using Plotly + Dash.

    Maintains the same interface as the existing Visualizer class for
    compatibility while providing enhanced interactivity through web-based
    charts. Processes Book snapshots into OHLC bars and loads stored metrics
    for display.
    """

    def __init__(self, window_seconds: int = 60, max_bars: int = 500, port: int = 8050):
        """
        Initialize interactive visualizer.

        Args:
            window_seconds: OHLC aggregation window in seconds.
            max_bars: Maximum number of bars to display.
            port: Port for the Dash server.
        """
        self.window_seconds = window_seconds
        # NOTE(review): max_bars is stored but never enforced in this class —
        # presumably consumed by the Dash layer; verify against dash_app.
        self.max_bars = max_bars
        self.port = port
        self._db_path: Optional[Path] = None
        # Processed data storage: (bucket_ts, open, high, low, close, volume).
        self._ohlc_data: List[Tuple[int, float, float, float, float, float]] = []
        self._metrics_data: List[Metric] = []
        # Simple cache so repeated update_from_book() calls with the same
        # book/db pair skip reprocessing.
        self._cache_book_hash: Optional[int] = None
        self._cache_db_path_hash: Optional[int] = None
        # OHLC calculation state (matches existing visualizer pattern).
        self._current_bucket_ts: Optional[int] = None
        self._open = self._high = self._low = self._close = None
        self._volume: float = 0.0

    def set_db_path(self, db_path: Path) -> None:
        """Set database path for metrics loading."""
        self._db_path = db_path

    def update_from_book(self, book: Book) -> None:
        """Process book snapshots into OHLC data and load corresponding metrics."""
        if not book.snapshots:
            logging.warning("Book has no snapshots to visualize")
            return
        # Cheap fingerprint of the book/db pair to avoid reprocessing.
        # NOTE(review): this collides for a book that changed content but kept
        # its length and boundary timestamps — acceptable for a simple cache.
        book_hash = hash((len(book.snapshots), book.first_timestamp, book.last_timestamp))
        db_hash = hash(str(self._db_path)) if self._db_path else None
        if (self._cache_book_hash == book_hash and
                self._cache_db_path_hash == db_hash and
                self._ohlc_data):
            logging.info("Using cached data: %d OHLC bars, %d metrics",
                         len(self._ohlc_data), len(self._metrics_data))
            return
        # Clear previous data before rebuilding.
        self._ohlc_data.clear()
        self._metrics_data.clear()
        self._reset_ohlc_state()
        # Process snapshots into OHLC bars (reusing existing logic).
        self._process_snapshots_to_ohlc(book.snapshots)
        # Load stored metrics for the same (raw, un-normalized) time range.
        # (snapshots is non-empty — guarded by the early return above.)
        if self._db_path:
            start_ts = min(s.timestamp for s in book.snapshots)
            end_ts = max(s.timestamp for s in book.snapshots)
            self._metrics_data = self._load_stored_metrics(start_ts, end_ts)
        # Update cache only after a successful rebuild.
        self._cache_book_hash = book_hash
        self._cache_db_path_hash = db_hash
        logging.info("Processed %d OHLC bars and %d metrics",
                     len(self._ohlc_data), len(self._metrics_data))

    def show(self) -> None:
        """Launch the Dash server and display interactive charts with processed data."""
        # Imported lazily so the heavy Dash stack is only required when the
        # UI is actually launched.
        from dash_app import create_dash_app_with_data, create_dash_app

        # Create the Dash app, with real data when available.
        if self._ohlc_data:
            app = create_dash_app_with_data(
                ohlc_data=self._ohlc_data,
                metrics_data=self._metrics_data,
                debug=True,
                port=self.port,
            )
        else:
            app = create_dash_app(debug=True, port=self.port)
        # Log a summary of the data being served.
        logging.info("Launching interactive visualizer:")
        logging.info(" - OHLC bars: %d", len(self._ohlc_data))
        logging.info(" - Metrics points: %d", len(self._metrics_data))
        if self._ohlc_data:
            start_time = self._ohlc_data[0][0]
            end_time = self._ohlc_data[-1][0]
            logging.info(" - Time range: %s to %s", start_time, end_time)
        app.run(debug=True, port=self.port, host='127.0.0.1')

    def _reset_ohlc_state(self) -> None:
        """Reset OHLC calculation state."""
        self._current_bucket_ts = None
        self._open = self._high = self._low = self._close = None
        self._volume = 0.0

    def _bucket_start(self, ts: int) -> int:
        """Return the start timestamp (seconds) of the bucket containing *ts*."""
        normalized_ts = self._normalize_ts_seconds(ts)
        return normalized_ts - (normalized_ts % self.window_seconds)

    def _normalize_ts_seconds(self, ts: int) -> int:
        """Normalize a timestamp in s/ms/us to seconds, by magnitude heuristic."""
        its = int(ts)
        if its > 100_000_000_000_000:  # > 1e14 → microseconds
            return its // 1_000_000
        if its > 100_000_000_000:  # > 1e11 → milliseconds
            return its // 1_000
        return its

    def _process_snapshots_to_ohlc(self, snapshots) -> None:
        """Aggregate book snapshots into OHLC bars, appended to self._ohlc_data.

        Snapshots are processed in timestamp order; ones with an empty or
        malformed side of the book are skipped.
        """
        logging.info("Processing %d snapshots into OHLC bars", len(snapshots))
        valid_count = 0
        for snapshot in sorted(snapshots, key=lambda s: s.timestamp):
            if not snapshot.bids or not snapshot.asks:
                continue
            try:
                best_bid = max(snapshot.bids.keys())
                best_ask = min(snapshot.asks.keys())
            except (ValueError, TypeError):
                continue
            # FIX: count only snapshots that actually contribute a price;
            # the original incremented before the validity checks, so the
            # "valid snapshots" log line over-counted.
            valid_count += 1
            mid = (float(best_bid) + float(best_ask)) / 2.0
            ts = self._normalize_ts_seconds(int(snapshot.timestamp))
            bucket_ts = self._bucket_start(ts)
            # Volume contributed by trades observed in this snapshot.
            snapshot_volume = sum(trade.size for trade in snapshot.trades)
            if self._current_bucket_ts is None or bucket_ts != self._current_bucket_ts:
                # Entering a new bucket: flush the finished bar (a no-op when
                # no bar is open yet) and start a fresh one at the mid price.
                self._append_current_bar()
                self._current_bucket_ts = bucket_ts
                self._open = self._high = self._low = self._close = mid
                self._volume = snapshot_volume
            else:
                # Same bucket: extend the OHLC extremes and accumulate volume.
                if self._high is None or mid > self._high:
                    self._high = mid
                if self._low is None or mid < self._low:
                    self._low = mid
                self._close = mid
                self._volume += snapshot_volume
        # Finalize the last (possibly partial) bar.
        self._append_current_bar()
        logging.info("Created %d OHLC bars from %d valid snapshots",
                     len(self._ohlc_data), valid_count)

    def _append_current_bar(self) -> None:
        """Finalize the current OHLC bar and append it to self._ohlc_data.

        No-op when no bar is open (e.g. before the first snapshot).
        """
        if self._current_bucket_ts is None or self._open is None:
            return
        self._ohlc_data.append(
            (
                self._current_bucket_ts,
                float(self._open),
                # High/low/close fall back to open for a degenerate bar.
                float(self._high if self._high is not None else self._open),
                float(self._low if self._low is not None else self._open),
                float(self._close if self._close is not None else self._open),
                float(self._volume),
            )
        )

    def _load_stored_metrics(self, start_timestamp: int, end_timestamp: int) -> List[Metric]:
        """Load stored metrics from the database for the given time range.

        Returns an empty list when no db path is set or loading fails; errors
        are logged rather than raised so visualization degrades gracefully.
        """
        if not self._db_path:
            return []
        try:
            repo = SQLiteOrderflowRepository(self._db_path)
            with repo.connect() as conn:
                return repo.load_metrics_by_timerange(conn, start_timestamp, end_timestamp)
        except Exception as e:
            logging.error("Error loading metrics for visualization: %s", e)
            return []