# TCPDashboard/dashboard/components/data_analysis.py

"""
Data analysis components for comprehensive market data analysis.
"""

from dash import html, dcc
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta
from typing import Dict, Any, List, Optional

from utils.logger import get_logger
from database.connection import DatabaseManager
from database.operations import DatabaseOperationError

logger = get_logger("data_analysis")


class VolumeAnalyzer:
    """Analyze trading volume patterns and trends."""

    def __init__(self):
        # A database manager is created and initialized on construction; the
        # statistics method below only reads the DataFrame it is handed.
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_volume_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Calculate comprehensive volume statistics from a DataFrame.

        Args:
            df: Candle data. A ``volume`` column is required. ``open`` and
                ``close`` are read for the volume/price correlation, and
                ``trades_count`` (optional) enables the average trade size.
                The caller's frame is never modified.

        Returns:
            A dict with totals, mean/std, trend label, high-volume period
            count, volume/price correlation, average trade size, extremes and
            percentiles. ``avg_trade_size`` is ``None`` when ``trades_count``
            is absent or holds no usable (non-zero) values. On any failure a
            dict with a single ``'error'`` key is returned instead.
        """
        try:
            if df.empty or 'volume' not in df.columns:
                return {'error': 'DataFrame is empty or missing volume column'}

            # Convert all relevant columns to float to avoid type errors with Decimal
            df = df.copy()
            for col in ('open', 'high', 'low', 'close', 'volume', 'trades_count'):
                if col in df.columns:
                    df[col] = df[col].astype(float)

            volume = df['volume']

            # Calculate volume statistics
            total_volume = volume.sum()
            avg_volume = volume.mean()
            volume_std = volume.std()

            # Volume trend analysis: last 10 periods against the first 10
            recent_volume = volume.tail(10).mean()
            older_volume = volume.head(10).mean()
            volume_trend = "Increasing" if recent_volume > older_volume else "Decreasing"

            # High volume periods (above 2 standard deviations)
            high_volume_threshold = avg_volume + (2 * volume_std)
            high_volume_periods = len(df[volume > high_volume_threshold])

            # Volume-Price correlation (volume against absolute candle body)
            price_change = df['close'] - df['open']
            volume_price_corr = volume.corr(price_change.abs())

            # Average trade size (volume per trade)
            avg_trade_size = None  # Not available
            if 'trades_count' in df.columns:
                # A zero count means "no trade information" for that period.
                # Treat it as missing so the period is skipped, rather than
                # pretending the whole volume was a single trade.
                usable_trades = df['trades_count'].replace(0, np.nan)
                mean_trade_size = (volume / usable_trades).mean()
                if not pd.isna(mean_trade_size):
                    avg_trade_size = mean_trade_size

            return {
                'total_volume': total_volume,
                'avg_volume': avg_volume,
                'volume_std': volume_std,
                'volume_trend': volume_trend,
                'high_volume_periods': high_volume_periods,
                'volume_price_correlation': volume_price_corr,
                'avg_trade_size': avg_trade_size,
                'max_volume': volume.max(),
                'min_volume': volume.min(),
                'volume_percentiles': {
                    '25th': volume.quantile(0.25),
                    '50th': volume.quantile(0.50),
                    '75th': volume.quantile(0.75),
                    '95th': volume.quantile(0.95)
                }
            }

        except Exception as e:
            logger.error(f"Volume analysis error: {e}")
            return {'error': str(e)}


class PriceMovementAnalyzer:
    """Derive return, volatility, range and trend figures from OHLC candles."""

    def __init__(self):
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_price_movement_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Summarise how price moved across the candles in ``df``.

        Args:
            df: Candle data with ``open``, ``high``, ``low`` and ``close``
                columns (``volume`` is converted too when present). The
                caller's frame is left untouched.

        Returns:
            A dict of price statistics (returns, volatility, range,
            bullish/bearish counts, extremes, momentum, trend strength and
            return percentiles), or a dict with a single ``'error'`` key when
            the input is unusable or a computation fails.
        """
        required = ('open', 'high', 'low', 'close')
        try:
            if df.empty or any(name not in df.columns for name in required):
                return {'error': 'DataFrame is empty or missing required price columns'}

            # Float copies sidestep Decimal arithmetic problems.
            frame = df.copy()
            for name in (*required, 'volume'):
                if name in frame.columns:
                    frame[name] = frame[name].astype(float)

            opens = frame['open']
            highs = frame['high']
            lows = frame['low']
            closes = frame['close']

            # Return over the whole window: first open to last close.
            last_close = closes.iloc[-1]
            first_open = opens.iloc[0]
            period_return = ((last_close - first_open) / first_open) * 100

            # Per-period percentage returns; the undefined first one becomes 0.
            returns = (closes.pct_change() * 100).fillna(0)
            volatility = returns.std()
            avg_return = returns.mean()

            # High-low span relative to the open, in percent.
            range_pct = ((highs - lows) / opens) * 100
            avg_range_pct = range_pct.mean()

            # Candle direction counts.
            up_candles = int((closes > opens).sum())
            down_candles = int((closes < opens).sum())
            flat_candles = int((closes == opens).sum())
            candle_count = len(frame)
            bullish_ratio = (up_candles / candle_count) * 100 if candle_count > 0 else 0

            highest = highs.max()
            lowest = lows.min()

            # Momentum: last close against the close up to 10 periods earlier.
            lookback = min(10, candle_count - 1)
            if lookback > 0:
                reference_close = closes.iloc[-lookback - 1]
                momentum = ((last_close - reference_close) / reference_close) * 100
            else:
                momentum = 0

            # Trend strength: slope of a straight-line fit, scaled by mean close.
            if candle_count > 2:
                positions = np.arange(candle_count)
                slope, _ = np.polyfit(positions, closes, 1)
                trend_strength = slope / closes.mean() * 100
            else:
                trend_strength = 0

            percentiles = {
                '5th': returns.quantile(0.05),
                '25th': returns.quantile(0.25),
                '75th': returns.quantile(0.75),
                '95th': returns.quantile(0.95)
            }

            return {
                'current_price': last_close,
                'period_return': period_return,
                'volatility': volatility,
                'avg_return': avg_return,
                'avg_range_pct': avg_range_pct,
                'bullish_periods': up_candles,
                'bearish_periods': down_candles,
                'neutral_periods': flat_candles,
                'bullish_ratio': bullish_ratio,
                'period_high': highest,
                'period_low': lowest,
                'momentum': momentum,
                'trend_strength': trend_strength,
                'return_percentiles': percentiles,
                'max_gain': returns.max(),
                'max_loss': returns.min(),
                'positive_returns': int((returns > 0).sum()),
                'negative_returns': int((returns < 0).sum())
            }

        except Exception as e:
            logger.error(f"Price movement analysis error: {e}")
            return {'error': str(e)}


def create_volume_analysis_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Create a comprehensive volume analysis chart.

    Candles for ``symbol``/``timeframe`` are read from the ``market_data``
    table for the window ending at the current UTC time and starting
    ``days_back`` days earlier. The figure stacks three subplots: a price
    candlestick, colour-coded volume bars, and volume against its 20-period
    moving average.

    Args:
        symbol: Value matched against the ``symbol`` column.
        timeframe: Value matched against the ``timeframe`` column.
        days_back: Length of the query window in days.

    Returns:
        The populated figure. When the query yields no rows the figure only
        carries a "No data available" annotation; when anything raises, the
        error is logged and the figure carries an "Error: ..." annotation.
    """
    try:
        # Fetch market data for chart
        db_manager = DatabaseManager()
        db_manager.initialize()

        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        with db_manager.get_session() as session:
            from sqlalchemy import text

            query = text("""
                SELECT timestamp, open, high, low, close, volume, trades_count
                FROM market_data
                WHERE symbol = :symbol
                  AND timeframe = :timeframe
                  AND timestamp >= :start_time
                  AND timestamp <= :end_time
                ORDER BY timestamp ASC
            """)

            result = session.execute(query, {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': start_time,
                'end_time': end_time
            })

            # Rows are materialised while the session is still open.
            candles = [
                {
                    'timestamp': row.timestamp,
                    'open': float(row.open),
                    'high': float(row.high),
                    'low': float(row.low),
                    'close': float(row.close),
                    'volume': float(row.volume),
                    # A NULL (or zero) trade count is stored as 0.
                    'trades_count': int(row.trades_count) if row.trades_count else 0
                }
                for row in result
            ]

        if not candles:
            fig = go.Figure()
            fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return fig

        df = pd.DataFrame(candles)

        # Calculate volume moving average (partial windows allowed at the start)
        df['volume_ma'] = df['volume'].rolling(window=20, min_periods=1).mean()

        # Create subplots
        fig = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Price Action', 'Volume Analysis', 'Volume vs Moving Average'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # Price candlestick
        fig.add_trace(
            go.Candlestick(
                x=df['timestamp'],
                open=df['open'],
                high=df['high'],
                low=df['low'],
                close=df['close'],
                name='Price',
                increasing_line_color='#26a69a',
                decreasing_line_color='#ef5350'
            ),
            row=1, col=1
        )

        # Volume bars: green when the candle closed at or above its open, red otherwise
        colors = [
            '#26a69a' if close_price >= open_price else '#ef5350'
            for close_price, open_price in zip(df['close'], df['open'])
        ]

        fig.add_trace(
            go.Bar(
                x=df['timestamp'],
                y=df['volume'],
                name='Volume',
                marker_color=colors,
                opacity=0.7
            ),
            row=2, col=1
        )

        # Volume vs moving average
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume'],
                mode='lines',
                name='Volume',
                line=dict(color='#2196f3', width=1)
            ),
            row=3, col=1
        )

        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume_ma'],
                mode='lines',
                name='Volume MA(20)',
                line=dict(color='#ff9800', width=2)
            ),
            row=3, col=1
        )

        # Update layout
        fig.update_layout(
            title=f'{symbol} Volume Analysis ({timeframe})',
            xaxis_rangeslider_visible=False,
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        # Update y-axes
        fig.update_yaxes(title_text="Price", row=1, col=1)
        fig.update_yaxes(title_text="Volume", row=2, col=1)
        fig.update_yaxes(title_text="Volume", row=3, col=1)

        return fig

    except Exception as e:
        logger.error(f"Volume chart creation error: {e}")
        fig = go.Figure()
        fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return fig


def create_price_movement_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Build the three-row price movement figure for one symbol and timeframe.

    Candles are read from ``market_data`` for the window ending at the current
    UTC time and starting ``days_back`` days earlier. The rows show the
    compounded return, the per-period percentage return, and the high-low
    range as a percentage of the open.

    Returns:
        The populated figure; a figure annotated "No data available" when the
        query yields nothing; or a figure annotated "Error: ..." (after
        logging) when anything raises.
    """
    try:
        # Fetch market data for chart
        db_manager = DatabaseManager()
        db_manager.initialize()

        window_end = datetime.now(timezone.utc)
        window_start = window_end - timedelta(days=days_back)
        bind_params = {
            'symbol': symbol,
            'timeframe': timeframe,
            'start_time': window_start,
            'end_time': window_end
        }

        with db_manager.get_session() as session:
            from sqlalchemy import text

            query = text("""
                SELECT timestamp, open, high, low, close, volume
                FROM market_data
                WHERE symbol = :symbol
                  AND timeframe = :timeframe
                  AND timestamp >= :start_time
                  AND timestamp <= :end_time
                ORDER BY timestamp ASC
            """)

            # Consume the result while the session is still open.
            candles = [
                {
                    'timestamp': record.timestamp,
                    'open': float(record.open),
                    'high': float(record.high),
                    'low': float(record.low),
                    'close': float(record.close),
                    'volume': float(record.volume)
                }
                for record in session.execute(query, bind_params)
            ]

        if not candles:
            empty_fig = go.Figure()
            empty_fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return empty_fig

        df = pd.DataFrame(candles)
        timestamps = df['timestamp']

        # Derived series: percentage returns (first period set to 0),
        # range relative to the open, and the compounded return factor.
        df['returns'] = (df['close'].pct_change() * 100).fillna(0)
        df['range_pct'] = ((df['high'] - df['low']) / df['open']) * 100
        df['cumulative_return'] = (1 + df['returns'] / 100).cumprod()

        fig = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Cumulative Returns', 'Period Returns (%)', 'Price Range (%)'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # Row 1: compounded return line
        cumulative_trace = go.Scatter(
            x=timestamps,
            y=df['cumulative_return'],
            mode='lines',
            name='Cumulative Return',
            line=dict(color='#2196f3', width=2)
        )
        fig.add_trace(cumulative_trace, row=1, col=1)

        # Row 2: per-period returns, green for non-negative and red for negative
        bar_colors = []
        for period_return in df['returns']:
            bar_colors.append('#26a69a' if period_return >= 0 else '#ef5350')
        returns_trace = go.Bar(
            x=timestamps,
            y=df['returns'],
            name='Returns (%)',
            marker_color=bar_colors,
            opacity=0.7
        )
        fig.add_trace(returns_trace, row=2, col=1)

        # Row 3: high-low range as a percentage of the open
        range_trace = go.Scatter(
            x=timestamps,
            y=df['range_pct'],
            mode='lines+markers',
            name='Range %',
            line=dict(color='#ff9800', width=1),
            marker=dict(size=4)
        )
        fig.add_trace(range_trace, row=3, col=1)

        # Zero reference line on the returns row
        fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

        fig.update_layout(
            title=f'{symbol} Price Movement Analysis ({timeframe})',
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        axis_titles = ("Cumulative Return", "Returns (%)", "Range (%)")
        for row_number, axis_title in enumerate(axis_titles, start=1):
            fig.update_yaxes(title_text=axis_title, row=row_number, col=1)

        return fig

    except Exception as e:
        logger.error(f"Price movement chart creation error: {e}")
        error_fig = go.Figure()
        error_fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return error_fig


def create_data_analysis_panel():
    """Assemble the tabbed data analysis panel.

    Returns a wrapper ``Div`` holding one ``Tabs`` component with a
    "Volume Analysis" tab (selected initially) and a "Price Movement" tab.
    Each tab contains placeholder children under fixed component ids.
    """
    volume_tab = dcc.Tab(
        label="Volume Analysis",
        value="volume-analysis",
        children=[
            html.Div(
                id='volume-analysis-content',
                children=[html.P("Content for Volume Analysis")],
            ),
            html.Div(
                id='volume-stats-container',
                children=[html.P("Stats container loaded - waiting for callback...")],
            ),
        ],
    )

    price_tab = dcc.Tab(
        label="Price Movement",
        value="price-movement",
        children=[
            html.Div(
                id='price-movement-content',
                children=[
                    dbc.Alert(
                        "Select a symbol and timeframe to view price movement analysis.",
                        color="primary",
                    )
                ],
            ),
        ],
    )

    tabs = dcc.Tabs(
        id="data-analysis-tabs",
        value="volume-analysis",
        children=[volume_tab, price_tab],
    )
    return html.Div([tabs], id='data-analysis-panel-wrapper')


def format_number(value: Optional[float], decimals: int = 2) -> str:
    """Format number with appropriate decimals and units.

    Args:
        value: Number to format. ``None`` and NaN are accepted.
        decimals: Digits kept after the decimal point.

    Returns:
        ``"N/A"`` for missing values; otherwise the value scaled to the
        largest fitting unit with a ``K`` (1e3), ``M`` (1e6) or ``B`` (1e9)
        suffix, or unscaled below 1e3. The sign is preserved.
    """
    if pd.isna(value):
        return "N/A"

    units = ((1.0, ""), (1e3, "K"), (1e6, "M"), (1e9, "B"))
    magnitude = abs(value)

    # Largest unit whose scale the magnitude reaches; values below 1 stay unscaled.
    unit_index = 0
    for index, (scale, _) in enumerate(units):
        if magnitude >= scale:
            unit_index = index

    # Rounding can push the scaled value up to 1000 (e.g. 999_999.999 would
    # read "1000.00K"); step up one unit so it reads "1.00M" instead.
    scale, suffix = units[unit_index]
    if unit_index + 1 < len(units) and round(magnitude / scale, decimals) >= 1000:
        scale, suffix = units[unit_index + 1]

    return f"{value / scale:.{decimals}f}{suffix}"


def create_volume_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Create volume statistics display.

    Args:
        stats: Statistics dict carrying ``total_volume``, ``avg_volume``,
            ``volume_trend``, ``high_volume_periods``,
            ``volume_price_correlation`` and ``avg_trade_size``; or a dict
            with an ``'error'`` key.

    Returns:
        A row of six stat cards, or a dismissable danger alert when ``stats``
        carries an ``'error'`` key.
    """
    if 'error' in stats:
        return dbc.Alert(
            "Error loading volume statistics",
            color="danger",
            dismissable=True
        )

    def create_stat_card(icon, title, value, color="primary"):
        # One card: icon on the left, muted title above the coloured value.
        return dbc.Col(dbc.Card(dbc.CardBody([
            html.Div([
                html.Div(icon, className="display-6"),
                html.Div([
                    html.P(title, className="card-title mb-1 text-muted"),
                    html.H4(value, className=f"card-text fw-bold text-{color}")
                ], className="ms-3")
            ], className="d-flex align-items-center")
        ])), width=4, className="mb-3")

    # The correlation is NaN when either series has no variance (for example a
    # single candle); show "N/A" instead of "nan", and tolerate None.
    correlation = stats['volume_price_correlation']
    correlation_text = "N/A" if pd.isna(correlation) else f"{correlation:.3f}"

    return dbc.Row([
        create_stat_card("📊", "Total Volume", format_number(stats['total_volume'])),
        create_stat_card("📈", "Average Volume", format_number(stats['avg_volume'])),
        create_stat_card("🎯", "Volume Trend", stats['volume_trend'],
                         "success" if stats['volume_trend'] == "Increasing" else "danger"),
        create_stat_card("⚡", "High Volume Periods", str(stats['high_volume_periods'])),
        create_stat_card("🔗", "Volume-Price Correlation", correlation_text),
        create_stat_card("💱", "Avg Trade Size", format_number(stats['avg_trade_size']))
    ], className="mt-3")


def create_price_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Render the price movement statistics as a row of six cards.

    A dict carrying an ``'error'`` key yields a dismissable danger alert
    instead. Otherwise the cards show current price, period return,
    volatility, bullish ratio, momentum and max loss.
    """
    if 'error' in stats:
        return dbc.Alert("Error loading price statistics", color="danger", dismissable=True)

    # Only "success" and "danger" get their own text class; every other colour
    # name falls back to dark text.
    emphasis_classes = {"success": "text-success", "danger": "text-danger"}

    def _card(icon, title, value, color="primary"):
        value_class = emphasis_classes.get(color, "text-dark")
        caption = html.P(title, className="card-title mb-1 text-muted")
        figure = html.H4(value, className=f"card-text fw-bold {value_class}")
        text_column = html.Div([caption, figure], className="ms-3")
        layout = html.Div(
            [html.Div(icon, className="display-6"), text_column],
            className="d-flex align-items-center",
        )
        return dbc.Col(dbc.Card(dbc.CardBody([layout])), width=4, className="mb-3")

    def _sign_color(number):
        # Non-negative values are shown as gains, negative ones as losses.
        if number >= 0:
            return "success"
        return "danger"

    cards = [
        _card("💰", "Current Price", f"${stats['current_price']:.2f}"),
        _card("📈", "Period Return", f"{stats['period_return']:+.2f}%",
              _sign_color(stats['period_return'])),
        _card("📊", "Volatility", f"{stats['volatility']:.2f}%", color="warning"),
        _card("🎯", "Bullish Ratio", f"{stats['bullish_ratio']:.1f}%"),
        _card("⚡", "Momentum", f"{stats['momentum']:+.2f}%",
              _sign_color(stats['momentum'])),
        _card("📉", "Max Loss", f"{stats['max_loss']:.2f}%", "danger"),
    ]
    return dbc.Row(cards, className="mt-3")


def get_market_statistics(df: pd.DataFrame, symbol: str, timeframe: str) -> html.Div:
    """
    Build the combined price and volume statistics component for a DataFrame.

    An empty frame yields a muted "no data" message. Otherwise both analyzers
    are run on ``df``, the covered time range is taken from ``df.index``, and
    the price and volume stat rows are stacked under a heading. Analyzer
    errors are shown as red text; any exception raised here is logged and
    shown as a danger alert.
    """
    if df.empty:
        return html.Div("No data available for statistics.", className="text-center text-muted")

    try:
        price_analyzer = PriceMovementAnalyzer()
        volume_analyzer = VolumeAnalyzer()
        price_stats = price_analyzer.get_price_movement_statistics(df)
        volume_stats = volume_analyzer.get_volume_statistics(df)

        # Covered time range, read from the index.
        # NOTE(review): .strftime on the index bounds assumes a datetime-like
        # index - confirm against callers.
        stamp_format = '%Y-%m-%d %H:%M'
        start_date = df.index.min().strftime(stamp_format)
        end_date = df.index.max().strftime(stamp_format)

        # Surface the first analyzer error, price before volume.
        if any('error' in outcome for outcome in (price_stats, volume_stats)):
            error_msg = price_stats.get('error') or volume_stats.get('error')
            return html.Div(f"Error generating statistics: {error_msg}", style={'color': 'red'})

        span = df.index.max() - df.index.min()
        days_back = span.days
        time_status = f"📅 Analysis Range: {start_date} to {end_date} (~{days_back} days)"

        heading = html.H3("📊 Enhanced Market Statistics", className="mb-3")
        range_line = html.P(time_status, className="lead text-center text-muted mb-4")
        return html.Div([
            heading,
            range_line,
            create_price_stats_display(price_stats),
            create_volume_stats_display(volume_stats)
        ])
    except Exception as e:
        logger.error(f"Error in get_market_statistics: {e}", exc_info=True)
        return dbc.Alert(f"Error generating statistics display: {e}", color="danger")