""" Data analysis components for comprehensive market data analysis. """ from dash import html, dcc import dash_bootstrap_components as dbc import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots import pandas as pd import numpy as np from datetime import datetime, timezone, timedelta from typing import Dict, Any, List, Optional from utils.logger import get_logger from database.connection import DatabaseManager from database.operations import DatabaseOperationError logger = get_logger("data_analysis") class VolumeAnalyzer: """Analyze trading volume patterns and trends.""" def __init__(self): self.db_manager = DatabaseManager() self.db_manager.initialize() def get_volume_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: """Calculate comprehensive volume statistics from a DataFrame.""" try: if df.empty or 'volume' not in df.columns: return {'error': 'DataFrame is empty or missing volume column'} # Convert all relevant columns to float to avoid type errors with Decimal df = df.copy() numeric_cols = ['open', 'high', 'low', 'close', 'volume'] for col in numeric_cols: if col in df.columns: df[col] = df[col].astype(float) if 'trades_count' in df.columns: df['trades_count'] = df['trades_count'].astype(float) # Calculate volume statistics total_volume = df['volume'].sum() avg_volume = df['volume'].mean() volume_std = df['volume'].std() # Volume trend analysis recent_volume = df['volume'].tail(10).mean() # Last 10 periods older_volume = df['volume'].head(10).mean() # First 10 periods volume_trend = "Increasing" if recent_volume > older_volume else "Decreasing" # High volume periods (above 2 standard deviations) high_volume_threshold = avg_volume + (2 * volume_std) high_volume_periods = len(df[df['volume'] > high_volume_threshold]) # Volume-Price correlation price_change = df['close'] - df['open'] volume_price_corr = df['volume'].corr(price_change.abs()) # Average trade size (volume per trade) if 'trades_count' in df.columns: df['avg_trade_size'] = df['volume'] / df['trades_count'].replace(0, 1) avg_trade_size = df['avg_trade_size'].mean() else: avg_trade_size = None # Not available return { 'total_volume': total_volume, 'avg_volume': avg_volume, 'volume_std': volume_std, 'volume_trend': volume_trend, 'high_volume_periods': high_volume_periods, 'volume_price_correlation': volume_price_corr, 'avg_trade_size': avg_trade_size, 'max_volume': df['volume'].max(), 'min_volume': df['volume'].min(), 'volume_percentiles': { '25th': df['volume'].quantile(0.25), '50th': df['volume'].quantile(0.50), '75th': df['volume'].quantile(0.75), '95th': df['volume'].quantile(0.95) } } except Exception as e: logger.error(f"Volume analysis error: {e}") return {'error': str(e)} class PriceMovementAnalyzer: """Analyze price movement patterns and statistics.""" def __init__(self): self.db_manager = DatabaseManager() self.db_manager.initialize() def get_price_movement_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: """Calculate comprehensive price movement statistics from a DataFrame.""" try: if df.empty or not all(col in df.columns for col in ['open', 'high', 'low', 'close']): return {'error': 'DataFrame is empty or missing required price columns'} # Convert all relevant columns to float to avoid type errors with Decimal df = df.copy() numeric_cols = ['open', 'high', 'low', 'close', 'volume'] for col in numeric_cols: if col in df.columns: df[col] = df[col].astype(float) # Basic price statistics current_price = df['close'].iloc[-1] period_start_price = df['open'].iloc[0] period_return = ((current_price - period_start_price) / period_start_price) * 100 # Daily returns (percentage changes) df['returns'] = df['close'].pct_change() * 100 df['returns'] = df['returns'].fillna(0) # Volatility metrics volatility = df['returns'].std() avg_return = df['returns'].mean() # Price range analysis df['range'] = df['high'] - df['low'] df['range_pct'] = (df['range'] / df['open']) * 100 avg_range_pct = df['range_pct'].mean() # Directional analysis bullish_periods = len(df[df['close'] > df['open']]) bearish_periods = len(df[df['close'] < df['open']]) neutral_periods = len(df[df['close'] == df['open']]) total_periods = len(df) bullish_ratio = (bullish_periods / total_periods) * 100 if total_periods > 0 else 0 # Price extremes period_high = df['high'].max() period_low = df['low'].min() # Momentum indicators # Simple momentum (current vs N periods ago) momentum_periods = min(10, len(df) - 1) if momentum_periods > 0: momentum = ((current_price - df['close'].iloc[-momentum_periods-1]) / df['close'].iloc[-momentum_periods-1]) * 100 else: momentum = 0 # Trend strength (linear regression slope) if len(df) > 2: x = np.arange(len(df)) slope, _ = np.polyfit(x, df['close'], 1) trend_strength = slope / df['close'].mean() * 100 # Normalize by average price else: trend_strength = 0 return { 'current_price': current_price, 'period_return': period_return, 'volatility': volatility, 'avg_return': avg_return, 'avg_range_pct': avg_range_pct, 'bullish_periods': bullish_periods, 'bearish_periods': bearish_periods, 'neutral_periods': neutral_periods, 'bullish_ratio': bullish_ratio, 'period_high': period_high, 'period_low': period_low, 'momentum': momentum, 'trend_strength': trend_strength, 'return_percentiles': { '5th': df['returns'].quantile(0.05), '25th': df['returns'].quantile(0.25), '75th': df['returns'].quantile(0.75), '95th': df['returns'].quantile(0.95) }, 'max_gain': df['returns'].max(), 'max_loss': df['returns'].min(), 'positive_returns': len(df[df['returns'] > 0]), 'negative_returns': len(df[df['returns'] < 0]) } except Exception as e: logger.error(f"Price movement analysis error: {e}") return {'error': str(e)} def create_volume_analysis_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure: """Create a comprehensive volume analysis chart.""" try: analyzer = VolumeAnalyzer() # Fetch market data for chart db_manager = DatabaseManager() db_manager.initialize() end_time = datetime.now(timezone.utc) start_time = end_time - timedelta(days=days_back) with db_manager.get_session() as session: from sqlalchemy import text query = text(""" SELECT timestamp, open, high, low, close, volume, trades_count FROM market_data WHERE symbol = :symbol AND timeframe = :timeframe AND timestamp >= :start_time AND timestamp <= :end_time ORDER BY timestamp ASC """) result = session.execute(query, { 'symbol': symbol, 'timeframe': timeframe, 'start_time': start_time, 'end_time': end_time }) candles = [] for row in result: candles.append({ 'timestamp': row.timestamp, 'open': float(row.open), 'high': float(row.high), 'low': float(row.low), 'close': float(row.close), 'volume': float(row.volume), 'trades_count': int(row.trades_count) if row.trades_count else 0 }) if not candles: fig = go.Figure() fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5) return fig df = pd.DataFrame(candles) # Calculate volume moving average df['volume_ma'] = df['volume'].rolling(window=20, min_periods=1).mean() # Create subplots fig = make_subplots( rows=3, cols=1, subplot_titles=('Price Action', 'Volume Analysis', 'Volume vs Moving Average'), vertical_spacing=0.08, row_heights=[0.4, 0.3, 0.3] ) # Price candlestick fig.add_trace( go.Candlestick( x=df['timestamp'], open=df['open'], high=df['high'], low=df['low'], close=df['close'], name='Price', increasing_line_color='#26a69a', decreasing_line_color='#ef5350' ), row=1, col=1 ) # Volume bars with color coding colors = ['#26a69a' if close >= open else '#ef5350' for close, open in zip(df['close'], df['open'])] fig.add_trace( go.Bar( x=df['timestamp'], y=df['volume'], name='Volume', marker_color=colors, opacity=0.7 ), row=2, col=1 ) # Volume vs moving average fig.add_trace( go.Scatter( x=df['timestamp'], y=df['volume'], mode='lines', name='Volume', line=dict(color='#2196f3', width=1) ), row=3, col=1 ) fig.add_trace( go.Scatter( x=df['timestamp'], y=df['volume_ma'], mode='lines', name='Volume MA(20)', line=dict(color='#ff9800', width=2) ), row=3, col=1 ) # Update layout fig.update_layout( title=f'{symbol} Volume Analysis ({timeframe})', xaxis_rangeslider_visible=False, height=800, showlegend=True, template='plotly_white' ) # Update y-axes fig.update_yaxes(title_text="Price", row=1, col=1) fig.update_yaxes(title_text="Volume", row=2, col=1) fig.update_yaxes(title_text="Volume", row=3, col=1) return fig except Exception as e: logger.error(f"Volume chart creation error: {e}") fig = go.Figure() fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5) return fig def create_price_movement_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure: """Create a comprehensive price movement analysis chart.""" try: # Fetch market data for chart db_manager = DatabaseManager() db_manager.initialize() end_time = datetime.now(timezone.utc) start_time = end_time - timedelta(days=days_back) with db_manager.get_session() as session: from sqlalchemy import text query = text(""" SELECT timestamp, open, high, low, close, volume FROM market_data WHERE symbol = :symbol AND timeframe = :timeframe AND timestamp >= :start_time AND timestamp <= :end_time ORDER BY timestamp ASC """) result = session.execute(query, { 'symbol': symbol, 'timeframe': timeframe, 'start_time': start_time, 'end_time': end_time }) candles = [] for row in result: candles.append({ 'timestamp': row.timestamp, 'open': float(row.open), 'high': float(row.high), 'low': float(row.low), 'close': float(row.close), 'volume': float(row.volume) }) if not candles: fig = go.Figure() fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5) return fig df = pd.DataFrame(candles) # Calculate returns and statistics df['returns'] = df['close'].pct_change() * 100 df['returns'] = df['returns'].fillna(0) df['range_pct'] = ((df['high'] - df['low']) / df['open']) * 100 df['cumulative_return'] = (1 + df['returns'] / 100).cumprod() # Create subplots fig = make_subplots( rows=3, cols=1, subplot_titles=('Cumulative Returns', 'Period Returns (%)', 'Price Range (%)'), vertical_spacing=0.08, row_heights=[0.4, 0.3, 0.3] ) # Cumulative returns fig.add_trace( go.Scatter( x=df['timestamp'], y=df['cumulative_return'], mode='lines', name='Cumulative Return', line=dict(color='#2196f3', width=2) ), row=1, col=1 ) # Period returns with color coding colors = ['#26a69a' if ret >= 0 else '#ef5350' for ret in df['returns']] fig.add_trace( go.Bar( x=df['timestamp'], y=df['returns'], name='Returns (%)', marker_color=colors, opacity=0.7 ), row=2, col=1 ) # Price range percentage fig.add_trace( go.Scatter( x=df['timestamp'], y=df['range_pct'], mode='lines+markers', name='Range %', line=dict(color='#ff9800', width=1), marker=dict(size=4) ), row=3, col=1 ) # Add zero line for returns fig.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1) # Update layout fig.update_layout( title=f'{symbol} Price Movement Analysis ({timeframe})', height=800, showlegend=True, template='plotly_white' ) # Update y-axes fig.update_yaxes(title_text="Cumulative Return", row=1, col=1) fig.update_yaxes(title_text="Returns (%)", row=2, col=1) fig.update_yaxes(title_text="Range (%)", row=3, col=1) return fig except Exception as e: logger.error(f"Price movement chart creation error: {e}") fig = go.Figure() fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5) return fig def create_data_analysis_panel(): """Create the main data analysis panel with tabs for different analyses.""" return html.Div([ dcc.Tabs( id="data-analysis-tabs", value="volume-analysis", children=[ dcc.Tab(label="Volume Analysis", value="volume-analysis", children=[ html.Div(id='volume-analysis-content', children=[ html.P("Content for Volume Analysis") ]), html.Div(id='volume-stats-container', children=[ html.P("Stats container loaded - waiting for callback...") ]) ]), dcc.Tab(label="Price Movement", value="price-movement", children=[ html.Div(id='price-movement-content', children=[ dbc.Alert("Select a symbol and timeframe to view price movement analysis.", color="primary") ]) ]), ], ) ], id='data-analysis-panel-wrapper') def format_number(value: float, decimals: int = 2) -> str: """Format number with appropriate decimals and units.""" if pd.isna(value): return "N/A" if abs(value) >= 1e9: return f"{value/1e9:.{decimals}f}B" elif abs(value) >= 1e6: return f"{value/1e6:.{decimals}f}M" elif abs(value) >= 1e3: return f"{value/1e3:.{decimals}f}K" else: return f"{value:.{decimals}f}" def create_volume_stats_display(stats: Dict[str, Any]) -> html.Div: """Create volume statistics display.""" if 'error' in stats: return dbc.Alert( "Error loading volume statistics", color="danger", dismissable=True ) def create_stat_card(icon, title, value, color="primary"): return dbc.Col(dbc.Card(dbc.CardBody([ html.Div([ html.Div(icon, className="display-6"), html.Div([ html.P(title, className="card-title mb-1 text-muted"), html.H4(value, className=f"card-text fw-bold text-{color}") ], className="ms-3") ], className="d-flex align-items-center") ])), width=4, className="mb-3") return dbc.Row([ create_stat_card("📊", "Total Volume", format_number(stats['total_volume'])), create_stat_card("📈", "Average Volume", format_number(stats['avg_volume'])), create_stat_card("🎯", "Volume Trend", stats['volume_trend'], "success" if stats['volume_trend'] == "Increasing" else "danger"), create_stat_card("⚡", "High Volume Periods", str(stats['high_volume_periods'])), create_stat_card("🔗", "Volume-Price Correlation", f"{stats['volume_price_correlation']:.3f}"), create_stat_card("💱", "Avg Trade Size", format_number(stats['avg_trade_size'])) ], className="mt-3") def create_price_stats_display(stats: Dict[str, Any]) -> html.Div: """Create price movement statistics display.""" if 'error' in stats: return dbc.Alert( "Error loading price statistics", color="danger", dismissable=True ) def create_stat_card(icon, title, value, color="primary"): text_color = "text-dark" if color == "success": text_color = "text-success" elif color == "danger": text_color = "text-danger" return dbc.Col(dbc.Card(dbc.CardBody([ html.Div([ html.Div(icon, className="display-6"), html.Div([ html.P(title, className="card-title mb-1 text-muted"), html.H4(value, className=f"card-text fw-bold {text_color}") ], className="ms-3") ], className="d-flex align-items-center") ])), width=4, className="mb-3") return dbc.Row([ create_stat_card("💰", "Current Price", f"${stats['current_price']:.2f}"), create_stat_card("📈", "Period Return", f"{stats['period_return']:+.2f}%", "success" if stats['period_return'] >= 0 else "danger"), create_stat_card("📊", "Volatility", f"{stats['volatility']:.2f}%", color="warning"), create_stat_card("🎯", "Bullish Ratio", f"{stats['bullish_ratio']:.1f}%"), create_stat_card("⚡", "Momentum", f"{stats['momentum']:+.2f}%", "success" if stats['momentum'] >= 0 else "danger"), create_stat_card("📉", "Max Loss", f"{stats['max_loss']:.2f}%", "danger") ], className="mt-3") def get_market_statistics(df: pd.DataFrame, symbol: str, timeframe: str) -> html.Div: """ Generate a comprehensive market statistics component from a DataFrame. """ if df.empty: return html.Div("No data available for statistics.", className="text-center text-muted") try: # Get statistics price_analyzer = PriceMovementAnalyzer() volume_analyzer = VolumeAnalyzer() price_stats = price_analyzer.get_price_movement_statistics(df) volume_stats = volume_analyzer.get_volume_statistics(df) # Format key statistics for display start_date = df.index.min().strftime('%Y-%m-%d %H:%M') end_date = df.index.max().strftime('%Y-%m-%d %H:%M') # Check for errors from analyzers if 'error' in price_stats or 'error' in volume_stats: error_msg = price_stats.get('error') or volume_stats.get('error') return html.Div(f"Error generating statistics: {error_msg}", style={'color': 'red'}) # Time range for display days_back = (df.index.max() - df.index.min()).days time_status = f"📅 Analysis Range: {start_date} to {end_date} (~{days_back} days)" return html.Div([ html.H3("📊 Enhanced Market Statistics", className="mb-3"), html.P( time_status, className="lead text-center text-muted mb-4" ), create_price_stats_display(price_stats), create_volume_stats_display(volume_stats) ]) except Exception as e: logger.error(f"Error in get_market_statistics: {e}", exc_info=True) return dbc.Alert(f"Error generating statistics display: {e}", color="danger")