""" Data analysis components for comprehensive market data analysis. """ from dash import html, dcc import dash_bootstrap_components as dbc import pandas as pd import numpy as np from datetime import datetime, timezone, timedelta from typing import Dict, Any, List, Optional from utils.logger import get_logger from database.connection import DatabaseManager from database.operations import DatabaseOperationError, get_database_operations from config.constants.chart_constants import CHART_COLORS, UI_TEXT logger = get_logger("data_analysis") class VolumeAnalyzer: """Analyze trading volume patterns and trends.""" def __init__(self): pass def get_volume_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: """Calculate comprehensive volume statistics from a DataFrame.""" try: if df.empty or 'volume' not in df.columns: return {'error': 'DataFrame is empty or missing volume column'} df = df.copy() df = self._ensure_numeric_cols(df) stats = {} stats.update(self._calculate_basic_volume_stats(df)) stats.update(self._analyze_volume_trend(df)) stats.update(self._identify_high_volume_periods(df, stats['avg_volume'], stats['volume_std'])) stats.update(self._calculate_volume_price_correlation(df)) stats.update(self._calculate_avg_trade_size(df)) stats.update(self._calculate_volume_percentiles(df)) return stats except Exception as e: logger.error(f"Volume analysis error: {e}") return {'error': str(e)} def _ensure_numeric_cols(self, df: pd.DataFrame) -> pd.DataFrame: numeric_cols = ['open', 'high', 'low', 'close', 'volume'] for col in numeric_cols: if col in df.columns: df[col] = df[col].astype(float) if 'trades_count' in df.columns: df['trades_count'] = df['trades_count'].astype(float) return df def _calculate_basic_volume_stats(self, df: pd.DataFrame) -> Dict[str, Any]: return { 'total_volume': df['volume'].sum(), 'avg_volume': df['volume'].mean(), 'volume_std': df['volume'].std(), 'max_volume': df['volume'].max(), 'min_volume': df['volume'].min() } def _analyze_volume_trend(self, df: pd.DataFrame) -> Dict[str, Any]: recent_volume = df['volume'].tail(10).mean() older_volume = df['volume'].head(10).mean() volume_trend = "Increasing" if recent_volume > older_volume else "Decreasing" return {'volume_trend': volume_trend} def _identify_high_volume_periods(self, df: pd.DataFrame, avg_volume: float, volume_std: float) -> Dict[str, Any]: high_volume_threshold = avg_volume + (2 * volume_std) high_volume_periods = len(df[df['volume'] > high_volume_threshold]) return {'high_volume_periods': high_volume_periods} def _calculate_volume_price_correlation(self, df: pd.DataFrame) -> Dict[str, Any]: price_change = df['close'] - df['open'] volume_price_corr = df['volume'].corr(price_change.abs()) return {'volume_price_correlation': volume_price_corr} def _calculate_avg_trade_size(self, df: pd.DataFrame) -> Dict[str, Any]: if 'trades_count' in df.columns: df['avg_trade_size'] = df['volume'] / df['trades_count'].replace(0, 1) avg_trade_size = df['avg_trade_size'].mean() else: avg_trade_size = None return {'avg_trade_size': avg_trade_size} def _calculate_volume_percentiles(self, df: pd.DataFrame) -> Dict[str, Any]: return { 'volume_percentiles': { '25th': df['volume'].quantile(0.25), '50th': df['volume'].quantile(0.50), '75th': df['volume'].quantile(0.75), '95th': df['volume'].quantile(0.95) } } class PriceMovementAnalyzer: """Analyze price movement patterns and statistics.""" def __init__(self): pass def get_price_movement_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: """Calculate comprehensive price movement statistics from a DataFrame.""" try: if df.empty or not all(col in df.columns for col in ['open', 'high', 'low', 'close']): return {'error': 'DataFrame is empty or missing required price columns'} df = df.copy() df = self._ensure_numeric_cols(df) stats = {} stats.update(self._calculate_basic_price_stats(df)) stats.update(self._calculate_returns_and_volatility(df)) stats.update(self._analyze_price_range(df)) stats.update(self._analyze_directional_movement(df)) stats.update(self._calculate_price_extremes(df)) stats.update(self._calculate_momentum(df)) stats.update(self._calculate_trend_strength(df)) stats.update(self._calculate_return_percentiles(df)) return stats except Exception as e: logger.error(f"Price movement analysis error: {e}") return {'error': str(e)} def _ensure_numeric_cols(self, df: pd.DataFrame) -> pd.DataFrame: numeric_cols = ['open', 'high', 'low', 'close', 'volume'] for col in numeric_cols: if col in df.columns: df[col] = df[col].astype(float) return df def _calculate_basic_price_stats(self, df: pd.DataFrame) -> Dict[str, Any]: current_price = df['close'].iloc[-1] period_start_price = df['open'].iloc[0] period_return = ((current_price - period_start_price) / period_start_price) * 100 return {'current_price': current_price, 'period_return': period_return} def _calculate_returns_and_volatility(self, df: pd.DataFrame) -> Dict[str, Any]: df['returns'] = df['close'].pct_change() * 100 df['returns'] = df['returns'].fillna(0) volatility = df['returns'].std() avg_return = df['returns'].mean() return {'volatility': volatility, 'avg_return': avg_return, 'returns': df['returns']} def _analyze_price_range(self, df: pd.DataFrame) -> Dict[str, Any]: df['range'] = df['high'] - df['low'] df['range_pct'] = (df['range'] / df['open']) * 100 avg_range_pct = df['range_pct'].mean() return {'avg_range_pct': avg_range_pct} def _analyze_directional_movement(self, df: pd.DataFrame) -> Dict[str, Any]: bullish_periods = len(df[df['close'] > df['open']]) bearish_periods = len(df[df['close'] < df['open']]) neutral_periods = len(df[df['close'] == df['open']]) total_periods = len(df) bullish_ratio = (bullish_periods / total_periods) * 100 if total_periods > 0 else 0 return { 'bullish_periods': bullish_periods, 'bearish_periods': bearish_periods, 'neutral_periods': neutral_periods, 'bullish_ratio': bullish_ratio, 'positive_returns': len(df[df['returns'] > 0]), 'negative_returns': len(df[df['returns'] < 0]) } def _calculate_price_extremes(self, df: pd.DataFrame) -> Dict[str, Any]: period_high = df['high'].max() period_low = df['low'].min() return {'period_high': period_high, 'period_low': period_low} def _calculate_momentum(self, df: pd.DataFrame) -> Dict[str, Any]: current_price = df['close'].iloc[-1] momentum_periods = min(10, len(df) - 1) if momentum_periods > 0: momentum = ((current_price - df['close'].iloc[-momentum_periods-1]) / df['close'].iloc[-momentum_periods-1]) * 100 else: momentum = 0 return {'momentum': momentum} def _calculate_trend_strength(self, df: pd.DataFrame) -> Dict[str, Any]: if len(df) > 2: x = np.arange(len(df)) slope, _ = np.polyfit(x, df['close'], 1) trend_strength = slope / df['close'].mean() * 100 else: trend_strength = 0 return {'trend_strength': trend_strength} def _calculate_return_percentiles(self, df: pd.DataFrame) -> Dict[str, Any]: return { 'return_percentiles': { '5th': df['returns'].quantile(0.05), '25th': df['returns'].quantile(0.25), '75th': df['returns'].quantile(0.75), '95th': df['returns'].quantile(0.95) }, 'max_gain': df['returns'].max(), 'max_loss': df['returns'].min() } def format_number(value: float, decimals: int = 2) -> str: """Formats a number to a string with specified decimals.""" if value is None: return "N/A" return f"{value:,.{decimals}f}" def _create_stat_card(icon, title, value, color="primary") -> dbc.Card: # Extracted helper return dbc.Card( dbc.CardBody( [ html.H4(title, className="card-title"), html.P(value, className="card-text"), html.I(className=f"fas fa-{icon} text-{color}"), ] ), className=f"text-center m-1 bg-light border-{color}" ) def create_volume_stats_display(stats: Dict[str, Any]) -> html.Div: """Creates a display for volume statistics.""" if 'error' in stats: return html.Div(f"Error: {stats['error']}", className="alert alert-danger") return html.Div( [ html.H3("Volume Statistics", className="mb-3 text-primary"), dbc.Row([ dbc.Col(_create_stat_card("chart-bar", "Total Volume", format_number(stats.get('total_volume')), "success"), md=6), dbc.Col(_create_stat_card("calculator", "Avg. Volume", format_number(stats.get('avg_volume')), "info"), md=6), ]), dbc.Row([ dbc.Col(_create_stat_card("arrow-trend-up", "Volume Trend", stats.get('volume_trend'), "warning"), md=6), dbc.Col(_create_stat_card("hand-holding-usd", "Avg. Trade Size", format_number(stats.get('avg_trade_size')), "secondary"), md=6), ]), dbc.Row([ dbc.Col(_create_stat_card("ranking-star", "High Vol. Periods", stats.get('high_volume_periods')), md=6), dbc.Col(_create_stat_card("arrows-left-right", "Vol-Price Corr.", format_number(stats.get('volume_price_correlation'), 4), "primary"), md=6), ]), ] ) def create_price_stats_display(stats: Dict[str, Any]) -> html.Div: """Creates a display for price movement statistics.""" if 'error' in stats: return html.Div(f"Error: {stats['error']}", className="alert alert-danger") return html.Div( [ html.H3("Price Movement Statistics", className="mb-3 text-success"), dbc.Row([ dbc.Col(_create_stat_card("dollar-sign", "Current Price", format_number(stats.get('current_price')), "success"), md=6), dbc.Col(_create_stat_card("percent", "Period Return", f"{format_number(stats.get('period_return'))}%"), md=6), ]), dbc.Row([ dbc.Col(_create_stat_card("wave-square", "Volatility", f"{format_number(stats.get('volatility'))}%"), md=6), dbc.Col(_create_stat_card("chart-line", "Avg. Daily Return", f"{format_number(stats.get('avg_return'))}%"), md=6), ]), dbc.Row([ dbc.Col(_create_stat_card("arrows-up-down-left-right", "Avg. Range %", f"{format_number(stats.get('avg_range_pct'))}%"), md=6), dbc.Col(_create_stat_card("arrow-up", "Bullish Ratio", f"{format_number(stats.get('bullish_ratio'))}%"), md=6), ]), ] ) def get_market_statistics(df: pd.DataFrame, symbol: str, timeframe: str) -> html.Div: """ Generates a display of key market statistics from the provided DataFrame. """ if df.empty: return html.Div([html.P("No market data available for statistics.")], className="alert alert-info mt-4") # Basic Market Overview first_timestamp = df.index.min() last_timestamp = df.index.max() num_candles = len(df) # Price Changes first_close = df['close'].iloc[0] last_close = df['close'].iloc[-1] price_change_abs = last_close - first_close price_change_pct = (price_change_abs / first_close) * 100 if first_close != 0 else 0 # Highs and Lows period_high = df['high'].max() period_low = df['low'].min() # Average True Range (ATR) - A measure of volatility # Requires TA-Lib or manual calculation. For simplicity, we'll use a basic range for now. # Ideally, integrate a proper TA library. df['tr'] = np.maximum(df['high'] - df['low'], np.maximum(abs(df['high'] - df['close'].shift()), abs(df['low'] - df['close'].shift()))) atr = df['tr'].mean() if not df['tr'].empty else 0 # Trading Volume Analysis total_volume = df['volume'].sum() average_volume = df['volume'].mean() # Market Cap (placeholder - requires external data) market_cap_info = "N/A (requires external API)" # Order Book Depth (placeholder - requires real-time order book data) order_book_depth = "N/A (requires real-time data)" stats_content = html.Div([ html.H3(f"Market Statistics for {symbol} ({timeframe})", className="mb-3 text-info"), _create_basic_market_overview( first_timestamp, last_timestamp, num_candles, first_close, last_close, price_change_abs, price_change_pct, total_volume, average_volume, atr ), html.Hr(className="my-4"), _create_advanced_market_stats( period_high, period_low, market_cap_info, order_book_depth ) ], className="mb-4") return stats_content def _create_basic_market_overview( first_timestamp: datetime, last_timestamp: datetime, num_candles: int, first_close: float, last_close: float, price_change_abs: float, price_change_pct: float, total_volume: float, average_volume: float, atr: float ) -> dbc.Row: return dbc.Row([ dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Time Period", className="card-title"), html.P(f"From: {first_timestamp.strftime('%Y-%m-%d %H:%M')}"), html.P(f"To: {last_timestamp.strftime('%Y-%m-%d %H:%M')}"), html.P(f"Candles: {num_candles}"), ] ), className="text-center m-1 bg-light border-info" ), md=4 ), dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Price Movement", className="card-title"), html.P(f"Initial Price: {format_number(first_close)}"), html.P(f"Final Price: {format_number(last_close)}"), html.P(f"Change: {format_number(price_change_abs)} ({format_number(price_change_pct)}%)", style={'color': 'green' if price_change_pct >= 0 else 'red'}), ] ), className="text-center m-1 bg-light border-info" ), md=4 ), dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Volume & Volatility", className="card-title"), html.P(f"Total Volume: {format_number(total_volume)}"), html.P(f"Average Volume: {format_number(average_volume)}"), html.P(f"Average True Range: {format_number(atr, 4)}"), ] ), className="text-center m-1 bg-light border-info" ), md=4 ), ]) def _create_advanced_market_stats( period_high: float, period_low: float, market_cap_info: str, order_book_depth: str ) -> dbc.Row: return dbc.Row([ dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Period Extremes", className="card-title"), html.P(f"Period High: {format_number(period_high)}"), html.P(f"Period Low: {format_number(period_low)}"), ] ), className="text-center m-1 bg-light border-warning" ), md=4 ), dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Liquidity/Depth", className="card-title"), html.P(f"Market Cap: {market_cap_info}"), html.P(f"Order Book Depth: {order_book_depth}"), ] ), className="text-center m-1 bg-light border-warning" ), md=4 ), dbc.Col( dbc.Card( dbc.CardBody( [ html.H4("Custom Indicators", className="card-title"), html.P("RSI: N/A"), # Placeholder html.P("MACD: N/A"), # Placeholder ] ), className="text-center m-1 bg-light border-warning" ), md=4 ) ])