# TCPDashboard/dashboard/components/data_analysis.py
#
# Repository web-view metadata: 673 lines, 25 KiB, Python
# (viewer controls: Raw / Normal View / History)

"""
Data analysis components for comprehensive market data analysis.
"""
from dash import html, dcc
import dash_mantine_components as dmc
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta
from typing import Dict, Any, List, Optional
from utils.logger import get_logger
from database.connection import DatabaseManager
from database.operations import DatabaseOperationError
# Module-level logger, obtained from the project's logging helper under the
# name "data_analysis"; used by the analyzers and chart builders below.
logger = get_logger("data_analysis")
class VolumeAnalyzer:
    """Analyze trading volume patterns and trends.

    The statistics are computed from a caller-supplied DataFrame; the
    database manager created in ``__init__`` is kept as an attribute but is
    not read by ``get_volume_statistics``.
    """

    def __init__(self):
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_volume_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Calculate comprehensive volume statistics from a DataFrame.

        Args:
            df: Candle data. Must contain a ``volume`` column; ``open`` and
                ``close`` are needed for the volume/price correlation, and
                ``trades_count`` is optional.

        Returns:
            A dict with the keys ``total_volume``, ``avg_volume``,
            ``volume_std``, ``volume_trend`` ("Increasing"/"Decreasing"),
            ``high_volume_periods``, ``volume_price_correlation``,
            ``avg_trade_size`` (``None`` when ``trades_count`` is absent),
            ``max_volume``, ``min_volume`` and ``volume_percentiles``.
            On empty/invalid input or any failure, ``{'error': <message>}``
            is returned instead of raising.
        """
        try:
            if df.empty or 'volume' not in df.columns:
                return {'error': 'DataFrame is empty or missing volume column'}

            # Work on a copy and convert to float so Decimal values coming
            # from the database do not break the arithmetic below.
            df = df.copy()
            for col in ('open', 'high', 'low', 'close', 'volume', 'trades_count'):
                if col in df.columns:
                    df[col] = df[col].astype(float)

            volume = df['volume']
            total_volume = volume.sum()
            avg_volume = volume.mean()
            volume_std = volume.std()

            # Volume trend: compare the last periods against the first ones.
            # The window is capped at half the data so the two slices never
            # overlap; with overlapping (or identical) slices a short series
            # could never be reported as "Increasing". For 20+ rows this is
            # the usual 10-period window.
            window = max(1, min(10, len(df) // 2))
            recent_volume = volume.tail(window).mean()
            older_volume = volume.head(window).mean()
            volume_trend = "Increasing" if recent_volume > older_volume else "Decreasing"

            # High volume periods (above 2 standard deviations)
            high_volume_threshold = avg_volume + (2 * volume_std)
            high_volume_periods = int((volume > high_volume_threshold).sum())

            # Correlation between volume and the size of the candle body
            price_change = df['close'] - df['open']
            volume_price_corr = volume.corr(price_change.abs())

            # Average trade size (volume per trade). Candles with zero trades
            # are excluded (NaN) rather than counted as one trade carrying
            # the whole candle volume, which would skew the average.
            if 'trades_count' in df.columns:
                trades = df['trades_count'].replace(0, np.nan)
                avg_trade_size = (volume / trades).mean()
            else:
                avg_trade_size = None  # Not available

            return {
                'total_volume': total_volume,
                'avg_volume': avg_volume,
                'volume_std': volume_std,
                'volume_trend': volume_trend,
                'high_volume_periods': high_volume_periods,
                'volume_price_correlation': volume_price_corr,
                'avg_trade_size': avg_trade_size,
                'max_volume': volume.max(),
                'min_volume': volume.min(),
                'volume_percentiles': {
                    '25th': volume.quantile(0.25),
                    '50th': volume.quantile(0.50),
                    '75th': volume.quantile(0.75),
                    '95th': volume.quantile(0.95)
                }
            }
        except Exception as e:
            logger.error(f"Volume analysis error: {e}")
            return {'error': str(e)}
class PriceMovementAnalyzer:
    """Analyze price movement patterns and statistics.

    The statistics are computed from a caller-supplied DataFrame; the
    database manager created in ``__init__`` is kept as an attribute but is
    not read by ``get_price_movement_statistics``.
    """

    def __init__(self):
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_price_movement_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Calculate comprehensive price movement statistics from a DataFrame.

        Args:
            df: Candle data with ``open``, ``high``, ``low`` and ``close``
                columns (``volume`` is converted too when present). Rows are
                used in the order given: the first row is treated as the
                start of the period and the last row as the current candle.

        Returns:
            A dict of price statistics (current price, period return,
            volatility, average return/range, bullish/bearish/neutral
            counts, period high/low, momentum, trend strength, return
            percentiles, max gain/loss and positive/negative return counts).
            All return-like values are percentages. On empty/invalid input
            or any failure, ``{'error': <message>}`` is returned instead of
            raising.
        """
        try:
            required = ('open', 'high', 'low', 'close')
            if df.empty or not all(col in df.columns for col in required):
                return {'error': 'DataFrame is empty or missing required price columns'}

            # Work on a copy and convert to float so Decimal values coming
            # from the database do not break the arithmetic below.
            df = df.copy()
            for col in ('open', 'high', 'low', 'close', 'volume'):
                if col in df.columns:
                    df[col] = df[col].astype(float)

            close = df['close']
            total_periods = len(df)

            # Basic price statistics
            current_price = close.iloc[-1]
            period_start_price = df['open'].iloc[0]
            period_return = ((current_price - period_start_price) / period_start_price) * 100

            # Period-over-period returns in percent; the first row has no
            # predecessor and is reported as a 0% return.
            df['returns'] = (close.pct_change() * 100).fillna(0)
            returns = df['returns']

            # Volatility metrics. The sample standard deviation of a single
            # observation is NaN; report 0.0 so a one-candle window does not
            # surface as "nan%".
            volatility = returns.std() if total_periods > 1 else 0.0
            avg_return = returns.mean()

            # Price range analysis (high-low range relative to the open)
            df['range'] = df['high'] - df['low']
            df['range_pct'] = (df['range'] / df['open']) * 100
            avg_range_pct = df['range_pct'].mean()

            # Directional analysis
            bullish_periods = int((close > df['open']).sum())
            bearish_periods = int((close < df['open']).sum())
            neutral_periods = int((close == df['open']).sum())
            bullish_ratio = (bullish_periods / total_periods) * 100 if total_periods > 0 else 0

            # Simple momentum: current close vs. the close N periods ago,
            # where N is 10 or fewer when the series is shorter.
            momentum_periods = min(10, total_periods - 1)
            if momentum_periods > 0:
                reference_close = close.iloc[-momentum_periods - 1]
                momentum = ((current_price - reference_close) / reference_close) * 100
            else:
                momentum = 0

            # Trend strength: slope of a straight-line fit through the
            # closes, normalized by the average price. Two points already
            # define a line, so only a single candle is skipped.
            if total_periods >= 2:
                slope, _ = np.polyfit(np.arange(total_periods), close.to_numpy(), 1)
                trend_strength = slope / close.mean() * 100
            else:
                trend_strength = 0

            return {
                'current_price': current_price,
                'period_return': period_return,
                'volatility': volatility,
                'avg_return': avg_return,
                'avg_range_pct': avg_range_pct,
                'bullish_periods': bullish_periods,
                'bearish_periods': bearish_periods,
                'neutral_periods': neutral_periods,
                'bullish_ratio': bullish_ratio,
                'period_high': df['high'].max(),
                'period_low': df['low'].min(),
                'momentum': momentum,
                'trend_strength': trend_strength,
                'return_percentiles': {
                    '5th': returns.quantile(0.05),
                    '25th': returns.quantile(0.25),
                    '75th': returns.quantile(0.75),
                    '95th': returns.quantile(0.95)
                },
                'max_gain': returns.max(),
                'max_loss': returns.min(),
                'positive_returns': int((returns > 0).sum()),
                'negative_returns': int((returns < 0).sum())
            }
        except Exception as e:
            logger.error(f"Price movement analysis error: {e}")
            return {'error': str(e)}
def create_volume_analysis_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Create a comprehensive volume analysis chart.

    Reads candles for ``symbol``/``timeframe`` from the ``market_data`` table
    for the last ``days_back`` days (ending now, UTC) and draws three stacked
    panels: a price candlestick, volume bars colored by candle direction, and
    volume against its 20-period moving average.

    Args:
        symbol: Value matched against the ``symbol`` column.
        timeframe: Value matched against the ``timeframe`` column.
        days_back: Size of the look-back window in days.

    Returns:
        The assembled figure. When no rows are found, or when anything
        fails, a blank figure carrying a "No data available" / "Error: ..."
        annotation is returned instead of raising.
    """
    try:
        from sqlalchemy import text

        # Fetch market data for chart
        db_manager = DatabaseManager()
        db_manager.initialize()

        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        with db_manager.get_session() as session:
            query = text("""
SELECT timestamp, open, high, low, close, volume, trades_count
FROM market_data
WHERE symbol = :symbol
AND timeframe = :timeframe
AND timestamp >= :start_time
AND timestamp <= :end_time
ORDER BY timestamp ASC
""")
            result = session.execute(query, {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': start_time,
                'end_time': end_time
            })
            # Materialize the rows while the session is still open.
            candles = [
                {
                    'timestamp': row.timestamp,
                    'open': float(row.open),
                    'high': float(row.high),
                    'low': float(row.low),
                    'close': float(row.close),
                    'volume': float(row.volume),
                    # A missing trade count is stored as 0.
                    'trades_count': int(row.trades_count) if row.trades_count else 0
                }
                for row in result
            ]

        if not candles:
            fig = go.Figure()
            fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return fig

        df = pd.DataFrame(candles)

        # Volume moving average; min_periods=1 so the first rows get a value
        # instead of NaN.
        df['volume_ma'] = df['volume'].rolling(window=20, min_periods=1).mean()

        fig = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Price Action', 'Volume Analysis', 'Volume vs Moving Average'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # Row 1: price candlestick
        fig.add_trace(
            go.Candlestick(
                x=df['timestamp'],
                open=df['open'],
                high=df['high'],
                low=df['low'],
                close=df['close'],
                name='Price',
                increasing_line_color='#26a69a',
                decreasing_line_color='#ef5350'
            ),
            row=1, col=1
        )

        # Row 2: volume bars, green when the candle closed at or above its
        # open and red otherwise.
        colors = [
            '#26a69a' if close_price >= open_price else '#ef5350'
            for close_price, open_price in zip(df['close'], df['open'])
        ]
        fig.add_trace(
            go.Bar(
                x=df['timestamp'],
                y=df['volume'],
                name='Volume',
                marker_color=colors,
                opacity=0.7
            ),
            row=2, col=1
        )

        # Row 3: raw volume against its moving average
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume'],
                mode='lines',
                name='Volume',
                line=dict(color='#2196f3', width=1)
            ),
            row=3, col=1
        )
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume_ma'],
                mode='lines',
                name='Volume MA(20)',
                line=dict(color='#ff9800', width=2)
            ),
            row=3, col=1
        )

        fig.update_layout(
            title=f'{symbol} Volume Analysis ({timeframe})',
            xaxis_rangeslider_visible=False,
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        fig.update_yaxes(title_text="Price", row=1, col=1)
        fig.update_yaxes(title_text="Volume", row=2, col=1)
        fig.update_yaxes(title_text="Volume", row=3, col=1)
        return fig
    except Exception as e:
        logger.error(f"Volume chart creation error: {e}")
        fig = go.Figure()
        fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return fig
def create_price_movement_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Build the three-panel price movement figure for one symbol.

    Candles for ``symbol``/``timeframe`` are read from the ``market_data``
    table for the last ``days_back`` days (ending now, UTC). The panels show
    the compounded return factor, the per-period percentage returns, and the
    high-low range as a percentage of the open.

    Returns a blank figure with a "No data available" or "Error: ..."
    annotation when there are no rows or when anything fails.
    """
    try:
        from sqlalchemy import text

        manager = DatabaseManager()
        manager.initialize()

        window_end = datetime.now(timezone.utc)
        window_start = window_end - timedelta(days=days_back)
        params = {
            'symbol': symbol,
            'timeframe': timeframe,
            'start_time': window_start,
            'end_time': window_end
        }
        numeric_fields = ('open', 'high', 'low', 'close', 'volume')

        with manager.get_session() as session:
            statement = text("""
SELECT timestamp, open, high, low, close, volume
FROM market_data
WHERE symbol = :symbol
AND timeframe = :timeframe
AND timestamp >= :start_time
AND timestamp <= :end_time
ORDER BY timestamp ASC
""")
            # Rows are consumed while the session is open.
            records = [
                {
                    'timestamp': row.timestamp,
                    **{field: float(getattr(row, field)) for field in numeric_fields}
                }
                for row in session.execute(statement, params)
            ]

        if len(records) == 0:
            empty_fig = go.Figure()
            empty_fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return empty_fig

        frame = pd.DataFrame(records)

        # Derived series: percentage returns (first row filled with 0),
        # range relative to the open, and the compounded return factor.
        frame['returns'] = frame['close'].pct_change().mul(100).fillna(0)
        frame['range_pct'] = (frame['high'] - frame['low']).div(frame['open']).mul(100)
        frame['cumulative_return'] = frame['returns'].div(100).add(1).cumprod()

        timestamps = frame['timestamp']
        chart = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Cumulative Returns', 'Period Returns (%)', 'Price Range (%)'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # Panel 1: cumulative returns
        cumulative_trace = go.Scatter(
            x=timestamps,
            y=frame['cumulative_return'],
            mode='lines',
            name='Cumulative Return',
            line=dict(color='#2196f3', width=2)
        )
        chart.add_trace(cumulative_trace, row=1, col=1)

        # Panel 2: per-period returns, green for non-negative, red otherwise
        bar_colors = []
        for period_return in frame['returns']:
            bar_colors.append('#26a69a' if period_return >= 0 else '#ef5350')
        returns_trace = go.Bar(
            x=timestamps,
            y=frame['returns'],
            name='Returns (%)',
            marker_color=bar_colors,
            opacity=0.7
        )
        chart.add_trace(returns_trace, row=2, col=1)

        # Panel 3: price range percentage
        range_trace = go.Scatter(
            x=timestamps,
            y=frame['range_pct'],
            mode='lines+markers',
            name='Range %',
            line=dict(color='#ff9800', width=1),
            marker=dict(size=4)
        )
        chart.add_trace(range_trace, row=3, col=1)

        # Zero reference line on the returns panel
        chart.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

        chart.update_layout(
            title=f'{symbol} Price Movement Analysis ({timeframe})',
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        axis_titles = ("Cumulative Return", "Returns (%)", "Range (%)")
        for row_number, axis_title in enumerate(axis_titles, start=1):
            chart.update_yaxes(title_text=axis_title, row=row_number, col=1)
        return chart
    except Exception as e:
        logger.error(f"Price movement chart creation error: {e}")
        error_fig = go.Figure()
        error_fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return error_fig
def create_data_analysis_panel():
    """Build the data analysis panel: a tab strip with a volume tab and a price tab.

    Each tab holds placeholder containers identified by fixed component ids
    ('volume-analysis-content', 'volume-stats-container',
    'price-movement-content'); the volume tab is selected initially.
    """
    volume_tab = dcc.Tab(
        label="Volume Analysis",
        value="volume-analysis",
        children=[
            html.Div(
                id='volume-analysis-content',
                children=[html.P("Content for Volume Analysis")]
            ),
            html.Div(
                id='volume-stats-container',
                children=[html.P("Stats container loaded - waiting for callback...")]
            )
        ]
    )

    price_tab = dcc.Tab(
        label="Price Movement",
        value="price-movement",
        children=[
            html.Div(
                id='price-movement-content',
                children=[
                    dmc.Alert("Select a symbol and timeframe to view price movement analysis.", color="blue")
                ]
            )
        ]
    )

    tab_strip = dcc.Tabs(
        id="data-analysis-tabs",
        value="volume-analysis",
        children=[volume_tab, price_tab],
    )
    return html.Div([tab_strip], id='data-analysis-panel-wrapper')
def format_number(value: Optional[float], decimals: int = 2) -> str:
    """Format a number with a fixed number of decimals and a K/M/B suffix.

    Args:
        value: Number to format. ``None`` and NaN are accepted and rendered
            as "N/A".
        decimals: Digits after the decimal point.

    Returns:
        The formatted string, e.g. ``"1.23K"``, ``"2.50M"``, ``"-3.00B"`` or
        ``"12.35"``.
    """
    if pd.isna(value):
        return "N/A"

    # Convert once so Decimal and numpy scalars divide cleanly below.
    number = float(value)
    magnitude = abs(number)

    # Largest unit first; the final entry is the plain, unsuffixed form.
    units = ((1e9, "B"), (1e6, "M"), (1e3, "K"), (1.0, ""))
    for index, (scale, suffix) in enumerate(units):
        if magnitude >= scale or not suffix:
            scaled = number / scale
            # Rounding can push a value up to 1000 of its unit (999_999.999
            # would otherwise print as "1000.00K"); promote it to the next
            # larger unit so the output reads "1.00M".
            if index > 0 and abs(round(scaled, decimals)) >= 1000:
                scale, suffix = units[index - 1]
                scaled = number / scale
            return f"{scaled:.{decimals}f}{suffix}"

    # Not reached: the last unit always matches.
    return f"{number:.{decimals}f}"
def create_volume_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Create volume statistics display.

    Args:
        stats: Result dict containing ``total_volume``, ``avg_volume``,
            ``volume_trend``, ``high_volume_periods``,
            ``volume_price_correlation`` and ``avg_trade_size``, or a dict
            with an ``error`` key.

    Returns:
        A three-column grid of six statistic cards, or a red alert when
        ``stats`` carries an ``error`` key. (The returned objects are
        dash-mantine components, not a literal ``html.Div``.)
    """
    if 'error' in stats:
        return dmc.Alert(
            "Error loading volume statistics",
            title="Volume Analysis Error",
            color="red"
        )

    def stat_card(icon, icon_color, label, value_text, value_color=None):
        """Build one card: themed icon next to a dimmed label and a bold value."""
        # Only pass the color prop when one is requested so uncolored values
        # are built exactly like a Text without it.
        color_kwargs = {'c': value_color} if value_color else {}
        return dmc.Paper([
            dmc.Group([
                dmc.ThemeIcon(icon, size="lg", color=icon_color),
                dmc.Stack([
                    dmc.Text(label, size="sm", c="dimmed"),
                    dmc.Text(value_text, fw=700, size="lg", **color_kwargs)
                ], gap="xs")
            ])
        ], p="md", shadow="sm")

    trend = stats['volume_trend']

    # The correlation is NaN for a constant series or a single row; show
    # "N/A" like the number-formatted cards do instead of a raw "nan".
    correlation = stats['volume_price_correlation']
    correlation_text = "N/A" if pd.isna(correlation) else f"{correlation:.3f}"

    return dmc.SimpleGrid([
        stat_card("📊", "blue", "Total Volume", format_number(stats['total_volume'])),
        stat_card("📈", "green", "Average Volume", format_number(stats['avg_volume'])),
        stat_card("🎯", "orange", "Volume Trend", trend,
                  value_color="green" if trend == "Increasing" else "red"),
        stat_card("", "red", "High Volume Periods", str(stats['high_volume_periods'])),
        stat_card("🔗", "purple", "Volume-Price Correlation", correlation_text),
        stat_card("💱", "teal", "Avg Trade Size", format_number(stats['avg_trade_size']))
    ], cols=3, spacing="md", style={'margin-top': '20px'})
def create_price_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Render the price movement statistics as a grid of cards.

    Reads ``current_price``, ``period_return``, ``volatility``,
    ``bullish_ratio``, ``momentum`` and ``max_loss`` from ``stats`` and lays
    them out as six cards in three columns. When ``stats`` carries an
    ``error`` key, a red alert is returned instead.
    """
    if 'error' in stats:
        return dmc.Alert(
            "Error loading price statistics",
            title="Price Analysis Error",
            color="red"
        )

    def sign_color(amount):
        """Green for non-negative amounts, red for negative ones."""
        return "green" if amount >= 0 else "red"

    def card(icon, icon_color, label, value_text, value_color=None):
        """One card: themed icon beside a dimmed label and a bold value."""
        # The color prop is only supplied when a color was requested.
        text_props = {'fw': 700, 'size': "lg"}
        if value_color is not None:
            text_props['c'] = value_color
        body = dmc.Stack([
            dmc.Text(label, size="sm", c="dimmed"),
            dmc.Text(value_text, **text_props)
        ], gap="xs")
        row = dmc.Group([dmc.ThemeIcon(icon, size="lg", color=icon_color), body])
        return dmc.Paper([row], p="md", shadow="sm")

    period_return = stats['period_return']
    momentum = stats['momentum']
    return_color = sign_color(period_return)

    cards = [
        card("💰", "blue", "Current Price", f"${stats['current_price']:.2f}"),
        card("📈", return_color, "Period Return", f"{period_return:+.2f}%", return_color),
        card("📊", "orange", "Volatility", f"{stats['volatility']:.2f}%"),
        card("🎯", "purple", "Bullish Ratio", f"{stats['bullish_ratio']:.1f}%"),
        card("", "teal", "Momentum", f"{momentum:+.2f}%", sign_color(momentum)),
        card("📉", "red", "Max Loss", f"{stats['max_loss']:.2f}%", "red")
    ]
    return dmc.SimpleGrid(cards, cols=3, spacing="md", style={'margin-top': '20px'})
def get_market_statistics(df: pd.DataFrame, symbol: str, timeframe: str) -> html.Div:
    """
    Build the combined market statistics component for a DataFrame.

    Runs the volume and price analyzers on ``df`` and, when both succeed,
    returns a container with a heading, the analyzed time range (taken from
    the ``timestamp`` column) and the price and volume card grids. If either
    analyzer reports an error, or anything raises, a red error message
    container is returned instead. ``symbol`` and ``timeframe`` are accepted
    but not used in the body.
    """
    try:
        volume_result = VolumeAnalyzer().get_volume_statistics(df)
        price_result = PriceMovementAnalyzer().get_price_movement_statistics(df)

        failed = 'error' in volume_result or 'error' in price_result
        if failed:
            # Prefer the volume error, fall back to the price error.
            error_msg = volume_result.get('error') or price_result.get('error')
            return html.Div(f"Error generating statistics: {error_msg}", style={'color': 'red'})

        # Time range for display
        timestamps = df['timestamp']
        first_seen = timestamps.min()
        last_seen = timestamps.max()
        stamp_format = '%Y-%m-%d %H:%M'
        start_date = first_seen.strftime(stamp_format)
        end_date = last_seen.strftime(stamp_format)
        days_back = (last_seen - first_seen).days
        time_status = f"📅 Analysis Range: {start_date} to {end_date} (~{days_back} days)"

        range_style = {
            'font-weight': 'bold',
            'margin-bottom': '15px',
            'color': '#4A4A4A',
            'text-align': 'center',
            'font-size': '1.1em'
        }
        sections = [
            html.H3("📊 Enhanced Market Statistics"),
            html.P(time_status, style=range_style),
            create_price_stats_display(price_result),
            create_volume_stats_display(volume_result)
        ]
        return html.Div(sections)
    except Exception as e:
        logger.error(f"Error in get_market_statistics: {e}", exc_info=True)
        return html.Div(f"Error generating statistics display: {e}", style={'color': 'red'})