TCPDashboard/dashboard/components/data_analysis.py
Vasily.onl 132710a9a7 3.6 Enhance market statistics with comprehensive data analysis features
- Updated `register_chart_callbacks` to include enhanced market statistics.
- Implemented new data analysis callbacks in `dashboard/callbacks/data_analysis.py` for volume and price movement analysis.
- Created `VolumeAnalyzer` and `PriceMovementAnalyzer` classes for detailed statistical calculations.
- Integrated data analysis components into the market statistics layout, providing users with insights on volume trends and price movements.
- Improved error handling and logging for data analysis operations.
- Updated documentation to reflect the new features and usage guidelines.
2025-06-05 11:24:21 +08:00

721 lines
26 KiB
Python

"""
Data analysis components for comprehensive market data analysis.
"""
from dash import html, dcc
import dash_mantine_components as dmc
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta
from typing import Dict, Any, List, Optional
from utils.logger import get_logger
from database.connection import DatabaseManager
from database.operations import DatabaseOperationError
# Module-level logger; the analyzers and chart builders in this file report failures through it.
logger = get_logger("data_analysis")
class VolumeAnalyzer:
    """Analyze trading volume patterns and trends.

    Each instance creates and initializes its own ``DatabaseManager`` and
    reads candles from the ``market_data`` table through it.
    """

    def __init__(self):
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_volume_statistics(self, symbol: str, timeframe: str = "1h", days_back: int = 7) -> Dict[str, Any]:
        """Calculate comprehensive volume statistics.

        Args:
            symbol: Value matched against ``market_data.symbol``.
            timeframe: Value matched against ``market_data.timeframe``.
            days_back: Size of the look-back window in days, ending now (UTC).

        Returns:
            A dict of volume metrics (see ``_summarize_volume`` for the keys),
            or ``{'error': <message>}`` when no candles are found or any step
            raises.
        """
        try:
            candles = self._fetch_candles(symbol, timeframe, days_back)
            if not candles:
                return {'error': 'No data available'}
            return self._summarize_volume(pd.DataFrame(candles))
        except Exception as e:
            # Deliberate catch-all: callers receive an error dict instead of
            # an exception, whatever went wrong (query, conversion, maths).
            logger.error(f"Volume analysis error: {e}")
            return {'error': str(e)}

    def _fetch_candles(self, symbol: str, timeframe: str, days_back: int) -> List[Dict[str, Any]]:
        """Load the candles of the look-back window, ordered by timestamp ascending."""
        # Function-scope import: sqlalchemy is only needed when a query runs.
        from sqlalchemy import text

        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        query = text("""
            SELECT timestamp, open, high, low, close, volume, trades_count
            FROM market_data
            WHERE symbol = :symbol
            AND timeframe = :timeframe
            AND timestamp >= :start_time
            AND timestamp <= :end_time
            ORDER BY timestamp ASC
        """)

        with self.db_manager.get_session() as session:
            result = session.execute(query, {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': start_time,
                'end_time': end_time
            })
            # Rows are materialized while the session is still open.
            return [
                {
                    'timestamp': row.timestamp,
                    'open': float(row.open),
                    'high': float(row.high),
                    'low': float(row.low),
                    'close': float(row.close),
                    'volume': float(row.volume),
                    # A NULL (or zero) trade count is stored as 0.
                    'trades_count': int(row.trades_count) if row.trades_count else 0
                }
                for row in result
            ]

    @staticmethod
    def _summarize_volume(df: pd.DataFrame) -> Dict[str, Any]:
        """Compute the volume metrics from a non-empty candle frame.

        ``df`` must provide the columns ``open``, ``close``, ``volume`` and
        ``trades_count``.
        """
        volume = df['volume']

        total_volume = volume.sum()
        avg_volume = volume.mean()
        volume_std = volume.std()

        # Trend: mean of the last 10 candles against the mean of the first 10.
        recent_volume = volume.tail(10).mean()
        older_volume = volume.head(10).mean()
        volume_trend = "Increasing" if recent_volume > older_volume else "Decreasing"

        # High volume periods: more than two standard deviations above the mean.
        high_volume_threshold = avg_volume + (2 * volume_std)
        high_volume_periods = int((volume > high_volume_threshold).sum())

        # Correlation between volume and the absolute open-to-close move.
        price_change = df['close'] - df['open']
        volume_price_corr = volume.corr(price_change.abs())

        # Average trade size (volume per trade). Candles without a trade count
        # are excluded: dividing them by a substitute count of 1 would report
        # the candle's entire volume as a single trade and inflate the mean.
        traded = df[df['trades_count'] > 0]
        if traded.empty:
            avg_trade_size = float('nan')
        else:
            avg_trade_size = (traded['volume'] / traded['trades_count']).mean()

        return {
            'total_volume': total_volume,
            'avg_volume': avg_volume,
            'volume_std': volume_std,
            'volume_trend': volume_trend,
            'high_volume_periods': high_volume_periods,
            'volume_price_correlation': volume_price_corr,
            'avg_trade_size': avg_trade_size,
            'max_volume': volume.max(),
            'min_volume': volume.min(),
            'volume_percentiles': {
                '25th': volume.quantile(0.25),
                '50th': volume.quantile(0.50),
                '75th': volume.quantile(0.75),
                '95th': volume.quantile(0.95)
            }
        }
class PriceMovementAnalyzer:
    """Analyze price movement patterns and statistics.

    Each instance creates and initializes its own ``DatabaseManager`` and
    reads candles from the ``market_data`` table through it.
    """

    def __init__(self):
        self.db_manager = DatabaseManager()
        self.db_manager.initialize()

    def get_price_movement_statistics(self, symbol: str, timeframe: str = "1h", days_back: int = 7) -> Dict[str, Any]:
        """Calculate comprehensive price movement statistics.

        Args:
            symbol: Value matched against ``market_data.symbol``.
            timeframe: Value matched against ``market_data.timeframe``.
            days_back: Size of the look-back window in days, ending now (UTC).

        Returns:
            A dict of price metrics (see ``_summarize_price`` for the keys),
            or ``{'error': <message>}`` when no candles are found or any step
            raises.
        """
        try:
            candles = self._fetch_candles(symbol, timeframe, days_back)
            if not candles:
                return {'error': 'No data available'}
            return self._summarize_price(pd.DataFrame(candles))
        except Exception as e:
            # Deliberate catch-all: callers receive an error dict instead of
            # an exception, whatever went wrong (query, conversion, maths).
            logger.error(f"Price movement analysis error: {e}")
            return {'error': str(e)}

    def _fetch_candles(self, symbol: str, timeframe: str, days_back: int) -> List[Dict[str, Any]]:
        """Load the candles of the look-back window, ordered by timestamp ascending."""
        # Function-scope import: sqlalchemy is only needed when a query runs.
        from sqlalchemy import text

        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        query = text("""
            SELECT timestamp, open, high, low, close, volume
            FROM market_data
            WHERE symbol = :symbol
            AND timeframe = :timeframe
            AND timestamp >= :start_time
            AND timestamp <= :end_time
            ORDER BY timestamp ASC
        """)

        with self.db_manager.get_session() as session:
            result = session.execute(query, {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': start_time,
                'end_time': end_time
            })
            # Rows are materialized while the session is still open.
            return [
                {
                    'timestamp': row.timestamp,
                    'open': float(row.open),
                    'high': float(row.high),
                    'low': float(row.low),
                    'close': float(row.close),
                    'volume': float(row.volume)
                }
                for row in result
            ]

    @staticmethod
    def _summarize_price(df: pd.DataFrame) -> Dict[str, Any]:
        """Compute the price metrics from a non-empty candle frame.

        ``df`` must provide the columns ``open``, ``high``, ``low`` and
        ``close``, ordered oldest first.
        """
        close = df['close']

        # Basic price statistics
        current_price = close.iloc[-1]
        period_start_price = df['open'].iloc[0]
        period_return = ((current_price - period_start_price) / period_start_price) * 100

        # Per-candle returns in percent (close-to-close). The first candle has
        # no previous close, so it is dropped rather than counted as a 0%
        # observation, which would bias volatility, mean and percentiles
        # toward zero.
        returns = (close.pct_change() * 100).iloc[1:].fillna(0)
        if returns.empty:
            # Single candle: fall back to one neutral observation so the
            # result keeps the shape and values it always had in this case.
            returns = pd.Series([0.0])

        # Volatility metrics
        volatility = returns.std()
        avg_return = returns.mean()

        # Price range analysis: high-low span relative to the open, in percent.
        range_pct = ((df['high'] - df['low']) / df['open']) * 100
        avg_range_pct = range_pct.mean()

        # Directional analysis
        bullish_periods = int((close > df['open']).sum())
        bearish_periods = int((close < df['open']).sum())
        neutral_periods = int((close == df['open']).sum())
        total_periods = len(df)
        bullish_ratio = (bullish_periods / total_periods) * 100 if total_periods > 0 else 0

        # Price extremes
        period_high = df['high'].max()
        period_low = df['low'].min()

        # Simple momentum: current close against the close up to 10 candles ago.
        momentum_periods = min(10, len(df) - 1)
        if momentum_periods > 0:
            reference_close = close.iloc[-momentum_periods - 1]
            momentum = ((current_price - reference_close) / reference_close) * 100
        else:
            momentum = 0

        # Trend strength: linear-regression slope per candle, normalized by
        # the average close and expressed in percent.
        if len(df) > 2:
            x = np.arange(len(df))
            slope, _ = np.polyfit(x, close, 1)
            trend_strength = slope / close.mean() * 100
        else:
            trend_strength = 0

        return {
            'current_price': current_price,
            'period_return': period_return,
            'volatility': volatility,
            'avg_return': avg_return,
            'avg_range_pct': avg_range_pct,
            'bullish_periods': bullish_periods,
            'bearish_periods': bearish_periods,
            'neutral_periods': neutral_periods,
            'bullish_ratio': bullish_ratio,
            'period_high': period_high,
            'period_low': period_low,
            'momentum': momentum,
            'trend_strength': trend_strength,
            'return_percentiles': {
                '5th': returns.quantile(0.05),
                '25th': returns.quantile(0.25),
                '75th': returns.quantile(0.75),
                '95th': returns.quantile(0.95)
            },
            # Clamped at zero so a window without gains (or without losses)
            # reports 0 instead of a value of the opposite sign.
            'max_gain': max(returns.max(), 0.0),
            'max_loss': min(returns.min(), 0.0),
            'positive_returns': int((returns > 0).sum()),
            'negative_returns': int((returns < 0).sum())
        }
def create_volume_analysis_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Create a comprehensive volume analysis chart.

    The figure has three stacked rows: a price candlestick, volume bars
    coloured by candle direction, and the volume line against its 20-candle
    moving average.

    Args:
        symbol: Value matched against ``market_data.symbol``.
        timeframe: Value matched against ``market_data.timeframe``.
        days_back: Size of the look-back window in days, ending now (UTC).

    Returns:
        The populated figure. When no candles are found, or when anything
        raises, a figure containing only a text annotation is returned.
    """
    try:
        # Fetch market data for chart
        db_manager = DatabaseManager()
        db_manager.initialize()

        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        with db_manager.get_session() as session:
            from sqlalchemy import text

            query = text("""
                SELECT timestamp, open, high, low, close, volume, trades_count
                FROM market_data
                WHERE symbol = :symbol
                AND timeframe = :timeframe
                AND timestamp >= :start_time
                AND timestamp <= :end_time
                ORDER BY timestamp ASC
            """)

            result = session.execute(query, {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': start_time,
                'end_time': end_time
            })

            candles = [
                {
                    'timestamp': row.timestamp,
                    'open': float(row.open),
                    'high': float(row.high),
                    'low': float(row.low),
                    'close': float(row.close),
                    'volume': float(row.volume),
                    'trades_count': int(row.trades_count) if row.trades_count else 0
                }
                for row in result
            ]

        if not candles:
            fig = go.Figure()
            fig.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return fig

        df = pd.DataFrame(candles)

        # Volume moving average; min_periods=1 yields a value from the first candle on.
        df['volume_ma'] = df['volume'].rolling(window=20, min_periods=1).mean()

        # Create subplots
        fig = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Price Action', 'Volume Analysis', 'Volume vs Moving Average'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # Price candlestick
        fig.add_trace(
            go.Candlestick(
                x=df['timestamp'],
                open=df['open'],
                high=df['high'],
                low=df['low'],
                close=df['close'],
                name='Price',
                increasing_line_color='#26a69a',
                decreasing_line_color='#ef5350'
            ),
            row=1, col=1
        )

        # Volume bars: green when the candle closed at or above its open, red otherwise.
        colors = [
            '#26a69a' if close_price >= open_price else '#ef5350'
            for close_price, open_price in zip(df['close'], df['open'])
        ]

        fig.add_trace(
            go.Bar(
                x=df['timestamp'],
                y=df['volume'],
                name='Volume',
                marker_color=colors,
                opacity=0.7
            ),
            row=2, col=1
        )

        # Volume vs moving average
        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume'],
                mode='lines',
                name='Volume',
                line=dict(color='#2196f3', width=1)
            ),
            row=3, col=1
        )

        fig.add_trace(
            go.Scatter(
                x=df['timestamp'],
                y=df['volume_ma'],
                mode='lines',
                name='Volume MA(20)',
                line=dict(color='#ff9800', width=2)
            ),
            row=3, col=1
        )

        # Update layout
        fig.update_layout(
            title=f'{symbol} Volume Analysis ({timeframe})',
            xaxis_rangeslider_visible=False,
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        # Update y-axes
        fig.update_yaxes(title_text="Price", row=1, col=1)
        fig.update_yaxes(title_text="Volume", row=2, col=1)
        fig.update_yaxes(title_text="Volume", row=3, col=1)

        return fig

    except Exception as e:
        # Deliberate catch-all: the caller always gets a figure to render.
        logger.error(f"Volume chart creation error: {e}")
        fig = go.Figure()
        fig.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return fig
def create_price_movement_chart(symbol: str, timeframe: str = "1h", days_back: int = 7) -> go.Figure:
    """Build the three-row price movement figure for one symbol and timeframe.

    Rows, top to bottom: compounded growth of the close-to-close returns,
    per-candle returns in percent (green at or above zero, red below), and the
    high-low range as a percentage of the open.

    Args:
        symbol: Value matched against ``market_data.symbol``.
        timeframe: Value matched against ``market_data.timeframe``.
        days_back: Size of the look-back window in days, ending now (UTC).

    Returns:
        The populated figure. When no candles are found, or when anything
        raises, a figure containing only a text annotation is returned.
    """
    try:
        manager = DatabaseManager()
        manager.initialize()

        window_end = datetime.now(timezone.utc)
        window_start = window_end - timedelta(days=days_back)

        with manager.get_session() as session:
            from sqlalchemy import text

            statement = text("""
                SELECT timestamp, open, high, low, close, volume
                FROM market_data
                WHERE symbol = :symbol
                AND timeframe = :timeframe
                AND timestamp >= :start_time
                AND timestamp <= :end_time
                ORDER BY timestamp ASC
            """)
            bind_params = {
                'symbol': symbol,
                'timeframe': timeframe,
                'start_time': window_start,
                'end_time': window_end
            }
            rows = [
                {
                    'timestamp': record.timestamp,
                    'open': float(record.open),
                    'high': float(record.high),
                    'low': float(record.low),
                    'close': float(record.close),
                    'volume': float(record.volume)
                }
                for record in session.execute(statement, bind_params)
            ]

        if not rows:
            placeholder = go.Figure()
            placeholder.add_annotation(text="No data available", xref="paper", yref="paper", x=0.5, y=0.5)
            return placeholder

        frame = pd.DataFrame(rows)

        # Derived series. The first candle has no previous close, so its
        # return is filled with 0.
        frame['returns'] = (frame['close'].pct_change() * 100).fillna(0)
        frame['range_pct'] = ((frame['high'] - frame['low']) / frame['open']) * 100
        frame['cumulative_return'] = (1 + frame['returns'] / 100).cumprod()

        timestamps = frame['timestamp']
        bar_colors = ['#ef5350' if value < 0 else '#26a69a' for value in frame['returns']]

        chart = make_subplots(
            rows=3, cols=1,
            subplot_titles=('Cumulative Returns', 'Period Returns (%)', 'Price Range (%)'),
            vertical_spacing=0.08,
            row_heights=[0.4, 0.3, 0.3]
        )

        # One trace per row, added top to bottom.
        traces = (
            go.Scatter(
                x=timestamps,
                y=frame['cumulative_return'],
                mode='lines',
                name='Cumulative Return',
                line=dict(color='#2196f3', width=2)
            ),
            go.Bar(
                x=timestamps,
                y=frame['returns'],
                name='Returns (%)',
                marker_color=bar_colors,
                opacity=0.7
            ),
            go.Scatter(
                x=timestamps,
                y=frame['range_pct'],
                mode='lines+markers',
                name='Range %',
                line=dict(color='#ff9800', width=1),
                marker=dict(size=4)
            ),
        )
        for row_number, trace in enumerate(traces, start=1):
            chart.add_trace(trace, row=row_number, col=1)

        # Zero reference line on the returns row.
        chart.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)

        chart.update_layout(
            title=f'{symbol} Price Movement Analysis ({timeframe})',
            height=800,
            showlegend=True,
            template='plotly_white'
        )

        axis_titles = ("Cumulative Return", "Returns (%)", "Range (%)")
        for row_number, axis_title in enumerate(axis_titles, start=1):
            chart.update_yaxes(title_text=axis_title, row=row_number, col=1)

        return chart

    except Exception as e:
        # Catch-all so the caller always receives a figure to render.
        logger.error(f"Price movement chart creation error: {e}")
        failure = go.Figure()
        failure.add_annotation(text=f"Error: {str(e)}", xref="paper", yref="paper", x=0.5, y=0.5)
        return failure
def create_data_analysis_panel():
    """Create the data analysis panel with volume and price movement tools.

    The panel contains two dropdowns (``analysis-type-selector`` and
    ``analysis-period-selector``) and two initially placeholder-filled
    containers (``analysis-chart-container`` and ``analysis-stats-container``).

    Returns:
        An ``html.Div`` holding the whole panel.
    """
    # Dash style dictionaries use camelCased CSS property names.
    label_style = {'fontWeight': 'bold', 'marginRight': '10px'}
    row_style = {'marginBottom': '20px'}

    return html.Div([
        html.H3("📊 Data Analysis Tools", style={'marginBottom': '20px'}),

        # Analysis type selection - using regular dropdown instead of SegmentedControl
        html.Div([
            html.Label("Analysis Type:", style=label_style),
            dcc.Dropdown(
                id="analysis-type-selector",
                options=[
                    {"label": "Volume Analysis", "value": "volume"},
                    {"label": "Price Movement", "value": "price"},
                    {"label": "Combined Stats", "value": "combined"}
                ],
                value="volume",
                clearable=False,
                style={'width': '200px', 'display': 'inline-block'}
            )
        ], style=row_style),

        # Time period selector - using regular dropdown.
        # Option values are strings holding a number of days.
        html.Div([
            html.Label("Analysis Period:", style=label_style),
            dcc.Dropdown(
                id="analysis-period-selector",
                options=[
                    {"label": "1 Day", "value": "1"},
                    {"label": "3 Days", "value": "3"},
                    {"label": "7 Days", "value": "7"},
                    {"label": "14 Days", "value": "14"},
                    {"label": "30 Days", "value": "30"}
                ],
                value="7",
                clearable=False,
                style={'width': '150px', 'display': 'inline-block'}
            )
        ], style=row_style),

        # Charts container
        html.Div(id="analysis-chart-container", children=[
            html.P("Chart container loaded - waiting for callback...")
        ]),

        # Statistics container
        html.Div(id="analysis-stats-container", children=[
            html.P("Stats container loaded - waiting for callback...")
        ])
    ], style={'border': '1px solid #ccc', 'padding': '20px', 'marginTop': '20px'})
def format_number(value: float, decimals: int = 2) -> str:
    """Format a number with a K/M/B suffix and a fixed number of decimals.

    Args:
        value: Number to format; anything ``pd.isna`` treats as missing
            (NaN, ``None``) yields ``"N/A"``.
        decimals: Number of digits after the decimal point.

    Returns:
        The formatted string, e.g. ``"1.50K"``, ``"-2.50M"``, ``"999.99"``.
    """
    if pd.isna(value):
        return "N/A"

    magnitude = abs(value)
    for divisor, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        # Pick the unit from the *rounded* value: a number that would print as
        # 1000.00 in the next smaller unit is promoted, so 999_999 renders as
        # "1.00M" rather than "1000.00K".
        if round(magnitude / (divisor / 1e3), decimals) >= 1e3:
            return f"{value / divisor:.{decimals}f}{suffix}"

    return f"{value:.{decimals}f}"
def create_volume_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Create volume statistics display.

    Args:
        stats: Volume metrics dict. Reads the keys ``total_volume``,
            ``avg_volume``, ``volume_trend``, ``high_volume_periods``,
            ``volume_price_correlation`` and ``avg_trade_size``; a dict
            containing ``error`` is rendered as an alert instead.

    Returns:
        A ``dmc.Alert`` when ``stats`` carries an error, otherwise a
        three-column ``dmc.SimpleGrid`` of six cards.
    """
    if 'error' in stats:
        return dmc.Alert(
            "Error loading volume statistics",
            title="Volume Analysis Error",
            color="red"
        )

    def _card(icon: str, icon_color: str, label: str, value: str, **value_props):
        """Build one statistic card: themed icon next to a label/value stack."""
        return dmc.Paper([
            dmc.Group([
                dmc.ThemeIcon(icon, size="lg", color=icon_color),
                dmc.Stack([
                    dmc.Text(label, size="sm", c="dimmed"),
                    dmc.Text(value, fw=700, size="lg", **value_props)
                ], gap="xs")
            ])
        ], p="md", shadow="sm")

    # The correlation is NaN when it cannot be computed; show "N/A" like the
    # format_number-based cards do instead of the literal "nan".
    correlation = stats['volume_price_correlation']
    correlation_text = "N/A" if pd.isna(correlation) else f"{correlation:.3f}"

    return dmc.SimpleGrid([
        _card("📊", "blue", "Total Volume", format_number(stats['total_volume'])),
        _card("📈", "green", "Average Volume", format_number(stats['avg_volume'])),
        _card("🎯", "orange", "Volume Trend", stats['volume_trend'],
              c="green" if stats['volume_trend'] == "Increasing" else "red"),
        # NOTE(review): the icon string is empty here — possibly a glyph lost
        # in an encoding round-trip; confirm the intended icon.
        _card("", "red", "High Volume Periods", str(stats['high_volume_periods'])),
        _card("🔗", "purple", "Volume-Price Correlation", correlation_text),
        _card("💱", "teal", "Avg Trade Size", format_number(stats['avg_trade_size']))
    ], cols=3, spacing="md", style={'marginTop': '20px'})  # Dash style keys are camelCased
def create_price_stats_display(stats: Dict[str, Any]) -> html.Div:
    """Create price movement statistics display.

    Args:
        stats: Price metrics dict. Reads the keys ``current_price``,
            ``period_return``, ``volatility``, ``bullish_ratio``, ``momentum``
            and ``max_loss``; a dict containing ``error`` is rendered as an
            alert instead.

    Returns:
        A ``dmc.Alert`` when ``stats`` carries an error, otherwise a
        three-column ``dmc.SimpleGrid`` of six cards.
    """
    if 'error' in stats:
        return dmc.Alert(
            "Error loading price statistics",
            title="Price Analysis Error",
            color="red"
        )

    def _fmt(value, template: str) -> str:
        """Apply ``template`` to ``value``, or return "N/A" for a missing value."""
        return "N/A" if pd.isna(value) else template.format(value)

    def _card(icon: str, icon_color: str, label: str, value: str, **value_props):
        """Build one statistic card: themed icon next to a label/value stack."""
        return dmc.Paper([
            dmc.Group([
                dmc.ThemeIcon(icon, size="lg", color=icon_color),
                dmc.Stack([
                    dmc.Text(label, size="sm", c="dimmed"),
                    dmc.Text(value, fw=700, size="lg", **value_props)
                ], gap="xs")
            ])
        ], p="md", shadow="sm")

    # Green for a non-negative value, red otherwise (a NaN compares false
    # and therefore renders red).
    return_color = "green" if stats['period_return'] >= 0 else "red"
    momentum_color = "green" if stats['momentum'] >= 0 else "red"

    return dmc.SimpleGrid([
        _card("💰", "blue", "Current Price", _fmt(stats['current_price'], "${:.2f}")),
        _card("📈", return_color, "Period Return", _fmt(stats['period_return'], "{:+.2f}%"),
              c=return_color),
        # Volatility is a standard deviation and is NaN when too few candles
        # are available, hence the "N/A" fallback in _fmt.
        _card("📊", "orange", "Volatility", _fmt(stats['volatility'], "{:.2f}%")),
        _card("🎯", "purple", "Bullish Ratio", _fmt(stats['bullish_ratio'], "{:.1f}%")),
        # NOTE(review): the icon string is empty here — possibly a glyph lost
        # in an encoding round-trip; confirm the intended icon.
        _card("", "teal", "Momentum", _fmt(stats['momentum'], "{:+.2f}%"),
              c=momentum_color),
        _card("📉", "red", "Max Loss", _fmt(stats['max_loss'], "{:.2f}%"), c="red")
    ], cols=3, spacing="md", style={'marginTop': '20px'})  # Dash style keys are camelCased