Refactor technical indicators to return DataFrames and enhance documentation

- Updated all technical indicators to return pandas DataFrames instead of lists, improving consistency and usability.
- Modified the `calculate` method in `TechnicalIndicators` to directly return DataFrames with relevant indicator values.
- Updated `data_integration.py` to use the new DataFrame outputs for better integration with charting.
- Updated documentation to reflect the new DataFrame-centric approach, including usage examples and output structures.
- Updated error handling so that an empty DataFrame (rather than an empty list) is returned when insufficient data is available or a calculation fails.

These changes streamline the indicator calculations and improve the overall architecture, aligning with project standards for maintainability and performance.
This commit is contained in:
Vasily.onl
2025-06-09 16:28:16 +08:00
parent fc3cac24bd
commit ec8f5514bb
14 changed files with 542 additions and 542 deletions

View File

@@ -68,8 +68,14 @@ class BaseIndicator(ABC):
df = df.sort_values('timestamp').reset_index(drop=True)
# Set timestamp as index for time-series operations
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Set as index, but keep as column
df.set_index('timestamp', inplace=True)
# Re-add timestamp as a regular column (set_index dropped it) so it is available as both index and column
df['timestamp'] = df.index
return df
@abstractmethod

View File

@@ -2,11 +2,9 @@
Bollinger Bands indicator implementation.
"""
from typing import List
import pandas as pd
from ..base import BaseIndicator
from ..result import IndicatorResult
class BollingerBandsIndicator(BaseIndicator):
@@ -18,7 +16,7 @@ class BollingerBandsIndicator(BaseIndicator):
"""
def calculate(self, df: pd.DataFrame, period: int = 20,
std_dev: float = 2.0, price_column: str = 'close') -> List[IndicatorResult]:
std_dev: float = 2.0, price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Bollinger Bands.
@@ -29,53 +27,20 @@ class BollingerBandsIndicator(BaseIndicator):
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with upper band, middle band (SMA), and lower band
DataFrame indexed by timestamp with 'upper_band', 'middle_band' and 'lower_band' columns; empty if validation or the calculation fails
"""
# Validate input data
if not self.validate_dataframe(df, period):
return []
return pd.DataFrame()
try:
# Calculate middle band (SMA)
df = df.copy()
df['middle_band'] = df[price_column].rolling(window=period, min_periods=period).mean()
# Calculate standard deviation
df['std'] = df[price_column].rolling(window=period, min_periods=period).std()
# Calculate upper and lower bands
df['upper_band'] = df['middle_band'] + (std_dev * df['std'])
df['lower_band'] = df['middle_band'] - (std_dev * df['std'])
# Calculate bandwidth and %B
df['bandwidth'] = (df['upper_band'] - df['lower_band']) / df['middle_band']
df['percent_b'] = (df[price_column] - df['lower_band']) / (df['upper_band'] - df['lower_band'])
# Convert results to IndicatorResult objects
results = []
for timestamp, row in df.iterrows():
if not pd.isna(row['middle_band']):
result = IndicatorResult(
timestamp=timestamp,
symbol=row['symbol'],
timeframe=row['timeframe'],
values={
'upper_band': row['upper_band'],
'middle_band': row['middle_band'],
'lower_band': row['lower_band'],
'bandwidth': row['bandwidth'],
'percent_b': row['percent_b']
},
metadata={
'period': period,
'std_dev': std_dev,
'price_column': price_column
}
)
results.append(result)
return results
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating Bollinger Bands: {e}")
return []
df['upper_band'] = df['middle_band'] + (df['std'] * std_dev)
df['lower_band'] = df['middle_band'] - (df['std'] * std_dev)
# Only keep rows with valid bands, and only 'timestamp', 'upper_band', 'middle_band', 'lower_band' columns
result_df = df.loc[df['middle_band'].notna() & df['upper_band'].notna() & df['lower_band'].notna(), ['timestamp', 'upper_band', 'middle_band', 'lower_band']].copy()
result_df.set_index('timestamp', inplace=True)
return result_df
except Exception:
return pd.DataFrame()

View File

@@ -2,11 +2,9 @@
Exponential Moving Average (EMA) indicator implementation.
"""
from typing import List
import pandas as pd
from ..base import BaseIndicator
from ..result import IndicatorResult
class EMAIndicator(BaseIndicator):
@@ -18,7 +16,7 @@ class EMAIndicator(BaseIndicator):
"""
def calculate(self, df: pd.DataFrame, period: int = 20,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Exponential Moving Average (EMA).
@@ -28,33 +26,19 @@ class EMAIndicator(BaseIndicator):
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with EMA values
DataFrame indexed by timestamp with a single 'ema' column (the first period-1 rows are dropped); empty if validation or the calculation fails
"""
# Validate input data
if not self.validate_dataframe(df, period):
return []
return pd.DataFrame()
try:
# Calculate EMA using pandas exponential weighted moving average
df = df.copy()
df['ema'] = df[price_column].ewm(span=period, adjust=False).mean()
# Convert results to IndicatorResult objects
results = []
for i, (timestamp, row) in enumerate(df.iterrows()):
# Only return results after minimum period
if i >= period - 1 and not pd.isna(row['ema']):
result = IndicatorResult(
timestamp=timestamp,
symbol=row['symbol'],
timeframe=row['timeframe'],
values={'ema': row['ema']},
metadata={'period': period, 'price_column': price_column}
)
results.append(result)
return results
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating EMA: {e}")
return []
# Only keep rows with valid EMA, and only 'timestamp' and 'ema' columns
result_df = df.loc[df['ema'].notna(), ['timestamp', 'ema']].copy()
# Only keep rows after enough data for EMA
result_df = result_df.iloc[period-1:]
result_df.set_index('timestamp', inplace=True)
return result_df
except Exception:
return pd.DataFrame()

View File

@@ -2,11 +2,9 @@
Moving Average Convergence Divergence (MACD) indicator implementation.
"""
from typing import List
import pandas as pd
from ..base import BaseIndicator
from ..result import IndicatorResult
class MACDIndicator(BaseIndicator):
@@ -20,7 +18,7 @@ class MACDIndicator(BaseIndicator):
def calculate(self, df: pd.DataFrame, fast_period: int = 12,
slow_period: int = 26, signal_period: int = 9,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Moving Average Convergence Divergence (MACD).
@@ -32,53 +30,23 @@ class MACDIndicator(BaseIndicator):
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with MACD, signal, and histogram values
DataFrame indexed by timestamp with 'macd', 'signal' and 'histogram' columns; empty if validation or the calculation fails
"""
# Validate input data
if not self.validate_dataframe(df, slow_period):
return []
return pd.DataFrame()
try:
# Calculate fast and slow EMAs
df['ema_fast'] = df[price_column].ewm(span=fast_period, adjust=False).mean()
df['ema_slow'] = df[price_column].ewm(span=slow_period, adjust=False).mean()
# Calculate MACD line
df['macd'] = df['ema_fast'] - df['ema_slow']
# Calculate signal line (EMA of MACD)
df = df.copy()
df['macd'] = df[price_column].ewm(span=fast_period, adjust=False).mean() - \
df[price_column].ewm(span=slow_period, adjust=False).mean()
df['signal'] = df['macd'].ewm(span=signal_period, adjust=False).mean()
# Calculate histogram
df['histogram'] = df['macd'] - df['signal']
# Convert results to IndicatorResult objects
results = []
for i, (timestamp, row) in enumerate(df.iterrows()):
# Only return results after minimum period
if i >= slow_period - 1:
if not (pd.isna(row['macd']) or pd.isna(row['signal']) or pd.isna(row['histogram'])):
result = IndicatorResult(
timestamp=timestamp,
symbol=row['symbol'],
timeframe=row['timeframe'],
values={
'macd': row['macd'],
'signal': row['signal'],
'histogram': row['histogram']
},
metadata={
'fast_period': fast_period,
'slow_period': slow_period,
'signal_period': signal_period,
'price_column': price_column
}
)
results.append(result)
return results
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating MACD: {e}")
return []
# Only keep rows with valid MACD, and only 'timestamp', 'macd', 'signal', 'histogram' columns
result_df = df.loc[df['macd'].notna() & df['signal'].notna() & df['histogram'].notna(), ['timestamp', 'macd', 'signal', 'histogram']].copy()
# Only keep rows after enough data for MACD and signal
min_required = max(slow_period, signal_period)
result_df = result_df.iloc[min_required-1:]
result_df.set_index('timestamp', inplace=True)
return result_df
except Exception:
return pd.DataFrame()

View File

@@ -4,6 +4,7 @@ Relative Strength Index (RSI) indicator implementation.
from typing import List
import pandas as pd
import numpy as np
from ..base import BaseIndicator
from ..result import IndicatorResult
@@ -18,7 +19,7 @@ class RSIIndicator(BaseIndicator):
"""
def calculate(self, df: pd.DataFrame, period: int = 14,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Relative Strength Index (RSI).
@@ -28,48 +29,23 @@ class RSIIndicator(BaseIndicator):
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with RSI values
DataFrame indexed by timestamp with a single 'rsi' column; empty if validation or the calculation fails
"""
# Validate input data
if not self.validate_dataframe(df, period + 1): # Need extra period for diff
return []
if not self.validate_dataframe(df, period):
return pd.DataFrame()
try:
# Calculate price changes
df['price_change'] = df[price_column].diff()
# Separate gains and losses
df['gain'] = df['price_change'].where(df['price_change'] > 0, 0)
df['loss'] = (-df['price_change']).where(df['price_change'] < 0, 0)
# Calculate average gain and loss using EMA
df['avg_gain'] = df['gain'].ewm(span=period, adjust=False).mean()
df['avg_loss'] = df['loss'].ewm(span=period, adjust=False).mean()
# Calculate RS and RSI
df['rs'] = df['avg_gain'] / df['avg_loss']
df['rsi'] = 100 - (100 / (1 + df['rs']))
# Handle division by zero
df['rsi'] = df['rsi'].fillna(50) # Neutral RSI when no losses
# Convert results to IndicatorResult objects
results = []
for i, (timestamp, row) in enumerate(df.iterrows()):
# Only return results after minimum period
if i >= period and not pd.isna(row['rsi']):
result = IndicatorResult(
timestamp=timestamp,
symbol=row['symbol'],
timeframe=row['timeframe'],
values={'rsi': row['rsi']},
metadata={'period': period, 'price_column': price_column}
)
results.append(result)
return results
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating RSI: {e}")
return []
df = df.copy()
delta = df[price_column].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
rs = gain / loss
rs = rs.replace([np.inf, -np.inf], np.nan)
df['rsi'] = 100 - (100 / (1 + rs))
# Only keep rows with valid RSI, and only 'timestamp' and 'rsi' columns
result_df = df.loc[df['rsi'].notna(), ['timestamp', 'rsi']].copy()
result_df = result_df.iloc[period-1:]
result_df.set_index('timestamp', inplace=True)
return result_df
except Exception:
return pd.DataFrame()

View File

@@ -2,11 +2,9 @@
Simple Moving Average (SMA) indicator implementation.
"""
from typing import List
import pandas as pd
from ..base import BaseIndicator
from ..result import IndicatorResult
class SMAIndicator(BaseIndicator):
@@ -18,7 +16,7 @@ class SMAIndicator(BaseIndicator):
"""
def calculate(self, df: pd.DataFrame, period: int = 20,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Simple Moving Average (SMA).
@@ -28,32 +26,18 @@ class SMAIndicator(BaseIndicator):
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with SMA values
DataFrame indexed by timestamp with a single 'sma' column; empty if validation or the calculation fails
"""
# Validate input data
if not self.validate_dataframe(df, period):
return []
return pd.DataFrame()
try:
# Calculate SMA using pandas rolling window
df = df.copy()
df['sma'] = df[price_column].rolling(window=period, min_periods=period).mean()
# Convert results to IndicatorResult objects
results = []
for timestamp, row in df.iterrows():
if not pd.isna(row['sma']):
result = IndicatorResult(
timestamp=timestamp,
symbol=row['symbol'],
timeframe=row['timeframe'],
values={'sma': row['sma']},
metadata={'period': period, 'price_column': price_column}
)
results.append(result)
return results
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating SMA: {e}")
return []
# Only keep rows with valid SMA, and only 'timestamp' and 'sma' columns
result_df = df.loc[df['sma'].notna(), ['timestamp', 'sma']].copy()
result_df = result_df.iloc[period-1:]
result_df.set_index('timestamp', inplace=True)
return result_df
except Exception:
return pd.DataFrame()

View File

@@ -10,22 +10,14 @@ IMPORTANT: Handles Sparse Data
- Uses pandas for efficient vectorized calculations
- Follows right-aligned timestamp convention
Supported Indicators:
- Simple Moving Average (SMA)
- Exponential Moving Average (EMA)
- Relative Strength Index (RSI)
- Moving Average Convergence Divergence (MACD)
- Bollinger Bands
TODO: make this more procedural, without hard-coding indicator types
"""
from datetime import datetime
from typing import Dict, List, Optional, Any, Union
from typing import Dict, List, Optional, Any
import pandas as pd
import numpy as np
from .result import IndicatorResult
from ..data_types import OHLCVCandle
from .base import BaseIndicator
from .implementations import (
SMAIndicator,
EMAIndicator,
@@ -85,7 +77,7 @@ class TechnicalIndicators:
return self._sma.prepare_dataframe(candles)
def sma(self, df: pd.DataFrame, period: int,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Simple Moving Average (SMA).
@@ -95,12 +87,12 @@ class TechnicalIndicators:
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with SMA values
DataFrame with SMA values
"""
return self._sma.calculate(df, period=period, price_column=price_column)
def ema(self, df: pd.DataFrame, period: int,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Exponential Moving Average (EMA).
@@ -115,7 +107,7 @@ class TechnicalIndicators:
return self._ema.calculate(df, period=period, price_column=price_column)
def rsi(self, df: pd.DataFrame, period: int = 14,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Relative Strength Index (RSI).
@@ -125,13 +117,13 @@ class TechnicalIndicators:
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with RSI values
DataFrame with RSI values
"""
return self._rsi.calculate(df, period=period, price_column=price_column)
def macd(self, df: pd.DataFrame,
fast_period: int = 12, slow_period: int = 26, signal_period: int = 9,
price_column: str = 'close') -> List[IndicatorResult]:
price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Moving Average Convergence Divergence (MACD).
@@ -154,7 +146,7 @@ class TechnicalIndicators:
)
def bollinger_bands(self, df: pd.DataFrame, period: int = 20,
std_dev: float = 2.0, price_column: str = 'close') -> List[IndicatorResult]:
std_dev: float = 2.0, price_column: str = 'close') -> pd.DataFrame:
"""
Calculate Bollinger Bands.
@@ -165,7 +157,7 @@ class TechnicalIndicators:
price_column: Price column to use ('open', 'high', 'low', 'close')
Returns:
List of indicator results with upper band, middle band (SMA), and lower band
DataFrame with upper band, middle band (SMA), and lower band
"""
return self._bollinger.calculate(
df,
@@ -175,9 +167,8 @@ class TechnicalIndicators:
)
def calculate_multiple_indicators(self, df: pd.DataFrame,
indicators_config: Dict[str, Dict[str, Any]]) -> Dict[str, List[IndicatorResult]]:
indicators_config: Dict[str, Dict[str, Any]]) -> Dict[str, pd.DataFrame]:
"""
TODO: make this more procedural, without hard-coding indicator types
Calculate multiple indicators at once for efficiency.
Args:
@@ -192,7 +183,7 @@ class TechnicalIndicators:
}
Returns:
Dictionary mapping indicator names to their results
Dictionary mapping indicator names to their results as DataFrames
"""
results = {}
@@ -235,16 +226,16 @@ class TechnicalIndicators:
else:
if self.logger:
self.logger.warning(f"Unknown indicator type: {indicator_type}")
results[indicator_name] = []
results[indicator_name] = pd.DataFrame()
except Exception as e:
if self.logger:
self.logger.error(f"Error calculating {indicator_name}: {e}")
results[indicator_name] = []
results[indicator_name] = pd.DataFrame()
return results
def calculate(self, indicator_type: str, df: pd.DataFrame, **kwargs) -> Optional[Dict[str, Any]]:
def calculate(self, indicator_type: str, df: pd.DataFrame, **kwargs) -> Optional[pd.DataFrame]:
"""
Calculate a single indicator with dynamic dispatch.
@@ -254,7 +245,7 @@ class TechnicalIndicators:
**kwargs: Indicator-specific parameters (e.g., period=20)
Returns:
A dictionary containing the indicator results, or None if the type is unknown.
DataFrame with indicator values, or None if the type is unknown or calculation fails.
"""
# Get the indicator calculation method
indicator_method = getattr(self, indicator_type, None)
@@ -265,21 +256,13 @@ class TechnicalIndicators:
try:
if df.empty:
return {'data': [], 'metadata': {}}
return pd.DataFrame()
# Call the indicator method
raw_result = indicator_method(df, **kwargs)
# Call the indicator method (now returns DataFrame)
result_df = indicator_method(df, **kwargs)
# Extract metadata from the first result if available
metadata = raw_result[0].metadata if raw_result else {}
# The methods return List[IndicatorResult], let's package that
if raw_result:
return {
"data": raw_result,
"metadata": metadata
}
return None
# Return the DataFrame directly
return result_df
except Exception as e:
if self.logger: