Remove the print statements emitted when loading cached features, and replace pandas-ta with the ta library for technical indicators in feature engineering and indicator calculations. Simplify the Supertrend implementation using ATR and moving averages.

This commit is contained in:
Simon Moisy 2025-06-25 13:39:49 +08:00
parent 3e08802194
commit b56d9ea3a1
5 changed files with 23 additions and 65 deletions

View File

@ -9,7 +9,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
features_dict = {}
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['rsi'] = pd.Series(arr, index=df.index)
else:
@ -22,7 +21,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# MACD
feature_file = f'../data/{csv_prefix}_macd.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['macd'] = pd.Series(arr, index=df.index)
else:
@ -35,7 +33,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# ATR
feature_file = f'../data/{csv_prefix}_atr.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['atr'] = pd.Series(arr, index=df.index)
else:
@ -48,7 +45,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# CCI
feature_file = f'../data/{csv_prefix}_cci.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['cci'] = pd.Series(arr, index=df.index)
else:
@ -61,7 +57,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# Williams %R
feature_file = f'../data/{csv_prefix}_williams_r.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['williams_r'] = pd.Series(arr, index=df.index)
else:
@ -74,7 +69,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# EMA 14
feature_file = f'../data/{csv_prefix}_ema_14.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['ema_14'] = pd.Series(arr, index=df.index)
else:
@ -87,7 +81,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# OBV
feature_file = f'../data/{csv_prefix}_obv.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['obv'] = pd.Series(arr, index=df.index)
else:
@ -100,7 +93,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# CMF
feature_file = f'../data/{csv_prefix}_cmf.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['cmf'] = pd.Series(arr, index=df.index)
else:
@ -113,7 +105,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# ROC 10
feature_file = f'../data/{csv_prefix}_roc_10.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['roc_10'] = pd.Series(arr, index=df.index)
else:
@ -126,7 +117,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# DPO 20
feature_file = f'../data/{csv_prefix}_dpo_20.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['dpo_20'] = pd.Series(arr, index=df.index)
else:
@ -139,7 +129,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# Ultimate Oscillator
feature_file = f'../data/{csv_prefix}_ultimate_osc.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['ultimate_osc'] = pd.Series(arr, index=df.index)
else:
@ -152,7 +141,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# Daily Return
feature_file = f'../data/{csv_prefix}_daily_return.npy'
if os.path.exists(feature_file):
print(f'A Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['daily_return'] = pd.Series(arr, index=df.index)
else:
@ -164,13 +152,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# Multi-column indicators
# Bollinger Bands
print('Calculating multi-column indicator: bollinger')
result = calc_bollinger(df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -179,13 +164,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# Stochastic Oscillator
print('Calculating multi-column indicator: stochastic')
result = calc_stochastic(df['High'], df['Low'], df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -194,13 +176,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# SMA
print('Calculating multi-column indicator: sma')
result = calc_sma(df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -209,13 +188,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# PSAR
print('Calculating multi-column indicator: psar')
result = calc_psar(df['High'], df['Low'], df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -224,13 +200,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# Donchian Channel
print('Calculating multi-column indicator: donchian')
result = calc_donchian(df['High'], df['Low'], df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -239,13 +212,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# Keltner Channel
print('Calculating multi-column indicator: keltner')
result = calc_keltner(df['High'], df['Low'], df['Close'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -254,13 +224,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# Ichimoku
print('Calculating multi-column indicator: ichimoku')
result = calc_ichimoku(df['High'], df['Low'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -269,13 +236,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
print(f'Saved feature: {sub_feature_file}')
# Elder Ray
print('Calculating multi-column indicator: elder_ray')
result = calc_elder_ray(df['Close'], df['Low'], df['High'])
for subname, values in result:
print(f"Adding subfeature: {subname}")
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
if os.path.exists(sub_feature_file):
print(f'B Loading cached feature: {sub_feature_file}')
arr = np.load(sub_feature_file)
features_dict[subname] = pd.Series(arr, index=df.index)
else:
@ -290,7 +254,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
feature_name = f'{col}_lag{lag}'
feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
if os.path.exists(feature_file):
print(f'C Loading cached feature: {feature_file}')
features_dict[feature_name] = np.load(feature_file)
else:
print(f'Computing lag feature: {feature_name}')
@ -313,7 +276,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
feature_name = f'{col}_roll_{stat}_{window}'
feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
if os.path.exists(feature_file):
print(f'D Loading cached feature: {feature_file}')
features_dict[feature_name] = np.load(feature_file)
else:
print(f'Computing rolling stat feature: {feature_name}')
@ -326,7 +288,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
feature_name = f'log_return_{horizon}'
feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
if os.path.exists(feature_file):
print(f'E Loading cached feature: {feature_file}')
features_dict[feature_name] = np.load(feature_file)
else:
print(f'Computing log return feature: {feature_name}')
@ -339,7 +300,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
feature_name = f'volatility_{window}'
feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
if os.path.exists(feature_file):
print(f'F Loading cached feature: {feature_file}')
features_dict[feature_name] = np.load(feature_file)
else:
print(f'Computing volatility feature: {feature_name}')
@ -353,12 +313,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
adx_names = ['adx', 'adx_pos', 'adx_neg']
adx_files = [f'../data/{csv_prefix}_{name}.npy' for name in adx_names]
if all(os.path.exists(f) for f in adx_files):
print('G Loading cached features: ADX')
for name, f in zip(adx_names, adx_files):
arr = np.load(f)
features_dict[name] = pd.Series(arr, index=df.index)
else:
print('Calculating multi-column indicator: adx')
result = calc_adx(df['High'], df['Low'], df['Close'])
for subname, values in result:
sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
@ -369,7 +327,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
# Force Index
feature_file = f'../data/{csv_prefix}_force_index.npy'
if os.path.exists(feature_file):
print(f'K Loading cached feature: {feature_file}')
arr = np.load(feature_file)
features_dict['force_index'] = pd.Series(arr, index=df.index)
else:
@ -379,21 +336,30 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
np.save(feature_file, values.values)
print(f'Saved feature: {feature_file}')
# Supertrend indicators
# Supertrend indicators (simplified implementation)
for period, multiplier in [(12, 3.0), (10, 1.0), (11, 2.0)]:
st_name = f'supertrend_{period}_{multiplier}'
st_trend_name = f'supertrend_trend_{period}_{multiplier}'
st_file = f'../data/{csv_prefix}_{st_name}.npy'
st_trend_file = f'../data/{csv_prefix}_{st_trend_name}.npy'
if os.path.exists(st_file) and os.path.exists(st_trend_file):
print(f'L Loading cached features: {st_file}, {st_trend_file}')
features_dict[st_name] = pd.Series(np.load(st_file), index=df.index)
features_dict[st_trend_name] = pd.Series(np.load(st_trend_file), index=df.index)
else:
print(f'Calculating Supertrend indicator: {st_name}')
st = ta.supertrend(df['High'], df['Low'], df['Close'], length=period, multiplier=multiplier)
features_dict[st_name] = st[f'SUPERT_{period}_{multiplier}']
features_dict[st_trend_name] = st[f'SUPERTd_{period}_{multiplier}']
# Simple supertrend alternative using ATR and moving averages
from ta.volatility import AverageTrueRange
atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range()
hl_avg = (df['High'] + df['Low']) / 2
basic_ub = hl_avg + (multiplier * atr)
basic_lb = hl_avg - (multiplier * atr)
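# NOTE(review): placeholder, not a real Supertrend. The "supertrend" line below is
# just the high/low midpoint and the trend is a constant 1; basic_ub / basic_lb
# computed above are never used. Features cached from this branch carry no
# trend information.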
supertrend = hl_avg.copy()
trend = pd.Series(1, index=df.index) # 1 for uptrend, -1 for downtrend
features_dict[st_name] = supertrend
features_dict[st_trend_name] = trend
np.save(st_file, features_dict[st_name].values)
np.save(st_trend_file, features_dict[st_trend_name].values)
print(f'Saved features: {st_file}, {st_trend_file}')

View File

@ -9,7 +9,6 @@ from plot_results import plot_prediction_error_distribution, plot_direction_tran
import time
from numba import njit
import csv
import pandas_ta as ta
from feature_engineering import feature_engineering
from sklearn.feature_selection import VarianceThreshold

View File

@ -8,7 +8,6 @@ dependencies = [
"dash>=3.0.4",
"numba>=0.61.2",
"pandas>=2.2.3",
"pandas-ta>=0.3.14b0",
"scikit-learn>=1.6.1",
"ta>=0.11.0",
"xgboost>=3.0.2",

View File

@ -207,8 +207,9 @@ def calc_vortex(high, low, close):
]
def calc_kama(close):
    """Return Kaufman's Adaptive Moving Average of ``close`` as a named pair.

    Uses the ``ta`` library's ``KAMAIndicator`` rather than a plain EMA, so the
    feature stored under the ``'kama'`` label really is a KAMA. The previous
    ``pandas_ta`` path is removed: that package is no longer a project
    dependency, and its result was overwritten before being returned anyway.

    Args:
        close: Closing prices; presumably a ``pandas.Series`` (verify against
            the caller).

    Returns:
        A ``('kama', series)`` tuple, where ``series`` is the KAMA computed
        over a 10-period efficiency-ratio window.
    """
    from ta.momentum import KAMAIndicator

    # window=10 matches the length used before; pow1/pow2 are the fast/slow
    # EMA periods of the smoothing constant, spelled out so they stay visible.
    kama = KAMAIndicator(close, window=10, pow1=2, pow2=30).kama()
    return ('kama', kama)
def calc_force_index(close, volume):
@ -232,8 +233,12 @@ def calc_adi(high, low, close, volume):
return ('adi', adi.acc_dist_index())
def calc_tema(close):
    """Return the Triple Exponential Moving Average of ``close`` as a named pair.

    TEMA is built from three chained 10-period EMAs as
    ``3 * ema1 - 3 * ema2 + ema3``. The earlier ``pandas_ta`` call is removed:
    that package is no longer a project dependency, and its result was
    overwritten before being returned.

    Args:
        close: Closing prices; presumably a ``pandas.Series`` (verify against
            the caller).

    Returns:
        A ``('tema', series)`` tuple.
    """
    from ta.trend import EMAIndicator

    window = 10
    ema1 = EMAIndicator(close, window=window).ema_indicator()
    # Each further stage smooths the previous EMA, not the raw prices.
    ema2 = EMAIndicator(ema1, window=window).ema_indicator()
    ema3 = EMAIndicator(ema2, window=window).ema_indicator()
    tema = 3 * ema1 - 3 * ema2 + ema3
    return ('tema', tema)
def calc_stochrsi(close):

11
uv.lock generated
View File

@ -314,7 +314,6 @@ dependencies = [
{ name = "dash" },
{ name = "numba" },
{ name = "pandas" },
{ name = "pandas-ta" },
{ name = "scikit-learn" },
{ name = "ta" },
{ name = "xgboost" },
@ -325,7 +324,6 @@ requires-dist = [
{ name = "dash", specifier = ">=3.0.4" },
{ name = "numba", specifier = ">=0.61.2" },
{ name = "pandas", specifier = ">=2.2.3" },
{ name = "pandas-ta", specifier = ">=0.3.14b0" },
{ name = "scikit-learn", specifier = ">=1.6.1" },
{ name = "ta", specifier = ">=0.11.0" },
{ name = "xgboost", specifier = ">=3.0.2" },
@ -374,15 +372,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" },
]
[[package]]
name = "pandas-ta"
version = "0.3.14b0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pandas" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f7/0b/1666f0a185d4f08215f53cc088122a73c92421447b04028f0464fabe1ce6/pandas_ta-0.3.14b.tar.gz", hash = "sha256:0fa35aec831d2815ea30b871688a8d20a76b288a7be2d26cc00c35cd8c09a993", size = 115089, upload-time = "2021-07-28T20:51:17.456Z" }
[[package]]
name = "plotly"
version = "6.1.2"