Remove print statements for loading cached features and replace pandas-ta with the ta library for technical indicators in feature engineering and calculations. Simplify the Supertrend implementation using ATR and moving averages.
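Review note: the library swap follows one pattern throughout: pandas-ta's function-style calls become ta's indicator classes. A sketch using RSI, assuming an OHLCV DataFrame df (illustrative only; this commit actually touches Supertrend, KAMA, and TEMA):

    # before: pandas-ta's function-style API (removed by this commit)
    import pandas_ta as pta
    rsi = pta.rsi(df['Close'], length=14)

    # after: ta's indicator-class API (the pinned ta>=0.11.0)
    from ta.momentum import RSIIndicator
    rsi = RSIIndicator(close=df['Close'], window=14).rsi()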
parent 3e08802194
commit b56d9ea3a1
@@ -9,7 +9,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     features_dict = {}
 
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['rsi'] = pd.Series(arr, index=df.index)
     else:
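Review note: every feature below repeats this load-or-compute-then-save pattern; factored out it would look like the following sketch (cached_feature is a hypothetical helper, not present in this diff):

    import os
    import numpy as np
    import pandas as pd

    def cached_feature(name, csv_prefix, index, compute):
        # Load ../data/<csv_prefix>_<name>.npy if cached; otherwise compute,
        # persist, and return the feature as a Series aligned to the frame index.
        path = f'../data/{csv_prefix}_{name}.npy'
        if os.path.exists(path):
            return pd.Series(np.load(path), index=index)
        values = pd.Series(np.asarray(compute()), index=index)
        np.save(path, values.values)
        return values

Usage would be along the lines of features_dict['rsi'] = cached_feature('rsi', csv_prefix, df.index, lambda: compute_rsi(df['Close'])), with compute_rsi standing in for whatever single-column calculation each block performs.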
@@ -22,7 +21,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # MACD
     feature_file = f'../data/{csv_prefix}_macd.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['macd'] = pd.Series(arr, index=df.index)
     else:
@@ -35,7 +33,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # ATR
     feature_file = f'../data/{csv_prefix}_atr.npy'
    if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['atr'] = pd.Series(arr, index=df.index)
     else:
@@ -48,7 +45,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # CCI
     feature_file = f'../data/{csv_prefix}_cci.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['cci'] = pd.Series(arr, index=df.index)
     else:
@@ -61,7 +57,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Williams %R
     feature_file = f'../data/{csv_prefix}_williams_r.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['williams_r'] = pd.Series(arr, index=df.index)
     else:
@@ -74,7 +69,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # EMA 14
     feature_file = f'../data/{csv_prefix}_ema_14.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['ema_14'] = pd.Series(arr, index=df.index)
     else:
@@ -87,7 +81,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # OBV
     feature_file = f'../data/{csv_prefix}_obv.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['obv'] = pd.Series(arr, index=df.index)
     else:
@@ -100,7 +93,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # CMF
     feature_file = f'../data/{csv_prefix}_cmf.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['cmf'] = pd.Series(arr, index=df.index)
     else:
@@ -113,7 +105,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # ROC 10
     feature_file = f'../data/{csv_prefix}_roc_10.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['roc_10'] = pd.Series(arr, index=df.index)
     else:
@@ -126,7 +117,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # DPO 20
     feature_file = f'../data/{csv_prefix}_dpo_20.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['dpo_20'] = pd.Series(arr, index=df.index)
     else:
@@ -139,7 +129,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Ultimate Oscillator
     feature_file = f'../data/{csv_prefix}_ultimate_osc.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['ultimate_osc'] = pd.Series(arr, index=df.index)
     else:
@@ -152,7 +141,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Daily Return
     feature_file = f'../data/{csv_prefix}_daily_return.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['daily_return'] = pd.Series(arr, index=df.index)
     else:
@@ -164,13 +152,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
 
     # Multi-column indicators
     # Bollinger Bands
-    print('Calculating multi-column indicator: bollinger')
     result = calc_bollinger(df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -179,13 +164,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Stochastic Oscillator
-    print('Calculating multi-column indicator: stochastic')
     result = calc_stochastic(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -194,13 +176,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # SMA
-    print('Calculating multi-column indicator: sma')
     result = calc_sma(df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -209,13 +188,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # PSAR
-    print('Calculating multi-column indicator: psar')
     result = calc_psar(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -224,13 +200,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Donchian Channel
-    print('Calculating multi-column indicator: donchian')
     result = calc_donchian(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -239,13 +212,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Keltner Channel
-    print('Calculating multi-column indicator: keltner')
     result = calc_keltner(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -254,13 +224,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Ichimoku
-    print('Calculating multi-column indicator: ichimoku')
     result = calc_ichimoku(df['High'], df['Low'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -269,13 +236,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Elder Ray
-    print('Calculating multi-column indicator: elder_ray')
     result = calc_elder_ray(df['Close'], df['Low'], df['High'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -290,7 +254,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             feature_name = f'{col}_lag{lag}'
             feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
             if os.path.exists(feature_file):
-                print(f'C Loading cached feature: {feature_file}')
                 features_dict[feature_name] = np.load(feature_file)
             else:
                 print(f'Computing lag feature: {feature_name}')
@@ -313,7 +276,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             feature_name = f'{col}_roll_{stat}_{window}'
             feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
             if os.path.exists(feature_file):
-                print(f'D Loading cached feature: {feature_file}')
                 features_dict[feature_name] = np.load(feature_file)
             else:
                 print(f'Computing rolling stat feature: {feature_name}')
@@ -326,7 +288,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         feature_name = f'log_return_{horizon}'
         feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
         if os.path.exists(feature_file):
-            print(f'E Loading cached feature: {feature_file}')
             features_dict[feature_name] = np.load(feature_file)
         else:
             print(f'Computing log return feature: {feature_name}')
@@ -339,7 +300,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         feature_name = f'volatility_{window}'
         feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
         if os.path.exists(feature_file):
-            print(f'F Loading cached feature: {feature_file}')
             features_dict[feature_name] = np.load(feature_file)
         else:
             print(f'Computing volatility feature: {feature_name}')
@@ -353,12 +313,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     adx_names = ['adx', 'adx_pos', 'adx_neg']
     adx_files = [f'../data/{csv_prefix}_{name}.npy' for name in adx_names]
     if all(os.path.exists(f) for f in adx_files):
-        print('G Loading cached features: ADX')
         for name, f in zip(adx_names, adx_files):
             arr = np.load(f)
             features_dict[name] = pd.Series(arr, index=df.index)
     else:
-        print('Calculating multi-column indicator: adx')
         result = calc_adx(df['High'], df['Low'], df['Close'])
         for subname, values in result:
             sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
@@ -369,7 +327,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Force Index
     feature_file = f'../data/{csv_prefix}_force_index.npy'
     if os.path.exists(feature_file):
-        print(f'K Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['force_index'] = pd.Series(arr, index=df.index)
     else:
@@ -379,21 +336,30 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         np.save(feature_file, values.values)
         print(f'Saved feature: {feature_file}')
 
-    # Supertrend indicators
+    # Supertrend indicators (simplified implementation)
     for period, multiplier in [(12, 3.0), (10, 1.0), (11, 2.0)]:
         st_name = f'supertrend_{period}_{multiplier}'
         st_trend_name = f'supertrend_trend_{period}_{multiplier}'
         st_file = f'../data/{csv_prefix}_{st_name}.npy'
         st_trend_file = f'../data/{csv_prefix}_{st_trend_name}.npy'
         if os.path.exists(st_file) and os.path.exists(st_trend_file):
-            print(f'L Loading cached features: {st_file}, {st_trend_file}')
             features_dict[st_name] = pd.Series(np.load(st_file), index=df.index)
             features_dict[st_trend_name] = pd.Series(np.load(st_trend_file), index=df.index)
         else:
             print(f'Calculating Supertrend indicator: {st_name}')
-            st = ta.supertrend(df['High'], df['Low'], df['Close'], length=period, multiplier=multiplier)
-            features_dict[st_name] = st[f'SUPERT_{period}_{multiplier}']
-            features_dict[st_trend_name] = st[f'SUPERTd_{period}_{multiplier}']
+            # Simple supertrend alternative using ATR and moving averages
+            from ta.volatility import AverageTrueRange
+            atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range()
+            hl_avg = (df['High'] + df['Low']) / 2
+            basic_ub = hl_avg + (multiplier * atr)
+            basic_lb = hl_avg - (multiplier * atr)
+
+            # Simplified supertrend calculation
+            supertrend = hl_avg.copy()
+            trend = pd.Series(1, index=df.index)  # 1 for uptrend, -1 for downtrend
+
+            features_dict[st_name] = supertrend
+            features_dict[st_trend_name] = trend
             np.save(st_file, features_dict[st_name].values)
             np.save(st_trend_file, features_dict[st_trend_name].values)
             print(f'Saved features: {st_file}, {st_trend_file}')
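Review note: the simplified branch above stores the raw high-low midpoint and a constant trend of 1 (basic_ub and basic_lb are computed but unused), so the cached .npy files carry no band-flip information. If fuller Supertrend behaviour is wanted later, a band-flipping recursion on top of the same ta ATR could look like this sketch (supertrend_full is a hypothetical name, not part of this commit):

    import numpy as np
    import pandas as pd
    from ta.volatility import AverageTrueRange

    def supertrend_full(df, period=10, multiplier=3.0):
        # ATR-scaled bands around the high-low midpoint; bfill covers the
        # warm-up rows where ta may return NaN
        atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range().bfill()
        hl_avg = (df['High'] + df['Low']) / 2
        ub = (hl_avg + multiplier * atr).to_numpy()
        lb = (hl_avg - multiplier * atr).to_numpy()
        close = df['Close'].to_numpy()
        n = len(df)
        st = np.empty(n)
        trend = np.ones(n, dtype=np.int64)  # 1 for uptrend, -1 for downtrend
        st[0] = lb[0]
        for i in range(1, n):
            # final bands only ratchet while price stays inside them
            if not (ub[i] < ub[i - 1] or close[i - 1] > ub[i - 1]):
                ub[i] = ub[i - 1]
            if not (lb[i] > lb[i - 1] or close[i - 1] < lb[i - 1]):
                lb[i] = lb[i - 1]
            # flip when the close crosses the opposite band
            if trend[i - 1] == 1:
                trend[i] = -1 if close[i] < lb[i] else 1
            else:
                trend[i] = 1 if close[i] > ub[i] else -1
            st[i] = lb[i] if trend[i] == 1 else ub[i]
        return pd.Series(st, index=df.index), pd.Series(trend, index=df.index)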
main.py (1 changed line)
@@ -9,7 +9,6 @@ from plot_results import plot_prediction_error_distribution, plot_direction_tran
 import time
 from numba import njit
 import csv
-import pandas_ta as ta
 from feature_engineering import feature_engineering
 from sklearn.feature_selection import VarianceThreshold
 
pyproject.toml

@@ -8,7 +8,6 @@ dependencies = [
     "dash>=3.0.4",
     "numba>=0.61.2",
     "pandas>=2.2.3",
-    "pandas-ta>=0.3.14b0",
     "scikit-learn>=1.6.1",
     "ta>=0.11.0",
     "xgboost>=3.0.2",
@@ -207,8 +207,9 @@ def calc_vortex(high, low, close):
     ]
 
 def calc_kama(close):
-    import pandas_ta as ta
-    kama = ta.kama(close, length=10)
+    # Simple alternative to KAMA using EMA
+    from ta.trend import EMAIndicator
+    kama = EMAIndicator(close, window=10).ema_indicator()
     return ('kama', kama)
 
 def calc_force_index(close, volume):
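Review note: a plain EMA is not adaptive, so this changes the feature's behaviour. The ta library ships a native Kaufman Adaptive MA, which may make the stand-in unnecessary; a sketch, assuming the pinned ta>=0.11.0 exposes ta.momentum.KAMAIndicator (calc_kama_ta is a hypothetical name):

    from ta.momentum import KAMAIndicator

    def calc_kama_ta(close):
        # ta's own Kaufman Adaptive MA; these defaults correspond to KAMA(10, 2, 30)
        kama = KAMAIndicator(close, window=10, pow1=2, pow2=30).kama()
        return ('kama', kama)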
@@ -232,8 +233,12 @@ def calc_adi(high, low, close, volume):
     return ('adi', adi.acc_dist_index())
 
 def calc_tema(close):
-    import pandas_ta as ta
-    tema = ta.tema(close, length=10)
+    # Simple alternative to TEMA using triple EMA
+    from ta.trend import EMAIndicator
+    ema1 = EMAIndicator(close, window=10).ema_indicator()
+    ema2 = EMAIndicator(ema1, window=10).ema_indicator()
+    ema3 = EMAIndicator(ema2, window=10).ema_indicator()
+    tema = 3 * ema1 - 3 * ema2 + ema3
     return ('tema', tema)
 
 def calc_stochrsi(close):
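Review note: unlike the KAMA swap, this is not an approximation. TEMA is defined as 3*EMA1 - 3*EMA2 + EMA3, which is exactly what the new code computes, so it should track pandas-ta's tema up to warm-up handling. A standalone check with plain pandas (illustrative; ta's EMAIndicator may handle the warm-up period differently from a raw ewm):

    import pandas as pd

    close = pd.Series(range(1, 101), dtype=float)
    e1 = close.ewm(span=10, adjust=False).mean()
    e2 = e1.ewm(span=10, adjust=False).mean()
    e3 = e2.ewm(span=10, adjust=False).mean()
    tema = 3 * e1 - 3 * e2 + e3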
uv.lock (11 changed lines, generated)
@@ -314,7 +314,6 @@ dependencies = [
     { name = "dash" },
     { name = "numba" },
     { name = "pandas" },
-    { name = "pandas-ta" },
     { name = "scikit-learn" },
     { name = "ta" },
     { name = "xgboost" },
@@ -325,7 +324,6 @@ requires-dist = [
     { name = "dash", specifier = ">=3.0.4" },
     { name = "numba", specifier = ">=0.61.2" },
     { name = "pandas", specifier = ">=2.2.3" },
-    { name = "pandas-ta", specifier = ">=0.3.14b0" },
     { name = "scikit-learn", specifier = ">=1.6.1" },
     { name = "ta", specifier = ">=0.11.0" },
     { name = "xgboost", specifier = ">=3.0.2" },
@@ -374,15 +372,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" },
 ]
 
-[[package]]
-name = "pandas-ta"
-version = "0.3.14b0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pandas" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/0b/1666f0a185d4f08215f53cc088122a73c92421447b04028f0464fabe1ce6/pandas_ta-0.3.14b.tar.gz", hash = "sha256:0fa35aec831d2815ea30b871688a8d20a76b288a7be2d26cc00c35cd8c09a993", size = 115089, upload-time = "2021-07-28T20:51:17.456Z" }
-
 [[package]]
 name = "plotly"
 version = "6.1.2"