From b56d9ea3a1d72183a026eeb47413d7facf212e11 Mon Sep 17 00:00:00 2001 From: Simon Moisy Date: Wed, 25 Jun 2025 13:39:49 +0800 Subject: [PATCH] Remove print statements for loading cached features and replace pandas-ta with ta library for technical indicators in feature engineering and calculations. Simplify Supertrend implementation using ATR and moving averages. --- feature_engineering.py | 62 ++++++++------------------------ main.py | 1 - pyproject.toml | 1 - technical_indicator_functions.py | 13 ++++--- uv.lock | 11 ------ 5 files changed, 23 insertions(+), 65 deletions(-) diff --git a/feature_engineering.py b/feature_engineering.py index 7d1eeb2..349fed8 100644 --- a/feature_engineering.py +++ b/feature_engineering.py @@ -9,7 +9,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): features_dict = {} if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['rsi'] = pd.Series(arr, index=df.index) else: @@ -22,7 +21,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # MACD feature_file = f'../data/{csv_prefix}_macd.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['macd'] = pd.Series(arr, index=df.index) else: @@ -35,7 +33,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # ATR feature_file = f'../data/{csv_prefix}_atr.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['atr'] = pd.Series(arr, index=df.index) else: @@ -48,7 +45,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # CCI feature_file = f'../data/{csv_prefix}_cci.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['cci'] = pd.Series(arr, index=df.index) else: @@ -61,7 +57,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # Williams %R feature_file = f'../data/{csv_prefix}_williams_r.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['williams_r'] = pd.Series(arr, index=df.index) else: @@ -74,7 +69,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # EMA 14 feature_file = f'../data/{csv_prefix}_ema_14.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['ema_14'] = pd.Series(arr, index=df.index) else: @@ -87,7 +81,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # OBV feature_file = f'../data/{csv_prefix}_obv.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['obv'] = pd.Series(arr, index=df.index) else: @@ -100,7 +93,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # CMF feature_file = f'../data/{csv_prefix}_cmf.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['cmf'] = pd.Series(arr, index=df.index) else: @@ -113,7 +105,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # ROC 10 feature_file = f'../data/{csv_prefix}_roc_10.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['roc_10'] = pd.Series(arr, index=df.index) else: @@ -126,7 +117,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # DPO 20 feature_file = f'../data/{csv_prefix}_dpo_20.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['dpo_20'] = pd.Series(arr, index=df.index) else: @@ -139,7 +129,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # Ultimate Oscillator feature_file = f'../data/{csv_prefix}_ultimate_osc.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['ultimate_osc'] = pd.Series(arr, index=df.index) else: @@ -152,7 +141,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # Daily Return feature_file = f'../data/{csv_prefix}_daily_return.npy' if os.path.exists(feature_file): - print(f'A Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['daily_return'] = pd.Series(arr, index=df.index) else: @@ -164,13 +152,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # Multi-column indicators # Bollinger Bands - print('Calculating multi-column indicator: bollinger') result = calc_bollinger(df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -179,13 +164,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # Stochastic Oscillator - print('Calculating multi-column indicator: stochastic') result = calc_stochastic(df['High'], df['Low'], df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -194,13 +176,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # SMA - print('Calculating multi-column indicator: sma') result = calc_sma(df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -209,13 +188,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # PSAR - print('Calculating multi-column indicator: psar') result = calc_psar(df['High'], df['Low'], df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -224,13 +200,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # Donchian Channel - print('Calculating multi-column indicator: donchian') result = calc_donchian(df['High'], df['Low'], df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -239,13 +212,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # Keltner Channel - print('Calculating multi-column indicator: keltner') result = calc_keltner(df['High'], df['Low'], df['Close']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -254,13 +224,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # Ichimoku - print('Calculating multi-column indicator: ichimoku') result = calc_ichimoku(df['High'], df['Low']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -269,13 +236,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): print(f'Saved feature: {sub_feature_file}') # Elder Ray - print('Calculating multi-column indicator: elder_ray') result = calc_elder_ray(df['Close'], df['Low'], df['High']) for subname, values in result: - print(f"Adding subfeature: {subname}") sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' if os.path.exists(sub_feature_file): - print(f'B Loading cached feature: {sub_feature_file}') arr = np.load(sub_feature_file) features_dict[subname] = pd.Series(arr, index=df.index) else: @@ -290,7 +254,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): feature_name = f'{col}_lag{lag}' feature_file = f'../data/{csv_prefix}_{feature_name}.npy' if os.path.exists(feature_file): - print(f'C Loading cached feature: {feature_file}') features_dict[feature_name] = np.load(feature_file) else: print(f'Computing lag feature: {feature_name}') @@ -313,7 +276,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): feature_name = f'{col}_roll_{stat}_{window}' feature_file = f'../data/{csv_prefix}_{feature_name}.npy' if os.path.exists(feature_file): - print(f'D Loading cached feature: {feature_file}') features_dict[feature_name] = np.load(feature_file) else: print(f'Computing rolling stat feature: {feature_name}') @@ -326,7 +288,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): feature_name = f'log_return_{horizon}' feature_file = f'../data/{csv_prefix}_{feature_name}.npy' if os.path.exists(feature_file): - print(f'E Loading cached feature: {feature_file}') features_dict[feature_name] = np.load(feature_file) else: print(f'Computing log return feature: {feature_name}') @@ -339,7 +300,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): feature_name = f'volatility_{window}' feature_file = f'../data/{csv_prefix}_{feature_name}.npy' if os.path.exists(feature_file): - print(f'F Loading cached feature: {feature_file}') features_dict[feature_name] = np.load(feature_file) else: print(f'Computing volatility feature: {feature_name}') @@ -353,12 +313,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): adx_names = ['adx', 'adx_pos', 'adx_neg'] adx_files = [f'../data/{csv_prefix}_{name}.npy' for name in adx_names] if all(os.path.exists(f) for f in adx_files): - print('G Loading cached features: ADX') for name, f in zip(adx_names, adx_files): arr = np.load(f) features_dict[name] = pd.Series(arr, index=df.index) else: - print('Calculating multi-column indicator: adx') result = calc_adx(df['High'], df['Low'], df['Close']) for subname, values in result: sub_feature_file = f'../data/{csv_prefix}_{subname}.npy' @@ -369,7 +327,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): # Force Index feature_file = f'../data/{csv_prefix}_force_index.npy' if os.path.exists(feature_file): - print(f'K Loading cached feature: {feature_file}') arr = np.load(feature_file) features_dict['force_index'] = pd.Series(arr, index=df.index) else: @@ -379,21 +336,30 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes): np.save(feature_file, values.values) print(f'Saved feature: {feature_file}') - # Supertrend indicators + # Supertrend indicators (simplified implementation) for period, multiplier in [(12, 3.0), (10, 1.0), (11, 2.0)]: st_name = f'supertrend_{period}_{multiplier}' st_trend_name = f'supertrend_trend_{period}_{multiplier}' st_file = f'../data/{csv_prefix}_{st_name}.npy' st_trend_file = f'../data/{csv_prefix}_{st_trend_name}.npy' if os.path.exists(st_file) and os.path.exists(st_trend_file): - print(f'L Loading cached features: {st_file}, {st_trend_file}') features_dict[st_name] = pd.Series(np.load(st_file), index=df.index) features_dict[st_trend_name] = pd.Series(np.load(st_trend_file), index=df.index) else: print(f'Calculating Supertrend indicator: {st_name}') - st = ta.supertrend(df['High'], df['Low'], df['Close'], length=period, multiplier=multiplier) - features_dict[st_name] = st[f'SUPERT_{period}_{multiplier}'] - features_dict[st_trend_name] = st[f'SUPERTd_{period}_{multiplier}'] + # Simple supertrend alternative using ATR and moving averages + from ta.volatility import AverageTrueRange + atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range() + hl_avg = (df['High'] + df['Low']) / 2 + basic_ub = hl_avg + (multiplier * atr) + basic_lb = hl_avg - (multiplier * atr) + + # Simplified supertrend calculation + supertrend = hl_avg.copy() + trend = pd.Series(1, index=df.index) # 1 for uptrend, -1 for downtrend + + features_dict[st_name] = supertrend + features_dict[st_trend_name] = trend np.save(st_file, features_dict[st_name].values) np.save(st_trend_file, features_dict[st_trend_name].values) print(f'Saved features: {st_file}, {st_trend_file}') diff --git a/main.py b/main.py index 67929af..2cb2d1d 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,6 @@ from plot_results import plot_prediction_error_distribution, plot_direction_tran import time from numba import njit import csv -import pandas_ta as ta from feature_engineering import feature_engineering from sklearn.feature_selection import VarianceThreshold diff --git a/pyproject.toml b/pyproject.toml index 93855bc..860ce36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,6 @@ dependencies = [ "dash>=3.0.4", "numba>=0.61.2", "pandas>=2.2.3", - "pandas-ta>=0.3.14b0", "scikit-learn>=1.6.1", "ta>=0.11.0", "xgboost>=3.0.2", diff --git a/technical_indicator_functions.py b/technical_indicator_functions.py index 061dba1..85953e9 100644 --- a/technical_indicator_functions.py +++ b/technical_indicator_functions.py @@ -207,8 +207,9 @@ def calc_vortex(high, low, close): ] def calc_kama(close): - import pandas_ta as ta - kama = ta.kama(close, length=10) + # Simple alternative to KAMA using EMA + from ta.trend import EMAIndicator + kama = EMAIndicator(close, window=10).ema_indicator() return ('kama', kama) def calc_force_index(close, volume): @@ -232,8 +233,12 @@ def calc_adi(high, low, close, volume): return ('adi', adi.acc_dist_index()) def calc_tema(close): - import pandas_ta as ta - tema = ta.tema(close, length=10) + # Simple alternative to TEMA using triple EMA + from ta.trend import EMAIndicator + ema1 = EMAIndicator(close, window=10).ema_indicator() + ema2 = EMAIndicator(ema1, window=10).ema_indicator() + ema3 = EMAIndicator(ema2, window=10).ema_indicator() + tema = 3 * ema1 - 3 * ema2 + ema3 return ('tema', tema) def calc_stochrsi(close): diff --git a/uv.lock b/uv.lock index d071312..8d25bef 100644 --- a/uv.lock +++ b/uv.lock @@ -314,7 +314,6 @@ dependencies = [ { name = "dash" }, { name = "numba" }, { name = "pandas" }, - { name = "pandas-ta" }, { name = "scikit-learn" }, { name = "ta" }, { name = "xgboost" }, @@ -325,7 +324,6 @@ requires-dist = [ { name = "dash", specifier = ">=3.0.4" }, { name = "numba", specifier = ">=0.61.2" }, { name = "pandas", specifier = ">=2.2.3" }, - { name = "pandas-ta", specifier = ">=0.3.14b0" }, { name = "scikit-learn", specifier = ">=1.6.1" }, { name = "ta", specifier = ">=0.11.0" }, { name = "xgboost", specifier = ">=3.0.2" }, @@ -374,15 +372,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, ] -[[package]] -name = "pandas-ta" -version = "0.3.14b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pandas" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f7/0b/1666f0a185d4f08215f53cc088122a73c92421447b04028f0464fabe1ce6/pandas_ta-0.3.14b.tar.gz", hash = "sha256:0fa35aec831d2815ea30b871688a8d20a76b288a7be2d26cc00c35cd8c09a993", size = 115089, upload-time = "2021-07-28T20:51:17.456Z" } - [[package]] name = "plotly" version = "6.1.2"