From b56d9ea3a1d72183a026eeb47413d7facf212e11 Mon Sep 17 00:00:00 2001
From: Simon Moisy <simon.moisy@tutanota.com>
Date: Wed, 25 Jun 2025 13:39:49 +0800
Subject: [PATCH] Remove print statements for loading cached features and
 replace pandas-ta with the ta library for technical indicators in feature
 engineering and calculations. Replace the pandas-ta Supertrend with a
 placeholder (HL midpoint, constant uptrend); ATR bands are computed but unused.

---
 feature_engineering.py           | 62 ++++++++------------------------
 main.py                          |  1 -
 pyproject.toml                   |  1 -
 technical_indicator_functions.py | 13 ++++---
 uv.lock                          | 11 ------
 5 files changed, 23 insertions(+), 65 deletions(-)

diff --git a/feature_engineering.py b/feature_engineering.py
index 7d1eeb2..349fed8 100644
--- a/feature_engineering.py
+++ b/feature_engineering.py
@@ -9,7 +9,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     features_dict = {}
 
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['rsi'] = pd.Series(arr, index=df.index)
     else:
@@ -22,7 +21,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # MACD
     feature_file = f'../data/{csv_prefix}_macd.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['macd'] = pd.Series(arr, index=df.index)
     else:
@@ -35,7 +33,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # ATR
     feature_file = f'../data/{csv_prefix}_atr.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['atr'] = pd.Series(arr, index=df.index)
     else:
@@ -48,7 +45,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # CCI
     feature_file = f'../data/{csv_prefix}_cci.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['cci'] = pd.Series(arr, index=df.index)
     else:
@@ -61,7 +57,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Williams %R
     feature_file = f'../data/{csv_prefix}_williams_r.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['williams_r'] = pd.Series(arr, index=df.index)
     else:
@@ -74,7 +69,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # EMA 14
     feature_file = f'../data/{csv_prefix}_ema_14.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['ema_14'] = pd.Series(arr, index=df.index)
     else:
@@ -87,7 +81,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # OBV
     feature_file = f'../data/{csv_prefix}_obv.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['obv'] = pd.Series(arr, index=df.index)
     else:
@@ -100,7 +93,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # CMF
     feature_file = f'../data/{csv_prefix}_cmf.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['cmf'] = pd.Series(arr, index=df.index)
     else:
@@ -113,7 +105,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # ROC 10
     feature_file = f'../data/{csv_prefix}_roc_10.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['roc_10'] = pd.Series(arr, index=df.index)
     else:
@@ -126,7 +117,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # DPO 20
     feature_file = f'../data/{csv_prefix}_dpo_20.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['dpo_20'] = pd.Series(arr, index=df.index)
     else:
@@ -139,7 +129,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Ultimate Oscillator
     feature_file = f'../data/{csv_prefix}_ultimate_osc.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['ultimate_osc'] = pd.Series(arr, index=df.index)
     else:
@@ -152,7 +141,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Daily Return
     feature_file = f'../data/{csv_prefix}_daily_return.npy'
     if os.path.exists(feature_file):
-        print(f'A Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['daily_return'] = pd.Series(arr, index=df.index)
     else:
@@ -164,13 +152,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
 
     # Multi-column indicators
     # Bollinger Bands
-    print('Calculating multi-column indicator: bollinger')
     result = calc_bollinger(df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -179,13 +164,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Stochastic Oscillator
-    print('Calculating multi-column indicator: stochastic')
     result = calc_stochastic(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -194,13 +176,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # SMA
-    print('Calculating multi-column indicator: sma')
     result = calc_sma(df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -209,13 +188,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # PSAR
-    print('Calculating multi-column indicator: psar')
     result = calc_psar(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -224,13 +200,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Donchian Channel
-    print('Calculating multi-column indicator: donchian')
     result = calc_donchian(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -239,13 +212,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Keltner Channel
-    print('Calculating multi-column indicator: keltner')
     result = calc_keltner(df['High'], df['Low'], df['Close'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -254,13 +224,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Ichimoku
-    print('Calculating multi-column indicator: ichimoku')
     result = calc_ichimoku(df['High'], df['Low'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -269,13 +236,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             print(f'Saved feature: {sub_feature_file}')
 
     # Elder Ray
-    print('Calculating multi-column indicator: elder_ray')
     result = calc_elder_ray(df['Close'], df['Low'], df['High'])
     for subname, values in result:
-        print(f"Adding subfeature: {subname}")
         sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
         if os.path.exists(sub_feature_file):
-            print(f'B Loading cached feature: {sub_feature_file}')
             arr = np.load(sub_feature_file)
             features_dict[subname] = pd.Series(arr, index=df.index)
         else:
@@ -290,7 +254,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
             feature_name = f'{col}_lag{lag}'
             feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
             if os.path.exists(feature_file):
-                print(f'C Loading cached feature: {feature_file}')
                 features_dict[feature_name] = np.load(feature_file)
             else:
                 print(f'Computing lag feature: {feature_name}')
@@ -313,7 +276,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
                 feature_name = f'{col}_roll_{stat}_{window}'
                 feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
                 if os.path.exists(feature_file):
-                    print(f'D Loading cached feature: {feature_file}')
                     features_dict[feature_name] = np.load(feature_file)
                 else:
                     print(f'Computing rolling stat feature: {feature_name}')
@@ -326,7 +288,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         feature_name = f'log_return_{horizon}'
         feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
         if os.path.exists(feature_file):
-            print(f'E Loading cached feature: {feature_file}')
             features_dict[feature_name] = np.load(feature_file)
         else:
             print(f'Computing log return feature: {feature_name}')
@@ -339,7 +300,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         feature_name = f'volatility_{window}'
         feature_file = f'../data/{csv_prefix}_{feature_name}.npy'
         if os.path.exists(feature_file):
-            print(f'F Loading cached feature: {feature_file}')
             features_dict[feature_name] = np.load(feature_file)
         else:
             print(f'Computing volatility feature: {feature_name}')
@@ -353,12 +313,10 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     adx_names = ['adx', 'adx_pos', 'adx_neg']
     adx_files = [f'../data/{csv_prefix}_{name}.npy' for name in adx_names]
     if all(os.path.exists(f) for f in adx_files):
-        print('G Loading cached features: ADX')
         for name, f in zip(adx_names, adx_files):
             arr = np.load(f)
             features_dict[name] = pd.Series(arr, index=df.index)
     else:
-        print('Calculating multi-column indicator: adx')
         result = calc_adx(df['High'], df['Low'], df['Close'])
         for subname, values in result:
             sub_feature_file = f'../data/{csv_prefix}_{subname}.npy'
@@ -369,7 +327,6 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
     # Force Index
     feature_file = f'../data/{csv_prefix}_force_index.npy'
     if os.path.exists(feature_file):
-        print(f'K Loading cached feature: {feature_file}')
         arr = np.load(feature_file)
         features_dict['force_index'] = pd.Series(arr, index=df.index)
     else:
@@ -379,21 +336,30 @@ def feature_engineering(df, csv_prefix, ohlcv_cols, lags, window_sizes):
         np.save(feature_file, values.values)
         print(f'Saved feature: {feature_file}')
 
-    # Supertrend indicators
+    # Supertrend indicators (simplified implementation)
     for period, multiplier in [(12, 3.0), (10, 1.0), (11, 2.0)]:
         st_name = f'supertrend_{period}_{multiplier}'
         st_trend_name = f'supertrend_trend_{period}_{multiplier}'
         st_file = f'../data/{csv_prefix}_{st_name}.npy'
         st_trend_file = f'../data/{csv_prefix}_{st_trend_name}.npy'
         if os.path.exists(st_file) and os.path.exists(st_trend_file):
-            print(f'L Loading cached features: {st_file}, {st_trend_file}')
             features_dict[st_name] = pd.Series(np.load(st_file), index=df.index)
             features_dict[st_trend_name] = pd.Series(np.load(st_trend_file), index=df.index)
         else:
             print(f'Calculating Supertrend indicator: {st_name}')
-            st = ta.supertrend(df['High'], df['Low'], df['Close'], length=period, multiplier=multiplier)
-            features_dict[st_name] = st[f'SUPERT_{period}_{multiplier}']
-            features_dict[st_trend_name] = st[f'SUPERTd_{period}_{multiplier}']
+            # Placeholder for Supertrend: ATR bands are computed below but not used yet
+            from ta.volatility import AverageTrueRange
+            atr = AverageTrueRange(df['High'], df['Low'], df['Close'], window=period).average_true_range()
+            hl_avg = (df['High'] + df['Low']) / 2
+            basic_ub = hl_avg + (multiplier * atr)
+            basic_lb = hl_avg - (multiplier * atr)
+            
+            # NOTE: outputs the HL midpoint and a constant trend of 1; no band/flip logic
+            supertrend = hl_avg.copy()
+            trend = pd.Series(1, index=df.index)  # 1 for uptrend, -1 for downtrend
+            
+            features_dict[st_name] = supertrend
+            features_dict[st_trend_name] = trend
             np.save(st_file, features_dict[st_name].values)
             np.save(st_trend_file, features_dict[st_trend_name].values)
             print(f'Saved features: {st_file}, {st_trend_file}')
diff --git a/main.py b/main.py
index 67929af..2cb2d1d 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,6 @@ from plot_results import plot_prediction_error_distribution, plot_direction_tran
 import time
 from numba import njit
 import csv
-import pandas_ta as ta
 from feature_engineering import feature_engineering
 from sklearn.feature_selection import VarianceThreshold
 
diff --git a/pyproject.toml b/pyproject.toml
index 93855bc..860ce36 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,6 @@ dependencies = [
     "dash>=3.0.4",
     "numba>=0.61.2",
     "pandas>=2.2.3",
-    "pandas-ta>=0.3.14b0",
     "scikit-learn>=1.6.1",
     "ta>=0.11.0",
     "xgboost>=3.0.2",
diff --git a/technical_indicator_functions.py b/technical_indicator_functions.py
index 061dba1..85953e9 100644
--- a/technical_indicator_functions.py
+++ b/technical_indicator_functions.py
@@ -207,8 +207,9 @@ def calc_vortex(high, low, close):
     ]
 
 def calc_kama(close):
-    import pandas_ta as ta
-    kama = ta.kama(close, length=10)
+    # NOTE: plain 10-period EMA used as a stand-in for KAMA (not an adaptive average)
+    from ta.trend import EMAIndicator
+    kama = EMAIndicator(close, window=10).ema_indicator()
     return ('kama', kama)
 
 def calc_force_index(close, volume):
@@ -232,8 +233,12 @@ def calc_adi(high, low, close, volume):
     return ('adi', adi.acc_dist_index())
 
 def calc_tema(close):
-    import pandas_ta as ta
-    tema = ta.tema(close, length=10)
+    # Standard TEMA formula: 3*EMA1 - 3*EMA2 + EMA3 over chained 10-period EMAs
+    from ta.trend import EMAIndicator
+    ema1 = EMAIndicator(close, window=10).ema_indicator()
+    ema2 = EMAIndicator(ema1, window=10).ema_indicator()
+    ema3 = EMAIndicator(ema2, window=10).ema_indicator()
+    tema = 3 * ema1 - 3 * ema2 + ema3
     return ('tema', tema)
 
 def calc_stochrsi(close):
diff --git a/uv.lock b/uv.lock
index d071312..8d25bef 100644
--- a/uv.lock
+++ b/uv.lock
@@ -314,7 +314,6 @@ dependencies = [
     { name = "dash" },
     { name = "numba" },
     { name = "pandas" },
-    { name = "pandas-ta" },
     { name = "scikit-learn" },
     { name = "ta" },
     { name = "xgboost" },
@@ -325,7 +324,6 @@ requires-dist = [
     { name = "dash", specifier = ">=3.0.4" },
     { name = "numba", specifier = ">=0.61.2" },
     { name = "pandas", specifier = ">=2.2.3" },
-    { name = "pandas-ta", specifier = ">=0.3.14b0" },
     { name = "scikit-learn", specifier = ">=1.6.1" },
     { name = "ta", specifier = ">=0.11.0" },
     { name = "xgboost", specifier = ">=3.0.2" },
@@ -374,15 +372,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" },
 ]
 
-[[package]]
-name = "pandas-ta"
-version = "0.3.14b0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pandas" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/0b/1666f0a185d4f08215f53cc088122a73c92421447b04028f0464fabe1ce6/pandas_ta-0.3.14b.tar.gz", hash = "sha256:0fa35aec831d2815ea30b871688a8d20a76b288a7be2d26cc00c35cd8c09a993", size = 115089, upload-time = "2021-07-28T20:51:17.456Z" }
-
 [[package]]
 name = "plotly"
 version = "6.1.2"