Implemented Supertrend indicators for feature engineering in main.py, including caching of computed features. Updated plotting functions in plot_results.py to save charts in a dedicated directory and added new functions for directional accuracy and prediction transition heatmaps.

Added multiple technical indicators for feature engineering, including ADX, TRIX, Vortex, KAMA, Force Index, EOM, MFI, ADI, TEMA, StochRSI, and Awesome Oscillator. Improved NaN handling and implemented leave-one-out feature evaluation with results saved to CSV.
Reverted to sequential computation of features and added one distribution visualization graph.
2025-05-30 18:14:42 +08:00 · 2025-05-30 17:59:09 +08:00 · 2025-05-30 15:54:48 +08:00 · 2025-05-30 15:54:14 +08:00
5 changed files with 502 additions and 143 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,9 +8,12 @@ dependencies = [
    "dash>=3.0.4",
    "gspread>=6.2.1",
    "matplotlib>=3.10.3",
+    "numba>=0.61.2",
    "pandas>=2.2.3",
    "psutil>=7.0.0",
+    "scikit-learn>=1.6.1",
    "scipy>=1.15.3",
    "seaborn>=0.13.2",
+    "ta>=0.11.0",
    "xgboost>=3.0.2",
 ]
--- a/requirements.txt
+++ b/requirements.txt
--- a/uv.lock
+++ b/uv.lock
@@ -200,10 +200,13 @@ dependencies = [
    { name = "dash" },
    { name = "gspread" },
    { name = "matplotlib" },
+    { name = "numba" },
    { name = "pandas" },
    { name = "psutil" },
+    { name = "scikit-learn" },
    { name = "scipy" },
    { name = "seaborn" },
+    { name = "ta" },
    { name = "xgboost" },
 ]

@@ -212,10 +215,13 @@ requires-dist = [
    { name = "dash", specifier = ">=3.0.4" },
    { name = "gspread", specifier = ">=6.2.1" },
    { name = "matplotlib", specifier = ">=3.10.3" },
+    { name = "numba", specifier = ">=0.61.2" },
    { name = "pandas", specifier = ">=2.2.3" },
    { name = "psutil", specifier = ">=7.0.0" },
+    { name = "scikit-learn", specifier = ">=1.6.1" },
    { name = "scipy", specifier = ">=1.15.3" },
    { name = "seaborn", specifier = ">=0.13.2" },
+    { name = "ta", specifier = ">=0.11.0" },
    { name = "xgboost", specifier = ">=3.0.2" },
 ]

@@ -378,6 +384,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
 ]

+[[package]]
+name = "joblib"
+version = "1.5.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/fe/0f5a938c54105553436dbff7a61dc4fed4b1b2c98852f8833beaf4d5968f/joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444", size = 330475, upload-time = "2025-05-23T12:04:37.097Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" },
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.8"
@@ -465,6 +480,34 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762, upload-time = "2024-12-24T18:30:48.903Z" },
 ]

+[[package]]
+name = "llvmlite"
+version = "0.44.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/75/d4863ddfd8ab5f6e70f4504cf8cc37f4e986ec6910f4ef8502bb7d3c1c71/llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614", size = 28132306, upload-time = "2025-01-20T11:12:18.634Z" },
+    { url = "https://files.pythonhosted.org/packages/37/d9/6e8943e1515d2f1003e8278819ec03e4e653e2eeb71e4d00de6cfe59424e/llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791", size = 26201096, upload-time = "2025-01-20T11:12:24.544Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/46/8ffbc114def88cc698906bf5acab54ca9fdf9214fe04aed0e71731fb3688/llvmlite-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8", size = 42361859, upload-time = "2025-01-20T11:12:31.839Z" },
+    { url = "https://files.pythonhosted.org/packages/30/1c/9366b29ab050a726af13ebaae8d0dff00c3c58562261c79c635ad4f5eb71/llvmlite-0.44.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408", size = 41184199, upload-time = "2025-01-20T11:12:40.049Z" },
+    { url = "https://files.pythonhosted.org/packages/69/07/35e7c594b021ecb1938540f5bce543ddd8713cff97f71d81f021221edc1b/llvmlite-0.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2", size = 30332381, upload-time = "2025-01-20T11:12:47.054Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/e2/86b245397052386595ad726f9742e5223d7aea999b18c518a50e96c3aca4/llvmlite-0.44.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3", size = 28132305, upload-time = "2025-01-20T11:12:53.936Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/ec/506902dc6870249fbe2466d9cf66d531265d0f3a1157213c8f986250c033/llvmlite-0.44.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427", size = 26201090, upload-time = "2025-01-20T11:12:59.847Z" },
+    { url = "https://files.pythonhosted.org/packages/99/fe/d030f1849ebb1f394bb3f7adad5e729b634fb100515594aca25c354ffc62/llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1", size = 42361858, upload-time = "2025-01-20T11:13:07.623Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/7a/ce6174664b9077fc673d172e4c888cb0b128e707e306bc33fff8c2035f0d/llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610", size = 41184200, upload-time = "2025-01-20T11:13:20.058Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/c6/258801143975a6d09a373f2641237992496e15567b907a4d401839d671b8/llvmlite-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955", size = 30331193, upload-time = "2025-01-20T11:13:26.976Z" },
+    { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297, upload-time = "2025-01-20T11:13:32.57Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105, upload-time = "2025-01-20T11:13:38.744Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" },
+    { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" },
+    { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306, upload-time = "2025-01-20T11:14:09.035Z" },
+    { url = "https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090, upload-time = "2025-01-20T11:14:15.401Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904, upload-time = "2025-01-20T11:14:22.949Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245, upload-time = "2025-01-20T11:14:31.731Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193, upload-time = "2025-01-20T11:14:38.578Z" },
+]
+
 [[package]]
 name = "markupsafe"
 version = "3.0.2"
@@ -593,6 +636,38 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" },
 ]

+[[package]]
+name = "numba"
+version = "0.61.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "llvmlite" },
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/eb/ca/f470be59552ccbf9531d2d383b67ae0b9b524d435fb4a0d229fef135116e/numba-0.61.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:cf9f9fc00d6eca0c23fc840817ce9f439b9f03c8f03d6246c0e7f0cb15b7162a", size = 2775663, upload-time = "2025-04-09T02:57:34.143Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/13/3bdf52609c80d460a3b4acfb9fdb3817e392875c0d6270cf3fd9546f138b/numba-0.61.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ea0247617edcb5dd61f6106a56255baab031acc4257bddaeddb3a1003b4ca3fd", size = 2778344, upload-time = "2025-04-09T02:57:36.609Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/7d/bfb2805bcfbd479f04f835241ecf28519f6e3609912e3a985aed45e21370/numba-0.61.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8c7a522c26215d5f62ebec436e3d341f7f590079245a2f1008dfd498cc1642", size = 3824054, upload-time = "2025-04-09T02:57:38.162Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/27/797b2004745c92955470c73c82f0e300cf033c791f45bdecb4b33b12bdea/numba-0.61.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd1e74609855aa43661edffca37346e4e8462f6903889917e9f41db40907daa2", size = 3518531, upload-time = "2025-04-09T02:57:39.709Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/c6/c2fb11e50482cb310afae87a997707f6c7d8a48967b9696271347441f650/numba-0.61.2-cp310-cp310-win_amd64.whl", hash = "sha256:ae45830b129c6137294093b269ef0a22998ccc27bf7cf096ab8dcf7bca8946f9", size = 2831612, upload-time = "2025-04-09T02:57:41.559Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/97/c99d1056aed767503c228f7099dc11c402906b42a4757fec2819329abb98/numba-0.61.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:efd3db391df53aaa5cfbee189b6c910a5b471488749fd6606c3f33fc984c2ae2", size = 2775825, upload-time = "2025-04-09T02:57:43.442Z" },
+    { url = "https://files.pythonhosted.org/packages/95/9e/63c549f37136e892f006260c3e2613d09d5120672378191f2dc387ba65a2/numba-0.61.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:49c980e4171948ffebf6b9a2520ea81feed113c1f4890747ba7f59e74be84b1b", size = 2778695, upload-time = "2025-04-09T02:57:44.968Z" },
+    { url = "https://files.pythonhosted.org/packages/97/c8/8740616c8436c86c1b9a62e72cb891177d2c34c2d24ddcde4c390371bf4c/numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60", size = 3829227, upload-time = "2025-04-09T02:57:46.63Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/06/66e99ae06507c31d15ff3ecd1f108f2f59e18b6e08662cd5f8a5853fbd18/numba-0.61.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18", size = 3523422, upload-time = "2025-04-09T02:57:48.222Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/a4/2b309a6a9f6d4d8cfba583401c7c2f9ff887adb5d54d8e2e130274c0973f/numba-0.61.2-cp311-cp311-win_amd64.whl", hash = "sha256:76bcec9f46259cedf888041b9886e257ae101c6268261b19fda8cfbc52bec9d1", size = 2831505, upload-time = "2025-04-09T02:57:50.108Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626, upload-time = "2025-04-09T02:57:51.857Z" },
+    { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287, upload-time = "2025-04-09T02:57:53.658Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" },
+    { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" },
+    { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929, upload-time = "2025-04-09T02:57:58.45Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785, upload-time = "2025-04-09T02:57:59.96Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289, upload-time = "2025-04-09T02:58:01.435Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918, upload-time = "2025-04-09T02:58:02.933Z" },
+    { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056, upload-time = "2025-04-09T02:58:04.538Z" },
+    { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846, upload-time = "2025-04-09T02:58:06.125Z" },
+]
+
 [[package]]
 name = "numpy"
 version = "2.2.6"
@@ -938,6 +1013,44 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
 ]

+[[package]]
+name = "scikit-learn"
+version = "1.6.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "joblib" },
+    { name = "numpy" },
+    { name = "scipy" },
+    { name = "threadpoolctl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312, upload-time = "2025-01-10T08:07:55.348Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2e/3a/f4597eb41049110b21ebcbb0bcb43e4035017545daa5eedcfeb45c08b9c5/scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e", size = 12067702, upload-time = "2025-01-10T08:05:56.515Z" },
+    { url = "https://files.pythonhosted.org/packages/37/19/0423e5e1fd1c6ec5be2352ba05a537a473c1677f8188b9306097d684b327/scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36", size = 11112765, upload-time = "2025-01-10T08:06:00.272Z" },
+    { url = "https://files.pythonhosted.org/packages/70/95/d5cb2297a835b0f5fc9a77042b0a2d029866379091ab8b3f52cc62277808/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5", size = 12643991, upload-time = "2025-01-10T08:06:04.813Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/91/ab3c697188f224d658969f678be86b0968ccc52774c8ab4a86a07be13c25/scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b", size = 13497182, upload-time = "2025-01-10T08:06:08.42Z" },
+    { url = "https://files.pythonhosted.org/packages/17/04/d5d556b6c88886c092cc989433b2bab62488e0f0dafe616a1d5c9cb0efb1/scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002", size = 11125517, upload-time = "2025-01-10T08:06:12.783Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/2a/e291c29670795406a824567d1dfc91db7b699799a002fdaa452bceea8f6e/scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33", size = 12102620, upload-time = "2025-01-10T08:06:16.675Z" },
+    { url = "https://files.pythonhosted.org/packages/25/92/ee1d7a00bb6b8c55755d4984fd82608603a3cc59959245068ce32e7fb808/scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d", size = 11116234, upload-time = "2025-01-10T08:06:21.83Z" },
+    { url = "https://files.pythonhosted.org/packages/30/cd/ed4399485ef364bb25f388ab438e3724e60dc218c547a407b6e90ccccaef/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2", size = 12592155, upload-time = "2025-01-10T08:06:27.309Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/f3/62fc9a5a659bb58a03cdd7e258956a5824bdc9b4bb3c5d932f55880be569/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8", size = 13497069, upload-time = "2025-01-10T08:06:32.515Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/a6/c5b78606743a1f28eae8f11973de6613a5ee87366796583fb74c67d54939/scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415", size = 11139809, upload-time = "2025-01-10T08:06:35.514Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516, upload-time = "2025-01-10T08:06:40.009Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837, upload-time = "2025-01-10T08:06:43.305Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728, upload-time = "2025-01-10T08:06:47.618Z" },
+    { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700, upload-time = "2025-01-10T08:06:50.888Z" },
+    { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613, upload-time = "2025-01-10T08:06:54.115Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001, upload-time = "2025-01-10T08:06:58.613Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360, upload-time = "2025-01-10T08:07:01.556Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004, upload-time = "2025-01-10T08:07:06.931Z" },
+    { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776, upload-time = "2025-01-10T08:07:11.715Z" },
+    { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865, upload-time = "2025-01-10T08:07:16.088Z" },
+    { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804, upload-time = "2025-01-10T08:07:20.385Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530, upload-time = "2025-01-10T08:07:23.675Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852, upload-time = "2025-01-10T08:07:26.817Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256, upload-time = "2025-01-10T08:07:31.084Z" },
+]
+
 [[package]]
 name = "scipy"
 version = "1.15.3"
@@ -1026,6 +1139,25 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
 ]

+[[package]]
+name = "ta"
+version = "0.11.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+    { name = "pandas" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e0/9a/37d92a6b470dc9088612c2399a68f1a9ac22872d4e1eff416818e22ab11b/ta-0.11.0.tar.gz", hash = "sha256:de86af43418420bd6b088a2ea9b95483071bf453c522a8441bc2f12bcf8493fd", size = 25308, upload-time = "2023-11-02T13:53:35.434Z" }
+
+[[package]]
+name = "threadpoolctl"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.13.2"
--- a/xgboost/main.py
+++ b/xgboost/main.py
@@ -3,18 +3,15 @@ import os
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 import pandas as pd
 import numpy as np
-from sklearn.model_selection import train_test_split
 from custom_xgboost import CustomXGBoostGPU
-from sklearn.metrics import mean_squared_error
-from plot_results import display_actual_vs_predicted, plot_target_distribution, plot_predicted_vs_actual_log_returns, plot_predicted_vs_actual_prices
-import ta
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from plot_results import plot_prediction_error_distribution, plot_direction_transition_heatmap
 from cycles.supertrend import Supertrends
-from ta.trend import SMAIndicator, DPOIndicator, IchimokuIndicator, PSARIndicator
-from ta.momentum import ROCIndicator, KAMAIndicator, UltimateOscillator, StochasticOscillator, WilliamsRIndicator
-from ta.volatility import KeltnerChannel, DonchianChannel
-from ta.others import DailyReturnIndicator
 import time
 from numba import njit
+import itertools
+import csv
+import pandas_ta as ta

 def run_indicator(func, *args):
    return func(*args)
@@ -211,7 +208,74 @@ def run_feature_job(job, df):
    result = func(df, *args)
    return feature_name, result

+def calc_adx(high, low, close):
+    from ta.trend import ADXIndicator
+    adx = ADXIndicator(high=high, low=low, close=close, window=14)
+    return [
+        ('adx', adx.adx()),
+        ('adx_pos', adx.adx_pos()),
+        ('adx_neg', adx.adx_neg())
+    ]
+
+def calc_trix(close):
+    from ta.trend import TRIXIndicator
+    trix = TRIXIndicator(close=close, window=15)
+    return ('trix', trix.trix())
+
+def calc_vortex(high, low, close):
+    from ta.trend import VortexIndicator
+    vortex = VortexIndicator(high=high, low=low, close=close, window=14)
+    return [
+        ('vortex_pos', vortex.vortex_indicator_pos()),
+        ('vortex_neg', vortex.vortex_indicator_neg())
+    ]
+
+def calc_kama(close):
+    import pandas_ta as ta
+    kama = ta.kama(close, length=10)
+    return ('kama', kama)
+
+def calc_force_index(close, volume):
+    from ta.volume import ForceIndexIndicator
+    fi = ForceIndexIndicator(close=close, volume=volume, window=13)
+    return ('force_index', fi.force_index())
+
+def calc_eom(high, low, volume):
+    from ta.volume import EaseOfMovementIndicator
+    eom = EaseOfMovementIndicator(high=high, low=low, volume=volume, window=14)
+    return ('eom', eom.ease_of_movement())
+
+def calc_mfi(high, low, close, volume):
+    from ta.volume import MFIIndicator
+    mfi = MFIIndicator(high=high, low=low, close=close, volume=volume, window=14)
+    return ('mfi', mfi.money_flow_index())
+
+def calc_adi(high, low, close, volume):
+    from ta.volume import AccDistIndexIndicator
+    adi = AccDistIndexIndicator(high=high, low=low, close=close, volume=volume)
+    return ('adi', adi.acc_dist_index())
+
+def calc_tema(close):
+    import pandas_ta as ta
+    tema = ta.tema(close, length=10)
+    return ('tema', tema)
+
+def calc_stochrsi(close):
+    from ta.momentum import StochRSIIndicator
+    stochrsi = StochRSIIndicator(close=close, window=14, smooth1=3, smooth2=3)
+    return [
+        ('stochrsi', stochrsi.stochrsi()),
+        ('stochrsi_k', stochrsi.stochrsi_k()),
+        ('stochrsi_d', stochrsi.stochrsi_d())
+    ]
+
+def calc_awesome_oscillator(high, low):
+    from ta.momentum import AwesomeOscillatorIndicator
+    ao = AwesomeOscillatorIndicator(high=high, low=low, window1=5, window2=34)
+    return ('awesome_osc', ao.awesome_oscillator())
+
 if __name__ == '__main__':
+    IMPUTE_NANS = True  # Set to True to impute NaNs, False to drop rows with NaNs
    csv_path = './data/btcusd_1-min_data.csv'
    csv_prefix = os.path.splitext(os.path.basename(csv_path))[0]

@@ -516,8 +580,7 @@ if __name__ == '__main__':
            np.save(sub_feature_file, values.values)
            print(f'Saved feature: {sub_feature_file}')

-    # Prepare jobs for lags, rolling stats, log returns, and volatility
-    feature_jobs = []
+    # Prepare lags, rolling stats, log returns, and volatility features sequentially
    # Lags
    for col in ohlcv_cols:
        for lag in range(1, lags + 1):
@@ -527,8 +590,11 @@ if __name__ == '__main__':
                print(f'C Loading cached feature: {feature_file}')
                features_dict[feature_name] = np.load(feature_file)
            else:
-                print(f'Adding lag feature job: {feature_name}')
-                feature_jobs.append((feature_name, compute_lag, col, lag))
+                print(f'Computing lag feature: {feature_name}')
+                result = compute_lag(df, col, lag)
+                features_dict[feature_name] = result
+                np.save(feature_file, result.values)
+                print(f'Saved feature: {feature_file}')
    # Rolling statistics
    for col in ohlcv_cols:
        for window in window_sizes:
@@ -547,8 +613,11 @@ if __name__ == '__main__':
                    print(f'D Loading cached feature: {feature_file}')
                    features_dict[feature_name] = np.load(feature_file)
                else:
-                    print(f'Adding rolling stat feature job: {feature_name}')
-                    feature_jobs.append((feature_name, compute_rolling, col, stat, window))
+                    print(f'Computing rolling stat feature: {feature_name}')
+                    result = compute_rolling(df, col, stat, window)
+                    features_dict[feature_name] = result
+                    np.save(feature_file, result.values)
+                    print(f'Saved feature: {feature_file}')
    # Log returns for different horizons
    for horizon in [5, 15, 30]:
        feature_name = f'log_return_{horizon}'
@@ -557,8 +626,11 @@ if __name__ == '__main__':
            print(f'E Loading cached feature: {feature_file}')
            features_dict[feature_name] = np.load(feature_file)
        else:
-            print(f'Adding log return feature job: {feature_name}')
-            feature_jobs.append((feature_name, compute_log_return, horizon))
+            print(f'Computing log return feature: {feature_name}')
+            result = compute_log_return(df, horizon)
+            features_dict[feature_name] = result
+            np.save(feature_file, result.values)
+            print(f'Saved feature: {feature_file}')
    # Volatility
    for window in window_sizes:
        feature_name = f'volatility_{window}'
@@ -567,22 +639,61 @@ if __name__ == '__main__':
            print(f'F Loading cached feature: {feature_file}')
            features_dict[feature_name] = np.load(feature_file)
        else:
-            print(f'Adding volatility feature job: {feature_name}')
-            feature_jobs.append((feature_name, compute_volatility, window))
-
-    # Sequential computation for all non-cached features
-    if feature_jobs:
-        print(f'Computing {len(feature_jobs)} features sequentially...')
-        for job in feature_jobs:
-            print(f'Computing feature job: {job[0]}')
-            feature_name, result = run_feature_job(job, df)
+            print(f'Computing volatility feature: {feature_name}')
+            result = compute_volatility(df, window)
            features_dict[feature_name] = result
-            feature_file = f'./data/{csv_prefix}_{feature_name}.npy'
            np.save(feature_file, result.values)
-            print(f'Saved computed feature: {feature_file}')
-        print('All features computed.')
+            print(f'Saved feature: {feature_file}')
+
+    # --- Additional Technical Indicator Features ---
+    # ADX
+    adx_names = ['adx', 'adx_pos', 'adx_neg']
+    adx_files = [f'./data/{csv_prefix}_{name}.npy' for name in adx_names]
+    if all(os.path.exists(f) for f in adx_files):
+        print('G Loading cached features: ADX')
+        for name, f in zip(adx_names, adx_files):
+            arr = np.load(f)
+            features_dict[name] = pd.Series(arr, index=df.index)
    else:
-        print('All features loaded from cache.')
+        print('Calculating multi-column indicator: adx')
+        result = calc_adx(df['High'], df['Low'], df['Close'])
+        for subname, values in result:
+            sub_feature_file = f'./data/{csv_prefix}_{subname}.npy'
+            features_dict[subname] = values
+            np.save(sub_feature_file, values.values)
+            print(f'Saved feature: {sub_feature_file}')
+
+    # Force Index
+    feature_file = f'./data/{csv_prefix}_force_index.npy'
+    if os.path.exists(feature_file):
+        print(f'K Loading cached feature: {feature_file}')
+        arr = np.load(feature_file)
+        features_dict['force_index'] = pd.Series(arr, index=df.index)
+    else:
+        print('Calculating feature: force_index')
+        _, values = calc_force_index(df['Close'], df['Volume'])
+        features_dict['force_index'] = values
+        np.save(feature_file, values.values)
+        print(f'Saved feature: {feature_file}')
+
+    # Supertrend indicators
+    for period, multiplier in [(12, 3.0), (10, 1.0), (11, 2.0)]:
+        st_name = f'supertrend_{period}_{multiplier}'
+        st_trend_name = f'supertrend_trend_{period}_{multiplier}'
+        st_file = f'./data/{csv_prefix}_{st_name}.npy'
+        st_trend_file = f'./data/{csv_prefix}_{st_trend_name}.npy'
+        if os.path.exists(st_file) and os.path.exists(st_trend_file):
+            print(f'L Loading cached features: {st_file}, {st_trend_file}')
+            features_dict[st_name] = pd.Series(np.load(st_file), index=df.index)
+            features_dict[st_trend_name] = pd.Series(np.load(st_trend_file), index=df.index)
+        else:
+            print(f'Calculating Supertrend indicator: {st_name}')
+            st = ta.supertrend(df['High'], df['Low'], df['Close'], length=period, multiplier=multiplier)
+            features_dict[st_name] = st[f'SUPERT_{period}_{multiplier}']
+            features_dict[st_trend_name] = st[f'SUPERTd_{period}_{multiplier}']
+            np.save(st_file, features_dict[st_name].values)
+            np.save(st_trend_file, features_dict[st_trend_name].values)
+            print(f'Saved features: {st_file}, {st_trend_file}')

    # Concatenate all new features at once
    print('Concatenating all new features to DataFrame...')
@@ -602,130 +713,94 @@ if __name__ == '__main__':
        except Exception:
            pass

-    # Drop intermediate features_df to free memory
-    print('Dropping intermediate features_df to free memory...')
-    del features_df
-    import gc
-    gc.collect()
-
-    feature_end_time = time.time()
-    print(f'Feature computation completed in {feature_end_time - feature_start_time:.2f} seconds.')
-
-    # Add Supertrend indicators (custom)
-    print('Preparing data for Supertrend calculation...')
-    st_df = df.rename(columns={'High': 'high', 'Low': 'low', 'Close': 'close'})
-    
-    print('Calculating Supertrend indicators...')
-    supertrend = Supertrends(st_df)
-    st_results = supertrend.calculate_supertrend_indicators()
-    for idx, st in enumerate(st_results):
-        period = st['params']['period']
-        multiplier = st['params']['multiplier']
-        # Skip useless supertrend features
-        if (period == 10 and multiplier == 1.0) or (period == 11 and multiplier == 2.0):
-            continue
-        print(f'Adding Supertrend features: supertrend_{period}_{multiplier} and supertrend_trend_{period}_{multiplier}')
-        df[f'supertrend_{period}_{multiplier}'] = st['results']['supertrend']
-        df[f'supertrend_trend_{period}_{multiplier}'] = st['results']['trend']
-
    # Add time features (exclude 'dayofweek')
    print('Adding hour feature...')
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
    df['hour'] = df['Timestamp'].dt.hour

-    # Drop NaNs after all feature engineering
-    print('Dropping NaNs after feature engineering...')
-    df = df.dropna().reset_index(drop=True)
+    # Handle NaNs after all feature engineering
+    if IMPUTE_NANS:
+        print('Imputing NaNs after feature engineering (using mean imputation)...')
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+        for col in numeric_cols:
+            df[col] = df[col].fillna(df[col].mean())
+        # If you want to impute non-numeric columns differently, add logic here
+    else:
+        print('Dropping NaNs after feature engineering...')
+        df = df.dropna().reset_index(drop=True)

    # Exclude 'Timestamp', 'Close', 'log_return', and any future target columns from features
    print('Selecting feature columns...')
    exclude_cols = ['Timestamp', 'Close', 'log_return', 'log_return_5', 'log_return_15', 'log_return_30']
    feature_cols = [col for col in df.columns if col not in exclude_cols]
+    print('Features used for training:', feature_cols)

-    # Print the features used for training
-    print("Features used for training:", feature_cols)
+    # Prepare CSV for results
+    results_csv = './data/leave_one_out_results.csv'
+    if not os.path.exists(results_csv):
+        with open(results_csv, 'w', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(['left_out_feature', 'used_features', 'rmse', 'mae', 'r2', 'mape', 'directional_accuracy'])

-    # Drop excluded columns to save memory
-    print('Dropping excluded columns to save memory...')
-    df = df[feature_cols + ['log_return', 'Timestamp']]
+    total_features = len(feature_cols)
+    never_leave_out = {'Open', 'High', 'Low', 'Close', 'Volume'}
+    for idx, left_out in enumerate(feature_cols):
+        if left_out in never_leave_out:
+            continue
+        used = [f for f in feature_cols if f != left_out]
+        print(f'\n=== Leave-one-out {idx+1}/{total_features}: left out {left_out} ===')
+        try:
+            # Prepare X and y for this combination
+            X = df[used].values.astype(np.float32)
+            y = df["log_return"].values.astype(np.float32)
+            split_idx = int(len(X) * 0.8)
+            X_train, X_test = X[:split_idx], X[split_idx:]
+            y_train, y_test = y[:split_idx], y[split_idx:]
+            test_timestamps = df['Timestamp'].values[split_idx:]

-    print('Preparing X and y...')
-    X = df[feature_cols].values.astype(np.float32)
-    y = df['log_return'].values.astype(np.float32)
-  
-    split_idx = int(len(X) * 0.8)
-    print(f'Splitting data: {split_idx} train, {len(X) - split_idx} test')
-    X_train, X_test = X[:split_idx], X[split_idx:]
-    y_train, y_test = y[:split_idx], y[split_idx:]
-    test_timestamps = df['Timestamp'].values[split_idx:]
+            model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
+            booster = model.train()
+            model.save_model(f'./data/xgboost_model_wo_{left_out}.json')

-    print('Initializing model...')
-    model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
- 
-    print('Training model...')
-    booster = model.train()
- 
-    print('Training complete.')
+            test_preds = model.predict(X_test)
+            rmse = np.sqrt(mean_squared_error(y_test, test_preds))

-    # Save the trained model
-    model.save_model('./data/xgboost_model.json')
-    print('Model saved to ./data/xgboost_model.json')
+            # Reconstruct price series from log returns
+            if 'Close' in df.columns:
+                close_prices = df['Close'].values
+            else:
+                close_prices = pd.read_csv(csv_path)['Close'].values
+            start_price = close_prices[split_idx]
+            actual_prices = [start_price]
+            for r_ in y_test:
+                actual_prices.append(actual_prices[-1] * np.exp(r_))
+            actual_prices = np.array(actual_prices[1:])
+            predicted_prices = [start_price]
+            for r_ in test_preds:
+                predicted_prices.append(predicted_prices[-1] * np.exp(r_))
+            predicted_prices = np.array(predicted_prices[1:])

-    if hasattr(model, 'params'):
-        print("Model hyperparameters:", model.params)
-    if hasattr(model, 'model') and hasattr(model.model, 'get_score'):
-        import operator
-        importances = model.model.get_score(importance_type='weight')
-        # Map f0, f1, ... to actual feature names
-        feature_map = {f"f{idx}": name for idx, name in enumerate(feature_cols)}
-        sorted_importances = sorted(importances.items(), key=operator.itemgetter(1), reverse=True)
-        print('Feature importances (sorted, with names):')
-        for feat, score in sorted_importances:
-            print(f'{feature_map.get(feat, feat)}: {score}')
+            mae = mean_absolute_error(actual_prices, predicted_prices)
+            r2 = r2_score(actual_prices, predicted_prices)
+            direction_actual = np.sign(np.diff(actual_prices))
+            direction_pred = np.sign(np.diff(predicted_prices))
+            directional_accuracy = (direction_actual == direction_pred).mean()
+            mape = np.mean(np.abs((actual_prices - predicted_prices) / actual_prices)) * 100

-    print('Making predictions for first 5 test samples...')
-    preds = model.predict(X_test[:5])
-    print('Predictions for first 5 test samples:', preds)
-    print('Actual values for first 5 test samples:', y_test[:5])
+            # Save results to CSV
+            with open(results_csv, 'a', newline='') as f:
+                writer = csv.writer(f)
+                writer.writerow([left_out, "|".join(used), rmse, mae, r2, mape, directional_accuracy])
+            print(f'Left out {left_out}: RMSE={rmse:.4f}, MAE={mae:.4f}, R2={r2:.4f}, MAPE={mape:.2f}%, DirAcc={directional_accuracy*100:.2f}%')

-    print('Making predictions for all test samples...')
-    test_preds = model.predict(X_test)
-    rmse = np.sqrt(mean_squared_error(y_test, test_preds))
-    print(f'RMSE on test set: {rmse:.4f}')
+            # Plotting for this run
+            plot_prefix = f'loo_{left_out}'
+            print('Plotting distribution of absolute prediction errors...')
+            plot_prediction_error_distribution(predicted_prices, actual_prices, prefix=plot_prefix)

-    print('Saving y_test and test_preds to disk...')
-    np.save('./data/y_test.npy', y_test)
-    np.save('./data/test_preds.npy', test_preds)
-
-    # Reconstruct price series from log returns
-    print('Reconstructing price series from log returns...')
-    # Get the last available Close price before the test set
-    # The DataFrame df has been reset, so use split_idx to get the right row
-    if 'Close' in df.columns:
-        close_prices = df['Close'].values
-    else:
-        # Reload original CSV to get Close prices if not present
-        close_prices = pd.read_csv(csv_path)['Close'].values
-    start_price = close_prices[split_idx]  # This is the price at the split point
-    # Actual prices
-    actual_prices = [start_price]
-    for r in y_test:
-        actual_prices.append(actual_prices[-1] * np.exp(r))
-    actual_prices = np.array(actual_prices[1:])
-    # Predicted prices
-    predicted_prices = [start_price]
-    for r in test_preds:
-        predicted_prices.append(predicted_prices[-1] * np.exp(r))
-    predicted_prices = np.array(predicted_prices[1:])
-
-    print('Plotting predicted vs actual prices...')
-    plot_predicted_vs_actual_prices(actual_prices, predicted_prices, test_timestamps)
-
-    print("Final features used for training:", feature_cols)
-
-    print("Shape of X:", X.shape)
-    print("First row of X:", X[0])
-    print("stoch_k in feature_cols?", "stoch_k" in feature_cols)
-    if "stoch_k" in feature_cols:
-        idx = feature_cols.index("stoch_k")
-        print("First 10 values of stoch_k:", X[:10, idx])
+            print('Plotting directional accuracy...')
+            plot_direction_transition_heatmap(actual_prices, predicted_prices, prefix=plot_prefix)
+        except Exception as e:
+            print(f'Leave-one-out failed for {left_out}: {e}')
+    print(f'All leave-one-out runs completed. Results saved to {results_csv}')
+    sys.exit(0)
--- a/xgboost/plot_results.py
+++ b/xgboost/plot_results.py
@@ -24,7 +24,7 @@ def display_actual_vs_predicted(y_test, test_preds, timestamps, n_plot=200):
        hovermode='closest'
    )
    fig = go.Figure(data=data, layout=layout)
-    pyo.plot(fig)
+    pyo.plot(fig, auto_open=False)

 def plot_target_distribution(y_train, y_test):
    import plotly.offline as pyo
@@ -50,7 +50,7 @@ def plot_target_distribution(y_train, y_test):
        barmode='overlay'
    )
    fig = go.Figure(data=data, layout=layout)
-    pyo.plot(fig)
+    pyo.plot(fig, auto_open=False)

 def plot_predicted_vs_actual_log_returns(y_test, test_preds, timestamps=None, n_plot=200):
    import plotly.offline as pyo
@@ -78,7 +78,7 @@ def plot_predicted_vs_actual_log_returns(y_test, test_preds, timestamps=None, n_
        hovermode='closest'
    )
    fig_line = go.Figure(data=data_line, layout=layout_line)
-    pyo.plot(fig_line, filename='log_return_line_plot.html')
+    pyo.plot(fig_line, filename='charts/log_return_line_plot.html', auto_open=False)

    # Scatter plot: Predicted vs Actual
    trace_scatter = go.Scatter(
@@ -108,7 +108,7 @@ def plot_predicted_vs_actual_log_returns(y_test, test_preds, timestamps=None, n_
        hovermode='closest'
    )
    fig_scatter = go.Figure(data=data_scatter, layout=layout_scatter)
-    pyo.plot(fig_scatter, filename='log_return_scatter_plot.html')
+    pyo.plot(fig_scatter, filename='charts/log_return_scatter_plot.html', auto_open=False)

 def plot_predicted_vs_actual_prices(actual_prices, predicted_prices, timestamps=None, n_plot=200):
    import plotly.offline as pyo
@@ -136,7 +136,7 @@ def plot_predicted_vs_actual_prices(actual_prices, predicted_prices, timestamps=
        hovermode='closest'
    )
    fig_line = go.Figure(data=data_line, layout=layout_line)
-    pyo.plot(fig_line, filename='price_line_plot.html')
+    pyo.plot(fig_line, filename='charts/price_line_plot.html', auto_open=False)

    # Scatter plot: Predicted vs Actual
    trace_scatter = go.Scatter(
@@ -166,4 +166,153 @@ def plot_predicted_vs_actual_prices(actual_prices, predicted_prices, timestamps=
        hovermode='closest'
    )
    fig_scatter = go.Figure(data=data_scatter, layout=layout_scatter)
-    pyo.plot(fig_scatter, filename='price_scatter_plot.html')
+    pyo.plot(fig_scatter, filename='charts/price_scatter_plot.html', auto_open=False)
+
+def plot_prediction_error_distribution(predicted_prices, actual_prices, nbins=100, prefix=""):
+    """
+    Plots the distribution of signed prediction errors (predicted - actual).
+
+    Negative errors (under-prediction) and non-negative errors (over-prediction)
+    are drawn as two overlaid histograms that share one bin layout. The chart is
+    written to 'charts/{prefix}_prediction_error_distribution.html' without
+    opening a browser.
+
+    Args:
+        predicted_prices: Array-like of predicted prices
+        actual_prices: Array-like of actual prices (same length as predicted_prices)
+        nbins: Number of equal-width bins spanning the observed error range
+        prefix: String prepended to the output file name
+    """
+    import os
+    import numpy as np
+    import plotly.offline as pyo
+    import plotly.graph_objs as go
+
+    errors = np.array(predicted_prices) - np.array(actual_prices)
+
+    # Separate negative and positive errors
+    neg_errors = errors[errors < 0]
+    pos_errors = errors[errors >= 0]
+
+    # Common bin layout for both traces. It is only defined when the finite
+    # errors span a non-zero range; otherwise (empty input, all errors equal,
+    # or nbins <= 0) xbins stays None and plotly picks the bins itself instead
+    # of receiving a zero/undefined bin size.
+    xbins = None
+    finite_errors = errors[np.isfinite(errors)]
+    if finite_errors.size and nbins > 0:
+        min_error = np.min(finite_errors)
+        max_error = np.max(finite_errors)
+        if max_error > min_error:
+            xbins = dict(start=min_error, end=max_error, size=(max_error - min_error) / nbins)
+
+    trace_neg = go.Histogram(
+        x=neg_errors,
+        opacity=0.75,
+        marker=dict(color='blue'),
+        name='Negative Error (Under-prediction)',
+        xbins=xbins
+    )
+    trace_pos = go.Histogram(
+        x=pos_errors,
+        opacity=0.75,
+        marker=dict(color='orange'),
+        name='Positive Error (Over-prediction)',
+        xbins=xbins
+    )
+    layout = go.Layout(
+        title='Distribution of Prediction Errors (Signed)',
+        xaxis=dict(title='Prediction Error (Predicted - Actual)'),
+        yaxis=dict(title='Frequency'),
+        barmode='overlay',
+        bargap=0.05
+    )
+    fig = go.Figure(data=[trace_neg, trace_pos], layout=layout)
+    filename = f'charts/{prefix}_prediction_error_distribution.html'
+    # Create the output directory first so the write does not fail on a fresh checkout.
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    pyo.plot(fig, filename=filename, auto_open=False)
+
+def plot_directional_accuracy(actual_prices, predicted_prices, timestamps=None, n_plot=200):
+    """
+    Plots the directional accuracy of predictions compared to actual price movements.
+
+    Each plotted point is an actual price change, coloured green when the
+    predicted change has the same sign and red otherwise. The percentage in the
+    title is computed over ALL price changes, while only the last n_plot
+    changes are drawn. The chart is written to
+    'charts/directional_accuracy.html' without opening a browser.
+
+    Args:
+        actual_prices: Array of actual price values
+        predicted_prices: Array of predicted price values
+        timestamps: Optional array of timestamps for x-axis
+        n_plot: Number of points to plot (default 200, plots last n_plot points)
+    """
+    import os
+    import plotly.graph_objs as go
+    import plotly.offline as pyo
+    import numpy as np
+
+    # Calculate price changes
+    actual_changes = np.diff(actual_prices)
+    predicted_changes = np.diff(predicted_prices)
+
+    # Determine if directions match
+    correct_direction = np.sign(actual_changes) == np.sign(predicted_changes)
+
+    # The title reports the overall accuracy, so compute it before the arrays
+    # are truncated to the plotting window. Empty input yields NaN.
+    if correct_direction.size:
+        accuracy = np.mean(correct_direction) * 100
+    else:
+        accuracy = float('nan')
+
+    # Get last n_plot points
+    actual_changes = actual_changes[-n_plot:]
+    correct_direction = correct_direction[-n_plot:]
+
+    if timestamps is not None:
+        x_values = timestamps[1:] # Skip first since we took diff
+        x_values = x_values[-n_plot:] # Get last n_plot points
+    else:
+        x_values = list(range(len(actual_changes)))
+    x_values = np.array(x_values)
+
+    # Create traces for correct and incorrect predictions
+    correct_trace = go.Scatter(
+        x=x_values[correct_direction],
+        y=actual_changes[correct_direction],
+        mode='markers',
+        name='Correct Direction',
+        marker=dict(color='green', size=8)
+    )
+
+    incorrect_trace = go.Scatter(
+        x=x_values[~correct_direction],
+        y=actual_changes[~correct_direction],
+        mode='markers',
+        name='Incorrect Direction',
+        marker=dict(color='red', size=8)
+    )
+
+    layout = go.Layout(
+        title=f'Directional Accuracy (Overall: {accuracy:.1f}%)',
+        xaxis=dict(title='Time' if timestamps is not None else 'Sample'),
+        yaxis=dict(title='Price Change'),
+        showlegend=True
+    )
+
+    fig = go.Figure(data=[correct_trace, incorrect_trace], layout=layout)
+    # Create the output directory first so the write does not fail on a fresh checkout.
+    os.makedirs('charts', exist_ok=True)
+    pyo.plot(fig, filename='charts/directional_accuracy.html', auto_open=False)
+
+def plot_direction_transition_heatmap(actual_prices, predicted_prices, prefix=""):
+    """
+    Plots a heatmap showing the frequency of each (actual, predicted) direction pair.
+
+    Directions are the signs of consecutive price differences (-1, 0, +1).
+    Steps where either direction is NaN are left out of the counts. The chart is
+    written to 'charts/{prefix}_direction_transition_heatmap.html' without
+    opening a browser.
+
+    Args:
+        actual_prices: Array of actual price values
+        predicted_prices: Array of predicted price values
+        prefix: String prepended to the output file name
+    """
+    import os
+    import numpy as np
+    import plotly.graph_objs as go
+    import plotly.offline as pyo
+
+    # Calculate directions
+    actual_direction = np.sign(np.diff(np.asarray(actual_prices, dtype=float)))
+    predicted_direction = np.sign(np.diff(np.asarray(predicted_prices, dtype=float)))
+
+    # Pair the two series element-wise; a longer series is truncated to the shorter one.
+    n_pairs = min(len(actual_direction), len(predicted_direction))
+    actual_direction = actual_direction[:n_pairs]
+    predicted_direction = predicted_direction[:n_pairs]
+
+    # np.sign(NaN) is NaN, which has no row/column in the matrix, so skip those pairs.
+    valid = ~(np.isnan(actual_direction) | np.isnan(predicted_direction))
+
+    # Build 3x3 matrix: rows=actual, cols=predicted, values=counts
+    # Map -1 -> 0, 0 -> 1, 1 -> 2 for indexing
+    rows = actual_direction[valid].astype(int) + 1
+    cols = predicted_direction[valid].astype(int) + 1
+    matrix = np.zeros((3, 3), dtype=int)
+    # add.at accumulates repeated (row, col) indices, unlike fancy-index "+=".
+    np.add.at(matrix, (rows, cols), 1)
+
+    # Axis labels
+    directions = ['Down (-1)', 'No Change (0)', 'Up (+1)']
+
+    # Plot heatmap
+    heatmap = go.Heatmap(
+        z=matrix,
+        x=directions,  # predicted
+        y=directions,  # actual
+        colorscale='Viridis',
+        colorbar=dict(title='Count')
+    )
+    layout = go.Layout(
+        title='Direction Prediction Transition Matrix',
+        xaxis=dict(title='Predicted Direction'),
+        yaxis=dict(title='Actual Direction')
+    )
+    fig = go.Figure(data=[heatmap], layout=layout)
+    filename = f'charts/{prefix}_direction_transition_heatmap.html'
+    # Create the output directory first so the write does not fail on a fresh checkout.
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    pyo.plot(fig, filename=filename, auto_open=False)
+
Author	SHA1	Message	Date
Simon Moisy	082a2835b6	Implemented Supertrend indicators for feature engineering in main.py, including caching of computed features. Updated plotting functions in plot_results.py to save charts in a dedicated directory and added new functions for directional accuracy and prediction transition heatmaps.	2025-05-30 18:14:42 +08:00
Simon Moisy	ada6150413	Added multiple technical indicators for feature engineering, including ADX, TRIX, Vortex, KAMA, Force Index, EOM, MFI, ADI, TEMA, StochRSI, and Awesome Oscillator. Improved NaN handling and implemented leave-one-out feature evaluation with results saved to CSV.	2025-05-30 17:59:09 +08:00
Simon Moisy	ced64825bd	reverted to sequential computing for features, added one distribution visualization graph	2025-05-30 15:54:48 +08:00
Simon Moisy	2f98463df8	more uv updates	2025-05-30 15:54:14 +08:00