gitignore updated, model file

model updated
Added mode indicators, still WIP
2025-05-30 12:31:20 +08:00 · 2025-05-30 12:29:37 +08:00 · 2025-05-29 12:45:45 +08:00 · 2025-05-29 18:28:53 +08:00 · 2025-05-29 11:04:03 +08:00 · 2025-05-28 02:50:40 +08:00
8 changed files with 1086 additions and 249 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,12 @@
 # ---> Python
-*.json
+/credentials/*.json
 *.csv
 *.png
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
+/data/*.npy

 # C extensions
 *.so
--- a/cycles/backtest.py
+++ b/cycles/backtest.py
@@ -1,5 +1,6 @@
 import pandas as pd
 import numpy as np
+import time

 from cycles.supertrend import Supertrends
 from cycles.market_fees import MarketFees
@@ -27,6 +28,9 @@ class Backtest:
        trends_arr = np.stack(trends, axis=1)
        meta_trend = np.where((trends_arr[:,0] == trends_arr[:,1]) & (trends_arr[:,1] == trends_arr[:,2]), 
                                trends_arr[:,0], 0)
+        # Shift meta_trend by one to avoid lookahead bias
+        meta_trend_signal = np.roll(meta_trend, 1)
+        meta_trend_signal[0] = 0  # or np.nan, but 0 means 'no signal' for first bar
        
        position = 0  # 0 = no position, 1 = long
        entry_price = 0
@@ -39,14 +43,22 @@ class Backtest:
        entry_time = None
        current_trade_min1_start_idx = None

-        min1_df['timestamp'] = pd.to_datetime(min1_df.index)
+        min1_df.index = pd.to_datetime(min1_df.index)
+        min1_timestamps = min1_df.index.values

+        last_print_time = time.time()
        for i in range(1, len(_df)):
+            current_time = time.time()
+            if current_time - last_print_time >= 5:
+                progress = (i / len(_df)) * 100
+                print(f"\rProgress: {progress:.1f}%", end="", flush=True)
+                last_print_time = current_time
+
            price_open = _df['open'].iloc[i]
            price_close = _df['close'].iloc[i]
            date = _df['timestamp'].iloc[i]
-            prev_mt = meta_trend[i-1]
-            curr_mt = meta_trend[i]
+            prev_mt = meta_trend_signal[i-1]
+            curr_mt = meta_trend_signal[i]
            
            # Check stop loss if in position
            if position == 1:
@@ -87,6 +99,8 @@ class Backtest:
            drawdown = (max_balance - balance) / max_balance
            drawdowns.append(drawdown)

+        print("\rProgress: 100%\r\n", end="", flush=True)
+
        # If still in position at end, sell at last close
        if position == 1:
            exit_result = Backtest.handle_exit(coin, _df['close'].iloc[-1], entry_price, entry_time, _df['timestamp'].iloc[-1])
--- a/cycles/supertrend.py
+++ b/cycles/supertrend.py
@@ -1,70 +1,30 @@
 import pandas as pd
 import numpy as np
 import logging
-from scipy.signal import find_peaks
-from matplotlib.patches import Rectangle
-from scipy import stats
-import concurrent.futures
-from functools import partial
 from functools import lru_cache
-import matplotlib.pyplot as plt

-# Color configuration
-# Plot colors
-DARK_BG_COLOR = '#181C27'
-LEGEND_BG_COLOR = '#333333'
-TITLE_COLOR = 'white'
-AXIS_LABEL_COLOR = 'white'
-
-# Candlestick colors
-CANDLE_UP_COLOR = '#089981'  # Green
-CANDLE_DOWN_COLOR = '#F23645'  # Red
-
-# Marker colors
-MIN_COLOR = 'red'
-MAX_COLOR = 'green'
-
-# Line style colors
-MIN_LINE_STYLE = 'g--'  # Green dashed
-MAX_LINE_STYLE = 'r--'  # Red dashed
-SMA7_LINE_STYLE = 'y-'  # Yellow solid
-SMA15_LINE_STYLE = 'm-'  # Magenta solid
-
-# SuperTrend colors
-ST_COLOR_UP = 'g-'
-ST_COLOR_DOWN = 'r-'
-
-# Cache the calculation results by function parameters
@lru_cache(maxsize=32)
 def cached_supertrend_calculation(period, multiplier, data_tuple):
-    # Convert tuple back to numpy arrays
    high = np.array(data_tuple[0])
    low = np.array(data_tuple[1])
    close = np.array(data_tuple[2])
-    
-    # Calculate TR and ATR using vectorized operations
    tr = np.zeros_like(close)
    tr[0] = high[0] - low[0]
    hc_range = np.abs(high[1:] - close[:-1])
    lc_range = np.abs(low[1:] - close[:-1])
    hl_range = high[1:] - low[1:]
    tr[1:] = np.maximum.reduce([hl_range, hc_range, lc_range])
-    
-    # Use numpy's exponential moving average
    atr = np.zeros_like(tr)
    atr[0] = tr[0]
    multiplier_ema = 2.0 / (period + 1)
    for i in range(1, len(tr)):
        atr[i] = (tr[i] * multiplier_ema) + (atr[i-1] * (1 - multiplier_ema))
-
-    # Calculate bands
    upper_band = np.zeros_like(close)
    lower_band = np.zeros_like(close)
    for i in range(len(close)):
        hl_avg = (high[i] + low[i]) / 2
        upper_band[i] = hl_avg + (multiplier * atr[i])
        lower_band[i] = hl_avg - (multiplier * atr[i])
-
    final_upper = np.zeros_like(close)
    final_lower = np.zeros_like(close)
    supertrend = np.zeros_like(close)
@@ -106,76 +66,18 @@ def cached_supertrend_calculation(period, multiplier, data_tuple):
    }

 def calculate_supertrend_external(data, period, multiplier):
-    # Convert DataFrame columns to hashable tuples
    high_tuple = tuple(data['high'])
    low_tuple = tuple(data['low'])
    close_tuple = tuple(data['close'])
-    
-    # Call the cached function
    return cached_supertrend_calculation(period, multiplier, (high_tuple, low_tuple, close_tuple))

-
 class Supertrends:
    def __init__(self, data, verbose=False, display=False):
-        """
-        Initialize the TrendDetectorSimple class.
-        
-        Parameters:
-        - data: pandas DataFrame containing price data
-        - verbose: boolean, whether to display detailed logging information
-        - display: boolean, whether to enable display/plotting features
-        """
-        
        self.data = data
        self.verbose = verbose
-        self.display = display
-        
-        # Only define display-related variables if display is True
-        if self.display:
-            # Plot style configuration
-            self.plot_style = 'dark_background' 
-            self.bg_color = DARK_BG_COLOR
-            self.plot_size = (12, 8)
-            
-            # Candlestick configuration
-            self.candle_width = 0.6
-            self.candle_up_color = CANDLE_UP_COLOR
-            self.candle_down_color = CANDLE_DOWN_COLOR
-            self.candle_alpha = 0.8
-            self.wick_width = 1
-            
-            # Marker configuration
-            self.min_marker = '^'
-            self.min_color = MIN_COLOR
-            self.min_size = 100
-            self.max_marker = 'v'
-            self.max_color = MAX_COLOR
-            self.max_size = 100
-            self.marker_zorder = 100
-            
-            # Line configuration
-            self.line_width = 1
-            self.min_line_style = MIN_LINE_STYLE
-            self.max_line_style = MAX_LINE_STYLE
-            self.sma7_line_style = SMA7_LINE_STYLE
-            self.sma15_line_style = SMA15_LINE_STYLE
-            
-            # Text configuration
-            self.title_size = 14
-            self.title_color = TITLE_COLOR
-            self.axis_label_size = 12
-            self.axis_label_color = AXIS_LABEL_COLOR
-            
-            # Legend configuration
-            self.legend_loc = 'best'
-            self.legend_bg_color = LEGEND_BG_COLOR
-        
-        # Configure logging
        logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
                           format='%(asctime)s - %(levelname)s - %(message)s')
        self.logger = logging.getLogger('TrendDetectorSimple')
-        
-        # Convert data to pandas DataFrame if it's not already
        if not isinstance(self.data, pd.DataFrame):
            if isinstance(self.data, list):
                self.data = pd.DataFrame({'close': self.data})
@@ -183,154 +85,101 @@ class Supertrends:
                raise ValueError("Data must be a pandas DataFrame or a list")

    def calculate_tr(self):
+        df = self.data.copy()
+        high = df['high'].values
+        low = df['low'].values
+        close = df['close'].values
+        tr = np.zeros_like(close)
+        tr[0] = high[0] - low[0]
+        for i in range(1, len(close)):
+            hl_range = high[i] - low[i]
+            hc_range = abs(high[i] - close[i-1])
+            lc_range = abs(low[i] - close[i-1])
+            tr[i] = max(hl_range, hc_range, lc_range)
+        return tr
+
+    def calculate_atr(self, period=14):
+        tr = self.calculate_tr()
+        atr = np.zeros_like(tr)
+        atr[0] = tr[0]
+        multiplier = 2.0 / (period + 1)
+        for i in range(1, len(tr)):
+            atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
+        return atr
+
+    def calculate_supertrend(self, period=10, multiplier=3.0):
        """
-        Calculate True Range (TR) for the price data.
-        
-        True Range is the greatest of:
-        1. Current high - current low
-        2. |Current high - previous close|
-        3. |Current low - previous close|
-        
+        Calculate SuperTrend indicator for the price data.
+        SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
+        Parameters:
+        - period: int, the period for the ATR calculation (default: 10)
+        - multiplier: float, the multiplier for the ATR (default: 3.0)
        Returns:
-        - Numpy array of TR values
+        - Dictionary containing SuperTrend values, trend direction, and upper/lower bands
        """
        df = self.data.copy()
        high = df['high'].values
        low = df['low'].values
        close = df['close'].values
-        
-        tr = np.zeros_like(close)
-        tr[0] = high[0] - low[0]  # First TR is just the first day's range
-        
+        atr = self.calculate_atr(period)
+        upper_band = np.zeros_like(close)
+        lower_band = np.zeros_like(close)
+        for i in range(len(close)):
+            hl_avg = (high[i] + low[i]) / 2
+            upper_band[i] = hl_avg + (multiplier * atr[i])
+            lower_band[i] = hl_avg - (multiplier * atr[i])
+        final_upper = np.zeros_like(close)
+        final_lower = np.zeros_like(close)
+        supertrend = np.zeros_like(close)
+        trend = np.zeros_like(close)
+        final_upper[0] = upper_band[0]
+        final_lower[0] = lower_band[0]
+        if close[0] <= upper_band[0]:
+            supertrend[0] = upper_band[0]
+            trend[0] = -1
+        else:
+            supertrend[0] = lower_band[0]
+            trend[0] = 1
        for i in range(1, len(close)):
-            # Current high - current low
-            hl_range = high[i] - low[i]
-            # |Current high - previous close|
-            hc_range = abs(high[i] - close[i-1])
-            # |Current low - previous close|
-            lc_range = abs(low[i] - close[i-1])
-            
-            # TR is the maximum of these three values
-            tr[i] = max(hl_range, hc_range, lc_range)
-            
-        return tr
-    
-    def calculate_atr(self, period=14):
-        """
-        Calculate Average True Range (ATR) for the price data.
-        
-        ATR is the exponential moving average of the True Range over a specified period.
-        
-        Parameters:
-        - period: int, the period for the ATR calculation (default: 14)
-        
-        Returns:
-        - Numpy array of ATR values
-        """
-        
-        tr = self.calculate_tr()
-        atr = np.zeros_like(tr)
-        
-        # First ATR value is just the first TR
-        atr[0] = tr[0]
-        
-        # Calculate exponential moving average (EMA) of TR
-        multiplier = 2.0 / (period + 1)
-        
-        for i in range(1, len(tr)):
-            atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
-            
-        return atr
-    
-    def detect_trends(self):
-        """
-        Detect trends by identifying local minima and maxima in the price data
-        using scipy.signal.find_peaks.
-        
-        Parameters:
-        - prominence: float, required prominence of peaks (relative to the price range)
-        - width: int, required width of peaks in data points
-        
-        Returns:
-        - DataFrame with columns for timestamps, prices, and trend indicators
-        - Dictionary containing analysis results including linear regression, SMAs, and SuperTrend indicators
-        """
-        df = self.data
-        # close_prices = df['close'].values
-        
-        # max_peaks, _ = find_peaks(close_prices)
-        # min_peaks, _ = find_peaks(-close_prices)
-        
-        # df['is_min'] = False
-        # df['is_max'] = False
-        
-        # for peak in max_peaks:
-        #     df.at[peak, 'is_max'] = True
-        # for peak in min_peaks:
-        #     df.at[peak, 'is_min'] = True
-        
-        # result = df[['timestamp', 'close', 'is_min', 'is_max']].copy()
-        
-        # Perform linear regression on min_peaks and max_peaks
-        # min_prices = df['close'].iloc[min_peaks].values
-        # max_prices = df['close'].iloc[max_peaks].values
-        
-        # Linear regression for min peaks if we have at least 2 points
-        # min_slope, min_intercept, min_r_value, _, _ = stats.linregress(min_peaks, min_prices)
-        # Linear regression for max peaks if we have at least 2 points
-        # max_slope, max_intercept, max_r_value, _, _ = stats.linregress(max_peaks, max_prices)
+            if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
+                final_upper[i] = upper_band[i]
+            else:
+                final_upper[i] = final_upper[i-1]
+            if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
+                final_lower[i] = lower_band[i]
+            else:
+                final_lower[i] = final_lower[i-1]
+            if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
+                supertrend[i] = final_upper[i]
+                trend[i] = -1
+            elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
+                supertrend[i] = final_lower[i]
+                trend[i] = 1
+            elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
+                supertrend[i] = final_lower[i]
+                trend[i] = 1
+            elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
+                supertrend[i] = final_upper[i]
+                trend[i] = -1
+        supertrend_results = {
+            'supertrend': supertrend,
+            'trend': trend,
+            'upper_band': final_upper,
+            'lower_band': final_lower
+        }
+        return supertrend_results

-        # Calculate Simple Moving Averages (SMA) for 7 and 15 periods        
-        # sma_7 = pd.Series(close_prices).rolling(window=7, min_periods=1).mean().values
-        # sma_15 = pd.Series(close_prices).rolling(window=15, min_periods=1).mean().values
-        
-        analysis_results = {}
-        # analysis_results['linear_regression'] = {
-        #     'min': {
-        #         'slope': min_slope,
-        #         'intercept': min_intercept,
-        #         'r_squared': min_r_value ** 2
-        #     },
-        #     'max': {
-        #         'slope': max_slope,
-        #         'intercept': max_intercept,
-        #         'r_squared': max_r_value ** 2
-        #     }
-        # }
-        # analysis_results['sma'] = {
-        #     '7': sma_7,
-        #     '15': sma_15
-        # }
-        
-        # Calculate SuperTrend indicators
-        supertrend_results_list = self._calculate_supertrend_indicators()
-        analysis_results['supertrend'] = supertrend_results_list
-        
-        return analysis_results
-        
    def calculate_supertrend_indicators(self):
-        """
-        Calculate SuperTrend indicators with different parameter sets in parallel.
-        Returns:
-        - list, the SuperTrend results
-        """
        supertrend_params = [
-            {"period": 12, "multiplier": 3.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
-            {"period": 10, "multiplier": 1.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
-            {"period": 11, "multiplier": 2.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN}
+            {"period": 12, "multiplier": 3.0},
+            {"period": 10, "multiplier": 1.0},
+            {"period": 11, "multiplier": 2.0}
        ]
-        data = self.data.copy()
-        
-        # For just 3 calculations, direct calculation might be faster than process pool
        results = []
        for p in supertrend_params:
-            result = calculate_supertrend_external(data, p["period"], p["multiplier"])
-            results.append(result)
-        
-        supertrend_results_list = []
-        for params, result in zip(supertrend_params, results):
-            supertrend_results_list.append({
+            result = self.calculate_supertrend(period=p["period"], multiplier=p["multiplier"])
+            results.append({
                "results": result,
-                "params": params
+                "params": p
            })
-        return supertrend_results_list
+        return results
--- a/data/xgboost_model.json
+++ b/data/xgboost_model.json
--- a/main.py
+++ b/main.py
@@ -6,7 +6,6 @@ import os
 import datetime
 import argparse
 import json
-import ast

 from cycles.utils.storage import Storage
 from cycles.utils.system import SystemUtils
@@ -48,6 +47,7 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
        cumulative_profit = 0
        max_drawdown = 0
        peak = 0
+
        for trade in trades:
            cumulative_profit += trade['profit_pct']
            if cumulative_profit > peak:
@@ -55,10 +55,14 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
            drawdown = peak - cumulative_profit
            if drawdown > max_drawdown:
                max_drawdown = drawdown
+
        final_usd = initial_usd
+
        for trade in trades:
            final_usd *= (1 + trade['profit_pct'])
-        total_fees_usd = sum(trade.get('fee_usd', 0.0) for trade in trades)
+
+        total_fees_usd = sum(trade['fee_usd'] for trade in trades)
+
        row = {
            "timeframe": rule_name,
            "stop_loss_pct": stop_loss_pct,
@@ -75,6 +79,7 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
            "total_fees_usd": total_fees_usd,
        }
        results_rows.append(row)
+
        for trade in trades:
            trade_rows.append({
                "timeframe": rule_name,
@@ -87,7 +92,9 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
                "type": trade.get("type"),
                "fee_usd": trade.get("fee_usd"),
            })
+
        logging.info(f"Timeframe: {rule_name}, Stop Loss: {stop_loss_pct}, Trades: {n_trades}")
+
        if debug:
            for trade in trades:
                if trade['type'] == 'STOP':
@@ -95,13 +102,16 @@ def process_timeframe_data(min1_df, df, stop_loss_pcts, rule_name, initial_usd,
            for trade in trades:
                if trade['profit_pct'] < -0.09:  # or whatever is close to -0.10
                    print("Large loss trade:", trade)
+
    return results_rows, trade_rows

 def process(timeframe_info, debug=False):
-    """Process a single (timeframe, stop_loss_pct) combination (no monthly split)"""
+    from cycles.utils.storage import Storage  # import inside function for safety
+    storage = Storage(logging=None)  # or pass a logger if you want, but None is safest for multiprocessing
+
    rule, data_1min, stop_loss_pct, initial_usd = timeframe_info

-    if rule == "1T":
+    if rule == "1T" or rule == "1min":
        df = data_1min.copy()
    else:
        df = data_1min.resample(rule).agg({
@@ -112,7 +122,33 @@ def process(timeframe_info, debug=False):
            'volume': 'sum'
        }).dropna()
    df = df.reset_index()
+
    results_rows, all_trade_rows = process_timeframe_data(data_1min, df, [stop_loss_pct], rule, initial_usd, debug=debug)
+
+    if all_trade_rows:
+        trades_fieldnames = ["entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type", "fee_usd"]
+        # Prepare header
+        summary_fields = ["timeframe", "stop_loss_pct", "n_trades", "n_stop_loss", "win_rate", "max_drawdown", "avg_trade", "profit_ratio", "final_usd"]
+        summary_row = results_rows[0]
+        header_line = "\t".join(summary_fields) + "\n"
+        value_line = "\t".join(str(summary_row.get(f, "")) for f in summary_fields) + "\n"
+        # File name
+        tf = summary_row["timeframe"]
+        sl = summary_row["stop_loss_pct"]
+        sl_percent = int(round(sl * 100))
+        trades_filename = os.path.join(storage.results_dir, f"trades_{tf}_ST{sl_percent}pct.csv")
+        # Write header
+        with open(trades_filename, "w") as f:
+            f.write(header_line)
+            f.write(value_line)
+        # Now write trades (append mode, skip header)
+        with open(trades_filename, "a", newline="") as f:
+            import csv
+            writer = csv.DictWriter(f, fieldnames=trades_fieldnames)
+            writer.writeheader()
+            for trade in all_trade_rows:
+                writer.writerow({k: trade.get(k, "") for k in trades_fieldnames})
+
    return results_rows, all_trade_rows

 def aggregate_results(all_rows):
@@ -126,7 +162,6 @@ def aggregate_results(all_rows):

    summary_rows = []
    for (rule, stop_loss_pct), rows in grouped.items():
-        n_months = len(rows)
        total_trades = sum(r['n_trades'] for r in rows)
        total_stop_loss = sum(r['n_stop_loss'] for r in rows)
        avg_win_rate = np.mean([r['win_rate'] for r in rows])
@@ -163,7 +198,7 @@ def get_nearest_price(df, target_date):
        return nearest_time, price

 if __name__ == "__main__":
-    debug = True
+    debug = False

    parser = argparse.ArgumentParser(description="Run backtest with config file.")
    parser.add_argument("config", type=str, nargs="?", help="Path to config JSON file.")
@@ -171,11 +206,11 @@ if __name__ == "__main__":

    # Default values (from config.json)
    default_config = {
-        "start_date": "2024-05-15",
+        "start_date": "2025-05-01",
        "stop_date": datetime.datetime.today().strftime('%Y-%m-%d'),
        "initial_usd": 10000,
-        "timeframes": ["1D"],
-        "stop_loss_pcts": [0.01, 0.02, 0.03],
+        "timeframes": ["1D", "6h", "3h", "1h", "30min", "15min", "5min", "1min"],  # "min" is the pandas minute alias ("m" is read as month-end); process() special-cases "1min"
+        "stop_loss_pcts": [0.01, 0.02, 0.03, 0.05],
    }

    if args.config:
@@ -238,6 +273,7 @@ if __name__ == "__main__":
    if debug:
        all_results_rows = []
        all_trade_rows = []
+
        for task in tasks:
            results, trades = process(task, debug)
            if results or trades:
@@ -263,7 +299,4 @@ if __name__ == "__main__":
    ]
    storage.write_backtest_results(backtest_filename, backtest_fieldnames, all_results_rows, metadata_lines)

-    trades_fieldnames = ["entry_time", "exit_time", "entry_price", "exit_price", "profit_pct", "type", "fee_usd"]
-    storage.write_trades(all_trade_rows, trades_fieldnames)
-
    
--- a/xgboost/custom_xgboost.py
+++ b/xgboost/custom_xgboost.py
@@ -0,0 +1,39 @@
+import xgboost as xgb
+import numpy as np
+
+class CustomXGBoostGPU:
+    """Small wrapper around ``xgb.train`` / ``Booster.predict`` for one train/eval split.
+
+    The four arrays given to the constructor are cast to ``float32`` and kept
+    on the instance; ``train`` builds the ``DMatrix`` objects from them.
+    """
+
+    def __init__(self, X_train, X_test, y_train, y_test):
+        """Store the split, cast to ``float32``.
+
+        Every argument must provide ``.astype`` (e.g. a NumPy array).
+        """
+        self.X_train = X_train.astype(np.float32)
+        self.X_test = X_test.astype(np.float32)
+        self.y_train = y_train.astype(np.float32)
+        self.y_test = y_test.astype(np.float32)
+        self.model = None  # Booster returned by train()
+        self.params = None  # Will be set during training
+
+    def train(self, *, num_boost_round=100, early_stopping_rounds=10, **xgb_params):
+        """Train a booster on the stored split and return it.
+
+        Parameters:
+        - num_boost_round: maximum number of boosting rounds (default 100,
+          the value that used to be hard-coded).
+        - early_stopping_rounds: rounds without improvement on the last
+          evaluation set ('eval') before training stops (default 10, the
+          value that used to be hard-coded).
+        - **xgb_params: booster parameters; they override the defaults below.
+
+        The merged booster parameters are kept in ``self.params``.
+        """
+        params = {
+            'tree_method': 'hist',
+            'device': 'cuda',
+            'objective': 'reg:squarederror',
+            'eval_metric': 'rmse',
+            'verbosity': 1,
+        }
+        params.update(xgb_params)
+        self.params = params  # Store params for later access
+        dtrain = xgb.DMatrix(self.X_train, label=self.y_train)
+        dtest = xgb.DMatrix(self.X_test, label=self.y_test)
+        evals = [(dtrain, 'train'), (dtest, 'eval')]
+        self.model = xgb.train(
+            params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            evals=evals,
+            early_stopping_rounds=early_stopping_rounds,
+        )
+        return self.model
+
+    def predict(self, X):
+        """Return the trained booster's predictions for ``X`` (cast to ``float32``).
+
+        Raises ValueError if ``train`` has not been called.
+        """
+        if self.model is None:
+            raise ValueError('Model not trained yet.')
+        # NOTE(review): per the XGBoost docs the native Booster.predict uses the
+        # full model, including rounds added after the best early-stopping
+        # iteration; pass iteration_range if best-iteration output is wanted.
+        dmatrix = xgb.DMatrix(X.astype(np.float32))
+        return self.model.predict(dmatrix)
+
+    def save_model(self, file_path):
+        """Save the trained XGBoost model to the specified file path.
+
+        Raises ValueError if ``train`` has not been called.
+        """
+        if self.model is None:
+            raise ValueError('Model not trained yet.')
+        self.model.save_model(file_path)
--- a/xgboost/main.py
+++ b/xgboost/main.py
@@ -0,0 +1,731 @@
+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from custom_xgboost import CustomXGBoostGPU
+from sklearn.metrics import mean_squared_error
+from plot_results import display_actual_vs_predicted, plot_target_distribution, plot_predicted_vs_actual_log_returns, plot_predicted_vs_actual_prices
+import ta
+from cycles.supertrend import Supertrends
+from ta.trend import SMAIndicator, DPOIndicator, IchimokuIndicator, PSARIndicator
+from ta.momentum import ROCIndicator, KAMAIndicator, UltimateOscillator, StochasticOscillator, WilliamsRIndicator
+from ta.volatility import KeltnerChannel, DonchianChannel
+from ta.others import DailyReturnIndicator
+import time
+from numba import njit
+
+def run_indicator(func, *args):
+    """Call ``func`` with the given positional arguments and return its result."""
+    outcome = func(*args)
+    return outcome
+
+def run_indicator_job(job):
+    """Run one indicator job, print its wall-clock duration, and return its result.
+
+    ``job`` is an iterable whose first item is the callable and whose remaining
+    items are that callable's positional arguments.
+    """
+    import time
+    indicator_fn, *indicator_args = job
+    label = indicator_fn.__name__
+    started_at = time.time()
+    outcome = indicator_fn(*indicator_args)
+    took = time.time() - started_at
+    print(f'Indicator {label} computed in {took:.4f} seconds')
+    return outcome
+
+def calc_rsi(close):
+    """Return ``('rsi', series)`` from ``ta``'s ``RSIIndicator`` with ``window=14``."""
+    from ta.momentum import RSIIndicator
+    indicator = RSIIndicator(close, window=14)
+    return ('rsi', indicator.rsi())
+
+def calc_macd(close):
+    """Return ``('macd', series)`` from ``ta``'s ``MACD`` with its default windows."""
+    from ta.trend import MACD
+    indicator = MACD(close)
+    return ('macd', indicator.macd())
+
+def calc_bollinger(close):
+    """Return Bollinger band features as a list of ``(name, series)`` pairs.
+
+    Uses ``ta``'s ``BollingerBands`` with ``window=20`` and ``window_dev=2``.
+    The pairs are, in order: 'bb_bbm' (moving average), 'bb_bbh' (upper band),
+    'bb_bbl' (lower band) and 'bb_bb_width' (upper minus lower).
+    """
+    from ta.volatility import BollingerBands
+    bb = BollingerBands(close=close, window=20, window_dev=2)
+    # Compute each band once and reuse it for the width instead of asking the
+    # indicator for the same series twice.
+    upper = bb.bollinger_hband()
+    lower = bb.bollinger_lband()
+    return [
+        ('bb_bbm', bb.bollinger_mavg()),
+        ('bb_bbh', upper),
+        ('bb_bbl', lower),
+        ('bb_bb_width', upper - lower)
+    ]
+
+def calc_stochastic(high, low, close):
+    """Return ``[('stoch_k', ...), ('stoch_d', ...)]`` from ``ta``'s ``StochasticOscillator``.
+
+    The oscillator is built with ``window=14`` and ``smooth_window=3``.
+    """
+    from ta.momentum import StochasticOscillator
+    oscillator = StochasticOscillator(high=high, low=low, close=close, window=14, smooth_window=3)
+    k_line = oscillator.stoch()
+    d_line = oscillator.stoch_signal()
+    return [('stoch_k', k_line), ('stoch_d', d_line)]
+
+def calc_atr(high, low, close):
+    """Return ``('atr', series)`` from ``ta``'s ``AverageTrueRange`` with ``window=14``."""
+    from ta.volatility import AverageTrueRange
+    indicator = AverageTrueRange(high=high, low=low, close=close, window=14)
+    values = indicator.average_true_range()
+    return ('atr', values)
+
+def calc_cci(high, low, close):
+    """Return ``('cci', series)`` from ``ta``'s ``CCIIndicator`` with ``window=20``."""
+    from ta.trend import CCIIndicator
+    indicator = CCIIndicator(high=high, low=low, close=close, window=20)
+    values = indicator.cci()
+    return ('cci', values)
+
+def calc_williamsr(high, low, close):
+    """Return ``('williams_r', series)`` from ``ta``'s ``WilliamsRIndicator`` with ``lbp=14``."""
+    from ta.momentum import WilliamsRIndicator
+    indicator = WilliamsRIndicator(high=high, low=low, close=close, lbp=14)
+    values = indicator.williams_r()
+    return ('williams_r', values)
+
+def calc_ema(close):
+    """Return ``('ema_14', series)`` from ``ta``'s ``EMAIndicator`` with ``window=14``."""
+    from ta.trend import EMAIndicator
+    indicator = EMAIndicator(close=close, window=14)
+    values = indicator.ema_indicator()
+    return ('ema_14', values)
+
+def calc_obv(close, volume):
+    """Return ``('obv', series)`` from ``ta``'s ``OnBalanceVolumeIndicator``."""
+    from ta.volume import OnBalanceVolumeIndicator
+    indicator = OnBalanceVolumeIndicator(close=close, volume=volume)
+    values = indicator.on_balance_volume()
+    return ('obv', values)
+
+def calc_cmf(high, low, close, volume):
+    """Return ``('cmf', series)`` from ``ta``'s ``ChaikinMoneyFlowIndicator`` with ``window=20``."""
+    from ta.volume import ChaikinMoneyFlowIndicator
+    indicator = ChaikinMoneyFlowIndicator(high=high, low=low, close=close, volume=volume, window=20)
+    values = indicator.chaikin_money_flow()
+    return ('cmf', values)
+
+def calc_sma(close):
+    """Return ``[('sma_50', ...), ('sma_200', ...)]`` from ``ta``'s ``SMAIndicator``."""
+    from ta.trend import SMAIndicator
+    short_sma = SMAIndicator(close, window=50).sma_indicator()
+    long_sma = SMAIndicator(close, window=200).sma_indicator()
+    return [('sma_50', short_sma), ('sma_200', long_sma)]
+
+def calc_roc(close):
+    """Return ``('roc_10', series)`` from ``ta``'s ``ROCIndicator`` with ``window=10``."""
+    from ta.momentum import ROCIndicator
+    indicator = ROCIndicator(close, window=10)
+    return ('roc_10', indicator.roc())
+
+def calc_momentum(close):
+    """Return ``('momentum_10', series)``: ``close`` minus ``close`` shifted by 10 rows."""
+    ten_rows_back = close.shift(10)
+    change = close - ten_rows_back
+    return ('momentum_10', change)
+
+def calc_psar(high, low, close):
+    """Return ``[('psar', series)]`` computed by ``fast_psar``.
+
+    The inputs are converted to NumPy arrays for the Numba-compiled routine;
+    the result is re-wrapped as a Series on ``close``'s index.
+    """
+    highs = np.array(high)
+    lows = np.array(low)
+    closes = np.array(close)
+    sar = fast_psar(highs, lows, closes)
+    return [('psar', pd.Series(sar, index=close.index))]
+
+def calc_donchian(high, low, close):
+    """Return the high, low and middle bands of ``ta``'s ``DonchianChannel`` (``window=20``).
+
+    The result is a list of ``(name, series)`` pairs named 'donchian_hband',
+    'donchian_lband' and 'donchian_mband', in that order.
+    """
+    from ta.volatility import DonchianChannel
+    channel = DonchianChannel(high, low, close, window=20)
+    upper = channel.donchian_channel_hband()
+    lower = channel.donchian_channel_lband()
+    middle = channel.donchian_channel_mband()
+    return [('donchian_hband', upper), ('donchian_lband', lower), ('donchian_mband', middle)]
+
+def calc_keltner(high, low, close):
+    """Return the high, low and middle bands of ``ta``'s ``KeltnerChannel`` (``window=20``).
+
+    The result is a list of ``(name, series)`` pairs named 'keltner_hband',
+    'keltner_lband' and 'keltner_mband', in that order.
+    """
+    from ta.volatility import KeltnerChannel
+    channel = KeltnerChannel(high, low, close, window=20)
+    upper = channel.keltner_channel_hband()
+    lower = channel.keltner_channel_lband()
+    middle = channel.keltner_channel_mband()
+    return [('keltner_hband', upper), ('keltner_lband', lower), ('keltner_mband', middle)]
+
+def calc_dpo(close):
+    """Return ``('dpo_20', series)`` from ``ta``'s ``DPOIndicator`` with ``window=20``."""
+    from ta.trend import DPOIndicator
+    indicator = DPOIndicator(close, window=20)
+    return ('dpo_20', indicator.dpo())
+
+def calc_ultimate(high, low, close):
+    """Return ``('ultimate_osc', series)`` from ``ta``'s ``UltimateOscillator`` with default windows."""
+    from ta.momentum import UltimateOscillator
+    oscillator = UltimateOscillator(high, low, close)
+    return ('ultimate_osc', oscillator.ultimate_oscillator())
+
+def calc_ichimoku(high, low):
+    """Return four Ichimoku series from ``ta``'s ``IchimokuIndicator``.
+
+    The indicator is built with ``window1=9``, ``window2=26``, ``window3=52``.
+    The result is a list of ``(name, series)`` pairs named 'ichimoku_a',
+    'ichimoku_b', 'ichimoku_base_line' and 'ichimoku_conversion_line'.
+    """
+    from ta.trend import IchimokuIndicator
+    cloud = IchimokuIndicator(high, low, window1=9, window2=26, window3=52)
+    span_a = cloud.ichimoku_a()
+    span_b = cloud.ichimoku_b()
+    base_line = cloud.ichimoku_base_line()
+    conversion_line = cloud.ichimoku_conversion_line()
+    return [
+        ('ichimoku_a', span_a),
+        ('ichimoku_b', span_b),
+        ('ichimoku_base_line', base_line),
+        ('ichimoku_conversion_line', conversion_line),
+    ]
+
+def calc_elder_ray(close, low, high):
+    """Return Elder-Ray bull and bear power as ``(name, series)`` pairs.
+
+    Both are measured against the 13-window EMA of ``close``:
+    - 'elder_ray_bull' = ``high - ema``
+    - 'elder_ray_bear' = ``low - ema``
+
+    Note the parameter order is ``(close, low, high)``.
+    """
+    from ta.trend import EMAIndicator
+    ema = EMAIndicator(close, window=13).ema_indicator()
+    # Bull power pairs with the high and bear power with the low, each taken
+    # relative to the EMA. The earlier ``ema - low`` / ``ema - high`` had both
+    # the pairing and the sign reversed, so values previously stored from this
+    # function (if the caller caches them) no longer match.
+    return [
+        ('elder_ray_bull', high - ema),
+        ('elder_ray_bear', low - ema)
+    ]
+
+def calc_daily_return(close):
+    """Return ``('daily_return', series)`` from ``ta``'s ``DailyReturnIndicator``."""
+    from ta.others import DailyReturnIndicator
+    indicator = DailyReturnIndicator(close)
+    return ('daily_return', indicator.daily_return())
+
+@njit
+def fast_psar(high, low, close, af=0.02, max_af=0.2):
+    """Compute a stop-and-reverse series (Parabolic SAR, going by the name) in one pass.
+
+    Parameters:
+    - high, low: indexable price sequences, read at every position.
+    - close: only its length is used.
+    - af: starting acceleration step, also the increment applied on a new extreme.
+    - max_af: upper cap for the acceleration step.
+
+    Returns:
+    - Array of ``len(close)`` values created with ``np.zeros``.
+
+    NOTE(review): the series starts in the bullish state with the extreme
+    point set to ``low[0]`` rather than a high; confirm this seeding is intended.
+    """
+    length = len(close)
+    psar = np.zeros(length)
+    bull = True  # current state: True = bullish, False = bearish
+    af_step = af  # current acceleration step
+    ep = low[0]  # extreme point tracked for the current state
+    psar[0] = low[0]
+    for i in range(1, length):
+        prev_psar = psar[i-1]
+        if bull:
+            # Move the previous value towards the extreme point.
+            psar[i] = prev_psar + af_step * (ep - prev_psar)
+            if low[i] < psar[i]:
+                # Low fell below the stop: flip to bearish, restart from the
+                # old extreme point and reset the acceleration step.
+                bull = False
+                psar[i] = ep
+                af_step = af
+                ep = low[i]
+            else:
+                if high[i] > ep:
+                    # New high: update the extreme and accelerate, capped at max_af.
+                    ep = high[i]
+                    af_step = min(af_step + af, max_af)
+        else:
+            # Same update formula as the bullish branch.
+            psar[i] = prev_psar + af_step * (ep - prev_psar)
+            if high[i] > psar[i]:
+                # High rose above the stop: flip to bullish, restart from the
+                # old extreme point and reset the acceleration step.
+                bull = True
+                psar[i] = ep
+                af_step = af
+                ep = high[i]
+            else:
+                if low[i] < ep:
+                    # New low: update the extreme and accelerate, capped at max_af.
+                    ep = low[i]
+                    af_step = min(af_step + af, max_af)
+    return psar
+
+def compute_lag(df, col, lag):
+    """Return column ``col`` of ``df`` shifted by ``lag`` rows."""
+    column = df[col]
+    return column.shift(lag)
+
+def compute_rolling(df, col, stat, window):
+    """Return a rolling statistic of column ``col`` over ``window`` rows.
+
+    Parameters:
+    - df: object indexable by ``col`` whose column supports ``.rolling(window)``.
+    - col: column name.
+    - stat: one of 'mean', 'std', 'min', 'max'.
+    - window: rolling window passed to ``.rolling``.
+
+    Raises:
+    - ValueError: if ``stat`` is not one of the supported names. An unknown
+      name used to fall through every branch and return ``None``.
+    """
+    if stat not in ('mean', 'std', 'min', 'max'):
+        raise ValueError(f"Unsupported rolling stat: {stat!r} (expected 'mean', 'std', 'min' or 'max')")
+    # The supported names match the rolling-window method names one to one.
+    return getattr(df[col].rolling(window), stat)()
+
+def compute_log_return(df, horizon):
+    """Return the log of ``Close`` divided by ``Close`` ``horizon`` rows earlier."""
+    closes = df['Close']
+    ratio = closes / closes.shift(horizon)
+    return np.log(ratio)
+
+def compute_volatility(df, window):
+    """Return the rolling standard deviation of ``log_return`` over ``window`` rows."""
+    returns = df['log_return']
+    return returns.rolling(window).std()
+
+def run_feature_job(job, df):
+    """Run one feature job and return ``(feature_name, result)``.
+
+    ``job`` unpacks as ``(feature_name, func, *args)``; the result is
+    ``func(df, *args)``. The feature name is printed before the call.
+    """
+    name, compute, *extra_args = job
+    print(f'Computing feature: {name}')
+    return name, compute(df, *extra_args)
+
+def _feature_cache_path(csv_prefix, feature_name):
+    """Return the ``.npy`` cache path used for ``feature_name``."""
+    return f'./data/{csv_prefix}_{feature_name}.npy'
+
+def _load_cached_feature(feature_file, index, tag):
+    """Load a cached feature as a Series on ``index``, or None if not cached.
+
+    ``tag`` is the one-letter marker printed in front of the log line.
+    """
+    if not os.path.exists(feature_file):
+        return None
+    print(f'{tag} Loading cached feature: {feature_file}')
+    return pd.Series(np.load(feature_file), index=index)
+
+def _add_single_indicator(features_dict, index, csv_prefix, name, calc):
+    """Add one single-column indicator, computing and caching it on a miss.
+
+    ``calc`` is a zero-argument callable returning ``(label, series)``; it is
+    only invoked when no cache file exists.
+    """
+    feature_file = _feature_cache_path(csv_prefix, name)
+    cached = _load_cached_feature(feature_file, index, 'A')
+    if cached is not None:
+        features_dict[name] = cached
+        return
+    print(f'Calculating feature: {name}')
+    _, values = calc()
+    features_dict[name] = values
+    np.save(feature_file, values.values)
+    print(f'Saved feature: {feature_file}')
+
+def _add_multi_indicator(features_dict, index, csv_prefix, label, calc):
+    """Add every ``(subname, series)`` pair produced by a multi-column indicator.
+
+    ``calc`` always runs because the sub-feature names are only known from its
+    result; a cached array still takes precedence over the fresh values.
+    """
+    print(f'Calculating multi-column indicator: {label}')
+    for subname, values in calc():
+        print(f"Adding subfeature: {subname}")
+        sub_feature_file = _feature_cache_path(csv_prefix, subname)
+        cached = _load_cached_feature(sub_feature_file, index, 'B')
+        if cached is not None:
+            features_dict[subname] = cached
+            continue
+        features_dict[subname] = values
+        np.save(sub_feature_file, values.values)
+        print(f'Saved feature: {sub_feature_file}')
+
+def _load_or_queue_job(features_dict, feature_jobs, index, csv_prefix, job, tag, kind):
+    """Load the feature named by ``job[0]`` from cache, else queue ``job``.
+
+    ``kind`` is the human-readable job family used in the log line.
+    """
+    feature_name = job[0]
+    feature_file = _feature_cache_path(csv_prefix, feature_name)
+    cached = _load_cached_feature(feature_file, index, tag)
+    if cached is not None:
+        features_dict[feature_name] = cached
+    else:
+        print(f'Adding {kind} feature job: {feature_name}')
+        feature_jobs.append(job)
+
+if __name__ == '__main__':
+    csv_path = './data/btcusd_1-min_data.csv'
+    csv_prefix = os.path.splitext(os.path.basename(csv_path))[0]
+
+    print('Reading CSV and filtering data...')
+    df = pd.read_csv(csv_path)
+    df = df[df['Volume'] != 0]
+
+    min_date = '2017-06-01'
+    print('Converting Timestamp and filtering by date...')
+    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
+    df = df[df['Timestamp'] >= min_date]
+
+    lags = 3
+
+    print('Calculating log returns as the new target...')
+    df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))
+
+    ohlcv_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
+    window_sizes = [5, 15, 30]  # in minutes, adjust as needed
+
+    features_dict = {}
+
+    print('Starting feature computation...')
+    feature_start_time = time.time()
+
+    # --- Technical Indicator Features: Calculate or Load from Cache ---
+    print('Calculating or loading technical indicator features...')
+    high, low, close, volume = df['High'], df['Low'], df['Close'], df['Volume']
+
+    # Single-column indicators: (feature name, lazy calculation).
+    single_indicators = [
+        ('rsi', lambda: calc_rsi(close)),
+        ('macd', lambda: calc_macd(close)),
+        ('atr', lambda: calc_atr(high, low, close)),
+        ('cci', lambda: calc_cci(high, low, close)),
+        ('williams_r', lambda: calc_williamsr(high, low, close)),
+        ('ema_14', lambda: calc_ema(close)),
+        ('obv', lambda: calc_obv(close, volume)),
+        ('cmf', lambda: calc_cmf(high, low, close, volume)),
+        ('roc_10', lambda: calc_roc(close)),
+        ('dpo_20', lambda: calc_dpo(close)),
+        ('ultimate_osc', lambda: calc_ultimate(high, low, close)),
+        ('daily_return', lambda: calc_daily_return(close)),
+    ]
+    for name, calc in single_indicators:
+        _add_single_indicator(features_dict, df.index, csv_prefix, name, calc)
+
+    # Multi-column indicators: (log label, calculation yielding sub-features).
+    multi_indicators = [
+        ('bollinger', lambda: calc_bollinger(close)),
+        ('stochastic', lambda: calc_stochastic(high, low, close)),
+        ('sma', lambda: calc_sma(close)),
+        ('psar', lambda: calc_psar(high, low, close)),
+        ('donchian', lambda: calc_donchian(high, low, close)),
+        ('keltner', lambda: calc_keltner(high, low, close)),
+        ('ichimoku', lambda: calc_ichimoku(high, low)),
+        ('elder_ray', lambda: calc_elder_ray(close, low, high)),
+    ]
+    for label, calc in multi_indicators:
+        _add_multi_indicator(features_dict, df.index, csv_prefix, label, calc)
+
+    # Prepare jobs for lags, rolling stats, log returns, and volatility
+    feature_jobs = []
+    # Lags
+    for col in ohlcv_cols:
+        for lag in range(1, lags + 1):
+            job = (f'{col}_lag{lag}', compute_lag, col, lag)
+            _load_or_queue_job(features_dict, feature_jobs, df.index, csv_prefix, job, 'C', 'lag')
+    # Rolling statistics; these (column, window) pairs are deliberately left out
+    skipped_rolling = {('Open', 5), ('High', 5), ('High', 30), ('Low', 15)}
+    for col in ohlcv_cols:
+        for window in window_sizes:
+            if (col, window) in skipped_rolling:
+                continue
+            for stat in ['mean', 'std', 'min', 'max']:
+                job = (f'{col}_roll_{stat}_{window}', compute_rolling, col, stat, window)
+                _load_or_queue_job(features_dict, feature_jobs, df.index, csv_prefix, job, 'D', 'rolling stat')
+    # Log returns for different horizons
+    for horizon in [5, 15, 30]:
+        job = (f'log_return_{horizon}', compute_log_return, horizon)
+        _load_or_queue_job(features_dict, feature_jobs, df.index, csv_prefix, job, 'E', 'log return')
+    # Volatility
+    for window in window_sizes:
+        job = (f'volatility_{window}', compute_volatility, window)
+        _load_or_queue_job(features_dict, feature_jobs, df.index, csv_prefix, job, 'F', 'volatility')
+
+    # Sequential computation for all non-cached features
+    if feature_jobs:
+        print(f'Computing {len(feature_jobs)} features sequentially...')
+        for job in feature_jobs:
+            print(f'Computing feature job: {job[0]}')
+            feature_name, result = run_feature_job(job, df)
+            features_dict[feature_name] = result
+            feature_file = _feature_cache_path(csv_prefix, feature_name)
+            np.save(feature_file, result.values)
+            print(f'Saved computed feature: {feature_file}')
+        print('All features computed.')
+    else:
+        print('All features loaded from cache.')
+
+    # Concatenate all new features at once
+    print('Concatenating all new features to DataFrame...')
+    features_df = pd.DataFrame(features_dict)
+    print("Columns in features_df:", features_df.columns.tolist())
+    print("All-NaN columns in features_df:", features_df.columns[features_df.isna().all()].tolist())
+    df = pd.concat([df, features_df], axis=1)
+
+    # Print all columns after concatenation
+    print("All columns in df after concat:", df.columns.tolist())
+
+    # Downcast float64 columns to save memory. Only float columns are touched,
+    # so no conversion error has to be swallowed and Timestamp keeps its dtype.
+    print('Downcasting float columns to save memory...')
+    for col in df.select_dtypes(include='float64').columns:
+        df[col] = pd.to_numeric(df[col], downcast='float')
+
+    # Drop intermediate features_df to free memory
+    print('Dropping intermediate features_df to free memory...')
+    del features_df
+    import gc
+    gc.collect()
+
+    feature_end_time = time.time()
+    print(f'Feature computation completed in {feature_end_time - feature_start_time:.2f} seconds.')
+
+    # Add Supertrend indicators (custom)
+    print('Preparing data for Supertrend calculation...')
+    st_df = df.rename(columns={'High': 'high', 'Low': 'low', 'Close': 'close'})
+
+    print('Calculating Supertrend indicators...')
+    supertrend = Supertrends(st_df)
+    st_results = supertrend.calculate_supertrend_indicators()
+    for st in st_results:
+        period = st['params']['period']
+        multiplier = st['params']['multiplier']
+        # Skip useless supertrend features
+        if (period == 10 and multiplier == 1.0) or (period == 11 and multiplier == 2.0):
+            continue
+        print(f'Adding Supertrend features: supertrend_{period}_{multiplier} and supertrend_trend_{period}_{multiplier}')
+        df[f'supertrend_{period}_{multiplier}'] = st['results']['supertrend']
+        df[f'supertrend_trend_{period}_{multiplier}'] = st['results']['trend']
+
+    # Add time features (exclude 'dayofweek')
+    print('Adding hour feature...')
+    df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
+    df['hour'] = df['Timestamp'].dt.hour
+
+    # Drop NaNs after all feature engineering
+    print('Dropping NaNs after feature engineering...')
+    df = df.dropna().reset_index(drop=True)
+
+    # Keep the closes of the final, re-indexed rows: 'Close' is removed from
+    # df below, but the price reconstruction needs it on the same row
+    # numbering as split_idx.
+    close_prices = df['Close'].to_numpy(dtype=np.float64)
+
+    # Exclude 'Timestamp', 'Close', 'log_return', and any future target columns from features
+    # NOTE(review): the remaining features still include same-bar Open/High/Low/Volume
+    # and indicators computed from the same-bar Close, while the target is the
+    # same-bar log_return - this looks like target leakage; confirm intent.
+    print('Selecting feature columns...')
+    exclude_cols = ['Timestamp', 'Close', 'log_return', 'log_return_5', 'log_return_15', 'log_return_30']
+    feature_cols = [col for col in df.columns if col not in exclude_cols]
+
+    # Print the features used for training
+    print("Features used for training:", feature_cols)
+
+    # Drop excluded columns to save memory
+    print('Dropping excluded columns to save memory...')
+    df = df[feature_cols + ['log_return', 'Timestamp']]
+
+    print('Preparing X and y...')
+    X = df[feature_cols].values.astype(np.float32)
+    y = df['log_return'].values.astype(np.float32)
+
+    # Chronological 80/20 split (no shuffling)
+    split_idx = int(len(X) * 0.8)
+    print(f'Splitting data: {split_idx} train, {len(X) - split_idx} test')
+    X_train, X_test = X[:split_idx], X[split_idx:]
+    y_train, y_test = y[:split_idx], y[split_idx:]
+    test_timestamps = df['Timestamp'].values[split_idx:]
+
+    print('Initializing model...')
+    model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
+
+    print('Training model...')
+    booster = model.train()
+
+    print('Training complete.')
+
+    # Save the trained model
+    model.save_model('./data/xgboost_model.json')
+    print('Model saved to ./data/xgboost_model.json')
+
+    if hasattr(model, 'params'):
+        print("Model hyperparameters:", model.params)
+    if hasattr(model, 'model') and hasattr(model.model, 'get_score'):
+        import operator
+        importances = model.model.get_score(importance_type='weight')
+        # Map f0, f1, ... to actual feature names
+        feature_map = {f"f{idx}": name for idx, name in enumerate(feature_cols)}
+        sorted_importances = sorted(importances.items(), key=operator.itemgetter(1), reverse=True)
+        print('Feature importances (sorted, with names):')
+        for feat, score in sorted_importances:
+            print(f'{feature_map.get(feat, feat)}: {score}')
+
+    print('Making predictions for first 5 test samples...')
+    preds = model.predict(X_test[:5])
+    print('Predictions for first 5 test samples:', preds)
+    print('Actual values for first 5 test samples:', y_test[:5])
+
+    print('Making predictions for all test samples...')
+    test_preds = model.predict(X_test)
+    rmse = np.sqrt(mean_squared_error(y_test, test_preds))
+    print(f'RMSE on test set: {rmse:.4f}')
+
+    print('Saving y_test and test_preds to disk...')
+    np.save('./data/y_test.npy', y_test)
+    np.save('./data/test_preds.npy', test_preds)
+
+    # Reconstruct price series from log returns
+    print('Reconstructing price series from log returns...')
+    # The first test return is relative to the close of the last training row,
+    # so that close is the anchor. (Re-reading the raw CSV and indexing it with
+    # split_idx would pick an unrelated row, because df has been filtered,
+    # NaN-dropped and re-indexed since it was loaded.) With an empty training
+    # set there is no earlier row, so the first row is used as the anchor.
+    start_price = close_prices[max(split_idx - 1, 0)]
+    # Compounding log returns: price_t = start * exp(sum of returns up to t).
+    # Accumulate in float64 because the returns are stored as float32.
+    actual_prices = start_price * np.exp(np.cumsum(np.asarray(y_test, dtype=np.float64)))
+    predicted_prices = start_price * np.exp(np.cumsum(np.asarray(test_preds, dtype=np.float64)))
+
+    print('Plotting predicted vs actual prices...')
+    plot_predicted_vs_actual_prices(actual_prices, predicted_prices, test_timestamps)
+
+    print("Final features used for training:", feature_cols)
+
+    print("Shape of X:", X.shape)
+    print("First row of X:", X[0])
+    print("stoch_k in feature_cols?", "stoch_k" in feature_cols)
+    if "stoch_k" in feature_cols:
+        idx = feature_cols.index("stoch_k")
+        print("First 10 values of stoch_k:", X[:10, idx])
--- a/xgboost/plot_results.py
+++ b/xgboost/plot_results.py
@@ -0,0 +1,169 @@
+import numpy as np
+import dash
+from dash import dcc, html
+import plotly.graph_objs as go
+import threading
+
+
+def display_actual_vs_predicted(y_test, test_preds, timestamps, n_plot=200):
+    """Plot the first ``n_plot`` actual and predicted values as two lines.
+
+    The figure is rendered through ``plotly.offline.plot``.
+
+    Args:
+        y_test: Actual target values (sliceable, supports ``len``).
+        test_preds: Predicted values in the same order as ``y_test``.
+        timestamps: X-axis values in the same order as ``y_test``.
+        n_plot: Maximum number of leading points to draw.
+    """
+    import plotly.offline as pyo
+    n_plot = min(n_plot, len(y_test))
+    head = slice(None, n_plot)
+    x_values = timestamps[head]
+    lines = [
+        go.Scatter(x=x_values, y=y_test[head], mode='lines', name='Actual'),
+        go.Scatter(x=x_values, y=test_preds[head], mode='lines', name='Predicted'),
+    ]
+    figure = go.Figure(
+        data=lines,
+        layout=go.Layout(
+            title='Actual vs. Predicted BTC Close Prices (Test Set)',
+            xaxis=dict(title='Timestamp'),
+            yaxis=dict(title='BTC Close Price'),
+            legend=dict(x=0, y=1),
+            margin=dict(l=40, b=40, t=40, r=10),
+            hovermode='closest',
+        ),
+    )
+    pyo.plot(figure)
+
+def plot_target_distribution(y_train, y_test):
+    """Overlay 100-bin histograms of the train and test target values.
+
+    The figure is rendered through ``plotly.offline.plot``.
+
+    Args:
+        y_train: Target values of the training split.
+        y_test: Target values of the test split.
+    """
+    import plotly.offline as pyo
+    # (values, legend name, bar colour) for each split, train first.
+    split_specs = [(y_train, 'Train', 'blue'), (y_test, 'Test', 'orange')]
+    histograms = [
+        go.Histogram(x=values, nbinsx=100, opacity=0.5, name=label, marker={'color': colour})
+        for values, label, colour in split_specs
+    ]
+    layout = go.Layout(
+        title='Distribution of Target Variable (Close Price)',
+        xaxis={'title': 'BTC Close Price'},
+        yaxis={'title': 'Frequency'},
+        barmode='overlay',
+    )
+    pyo.plot(go.Figure(data=histograms, layout=layout))
+
+def plot_predicted_vs_actual_log_returns(y_test, test_preds, timestamps=None, n_plot=200):
+    """Write a line plot and a scatter plot comparing log returns.
+
+    Only the first ``n_plot`` points are drawn. The line plot goes to
+    ``log_return_line_plot.html`` and the scatter plot, with a dashed
+    y = x reference line, to ``log_return_scatter_plot.html``.
+
+    Args:
+        y_test: Actual log returns.
+        test_preds: Predicted log returns in the same order.
+        timestamps: Optional x-axis values; positions are used when None.
+        n_plot: Maximum number of leading points to draw.
+    """
+    import plotly.offline as pyo
+    import plotly.graph_objs as go
+    n_plot = min(n_plot, len(y_test))
+    actual = y_test[:n_plot]
+    predicted = test_preds[:n_plot]
+    if timestamps is None:
+        x_axis, x_label = list(range(n_plot)), 'Index'
+    else:
+        x_axis, x_label = timestamps[:n_plot], 'Timestamp'
+    plot_margin = {'l': 40, 'b': 40, 't': 40, 'r': 10}
+
+    # Line plot: Actual vs Predicted over time
+    line_traces = [
+        go.Scatter(x=x_axis, y=actual, mode='lines', name='Actual'),
+        go.Scatter(x=x_axis, y=predicted, mode='lines', name='Predicted'),
+    ]
+    line_layout = go.Layout(
+        title='Actual vs. Predicted Log Returns (Test Set)',
+        xaxis=dict(title=x_label),
+        yaxis=dict(title='Log Return'),
+        legend=dict(x=0, y=1),
+        margin=plot_margin,
+        hovermode='closest',
+    )
+    pyo.plot(go.Figure(data=line_traces, layout=line_layout), filename='log_return_line_plot.html')
+
+    # Scatter plot: Predicted vs Actual, plus the diagonal a perfect model would hit
+    lowest = min(np.min(actual), np.min(predicted))
+    highest = max(np.max(actual), np.max(predicted))
+    diagonal = [lowest, highest]
+    scatter_traces = [
+        go.Scatter(x=actual, y=predicted, mode='markers', name='Predicted vs Actual', opacity=0.5),
+        go.Scatter(x=diagonal, y=diagonal, mode='lines', name='Ideal',
+                   line={'dash': 'dash', 'color': 'red'}),
+    ]
+    scatter_layout = go.Layout(
+        title='Predicted vs Actual Log Returns (Scatter)',
+        xaxis=dict(title='Actual Log Return'),
+        yaxis=dict(title='Predicted Log Return'),
+        showlegend=True,
+        margin=plot_margin,
+        hovermode='closest',
+    )
+    pyo.plot(go.Figure(data=scatter_traces, layout=scatter_layout), filename='log_return_scatter_plot.html')
+
+def plot_predicted_vs_actual_prices(actual_prices, predicted_prices, timestamps=None, n_plot=200):
+    """Write a line plot and a scatter plot comparing price series.
+
+    Only the first ``n_plot`` points are drawn. The line plot goes to
+    ``price_line_plot.html`` and the scatter plot, with a dashed y = x
+    reference line, to ``price_scatter_plot.html``.
+
+    Args:
+        actual_prices: Actual prices.
+        predicted_prices: Predicted prices in the same order.
+        timestamps: Optional x-axis values; positions are used when None.
+        n_plot: Maximum number of leading points to draw.
+    """
+    import plotly.offline as pyo
+    import plotly.graph_objs as go
+    n_plot = min(n_plot, len(actual_prices))
+    actual = actual_prices[:n_plot]
+    predicted = predicted_prices[:n_plot]
+    if timestamps is None:
+        x_axis, x_label = list(range(n_plot)), 'Index'
+    else:
+        x_axis, x_label = timestamps[:n_plot], 'Timestamp'
+    plot_margin = {'l': 40, 'b': 40, 't': 40, 'r': 10}
+
+    # Line plot: Actual vs Predicted over time
+    line_traces = [
+        go.Scatter(x=x_axis, y=actual, mode='lines', name='Actual Price'),
+        go.Scatter(x=x_axis, y=predicted, mode='lines', name='Predicted Price'),
+    ]
+    line_layout = go.Layout(
+        title='Actual vs. Predicted BTC Prices (Test Set)',
+        xaxis=dict(title=x_label),
+        yaxis=dict(title='BTC Price'),
+        legend=dict(x=0, y=1),
+        margin=plot_margin,
+        hovermode='closest',
+    )
+    pyo.plot(go.Figure(data=line_traces, layout=line_layout), filename='price_line_plot.html')
+
+    # Scatter plot: Predicted vs Actual, plus the diagonal a perfect model would hit
+    lowest = min(np.min(actual), np.min(predicted))
+    highest = max(np.max(actual), np.max(predicted))
+    diagonal = [lowest, highest]
+    scatter_traces = [
+        go.Scatter(x=actual, y=predicted, mode='markers', name='Predicted vs Actual', opacity=0.5),
+        go.Scatter(x=diagonal, y=diagonal, mode='lines', name='Ideal',
+                   line={'dash': 'dash', 'color': 'red'}),
+    ]
+    scatter_layout = go.Layout(
+        title='Predicted vs Actual Prices (Scatter)',
+        xaxis=dict(title='Actual Price'),
+        yaxis=dict(title='Predicted Price'),
+        showlegend=True,
+        margin=plot_margin,
+        hovermode='closest',
+    )
+    pyo.plot(go.Figure(data=scatter_traces, layout=scatter_layout), filename='price_scatter_plot.html')
Author	SHA1	Message	Date
Simon Moisy	a22914731f	gitignore updated, model file	2025-05-30 12:31:20 +08:00
Simon Moisy	81e4b640a7	model updated	2025-05-30 12:29:37 +08:00
Simon Moisy	2dba88b620	Added mode indicators, still WIP	2025-05-29 12:45:45 +08:00
Simon Moisy	de67b27e37	XGBoost first iteration	2025-05-29 18:28:53 +08:00
Simon Moisy	1284549106	progress print	2025-05-29 11:04:03 +08:00
Simon Moisy	5f03524d6a	never fallback to default values for fee_usd	2025-05-28 02:50:40 +08:00
Simon Moisy	74c8048ed5	shifted one day back on the metatrend to avoid lookahead bias, reverted metatrend calculus to use no cpu optimization for readability	2025-05-27 17:49:55 +08:00