# OHLCVPredictor/custom_xgboost.py

import xgboost as xgb
import numpy as np

class CustomXGBoostGPU:
    """Wrapper around the native ``xgboost.train`` API for regression.

    Holds a train/eval split cast to ``float32``, trains a booster with
    early stopping (defaulting to ``device='cuda'``), and exposes
    prediction, model saving and per-feature importance lookup.
    """

    def __init__(self, X_train, X_test, y_train, y_test):
        """Store the four data splits, each cast to ``np.float32``.

        Every argument must provide an ``astype`` method (NumPy arrays and
        pandas objects do). No training happens here.
        """
        self.X_train = X_train.astype(np.float32)
        self.X_test = X_test.astype(np.float32)
        self.y_train = y_train.astype(np.float32)
        self.y_test = y_test.astype(np.float32)
        self.model = None   # Booster returned by xgb.train; None until train()
        self.params = None  # Effective parameter dict; set during training

    def train(self, *, num_boost_round=100, early_stopping_rounds=10,
              **xgb_params):
        """Train a booster on the stored split and return it.

        Args:
            num_boost_round: Maximum number of boosting rounds.
            early_stopping_rounds: Forwarded to ``xgb.train`` as
                ``early_stopping_rounds``.
            **xgb_params: Booster parameters; these override the defaults
                defined below (histogram tree method on CUDA, squared-error
                objective, MAE metric, silent output).

        Returns:
            The trained booster, which is also stored on ``self.model``.
        """
        params = {
            'tree_method': 'hist',
            'device': 'cuda',
            'objective': 'reg:squarederror',
            'eval_metric': 'mae',
            'verbosity': 0,
        }
        params.update(xgb_params)
        self.params = params  # Keep the effective params for later access
        dtrain = xgb.DMatrix(self.X_train, label=self.y_train)
        dtest = xgb.DMatrix(self.X_test, label=self.y_test)
        # Keep the eval split last: per the xgboost docs the last entry of
        # `evals` is the one early stopping monitors.
        evals = [(dtrain, 'train'), (dtest, 'eval')]
        self.model = xgb.train(
            params,
            dtrain,
            num_boost_round=num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
        )
        return self.model

    def predict(self, X):
        """Return the model's predictions for ``X`` (cast to ``float32``).

        Raises:
            ValueError: If ``train`` has not been called yet.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')
        dmatrix = xgb.DMatrix(X.astype(np.float32))
        return self.model.predict(dmatrix)

    def save_model(self, file_path):
        """Save the trained XGBoost model to the specified file path.

        Raises:
            ValueError: If ``train`` has not been called yet.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')
        self.model.save_model(file_path)

    def get_feature_importance(self, feature_names):
        """Map each name in ``feature_names`` to its 'weight' importance.

        ``feature_names`` is matched to the model's features by position.
        Features that have no entry in the booster's score dict get ``0.0``.

        Raises:
            ValueError: If ``train`` has not been called yet.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')
        score_dict = self.model.get_score(importance_type='weight')
        # get_score keys are 'f0', 'f1', ... only when the booster has no
        # feature names. When the training data carried names (e.g. pandas
        # columns), the keys are those names, so resolve each position
        # through the booster's own names first.
        booster_names = getattr(self.model, 'feature_names', None) or []
        importances = {}
        for index, name in enumerate(feature_names):
            if index < len(booster_names):
                key = booster_names[index]
            else:
                key = f'f{index}'
            importances[name] = score_dict.get(key, 0.0)
        return importances