68 lines
2.8 KiB
Python
68 lines
2.8 KiB
Python
import xgboost as xgb
|
|
import numpy as np
|
|
|
|
class CustomXGBoostGPU:
    """Thin wrapper around an XGBoost ``Booster`` for regression.

    The instance can be built with train/test data and fitted via
    :meth:`train`, or created without any data through :meth:`load_model`
    for inference-only usage.

    Attributes:
        X_train, X_test, y_train, y_test: Data passed to the constructor,
            converted to ``float32`` (or ``None`` when not supplied).
        model: The ``xgb.Booster`` once trained or loaded, else ``None``.
        params: The parameter dict used by the last :meth:`train` call,
            else ``None``.
    """

    # Boosting budget and early-stopping patience used by train().
    NUM_BOOST_ROUND = 100
    EARLY_STOPPING_ROUNDS = 10

    def __init__(self, X_train=None, X_test=None, y_train=None, y_test=None):
        """Store the (optional) datasets as float32.

        Args:
            X_train: Training features, or None.
            X_test: Validation features, or None.
            y_train: Training targets, or None.
            y_test: Validation targets, or None.
        """
        # Training data is optional so the class can be used for inference only.
        self.X_train = self._to_float32(X_train)
        self.X_test = self._to_float32(X_test)
        self.y_train = self._to_float32(y_train)
        self.y_test = self._to_float32(y_test)
        self.model = None
        self.params = None  # Will be set during training

    @staticmethod
    def _to_float32(data):
        """Return *data* converted to float32, passing ``None`` through."""
        if data is None:
            return None
        # Objects with their own ``astype`` (ndarray, DataFrame, Series) keep
        # their type; plain sequences such as lists go through NumPy.
        if hasattr(data, 'astype'):
            return data.astype(np.float32)
        return np.asarray(data, dtype=np.float32)

    @classmethod
    def load_model(cls, model_path):
        """Load a pre-trained model from file for inference.

        Args:
            model_path (str): Path to the saved XGBoost model file.

        Returns:
            CustomXGBoostGPU: Instance with loaded model ready for inference.
        """
        instance = cls()  # Create instance without training data
        instance.model = xgb.Booster()
        instance.model.load_model(model_path)
        return instance

    def train(self, **xgb_params):
        """Fit a booster on the stored training data.

        Defaults request the ``hist`` tree method on the ``cuda`` device with
        a squared-error objective and MAE as the evaluation metric; any
        keyword argument overrides or extends these defaults.

        When both ``X_test`` and ``y_test`` were supplied they are added as
        the ``'eval'`` set and early stopping is enabled. Without a complete
        validation set, training runs for the full number of rounds and only
        the training set is evaluated.

        Args:
            **xgb_params: XGBoost parameters merged over the defaults.

        Returns:
            The trained ``xgb.Booster`` (also stored in ``self.model``).

        Raises:
            ValueError: If the training features or targets are missing.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError('Training data is required for training. Use load_model() for inference-only usage.')

        params = {
            'tree_method': 'hist',
            'device': 'cuda',
            'objective': 'reg:squarederror',
            'eval_metric': 'mae',
            'verbosity': 0,
        }
        params.update(xgb_params)
        self.params = params  # Store params for later access

        dtrain = xgb.DMatrix(self.X_train, label=self.y_train)
        evals = [(dtrain, 'train')]
        early_stopping_rounds = None

        # The validation split is optional: building a DMatrix from None
        # would fail, so only add it (and early stopping) when it exists.
        if self.X_test is not None and self.y_test is not None:
            dtest = xgb.DMatrix(self.X_test, label=self.y_test)
            evals.append((dtest, 'eval'))
            early_stopping_rounds = self.EARLY_STOPPING_ROUNDS

        self.model = xgb.train(
            params,
            dtrain,
            num_boost_round=self.NUM_BOOST_ROUND,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
        )
        return self.model

    def predict(self, X):
        """Return the model's predictions for *X*.

        Args:
            X: Feature matrix; converted to float32 before prediction.

        Returns:
            The result of ``Booster.predict`` on the converted data.

        Raises:
            ValueError: If no model is available or *X* is None.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')
        if X is None:
            raise ValueError('Input data X must not be None.')
        dmatrix = xgb.DMatrix(self._to_float32(X))
        return self.model.predict(dmatrix)

    def save_model(self, file_path):
        """Save the trained XGBoost model to the specified file path.

        Raises:
            ValueError: If no model is available.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')
        self.model.save_model(file_path)

    def get_feature_importance(self, feature_names):
        """Map split-count ('weight') importances onto *feature_names*.

        Names are matched by position. If the booster carries its own
        feature names, those are used as the lookup keys into the score
        dict; otherwise the positional keys ``'f0'``, ``'f1'``, ... are used.
        Features missing from the score dict get an importance of ``0.0``.

        Args:
            feature_names: Sequence of display names, one per feature column.

        Returns:
            dict: ``{display_name: importance}`` for every given name.

        Raises:
            ValueError: If no model is available.
        """
        if self.model is None:
            raise ValueError('Model not trained yet.')

        score_dict = self.model.get_score(importance_type='weight')

        # The score dict is keyed by the booster's own feature names when it
        # has them, so looking up 'f{i}' alone would report 0.0 everywhere.
        booster_names = list(getattr(self.model, 'feature_names', None) or [])
        score_keys = [
            booster_names[i] if i < len(booster_names) else f'f{i}'
            for i in range(len(feature_names))
        ]
        importances = [score_dict.get(key, 0.0) for key in score_keys]
        return dict(zip(feature_names, importances))
|