import numpy as np
from typing import Dict, List, Tuple

try:
    from .custom_xgboost import CustomXGBoostGPU
except ImportError:
    from custom_xgboost import CustomXGBoostGPU

from sklearn.metrics import mean_squared_error, r2_score


def _compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, float, float, float]:
    """Compute RMSE, MAPE, R2, and directional accuracy.

    Returns:
        (rmse, mape, r2, directional_accuracy)
    """
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))

    # MAPE: map zero targets to NaN so they are excluded from the mean
    # instead of raising divide-by-zero warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        mape_arr = np.abs((y_true - y_pred) / np.where(y_true == 0, np.nan, y_true))
    mape = float(np.nanmean(mape_arr) * 100.0)

    r2 = float(r2_score(y_true, y_pred))

    # Directional accuracy: fraction of steps where the predicted move has the
    # same sign as the actual move.
    direction_actual = np.sign(np.diff(y_true))
    direction_pred = np.sign(np.diff(y_pred))
    min_len = min(len(direction_actual), len(direction_pred))
    if min_len == 0:
        dir_acc = 0.0
    else:
        dir_acc = float((direction_actual[:min_len] == direction_pred[:min_len]).mean())

    return rmse, mape, r2, dir_acc


def walk_forward_cv(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: List[str],
    n_splits: int = 5,
) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Run expanding-window walk-forward CV and aggregate metrics and feature importances.

    Returns:
        metrics_avg: Average metrics across folds {rmse, mape, r2, dir_acc}
        importance_avg: Average feature importance across folds {feature -> importance}
    """
    num_samples = len(X)
    fold_size = num_samples // (n_splits + 1)
    if fold_size <= 0:
        raise ValueError("Not enough samples for walk-forward CV")

    metrics_accum = {"rmse": [], "mape": [], "r2": [], "dir_acc": []}
    importance_sum = {name: 0.0 for name in feature_names}

    for i in range(1, n_splits + 1):
        # Expanding training window: everything up to train_end; the next
        # fold-sized slice (or the remainder, on the last split) is the test set.
        train_end = i * fold_size
        test_end = (i + 1) * fold_size if i < n_splits else num_samples
        X_train, y_train = X[:train_end], y[:train_end]
        X_test, y_test = X[train_end:test_end], y[train_end:test_end]
        if len(X_test) == 0:
            continue

        model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
        model.train(eval_metric='rmse')

        preds = model.predict(X_test)
        rmse, mape, r2, dir_acc = _compute_metrics(y_test, preds)
        metrics_accum["rmse"].append(rmse)
        metrics_accum["mape"].append(mape)
        metrics_accum["r2"].append(r2)
        metrics_accum["dir_acc"].append(dir_acc)

        fold_importance = model.get_feature_importance(feature_names)
        for name, val in fold_importance.items():
            importance_sum[name] += float(val)

    metrics_avg = {k: float(np.mean(v)) if len(v) > 0 else float('nan') for k, v in metrics_accum.items()}
    importance_avg = {k: importance_sum[k] / n_splits for k in feature_names}
    return metrics_avg, importance_avg
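

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module's API): exercises
# _compute_metrics on synthetic data and, assuming the CustomXGBoostGPU backend
# imported above is available and trainable in this environment, runs a small
# walk-forward CV. The lag features and their names ("lag_1".."lag_3") are
# hypothetical placeholders, not values used elsewhere in this project.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Synthetic price-like series (random walk) plus a noisy "prediction" of it.
    y_demo = 100.0 + np.cumsum(rng.normal(size=200))
    y_pred_demo = y_demo + rng.normal(scale=0.5, size=200)
    print("(rmse, mape, r2, dir_acc):", _compute_metrics(y_demo, y_pred_demo))

    # Lagged-feature design matrix for the walk-forward demo: lag_1..lag_3.
    X_demo = np.column_stack([y_demo[2:-1], y_demo[1:-2], y_demo[:-3]])
    y_target = y_demo[3:]
    try:
        metrics_avg, importance_avg = walk_forward_cv(
            X_demo, y_target, feature_names=["lag_1", "lag_2", "lag_3"], n_splits=3
        )
        print("average metrics:", metrics_avg)
        print("average importances:", importance_avg)
    except Exception as exc:  # e.g. missing GPU or xgboost build
        print("walk_forward_cv demo skipped:", exc)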