# OHLCVPredictor/evaluation.py

import numpy as np
from typing import Dict, List, Tuple

from sklearn.metrics import mean_squared_error, r2_score

try:
    from .custom_xgboost import CustomXGBoostGPU
except ImportError:
    # Fallback so the module also works when run outside the package.
    from custom_xgboost import CustomXGBoostGPU


def _compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, float, float, float]:
    """Compute RMSE, MAPE, R2, and directional accuracy.

    Returns:
        (rmse, mape, r2, directional_accuracy)
    """
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    # MAPE: map zero targets to NaN so they never divide by zero, then
    # exclude them from the mean via nanmean.
    with np.errstate(divide='ignore', invalid='ignore'):
        mape_arr = np.abs((y_true - y_pred) / np.where(y_true == 0, np.nan, y_true))
        mape = float(np.nanmean(mape_arr) * 100.0)
    r2 = float(r2_score(y_true, y_pred))
    # Directional accuracy: the fraction of steps where the predicted move
    # has the same sign as the actual move.
    direction_actual = np.sign(np.diff(y_true))
    direction_pred = np.sign(np.diff(y_pred))
    min_len = min(len(direction_actual), len(direction_pred))
    if min_len == 0:
        dir_acc = 0.0
    else:
        dir_acc = float((direction_actual[:min_len] == direction_pred[:min_len]).mean())
    return rmse, mape, r2, dir_acc
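

# Worked example for _compute_metrics: with y_true = [1.0, 2.0, 3.0] and
# y_pred = [1.0, 2.0, 2.0], the residuals are (0, 0, 1), so RMSE = sqrt(1/3)
# ≈ 0.577, MAPE = mean(0, 0, 1/3) * 100 ≈ 11.1, and R2 = 1 - 1/2 = 0.5.
# Directional accuracy compares sign(diff): [+1, +1] (actual) vs [+1, 0]
# (predicted), matching on 1 of 2 steps, giving 0.5.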


def walk_forward_cv(
    X: np.ndarray,
    y: np.ndarray,
    feature_names: List[str],
    n_splits: int = 5,
) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Run a simple expanding-window walk-forward CV and aggregate metrics and feature importances.

    Returns:
        metrics_avg: Average metrics across folds {rmse, mape, r2, dir_acc}.
        importance_avg: Average feature importance across folds {feature -> importance}.
    """
    num_samples = len(X)
    fold_size = num_samples // (n_splits + 1)
    if fold_size <= 0:
        raise ValueError("Not enough samples for walk-forward CV")
    metrics_accum = {"rmse": [], "mape": [], "r2": [], "dir_acc": []}
    importance_sum = {name: 0.0 for name in feature_names}
    folds_run = 0
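    # Expanding-window layout, for illustration: with num_samples = 600 and
    # n_splits = 5, fold_size = 100, so fold i trains on rows [0, i*100) and
    # tests on [i*100, (i+1)*100); the final fold tests through row 599.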
    for i in range(1, n_splits + 1):
        train_end = i * fold_size
        test_end = (i + 1) * fold_size if i < n_splits else num_samples
        # Train on everything up to the fold boundary; test on the next block.
        X_train, y_train = X[:train_end], y[:train_end]
        X_test, y_test = X[train_end:test_end], y[train_end:test_end]
        if len(X_test) == 0:
            continue
        model = CustomXGBoostGPU(X_train, X_test, y_train, y_test)
        model.train(eval_metric='rmse')
        preds = model.predict(X_test)
        rmse, mape, r2, dir_acc = _compute_metrics(y_test, preds)
        metrics_accum["rmse"].append(rmse)
        metrics_accum["mape"].append(mape)
        metrics_accum["r2"].append(r2)
        metrics_accum["dir_acc"].append(dir_acc)
        fold_importance = model.get_feature_importance(feature_names)
        for name, val in fold_importance.items():
            importance_sum[name] += float(val)
        folds_run += 1
    metrics_avg = {k: float(np.mean(v)) if v else float('nan') for k, v in metrics_accum.items()}
    # Average importances over the folds that actually ran rather than the
    # nominal n_splits, so skipped folds do not dilute the result.
    importance_avg = {k: importance_sum[k] / max(folds_run, 1) for k in feature_names}
    return metrics_avg, importance_avg
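

# Minimal usage sketch, not part of the original module: the synthetic data,
# feature names, and coefficients below are illustrative assumptions. It
# presumes CustomXGBoostGPU is importable and exposes the train / predict /
# get_feature_importance interface used by walk_forward_cv above.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    feature_names = ["open", "high", "low", "volume"]  # hypothetical features
    X_demo = rng.normal(size=(600, len(feature_names)))
    # A linear signal plus noise stands in for real OHLCV-derived features.
    y_demo = X_demo @ np.array([0.5, -0.2, 0.1, 0.3]) + rng.normal(scale=0.1, size=600)
    metrics, importances = walk_forward_cv(X_demo, y_demo, feature_names, n_splits=5)
    print("metrics:", metrics)
    print("importances:", importances)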