# Source code for gloss.surrogate.ml_models

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import lightgbm as lgb


class _RBFRegressorWrapper(BaseEstimator, RegressorMixin):
    """Adapter giving ``scipy.interpolate.RBFInterpolator`` a fit/predict API.

    The constructor only records its arguments; the interpolator itself is
    built in :meth:`fit` and stored on the instance as ``interpolator_``.

    Parameters
    ----------
    kernel : str, default="thin_plate_spline"
        Passed through as the ``kernel`` argument of ``RBFInterpolator``.
    smoothing : float, default=0.0
        Passed through as the ``smoothing`` argument of ``RBFInterpolator``.
    """

    def __init__(self, kernel="thin_plate_spline", smoothing=0.0):
        # Stored unchanged under the same names as the constructor arguments.
        self.kernel = kernel
        self.smoothing = smoothing

    def fit(self, X, y):
        """Build the interpolator from training inputs ``X`` and targets ``y``.

        Returns
        -------
        self
            The same instance, now carrying ``interpolator_``.
        """
        # Imported here rather than at module level, as in the original code.
        from scipy.interpolate import RBFInterpolator as _Interpolator

        options = {"kernel": self.kernel, "smoothing": self.smoothing}
        self.interpolator_ = _Interpolator(X, y, **options)
        return self

    def predict(self, X):
        """Evaluate the fitted interpolator at the points in ``X``.

        Requires a prior call to :meth:`fit`; otherwise ``interpolator_``
        does not exist yet.
        """
        predictions = self.interpolator_(X)
        return predictions


def get_ml_model_configs():
    """Return the list of ML model configs used for auto-selection.

    Every call builds fresh estimator and kernel instances, so the returned
    objects are not shared between calls.

    Returns
    -------
    list of dict
        One dict per candidate model, each with the keys:

        ``"name"``
            Short string label for the model.
        ``"estimator"``
            An unfitted estimator instance (a ``Pipeline`` with a
            ``StandardScaler`` step for SVR).
        ``"param_grid"``
            Dict mapping parameter names to lists of candidate values.
            For the SVR pipeline the names carry the ``svr__`` step prefix.
            Presumably consumed by a grid search -- confirm against caller.
    """
    return [
        {
            "name": "GaussianProcess",
            "estimator": GaussianProcessRegressor(random_state=42),
            "param_grid": {
                "kernel": [
                    ConstantKernel() * RBF(),
                    ConstantKernel() * Matern(nu=1.5),
                    ConstantKernel() * Matern(nu=2.5),
                ],
                "alpha": [1e-10, 1e-5, 1e-2],
            },
        },
        {
            "name": "RandomForest",
            "estimator": RandomForestRegressor(random_state=42),
            "param_grid": {
                "n_estimators": [50, 100, 200],
                "max_depth": [None, 10, 20],
            },
        },
        {
            "name": "XGBoost",
            "estimator": xgb.XGBRegressor(random_state=42, verbosity=0),
            "param_grid": {
                "n_estimators": [50, 100, 200],
                "max_depth": [3, 6, 10],
                "learning_rate": [0.01, 0.1, 0.3],
            },
        },
        {
            "name": "LightGBM",
            "estimator": lgb.LGBMRegressor(random_state=42, verbosity=-1),
            "param_grid": {
                "n_estimators": [50, 100, 200],
                "max_depth": [-1, 10, 20],
                "learning_rate": [0.01, 0.1, 0.3],
            },
        },
        {
            "name": "SVR",
            # The scaler runs before the SVR step inside the pipeline.
            "estimator": Pipeline([
                ("scaler", StandardScaler()),
                ("svr", SVR()),
            ]),
            "param_grid": {
                "svr__C": [0.1, 1, 10],
                "svr__kernel": ["rbf", "linear"],
                "svr__epsilon": [0.01, 0.1],
            },
        },
        {
            "name": "KNN",
            "estimator": KNeighborsRegressor(),
            "param_grid": {
                "n_neighbors": [3, 5, 10],
                "weights": ["uniform", "distance"],
            },
        },
        {
            "name": "RBF",
            "estimator": _RBFRegressorWrapper(),
            "param_grid": {
                "kernel": ["thin_plate_spline", "multiquadric", "cubic"],
                "smoothing": [0.0, 0.1, 1.0],
            },
        },
    ]