Full GP Example¶
This is a walk-through of a full example using the GP algorithm. This algorithm is a bit expensive to train but I think it is a good starting point. We can upgrade this method to more sparse methods so that we can train
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import WhiteKernel, ConstantKernel, RBF
from sklearn.multioutput import MultiOutputRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time as time
import sys
sys.path.insert(0, "/Users/eman/Documents/code_projects/ml4ocean")
from src.models.utils import MultiTaskGP, PCATargetTransform
# Make Fake Dataset
X, y = make_regression(
    n_samples=1000, 
    n_features=10,    # Total Features
    n_informative=3,   # Informative Features 
    n_targets=10,
    bias=10,
    noise=0.8,
    random_state=123
)
# Training and Testing
xtrain, xtest, ytrain, ytest = train_test_split(X, y, train_size=500, random_state=123)
GP - Standard¶
# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()
# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)
# train GP Model
t0 = time.time()
gp_model.fit(xtrain, ytrain)
t1 = time.time() - t0
# Predictions
ypred, ystd = gp_model.predict(xtest, return_std=True)
# Get Stats
mae = mean_absolute_error(ypred, ytest)
mse = mean_squared_error(ypred, ytest)
rmse = np.sqrt(mse)
r2 = r2_score(ypred, ytest)
print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
GP - MultiOutput w. PCA Transformer (Manually)¶
# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()
# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)
# Define target transformer
pca_model = PCA(n_components=3)
# Transform Targes
ytrain_red = pca_model.fit_transform(ytrain)
# train GP Model
t0 = time.time()
gp_model.fit(xtrain, ytrain_red)
t1 = time.time() - t0
# Predictions
ypred_red, ystd = gp_model.predict(xtest, return_std=True)
# Inverse transform predictions
ypred = pca_model.inverse_transform(ypred_red)
# Get Stats
mae = mean_absolute_error(ypred, ytest)
mse = mean_squared_error(ypred, ytest)
rmse = np.sqrt(mse)
r2 = r2_score(ypred, ytest)
print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
w. TargetTransformerClass¶
Note: This does not give confidence intervals. I will have to modify the code-base later for this to work.
# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()
# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)
# Define target transformer
pca_model = PCA(n_components=3)
# Define Wrapper for target transformation
full_regressor = TransformedTargetRegressor(
    regressor=gp_model,
    transformer=pca_model,   # same number of components as informative
    check_inverse=False                 # PCA is not a direct inverse transformation
)
# train GP Model
t0 = time.time()
full_regressor.fit(xtrain, ytrain)
t1 = time.time() - t0
# Predictions
ypred = full_regressor.predict(xtest)
# Get Stats
mae = mean_absolute_error(ypred, ytest)
mse = mean_squared_error(ypred, ytest)
rmse = np.sqrt(mse)
r2 = r2_score(ypred, ytest)
print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
GP - Multitask w. PCA Transformer¶
Note: still a working progress...
# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()
# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)
# Define Multioutput function
gp_model_multi = MultiOutputRegressor(
    gp_model, 
    n_jobs=1,              # Number of cores to use to parallelize the training
)
# Define target transformer
pca_model = PCA(n_components=3)
# Define Wrapper for target transformation
full_regressor = TransformedTargetRegressor(
    regressor=gp_model_multi,
    transformer=pca_model,   # same number of components as informative
    check_inverse=False                 # PCA is not a direct inverse transformation
)
# Fit Model
t0 = time.time()
full_regressor.fit(xtrain, ytrain)
t1 = time.time() - t0
# Predict with test set
ypred = full_regressor.predict(xtest)
# Get Stats
mae = mean_absolute_error(ypred, ytest)
mse = mean_squared_error(ypred, ytest)
rmse = np.sqrt(mse)
r2 = r2_score(ypred, ytest)
print(
    f"GP Model:\n"
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
Interestingly enough, we got worse results for this model than the previous. Probably because of the uninformative features for each layer. It makes me skeptical to use the multi-task GP.