
Full GP Example

This is a walk-through of a full example using the GP algorithm. A full GP is fairly expensive to train, but I think it is a good starting point. Later we can upgrade to sparser approximations so that we can train on larger datasets.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import WhiteKernel, ConstantKernel, RBF
from sklearn.multioutput import MultiOutputRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time

# Make Fake Dataset
X, y = make_regression(
    n_samples=1000,
    n_features=10,     # Total Features
    n_informative=3,   # Informative Features
    n_targets=10,
    bias=10,
    noise=0.8,
    random_state=123,
)

# Training and Testing
xtrain, xtest, ytrain, ytest = train_test_split(X, y, train_size=500, random_state=123)

GP - Standard

# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()

# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)


# train GP Model
t0 = time.time()
gp_model.fit(xtrain, ytrain)
t1 = time.time() - t0
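
After fitting, the optimized hyperparameters are stored in the GP's `kernel_` attribute; a quick sanity check of the learned length-scale and noise level:

# Inspect the optimized kernel hyperparameters
print(gp_model.kernel_)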

# Predictions
ypred, ystd = gp_model.predict(xtest, return_std=True)
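
The returned std gives pointwise uncertainty, so we can form rough 95% intervals (a sketch; depending on the scikit-learn version, `ystd` may come back 1-d or 2-d, so the shapes are aligned first):

# Approximate 95% predictive intervals from the GP's std
ystd_2d = ystd if ystd.ndim == ypred.ndim else ystd[:, None]
lower, upper = ypred - 1.96 * ystd_2d, ypred + 1.96 * ystd_2d
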
# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
MAE: 0.697
MSE: 0.762
RMSE: 0.873
R2: 1.000 
Time: 10.6 seconds

GP - MultiOutput w. PCA Transformer (Manually)

# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()

# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)

# Define target transformer
pca_model = PCA(n_components=3)

# Transform Targets
ytrain_red = pca_model.fit_transform(ytrain)
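
Since the data were generated with 3 informative features, the targets should lie (up to noise) in a 3-dimensional subspace, so the first three principal components should capture essentially all of the target variance; a quick check:

# Fraction of target variance captured by each kept component
print(pca_model.explained_variance_ratio_)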


# train GP Model
t0 = time.time()
gp_model.fit(xtrain, ytrain_red)
t1 = time.time() - t0

# Predictions
ypred_red, ystd = gp_model.predict(xtest, return_std=True)

# Inverse transform predictions
ypred = pca_model.inverse_transform(ypred_red)
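
Note that `ystd` still lives in the reduced space. A rough way to push it through the linear PCA map (a sketch assuming independent component stds and that `ystd` has shape (n_samples, n_components); some scikit-learn versions return a single shared std instead):

# Linear error propagation: Var(y_j) ~= sum_k Var(z_k) * W[k, j]^2
ystd_full = np.sqrt((ystd ** 2) @ (pca_model.components_ ** 2))
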
# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
MAE: 0.690
MSE: 0.748
RMSE: 0.865
R2: 1.000 
Time: 5.88 seconds

GP - MultiOutput w. PCA Transformer (TransformedTargetRegressor)

Note: this does not give confidence intervals out of the box. I will have to modify the codebase later for this to work; see the sketch after the prediction cell below.

# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()

# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)

# Define target transformer
pca_model = PCA(n_components=3)


# Define Wrapper for target transformation
full_regressor = TransformedTargetRegressor(
    regressor=gp_model,
    transformer=pca_model,  # same number of components as informative
    check_inverse=False,    # PCA is not a direct inverse transformation
)

# train GP Model
t0 = time.time()
full_regressor.fit(xtrain, ytrain)
t1 = time.time() - t0

# Predictions
ypred = full_regressor.predict(xtest)
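
The wrapper's predict drops the GP's predictive std, but as a possible workaround (a sketch using the fitted `regressor_` and `transformer_` attributes of `TransformedTargetRegressor`) we can call the underlying GP directly:

# Sketch: bypass the wrapper to recover the reduced-space std
ypred_red, ystd_red = full_regressor.regressor_.predict(xtest, return_std=True)
ypred_manual = full_regressor.transformer_.inverse_transform(ypred_red)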

# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
MAE: 0.690
MSE: 0.748
RMSE: 0.865
R2: 1.000 
Time: 6.01 seconds

GP - Multitask w. PCA Transformer

Note: still a work in progress...

# define kernel function
kernel = ConstantKernel() * RBF() + WhiteKernel()

# define GP model
gp_model = GaussianProcessRegressor(
    kernel=kernel,            # kernel function (very important)
    normalize_y=True,         # good standard practice
    random_state=123,         # reproducibility
    n_restarts_optimizer=10,  # good practice (avoids local minima)
)

# Define Multioutput function
gp_model_multi = MultiOutputRegressor(
    gp_model, 
    n_jobs=1,              # Number of cores to use to parallelize the training
)

# Define target transformer
pca_model = PCA(n_components=3)

# Define Wrapper for target transformation
full_regressor = TransformedTargetRegressor(
    regressor=gp_model_multi,
    transformer=pca_model,  # same number of components as informative
    check_inverse=False,    # PCA is not a direct inverse transformation
)


# Fit Model
t0 = time.time()
full_regressor.fit(xtrain, ytrain)
t1 = time.time() - t0

# Predict with test set
ypred = full_regressor.predict(xtest)
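
For reference, `MultiOutputRegressor` fits one independent GP per transformed target, so after fitting there should be one estimator per PCA component:

# One fitted GP per PCA component (3 here)
print(len(full_regressor.regressor_.estimators_))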

# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"GP Model:\n"
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
GP Model:
MAE: 67.818
MSE: 7901.428
RMSE: 88.890
R2: -10.353 
Time: 12.9 seconds

Interestingly enough, this model performs far worse than the previous ones, probably because of the uninformative features, which each per-component GP now has to handle independently. It makes me skeptical about using the multi-task GP.
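
One way to probe that hypothesis (a sketch, not run here): give the RBF one length-scale per feature (ARD), so the optimizer can effectively switch off uninformative dimensions; large learned length-scales flag irrelevant features.

# Hypothetical follow-up: ARD kernel with one length-scale per feature
ard_kernel = ConstantKernel() * RBF(length_scale=np.ones(xtrain.shape[1])) + WhiteKernel()
gp_ard = GaussianProcessRegressor(
    kernel=ard_kernel,
    normalize_y=True,
    random_state=123,
    n_restarts_optimizer=10,
)
gp_ard.fit(xtrain, ytrain)
print(gp_ard.kernel_.k1.k2.length_scale)  # large values -> uninformative features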