Stochastic Variational GP¶
Warning: a GPU is required for this notebook to run.
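Before training, it can help to confirm that TensorFlow actually sees a GPU. A minimal check, assuming the TensorFlow 1.x API used elsewhere in this notebook:

import tensorflow as tf

# Report whether TensorFlow can see a GPU and which device it would use
print("GPU available:", tf.test.is_gpu_available())
print("GPU device:", tf.test.gpu_device_name())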
import sys
# Add the path to the models
sys.path.insert(0, '/media/disk/erc/papers/2019_ML_OCN/code/ml4ocean')
from src.models.utils import MultiTaskGP
from src.models.gpflow_gpu import SVGP, MOSVGP
%load_ext autoreload
%autoreload 2
import time
import numpy as np
import gpflow
import tensorflow as tf
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Make Fake Dataset
X, y = make_regression(
    n_samples=10000,
    n_features=10,     # Total Features
    n_informative=3,   # Informative Features
    n_targets=10,
    bias=10,
    noise=0.8,
    random_state=123
)
train_size = 3000
# Training and Testing
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, train_size=train_size, random_state=123
)
xtrain.shape, ytrain.shape
# Sanity check: library versions
gpflow.__version__
tf.__version__
SVGP Algorithm¶
# Test Arguments to Ensure it works
class TestArgs:
    num_inducing = 100
    iterations = 10
    small_iterations = 10
    adam_lr = 0.01
    gamma = 0.1
    minibatch_size = 100
    initial_likelihood_var = 0.01
    seed = 0
# Better training arguments
class Args:
    num_inducing = 100
    iterations = 10000
    small_iterations = 1000
    adam_lr = 0.01
    gamma = 0.1
    minibatch_size = 1000
    initial_likelihood_var = 0.01
    seed = 0  # added so Args exposes the same fields as TestArgs
# Good practice
gpflow.reset_default_graph_and_session()
# Initialize Model
gp_model = SVGP(
    num_inducing=TestArgs.num_inducing,
    iterations=TestArgs.iterations,
    small_iterations=TestArgs.small_iterations,
    adam_lr=TestArgs.adam_lr,
    gamma=TestArgs.gamma,
    minibatch_size=TestArgs.minibatch_size,
    initial_likelihood_var=TestArgs.initial_likelihood_var,
    seed=TestArgs.seed
)
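The SVGP class here is a project-specific wrapper from src.models.gpflow_gpu, so its internals are not shown in this notebook. As a rough sketch of what an equivalent setup looks like with the plain GPflow 1.x API (the inducing-point selection, kernel, and optimizer choices below are assumptions, not the wrapper's actual code):

# Sketch only: plain GPflow 1.x SVGP roughly equivalent to the wrapper above
import numpy as np
import gpflow

# Pick inducing inputs as a random subset of the training data
Z = xtrain[np.random.choice(xtrain.shape[0], TestArgs.num_inducing, replace=False)].copy()
kernel = gpflow.kernels.RBF(input_dim=xtrain.shape[1], ARD=True)
likelihood = gpflow.likelihoods.Gaussian(variance=TestArgs.initial_likelihood_var)

svgp = gpflow.models.SVGP(
    xtrain, ytrain, kern=kernel, likelihood=likelihood,
    Z=Z, minibatch_size=TestArgs.minibatch_size
)

# Stochastic optimization of the ELBO with Adam
gpflow.train.AdamOptimizer(TestArgs.adam_lr).minimize(svgp, maxiter=TestArgs.iterations)

# Predictive mean and variance at the test inputs
mu, var = svgp.predict_y(xtest)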
# Fit Model to Data
t0 = time.time()
gp_model.fit(xtrain, ytrain)
t1 = time.time() - t0
print(f"Training Time: {t1:.3} seconds")
# Predict with test set
t0 = time.time()
ypred, ystd = gp_model.predict(xtest, return_std=True)
t1 = time.time() - t0
# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)
print(
    f"GP Model:\n"
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}"
    f"\nTime: {t1:.3} seconds"
)
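Since y has 10 target dimensions, the aggregate scores above can hide per-output differences. A small optional check using scikit-learn's multioutput="raw_values" option (not part of the original run):

# Per-output R2 scores, one value per target dimension
r2_per_output = r2_score(ytest, ypred, multioutput="raw_values")
print("R2 per output:", np.round(r2_per_output, 3))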
MultiOutput¶
# Good practice
gpflow.reset_default_graph_and_session()
# Initialize Model
mogp_model = MOSVGP(
    num_inducing=TestArgs.num_inducing,
    iterations=TestArgs.iterations,
    small_iterations=TestArgs.small_iterations,
    adam_lr=TestArgs.adam_lr,
    gamma=TestArgs.gamma,
    minibatch_size=TestArgs.minibatch_size,
    initial_likelihood_var=TestArgs.initial_likelihood_var,
    seed=TestArgs.seed
)
# Fit Model to Data
t0 = time.time()
mogp_model.fit(xtrain, ytrain)
t1 = time.time() - t0
print(f"Training Time: {t1:.3} seconds")
# Predict with test set
t0 = time.time()
ypred, ystd = mogp_model.predict(xtest, return_std=True)
t1 = time.time() - t0
# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)
print(
    f"MOGP Model:\n"
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}"
    f"\nTime: {t1:.3} seconds"
)
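Because predict also returns a predictive standard deviation, a quick calibration check is possible. A minimal sketch, assuming ystd has the same shape as ypred (one standard deviation per output):

# Fraction of test targets falling inside the 95% predictive interval
lower = ypred - 1.96 * ystd
upper = ypred + 1.96 * ystd
coverage = np.mean((ytest >= lower) & (ytest <= upper))
print(f"Empirical 95% coverage: {coverage:.3f}")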