
More Scalable GPs

In this notebook I will demonstrate a few implementations of some more advanced and scalable GPs. These methods approximate the full GP using inducing inputs (a small, cleverly chosen set of pseudo-points that summarizes the entire dataset). This reduces the cost of the GP algorithms from $\mathcal{O}(N^3)$ to $\mathcal{O}(NM^2)$, where $N$ is the number of samples and $M$ is the number of inducing inputs.

The algorithms to be used below are as follows:

* Sparse GP - FITC Approximation (TODO)
* Sparse GP - VFE Approximation (Done)
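Before getting to the wrappers used below, here is a minimal sketch of the inducing-point idea using GPy's built-in SparseGPRegression (VFE/DTC-style inference). The data, variable names, and the choice of 50 inducing points are purely illustrative and not part of the pipeline that follows.

# Minimal sketch: sparse GP regression with M inducing inputs (illustrative only)
import numpy as np
import GPy

X_demo = np.random.randn(1000, 5)                       # N = 1000 samples, 5 features
y_demo = np.sin(X_demo[:, :1]) + 0.1 * np.random.randn(1000, 1)

kernel_demo = GPy.kern.RBF(input_dim=5)
sparse_gp = GPy.models.SparseGPRegression(
    X_demo, y_demo,
    kernel=kernel_demo,
    num_inducing=50,        # M = 50 << N, so cost is O(N M^2) rather than O(N^3)
)
sparse_gp.optimize('lbfgs', max_iters=200)
mu, var = sparse_gp.predict(X_demo[:10])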

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from sklearn.decomposition import PCA
# from sklearn.multioutput import MultiOutputRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.utils.estimator_checks import check_estimator
import time as time

%load_ext autoreload
%autoreload 2
# check_estimator(SVGP)

Import Module

Here you need to add the path to the models directory. There are automated ways to do this, but for a simple case you can manually add the directory whenever you start the notebook. See below.

import sys

# Add the path to the models
sys.path.insert(0, '/media/disk/erc/papers/2019_ML_OCN/code/ml4ocean')
from src.models.utils import MultiTaskGP

# Import the GP Functions
from src.models.gpy import SGP, SVGP
import GPy

%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
# Make Fake Dataset
X, y = make_regression(
    n_samples=20000, 
    n_features=20,    # Total Features
    n_informative=3,   # Informative Features 
    n_targets=10,
    bias=10,
    noise=0.8,
    random_state=123,
)
train_size = 10000

# Training and Testing
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, train_size=train_size, random_state=123
)

xtrain.shape, ytrain.shape
((10000, 20), (10000, 10))

Sparse Variational GP - MultiOutput (Shared Kernel, Shared Outputs)

# Define Kernel Function
input_dimensions = X.shape[1]
kernel = GPy.kern.RBF(
    input_dim=input_dimensions, 
    ARD=False
)

# define GP model
n_inducing = 25
gp_model = SVGP(
    kernel=kernel,
    n_inducing=n_inducing,
    max_iters=300, 
    optimizer='lbfgs',
    verbose=True,
    n_restarts=10
)


# train GP Model
# print(xtrain, ytrain)
t0 = time.time()
gp_model.fit(xtrain, ytrain)
t1 = time.time() - t0

# Predictions
ypred, ystd = gp_model.predict(xtest, return_std=True)

print(
#     f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f"Training Time: {t1:.3} seconds"
)
ypred.shape, ystd.shape
gp_model.display_model()
# Get Stats
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
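Since there are 10 targets, it can also be useful to inspect the scores per output rather than only the uniform average that sklearn returns by default. This small check is an optional addition, not part of the original run:

# Optional: per-target scores for the multi-output predictions
per_target_r2 = r2_score(ytest, ypred, multioutput='raw_values')
per_target_mae = mean_absolute_error(ytest, ypred, multioutput='raw_values')

for i, (r2_i, mae_i) in enumerate(zip(per_target_r2, per_target_mae)):
    print(f"Target {i}: R2 = {r2_i:.3f}, MAE = {mae_i:.3f}")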

SVGP - Multi-Task

# Define Kernel Function
input_dimensions = X.shape[1]
kernel = GPy.kern.RBF(
    input_dim=input_dimensions, 
    ARD=False
)

# define GP model
n_inducing = 100
gp_model = SVGP(
    kernel=kernel,
    n_inducing=n_inducing,
    max_iters=300, 
    optimizer='lbfgs',
    verbose=False,
    n_restarts=0
)

# Define the multi-output wrapper (fits one GP per target)
gp_model_multi = MultiTaskGP(
    gp_model, 
    n_jobs=1,              # Number of cores used to parallelize the training
)

# train GP Model
t0 = time.time()
gp_model_multi.fit(xtrain, ytrain)
t1 = time.time() - t0

print(
    f"Training \nTime: {t1:.3} seconds"
)
KeyboardInterrupt caught, calling on_optimization_end() to round things up

[Full traceback omitted. The fit was interrupted manually (KeyboardInterrupt) while L-BFGS was optimizing the sparse GP inside sklearn's per-target fitting loop (sklearn.multioutput → joblib → GPy/paramz → scipy.optimize.fmin_l_bfgs_b).]
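As the (truncated) traceback shows, MultiTaskGP delegates to sklearn's multioutput machinery: it fits an independent copy of the base GP on each target column, optionally in parallel via joblib. The actual class lives in src.models.utils and is not shown here; the following is only a rough sketch of an equivalent wrapper under that assumption.

# Rough sketch (assumption) of a per-target multi-output GP wrapper
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator, RegressorMixin, clone


def _fit_single_target(estimator, X, y_col):
    # clone the base estimator so each target gets its own fitted GP
    return clone(estimator).fit(X, y_col)


class SimpleMultiTaskGP(BaseEstimator, RegressorMixin):
    def __init__(self, estimator, n_jobs=1):
        self.estimator = estimator
        self.n_jobs = n_jobs

    def fit(self, X, y):
        # one GP per target column, fitted (optionally) in parallel
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_single_target)(self.estimator, X, y[:, i])
            for i in range(y.shape[1])
        )
        return self

    def predict(self, X):
        # stack per-target predictions into an (n_samples, n_targets) array
        return np.column_stack([est.predict(X) for est in self.estimators_])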
# Predict with test set
t0 = time.time()
ypred1, ystd = gp_model_multi.predict(xtest, return_std=True)
print(ypred1.shape, ystd.shape)
ypred2 = gp_model_multi.predict(xtest)
np.testing.assert_array_equal(ypred1, ypred2)
t1 = time.time() - t0
# Get Stats
mae = mean_absolute_error(ytest, ypred2)
mse = mean_squared_error(ytest, ypred2)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred2)

print(
    f"GP Model:\n"
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
# Get Stats (recomputed for the shared-output SVGP predictions, ypred)
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

print(
    f"MAE: {mae:.3f}\nMSE: {mse:.3f}\nRMSE: {rmse:.3f}\nR2: {r2:.3f}" 
    f" \nTime: {t1:.3} seconds"
)
# Define target transformer
pca_model = PCA(n_components=3)

# Transform Targets
ytrain_red = pca_model.fit_transform(ytrain)

# Later, predictions made in the reduced space can be mapped back with:
# ypred = pca_model.inverse_transform(ypred_red)
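The notebook stops here, but the intended last step is presumably to fit the multi-task GP on the PCA-reduced targets, predict in the reduced space, and then map the predictions back with inverse_transform. A hedged sketch of that step follows; gp_model_red, ypred_red, and ypred_full are illustrative names, not outputs from the cells above.

# Sketch (assumption): train on the 3 PCA components instead of the 10 raw targets
gp_model_red = MultiTaskGP(gp_model, n_jobs=1)
gp_model_red.fit(xtrain, ytrain_red)

# Predict in the reduced space and map back to the original target space
ypred_red = gp_model_red.predict(xtest)
ypred_full = pca_model.inverse_transform(ypred_red)

print(ypred_red.shape, ypred_full.shape)   # (10000, 3) (10000, 10)

Alternatively, sklearn's TransformedTargetRegressor (imported at the top but unused so far) can bundle the target transform and its inverse into a single estimator.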