Skip to content

1 control output transform

import sys

# The project's ``src`` directory must be on the import path before the
# ``data`` package below can be resolved.
sys.path.insert(0, '/home/emmanuel/projects/2020_ml_ocn/ml4ocean/src')

from sklearn.model_selection import train_test_split

from data.make_dataset import DataLoad

dataloader = DataLoad()

# Fetch the control inputs/outputs for the 'na' key
# (NOTE(review): presumably a region code -- confirm against DataLoad).
X, y = dataloader.load_control_data('na')

# Inputs: keep only the loader's core variables.
X = X[dataloader.core_vars]
# Outputs: strip the loader's metadata columns.
y = y.drop(dataloader.meta_vars, axis=1)

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
split_options = {"train_size": 0.8, "random_state": 123}
xtrain, xtest, ytrain, ytest = train_test_split(
    X.values, y.values, **split_options
)

# Normalization - Inputs

from sklearn.preprocessing import StandardScaler

# Standardise the inputs. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits.
x_scaler = StandardScaler().fit(xtrain)
xtrain_norm = x_scaler.transform(xtrain)
xtest_norm = x_scaler.transform(xtest)

# Normalization - Outputs

from sklearn.decomposition import PCA

# Output transformer: a PCA projection of the targets onto a fixed number of
# components. It is only constructed here, not fitted.
n_components, random_state = 10, 123
y_scaler = PCA(random_state=random_state, n_components=n_components)

# Model I

from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

# Linear regression on the normalised inputs, with the targets passed through
# ``y_scaler`` before fitting.
base_regressor = LinearRegression(n_jobs=-1)
lin_model = TransformedTargetRegressor(
    regressor=base_regressor,
    transformer=y_scaler,
    # Skip the round-trip check of the target transformer
    # (NOTE(review): presumably because a truncated PCA is not exactly
    # invertible -- confirm).
    check_inverse=False,
)

# The trailing semicolon is kept on purpose: in a notebook cell it suppresses
# the echoed estimator repr.
lin_model.fit(xtrain_norm, ytrain);