# 1. Control output transform
import sys

# The project's ``src`` directory is pushed to the front of the module search
# path so that the ``data`` package below resolves to the project copy.
sys.path.insert(0, '/home/emmanuel/projects/2020_ml_ocn/ml4ocean/src')
from data.make_dataset import DataLoad

# Fetch the control inputs/targets for the 'na' key
# (presumably a region code such as North Atlantic -- confirm in DataLoad).
dataloader = DataLoad()
X, y = dataloader.load_control_data('na')

# Inputs: keep only the columns named in ``dataloader.core_vars``.
X = X[dataloader.core_vars]
# Targets: remove the columns named in ``dataloader.meta_vars``.
y = y.drop(columns=dataloader.meta_vars)
from sklearn.model_selection import train_test_split

# 80% of the rows go to training, the remainder to testing; the fixed seed
# keeps the shuffle reproducible between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    X.values,
    y.values,
    random_state=123,
    train_size=0.8,
)
# Normalization - Inputs
from sklearn.preprocessing import StandardScaler

# Standardize the inputs. The scaler is fitted on the training split only and
# the same fitted statistics are then applied to both splits, so nothing from
# the test split leaks into the normalization.
x_scaler = StandardScaler().fit(xtrain)
xtrain_norm = x_scaler.transform(xtrain)
xtest_norm = x_scaler.transform(xtest)
# Normalization - Outputs
from sklearn.decomposition import PCA

# Output transform: a PCA keeping 10 components, seeded for reproducibility.
# It is only configured here; nothing is fitted in this cell.
random_state = 123
n_components = 10
y_scaler = PCA(random_state=random_state, n_components=n_components)
# Model I
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

# Linear regression wrapped so that the targets are passed through
# ``y_scaler`` before fitting and mapped back at prediction time.
lin_model = TransformedTargetRegressor(
    regressor=LinearRegression(n_jobs=-1),
    transformer=y_scaler,
    # The round-trip check is switched off -- presumably because the truncated
    # PCA is not exactly invertible (confirm against the target width).
    check_inverse=False,
)

# Trailing semicolon kept on purpose: in a notebook it stops the fitted
# estimator from being echoed as cell output.
lin_model.fit(xtrain_norm, ytrain);