Source code for skbel.testing.test_basic

"""pytest unit tests for skbel."""

#  Copyright (c) 2022. Robin Thibaut, Ghent University

import os
from os.path import join as jp

import numpy as np
import scipy
from sklearn.cross_decomposition import CCA
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler

from skbel import BEL
from skbel.tmaps import TransportMap

# Get file path
my_path = os.path.dirname(os.path.abspath(__file__))


[docs] def init_bel(): """Set all BEL pipelines. This is the blueprint of the framework. """ # Pipeline before CCA X_pre_processing = Pipeline( [ ("scaler", StandardScaler(with_mean=False)), ("pca", PCA(n_components=50)), ] ) Y_pre_processing = Pipeline( [ ("scaler", StandardScaler(with_mean=False)), ("pca", PCA(n_components=30)), ] ) # Canonical Correlation Analysis cca = CCA(n_components=30) # Pipeline after CCA X_post_processing = Pipeline( [("normalizer", PowerTransformer(method="yeo-johnson", standardize=True))] ) Y_post_processing = Pipeline( [("normalizer", PowerTransformer(method="yeo-johnson", standardize=True))] ) # Initiate BEL object bel_model = BEL( X_pre_processing=X_pre_processing, X_post_processing=X_post_processing, Y_pre_processing=Y_pre_processing, Y_post_processing=Y_post_processing, regression_model=cca, ) return bel_model
[docs] def test_mvn(): """Compare posterior mean and covariance with reference default values.""" # Initiate BEL object bel = init_bel() # Set seed seed = 123456 bel.seed = seed bel.mode = "mvn" bel.n_posts = 10 X_train = np.load(jp(my_path, "X_train.npy")) y_train = np.load(jp(my_path, "y_train.npy")) X_test = np.load(jp(my_path, "X_test.npy")) # Fit bel.fit(X=X_train, Y=y_train) # Predict posterior mean and covariance bel.predict(X_obs=X_test) # Compare with reference ref_mean = np.load(jp(my_path, "ref_mean.npy")) ref_covariance = np.load(jp(my_path, "ref_covariance.npy")) msg1 = "The posterior means are different" np.testing.assert_array_almost_equal(bel.posterior_mean[0], ref_mean, err_msg=msg1) msg2 = "The posterior covariances are different" np.testing.assert_allclose(bel.posterior_covariance[0], ref_covariance, atol=1e-3, err_msg=msg2)
[docs] def test_kde(): """Compare posterior samples with reference default values.""" # %% Initiate BEL model # Initiate BEL object model = init_bel() # Set seed seed = 123456 model.seed = seed X_train = np.load(jp(my_path, "X_train.npy")) y_train = np.load(jp(my_path, "y_train.npy")) X_test = np.load(jp(my_path, "X_test.npy")) # %% Set model parameters model.mode = "kde" # How to compute the posterior conditional distribution # Number of samples to be extracted from the posterior distribution model.n_posts = 10 # %% Train the model # Fit BEL model model.fit(X=X_train, Y=y_train) # Sample for the observation # Extract n random sample (target CV's). # The posterior distribution is computed within the method below. y_samples = model.predict(X_test) # np.save(jp(my_path, "y_samples_kde.npy"), y_samples) y_samples_ref = np.load(jp(my_path, "y_samples_kde.npy")) msg1 = "The posterior samples are different" np.testing.assert_allclose(y_samples, y_samples_ref, atol=2, err_msg=msg1)
[docs] def test_tm(): """Compare posterior samples with reference default values.""" N = 1000 X = scipy.stats.norm.rvs(size=(N, 2), random_state=123456) b = 1 # Twist factor X[:, 1] += b * X[:, 0] ** 2 nonmonotone = [ [ [], [0], [0, 0, "HF"], [0, 0, 0, "HF"], [0, 0, 0, 0, "HF"], [0, 0, 0, 0, 0, "HF"], [0, 0, 0, 0, 0, 0, "HF"], [0, 0, 0, 0, 0, 0, 0, "HF"], ] ] monotone = [ [ [1], "iRBF 1", "iRBF 1", "iRBF 1", "iRBF 1", "iRBF 1", "iRBF 1", "iRBF 1", "iRBF 1", ] ] tm = TransportMap( monotone=monotone, nonmonotone=nonmonotone, X=X, polynomial_type="probabilist's hermite", monotonicity="separable monotonicity", standardize_samples=True, workers=1, ) tm.optimize() norm_samples = scipy.stats.norm.rvs(size=(500, 1), random_state=42) # noqa x1_obs = 2.32 X_precalc = np.ones((500, 1)) * x1_obs # Now invert the map conditionally. X_star are the posterior samples. X_star = tm.inverse_map(X_precalc=X_precalc, Y=norm_samples) X_star_ref = np.load(jp(my_path, "x_star_tm.npy")) msg1 = "The inverted samples are different" np.testing.assert_allclose(X_star, X_star_ref, atol=1e-3, err_msg=msg1)