noxer.gm.metrics module
Evaluation metrics for the quality of outputs of generative models.
""" Evaluation metrics for quality of outputs of generative models. """ from sklearn.model_selection import train_test_split, GridSearchCV from sklearn.preprocessing import StandardScaler from sklearn.pipeline import Pipeline from sklearn.dummy import DummyClassifier from sklearn.svm import SVC, LinearSVC from sklearn.ensemble import GradientBoostingClassifier from sklearn.metrics import r2_score import numpy as np def distribution_similarity(X_true, X_pred, cross_testing=False): # """Compares the similarity of two distributions using a set of samples from "truth" distribution X_true and "predicted" distribution X_pred. Is useful for estimation of quality of GAN's and VAE's and the like. Parameters ---------- X_true : array-like of shape = (n_samples, n_outputs) Samples from "ground truth" distribution. X_pred : array-like of shape = (n_samples, n_outputs) Samples from "ground truth" distribution. cross_testing : bool, optional Whether to use cross-validation like approach for testing. Returns ------- z : float The similarity score for two distributions, calculated from the generalization estimate of the model that is trained to distinguish between two sets of samples. """ pipe = Pipeline([ ('scaler', StandardScaler()), ('model', DummyClassifier()) ]) dummy_search = { 'model__strategy': ["stratified", "most_frequent", "uniform"] } lin_search = { 'model': [LinearSVC()], 'model__penalty': ['l1', 'l2'], 'model__dual': [False], 'model__C': 10 ** np.linspace(-10, 10), 'model__max_iter': [10000], } gb_search = { 'model': [GradientBoostingClassifier()], 'model__learning_rate': [1.0, 0.1, 0.01, 0.001], 'model__n_estimators': [2 ** i for i in range(11)], } model = GridSearchCV( pipe, [dummy_search, lin_search, gb_search], # svc_search n_jobs=-1, verbose=0 ) X = np.row_stack([X_true, X_pred]) X = X.reshape((len(X), -1)) y = np.concatenate([ np.ones(len(X_true)), np.zeros(len(X_pred)) ]) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, stratify=y) score = model.fit(X_train, y_train).score(X_test, y_test) # scale the error to be in range from 0.0 to 1.0 U, C = np.unique(y_test, return_counts=True) scale = max(C * 1.0) / sum(C * 1.0) score = (1.0 - score)/scale score = min(1.0, score) return score if __name__ == "__main__": # example usage X1 = np.random.randn(512,2) for offset in [0.1, 0.2, 0.4, 0.8, 1.6, 3.2]: X2 = np.random.randn(512,2) + offset sim = distribution_similarity(X1, X2) print(sim)
Functions
def distribution_similarity(X_true, X_pred, cross_testing=False)
Compares the similarity of two distributions using sets of samples from the "truth" distribution X_true and the "predicted" distribution X_pred. Useful for estimating the quality of GANs, VAEs, and similar generative models.
Parameters
X_true : array-like of shape = (n_samples, n_outputs)
    Samples from the "ground truth" distribution.
X_pred : array-like of shape = (n_samples, n_outputs)
    Samples from the "predicted" distribution.
cross_testing : bool, optional
    Whether to use a cross-validation-like approach for testing. Currently not used by the implementation.
Returns
z : float
    The similarity score for the two distributions, calculated from the generalization estimate of a model trained to distinguish between the two sets of samples.
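The returned value is derived from the held-out accuracy of the best classifier found by an internal grid search (dummy baselines, linear SVM, gradient boosting): the harder it is to tell the two sample sets apart, the higher the score. Below is a minimal sketch of the rescaling applied to that accuracy, assuming equally sized sample sets so that the majority-class fraction of the test split is 0.5; rescale and majority_fraction are illustrative names, not part of the module:

def rescale(accuracy, majority_fraction=0.5):
    # Error rate relative to the majority-class fraction, clipped at 1.0
    # (illustrative helper mirroring the arithmetic inside distribution_similarity).
    return min(1.0, (1.0 - accuracy) / majority_fraction)

print(rescale(0.5))   # 1.0 -- classifier no better than chance: distributions look alike
print(rescale(0.75))  # 0.5
print(rescale(1.0))   # 0.0 -- perfect separation: distributions clearly differ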
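A short usage sketch, mirroring the __main__ example in the module source; exact values vary between runs because the samples and the train/test split are random:

import numpy as np
from noxer.gm.metrics import distribution_similarity

X_true = np.random.randn(512, 2)        # samples from the reference distribution
X_pred = np.random.randn(512, 2) + 0.8  # "generated" samples, shifted away from the reference

# Close to 1.0 when the two distributions match, close to 0.0 when they are easy to separate.
print(distribution_similarity(X_true, X_pred))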