Top

noxer.gm.sgm module

Sequential generator model - sample the outputs vector iteratively entry by entry.

For an output vector of size n, n models are trained. First model learns the distribution of the first entry in the output vector. Second model learns the distribution of the second entry, conditioned on the first one. Third model learns the distribution of the third entry, conditioned on the first and second ones. ...

"""
Sequential generator model - sample the outputs vector
iteratively entry by entry.

For an output vector of size n, n models are trained.
First model learns the distribution of first entry in
output vector.
Second model learns the distribution of second entry,
conditioned on first one.
Third model learns the distribution of third entry,
conditioned on first and second ones.
...
"""
import numpy as np

from sklearn.base import clone, BaseEstimator

from .base import GeneratorBase


class ScalarGenerator(GeneratorBase):
    """
    A model that can generate a single scalar value.
    Trains a regression model to simulate a conditional
    density function.

    See `fit` for further details on how the model is
    trained.

    Parameters
    ----------

    estimator: BaseEstimator, regression model that will be
                used to simulate density function.
    """

    def __init__(self, estimator):
        if not isinstance(estimator, BaseEstimator):
            raise ValueError('estimator should be of type BaseEstimator')
        self.estimator = estimator
        self.estimator_ = None  # fitted clone of `estimator`, set in `fit`
        self.m = None  # minimum of training outputs, set in `fit`
        self.M = None  # maximum of training outputs, set in `fit`

    def set_params(self, **params):
        """Delegate parameters to estimator"""
        self.estimator.set_params(**params)
        return self

    def fit(self, X, y, **kwargs):
        """Fit generative model to the data.

        A surrogate of the conditional density p(y | X) is trained as a
        regression problem: pairs (X, y) taken from the data get target
        density 1.0, while pairs (X, random_y), with random_y drawn
        uniformly from a slightly expanded range of y, get target
        density 0.0.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        y : {array-like, sparse matrix}, shape [n_samples]
            The data that should be generated by particular model.

        Returns
        -------
        self : ScalarGenerator
            The fitted generator, for sklearn-style chaining.
        """

        # compute range of values of output
        m, M = np.min(y), np.max(y)
        self.m = m
        self.M = M
        interval = M - m

        # expand intervals a bit, so that negative (density 0.0)
        # examples can fall slightly outside the observed range
        m -= interval * 0.1
        M += interval * 0.1

        # make a clone of estimator
        self.estimator_ = clone(self.estimator)

        # create a training dataset for estimator
        X_, y_ = [], []
        N = len(y)

        # high density values (1.0) for correctly generated values
        X_.append(np.column_stack([X, y]))
        y_.append(np.ones(N))

        # low density values (0.0) for incorrectly generated values
        for reps in range(5):
            random_y = np.random.rand(N) * (M - m) + m
            X_.append(np.column_stack([X, random_y]))
            y_.append(np.zeros(N))

        # overlap of correctly and incorrectly generated values is
        # ok - then the output of model is ~ mean of values

        # make matrices; np.vstack replaces the deprecated np.row_stack
        X_ = np.vstack(X_)
        y_ = np.concatenate(y_)

        # fit the density surrogate
        self.estimator_.fit(X_, y_)

        # follow the sklearn convention of returning self from fit
        return self

    def predict(self, X, **kwargs):
        """Generate a scalar value conditioned on input.

        The output range [self.m, self.M] is discretized into 10000
        candidate values; the fitted surrogate scores each candidate
        for every input row, and a value is sampled from the resulting
        normalized distribution.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        Output:
        ------
        y : {array-like, sparse matrix}, shape [n_samples]
            The scalar outputs generated by the model.
        """

        # range of all possible outputs
        yv = np.linspace(self.m, self.M, 10000)

        # condition matrix; the last column holds the candidate output
        C = np.column_stack([X, np.ones(len(X))])

        Yp = []

        # get surrogate density estimates for every candidate value
        for v in yv:
            C[:, -1] = v
            yp = self.estimator_.predict(C)
            Yp.append(yp)

        # distribution for a single output is a row
        Yp = np.column_stack(Yp)

        # normalize distribution values; clip negatives first
        Yp = np.maximum(Yp, 0.0)
        totals = np.sum(Yp, axis=-1, keepdims=True)

        # guard against all-zero rows: fall back to a uniform
        # distribution instead of producing NaN probabilities, which
        # would crash np.random.choice below
        degenerate = totals[:, 0] == 0.0
        Yp[degenerate] = 1.0
        totals[degenerate] = Yp.shape[1]
        Yp = Yp / totals

        # do interpolation here?

        # generate random values
        y = [
            np.random.choice(yv, p=p) for p in Yp
        ]

        y = np.array(y)

        return y


class SGM(GeneratorBase):
    r"""
    This class generates desired output feature by feature.
    That is, for example with outputs Y \in R^(n, k), k
    scalar conditional generative models are built for
    every entry in output vector.

    Inspired by
    http://proceedings.mlr.press/v15/larochelle11a/larochelle11a.pdf
    https://arxiv.org/pdf/1606.05328.pdf
    """

    def __init__(self, estimator):
        self.estimator = estimator
        self.models = None # models used for generation of output features, as well as scales for features

    def set_params(self, **params):
        """Delegate parameters to estimator"""
        self.estimator.set_params(**params)
        return self

    def fit(self, X, Y, **kwargs):
        """Fit generative models to the data

        One clone of `estimator` is trained per output feature; every
        subsequent clone is conditioned on X together with all
        previously fitted features.

        Parameters
        ----------
        Y : {array-like, sparse matrix}, shape [n_samples, n_output_features]
            The data that should be generated by particular model.

        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        Returns
        -------
        self : SGM
            The fitted model.
        """

        # ensure column iteration (Y.T) also works for plain
        # Python sequences, not only ndarrays
        Y = np.asarray(Y)

        # all inputs to the model
        self.models = []

        for y in Y.T:
            # fit the model
            model = clone(self.estimator)
            model.fit(X, y)

            # save model and its range
            self.models.append(model)

            # the fitted feature now becomes a condition for the
            # following features
            X = np.column_stack([X, y])

        return self

    def predict(self, X, **kwargs):
        """Generate samples using the generative model

        Features are generated one by one; each generated feature is
        appended to the conditioning matrix for the next model.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        Returns
        -------
        Y : array, shape [n_samples, n_output_features]
            The generated outputs.
        """

        if not self.models:
            raise AssertionError('Model is not fitted yet. Please call fit first')

        # initial condition list
        Y = []

        for model in self.models:
            y = model.predict(X)
            Y.append(y)
            X = np.column_stack([X, y])

        # stack all outputs
        Y = np.column_stack(Y)
        return Y

Classes

class SGM

This class generates desired output feature by feature. That is, for example with outputs Y \in R^(n, k), k scalar conditional generative models are built for every entry in the output vector.

Inspired by http://proceedings.mlr.press/v15/larochelle11a/larochelle11a.pdf https://arxiv.org/pdf/1606.05328.pdf

class SGM(GeneratorBase):
    """
    This class generates desired output feature by feature.
    That is, for example with outputs Y \in R^(n, k), k
    scalar conditional generative models are build for
    every entry in output vector.

    Inspired by
    http://proceedings.mlr.press/v15/larochelle11a/larochelle11a.pdf
    https://arxiv.org/pdf/1606.05328.pdf
    """

    def __init__(self, estimator):
        self.estimator = estimator
        self.models = None # models used for generation of output features, as well as scales for features

    def set_params(self, **params):
        """Delegate parameters to estimator"""
        self.estimator.set_params(**params)
        return self

    def fit(self, X, Y, **kwargs):
        """Fit generative models to the data

        Parameters
        ----------
        Y : {array-like, sparse matrix}, shape [n_samples, n_output_features]
            The data that should be generated by particular model.

        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.
        """

        # all inputs to the model
        self.models = []

        for y in Y.T:
            # fit the model
            model = clone(self.estimator)
            model.fit(X, y)

            # save model and its range
            self.models.append(model)

            # generated output is not a condition
            X = np.column_stack([X, y])

        return self

    def predict(self, X, **kwargs):
        """Generate samples using the generative model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        """

        if not self.models:
            raise AssertionError('Model is not fitted yet. Please call fit first')

        # initial condition list
        Y = []

        for model in self.models:
            y = model.predict(X)
            Y.append(y)
            X = np.column_stack([X, y])

        # stack all outputs
        Y = np.column_stack(Y)
        return Y

Ancestors (in MRO)

  • SGM
  • noxer.gm.base.GeneratorBase
  • sklearn.base.BaseEstimator
  • builtins.object

Static methods

def __init__(

self, estimator)

Initialize self. See help(type(self)) for accurate signature.

def __init__(self, estimator):
    self.estimator = estimator
    self.models = None # models used for generation of output features, as well as scales for features

def fit(

self, X, Y, **kwargs)

Fit generative models to the data

Parameters

Y : {array-like, sparse matrix}, shape [n_samples, n_output_features] The data that should be generated by particular model.

X : {array-like, sparse matrix}, shape [n_samples, n_features] The data used to condition the generative model's outputs.

def fit(self, X, Y, **kwargs):
    """Fit generative models to the data
    Parameters
    ----------
    Y : {array-like, sparse matrix}, shape [n_samples, n_output_features]
        The data that should be generated by particular model.
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data used to condition the generative model's outputs.
    """
    # all inputs to the model
    self.models = []
    for y in Y.T:
        # fit the model
        model = clone(self.estimator)
        model.fit(X, y)
        # save model and its range
        self.models.append(model)
        # generated output is not a condition
        X = np.column_stack([X, y])
    return self

def get_params(

self, deep=True)

Get parameters for this estimator.

Parameters

deep : boolean, optional If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns

params : mapping of string to any Parameter names mapped to their values.

def get_params(self, deep=True):
    """Get parameters for this estimator.
    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.
    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    out = dict()
    for key in self._get_param_names():
        # We need deprecation warnings to always be on in order to
        # catch deprecated param values.
        # This is set in utils/__init__.py but it gets overwritten
        # when running under python3 somehow.
        warnings.simplefilter("always", DeprecationWarning)
        try:
            with warnings.catch_warnings(record=True) as w:
                value = getattr(self, key, None)
            if len(w) and w[0].category == DeprecationWarning:
                # if the parameter is deprecated, don't show it
                continue
        finally:
            warnings.filters.pop(0)
        # XXX: should we rather test if instance of estimator?
        if deep and hasattr(value, 'get_params'):
            deep_items = value.get_params().items()
            out.update((key + '__' + k, val) for k, val in deep_items)
        out[key] = value
    return out

def predict(

self, X, **kwargs)

Generate samples using the generative model

Parameters

X : {array-like, sparse matrix}, shape [n_samples, n_features] The data used to condition the generative model's outputs.

def predict(self, X, **kwargs):
    """Generate samples using the generative model
    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data used to condition the generative model's outputs.
    """
    if not self.models:
        raise AssertionError('Model is not fitted yet. Please call fit first')
    # initial condition list
    Y = []
    for model in self.models:
        y = model.predict(X)
        Y.append(y)
        X = np.column_stack([X, y])
    # stack all outputs
    Y = np.column_stack(Y)
    return Y

def score(

self, X, Y, **kwargs)

Score the generative model on the real data.

Parameters

Y : {array-like, sparse matrix}, shape [n_samples, ...] The data that should be generated by particular model.

X : {array-like, sparse matrix}, shape [n_samples, ...] The data used to condition the generative model's outputs.

def score(self, X, Y, **kwargs):
    """Score the generative model on the real data.
    Parameters
    ----------
    Y : {array-like, sparse matrix}, shape [n_samples, ...]
        The data that should be generated by particular model.
    X : {array-like, sparse matrix}, shape [n_samples, ...]
        The data used to condition the generative model's outputs.
    """
    Yp = self.predict(X, **kwargs)
    score = distribution_similarity(Y, Yp)
    return score

def set_params(

self, **params)

Delegate parameters to estimator

def set_params(self, **params):
    """Delegate parameters to estimator"""
    self.estimator.set_params(**params)
    return self

Instance variables

var estimator

var models

class ScalarGenerator

A model that can generate a single scalar value. Trains a regression model to simulate a conditional density function.

See fit for further details on how the model is trained.

Parameters

estimator: BaseEstimator, regression model that will be used to simulate density function.

class ScalarGenerator(GeneratorBase):
    """
    A model that can generate a single scalar value.
    Trains a regression model to simulate a conditional
    density function.

    See `fit` for further details on how the model is
    trained.

    Parameters
    ----------

    estimator: BaseEstimator, regression model that will be
                used to simulate density function.
    """

    def __init__(self, estimator):
        if not isinstance(estimator, BaseEstimator):
            raise ValueError('estimator should be of type BaseEstimator')
        self.estimator = estimator
        self.estimator_ = None
        self.m = None
        self.M = None

    def set_params(self, **params):
        """Delegate parameters to estimator"""
        self.estimator.set_params(**params)
        return self

    def fit(self, X, y, **kwargs):
        """Fit generative model to the data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        y : {array-like, sparse matrix}, shape [n_samples]
            The data that should be generated by particular model.
        """

        # compute range of values of output
        m, M = np.min(y), np.max(y)
        self.m = m
        self.M = M
        interval = M - m

        # expand intervals a bit
        m -= interval * 0.1
        M += interval * 0.1

        # make a clone of estimator
        self.estimator_ = clone(self.estimator)

        # create a training dataset for estimator
        X_, y_ = [], []
        N = len(y)

        # high density values (1.0) for correctly generated values
        X_.append(np.column_stack([X, y]))
        y_.append(np.ones(N))

        # low density values (0.0) for incorrectly generated values
        for reps in range(5):
            random_y = np.random.rand(N)*(M - m) + m
            X_.append(np.column_stack([X, random_y]))
            y_.append(np.zeros(N))

        # overlap of correctly and incorrectly generated values is
        # ok - then the output of model is ~ mean of vales

        # make matricies
        X_ = np.row_stack(X_)
        y_ = np.concatenate(y_)

        # fit the density surrogate
        self.estimator_.fit(X_, y_)

    def predict(self, X, **kwargs):
        """Generate a scalar value conditioned on input.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape [n_samples, n_features]
            The data used to condition the generative model's outputs.

        Output:
        ------
        y : {array-like, sparse matrix}, shape [n_samples]
            The scalar outputs generated by the model.
        """

        # range of all possible outputs
        yv = np.linspace(self.m, self.M, 10000)

        # matrix that will be used as output
        C = np.column_stack([X, np.ones(len(X))])

        Yp = []

        # get distributions for outputs
        for v in yv:
            C[:, -1] = v
            yp = self.estimator_.predict(C)
            Yp.append(yp)

        # distribution for a single output is a row
        Yp = np.column_stack(Yp)

        # normalize distribution values
        Yp = np.maximum(Yp, 0.0)
        Yp = (Yp.T / np.sum(Yp, axis=-1)).T

        # do interpolation here?

        # generate random values
        y = [
            np.random.choice(yv, p=p) for p in Yp
        ]

        y = np.array(y)

        return y

Ancestors (in MRO)

  • ScalarGenerator
  • noxer.gm.base.GeneratorBase
  • sklearn.base.BaseEstimator
  • builtins.object

Static methods

def __init__(

self, estimator)

Initialize self. See help(type(self)) for accurate signature.

def __init__(self, estimator):
    if not isinstance(estimator, BaseEstimator):
        raise ValueError('estimator should be of type BaseEstimator')
    self.estimator = estimator
    self.estimator_ = None
    self.m = None
    self.M = None

def fit(

self, X, y, **kwargs)

Fit generative model to the data.

Parameters

X : {array-like, sparse matrix}, shape [n_samples, n_features] The data used to condition the generative model's outputs.

y : {array-like, sparse matrix}, shape [n_samples] The data that should be generated by particular model.

def fit(self, X, y, **kwargs):
    """Fit generative model to the data.
    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data used to condition the generative model's outputs.
    y : {array-like, sparse matrix}, shape [n_samples]
        The data that should be generated by particular model.
    """
    # compute range of values of output
    m, M = np.min(y), np.max(y)
    self.m = m
    self.M = M
    interval = M - m
    # expand intervals a bit
    m -= interval * 0.1
    M += interval * 0.1
    # make a clone of estimator
    self.estimator_ = clone(self.estimator)
    # create a training dataset for estimator
    X_, y_ = [], []
    N = len(y)
    # high density values (1.0) for correctly generated values
    X_.append(np.column_stack([X, y]))
    y_.append(np.ones(N))
    # low density values (0.0) for incorrectly generated values
    for reps in range(5):
        random_y = np.random.rand(N)*(M - m) + m
        X_.append(np.column_stack([X, random_y]))
        y_.append(np.zeros(N))
    # overlap of correctly and incorrectly generated values is
    # ok - then the output of model is ~ mean of vales
    # make matricies
    X_ = np.row_stack(X_)
    y_ = np.concatenate(y_)
    # fit the density surrogate
    self.estimator_.fit(X_, y_)

def get_params(

self, deep=True)

Get parameters for this estimator.

Parameters

deep : boolean, optional If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns

params : mapping of string to any Parameter names mapped to their values.

def get_params(self, deep=True):
    """Get parameters for this estimator.
    Parameters
    ----------
    deep : boolean, optional
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.
    Returns
    -------
    params : mapping of string to any
        Parameter names mapped to their values.
    """
    out = dict()
    for key in self._get_param_names():
        # We need deprecation warnings to always be on in order to
        # catch deprecated param values.
        # This is set in utils/__init__.py but it gets overwritten
        # when running under python3 somehow.
        warnings.simplefilter("always", DeprecationWarning)
        try:
            with warnings.catch_warnings(record=True) as w:
                value = getattr(self, key, None)
            if len(w) and w[0].category == DeprecationWarning:
                # if the parameter is deprecated, don't show it
                continue
        finally:
            warnings.filters.pop(0)
        # XXX: should we rather test if instance of estimator?
        if deep and hasattr(value, 'get_params'):
            deep_items = value.get_params().items()
            out.update((key + '__' + k, val) for k, val in deep_items)
        out[key] = value
    return out

def predict(

self, X, **kwargs)

Generate a scalar value conditioned on input.

Parameters

X : {array-like, sparse matrix}, shape [n_samples, n_features] The data used to condition the generative model's outputs.

Output:

y : {array-like, sparse matrix}, shape [n_samples] The scalar outputs generated by the model.

def predict(self, X, **kwargs):
    """Generate a scalar value conditioned on input.
    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data used to condition the generative model's outputs.
    Output:
    ------
    y : {array-like, sparse matrix}, shape [n_samples]
        The scalar outputs generated by the model.
    """
    # range of all possible outputs
    yv = np.linspace(self.m, self.M, 10000)
    # matrix that will be used as output
    C = np.column_stack([X, np.ones(len(X))])
    Yp = []
    # get distributions for outputs
    for v in yv:
        C[:, -1] = v
        yp = self.estimator_.predict(C)
        Yp.append(yp)
    # distribution for a single output is a row
    Yp = np.column_stack(Yp)
    # normalize distribution values
    Yp = np.maximum(Yp, 0.0)
    Yp = (Yp.T / np.sum(Yp, axis=-1)).T
    # do interpolation here?
    # generate random values
    y = [
        np.random.choice(yv, p=p) for p in Yp
    ]
    y = np.array(y)
    return y

def score(

self, X, Y, **kwargs)

Score the generative model on the real data.

Parameters

Y : {array-like, sparse matrix}, shape [n_samples, ...] The data that should be generated by particular model.

X : {array-like, sparse matrix}, shape [n_samples, ...] The data used to condition the generative model's outputs.

def score(self, X, Y, **kwargs):
    """Score the generative model on the real data.
    Parameters
    ----------
    Y : {array-like, sparse matrix}, shape [n_samples, ...]
        The data that should be generated by particular model.
    X : {array-like, sparse matrix}, shape [n_samples, ...]
        The data used to condition the generative model's outputs.
    """
    Yp = self.predict(X, **kwargs)
    score = distribution_similarity(Y, Yp)
    return score

def set_params(

self, **params)

Delegate parameters to estimator

def set_params(self, **params):
    """Delegate parameters to estimator"""
    self.estimator.set_params(**params)
    return self

Instance variables

var M

var estimator

var estimator_

var m