Module dimdrop.util

Source code
from .rbm import EarlyStoppingRBM
from .sequence import DECSequence
from .transform import Transform

__all__ = [
    'EarlyStoppingRBM', 'DECSequence', 'Transform'
]

Sub-modules

dimdrop.util.rbm
dimdrop.util.sequence
dimdrop.util.transform
dimdrop.util.tsne

Code taken from https://github.com/kylemcdonald/Parametric-t-SNE

Classes

class DECSequence (data, model, batch_size)

Sequence generator for the DEC network

Parameters

data : array
The data to generate the sequence for
model : DEC
The model to train
batch_size : int
The batch size

Attributes

target : array
The current target distribution
Source code
class DECSequence(Sequence):
    """
    Sequence generator for the DEC network.

    Parameters
    ----------
    data : array
        The data to generate the sequence for
    model : `dimdrop.models.DEC`
        The model to train
    batch_size : int
        The batch size

    Attributes
    ----------
    target : array
        The current target distribution
    """

    def __init__(self, data, model, batch_size):
        self.data = data
        self.model = model
        self.batch_size = batch_size
        # Initial target distribution from the model's current predictions.
        self.target = target_distribution(self.model.predict(self.data))

    def __len__(self):
        # Number of batches, counting a final partial batch.
        return math.ceil(self.data.shape[0] / self.batch_size)

    def __getitem__(self, index):
        start = index * self.batch_size
        stop = min(start + self.batch_size, self.target.shape[0])
        return self.data[start:stop], self.target[start:stop]

    def on_epoch_end(self):
        """
        After each epoch update the target distribution.
        """
        self.target = target_distribution(self.model.predict(self.data))

Ancestors

  • keras.utils.data_utils.Sequence

Methods

def on_epoch_end(self)

After each epoch update the target distribution.

Source code
def on_epoch_end(self):
    """
    After each epoch update the target distribution.
    """
    predictions = self.model.predict(self.data)
    self.target = target_distribution(predictions)
class EarlyStoppingRBM (n_components=256, batch_size=100, lr=0.01, patience=3, epochs=1000, verbose=0)

Adaptation of the BernoulliRBM class of sklearn to add the ability to stop early when training does not improve.

Parameters

n_components : int, optional
The size of the output, default 256
batch_size : int, optional
The batch size of the rbm, default 100
lr : float, optional
The learning rate of the rbm, default 0.01
patience : int, optional
The amount of epochs without improvement before training stops, default 3
epochs : int, optional
The maximum amount of epochs, default 1000
verbose : int, optional
The verbosity of the rbm, default 0

Attributes

rbm : BernoulliRBM
the rbm to be trained
Source code
class EarlyStoppingRBM:
    """
    Adaptation of the `BernoulliRBM` class of sklearn to add the ability to
    stop early when training does not improve.

    Parameters
    ----------
    n_components : int, optional
        The size of the output, default `256`
    batch_size : int, optional
        The batch size of the rbm, default `100`
    lr : float, optional
        The learning rate of the rbm, default `0.01`
    patience : int, optional
        The amount of epochs without improvement before training stops,
        default `3`
    epochs : int, optional
        The maximum amount of epochs, default `1000`
    verbose : int, optional
        The verbosity of the rbm, default `0`

    Attributes
    ----------
    rbm : BernoulliRBM
        the rbm to be trained
    """

    def __init__(
        self,
        n_components=256,
        batch_size=100,
        lr=0.01,
        patience=3,
        epochs=1000,
        verbose=0
    ):
        # n_iter=1 so each call to rbm.fit performs exactly one epoch,
        # letting this wrapper decide when to stop.
        self.rbm = BernoulliRBM(
            n_components=n_components,
            n_iter=1,
            batch_size=batch_size,
            learning_rate=lr,
            verbose=verbose
        )
        self.patience = patience
        self.epochs = epochs
        self.verbose = verbose

    def _mean_likelihood(self, data):
        """
        Average log pseudo-likelihood of `data` over 5 stochastic
        evaluations (`score_samples` corrupts random features each call).
        """
        return np.mean(
            [np.mean(self.rbm.score_samples(data)) for _ in range(5)])

    def fit(self, data):
        """
        Fit the rbm to the given data, one epoch at a time, stopping early
        once the pseudo-likelihood has not improved for `patience` epochs.

        Parameters
        ----------
        data : array
            Data to be fitted
        """
        self.rbm.fit(data)
        # score_samples returns log pseudo-likelihood: HIGHER is better.
        # Track the best score seen and the epoch it occurred at.
        best_likelihood = self._mean_likelihood(data)
        best_index = 0
        for i in range(1, self.epochs):
            if best_index + self.patience <= i:
                # No improvement for `patience` epochs: stop training.
                break
            if self.verbose:
                print('Epoch {}/{}'.format(i + 1, self.epochs))
            self.rbm.fit(data)
            likelihood = self._mean_likelihood(data)
            if likelihood > best_likelihood:
                best_likelihood = likelihood
                best_index = i

Methods

def fit(self, data)

Fit the rbm to the given data

Parameters

data : array
Data to be fitted
Source code
def fit(self, data):
    """
    Fit the rbm to the given data, one epoch at a time, stopping early
    once the pseudo-likelihood has not improved for `patience` epochs.

    Parameters
    ----------
    data : array
        Data to be fitted
    """
    self.rbm.fit(data)
    # score_samples returns log pseudo-likelihood: HIGHER is better.
    # Average 5 evaluations because score_samples is stochastic.
    best_likelihood = np.mean(
        [np.mean(self.rbm.score_samples(data)) for _ in range(5)])
    best_index = 0
    for i in range(1, self.epochs):
        if best_index + self.patience <= i:
            # No improvement for `patience` epochs: stop training.
            break
        if self.verbose:
            print('Epoch {}/{}'.format(i + 1, self.epochs))
        self.rbm.fit(data)
        likelihood = np.mean(
            [np.mean(self.rbm.score_samples(data)) for _ in range(5)])
        if likelihood > best_likelihood:
            best_likelihood = likelihood
            best_index = i
class Transform (scale=True, log=False)

Transform input data

Parameters

scale : bool
Whether to scale the input data
log : bool
Whether to take the log of the input data

Returns

The transformed input data
 
Source code
class Transform:
    """
    Transform input data by optionally log-transforming it and/or scaling
    each sample to a maximum of 1.

    Parameters
    ----------
    scale : bool
        Whether to scale each row of the input data by its own maximum
    log : bool
        Whether to take the log of the input data (applied before scaling)

    Returns
    -------
    The transformed input data
    """

    def __init__(self, scale=True, log=False):
        self.scale = scale
        self.log = log

    def __call__(self, data):
        """
        Apply the configured transformations to `data`.

        Parameters
        ----------
        data : array
            2d array with one sample per row — TODO confirm against callers

        Returns
        -------
        array
            The transformed data (the input itself when no transform is
            enabled)
        """
        if not self.scale and not self.log:
            return data
        output = data
        if self.log:
            output = np.log2(output + 1)
        if self.scale:
            # Scale the (possibly log-transformed) values. The previous
            # implementation read the raw `data` here, silently discarding
            # the log transform whenever both options were enabled.
            scaled = np.zeros(output.shape, dtype=np.float32)
            for i in range(output.shape[0]):
                scaled[i, :] = output[i, :] / np.max(output[i, :])
            output = scaled
        return output