Source code for hottbox.core.operations

"""
Functions and operations for tensor algebra

Credit to `Jean Kossaifi <http://jeankossaifi.com/>`_.
"""

import functools
import numpy as np


# TODO: I think it will be better if the following three operations would have the same signature
[docs]def khatri_rao(matrices, skip_matrix=None, reverse=False):
    """ Khatri-Rao product of a list of matrices.

    Parameters
    ----------
    matrices : list[np.ndarray]
        List of matrices. Each matrix should have the same number of columns
    skip_matrix : int
        Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
    reverse : bool
        If True, perform khatri-rao product on the list of matrices in the reversed order

    Returns
    -------
    result : np.ndarray
        The result of the Khatri-Rao product is a matrix of shape `()`
    """
    if len(matrices) < 2:
        raise ValueError('khatri_roa product requires a list of at least 2 matrices, '
                         'but {} given.'.format(len(matrices)))
    if skip_matrix is not None:
        matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
    if reverse:
        matrices = matrices[::-1]
    n_rows, n_cols = matrices[0].shape
    for matrix in matrices[1:]:
        if matrix.shape[1] != n_cols:
            raise ValueError('All matrices must have the same number of columns.')
        n_rows *= matrix.shape[0]
    result = np.zeros((n_rows, n_cols))
    for i in range(n_cols):
        temp = matrices[0][:, i]  # Accumulates the khatri-rao product of the i-th columns
        for matrix in matrices[1:]:
            temp = np.einsum('i,j->ij', temp, matrix[:, i]).ravel()
        result[:, i] = temp  # the i-t column is the kronecker product of all the i-th columns of all matrices:
    return result


[docs]def hadamard(matrices, skip_matrix=None, reverse=False):
    """ Hadamard product of a list of matrices.

    Parameters
    ----------
    matrices : list[np.ndarray]
       List of matrices. All matrices should be of the same shape.
    skip_matrix : int
       Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
    reverse : bool
       If True, perform hadamard product on the list of matrices in the reversed order

    Returns
    -------
    result : np.ndarray
        The result of the Hadamard product is a matrix of the same shape as every matrix in `matrices`
    """
    if skip_matrix is not None:
        matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
    if reverse:
        matrices = matrices[::-1]
    return functools.reduce(np.multiply, matrices)


[docs]def kronecker(matrices, skip_matrix=None, reverse=False):
    """ Kronecker product of a list of matrices.

    Parameters
    ----------
    matrices : list[np.ndarray]
        List of matrices.
    skip_matrix : int
        Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
    reverse : bool
        If True, perform Kronecker product on the list of matrices in the reversed order

    Returns
    -------
    result : np.ndarray
        The result of the Kronecker product is a matrix of shape ``(prod(n_rows), prod(n_columns)``
        where ``prod(n_rows) = prod([m.shape[0] for m in matrices])``
        and ``prod(n_columns) = prod([m.shape[1] for m in matrices])``
    """
    if skip_matrix is not None:
        matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
    if reverse:
        matrices = matrices[::-1]

    result = matrices[0]
    for matrix in matrices[1:]:
        result = np.kron(result, matrix)
    return result


[docs]def unfold(tensor, mode):
    """ Unfolds N-dimensional array into a 2D array.

    Parameters
    ----------
    tensor : np.ndarray
        N-dimensional array to be unfolded
    mode : int
        Specifies a mode along which a `tensor` will be unfolded

    Returns
    -------
    matrix : np.ndarray
        Unfolded version of a `tensor` with a shape ``(tensor.shape[mode], -1)``
    """
    matrix = np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1))
    return matrix


[docs]def kolda_unfold(tensor, mode):
    """ Unfolds N-dimensional array into a 2D array.

    Parameters
    ----------
    tensor : np.ndarray
        N-dimensional array to be unfolded
    mode : int
        Specifies a mode along which a `tensor` will be unfolded

    Returns
    -------
    matrix : np.ndarray
        Unfolded version of a `tensor` with a shape ``(tensor.shape[mode], -1)``

    Notes
    -----
        Much slower for then ``unfold``.
    """
    matrix = np.transpose(tensor, _kolda_reorder(tensor.ndim, mode)).reshape((tensor.shape[mode], -1))
    return matrix


[docs]def fold(matrix, mode, shape):
    """ Fold a 2D array into a N-dimensional array.

    Parameters
    ----------
    matrix : np.ndarray
        Unfolded version of a tensor
    mode : int
        A mode along which original tensor was unfolded into a `matrix`
    shape : tuple
        Shape of the original tensor before it was unfolded

    Returns
    -------
    tensor : np.ndarray
        N-dimensional array of the original shape

    Notes
    -----
        At the moment it reverts unfolding operation (``unfold``). Will be generalised in a future
    """
    full_shape = list(shape)
    mode_dim = full_shape.pop(mode)
    full_shape.insert(0, mode_dim)
    tensor = np.moveaxis(np.reshape(matrix, full_shape), 0, mode)
    return tensor


[docs]def kolda_fold(matrix, mode, shape):
    """ Fold a 2D array into a N-dimensional array.

    Parameters
    ----------
    matrix : np.ndarray
        Unfolded version of a tensor
    mode : int
        A mode along which original tensor was unfolded into a `matrix`
    shape : tuple
        Shape of the original tensor before it was unfolded

    Returns
    -------
    tensor : np.ndarray
        N-dimensional array of the original shape

    Notes
    -----
        1) Much slower then ``fold``
        2) At the moment it reverts unfolding operation (``kolda_unfold``). Will be generalised in a future
    """

    unfolded_indices = _kolda_reorder(len(shape), mode)
    original_shape = [shape[i] for i in unfolded_indices]
    matrix = matrix.reshape(original_shape)

    folded_indices = list(range(len(shape)-1, 0, -1))
    folded_indices.insert(mode, 0)
    tensor = np.transpose(matrix, folded_indices)
    return tensor


[docs]def mode_n_product(tensor, matrix, mode):
    """ Mode-n product of a N-dimensional array with a matrix.

    Parameters
    ----------
    tensor : np.ndarray
        N-dimensional array
    matrix : np.ndarray
        2D array
    mode : int
        Specifies mode along which a `tensor` is multiplied by a `matrix`. The size of a `tensor` along this `mode`
        should be equal to the number of columns of the `matrix`. That is: ``tensor.shape[mode] = matrix.shape[1]``

    Returns
    -------
    result : np.ndarray
        The result of the mode-n product of a `tensor` with a `matrix` along specified `mode`.

    Notes
    -----
        Result of mode-n product does not depend on the folding/unfolding convention,
        as long as folding and unfolding operations belong to the same convention.
    """
    # TODO: Implement mode-n product with a vector
    if matrix.ndim != 2:
        raise ValueError("Mode-n product can only be performed with the 2D array, "
                         "whereas an array of order {} was provided".format(matrix.ndim))
    orig_shape = list(tensor.shape)
    new_shape = orig_shape
    new_shape[mode] = matrix.shape[0]
    result = fold(np.dot(matrix, unfold(tensor, mode)), mode, tuple(new_shape))
    return result


def _kolda_reorder(ndim, mode):
    """Reorders the elements
    """
    indices = list(range(ndim))
    element = indices.pop(mode)
    return ([element] + indices[::-1])



[docs]def sampled_khatri_rao(matrices, sample_size=None, skip_matrix=None):
    """ Sampled Khatri-Rao product of a list of matrices.

    Parameters
    ----------
    matrices : list[np.ndarray]
        List of matrices. Each matrix should have the same number of columns
    skip_matrix : int
        Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
    reverse : bool
        If True, perform khatri-rao product on the list of matrices in the reversed order

    Returns
    -------
    result : np.ndarray
        The result of the Khatri-Rao product of the sampled matrices
    indices : tuple list
        list of indices for all modes

    References
    ----------
    -   Battaglino, C., Ballard, G., & Kolda, T. G. (2018). A Practical Randomized CP Tensor
        Decomposition. SIAM Journal on Matrix Analysis and Applications, 39(2), 876–901.
        http://doi.org/10.1137/17m1112303
    """
    np.random.seed(0)
    if len(matrices) < 2:
        raise ValueError('khatri_roa product requires a list of at least 2 matrices, '
                         'but {} given.'.format(len(matrices)))

    n_cols = matrices[0].shape[1]

    if skip_matrix is not None:
        matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]

    if sample_size is None:
        sample_size = int(10*n_cols*np.log(n_cols))

    result = np.ones((sample_size, n_cols))
    idxlist = [np.random.randint(0, m.shape[0], size=sample_size, dtype=int) for m in matrices]

    for i, (idx, mat) in enumerate(zip(idxlist, matrices)):
        if mat.shape[1] != n_cols:
            raise ValueError('All matrices must have the same number of columns.'+\
                    'Got {} and {}'.format(mat.shape, n_cols))

        result = hadamard([result, mat[idx, :]])

    return result, idxlist
Fork me on GitHub

Table Of Contents

Source code for hottbox.core.operations