"""
Functions and operations for tensor algebra
Credit to `Jean Kossaifi <http://jeankossaifi.com/>`_.
"""
import functools
import numpy as np
# TODO: I think it will be better if the following three operations would have the same signature
[docs]def khatri_rao(matrices, skip_matrix=None, reverse=False):
""" Khatri-Rao product of a list of matrices.
Parameters
----------
matrices : list[np.ndarray]
List of matrices. Each matrix should have the same number of columns
skip_matrix : int
Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
reverse : bool
If True, perform khatri-rao product on the list of matrices in the reversed order
Returns
-------
result : np.ndarray
The result of the Khatri-Rao product is a matrix of shape `()`
"""
if len(matrices) < 2:
raise ValueError('khatri_roa product requires a list of at least 2 matrices, '
'but {} given.'.format(len(matrices)))
if skip_matrix is not None:
matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
if reverse:
matrices = matrices[::-1]
n_rows, n_cols = matrices[0].shape
for matrix in matrices[1:]:
if matrix.shape[1] != n_cols:
raise ValueError('All matrices must have the same number of columns.')
n_rows *= matrix.shape[0]
result = np.zeros((n_rows, n_cols))
for i in range(n_cols):
temp = matrices[0][:, i] # Accumulates the khatri-rao product of the i-th columns
for matrix in matrices[1:]:
temp = np.einsum('i,j->ij', temp, matrix[:, i]).ravel()
result[:, i] = temp # the i-t column is the kronecker product of all the i-th columns of all matrices:
return result
[docs]def hadamard(matrices, skip_matrix=None, reverse=False):
""" Hadamard product of a list of matrices.
Parameters
----------
matrices : list[np.ndarray]
List of matrices. All matrices should be of the same shape.
skip_matrix : int
Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
reverse : bool
If True, perform hadamard product on the list of matrices in the reversed order
Returns
-------
result : np.ndarray
The result of the Hadamard product is a matrix of the same shape as every matrix in `matrices`
"""
if skip_matrix is not None:
matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
if reverse:
matrices = matrices[::-1]
return functools.reduce(np.multiply, matrices)
[docs]def kronecker(matrices, skip_matrix=None, reverse=False):
""" Kronecker product of a list of matrices.
Parameters
----------
matrices : list[np.ndarray]
List of matrices.
skip_matrix : int
Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
reverse : bool
If True, perform Kronecker product on the list of matrices in the reversed order
Returns
-------
result : np.ndarray
The result of the Kronecker product is a matrix of shape ``(prod(n_rows), prod(n_columns)``
where ``prod(n_rows) = prod([m.shape[0] for m in matrices])``
and ``prod(n_columns) = prod([m.shape[1] for m in matrices])``
"""
if skip_matrix is not None:
matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
if reverse:
matrices = matrices[::-1]
result = matrices[0]
for matrix in matrices[1:]:
result = np.kron(result, matrix)
return result
[docs]def unfold(tensor, mode):
""" Unfolds N-dimensional array into a 2D array.
Parameters
----------
tensor : np.ndarray
N-dimensional array to be unfolded
mode : int
Specifies a mode along which a `tensor` will be unfolded
Returns
-------
matrix : np.ndarray
Unfolded version of a `tensor` with a shape ``(tensor.shape[mode], -1)``
"""
matrix = np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1))
return matrix
[docs]def kolda_unfold(tensor, mode):
""" Unfolds N-dimensional array into a 2D array.
Parameters
----------
tensor : np.ndarray
N-dimensional array to be unfolded
mode : int
Specifies a mode along which a `tensor` will be unfolded
Returns
-------
matrix : np.ndarray
Unfolded version of a `tensor` with a shape ``(tensor.shape[mode], -1)``
Notes
-----
Much slower for then ``unfold``.
"""
matrix = np.transpose(tensor, _kolda_reorder(tensor.ndim, mode)).reshape((tensor.shape[mode], -1))
return matrix
[docs]def fold(matrix, mode, shape):
""" Fold a 2D array into a N-dimensional array.
Parameters
----------
matrix : np.ndarray
Unfolded version of a tensor
mode : int
A mode along which original tensor was unfolded into a `matrix`
shape : tuple
Shape of the original tensor before it was unfolded
Returns
-------
tensor : np.ndarray
N-dimensional array of the original shape
Notes
-----
At the moment it reverts unfolding operation (``unfold``). Will be generalised in a future
"""
full_shape = list(shape)
mode_dim = full_shape.pop(mode)
full_shape.insert(0, mode_dim)
tensor = np.moveaxis(np.reshape(matrix, full_shape), 0, mode)
return tensor
[docs]def kolda_fold(matrix, mode, shape):
""" Fold a 2D array into a N-dimensional array.
Parameters
----------
matrix : np.ndarray
Unfolded version of a tensor
mode : int
A mode along which original tensor was unfolded into a `matrix`
shape : tuple
Shape of the original tensor before it was unfolded
Returns
-------
tensor : np.ndarray
N-dimensional array of the original shape
Notes
-----
1) Much slower then ``fold``
2) At the moment it reverts unfolding operation (``kolda_unfold``). Will be generalised in a future
"""
unfolded_indices = _kolda_reorder(len(shape), mode)
original_shape = [shape[i] for i in unfolded_indices]
matrix = matrix.reshape(original_shape)
folded_indices = list(range(len(shape)-1, 0, -1))
folded_indices.insert(mode, 0)
tensor = np.transpose(matrix, folded_indices)
return tensor
[docs]def mode_n_product(tensor, matrix, mode):
""" Mode-n product of a N-dimensional array with a matrix.
Parameters
----------
tensor : np.ndarray
N-dimensional array
matrix : np.ndarray
2D array
mode : int
Specifies mode along which a `tensor` is multiplied by a `matrix`. The size of a `tensor` along this `mode`
should be equal to the number of columns of the `matrix`. That is: ``tensor.shape[mode] = matrix.shape[1]``
Returns
-------
result : np.ndarray
The result of the mode-n product of a `tensor` with a `matrix` along specified `mode`.
Notes
-----
Result of mode-n product does not depend on the folding/unfolding convention,
as long as folding and unfolding operations belong to the same convention.
"""
# TODO: Implement mode-n product with a vector
if matrix.ndim != 2:
raise ValueError("Mode-n product can only be performed with the 2D array, "
"whereas an array of order {} was provided".format(matrix.ndim))
orig_shape = list(tensor.shape)
new_shape = orig_shape
new_shape[mode] = matrix.shape[0]
result = fold(np.dot(matrix, unfold(tensor, mode)), mode, tuple(new_shape))
return result
def _kolda_reorder(ndim, mode):
"""Reorders the elements
"""
indices = list(range(ndim))
element = indices.pop(mode)
return ([element] + indices[::-1])
[docs]def sampled_khatri_rao(matrices, sample_size=None, skip_matrix=None):
""" Sampled Khatri-Rao product of a list of matrices.
Parameters
----------
matrices : list[np.ndarray]
List of matrices. Each matrix should have the same number of columns
skip_matrix : int
Index of a matrix (from the `matrices`) to be skipped. By default none are skipped
reverse : bool
If True, perform khatri-rao product on the list of matrices in the reversed order
Returns
-------
result : np.ndarray
The result of the Khatri-Rao product of the sampled matrices
indices : tuple list
list of indices for all modes
References
----------
- Battaglino, C., Ballard, G., & Kolda, T. G. (2018). A Practical Randomized CP Tensor
Decomposition. SIAM Journal on Matrix Analysis and Applications, 39(2), 876–901.
http://doi.org/10.1137/17m1112303
"""
np.random.seed(0)
if len(matrices) < 2:
raise ValueError('khatri_roa product requires a list of at least 2 matrices, '
'but {} given.'.format(len(matrices)))
n_cols = matrices[0].shape[1]
if skip_matrix is not None:
matrices = [matrices[i] for i in range(len(matrices)) if i != skip_matrix]
if sample_size is None:
sample_size = int(10*n_cols*np.log(n_cols))
result = np.ones((sample_size, n_cols))
idxlist = [np.random.randint(0, m.shape[0], size=sample_size, dtype=int) for m in matrices]
for i, (idx, mat) in enumerate(zip(idxlist, matrices)):
if mat.shape[1] != n_cols:
raise ValueError('All matrices must have the same number of columns.'+\
'Got {} and {}'.format(mat.shape, n_cols))
result = hadamard([result, mat[idx, :]])
return result, idxlist