# Source code for hottbox.datasets.synthetic
import numpy as np
from ..core.structures import Tensor
import warnings
def _predefined_distr(distr, shape):
    """Draw an array of the requested shape from a named, predefined distribution.

    Only the requested distribution is sampled, so the global NumPy random
    state advances by exactly one draw (and not at all for ``'ones'`` /
    ``'zeros'`` or for an unknown name).

    Parameters
    ----------
    distr : str
        Name of the distribution. Supported names:

        - ``'uniform'``    : ``np.random.uniform`` with its default bounds
        - ``'normal'``     : ``np.random.normal`` with its default parameters
        - ``'triangular'`` : ``np.random.triangular(-1, 0, 1)``
        - ``'standard-t'`` : ``np.random.standard_t`` with 10 degrees of freedom
        - ``'ones'``       : array filled with ones
        - ``'zeros'``      : array filled with zeros
    shape : int or tuple of int
        Shape of the generated array (forwarded to numpy as ``size`` / ``shape``).

    Returns
    -------
    np.ndarray
        Array of shape ``shape`` filled according to ``distr``.

    Raises
    ------
    NameError
        If ``distr`` is not one of the supported names.
    """
    # Map each name to a zero-argument callable so that nothing is generated
    # until the requested distribution has been validated and selected.
    samplers = {
        'uniform': lambda: np.random.uniform(size=shape),
        'normal': lambda: np.random.normal(size=shape),
        'triangular': lambda: np.random.triangular(-1, 0, 1, size=shape),
        'standard-t': lambda: np.random.standard_t(10, size=shape),
        'ones': lambda: np.ones(shape),
        'zeros': lambda: np.zeros(shape),
    }
    if distr not in samplers:
        raise NameError(
            "The distribution {} is not an available one. "
            "Please refer to the list of implementations: {}".format(distr, list(samplers))
        )
    return samplers[distr]()
def make_clusters(dims, centers=5, n_samples=1000, center_bounds=(-10.0, 10.0), std=0.5, return_centers=False):
    """ Generates a tensor of isotropic gaussian blobs (clusters)

    The samples of all clusters are stacked along the first mode, cluster
    after cluster, so the underlying array has shape
    ``(total number of samples, 1, dims)``.

    Parameters
    ----------
    dims : int
        Size of the last mode of the generated data, i.e. the number of
        coordinates of each sample and of each center
    centers : int or list(tuples)
        Either the number of clusters, in which case their centers are drawn
        uniformly within ``center_bounds``, or an explicit sequence of centers
        (each one broadcastable to ``(1, dims)``)
    n_samples : int or list(int)
        Either the total number of samples, split evenly between the clusters
        (the remainder is dropped with a warning), or the size of each cluster
    center_bounds : tuple(float, float)
        Specifies the bound (min, max) for generating the centers. Only used
        when ``centers`` is an integer
    std : float
        The standard deviation of each of the generated clusters
    return_centers : bool
        If True, the centers are returned alongside the tensor

    Returns
    -------
    tensor : Tensor
        Generated tensor according to the parameters specified
    centroids : np.ndarray or list(tuples)
        Only returned if ``return_centers`` is True. Array of shape
        ``(centers, 1, dims)`` when the centers were generated, otherwise the
        ``centers`` argument exactly as it was passed

    Raises
    ------
    ValueError
        If there are no centers to distribute an integer ``n_samples`` over, or
        if the number of cluster sizes does not match the number of centers
    """
    # Accept numpy integer scalars as well as plain python ints
    int_types = (int, np.integer)

    if isinstance(centers, int_types):
        centroids = np.random.uniform(*center_bounds, size=(centers, 1, dims))
    else:
        centroids = centers
    n_cent = len(centroids)

    if isinstance(n_samples, int_types):
        if n_cent == 0:
            # Would otherwise surface as an obscure ZeroDivisionError below
            raise ValueError("At least one center is required to distribute the samples")
        if n_samples % n_cent != 0:
            warnings.warn("The number of samples is not divisible by the number of centers - Truncating", UserWarning)
        samples = [n_samples // n_cent] * n_cent
    else:
        samples = n_samples

    if len(samples) != n_cent:
        raise ValueError("The number of samples specified do not match the number " +
                         "of centers")

    # Collect the blobs and concatenate once instead of re-copying the growing
    # array on every iteration. The empty seed keeps the result well defined
    # (shape ``(0, 1, dims)``) when there is nothing to generate.
    blobs = [np.empty((0, 1, dims))]
    for s_size, center in zip(samples, centroids):
        blobs.append(np.random.normal(loc=center, scale=std, size=(s_size, 1, dims)))
    tensor = np.concatenate(blobs)

    if return_centers:
        return Tensor(array=tensor), centroids
    return Tensor(array=tensor)