Source code for deepreplay.datasets.parabola

import numpy as np

def load_data(xlim=(-1, 1), n_points=1000, shuffle=True, seed=13):
    """Generate a two-class dataset made of two parabolas.

    Both curves share the same `n_points` evenly spaced x1 values.
    The upper parabola (x2 = x1 ** 2) holds the negative cases
    (class 0) and the lower parabola, shifted down by 0.5
    (x2 = x1 ** 2 - 0.5), holds the positive cases (class 1).

    Parameters
    ----------
    xlim: tuple of numbers, optional
        Boundaries (start, stop) for the X axis. Default is (-1, 1)
    n_points: int, optional
        Number of points in each parabola. Default is 1,000.
    shuffle: boolean, optional
        If True, the points are shuffled. Default is True.
    seed: int, optional
        Random seed, only used when `shuffle` is True. Default is 13.
        Note that the global NumPy random state is re-seeded with it.

    Returns
    -------
    X, y: tuple of ndarray
        X is an array of shape (2 * n_points, 2) containing the
        samples for the two parabolas.
        y is an array of shape (2 * n_points, 1) containing the
        classes of the samples.
    """
    # feature x1: n_points evenly spaced between the xlim boundaries
    x1 = np.linspace(xlim[0], xlim[1], n_points)
    # feature x2, for the two curves
    x2_blue = np.square(x1)
    x2_green = x2_blue - .5

    # coordinates for points in the blue line, shape (2, n_points)
    blue_line = np.vstack([x1, x2_blue])
    # coordinates for points in the green line, shape (2, n_points)
    green_line = np.vstack([x1, x2_green])

    # Remember, blue line is negative (0) and green line is positive (1)
    X = np.concatenate([blue_line, green_line], axis=1).transpose()
    # Always a column vector, so the returned shape does not depend
    # on whether shuffling was requested
    y = np.concatenate([np.zeros(n_points), np.ones(n_points)]).reshape(-1, 1)

    # But we must not feed the network with neatly organized inputs...
    # so let's randomize them
    if shuffle:
        np.random.seed(seed)
        shuffled = np.random.permutation(X.shape[0])
        X = X[shuffled]
        y = y[shuffled]

    return (X, y)