Source code for snf.datasets

# -*- coding: utf-8 -*-
"""
Functions for loading test data setss
"""

import os.path as op
from pkg_resources import resource_filename
import numpy as np
from sklearn.utils import Bunch

_res_path = resource_filename('snf', 'tests/data/{resource}')


def _load_data(dset, dfiles):
    """
    Loads `dfiles` for `dset` and return Bunch with data and labels

    Parameters
    ----------
    dset : {'sim', 'digits'}
        Dataset to load
    dfiles : list of str
        Data files in `dset`

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        With keys `data` and `labels`
    """

    dpath = _res_path.format(resource=dset)

    if not op.isdir(dpath):  # should never happen
        raise ValueError('{} is not a valid dataset. If you are receiving '
                         'this error after using snf.datasets.load_simdata() '
                         'or snf.datasets.load_digits() it is possible that '
                         'snfpy was improperly installed. Please check your '
                         'installation and try again.'.format(dset))

    # space versus comma-delimited files (ugh)
    try:
        data = [np.loadtxt(op.join(dpath, fn)) for fn in dfiles]
    except ValueError:
        data = [np.loadtxt(op.join(dpath, fn), delimiter=',') for fn in dfiles]

    return Bunch(data=data,
                 labels=np.loadtxt(op.join(dpath, 'label.csv')))


[docs]def load_simdata(): """ Loads "similarity" data with two datatypes Returns ------- sim : :obj:`sklearn.utils.Bunch` Dictionary-like object with keys ['data', 'labels'] """ dfiles = [ 'data1.csv', 'data2.csv' ] return _load_data('sim', dfiles)
[docs]def load_digits(): """ Loads "digits" dataset with four datatypes Returns ------- digits : :obj:`sklearn.utils.Bunch` Dictionary-like object with keys ['data', 'labels'] """ dfiles = [ 'fourier.csv', 'pixel.csv', 'profile.csv', 'zer.csv' ] return _load_data('digits', dfiles)