Source code for wbia.scripts.classify_shark

# -*- coding: utf-8 -*-
# flake8: noqa
import logging
import utool as ut
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.svm
import sklearn.metrics
from sklearn import preprocessing

try:
    from wbia_cnn.models import abstract_models

    AbstractCategoricalModel = abstract_models.AbstractCategoricalModel
except ImportError:
    AbstractCategoricalModel = object
    print('no wbia_cnn')

from os.path import join

(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')


def shark_net(dry=False):
    """
    CommandLine:
        python -m wbia.scripts.classify_shark shark_net
        python -m wbia.scripts.classify_shark shark_net --dry
        python -m wbia.scripts.classify_shark shark_net --vd --monitor

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.classify_shark import *  # NOQA
        >>> shark_net()
    """
    from wbia.scripts import classify_shark
    import wbia

    ibs = wbia.opendb('WS_ALL')  # NOQA
    config = {'dim_size': (224, 224), 'resize_dim': 'wh'}

    # ------------
    # Define dataset
    # ------------
    target_type = 'binary'
    # target_type = 'multiclass3'
    # ut.delete(ibs.get_neuralnet_dir())  # to reset
    dataset = classify_shark.get_shark_dataset(target_type, 'chip')

    # ------------
    # Define model
    # ------------
    if ut.get_computer_name() == 'Leviathan':
        batch_size = 128
        suffix = 'resnet'
        # suffix = 'lenet'
        # suffix = 'incep'
    else:
        suffix = 'lenet'
        batch_size = 64
        # suffix = 'resnet'
        # batch_size = 32
    model_name = 'injur-shark-' + suffix

    if False:
        model = classify_shark.WhaleSharkInjuryModel(
            name=model_name,
            output_dims=len(dataset.getprop('target_names')),
            data_shape=config['dim_size'] + (3,),
            batch_size=batch_size,
            arch_dpath='.',
        )
        model.init_arch()
        model.load_model_state()
    else:
        model = classify_shark.WhaleSharkInjuryModel(
            name=model_name,
            dataset_dpath=dataset.dataset_dpath,
            training_dpath=ibs.get_neuralnet_dir(),
            # output_dims=len(dataset.getprop('target_names')),
            data_shape=config['dim_size'] + (3,),
            batch_size=batch_size,
        )
        model.init_arch()
        model.print_layer_info()

    if False:
        model.arch_dpath = '/home/joncrall/Desktop/manually_saved/arch_injur-shark-resnet_o2_d27_c2942_jzuddodd/'
        state_fpath = model.get_model_state_fpath(dpath=model.trained_arch_dpath)
        state_fpath = model.get_model_state_fpath()
        model.load_model_state(fpath=state_fpath)

        # X_test, y_test = dataset.subset('test')
        # X_test, y_test = dataset.subset('valid')
        # X_test, y_test = dataset.subset('learn')
        X_test, y_test = dataset.subset('test')
        # y_pred = model.predict(X_test)
        test_outputs = model._predict(X_test)
        y_pred = test_outputs['predictions']
        logger.info(model.name)
        report = sklearn.metrics.classification_report(y_true=y_test, y_pred=y_pred)
        logger.info(report)

        state_fpath = '/home/joncrall/Desktop/manually_saved/arch_injur-shark-resnet_o2_d27_c2942_jzuddodd/model_state_arch_jzuddodd.pkl'
        dpath = '/home/joncrall/Desktop/manually_saved/arch_injur-shark-lenet_o2_d11_c688_acioqbst'
        model.dump_cases(X_test, y_test, 'test', dpath=dpath)

    hyperparams = dict(
        era_size=30,
        max_epochs=1000,
        rate_schedule=0.1,
        augment_on=True,
        class_weight='balanced',
        stopping_patience=200,
    )
    model.learn_state.weight_decay = 0.000002
    model.learn_state.learning_rate = 0.005
    ut.update_existing(model.hyperparams, hyperparams, assert_exists=True)
    model.monitor_config['monitor'] = True
    model.monitor_config['weight_dump_freq'] = 100
    model.monitor_config['case_dump_freq'] = 100
    # model.build_backprop_func()
    # model.build_forward_func()

    # ---------------
    # Setup and learn
    # ---------------
    X_learn, y_learn = dataset.subset('learn')
    X_valid, y_valid = dataset.subset('valid')
    X_test, y_test = dataset.subset('test')
    # model.ensure_data_params(X_learn, y_learn)
    # X_train = X_learn  # NOQA
    # y_train = y_learn  # NOQA
    valid_idx = None  # NOQA
    if dry or ut.get_argflag('--dry'):
        return model, dataset
    model.fit(X_learn, y_learn, X_valid=X_valid, y_valid=y_valid)
# @ut.reloadable_class
class WhaleSharkInjuryModel(AbstractCategoricalModel):
    """
    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.classify_shark import *  # NOQA
        >>> from wbia.scripts import classify_shark
        >>> ds = classify_shark.get_shark_dataset('binary', 'chip')
        >>> problem = classify_shark.ClfProblem(ds)
        >>> problem.print_support_info()
        >>> ibs = ds.ibs
    """
    def def_lenet(model):
        import lasagne
        from wbia_cnn import custom_layers

        logger.info('[model] init_arch')
        lrelu = lasagne.nonlinearities.LeakyRectify(leakiness=(1.0 / 3.0))
        W = lasagne.init.Orthogonal('relu')
        bundles = custom_layers.make_bundles(
            nonlinearity=lrelu,
            batch_norm=True,
            filter_size=(3, 3),
            stride=(1, 1),
            pool_size=(2, 2),
            pool_stride=(2, 2),
            W=W,
        )
        b = ut.DynStruct(copy_dict=bundles)

        network_layers_def = [
            b.InputBundle(shape=model.input_shape, noise=False),
            # Convolutional layers
            b.ConvBundle(num_filters=16, pool=True),
            b.ConvBundle(num_filters=16),
            b.ConvBundle(num_filters=16, pool=True),
            b.ConvBundle(num_filters=16),
            b.ConvBundle(num_filters=32, pool=True),
            b.ConvBundle(num_filters=32),
            b.ConvBundle(num_filters=32, pool=True),
            b.ConvBundle(num_filters=32),
            # Fully connected layers
            b.DenseBundle(num_units=64, dropout=0.5),
            b.DenseBundle(num_units=64, dropout=0.5),
            b.SoftmaxBundle(num_units=model.output_dims),
        ]
        return network_layers_def
    def def_resnet(model):
        import lasagne
        from wbia_cnn import custom_layers

        logger.info('[model] init_arch')
        nonlinearity = lasagne.nonlinearities.LeakyRectify(leakiness=(1.0 / 3.0))
        W = lasagne.init.HeNormal(gain='relu')
        # W = lasagne.init.GlorotUniform()
        bundles = custom_layers.make_bundles(
            nonlinearity=nonlinearity,
            filter_size=(3, 3),
            stride=(1, 1),
            W=W,
            pool_size=(2, 2),
            pool_stride=(2, 2),
        )
        b = ut.DynStruct(copy_dict=bundles)

        network_layers_def = [
            b.InputBundle(shape=model.input_shape, noise=False),
            # Convolutional layers
            b.ConvBundle(num_filters=16, pool=False),
            b.ResidualBundle(num_filters=16, stride=(2, 2), preactivate=False),
            b.ResidualBundle(num_filters=16),
            b.ResidualBundle(num_filters=16, stride=(2, 2)),
            b.ResidualBundle(num_filters=16),
            b.ResidualBundle(num_filters=16, stride=(2, 2)),
            b.ResidualBundle(num_filters=16),
            b.ResidualBundle(num_filters=16, stride=(2, 2)),
            b.ResidualBundle(num_filters=16, dropout=None),
            b.ResidualBundle(num_filters=16, stride=(2, 2), dropout=0.5),
            b.ResidualBundle(num_filters=16, postactivate=True, dropout=0.5),
            # Fully connected layers
            b.GlobalPool(),
            b.SoftmaxBundle(num_units=model.output_dims),
        ]
        return network_layers_def
    def def_inception(model):
        import lasagne
        from wbia_cnn import custom_layers

        logger.info('[model] init_arch')
        N = 16
        # Define default inception branch types
        incep_branches = [
            dict(t='c', s=(1, 1), r=0, n=N),
            dict(t='c', s=(3, 3), r=N // 2, n=N // 2),
            dict(t='c', s=(3, 3), r=N // 4, n=N // 4, d=2),
            dict(t='p', s=(3, 3), n=N // 2),
        ]
        lrelu = lasagne.nonlinearities.LeakyRectify(leakiness=(1.0 / 3.0))
        W = lasagne.init.Orthogonal('relu')
        bundles = custom_layers.make_bundles(
            nonlinearity=lrelu,
            batch_norm=True,
            filter_size=(3, 3),
            stride=(1, 1),
            pool_size=(3, 3),
            pool_stride=(2, 2),
            branches=incep_branches,
            W=W,
        )
        b = ut.DynStruct(copy_dict=bundles)

        network_layers_def = [
            # Convolutional layers
            b.InputBundle(shape=model.input_shape, noise=False),
            b.ConvBundle(num_filters=16, filter_size=(3, 3), pool=False),
            b.ConvBundle(num_filters=12, filter_size=(3, 3), pool=True),
            b.InceptionBundle(dropout=0.3, pool=True),
            b.InceptionBundle(dropout=0.3, pool=True),
            b.InceptionBundle(dropout=0.4, pool=True),
            b.InceptionBundle(
                dropout=0.5,
                branches=[
                    dict(t='c', s=(1, 1), r=0, n=model.output_dims),
                    dict(t='c', s=(3, 3), r=N // 2, n=model.output_dims),
                    dict(t='c', s=(3, 3), r=N // 4, n=model.output_dims, d=2),
                    dict(t='p', s=(3, 3), n=model.output_dims),
                ],
            ),
            b.GlobalPool(),
            b.SoftmaxBundle(num_units=model.output_dims),
            # Fully connected layers
            # b.DenseBundle(num_units=64, dropout=.5),
            # b.DenseBundle(num_units=64, dropout=.5),
        ]
        return network_layers_def
    def init_arch(model, verbose=ut.VERBOSE, **kwargs):
        r"""
        CommandLine:
            python -m wbia.scripts.classify_shark WhaleSharkInjuryModel.init_arch
            python -m wbia.scripts.classify_shark WhaleSharkInjuryModel.init_arch --show

            python -m wbia.scripts.classify_shark shark_net --dry --show
            python -m wbia.scripts.classify_shark shark_net --vd

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.scripts.classify_shark import *  # NOQA
            >>> verbose = True
            >>> data_shape = tuple(ut.get_argval('--datashape', type_=list,
            >>>                                  default=(224, 224, 3)))
            >>> model = WhaleSharkInjuryModel(batch_size=64, output_dims=2,
            >>>                               data_shape=data_shape)
            >>> model.init_arch()
            >>> model.print_model_info_str()
            >>> ut.quit_if_noshow()
            >>> model.show_arch(fullinfo=False)
            >>> ut.show_if_requested()
        """
        from wbia_cnn import custom_layers

        # if ut.get_computer_name() == 'Leviathan':
        if model.name.endswith('incep'):
            network_layers_def = model.def_inception()
        elif model.name.endswith('lenet'):
            network_layers_def = model.def_lenet()
        elif model.name.endswith('resnet'):
            network_layers_def = model.def_resnet()
        network_layers = custom_layers.evaluate_layer_list(network_layers_def)
        # model.network_layers = network_layers
        output_layer = network_layers[-1]
        model.output_layer = output_layer
        return output_layer
    def special_output():
        pass
    # def special_loss_function(output_activations):
    #     output_injur1 = output_activations[:, 0]
    #     output_injur2 = output_activations[:, 1]
    #     output_healthy = 1 - ((1 - output_injur1) * (1 - output_injur2))
    #     import lasagne
    #     lasagne.objectives.binary_crossentropy(output_injur1)
    #     lasagne.objectives.binary_crossentropy(output_injur2)
    def augment(self, Xb, yb=None):
        """
        X_valid, y_valid = dataset.subset('valid')
        num = 10
        Xb = X_valid[:num]
        Xb = Xb / 255.0 if ut.is_int(Xb) else Xb
        Xb = Xb.astype(np.float32, copy=True)
        yb = None if yb is None else yb.astype(np.int32, copy=True)
        # Rescale the batch data to the range 0 to 1
        Xb_, yb_ = model.augment(Xb)
        yb_ = None

        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> pt.qt4ensure()
        >>> from wbia_cnn import augment
        >>> augment.show_augmented_patches(Xb, Xb_, yb, yb_, data_per_label=1)
        >>> ut.show_if_requested()
        """
        from wbia_cnn import augment

        rng = np.random
        affperterb_ranges = dict(
            zoom_range=(1.3, 1.2),
            max_tx=2,
            max_ty=2,
            max_shear=ut.TAU / 32,
            max_theta=ut.TAU,
            enable_stretch=True,
            enable_flip=True,
        )
        Xb_, yb_ = augment.augment_affine(
            Xb,
            yb,
            rng=rng,
            inplace=True,
            data_per_label=1,
            affperterb_ranges=affperterb_ranges,
            aug_prop=0.5,
        )
        return Xb_, yb_
    # def fit_interactive(X_train, y_train, X_valid, y_valid):
    #     pass
def get_shark_dataset(target_type='binary', data_type='chip'):
    """
    >>> from wbia.scripts.classify_shark import *  # NOQA
    >>> target_type = 'binary'
    >>> data_type = 'hog'
    >>> dataset = get_shark_dataset(target_type)
    """
    from wbia_cnn.dataset import DataSet
    from wbia.scripts import classify_shark

    tup = classify_shark.get_shark_labels_and_metadata(target_type)
    ibs, annots, target, target_names, config, metadata, enc = tup
    data_shape = config['dim_size'] + (3,)
    length = len(annots)

    # Build dataset configuration string
    trail_cfgstr = ibs.depc_annot.get_config_trail_str('chips', config)
    trail_hashstr = ut.hashstr27(trail_cfgstr)
    visual_uuids = annots.visual_uuids
    metadata['visual_uuid'] = np.array(visual_uuids)
    # metadata['nids'] = np.array(annots.nids)
    chips_hashstr = ut.hashstr_arr27(annots.visual_uuids, 'chips')
    cfgstr = chips_hashstr + '_' + trail_hashstr
    name = 'injur-shark'

    if data_type == 'hog':
        cfgstr = 'hog_' + cfgstr
        name += '-hog'

    training_dpath = ibs.get_neuralnet_dir()
    dataset = DataSet(
        cfgstr,
        data_shape=data_shape,
        num_data=length,
        training_dpath=training_dpath,
        name=name,
    )
    logger.info(dataset.dataset_id)

    dataset.setprop('ibs', ibs)
    dataset.setprop('annots', annots)
    dataset.setprop('target_names', target_names)
    dataset.setprop('config', config)
    dataset.setprop('enc', enc)

    try:
        dataset.load()
    except IOError:
        import vtool as vt

        dataset.ensure_dirs()
        if data_type == 'hog':
            data = np.array([h.ravel() for h in annots.hog_hog])
            labels = target
            # Save data where dataset expects it to be
            dataset.save(data, labels, metadata, data_per_label=1)
        else:
            chip_gen = ibs.depc_annot.get(
                'chips', annots.aids, 'img', eager=False, config=config
            )
            iter_ = iter(ut.ProgIter(chip_gen, length=length, lbl='load chip'))
            shape = (length,) + data_shape
            data = vt.fromiter_nd(iter_, shape=shape, dtype=np.uint8)  # NOQA
            labels = target
            # Save data where dataset expects it to be
            dataset.save(data, labels, metadata, data_per_label=1)

    from wbia_cnn.dataset import stratified_label_shuffle_split

    if not dataset.has_split('learn'):
        nids = np.array(dataset.metadata['nids'])
        # Partition into a testing and training dataset
        y = dataset.labels
        train_idx, test_idx = stratified_label_shuffle_split(
            y, nids, [0.8, 0.2], rng=22019
        )
        nids_train = nids.take(train_idx, axis=0)
        y_train = y.take(train_idx, axis=0)

        # Partition training into learning and validation
        learn_idx, valid_idx = stratified_label_shuffle_split(
            y_train, nids_train, [0.8, 0.2], y_idx=train_idx, rng=90120
        )
        assert len(np.intersect1d(learn_idx, test_idx)) == 0
        assert len(np.intersect1d(valid_idx, test_idx)) == 0
        assert len(np.intersect1d(learn_idx, valid_idx)) == 0

        if data_type == 'hog':
            dataset.add_split('train', train_idx)
        dataset.add_split('test', test_idx)
        dataset.add_split('learn', learn_idx)
        dataset.add_split('valid', valid_idx)
        dataset.clear_cache('full')

    if data_type == 'hog':
        # hack
        y = dataset.labels
        nids = np.array(dataset.metadata['nids'])
        train_idx, test_idx = stratified_label_shuffle_split(
            y, nids, [0.8, 0.2], rng=22019
        )
        nids_train = nids.take(train_idx, axis=0)
        y_train = y.take(train_idx, axis=0)

        # Partition training into learning and validation
        learn_idx, valid_idx = stratified_label_shuffle_split(
            y_train, nids_train, [0.8, 0.2], y_idx=train_idx, rng=90120
        )
        dataset._split_idxs = {}
        dataset._split_idxs['learn'] = learn_idx
        dataset._split_idxs['valid'] = valid_idx
        dataset._split_idxs['train'] = train_idx
        dataset._split_idxs['test'] = test_idx

    dataset.ensure_symlinked()
    return dataset
def get_shark_labels_and_metadata(target_type=None, ibs=None, config=None):
    """
    >>> from wbia.scripts.classify_shark import *  # NOQA
    >>> target_type = 'multiclass3'
    >>> data_type = 'hog'
    """
    import wbia

    if ibs is None:
        ibs = wbia.opendb('WS_ALL')
    if config is None:
        config = {
            # 'dim_size': (256, 256),
            'dim_size': (224, 224),
            'resize_dim': 'wh',
        }
    all_annots = ibs.annots(config=config)

    isempty = ut.not_list(ut.lmap(len, ibs.images().aids))
    # if False:
    #     x = ibs.images().compress(isempty)
    num_empty_images = sum(isempty)
    logger.info('Images without annotations: %r' % (num_empty_images,))

    logger.info(
        'Building labels for %r annotations from %r images'
        % (len(all_annots), len(ut.unique(all_annots.gids)))
    )

    TARGET_TYPE = 'binary'
    # TARGET_TYPE = 'multiclass3'
    if target_type is None:
        target_type = TARGET_TYPE

    from wbia.scripts import getshark

    category_tags = getshark.get_injur_categories(all_annots)
    logger.info('Base Category Tags')
    logger.info(ut.repr3(ut.dict_hist(ut.flatten(category_tags))))

    logger.info('Base Co-Occurrence Freq')
    co_occur1 = ut.tag_coocurrence(category_tags)
    logger.info(ut.repr3(co_occur1))

    ntags_list = np.array(ut.lmap(len, category_tags))
    is_no_tag = ntags_list == 0
    is_single_tag = ntags_list == 1
    is_multi_tag = ntags_list > 1

    if target_type == 'binary':
        regex_map = [
            ('injur-.*', 'injured'),
            ('healthy', 'healthy'),
        ]
    elif target_type == 'multiclass3':
        regex_map = [
            ('injur-trunc', 'injur-trunc'),
            ('injur-nicks', 'injur-trunc'),
            ('injur-scar', 'injur-scar'),
            ('injur-bite', 'injur-scar'),
            ('injur-gill', 'injur-scar'),
            ('injur-other', None),
            ('injur-dead', None),
            ('healthy', 'healthy'),
        ]
    elif target_type == 'multiclassX':
        regex_map = [
            ('injur-trunc', 'injur-trunc'),
            ('healthy', 'healthy'),
            ('injur-.*', None),
        ]
    else:
        raise ValueError('Unknown target_type=%r' % (target_type,))

    tag_vocab = ut.flat_unique(*category_tags)
    alias_map = ut.build_alias_map(regex_map, tag_vocab)
    unmapped = list(set(tag_vocab) - set(alias_map.keys()))
    logger.info('unmapped = %r' % (unmapped,))
    category_tags2 = ut.alias_tags(category_tags, alias_map)

    ntags_list = np.array(ut.lmap(len, category_tags2))
    is_no_tag = ntags_list == 0
    is_single_tag = ntags_list == 1
    is_multi_tag = ntags_list > 1

    logger.info('Cleaned tags')
    hist = ut.tag_hist(category_tags2)
    logger.info(ut.repr3(hist))

    # Get tag co-occurrence
    logger.info('Co-Occurrence Freq')
    co_occur = ut.tag_coocurrence(category_tags2)
    logger.info(ut.repr3(co_occur))

    logger.info('Co-Occurrence Percent')
    co_occur_percent = ut.odict(
        [(keys, [100 * val / hist[k] for k in keys]) for keys, val in co_occur.items()]
    )
    logger.info(ut.repr3(co_occur_percent, precision=2, nl=1))

    multi_annots = all_annots.compress(is_multi_tag)  # NOQA
    # ibs.set_image_imagesettext(multi_annots.gids, ['MultiTaged'] * is_multi_tag.sum())

    logger.info("can't use %r annots due to no labels" % (is_no_tag.sum(),))
    logger.info("can't use %r annots due to inconsistent labels" % (is_multi_tag.sum(),))
    logger.info('will use %r annots with consistent labels' % (is_single_tag.sum(),))

    annot_tags = ut.compress(category_tags2, is_single_tag)
    annots = all_annots.compress(is_single_tag)
    annot_tag_hist = ut.dict_hist(ut.flatten(annot_tags))
    logger.info('Final Annot Tags')
    logger.info(ut.repr3(annot_tag_hist))

    # target_names = ['healthy', 'injured']
    enc = preprocessing.LabelEncoder()
    enc.fit(ut.unique(ut.flatten(annot_tags)))
    target = enc.transform(ut.flatten(annot_tags))
    target_names = enc.classes_

    metadata = {
        'aids': np.array(annots.aids),
        'nids': np.array(annots.nids),
    }
    tup = ibs, annots, target, target_names, config, metadata, enc
    return tup
# @ut.reloadable_class
class ClfProblem(object):
    """Harness for researching a classification problem"""

    def __init__(problem, ds):
        problem.ds = ds
    def print_support_info(problem):
        enc = problem.ds.enc
        target_labels = enc.inverse_transform(problem.ds.target)
        label_hist = ut.dict_hist(target_labels)
        logger.info('support hist' + ut.repr3(label_hist))
    def fit_new_classifier(problem, train_idx):
        """
        References:
            http://leon.bottou.org/research/stochastic
            http://blog.explainmydata.com/2012/06/ntrain-24853-ntest-25147-ncorrupt.html
            http://scikit-learn.org/stable/modules/svm.html#svm-classification
            http://scikit-learn.org/stable/modules/grid_search.html
        """
        logger.info('[problem] train classifier on %d data points' % (len(train_idx)))
        data = problem.ds.data
        target = problem.ds.target
        x_train = data.take(train_idx, axis=0)
        y_train = target.take(train_idx, axis=0)
        clf = sklearn.svm.SVC(
            kernel=str('linear'),
            C=0.17,
            class_weight='balanced',
            decision_function_shape='ovr',
        )

        # C, penalty, loss
        # param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        #               'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
        # clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
        # clf = clf.fit(X_train_pca, y_train)
        clf.fit(x_train, y_train)
        return clf
    def fit_new_linear_svm(problem, train_idx):
        logger.info('[problem] train classifier on %d data points' % (len(train_idx)))
        data = problem.ds.data
        target = problem.ds.target
        x_train = data.take(train_idx, axis=0)
        y_train = target.take(train_idx, axis=0)
        clf = sklearn.svm.SVC(
            kernel=str('linear'),
            C=0.17,
            class_weight='balanced',
            decision_function_shape='ovr',
        )
        clf.fit(x_train, y_train)
    def gridsearch_linear_svm_params(problem, train_idx):
        """
        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.scripts.classify_shark import *  # NOQA
            >>> from wbia.scripts import classify_shark
            >>> ds = classify_shark.get_sharks_dataset('binary')
            >>> problem = classify_shark.ClfProblem(ds)
            >>> problem.print_support_info()
        """
        try:
            import sklearn.model_selection
        except ImportError:
            pass
        import sklearn.grid_search

        with ut.Timer('cv'):
            data = problem.ds.data
            target = problem.ds.target

            def stratified_sample_idxs_unbalanced(target, size=1000):
                rng = np.random.RandomState(43)
                sample = []
                for label in np.unique(target):
                    target_idxs = np.where(target == label)[0]
                    subset_size = size
                    rand_idx = ut.random_indexes(len(target_idxs), subset_size, rng=rng)
                    sample_idx = ut.take(target_idxs, rand_idx)
                    sample.append(sample_idx)
                sample_idx = np.array(sorted(ut.flatten(sample)))
                return sample_idx

            train_idx = stratified_sample_idxs_unbalanced(target, 4000)

            x_train = data.take(train_idx, axis=0)
            y_train = target.take(train_idx, axis=0)
            param_grid = {
                # 'C': [1, .5, .1, 5, 10, 100],
                # 'C': [1, 1e-1, 1e-2, 1e-3]
                # 'C': np.linspace(1, 1e-5, 15)
                # 'C': np.linspace(.2, 1e-5, 15)
                # 'C': np.logspace(np.log10(1e-3), np.log10(.1), 30, base=10)
                # 'C': np.linspace(.1, .3, 20),
                # 'C': np.linspace(1.0, .22, 20),
                'C': np.linspace(0.25, 0.01, 40),
                # 'loss': ['l2', 'l1'],
                # 'penalty': ['l2', 'l1'],
            }
            _clf = sklearn.svm.SVC(
                kernel=str('linear'),
                C=0.17,
                class_weight='balanced',
                decision_function_shape='ovr',
            )
            clf = sklearn.grid_search.GridSearchCV(
                _clf, param_grid, n_jobs=6, iid=False, cv=5, verbose=3
            )
            clf.fit(x_train, y_train)
            # (NOTE grid.predict only uses the best estimator)
            logger.info('clf.best_params_ = %r' % (clf.best_params_,))
            logger.info('Best parameters set found on development set:')
            logger.info(clf.best_params_)
            logger.info('Grid scores on development set:')
            for params, mean_score, scores in clf.grid_scores_:
                logger.info(
                    '%0.3f (+/-%0.03f) for %r' % (mean_score, scores.std() * 2, params)
                )
            xdata = np.array([t[0]['C'] for t in clf.grid_scores_])
            ydata = np.array([t[1] for t in clf.grid_scores_])

            y_data_std = np.array([t[2].std() for t in clf.grid_scores_])
            ydata_mean = ydata
            y_data_max = ydata_mean + y_data_std
            y_data_min = ydata_mean - y_data_std

            # pt.plot(xdata, ydata, '-rx')
            import wbia.plottool as pt

            pt.figure(fnum=pt.ensure_fnum(None))
            ax = pt.gca()
            ax.fill_between(xdata, y_data_min, y_data_max, alpha=0.2, color=pt.LIGHT_BLUE)
            pt.draw_hist_subbin_maxima(ydata, xdata)
    # y_data_std = np.array([t[2].std() for t in grid.grid_scores_])
    # ydata_mean = c_ydata
    # y_data_max = ydata_mean + y_data_std
    # y_data_min = ydata_mean - y_data_std
    #
    # import wbia.plottool as pt
    #
    # pt.figure(fnum=pt.ensure_fnum(None))
    # ax = pt.gca()
    # ax.fill_between(c_xdata, c_ydata, y_data_max, alpha=.2, color=pt.LIGHT_BLUE)
    # ax.fill_between(c_xdata, c_ydata, y_data_min, alpha=.2, color=pt.LIGHT_BLUE)
    #
    # pt.figure(fnum=pt.ensure_fnum(None))
    # hist = c_ydata
    # centers = c_xdata
    # pt.draw_hist_subbin_maxima(c_ydata, c_xdata, maxima_thresh=None, remove_endpoints=False)

    # clf.best_params_ = {u'C': 0.07143785714285722}
    # Best parameters set found on development set:
    # {u'C': 0.07143785714285722}
    # Grid scores on development set:
    # 0.729 (+/-0.016) for {u'C': 1.0}
    # 0.729 (+/-0.019) for {u'C': 0.92857214285714285}
    # 0.733 (+/-0.017) for {u'C': 0.85714428571428569}
    # 0.734 (+/-0.015) for {u'C': 0.78571642857142865}
    # 0.736 (+/-0.016) for {u'C': 0.71428857142857138}
    # 0.739 (+/-0.020) for {u'C': 0.64286071428571434}
    # 0.742 (+/-0.020) for {u'C': 0.57143285714285719}
    # 0.743 (+/-0.021) for {u'C': 0.50000500000000003}
    # 0.746 (+/-0.023) for {u'C': 0.42857714285714288}
    # 0.749 (+/-0.023) for {u'C': 0.35714928571428572}
    # 0.755 (+/-0.025) for {u'C': 0.28572142857142857}
    # 0.760 (+/-0.027) for {u'C': 0.21429357142857142}
    # 0.762 (+/-0.025) for {u'C': 0.14286571428571437}
    # 0.770 (+/-0.036) for {u'C': 0.07143785714285722}
    # 0.664 (+/-0.031) for {u'C': 1.0000000000000001e-05}

    # 0.774 (+/-0.039) for {u'C': 0.017433288221999882}
    # 0.775 (+/-0.039) for {u'C': 0.020433597178569417}
    # 0.774 (+/-0.039) for {u'C': 0.023950266199874861}
    # 0.777 (+/-0.038) for {u'C': 0.02807216203941177}
    # 0.775 (+/-0.036) for {u'C': 0.032903445623126679}
    # 0.773 (+/-0.033) for {u'C': 0.038566204211634723}

    # 0.722 (+/-0.060) for {u'C': 0.001}
    # 0.770 (+/-0.047) for {u'C': 0.01}
    # 0.775 (+/-0.047) for {u'C': 0.1}
    # 0.774 (+/-0.047) for {u'C': 0.12}
    # 0.773 (+/-0.045) for {u'C': 0.15}
    # 0.773 (+/-0.046) for {u'C': 0.17}
    # 0.772 (+/-0.047) for {u'C': 0.2}
    # 0.760 (+/-0.043) for {u'C': 0.5}
    # 0.748 (+/-0.043) for {u'C': 1.0}
    # 0.707 (+/-0.043) for {u'C': 100}
    # 0.702 (+/-0.047) for {u'C': 1000}
    def classifier_test(problem, clf, test_idx):
        logger.info('[problem] test classifier on %d data points' % (len(test_idx),))
        data = problem.ds.data
        target = problem.ds.target
        X_test = data.take(test_idx, axis=0)
        y_true = target.take(test_idx, axis=0)
        y_conf = predict_svc_ovr(clf, X_test)
        y_pred = y_conf.argmax(axis=1)
        result = ClfSingleResult(problem.ds, test_idx, y_true, y_pred, y_conf)
        return result
    def stratified_2sample_idxs(problem, frac=0.2, split_frac=0.75):
        target = problem.ds.target
        target_labels = problem.ds.target_labels

        rng = np.random.RandomState(43)
        train_sample = []
        test_sample = []
        for label in target_labels:
            target_idxs = np.where(target == label)[0]
            subset_size = int(len(target_idxs) * frac)
            rand_idx = ut.random_indexes(len(target_idxs), subset_size, rng=rng)
            sample_idx = ut.take(target_idxs, rand_idx)
            split = int(len(sample_idx) * split_frac)
            train_sample.append(sample_idx[split:])
            test_sample.append(sample_idx[:split])

        train_idx = np.array(sorted(ut.flatten(train_sample)))
        test_idx = np.array(sorted(ut.flatten(test_sample)))
        return train_idx, test_idx
    def gen_crossval_idxs(problem, n_folds=2):
        y = problem.ds.target
        rng = 43432
        if hasattr(problem.ds, 'nids'):
            # Ensure that an individual does not appear in both the train
            # and the test dataset
            from wbia_cnn.dataset import stratified_kfold_label_split

            labels = problem.ds.nids
            _iter = stratified_kfold_label_split(y, labels, n_folds=n_folds, rng=rng)
        else:
            xvalkw = dict(n_folds=n_folds, shuffle=True, random_state=rng)
            import sklearn.cross_validation

            skf = sklearn.cross_validation.StratifiedKFold(y, **xvalkw)
            _iter = skf
            # import sklearn.model_selection
            # skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
            # _iter = skf.split(X=np.empty(len(y)), y=y)
        msg = 'cross-val test on %s' % (problem.ds.name,)
        progiter = ut.ProgIter(_iter, length=n_folds, lbl=msg)
        for train_idx, test_idx in progiter:
            yield train_idx, test_idx
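    # The group-aware splitting above comes from wbia_cnn's
    # stratified_kfold_label_split. A minimal sketch of the same idea using only
    # scikit-learn (assumes a recent sklearn that provides StratifiedGroupKFold;
    # the tiny y / nids arrays below are hypothetical):
    #
    #     >>> # DISABLE_DOCTEST
    #     >>> import numpy as np
    #     >>> from sklearn.model_selection import StratifiedGroupKFold
    #     >>> y = np.array([0, 0, 1, 1, 0, 1, 0, 1])     # class labels
    #     >>> nids = np.array([1, 1, 2, 2, 3, 3, 4, 4])  # individual (name) ids
    #     >>> sgkf = StratifiedGroupKFold(n_splits=2)
    #     >>> for train_idx, test_idx in sgkf.split(np.empty(len(y)), y, groups=nids):
    #     ...     # no individual appears on both sides of the split
    #     ...     assert not set(nids[train_idx]) & set(nids[test_idx])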
# @ut.reloadable_class
class ClfSingleResult(object):
    r"""
    Reports the results of a classification problem

    Example:
        >>> # DISABLE_DOCTEST
        >>> result = ClfSingleResult()
    """

    def __init__(result, ds=None, test_idx=None, y_true=None, y_pred=None, y_conf=None):
        result.ds = ds
        result.test_idx = test_idx
        result.y_true = y_true
        result.y_pred = y_pred
        result.y_conf = y_conf
    def compile_results(result):
        import pandas as pd

        y_true = result.y_true
        y_pred = result.y_pred
        y_conf = result.y_conf
        test_idx = result.test_idx

        index = pd.Series(test_idx, name='test_idx')
        if len(result.ds.target_names) == 1:
            y_conf
        decision = pd.DataFrame(y_conf, index=index, columns=result.ds.target_names)
        result.decision = decision / 3
        easiness = np.array(ut.ziptake(result.decision.values, y_true))
        columns = ['pred', 'easiness']
        column_data = [y_pred, easiness]
        data = dict(zip(columns, column_data))
        result.df = pd.DataFrame(data, index, columns)
    def print_report(result):
        report = sklearn.metrics.classification_report(
            result.y_true, result.y_pred, target_names=result.ds.target_names
        )
        logger.info(report)
def get_model_state(clf):
    model_attr_names = [
        a for a in dir(clf) if a.endswith('_') and not a.startswith('__')
    ]
    model_state = {a: getattr(clf, a) for a in model_attr_names}
    return model_state
def set_model_state(clf, model_state):
    attr_names = sorted(model_state.keys())
    attr_names1 = [
        'dual_coef_',
    ]
    attr_names2 = [
        'coef_',
    ]
    attr_names3 = attr_names1 + attr_names2
    attr_namesA = ut.isect(attr_names1, attr_names)
    attr_namesB = ut.setdiff(attr_names, attr_names3)
    attr_namesC = ut.isect(attr_names2, attr_names)
    attr_names_ = attr_namesA + attr_namesB + attr_namesC
    for a in attr_names_:
        val = model_state[a]
        logger.info('a = %r' % (a,))
        try:
            setattr(clf, a, val)
        except AttributeError:
            val2 = getattr(clf, a)
            assert np.all(val == val2)
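# A minimal sketch of how set_model_state orders the restored attributes,
# assuming a hypothetical model_state dict (the list comprehensions below stand
# in for the ut.isect / ut.setdiff calls above): 'dual_coef_' is restored first
# and 'coef_' last, so the read-only derived 'coef_' is only checked after the
# attributes it is computed from are back in place.
#
#     >>> attr_names = sorted(['classes_', 'coef_', 'dual_coef_', 'intercept_'])
#     >>> order = (['dual_coef_']
#     ...          + [a for a in attr_names if a not in ('dual_coef_', 'coef_')]
#     ...          + ['coef_'])
#     >>> order
#     ['dual_coef_', 'classes_', 'intercept_', 'coef_']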
def predict_svc_ovr(clf, data):
    if len(clf.classes_) == 2:
        X = clf._validate_for_predict(data)
        X = clf._compute_kernel(X)
        _dec2 = clf._dense_decision_function(X)
        dec2 = -_dec2

        n_samples = dec2.shape[0]
        n_classes = len(clf.classes_)
        final = np.zeros((n_samples, n_classes))
        confidence_max = max(np.abs(dec2.max()), np.abs(dec2.min()))
        norm_conf = ((dec2.T[0] / confidence_max) + 1) / 2
        final.T[0] = 1 - norm_conf
        final.T[1] = norm_conf
        # output comparable to multiclass version
        y_conf = final
    else:
        # Get notion of confidence / probability of decision
        y_conf = clf.decision_function(data)
    return y_conf
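# Worked example of the binary-case rescaling above (illustrative numbers only):
# if the negated decision values for three samples are dec2 = [-1.0, 0.5, 2.0],
# then confidence_max = 2.0 and norm_conf = ((dec2 / 2.0) + 1) / 2
# = [0.25, 0.625, 1.0], so the two-column output is
#     [[0.750, 0.250],
#      [0.375, 0.625],
#      [0.000, 1.000]]
# i.e. column 1 holds the rescaled confidence for the second class and column 0
# its complement, matching the shape that decision_function gives in the
# multiclass case.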
def predict_ws_injury_interim_svm(ibs, aids, **kwargs):
    """
    Returns relative confidence
    """
    config = {
        # 'dim_size': (256, 256),
        'dim_size': (224, 224),
        'resize_dim': 'wh',
    }
    # Load the SVM
    model_fname = 'interim_svc_injur-shark-hog_12559_224x224x3_ldhhxnxo.cPkl'
    model_url = 'https://wildbookiarepository.azureedge.net/models/{}'.format(model_fname)
    model_fpath = ut.grab_file_url(model_url, check_hash=False)
    clf = ut.load_cPkl(model_fpath)

    annots = ibs.annots(aids, config=config)
    data = np.array([h.ravel() for h in annots.hog_hog])

    target_names = ['healthy', 'injured']
    # confidence = clf.decision_function(data)
    # y_conf = predict_svc_ovr(clf, data)
    scores = clf.decision_function(data)
    y_pred = scores > 0.0
    y_pred = y_pred.astype(np.int64)
    # y_pred = clf.predict(data)
    ut.embed()
    pred_nice = ut.take(target_names, y_pred)
    return list(zip(pred_nice, scores))
def shark_svm():
    r"""
    References:
        http://scikit-learn.org/stable/model_selection.html

    TODO:
        * Change unreviewed healthy tags to healthy-likely

    CommandLine:
        python -m wbia.scripts.classify_shark shark_svm --show
        python -m wbia.scripts.classify_shark shark_svm

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.classify_shark import *  # NOQA
        >>> shark_svm()
        >>> ut.show_if_requested()
    """
    from wbia.scripts import classify_shark

    target_type = 'binary'
    # target_type = 'multiclass3'
    # dataset = classify_shark.get_shark_dataset(target_type)
    ds = classify_shark.get_shark_dataset(target_type, 'hog')

    # Make resemble old dataset
    # FIXME; make wbia_cnn dataset work here too
    # annots = ds.getprop('annots')
    ds.enc = ds.getprop('enc')
    ds.aids = ds.getprop('annots').aids
    ds.nids = ds.getprop('annots').nids
    ds.target = ds.labels
    ds.target_names = ds.getprop('target_names')
    ds.target_labels = ds.enc.transform(ds.target_names)
    ds.ibs = ds.getprop('ibs')
    ds.config = ds.getprop('config')

    problem = classify_shark.ClfProblem(ds)
    problem.print_support_info()

    # BUILD_RELEASE_MODEL = False
    # if BUILD_RELEASE_MODEL:
    #     clf = sklearn.svm.SVC(kernel=str('linear'), C=.17,
    #                           class_weight='balanced',
    #                           decision_function_shape='ovr',
    #                           verbose=10)
    #     clf.fit(ds.data, ds.target)
    #     model_fname = 'interim_svc_{}.cPkl'.format(ds.dataset_id)
    #     model_dpath = ut.ensuredir((ds.dataset_dpath, 'svms'))
    #     model_fpath = join(model_dpath, model_fname)
    #     ut.save_cPkl(model_fpath, clf)
    #     """
    #     TO PUBLISH
    #     scp clf to lev:/media/hdd/PUBLIC/models
    #     run script lev:/media/hdd/PUBLIC/hash.py to refresh hashes
    #     """
    #     user = ut.get_user_name()
    #     host = 'cthulhu.dyn.wildme.io'
    #     remote_path = '/data/public/models/' + model_fname
    #     remote_uri = user + '@' + host + ':' + remote_path
    #     ut.rsync(model_fpath, remote_uri)
    #     command = 'python /media/hdd/PUBLIC/hash.py'
    #     ut.cmd('ssh {user}@{host} "{command}"'.format(user=user, host=host,
    #                                                   command=command))

    model_dpath = ut.ensuredir((ds.dataset_dpath, 'svms'))

    # n_folds = 10
    n_folds = 10
    # ensemble_dpath = ut.ensuredir((model_dpath, 'svms_%d_fold' % (n_folds,)))

    train_idx = ds._split_idxs['train']
    test_idx = ds._split_idxs['test']
    y_train = ds.target.take(train_idx)
    nids_train = ut.take(ds.nids, train_idx)

    # Ensure that an individual does not appear in both train and test
    # _iter = stratified_kfold_label_split(y_train, nids_train, y_idx=train_idx,
    #                                      n_folds=n_folds, rng=rng)

    class MyLabelCV(object):
        def __init__(self, y_train, nids_train, n_folds):
            self.nids_train = nids_train
            self.y_train = y_train
            self.n_folds = n_folds

        def __len__(self):
            return self.n_folds

        def __iter__(self):
            from wbia_cnn.dataset import stratified_kfold_label_split

            rng = 1809629827
            for _ in stratified_kfold_label_split(
                self.y_train, self.nids_train, n_folds=self.n_folds, rng=rng
            ):
                yield _

    clf_fpath = join(model_dpath, '%s_svc_folds_%s.cPkl' % (target_type, n_folds))
    if not ut.checkpath(clf_fpath):
        """
        Curate strategy:
            Use gridsearch to select a reasonable C=.17
            Then train 10 classifiers with 10 split cross validation.
            This lets us make an "unbiased" prediction for each training example.
            Look at predictions for all training examples (predict using only
            classifiers not trained with that point).
            Look at the worst performing examples.
            Fix any errors that occur.
            Now that the database is better, we learn the actual model.

        Learning strategy:
            * Set aside a test set.
            * The remaining data is the training set.
            * Run Gridsearch with N-fold cross validation on the training set to
              look at performance given different hyperparameters of the SVM.
            * Use quadratic interpolation to select a "best" parameter.
              (NOTE grid.predict only uses the best estimator
               (however it is a refit estimator))
            Train a single SVM using these parameters on all training data.
            Evaluate this SVM on the test set.
        """
        C = None
        if C is None:
            import sklearn
            import sklearn.grid_search
            import sklearn.svm

            # C controls the margin of the hyperplane.
            # Smaller C = Larger Hyperplane
            # So, the larger the C the less willing the SVM will be to get
            # examples wrong.
            param_grid = {
                # 'C': np.linspace(.1, .2, 10),
                'C': [
                    0.0001,
                    0.001,
                    0.005,
                    0.01,
                    0.08,
                    0.1,
                    0.12,
                    0.15,
                    0.17,
                    0.2,
                    0.22,
                    0.5,
                    1.0,
                    100,
                    1000,
                    10000,
                ]
                # 'C': np.linspace(.1, .2, 3),
            }
            clf = sklearn.svm.SVC(
                kernel=str('linear'),
                C=0.17,
                class_weight='balanced',
                decision_function_shape='ovr',
            )
            cv = MyLabelCV(y_train, nids_train, n_folds=n_folds)
            grid = sklearn.grid_search.GridSearchCV(
                clf,
                param_grid=param_grid,
                cv=cv,
                refit=False,
                n_jobs=min(n_folds, 6),
                verbose=10,
            )
            x_train = ds.data.take(train_idx, axis=0)
            y_train = ds.target.take(train_idx, axis=0)
            grid.fit(x_train, y_train)

            for params, mean_score, scores in grid.grid_scores_:
                logger.info(
                    '%0.3f (+/-%0.03f) for %r' % (mean_score, scores.std() * 2, params)
                )

            c_xdata = np.array([t[0]['C'] for t in grid.grid_scores_])
            c_ydata = np.array([t[1] for t in grid.grid_scores_])

            import vtool as vt

            # maxima_x, maxima_y, argmaxima = vt.hist_argmaxima(c_ydata, c_xdata, maxima_thresh=None)
            submaxima_x, submaxima_y = vt.argsubmaxima(c_ydata, c_xdata)
            # pt.draw_hist_subbin_maxima(c_ydata, c_xdata, maxima_thresh=None, remove_endpoints=False)
            C = submaxima_x[0]
            logger.info('C = %r' % (C,))
        else:
            logger.info('C = %r' % (C,))

        clf_all = sklearn.svm.SVC(
            kernel=str('linear'),
            C=C,
            class_weight='balanced',
            decision_function_shape='ovr',
            verbose=10,
        )
        X_train = ds.data.take(train_idx, axis=0)
        clf_all.fit(X_train, y_train)
        ut.save_data(clf_fpath, clf_all.__dict__)
        clf = clf_all
    else:
        clf = sklearn.svm.SVC()
        clf.__dict__.update(**ut.load_data(clf_fpath))

    def classifier_test(clf, X_test, y_test):
        logger.info('[problem] test classifier on %d data points' % (len(test_idx),))
        y_conf = predict_svc_ovr(clf, X_test)
        y_pred = y_conf.argmax(axis=1)
        result = ClfSingleResult(problem.ds, test_idx, y_test, y_pred, y_conf)
        return result

    test_idx = ds._split_idxs['test']
    X_test = ds.data.take(test_idx, axis=0)
    y_test = ds.target.take(test_idx, axis=0)
    result = classifier_test(clf, X_test, y_test)
    result.compile_results()
    result.print_report()
    result_list = [result]

    import pandas as pd

    # import wbia.plottool as pt
    # Combine information from results
    df = pd.concat([r.df for r in result_list])
    df['hardness'] = 1 / df['easiness']
    df['aid'] = ut.take(ds.aids, df.index)
    df['target'] = ut.take(ds.target, df.index)
    df['failed'] = df['pred'] != df['target']

    report = sklearn.metrics.classification_report(
        y_true=df['target'], y_pred=df['pred'], target_names=result.ds.target_names
    )
    logger.info(report)

    confusion = sklearn.metrics.confusion_matrix(df['target'], df['pred'])
    logger.info('Confusion Matrix:')
    logger.info(
        pd.DataFrame(
            confusion,
            columns=[m for m in result.ds.target_names],
            index=['gt ' + m for m in result.ds.target_names],
        )
    )

    # inspect_results(ds, result_list)
    if False:
        if False:
            # train_idx, test_idx = problem.stratified_2sample_idxs()
            train_idx = ds._split_idxs['train']
            test_idx = ds._split_idxs['test']

            # import sklearn.metrics
            # model_dpath = join(ds.dataset_dpath, 'svms')
            # model_fpath = join(model_dpath, target_type + '_svc.cPkl')
            # if ut.checkpath(model_fpath):
            #     clf = sklearn.svm.SVC(kernel=str('linear'), C=.17, class_weight='balanced',
            #                           decision_function_shape='ovr')
            #     clf.__dict__.update(**ut.load_data(model_fpath))
            # else:
            #     clf = problem.fit_new_classifier(train_idx)
            #     ut.ensuredir(model_dpath)
            #     ut.save_data(model_fpath, clf.__dict__)

            result_list = []
            result = problem.test_classifier(clf, test_idx)
            result_list.append(result)

            for result in result_list:
                result.compile_results()

            for result in result_list:
                result.print_report()

            inspect_results(ds, result_list)

        if False:
            result_list = []
            result = problem.test_classifier(clf, train_idx)
            result_list.append(result)

            for result in result_list:
                result.compile_results()

            for result in result_list:
                result.print_report()

            inspect_results(ds, result_list)

        if False:
            result_list = []
            # View support vectors
            support_idxs = clf.support_
            result = problem.test_classifier(clf, support_idxs)
            result_list.append(result)

            for result in result_list:
                result.compile_results()

            for result in result_list:
                result.print_report()

            inspect_results(ds, result_list)
def inspect_results(ds, result_list):
    import pandas as pd
    import wbia.plottool as pt

    pd.set_option('display.max_rows', 20)
    pt.qt4ensure()

    isect_sets = [
        set(s1).intersection(set(s2))
        for s1, s2 in ut.combinations([result.df.index for result in result_list], 2)
    ]
    assert all([len(s) == 0 for s in isect_sets]), 'cv sets should not intersect'

    # Combine information from results
    df = pd.concat([result.df for result in result_list])
    df['hardness'] = 1 / df['easiness']
    df['aid'] = ut.take(ds.aids, df.index)
    df['target'] = ut.take(ds.target, df.index)
    df['failed'] = df['pred'] != df['target']

    report = sklearn.metrics.classification_report(
        y_true=df['target'], y_pred=df['pred'], target_names=result.ds.target_names
    )
    logger.info(report)

    confusion = sklearn.metrics.confusion_matrix(df['target'], df['pred'])
    logger.info('Confusion Matrix:')
    logger.info(
        pd.DataFrame(
            confusion,
            columns=[m for m in result.ds.target_names],
            index=['gt ' + m for m in result.ds.target_names],
        )
    )

    def target_partition(target):
        df_chunk = df if target is None else df[df['target'] == target]
        df_chunk = df_chunk.take(df_chunk['hardness'].argsort())
        return df_chunk

    def grab_subchunk(frac, n, target):
        df_chunk = target_partition(target)
        sl = ut.snapped_slice(len(df_chunk), frac, n)
        logger.info('sl = %r' % (sl,))
        idx = df_chunk.index[sl]
        df_chunk = df_chunk.loc[idx]
        min_frac = sl.start / len(df_chunk)
        max_frac = sl.stop / len(df_chunk)
        min_frac = sl.start
        max_frac = sl.stop
        place_name = 'hardness=%.2f (%d-%d)' % (frac, min_frac, max_frac)
        if target is not None:
            df_chunk.nice = place_name + ' ' + ds.target_names[target]
        else:
            df_chunk.nice = place_name
        return df_chunk

    def grab_subchunk2(df_chunk, frac, n):
        sl = ut.snapped_slice(len(df_chunk), frac, n)
        logger.info('sl = %r' % (sl,))
        idx = df_chunk.index[sl]
        df_chunk = df_chunk.loc[idx]
        min_frac = sl.start / len(df_chunk)
        max_frac = sl.stop / len(df_chunk)
        min_frac = sl.start
        max_frac = sl.stop
        place_name = 'hardness=%.2f (%d-%d)' % (frac, min_frac, max_frac)
        if target is not None:
            df_chunk.nice = place_name + ' ' + ds.target_names[target]
        else:
            df_chunk.nice = place_name
        return df_chunk

    # Look at hardest train cases
    # Look at hardest test cases
    if True:
        # n = 4
        fracs = [0.0, 0.7, 0.8, 0.9, 1.0]
        view_targets = ds.target_labels
        n = 8 // len(view_targets)
    else:
        view_targets = [ut.listfind(ds.target_names.tolist(), 'healthy')]
        # fracs = [0.0, .7, .8, .9, 1.0]
        fracs = [0.45, 0.5, 0.55, 0.6, 0.62]
        fracs = [0.72, 0.82, 0.84, 0.88]
        fracs = [0.73, 0.83, 0.835, 0.89]
        fracs = [0.735, 0.833, 0.837, 0.934]
        fracs = [0.2, 0.65, 0.75, 0.85, 0.95]
        fracs = [0.3, 0.4, 0.67, 0.77, 0.87, 0.92]
        n = 8 // len(view_targets)

    if False:
        view_targets = [ut.listfind(ds.target_names.tolist(), 'healthy')]
        target_dfs = [target_partition(target) for target in view_targets]
        critical_points = [np.where(_df['failed'])[0][0] for _df in target_dfs]
        critical_fracs = [_pt / len(_df) for _pt, _df in zip(critical_points, target_dfs)]
        n = 8 * 5
        frac = critical_fracs[0]
        frac += 0.1
        _df = target_dfs[0]
        df_part = grab_subchunk2(_df, frac, n)
        df_chunks = [df_part.iloc[x] for x in ut.ichunks(range(len(df_part)), 8)]
    else:
        df_chunks = [
            grab_subchunk(frac, n, target) for frac in fracs for target in view_targets
        ]

    ibs = ds.ibs
    config = ds.config
    from wbia_cnn import draw_results

    inter = draw_results.make_InteractClasses(
        ibs, config, df_chunks, nCols=len(view_targets)
    )
    inter.start()