Source code for wbia.algo.verif.oldvsone

# -*- coding: utf-8 -*-
import utool as ut
from wbia.algo.graph.state import POSTV, NEGTV, INCMP
import numpy as np


# @profile
# def edge_hashids(samples):
#     qvuuids = samples.annots1.visual_uuids
#     dvuuids = samples.annots2.visual_uuids
#     # edge_uuids = [ut.combine_uuids(uuids)
#     #                for uuids in zip(qvuuids, dvuuids)]
#     edge_hashids = [make_edge_hashid(uuid1, uuid2) for uuid1, uuid2 in zip(qvuuids, dvuuids)]
#     # edge_uuids = [combine_2uuids(uuid1, uuid2)
#     #                for uuid1, uuid2 in zip(qvuuids, dvuuids)]
#     return edge_hashids

# @profile
# def edge_hashid(samples):
#     edge_hashids = samples.edge_hashids()
#     edge_hashid = ut.hashstr_arr27(edge_hashids, 'edges', hashlen=32,
#                                    pathsafe=True)
#     return edge_hashid

# @profile
# def make_edge_hashid(uuid1, uuid2):
#     """
#     Slightly faster than using ut.combine_uuids, because we condense and don't
#     bother casting back to UUIDS
#     """
#     sep_str = '-'
#     sep_byte = sep_str
#     pref = '{}2'.format(sep_str)
#     combined_bytes = pref + sep_byte.join([uuid1.bytes, uuid2.bytes])
#     bytes_sha1 = hashlib.sha1(combined_bytes)
#     # Digest them into a hash
#     hashbytes_20 = bytes_sha1.digest()
#     hashbytes_16 = hashbytes_20[0:16]
#     # uuid_ = uuid.UUID(bytes=hashbytes_16)
#     return hashbytes_16


def demo_single_pairwise_feature_vector():
    r"""
    Build a single pairwise match between annotations 1 and 2 of ``testdb1``
    and compute its feature vector.

    CommandLine:
        python -m wbia.algo.verif.vsone demo_single_pairwise_feature_vector

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.verif.vsone import *  # NOQA
        >>> match = demo_single_pairwise_feature_vector()
        >>> print(match)

    Returns:
        The ``vt.PairwiseMatch`` object after ``make_feature_vector()`` has
        been called on it.
    """
    import vtool as vt
    import wbia

    ibs = wbia.opendb('testdb1')
    qaid, daid = 1, 2
    annot1 = ibs.annots([qaid])[0]._make_lazy_dict()
    annot2 = ibs.annots([daid])[0]._make_lazy_dict()

    vt.matching.ensure_metadata_normxy(annot1)
    vt.matching.ensure_metadata_normxy(annot2)

    match = vt.PairwiseMatch(annot1, annot2)
    cfgdict = {'checks': 200, 'symmetric': False}
    match.assign(cfgdict=cfgdict)
    match.apply_ratio_test({'ratio_thresh': 0.638}, inplace=True)
    match.apply_sver(inplace=True)

    # match.add_global_measures(['yaw', 'qual', 'gps', 'time'])
    match.add_global_measures(['view', 'qual', 'gps', 'time'])
    match.add_local_measures()

    # sorters = ['ratio', 'norm_dist', 'match_dist']
    match.make_feature_vector()
    return match


def demo_classes(pblm):
    r"""
    Pick one non-photobomb example edge per match-state class and draw the
    pairwise match for each one side by side in a 1x3 figure.

    CommandLine:
        python -m wbia.algo.verif.vsone demo_classes --saveparts --save=classes.png --clipwhite

        python -m wbia.algo.verif.vsone demo_classes --saveparts --save=figures/classes.png --clipwhite --dpath=~/latex/crall-iccv-2017

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.verif.vsone import *  # NOQA
        >>> pblm = OneVsOneProblem.from_empty(defaultdb='PZ_PB_RF_TRAIN')
        >>> pblm.load_features()
        >>> pblm.load_samples()
        >>> pblm.build_feature_subsets()
        >>> pblm.demo_classes()
        >>> ut.show_if_requested()

    Args:
        pblm: problem object; this function reads ``pblm.samples.subtasks``,
            ``pblm.samples.X_dict``, ``pblm.infr.ibs`` and
            ``pblm.hyper_params.vsone_match``.

    Raises:
        IndexError: if no candidate pair survives the filters for a class.
    """
    task_key = 'match_state'
    labels = pblm.samples.subtasks[task_key]
    pb_labels = pblm.samples.subtasks['photobomb_state']

    # Which of the surviving candidates (by position) to display per class.
    classname_offset = {
        POSTV: 0,
        NEGTV: 0,
        INCMP: 0,
    }

    feats = pblm.samples.X_dict['learn(sum,glob)']

    class_to_edge = {}
    for class_name in labels.class_names:
        print('Find example of %r' % (class_name,))
        # Find an example of each class (that is not a photobomb)
        pbflags = pb_labels.indicator_df['notpb']
        flags = labels.indicator_df[class_name]
        assert np.all(pbflags.index == flags.index)
        flags = flags & pbflags
        ratio = feats['sum(ratio)']
        if class_name == INCMP:
            # flags &= feats['global(delta_yaw)'] > 3
            flags &= feats['global(delta_view)'] > 2
            # flags &= feats['sum(ratio)'] > 0
        if class_name == NEGTV:
            # Keep only the negative pair(s) with the largest summed ratio.
            low = ratio[flags].max()
            flags &= feats['sum(ratio)'] >= low
        if class_name == POSTV:
            # Keep positives strictly between half the median and the median.
            low = ratio[flags].median() / 2
            high = ratio[flags].median()
            flags &= feats['sum(ratio)'] < high
            flags &= feats['sum(ratio)'] > low
        # flags &= pblm.samples.simple_scores[flags]['score_lnbnn_1vM'] > 0
        idxs = np.where(flags)[0]
        print('Found %d candidates' % (len(idxs)))
        offset = classname_offset[class_name]
        if offset >= len(idxs):
            # Same exception type as the bare ``idxs[offset]`` would raise,
            # but with enough context to diagnose the failure.
            raise IndexError(
                'No candidate example for class %r (wanted index %d, found %d)'
                % (class_name, offset, len(idxs))
            )
        idx = idxs[offset]
        series = labels.indicator_df.iloc[idx]
        assert series[class_name]
        edge = series.name
        class_to_edge[class_name] = edge

    # ``vt`` is not imported at module level in this file, so it has to be
    # imported here; without it the calls below raise NameError.
    import vtool as vt
    import wbia.plottool as pt
    import wbia.guitool as gt

    gt.ensure_qapp()
    pt.qtensure()

    fnum = 1
    pt.figure(fnum=fnum, pnum=(1, 3, 1))
    pnum_ = pt.make_pnum_nextgen(1, 3)

    # classname_alias = {
    #     POSTV: 'positive',
    #     NEGTV: 'negative',
    #     INCMP: 'incomparable',
    # }

    ibs = pblm.infr.ibs
    for class_name, edge in class_to_edge.items():
        aid1, aid2 = edge
        # alias = classname_alias[class_name]
        print('class_name = %r' % (class_name,))
        annot1 = ibs.annots([aid1])[0]._make_lazy_dict()
        annot2 = ibs.annots([aid2])[0]._make_lazy_dict()
        vt.matching.ensure_metadata_normxy(annot1)
        vt.matching.ensure_metadata_normxy(annot2)
        match = vt.PairwiseMatch(annot1, annot2)
        cfgdict = pblm.hyper_params.vsone_match.asdict()
        match.apply_all(cfgdict)
        pt.figure(fnum=fnum, pnum=pnum_())
        match.show(show_ell=False, show_ori=False)
        # pt.set_title(alias)


def find_opt_ratio(pblm):
    """
    script to help find the correct value for the ratio threshold

    Sweeps 100 thresholds in [0, 1]. For each threshold every match is scored
    by the sum of its local ``ratio`` values below that threshold, and the ROC
    AUC of those scores against the positive-class truth is computed. The AUC
    curve and its maximum are plotted.

        >>> from wbia.algo.verif.vsone import *  # NOQA
        >>> pblm = OneVsOneProblem.from_empty('PZ_PB_RF_TRAIN')
        >>> pblm = OneVsOneProblem.from_empty('GZ_Master1')

    Args:
        pblm: problem object; ``pblm.load_samples()`` is called here, then
            ``pblm.infr`` and ``pblm.samples`` are read.
    """
    # Find best ratio threshold
    pblm.load_samples()
    infr = pblm.infr
    edges = ut.emap(tuple, pblm.samples.aid_pairs.tolist())
    task = pblm.samples['match_state']
    pos_idx = task.class_names.tolist().index(POSTV)

    # ratio_thresh=1.0 with sv_on=False: presumably keeps every
    # correspondence so the sweep below can threshold them itself
    # -- TODO confirm against the matcher config semantics.
    config = {'ratio_thresh': 1.0, 'sv_on': False}
    matches = infr._exec_pairwise_match(edges, config)

    import wbia.plottool as pt
    import sklearn.metrics

    pt.qtensure()

    thresholds = np.linspace(0, 1.0, 100)
    pos_truth = task.y_bin.T[pos_idx]
    ratio_fs = [m.local_measures['ratio'] for m in matches]

    aucs = []
    # Given the current correspondences: Find the optimal
    # correspondence threshold.
    for thresh in ut.ProgIter(thresholds, 'computing thresh'):
        scores = np.array([fs[fs < thresh].sum() for fs in ratio_fs])
        roc = sklearn.metrics.roc_auc_score(pos_truth, scores)
        aucs.append(roc)
    aucs = np.array(aucs)
    opt_auc = aucs.max()
    opt_thresh = thresholds[aucs.argmax()]

    pt.plt.plot(thresholds, aucs, 'r-', label='')
    pt.plt.plot(opt_thresh, opt_auc, 'ro', label='L opt=%r' % (opt_thresh,))
    pt.set_ylabel('auc')
    pt.set_xlabel('ratio threshold')
    pt.legend()
    # colors = {
    #     1: 'r',
    #     2: 'b',
    #     3: 'g',
    # }

    # def predict_truth(ratio_fs, opt_thresh, pos_truth):
    #     # Filter correspondence using thresh then sum their scores
    #     new_ratio_fs = [fs < opt_thresh for fs in ratio_fs]
    #     scores = np.array([fs.sum() for fs in new_ratio_fs])
    #     # Find the point (summed score threshold) that maximizes MCC
    #     fpr, tpr, points = sklearn.metrics.roc_curve(pos_truth, scores)
    #     mccs = np.array([sklearn.metrics.matthews_corrcoef(
    #         pos_truth, scores > point) for point in points])
    #     opt_point = points[mccs.argmax()]
    #     pos_pred = scores > opt_point
    #     return pos_pred

    # thresholds = np.linspace(0, 1.0, 100)
    # pos_truth = task.y_bin.T[pos_idx]
    # ratio_fs = [m.local_measures['ratio'] for m in matches]
    # thresh_levels = []
    # for level in range(1, 3 + 1):
    #     if ut.allsame(pos_truth):
    #         print('breaking')
    #         break
    #     print('level = %r' % (level,))
    #     aucs = []
    #     # Given the current correspondences: Find the optimal
    #     # correspondence threshold.
    #     for thresh in ut.ProgIter(thresholds, 'computing thresh'):
    #         scores = np.array([fs[fs < thresh].sum() for fs in ratio_fs])
    #         roc = sklearn.metrics.roc_auc_score(pos_truth, scores)
    #         aucs.append(roc)
    #     aucs = np.array(aucs)
    #     opt_auc = aucs.max()
    #     opt_thresh = thresholds[aucs.argmax()]
    #     thresh_levels.append(opt_thresh)
    #     if True:
    #         color = colors[level]
    #         pt.plt.plot(thresholds, aucs, color + '-', label='L%d' % level)
    #         pt.plt.plot(opt_thresh, opt_auc, color + 'o',
    #                     label='L%d opt=%r' % (level, opt_thresh,))
    #     # Remove the positive samples that this threshold fails on
    #     pred = predict_truth(ratio_fs, opt_thresh, pos_truth)
    #     flags = pred != pos_truth | ~pos_truth
    #     ratio_fs = ut.compress(ratio_fs, flags)
    #     pos_truth = pos_truth.compress(flags)

    # submax_thresh, submax_roc = vt.argsubmax(aucs, thresholds)

    # Now find all pairs that would be correctly classified using this
    # threshold

    # ratio_fs = thresh_ratio_fs
    # rocs = []
    # for thresh in ut.ProgIter(thresholds, 'computing thresh'):
    #     scores = np.array([fs[fs < thresh].sum() for fs in ratio_fs])
    #     roc = sklearn.metrics.roc_auc_score(pos_truth, scores)
    #     rocs.append(roc)
    # submax_thresh, submax_roc = vt.argsubmax(rocs, thresholds)
    # pt.plt.plot(thresholds, rocs, 'b-', label='L2')
    # pt.plt.plot(submax_thresh, submax_roc, 'bo', label='L2 opt=%r' % (submax_thresh,))


# def simple_confusion(pblm, score_key=None, task_key=None,
#                      target_class=None):
#     if score_key is None:
#         score_key = 'score_lnbnn_1vM'
#     if task_key is None:
#         task_key = pblm.primary_task_key
#     task = pblm.samples[task_key]
#     if target_class is None:
#         target_class = task.default_class_name
#     target_class_idx = task.lookup_class_idx(target_class)
#     scores = pblm.samples.simple_scores[score_key]
#     y = task.y_bin.T[target_class_idx]
#     conf = vt.ConfusionMetrics().fit(scores, y)
#     conf.label = score_key
#     return conf