# -*- coding: utf-8 -*-
import logging
from wbia.algo.verif import vsone
from wbia.scripts._thesis_helpers import DBInputs
from wbia.scripts._thesis_helpers import Tabular, upper_one, ave_str
from wbia.scripts._thesis_helpers import TMP_RC, W, H, DPI
import wbia.constants as const
from wbia.algo.graph import nx_utils as nxu
import ubelt as ub
import pandas as pd
import numpy as np
from os.path import basename, join, splitext, exists # NOQA
import utool as ut
import wbia.plottool as pt
import vtool as vt
import pathlib
import matplotlib as mpl
import random
import sys
from wbia.algo.graph.state import POSTV, NEGTV, INCMP, UNREV # NOQA
(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')
[docs]@ut.reloadable_class
class Chap5(DBInputs):
"""
python -m wbia Chap5.measure all GZ_Master1
python -m wbia Chap5.measure all PZ_Master1
python -m wbia Chap5.draw all GZ_Master1
python -m wbia Chap5.draw all PZ_Master1 --comp Leviathan
python -m wbia Chap5.draw error_graph_analysis GZ_Master1
python -m wbia Chap5.draw error_graph_analysis PZ_Master1 --comp Leviathan
"""
base_dpath = ut.truepath('~/latex/crall-thesis-2017/figures5')
def measure_all(self):
    """
    Run every measurement of this chapter in order: the database
    statistics first, then the review simulation.
    """
    for step in (self.measure_dbstats, self.measure_simulation):
        step()
def draw_all(self):
    r"""
    Produce every figure and table of this chapter.

    The simulation results are ensured first; afterwards the simulation
    plot, the refresh plot, the database statistics table and the error
    tables are written, in that order.

    CommandLine:
        python -m wbia Chap5.draw all GZ_Master1
        python -m wbia Chap5.draw error_graph_analysis GZ_Master1

        python -m wbia Chap5.draw all PZ_Master1
        python -m wbia Chap5.draw error_graph_analysis PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
    """
    self.ensure_results('simulation')
    outputs = (
        self.draw_simulation,
        self.draw_refresh,
        self.write_dbstats,
        self.write_error_tables,
    )
    for render in outputs:
        render()
    # The error graph analysis is not part of "all"; run it separately:
    # python -m wbia Chap5.draw error_graph_analysis GZ_Master1
def _precollect(self):
    """
    Build the train/test split, the one-vs-one problem and the fixed
    simulation settings for the current database.

    Sets ``self.test_train``, ``self.pblm``, ``self.const_dials`` and
    ``self.thresh_targets``, then calls ``self._setup_links`` with a
    prefix made of the test and train set sizes.
    """
    if self.ibs is None:
        # presumably needed so super() keeps working after a module reload
        _Chap5 = ut.fix_super_reload(Chap5, self)
        super(_Chap5, self)._precollect()

    ibs = self.ibs

    # Split data into a training and a testing set: annotations are grouped
    # by name, the groups are shuffled with a fixed rng value, and every
    # other group goes to training / testing.
    annots = ibs.annots(self.aids_pool)
    names = list(annots.group_items(annots.nids).values())
    ut.shuffle(names, rng=321)
    train_aids = ut.flatten(names[0::2])
    test_aids = ut.flatten(names[1::2])
    self.test_train = train_aids, test_aids

    params = {'sample_method': 'random'} if ibs.dbname == 'PZ_MTEST' else {}
    self.pblm = vsone.OneVsOneProblem.from_aids(ibs, train_aids, **params)

    self.const_dials = {
        'oracle_accuracy': (0.99, 0.99),
        'k_redun': 2,
        'max_outer_loops': np.inf,
    }

    # (measure, value) pairs later passed to ``res.get_pos_threshes``
    targets_by_db = {
        'GZ_Master1': {
            'graph': ('fpr', 0.0014),
            'rankclf': ('fpr', 0.001),
        },
        'PZ_Master1': {
            'graph': ('fpr', 0.0014),
            'rankclf': ('fpr', 0.001),
        },
    }
    fallback_targets = {
        'graph': ('fpr', 0.002),
        'rankclf': ('fpr', 0),
    }
    self.thresh_targets = targets_by_db.get(ibs.dbname, fallback_targets)

    config = ut.dict_union(self.const_dials, self.thresh_targets)
    cfg_prefix = '{}_{}'.format(len(test_aids), len(train_aids))
    self._setup_links(cfg_prefix, config)
def _setup(self):
    """
    Train the evaluation classifiers, choose thresholds and collect the
    parameters that the simulations need.

    The match-state evaluation results (with photobomb pairs removed) are
    used to pick one positive threshold set for the graph algorithm and
    one for rank+clf, according to ``self.thresh_targets``. A text report
    for both is written to ``thresh_reports.txt`` in ``self.dpath``.

    python -m wbia Chap5._setup

    Returns:
        dict: the simulation parameters, also stored as ``self.sim_params``

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> #self = Chap5('GZ_Master1')
        >>> self = Chap5('PZ_Master1')
        >>> #self = Chap5('PZ_MTEST')
        >>> self._setup()
    """
    self._precollect()
    train_aids, test_aids = self.test_train

    pblm = self.pblm
    data_key = pblm.default_data_key
    clf_key = pblm.default_clf_key
    pblm.eval_data_keys = [data_key]
    pblm.setup(with_simple=False)
    pblm.learn_evaluation_classifiers()
    res = pblm.task_combo_res['match_state'][clf_key][data_key]

    # Remove results that are photobombs for now
    pb_task = pblm.samples.subtasks['photobomb_state']
    import utool

    with utool.embed_on_exception_context:
        flags = pb_task.indicator_df.loc[res.index]['notpb'].values
        res = res.compress(flags)

    # TODO: need more principled way of selecting thresholds
    graph_thresh = res.get_pos_threshes(*self.thresh_targets['graph'])
    rankclf_thresh = res.get_pos_threshes(*self.thresh_targets['rankclf'])

    logger.info('\n--- Graph thresholds ---')
    graph_report = res.report_auto_thresholds(graph_thresh, verbose=0)

    logger.info('\n --- Ranking thresholds ---')
    rankclf_report = res.report_auto_thresholds(rankclf_thresh, verbose=0)

    report_fpath = join(self.dpath, 'thresh_reports.txt')
    report_lines = [
        '============',
        'Graph report',
        '------------',
        graph_report,
        '',
        '============',
        'Rank CLF report',
        '------------',
        rankclf_report,
    ]
    ut.writeto(report_fpath, '\n'.join(report_lines))

    # Load or create the deploy classifiers
    clf_dpath = ut.ensuredir((self.dpath, 'clf'))
    classifiers = pblm.ensure_deploy_classifiers(dpath=clf_dpath)

    sim_params = dict(
        test_aids=test_aids,
        train_aids=train_aids,
        classifiers=classifiers,
        graph_thresh=graph_thresh,
        rankclf_thresh=rankclf_thresh,
        const_dials=self.const_dials,
    )
    self.pblm = pblm
    self.sim_params = sim_params
    return sim_params
def _thresh_test(self):
    """
    Plot the false positive rate targeted on the evaluation data against
    the rate actually observed when the graph algorithm uses the
    corresponding threshold on the test annotations.

    For every target value a positive threshold is read from the
    match-state evaluation results (photobomb pairs removed), one loop of
    the simulated review is run with that threshold, and the resulting
    confusion matrix is turned into a per-column false positive rate.

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('PZ_Master1')
        >>> self = Chap5('GZ_Master1')
    """
    import wbia

    self.ensure_setup()

    task_key = 'match_state'
    pblm = self.pblm
    data_key = pblm.default_data_key
    clf_key = pblm.default_clf_key
    res = pblm.task_combo_res[task_key][clf_key][data_key]
    pblm.report_evaluation()

    # Remove results that are photobombs for now
    pb_task = pblm.samples.subtasks['photobomb_state']
    import utool

    with utool.embed_on_exception_context:
        flags = pb_task.indicator_df.loc[res.index]['notpb'].values
        res = res.compress(flags)

    # PLAN:
    #     Draw an LNBNN sample.
    #     Estimate probabilities on sample.
    #     for each fpr on validation,
    #         find threshold
    #         find fpr at that threshold for the lnbnn sample
    #     plot the predicted fpr vs the true fpr to show that this is
    #     difficult to predict.

    ibs = self.ibs
    sim_params = self.sim_params
    classifiers = sim_params['classifiers']
    test_aids = sim_params['test_aids']
    const_dials = sim_params['const_dials']
    graph_thresh = sim_params['graph_thresh']
    verbose = 1

    # ----------
    # Graph test
    dials1 = ut.dict_union(
        const_dials,
        {
            'name': 'graph',
            'enable_inference': True,
            'match_state_thresh': graph_thresh,
        },
    )
    infr1 = wbia.AnnotInference(
        ibs=ibs, aids=test_aids, autoinit=True, verbose=verbose
    )

    # Parallel lists: target fpr, threshold chosen for it, and the
    # confusion matrix observed after simulating with that threshold.
    estimate = []
    thresh = []
    confusions = []

    target_values = [0, 0.001, 0.0012, 0.0014, 0.0016, 0.002]
    for value in target_values:
        match_thresh = res.get_pos_threshes('fpr', value=value)
        estimate.append(value)
        thresh.append(match_thresh)

        infr1.enable_auto_prioritize_nonpos = True
        infr1._refresh_params['window'] = 20
        infr1._refresh_params['thresh'] = np.exp(-2)
        infr1._refresh_params['patience'] = 20
        infr1.init_simulation(classifiers=classifiers, **dials1)
        infr1.init_test_mode()
        infr1.reset(state='empty')
        # Override the threshold configured through dials1 with the one
        # under test for this iteration.
        infr1.task_thresh['match_state'] = match_thresh
        infr1.enable_fixredun = False
        infr1.main_loop(max_loops=1)

        c = pd.DataFrame(infr1.test_state['confusion'])
        confusions.append(c)

    actual_fpr = []
    # The unpacking order must mirror the zip() argument order; binding the
    # confusion matrix to the wrong name makes the sums below run on a
    # float instead of a DataFrame.
    for est, c, t in zip(estimate, confusions, thresh):
        logger.info(t)
        logger.info(est)
        logger.info(c)
        # Column totals, and column totals with the diagonal zeroed out
        N = c.sum(axis=0)
        FP = (c - np.diagflat(np.diag(c))).sum(axis=0)
        fpr = FP / N
        actual_fpr.append(fpr)

    for class_name in [NEGTV, POSTV]:
        fnum = 1
        x = [t[class_name] for t in thresh]
        class_actual = np.array([a[class_name] for a in actual_fpr])
        class_est = target_values
        pt.figure(fnum=fnum)
        pt.plot(x, class_est, 'x--', label='est ' + class_name)
        pt.plot(x, class_actual, 'o--', label='actual ' + class_name)
        pt.legend()
@profile
def measure_simulation(self):
    """
    Simulate the review process on the test annotations with three
    configurations and store the outcome.

    The configurations are stored under the keys ``'graph'`` (inference
    enabled, full main loop), ``'rank+clf'`` (inference disabled,
    automatic review enabled, a single loop without refresh) and
    ``'ranking'`` (no classifiers, no automatic review, a single loop
    without refresh). The collected results are kept in
    ``self.expt_results['simulation']`` and saved as ``simulation.pkl``
    in ``self.dpath``.

    CommandLine:
        python -m wbia Chap5.measure simulation GZ_Master1
        python -m wbia Chap5.measure simulation PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
    """
    import wbia

    self.ensure_setup()

    ibs = self.ibs
    params = self.sim_params
    classifiers = params['classifiers']
    test_aids = params['test_aids']
    rankclf_thresh = params['rankclf_thresh']
    graph_thresh = params['graph_thresh']
    const_dials = params['const_dials']

    def _new_infr():
        # Every configuration starts from its own inference object
        return wbia.AnnotInference(ibs=ibs, aids=test_aids, autoinit=True, verbose=1)

    def _dials(name, enable_inference, match_state_thresh):
        overrides = {
            'name': name,
            'enable_inference': enable_inference,
            'match_state_thresh': match_state_thresh,
        }
        return ut.dict_union(const_dials, overrides)

    results = {}

    # ----------
    # Graph test
    graph_dials = _dials('graph', True, graph_thresh)
    graph_infr = _new_infr()
    graph_infr.enable_auto_prioritize_nonpos = True
    graph_infr._refresh_params['window'] = 20
    graph_infr._refresh_params['thresh'] = np.exp(-2)
    graph_infr._refresh_params['patience'] = 20
    graph_infr.init_simulation(classifiers=classifiers, **graph_dials)
    graph_infr.init_test_mode()
    graph_infr.reset(state='empty')
    graph_infr.main_loop()
    results['graph'] = self._collect_sim_results(graph_infr, graph_dials)

    # --------
    # Rank+CLF
    rankclf_dials = _dials('rank+clf', False, rankclf_thresh)
    rankclf_infr = _new_infr()
    rankclf_infr.init_simulation(classifiers=classifiers, **rankclf_dials)
    rankclf_infr.init_test_mode()
    rankclf_infr.enable_redundancy = False
    rankclf_infr.enable_autoreview = True
    rankclf_infr.reset(state='empty')
    rankclf_infr.main_loop(max_loops=1, use_refresh=False)
    results['rank+clf'] = self._collect_sim_results(rankclf_infr, rankclf_dials)

    # ------------
    # Ranking test
    ranking_dials = _dials('ranking', False, None)
    ranking_infr = _new_infr()
    ranking_infr.init_simulation(classifiers=None, **ranking_dials)
    ranking_infr.init_test_mode()
    ranking_infr.enable_redundancy = False
    ranking_infr.enable_autoreview = False
    ranking_infr.reset(state='empty')
    ranking_infr.main_loop(max_loops=1, use_refresh=False)
    results['ranking'] = self._collect_sim_results(ranking_infr, ranking_dials)

    # ------------
    # Dump experiment output to disk
    expt_name = 'simulation'
    self.expt_results[expt_name] = results
    ut.ensuredir(self.dpath)
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def _collect_sim_results(self, infr, dials):
    """
    Gather the end state of a finished simulation into a plain dict.

    Args:
        infr: inference object the simulation was run on
        dials (dict): settings the simulation was initialised with

    Returns:
        dict: real and predicted connected components, a copy of the
            graph, the dials, the refresh threshold and the metrics list
    """
    # The confusion matrix is only logged; it is not part of the result
    confusion = pd.DataFrame(infr.test_state['confusion'])
    confusion.index.name = 'real'
    confusion.columns.name = 'pred'
    logger.info('Edge confusion')
    logger.info(confusion)

    real_ccs = list(infr.nid_to_gt_cc.values())
    pred_ccs = list(infr.pos_graph.connected_components())
    return dict(
        real_ccs=real_ccs,
        pred_ccs=pred_ccs,
        graph=infr.graph.copy(),
        dials=dials,
        refresh_thresh=infr.refresh._prob_any_remain_thresh,
        metrics=infr.metrics_list,
    )
@profile
def measure_dbstats(self):
    """
    Collect size statistics of the training and testing sets and save
    them as the ``dbstats`` experiment result.

    python -m wbia Chap5.draw dbstats GZ_Master1

    python -m wbia Chap5.measure dbstats PZ_Master1
    python -m wbia Chap5.draw dbstats PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
    """
    self.ensure_setup()

    clf_meta = self.sim_params['classifiers']['match_state']['metadata'].copy()
    clf_meta.pop('data_info')

    def _summarize(n_annots, pccs):
        # Number of annotations, number of groups, and group size mean/std
        sizes = ut.emap(len, pccs)
        return {
            'n_annots': n_annots,
            'n_names': len(pccs),
            'annot_size_mean': np.mean(sizes),
            'annot_size_std': np.std(sizes),
        }

    def _ibs_stats(aids):
        pccs = self.ibs.group_annots_by_name(aids)[0]
        return _summarize(len(aids), pccs)

    train_aids = self.sim_params['train_aids']
    test_aids = self.sim_params['test_aids']

    dbstats = {
        'testing': _ibs_stats(test_aids),
        'training': _ibs_stats(train_aids),
    }

    traininfo = dbstats['training']
    class_hist = clf_meta['class_hist']
    traininfo['class_hist'] = class_hist
    traininfo['n_training_pairs'] = sum(class_hist.values())

    # Same statistics, but taken from the graph the classifier was
    # actually trained on.
    infr = self.pblm.infr
    pblm_pccs = list(infr.positive_components())
    pblm_info = _summarize(infr.graph.number_of_nodes(), pblm_pccs)
    pblm_info['notes'] = ut.textblock(
        """
        if this (the real training data) is different from the parents
        (wbia) info, that means the staging database is ahead of
        annotmatch. Report the wbia one for clarity. Num annots should
        always be the same though.
        """
    )
    traininfo['pblm_info'] = pblm_info

    expt_name = 'dbstats'
    self.expt_results[expt_name] = dbstats
    ut.save_data(join(self.dpath, expt_name + '.pkl'), dbstats)
def write_dbstats(self):
    """
    Write the training/testing set statistics as a LaTeX table.

    The tabular text goes to ``dbstats.tex`` in ``self.dpath`` and the
    full table is rendered with ``ut.render_latex``.

    # TODO: write info about what dataset was used

    CommandLine:
        python -m wbia Chap5.measure dbstats PZ_Master1
        python -m wbia Chap5.measure simulation GZ_Master1

        python -m wbia Chap5.draw dbstats --db GZ_Master1 --diskshow

    Returns:
        the value returned by ``ut.render_latex`` for the table

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
    """
    dbstats = self.ensure_results('dbstats')

    rows = ut.odict()
    for split_name in ('training', 'testing'):
        stats = dbstats[split_name]
        size_str = ave_str(stats['annot_size_mean'], stats['annot_size_std'])
        rows[split_name.capitalize()] = ut.odict(
            [
                ('Names', stats['n_names']),
                ('Annots', stats['n_annots']),
                ('Annots size', size_str),
                # only the training entry carries a pair count
                ('Training edges', stats.get('n_training_pairs', '-')),
            ]
        )

    df = pd.DataFrame.from_dict(rows, orient='index')
    # restore the insertion order of the rows
    df = df.loc[list(rows.keys())]

    tabular = Tabular(df)
    tabular.colfmt = 'numeric'
    tabular.caption = self.species_nice.capitalize()

    logger.info(tabular.as_table())
    logger.info(tabular.as_tabular())

    tex_fpath = join(self.dpath, 'dbstats.tex')
    ut.writeto(tex_fpath, tabular.as_tabular())
    return ut.render_latex(tabular.as_table(), dpath=self.dpath, fname='dbstats')
def print_error_analysis(self):
    """
    Log the edges involved in every merge and split error of the graph
    simulation.

    For each error group the edges of the induced subgraph are printed
    as a table, with the annotation ids of each part of the group
    highlighted in its own color.

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
        >>> self = Chap5('PZ_Master1')
    """
    sim_results = self.ensure_results('simulation')

    graph_results = sim_results['graph']
    real_ccs = graph_results['real_ccs']
    pred_ccs = graph_results['pred_ccs']

    delta = ut.grouping_delta(pred_ccs, real_ccs, pure=False)
    splits = delta['splits']
    merges = delta['merges']

    graph = graph_results['graph']
    # edge attributes left out of the printed tables
    ignore = [
        'timestamp',
        'num_reviews',
        'confidence',
        'default_priority',
        'review_id',
    ]

    logger.info('\nsplits = ' + ut.repr4(splits))
    logger.info('\nmerges = ' + ut.repr4(merges))

    def _log_edges(df, parts):
        if not len(df):
            logger.info(df)
            return
        # Move the most informative columns to the front
        preferred = df.columns.intersection(
            ['truth', 'decision', 'tags', 'prob_match']
        )
        df = df.reindex(ut.partial_order(df.columns, preferred), axis=1)
        text = df.to_string()
        palette = ['blue', 'red', 'green', 'cyan']
        pattern_to_color = {}
        for part, color in zip(parts, palette):
            pattern = ut.regex_or(ut.regex_word(str(a)) for a in part)
            pattern_to_color[pattern] = color
        logger.info(ut.highlight_multi_regex(text, pattern_to_color))

    for header, groups in (('Merge Row: ', merges), ('Split Row: ', splits)):
        for parts in groups:
            logger.info('\n\n')
            logger.info(header + ut.repr2(parts))
            sub = graph.subgraph(ut.flatten(parts))
            df = nxu.edge_df(graph, sub.edges(), ignore=ignore)
            _log_edges(df, parts)
def draw_error_graph_analysis(self):
    """
    Draw one image per erroneous edge found in a sample of the merge and
    split errors of the graph simulation.

    The simulation end state is loaded into an inference object, a sample
    of merge and split groups is taken, the edges responsible for each
    group are looked up, and for every such edge a figure is written to
    the ``errors`` directory under ``self.dpath``. The figure label states
    how the edge was reviewed, its real and predicted decision, and the
    task probabilities for the edge.

    CommandLine:
        python -m wbia Chap5.draw error_graph_analysis GZ_Master1
        python -m wbia Chap5.draw error_graph_analysis PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
        >>> self = Chap5('PZ_Master1')
    """
    import wbia
    import wbia.plottool as pt

    sim_results = self.ensure_results('simulation')
    key = 'graph'

    # Edge attributes dropped from the per-edge info table
    ignore = [
        'timestamp',
        'num_reviews',
        'default_priority',
        'confidence',
        'review_id',
    ]

    task_keys = [
        'match_state',
        'photobomb_state',
    ]
    # Per task: mapping from class code to the name shown in the label
    task_nice_lookup = {
        'match_state': const.EVIDENCE_DECISION.CODE_TO_NICE,
        'photobomb_state': {'pb': 'Photobomb', 'notpb': 'Not Photobomb'},
    }

    mpl.rcParams.update(TMP_RC)

    # Load simulation end state with predicted and real PCCs
    real_ccs = sim_results[key]['real_ccs']
    pred_ccs = sim_results[key]['pred_ccs']
    graph = sim_results[key]['graph']

    # Manage data using a read-only inference object
    ibs = wbia.opendb(db=self.dbname)
    infr = wbia.AnnotInference.from_netx(graph, ibs=ibs)
    infr.readonly = True
    infr._viz_image_config['thumbsize'] = 700
    infr._viz_image_config['grow'] = True
    infr.load_latest_classifiers(join(self.dpath, 'clf'))
    infr.relabel_using_reviews(rectify=False)

    # For each node, mark its real and predicted ids
    # (the id is the position of the component in the respective list)
    infr.set_node_attrs(
        'real_id', {aid: nid for nid, cc in enumerate(real_ccs) for aid in cc}
    )
    infr.set_node_attrs(
        'pred_id', {aid: nid for nid, cc in enumerate(pred_ccs) for aid in cc}
    )

    # from networkx.utils import arbitrary_element as arbitrary

    # Gather a sample of error groups
    # (presumably at most n per error type; see ut.strided_sample)
    n = 20
    delta = ut.grouping_delta(pred_ccs, real_ccs, pure=False)
    sampled_errors = ut.odict(
        [
            ('merge', ut.strided_sample(delta['merges'], n)),
            ('split', ut.strided_sample(delta['splits'], n)),
        ]
    )
    for k, v in sampled_errors.items():
        logger.info('Sampled {} {} cases'.format(len(v), k))

    # One row per (error group, offending edge)
    err_items = []
    for case_type, cases in sampled_errors.items():
        for case in cases:
            case_aids = set(ut.flatten(case))
            # For each case find what edges need fixing
            if case_type == 'merge':
                error_edges = infr.find_pos_augment_edges(case_aids, k=1)
            else:
                # Edges within the group whose truth is negative but whose
                # decision disagrees with the truth
                edges = list(nxu.edges_between(graph, case_aids))
                _df = infr.get_edge_dataframe(edges)
                flags = (_df.truth != _df.decision) & (_df.truth == NEGTV)
                error_edges = _df.index[flags].tolist()
            for edge in error_edges:
                edge = infr.e_(*edge)
                err_items.append((case_type, case, error_edges, edge))
    err_items_df = pd.DataFrame(
        err_items, columns=['case_type', 'case', 'error_edges', 'edge']
    )

    edges = err_items_df['edge'].tolist()
    err_df = infr.get_edge_dataframe(edges)
    err_df = err_df.drop(err_df.columns.intersection(ignore), axis=1)

    # Lookup the probs for each state
    task_probs = infr._make_task_probs(edges)
    probs_df = pd.concat(task_probs, axis=1)  # NOQA

    dpath = ut.ensuredir((self.dpath, 'errors'))
    fnum = 1

    fig = pt.figure(fnum=fnum, pnum=(2, 1, 2))
    ax = pt.gca()
    pt.adjust_subplots(
        top=1, right=1, left=0, bottom=0.15, hspace=0.01, wspace=0, fig=fig
    )

    subitems = err_items_df
    # subitems = err_items_df[err_items_df.case_type == 'merge'].iloc[-2:]
    for _, (case_type, case, error_edges, edge) in subitems.iterrows():
        aids = ut.total_flatten(case)

        # Splits are colored by real id, merges by predicted id
        if case_type == 'split':
            colorby = 'real_id'
        if case_type == 'merge':
            colorby = 'pred_id'

        infr.show_error_case(aids, edge, error_edges, colorby=colorby)

        edge_info = err_df.loc[edge].to_dict()

        # Build the multi-line figure label for this edge
        xlabel = case_type.capitalize() + ' case. '
        code_to_nice = task_nice_lookup['match_state']
        real_code = infr.match_state_gt(edge)
        pred_code = edge_info['decision']

        real_nice = 'real={}'.format(code_to_nice[real_code])

        if edge_info['user_id'] == 'auto_clf':
            xlabel += 'Reviewed automatically'
        elif edge_info['user_id'] == 'oracle':
            xlabel += 'Reviewed manually'
        else:
            if pred_code is None:
                xlabel += 'Edge did not appear in candidate set'
            else:
                xlabel += 'Edge was a candidate, but not reviewed'

        if pred_code is None:
            pred_nice = 'pred=None'
        else:
            pred_nice = 'pred={}'.format(code_to_nice[pred_code])
        xlabel += '\n{}, {}'.format(real_nice, pred_nice)

        # Append one line of class probabilities per task; note that
        # code_to_nice is rebound to the lookup of the current task here
        for task_key in task_keys:
            tprobs = task_probs[task_key]
            _probs = tprobs.loc[edge].to_dict()
            code_to_nice = task_nice_lookup[task_key]
            probs = ut.odict(
                (v, _probs[k]) for k, v in code_to_nice.items() if k in _probs
            )
            probstr = ut.repr2(probs, precision=2, strkeys=True, nobr=True)
            xlabel += '\n' + probstr
        xlabel = xlabel.lstrip('\n')

        fig = pt.gcf()
        ax = fig.axes[0]
        ax.set_xlabel(xlabel)
        fig.set_size_inches([W, H * 2])

        # File name: case type, part sizes, short hash of the parts, edge
        parts = [ut.repr2(sorted(p), itemsep='', nobr=True) for p in case]
        case_id = ','.join(list(map(str, map(len, parts))))
        case_id += '_' + ut.hash_data('-'.join(parts))[0:8]
        eid = '{},{}'.format(*edge)
        fname = case_type + '_' + case_id + '_edge' + eid + '.png'
        fpath = join(dpath, fname)
        vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
def write_error_tables(self):
    """
    Write two LaTeX tables describing the errors left after simulation.

    ``error_size_details`` lists, for each of the three simulated
    algorithms, the number and size of predicted and real PCCs per case
    type (correct, split, merge). ``error_group_details`` lists the error
    group statistics of the graph algorithm only. Both are written as
    ``.tex`` files to ``self.dpath`` and rendered with ``ut.render_latex``.

    CommandLine:
        python -m wbia Chap5.draw error_tables PZ_Master1
        python -m wbia Chap5.draw error_tables GZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> from wbia.scripts.thesis import _ranking_hist, _ranking_cdf
        >>> self = Chap5('GZ_Master1')
    """
    sim_results = self.ensure_results('simulation')
    keys = ['ranking', 'rank+clf', 'graph']

    infos = {}
    for key in keys:
        expt_data = sim_results[key]
        info = self._get_error_sizes(expt_data, allow_hist=False)
        # Predicted PCC columns are blanked out for the correct cases
        info['correct']['n_pred_pccs'] = '-'
        info['correct']['size_pred_pccs'] = '-'
        infos[key] = info

    dfs = {}
    with_aves = 0
    for key in keys:
        info = infos[key]
        table = ut.odict()
        for t in ['correct', 'split', 'merge']:
            caseinfo = info[t]
            casetable = ut.odict()
            casetable['pred PCCs'] = caseinfo['n_pred_pccs']
            casetable['pred PCC size'] = caseinfo['size_pred_pccs']
            casetable['real PCCs'] = caseinfo['n_real_pccs']
            casetable['real PCC size'] = caseinfo['size_real_pccs']
            if with_aves:
                casetable['small size'] = caseinfo.get('ave_small', '-')
                casetable['large size'] = caseinfo.get('ave_large', '-')
            table[t] = ut.map_keys(upper_one, casetable)
        df = pd.DataFrame.from_dict(table, orient='index')
        df = df.loc[list(table.keys())]
        dfs[key] = df

    df = pd.concat(ut.take(dfs, keys), axis=0, keys=keys)
    tabular = Tabular(df, index=True, escape=True, colfmt='numeric')
    error_size_text = tabular.as_tabular()
    logger.info(error_size_text)

    # Inspect error sizes only for the graph. Read them from ``infos``
    # explicitly rather than from whatever ``info`` the loop above left
    # behind, so reordering ``keys`` cannot silently change this table.
    graph_info = infos['graph']
    table = ut.odict()
    for t in ['split', 'merge']:
        caseinfo = graph_info[t]
        casetable = ut.odict()
        casetable['error groups'] = caseinfo.get('n_errgroups', '-')
        casetable['group size'] = caseinfo.get('errgroup_size', '-')
        casetable['small PCC size'] = caseinfo.get('ave_small', '-')
        casetable['large PCC size'] = caseinfo.get('ave_large', '-')
        # NOTE(review): the keys are mapped through upper_one twice, as in
        # the original; presumably idempotent - confirm before dropping one.
        casetable = ut.map_keys(upper_one, casetable)
        table[t] = ut.map_keys(upper_one, casetable)
    df = pd.DataFrame.from_dict(table, orient='index')
    df = df.loc[list(table.keys())]

    tabular = Tabular(df, index=True, escape=True, colfmt='numeric')
    error_group_text = tabular.as_tabular()
    logger.info(error_group_text)

    outputs = [
        ('error_size_details', error_size_text),
        ('error_group_details', error_group_text),
    ]
    for fname, text in outputs:
        ut.write_to(join(self.dpath, fname + '.tex'), text)
        ut.render_latex(
            text, self.dpath, fname, preamb_extra=['\\usepackage{makecell}']
        )
def _get_error_sizes(self, expt_data, allow_hist=False):
    """
    Summarize how the predicted PCCs of one simulation differ from the
    real ones.

    Args:
        expt_data (dict): one entry of the simulation results; the keys
            ``'real_ccs'``, ``'pred_ccs'`` and ``'graph'`` are read
        allow_hist (bool): if True, sizes with at most two distinct values
            are reported as a histogram string instead of mean and std

    Returns:
        dict: keys ``'correct'``, ``'split'`` and ``'merge'``, each mapping
            to an ordered dict of counts and size strings
    """
    real_ccs = expt_data['real_ccs']
    pred_ccs = expt_data['pred_ccs']
    graph = expt_data['graph']
    # delta_df = ut.grouping_delta_stats(pred_ccs, real_ccs)
    # logger.info(delta_df)

    delta = ut.grouping_delta(pred_ccs, real_ccs)
    unchanged = delta['unchanged']

    splits = delta['splits']['new']
    merges = delta['merges']['old']

    # hybrids can be done by first splitting and then merging
    hybrid_splits = delta['hybrid']['splits']
    hybrid_merges = delta['hybrid']['merges']

    all_merges = merges + hybrid_merges
    all_splits = splits + hybrid_splits

    def ave_size(sets):
        # Format the sizes of the given collections as a single string
        lens = list(map(len, sets))
        hist = ut.dict_hist(lens)
        if allow_hist and len(hist) <= 2:
            return ut.repr4(hist, nl=0)
        else:
            mu = np.mean(lens)
            sigma = np.std(lens)
            return ave_str(mu, sigma, precision=1)

    def unchanged_measures(unchanged):
        # For unchanged groups the predicted and real PCCs are the same
        pred = true = unchanged
        unchanged_info = ut.odict(
            [
                ('n_pred_pccs', len(pred)),
                ('size_pred_pccs', ave_size(pred)),
                ('n_real_pccs', len(true)),
                ('size_real_pccs', ave_size(true)),
            ]
        )
        return unchanged_info

    def get_bad_edges(ccs, bad_decision, ret_ccs=False):
        # Generator: for every pair of components, yield the list of edges
        # crossing between them whose decision equals ``bad_decision``.
        # With ret_ccs the two components are returned along with the edge.
        for cc1, cc2 in ut.combinations(ccs, 2):
            cc1 = frozenset(cc1)
            cc2 = frozenset(cc2)
            bad_edges = []
            cross = nxu.edges_cross(graph, cc1, cc2)
            for edge in cross:
                d = graph.get_edge_data(*edge)
                if d['decision'] == bad_decision:
                    if ret_ccs:
                        bad_edges.append((cc1, cc2, edge))
                    else:
                        bad_edges.append(edge)
            yield bad_edges

    def split_measures(splits):
        # Filter out non-split hybrids
        splits = [s for s in splits if len(s) > 1]
        # Each group counts as one predicted PCC (its parts flattened
        # together) and as several real PCCs (the parts themselves)
        pred = ut.lmap(ut.flatten, splits)
        true = ut.flatten(splits)
        baddies = []
        smalls = []
        larges = []
        for split in splits:
            # Order the parts of the group by their size
            split = ut.sortedby(split, ut.lmap(len, split))
            smalls.append((split[0]))
            # NOTE(review): index 1 is the second part in sorted order, not
            # necessarily the last one, for groups with more than two
            # parts - confirm this is intended.
            larges.append((split[1]))
            b = list(get_bad_edges(split, POSTV))
            baddies.append(b)

        split_info = ut.odict(
            [
                ('n_pred_pccs', len(pred)),
                ('size_pred_pccs', ave_size(pred)),
                ('n_real_pccs', len(true)),
                ('size_real_pccs', ave_size(true)),
                ('n_errgroups', len(splits)),
                ('errgroup_size', ave_size(splits)),
                ('ave_small', ave_size(smalls)),
                ('ave_large', ave_size(larges)),
            ]
        )
        return split_info

    def merge_measures(merges):
        # Filter out non-merge hybrids
        merges = [s for s in merges if len(s) > 1]
        # Mirror image of split_measures: each group counts as one real
        # PCC and as several predicted PCCs
        true = ut.lmap(ut.flatten, merges)
        pred = ut.flatten(merges)
        baddies = []
        n_neg_redun = 0
        n_bad_pairs = 0
        n_bad_pccs = 0
        smalls = []
        larges = []
        for merge in merges:
            # Order the parts of the group by their size
            merge = ut.sortedby(merge, ut.lmap(len, merge))
            smalls.append((merge[0]))
            # NOTE(review): same indexing question as in split_measures
            larges.append((merge[1]))

            b = list(get_bad_edges(merge, NEGTV))
            b2 = list(get_bad_edges(merge, NEGTV, ret_ccs=True))

            baddies.append(b)
            # Largest number of negative edges between any pair of parts
            bad_neg_redun = max(map(len, b))
            # Pairs of parts with at least one negative edge between them
            n_bad_pairs += sum(map(any, b))
            # Distinct parts that touch at least one negative edge
            n_bad_pccs += len(set(ut.flatten(ut.take_column(ut.flatten(b2), [0, 1]))))
            if bad_neg_redun >= 2:
                n_neg_redun += 1

        merge_info = ut.odict(
            [
                ('n_pred_pccs', len(pred)),
                ('size_pred_pccs', ave_size(pred)),
                ('n_real_pccs', len(true)),
                ('size_real_pccs', ave_size(true)),
                ('n_errgroups', len(merges)),
                ('errgroup_size', ave_size(merges)),
                ('ave_incon_edges', ave_size(ut.lmap(ut.flatten, baddies))),
                ('n_bad_pairs', n_bad_pairs),
                ('n_bad_pccs', n_bad_pccs),
                ('n_neg_redun', n_neg_redun),
                ('ave_small', ave_size(smalls)),
                ('ave_large', ave_size(larges)),
            ]
        )
        return merge_info

    # def hybrid_measures(hybrid):
    #     pred = hybrid['old']
    #     true = hybrid['new']
    #     hybrid_info = ut.odict([
    #         ('n_pred_pccs', len(pred)),
    #         ('size_pred_pccs', ave_size(pred)),
    #         ('n_real_pccs', len(true)),
    #         ('size_real_pccs', ave_size(true)),
    #     ])
    #     return hybrid_info

    info = {
        'correct': unchanged_measures(unchanged),
        'split': split_measures(all_splits),
        'merge': merge_measures(all_merges),
    }
    return info
def draw_simulation(self):
    """
    Plot, for the three simulated algorithms, the fraction of merges
    remaining and the number of errors against the number of manual
    reviews, and save the figure as ``simulation.png`` in ``self.dpath``.

    CommandLine:
        python -m wbia Chap5.draw simulation PZ_MTEST --diskshow
        python -m wbia Chap5.draw simulation GZ_Master1 --diskshow
        python -m wbia Chap5.draw simulation PZ_Master1 --diskshow

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap5('GZ_Master1')
    """
    sim_results = self.ensure_results('simulation')

    keys = ['ranking', 'rank+clf', 'graph']
    colors = ut.dzip(keys, ['red', 'orange', 'b'])

    def _column(col):
        # One list of metric values per simulated algorithm
        return {
            name: ut.take_column(result['metrics'], col)
            for name, result in sim_results.items()
        }

    xdatas = _column('n_manual')
    xmax = max(map(max, xdatas.values()))
    # small margin on both sides of the x axis
    xpad = (1.01 * xmax) - xmax

    next_pnum = pt.make_pnum_nextgen(nSubplots=2)
    mpl.rcParams.update(TMP_RC)

    def _panel(col, ylabel, ymax=None):
        # Draw one subplot with a line per algorithm
        pt.figure(fnum=1, pnum=next_pnum())
        ax = pt.gca()
        ydatas = _column(col)
        for key in keys:
            ax.plot(xdatas[key], ydatas[key], label=key, color=colors[key])
        if ymax is None:
            ymax = max(map(max, ydatas.values())) * 1.01
        ax.set_ylim(0, ymax)
        ax.set_xlim(-xpad, xmax + xpad)
        ax.set_xlabel('# manual reviews')
        ax.set_ylabel(ylabel)
        ax.legend()

    _panel('merge_remain', 'fraction of merges remain', ymax=1)
    _panel('n_errors', '# errors')

    fig = pt.gcf()
    fig.set_size_inches([W, H * 0.75])
    pt.adjust_subplots(wspace=0.25, fig=fig)

    fpath = join(self.dpath, 'simulation.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    if ut.get_argflag('--diskshow'):
        ut.startfile(fpath)
def draw_refresh(self):
    """
    Plot the ``pprob_any`` metric of the graph simulation against the
    number of manual reviews and save it as ``refresh.png``.

    CommandLine:
        python -m wbia Chap5.draw refresh GZ_Master1 --diskshow
        python -m wbia Chap5.draw refresh PZ_Master1 --diskshow

    Returns:
        str: path of the written image
    """
    sim_results = self.ensure_results('simulation')

    keys = ['ranking', 'rank+clf', 'graph']
    colors = ut.dzip(keys, ['red', 'orange', 'b'])

    def _column(col):
        # The column is collected for every algorithm, although only the
        # graph entry is drawn below.
        return {
            name: ut.take_column(result['metrics'], col)
            for name, result in sim_results.items()
        }

    xdatas = _column('n_manual')
    next_pnum = pt.make_pnum_nextgen(nSubplots=1)
    mpl.rcParams.update(TMP_RC)

    pt.figure(fnum=1, pnum=next_pnum())
    ax = pt.gca()
    ydatas = _column('pprob_any')

    shown = 'graph'
    ax.plot(xdatas[shown], ydatas[shown], label=shown, color=colors[shown])
    ax.set_xlabel('# manual reviews')
    ax.set_ylabel('P(C=1)')

    fpath = join(self.dpath, 'refresh.png')
    fig = pt.gcf()
    fig.set_size_inches([W, H * 0.5])
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return fpath
def draw_simulation2(self):
    """
    Draw a 2x4 diagnostic grid for the graph-algorithm simulation.

    The same four panels are drawn twice, once per row:

    1. fraction of merges remaining,
    2. number of errors, with the per-decision action overlay,
    3. refresh criteria against ``expt_data['refresh_thresh']``,
    4. false negative / false positive counts.

    The first row is plotted against the number of decisions while
    ``overshow['auto']`` is set (otherwise against the number of manual
    reviews); the second row is always plotted against the number of
    manual reviews.  The figure is titled with ``self.dbname`` and is not
    written to disk by this method.

    CommandLine:
        python -m wbia Chap5.draw_simulation2 --db PZ_MTEST --show
        python -m wbia Chap5.draw_simulation2 --db GZ_Master1 --show
        python -m wbia Chap5.draw_simulation2 --db PZ_Master1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> dbname = ut.get_argval('--db', default='GZ_Master1')
        >>> self = Chap5(dbname)
        >>> self.draw_simulation2()
        >>> ut.show_if_requested()
    """
    mpl.rcParams.update(TMP_RC)
    sim_results = self.ensure_results('simulation')
    expt_data = sim_results['graph']
    metrics_df = pd.DataFrame.from_dict(expt_data['metrics'])
    fnum = 1

    # Which indicator rows the action overlay shows.
    overshow = {
        'phase': True,
        'pred': False,
        'auto': True,
        'real': True,
        'error': True,
        'recover': True,
    }

    # Per-decision indicator masks used by the overlay.
    phase = metrics_df['phase'].map(lambda x: x.split('_')[0])
    is_correct = (
        metrics_df['test_action'].map(lambda x: x.startswith('correct')).values
    )
    recovering = metrics_df['recovering'].values
    is_auto = metrics_df['user_id'].map(lambda x: x.startswith('algo:')).values
    ppos = metrics_df['pred_decision'].map(lambda x: x == POSTV).values
    rpos = metrics_df['true_decision'].map(lambda x: x == POSTV).values

    # (overshow key, text label, [(mask, color), ...]) in bottom-to-top
    # drawing order.
    overlay_rows = [
        (
            'auto',
            'is_auto(auto=gold,manual=blue)',
            [(is_auto, 'gold'), (~is_auto, 'blue')],
        ),
        ('pred', 'pred_pos', [(ppos, 'aqua')]),
        ('real', 'real_pos', [(rpos, 'lime')]),
        ('error', 'is_error', [(~is_correct, 'red')]),
        ('recover', 'is_recovering', [(recovering, 'orange')]),
        (
            'phase',
            'phase',
            [
                (phase == 'ranking', 'red'),
                (phase == 'posredun', 'green'),
                (phase == 'negredun', 'blue'),
            ],
        ),
    ]

    max_errors = max(metrics_df['n_errors'])
    thresh = expt_data['refresh_thresh']

    def plot_intervals(xdata, flags, color=None, low=0, high=1):
        """Shade each consecutive run of set ``flags`` between low and high."""
        ax = pt.gca()
        idxs = np.where(flags)[0]
        ranges = ut.group_consecutives(idxs)
        bounds = [(min(a), max(a)) for a in ranges if len(a) > 0]
        xdata_ = xdata.values
        xs, ys = [xdata_[0]], [low]
        for a, b in bounds:
            # Widen by half a step so a single-sample run is still visible.
            x1 = xdata_[a] - 0.5
            x2 = xdata_[b] + 0.5
            xs.extend([x1, x1, x2, x2])
            ys.extend([low, high, high, low])
        xs.append(xdata_[-1])
        ys.append(low)
        ax.fill_between(xs, ys, low, alpha=0.6, color=color)

    def overlay_actions(xdata, ymax=1):
        """
        Draws indicators that detail the algorithm state at given
        timestamps.
        """
        enabled = [row for row in overlay_rows if overshow[row[0]]]
        steps = np.linspace(0, 1, len(enabled) + 1) * ymax
        for i, (_, label, intervals) in enumerate(enabled):
            pt.absolute_text((0.2, steps[i : i + 2].mean()), label)
            for flags, color in intervals:
                # Every row, including 'pred', goes through the same
                # call; the old 'pred' branch passed unsupported
                # low=/high= keywords and raised TypeError when enabled.
                plot_intervals(
                    xdata, flags, color, low=steps[i], high=steps[i + 1]
                )

    pnum_ = pt.make_pnum_nextgen(nRows=2, nSubplots=8)

    def draw_row(xdata, xlabel):
        """Draw the four panels of one grid row against ``xdata``."""
        ydatas = ut.odict([('Graph', metrics_df['merge_remain'])])
        pt.multi_plot(
            xdata,
            ydatas,
            marker='',
            markersize=1,
            xlabel=xlabel,
            ylabel='fraction of merge remaining',
            ymin=0,
            rcParams=TMP_RC,
            use_legend=True,
            fnum=fnum,
            pnum=pnum_(),
        )

        ykeys = ['n_errors']
        pt.multi_plot(
            xdata,
            metrics_df[ykeys].values.T,
            xlabel=xlabel,
            ylabel='# of errors',
            marker='',
            markersize=1,
            ymin=0,
            rcParams=TMP_RC,
            fnum=fnum,
            pnum=pnum_(),
            use_legend=False,
        )
        overlay_actions(xdata, max_errors)

        pt.multi_plot(
            xdata,
            [metrics_df['pprob_any']],
            label_list=['P(C=1)'],
            xlabel=xlabel,
            ylabel='refresh criteria',
            marker='',
            ymin=0,
            ymax=1,
            rcParams=TMP_RC,
            fnum=fnum,
            pnum=pnum_(),
            use_legend=False,
        )
        ax = pt.gca()
        ax.plot(
            [min(xdata), max(xdata)], [thresh, thresh], '-g', label='refresh thresh'
        )
        ax.legend()

        ykeys = ['n_fn', 'n_fp']
        pt.multi_plot(
            xdata,
            metrics_df[ykeys].values.T,
            label_list=ykeys,
            xlabel=xlabel,
            ylabel='# of errors',
            marker='x',
            markersize=1,
            ymin=0,
            rcParams=TMP_RC,
            ymax=max_errors,
            fnum=fnum,
            pnum=pnum_(),
            use_legend=True,
        )

    if overshow['auto']:
        draw_row(metrics_df['n_decision'], '# decisions')
    else:
        draw_row(metrics_df['n_manual'], '# manual reviews')
    draw_row(metrics_df['n_manual'], '# manual reviews')

    pt.set_figtitle(self.dbname)
[docs]@ut.reloadable_class
class Chap4(DBInputs):
"""
Collect data from experiments to visualize

TODO: redo save/loading of measurements

Ignore:
>>> from wbia.scripts.thesis import *
>>> fpath = ut.glob(ut.truepath('~/Desktop/mtest_plots'), '*.pkl')[0]
>>> self = ut.load_data(fpath)
"""
# Root directory; _setup creates the per-sample output directory and the
# per-database symlink underneath it.
base_dpath = ut.truepath('~/latex/crall-thesis-2017/figures4')
# Per task, maps class codes to the human-readable names used when
# labelling tables and hard-case figures.
task_nice_lookup = {
'match_state': const.EVIDENCE_DECISION.CODE_TO_NICE,
'photobomb_state': {'pb': 'Photobomb', 'notpb': 'Not Photobomb'},
}
def _setup(self):
    r"""
    Build the one-vs-one problem for this database and prepare outputs.

    Creates a ``vsone.OneVsOneProblem`` over ``self.aids_pool``, evaluates
    it (fully when ``--eval`` is given, otherwise via
    ``setup_evaluation``), and stores ``pblm``, ``dbcode``,
    ``eval_task_keys``, ``species``, ``data_key`` and ``clf_key`` on
    ``self``.  Finally ensures the ``<base_dpath>/<dbcode>`` directory
    exists and points a ``<base_dpath>/<dbname>`` symlink at it.

    ``self.species`` is a display name for the two known zebra species
    codes; for any other species the raw species code is used instead of
    failing with an unbound local.

    CommandLine:
        python -m wbia Chap4._setup --db GZ_Master1

        python -m wbia Chap4._setup --db PZ_Master1 --eval
        python -m wbia Chap4._setup --db PZ_MTEST
        python -m wbia Chap4._setup --db PZ_PB_RF_TRAIN

        python -m wbia Chap4.measure_all --db PZ_PB_RF_TRAIN

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> dbname = ut.get_argval('--db', default='GZ_Master1')
        >>> self = Chap4(dbname)
        >>> self._setup()

    Ignore:
        from wbia.scripts.thesis import *
        self = Chap4('PZ_Master1')

        from wbia.scripts.thesis import *
        self = Chap4('PZ_PB_RF_TRAIN')
        self.ibs.print_annot_stats(aids, prefix='P')
    """
    from os.path import expanduser

    self._precollect()
    ibs = self.ibs
    aids = self.aids_pool

    pblm = vsone.OneVsOneProblem.from_aids(ibs, aids)
    data_key = pblm.default_data_key
    clf_key = pblm.default_clf_key
    pblm.eval_task_keys = ['match_state', 'photobomb_state']
    pblm.eval_data_keys = [data_key]
    pblm.eval_clf_keys = [clf_key]

    if ut.get_argflag('--eval'):
        pblm.eval_task_keys = ['photobomb_state', 'match_state']
        # Evaluate with eval_data_keys unset, then restore the default key.
        pblm.eval_data_keys = None
        pblm.evaluate_classifiers()
        pblm.eval_data_keys = [data_key]
    else:
        pblm.setup_evaluation()

    ibs = pblm.infr.ibs
    pblm.samples.print_info()

    species_code = ibs.get_database_species(pblm.infr.aids)[0]
    species_nice = {
        'zebra_plains': 'Plains Zebras',
        'zebra_grevys': "Grévy's Zebras",
    }
    # Fall back to the raw code so unknown species do not leave `species`
    # unbound.
    species = species_nice.get(species_code, species_code)

    dbcode = '{}_{}'.format(ibs.dbname, len(pblm.samples))

    self.pblm = pblm
    self.dbcode = dbcode
    self.eval_task_keys = pblm.eval_task_keys
    self.species = species
    self.data_key = data_key
    self.clf_key = clf_key

    # Outputs live in a directory named after the sample; the symlink
    # gives them a stable per-database path.
    dpath = expanduser(self.base_dpath + '/' + self.dbcode)
    link = expanduser(self.base_dpath + '/' + self.dbname)
    ut.ensuredir(dpath)
    self.real_dpath = dpath
    try:
        self.link = ut.symlink(dpath, link, overwrite=True)
    except Exception:
        # Best effort: move whatever occupies the link path out of the
        # way and retry.
        # NOTE(review): this tests `dpath` (just ensured above) rather
        # than `link` -- confirm which path was meant.
        if exists(dpath):
            newpath = ut.non_existing_path(dpath, suffix='_old')
            ut.move(link, newpath)
            self.link = ut.symlink(dpath, link)
def measure_all(self):
    r"""
    Run every Chap4 measurement and store the results.

    Two result bundles ('sample_info' and 'all') are recorded in
    ``self.expt_results`` and saved as pickles under ``self.dpath``; the
    hard-case, rerank and prune measurements are then delegated to their
    own methods.  With ``--draw`` the figures are drawn afterwards.

    CommandLine:
        python -m wbia Chap4.measure_all --db PZ_PB_RF_TRAIN
        python -m wbia Chap4.measure_all --db PZ_MTEST
        python -m wbia Chap4.measure_all

        python -m wbia Chap4.measure_all --db GZ_Master1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> dbname = ut.get_argval('--db', default='PZ_MTEST')
        >>> dbnames = ut.get_argval('--dbs', type_=list, default=[dbname])
        >>> for dbname in dbnames:
        >>>     print('dbname = %r' % (dbname,))
        >>>     self = Chap4(dbname)
        >>>     self.measure_all()
    """
    self._setup()
    pblm = self.pblm

    def _record(name, payload):
        # Keep the bundle in memory and mirror it to disk.
        self.expt_results[name] = payload
        ut.save_data(join(str(self.dpath), name + '.pkl'), payload)

    sample_info = {
        'graph': pblm.infr.graph,
        'aid_pool': self.aids_pool,
        'pblm_aids': pblm.infr.aids,
        'encoded_labels2d': pblm.samples.encoded_2d(),
        'subtasks': pblm.samples.subtasks,
        'multihist': pblm.samples.make_histogram(),
    }
    _record('sample_info', sample_info)

    importance = {}
    for key in pblm.eval_task_keys:
        importance[key] = pblm.feature_importance(task_key=key)

    match_task = pblm.samples['match_state']
    lnbnn_scores = pblm.samples.simple_scores['score_lnbnn_1vM']
    is_default = match_task.indicator_df[match_task.default_class_name]
    lnbnn_xy = pd.concat([lnbnn_scores, is_default], axis=1)

    everything = {
        'lnbnn_xy': lnbnn_xy,
        'task_combo_res': self.pblm.task_combo_res,
        'importance': importance,
        'data_key': self.data_key,
        'clf_key': self.clf_key,
    }
    _record('all', everything)

    for task_key in ('match_state', 'photobomb_state'):
        if task_key in pblm.eval_task_keys:
            self.measure_hard_cases(task_key)

    self.measure_rerank()
    self.measure_prune()

    if ut.get_argflag('--draw'):
        self.draw_all()
[docs] def draw_all(self):
r"""
Write the Chap4 tables and draw the Chap4 figures.

The tasks to report on are the keys of ``results['task_combo_res']``
from the 'all' results; the per-task outputs are guarded by membership
in that set.

CommandLine:
python -m wbia Chap4.draw_all --db PZ_MTEST
python -m wbia Chap4.draw_all --db PZ_PB_RF_TRAIN
python -m wbia Chap4.draw_all --db GZ_Master1
python -m wbia Chap4.draw_all --db PZ_Master1
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.scripts.thesis import *
>>> dbname = ut.get_argval('--db', default='PZ_MTEST')
>>> dbnames = ut.get_argval('--dbs', type_=list, default=[dbname])
>>> for dbname in dbnames:
>>> print('dbname = %r' % (dbname,))
>>> self = Chap4(dbname)
>>> self.draw_all()
"""
results = self.ensure_results('all')
# Only tasks that have combined classifier results get per-task output
eval_task_keys = set(results['task_combo_res'].keys())
self.write_sample_info()
# Photobomb task: importance table, both metric tables, ROC and MCC plots
task_key = 'photobomb_state'
if task_key in eval_task_keys:
self.write_importance(task_key)
self.write_metrics(task_key)
self.write_metrics2(task_key)
self.draw_roc(task_key)
self.draw_mcc_thresh(task_key)
# Match task: score histogram, ROC / MCC plots, wordcloud and tables
task_key = 'match_state'
if task_key in eval_task_keys:
self.draw_class_score_hist()
self.draw_roc(task_key)
self.draw_mcc_thresh(task_key)
self.draw_wordcloud(task_key)
self.write_importance(task_key)
self.write_metrics(task_key)
# NOTE(review): presumably still under the match_state check -- confirm
self.draw_rerank()
# --noprune skips the pruning figure
if not ut.get_argflag('--noprune'):
self.draw_prune()
# --nodraw skips the hard-case figures for both tasks
if not ut.get_argflag('--nodraw'):
task_key = 'match_state'
if task_key in eval_task_keys:
self.draw_hard_cases(task_key)
task_key = 'photobomb_state'
if task_key in eval_task_keys:
self.draw_hard_cases(task_key)
def measure_prune(self):
    """
    Measure classifier quality while pruning the least important feature.

    Each round trains and evaluates the classifiers on the remaining
    feature dimensions, records the reports and the mean of the
    classifiers' ``feature_importances_`` per dimension, and then drops
    the lowest-ranked dimension(s).  The history is stored under the
    'prune' experiment name and pickled to ``self.dpath``.

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('GZ_Master1')
        >>> self = Chap4('PZ_Master1')
        >>> self = Chap4('PZ_MTEST')
    """
    from wbia.scripts import clf_helpers

    if getattr(self, 'pblm', None) is None:
        self._setup()

    pblm = self.pblm
    task_key = pblm.primary_task_key
    data_key = pblm.default_data_key
    clf_key = pblm.default_clf_key

    featinfo = vt.AnnotPairFeatInfo(pblm.samples.X_dict[data_key])
    logger.info(featinfo.get_infostr())

    labels = pblm.samples.subtasks[task_key]
    feat_dims = pblm.samples.X_dict[data_key].columns.tolist()

    # How many dimensions to drop per round, and the floor to stop at.
    prune_rate = 1
    min_feats = 1
    n_rounds = int(np.ceil((len(feat_dims) - min_feats) / prune_rate))

    history = {
        'n_dims': [],
        'reports': [],
        'sub_reports': [],
        'mdis_list': [],
    }

    prog = ub.ProgIter(range(n_rounds), label='prune')
    for _ in prog:
        prog.ensure_newline()
        clf_list, res_list = pblm._train_evaluation_clf(
            task_key, data_key, clf_key, feat_dims
        )
        combo_res = clf_helpers.ClfResult.combine_results(res_list, labels)
        fold_reports = [res.extended_clf_report(verbose=0) for res in res_list]
        combo_report = combo_res.extended_clf_report(verbose=0)

        # Mean decrease in impurity, averaged over the classifiers
        mdi_stack = np.array([clf_.feature_importances_ for clf_ in clf_list])
        mean_mdi = ut.dzip(feat_dims, np.mean(mdi_stack, axis=0))

        # Snapshot this round before shrinking the feature list
        history['n_dims'].append(len(feat_dims))
        history['reports'].append(combo_report)
        history['sub_reports'].append(fold_reports)
        history['mdis_list'].append(mean_mdi)

        # Drop the lowest-ranked dimensions, never going below min_feats
        ranked = ub.argsort(mean_mdi)
        n_have = len(ranked)
        n_keep = max(n_have - prune_rate, min_feats)
        for dim in ranked[0 : n_have - n_keep]:
            feat_dims.remove(dim)

    results = history
    expt_name = 'prune'
    self.expt_results[expt_name] = results
    ut.save_data(join(str(self.dpath), expt_name + '.pkl'), results)
def measure_rerank(self):
    """
    Compare name ranks from the ranking algorithm against verifier ranks.

    Runs a query request over the sampled query / database annotations,
    scores the top results of every query with the 'match_state'
    verifier, and stores the cumulative rank histograms of both orderings
    under the 'rerank' experiment name (also pickled to ``self.dpath``).

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> defaultdb = 'PZ_Master1'
        >>> defaultdb = 'GZ_Master1'
        >>> self = Chap4(defaultdb)
        >>> self._setup()
        >>> self.measure_rerank()
    """
    if getattr(self, 'pblm', None) is None:
        self._setup()

    pblm = self.pblm
    infr = pblm.infr
    ibs = pblm.infr.ibs

    # NOTE: this is not the aids_pool for PZ_Master1
    aids = pblm.infr.aids
    qaids, daids_list, info_list = Sampler._varied_inputs(ibs, aids)

    # HACK: mirror the orientation augmentation in the query config
    augment = pblm.hyper_params['vsone_kpts']['augment_orientation']
    cfgdict = {'query_rotation_heuristic': True} if augment else {}

    daids = daids_list[0]
    info = info_list[0]

    # Execute the ranking algorithm
    qaids = sorted(qaids)
    daids = sorted(daids)
    qreq_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict)
    cm_list = qreq_.execute()
    cm_list = [cm.extend_results(qreq_) for cm in cm_list]

    # Collect the unique (query, top database) edges to verify
    top = 20
    rerank_pairs = []
    for cm in cm_list:
        rerank_pairs.extend(
            [infr.e_(cm.qaid, daid) for daid in cm.get_top_aids(top)]
        )
    rerank_pairs = list(set(rerank_pairs))

    verifiers = infr.learn_evaluation_verifiers()
    probs = verifiers['match_state'].predict_proba_df(rerank_pairs)
    pos_probs = probs[POSTV]

    clf_name_ranks = []
    lnbnn_name_ranks = []
    for cm in cm_list:
        top_daids = cm.get_top_aids(top)
        edges = [infr.e_(cm.qaid, daid) for daid in top_daids]
        dnids = cm.dnid_list[ut.take(cm.daid2_idx, top_daids)]
        scores = pos_probs.loc[edges].values
        order = np.argsort(scores)[::-1]
        hits = np.where(cm.qnid == dnids[order])[0]
        if len(hits) == 0:
            # Correct name is not among the top results
            clf_rank = len(cm.unique_nids) - 1
        else:
            clf_rank = hits[0]
        clf_name_ranks.append(clf_rank)
        lnbnn_name_ranks.append(cm.get_name_ranks([cm.qnid])[0])

    def _rank_cdf(ranks):
        # Cumulative fraction of queries at or below each rank bin
        bins = np.arange(len(qreq_.dnids))
        hist = np.histogram(ranks, bins=bins)[0]
        return np.cumsum(hist) / sum(hist)

    lnbnn_cdf = _rank_cdf(lnbnn_name_ranks)
    clf_cdf = _rank_cdf(clf_name_ranks)

    results = [
        (lnbnn_cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})),
        (clf_cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})),
    ]
    expt_name = 'rerank'
    self.expt_results[expt_name] = results
    ut.save_data(join(str(self.dpath), expt_name + '.pkl'), results)
def measure_hard_cases(self, task_key):
    """
    Find a failure case for each class

    Failed predictions are grouped by their (pred, real) class pair, a
    front / middle / back selection by easiness is taken from every
    group, and each selected case is augmented with its one-vs-one match.
    The cases are pickled to ``<dpath>/<task_key>_hard_cases.pkl``.

    Args:
        task_key (str): task to analyse.  'photobomb_state' predicts with
            the MCC-maximizing thresholds; any other task uses argmax.

    Returns:
        list of dict: one entry per selected case with the keys 'edge',
        'real', 'pred', 'failed', 'easiness', 'real_conf', 'probs',
        'edge_data' and 'match'.

    CommandLine:
        python -m wbia Chap4.measure hard_cases GZ_Master1 match_state
        python -m wbia Chap4.measure hard_cases GZ_Master1 photobomb_state
        python -m wbia Chap4.draw hard_cases GZ_Master1 match_state
        python -m wbia Chap4.draw hard_cases GZ_Master1 photobomb_state

        python -m wbia Chap4.measure hard_cases PZ_Master1 match_state
        python -m wbia Chap4.measure hard_cases PZ_Master1 photobomb_state
        python -m wbia Chap4.draw hard_cases PZ_Master1 match_state
        python -m wbia Chap4.draw hard_cases PZ_Master1 photobomb_state

        python -m wbia Chap4.measure hard_cases PZ_MTEST match_state
        python -m wbia Chap4.draw hard_cases PZ_MTEST photobomb_state

        python -m wbia Chap4.measure hard_cases MantaMatcher match_state

    Ignore:
        >>> task_key = 'match_state'
        >>> task_key = 'photobomb_state'
        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('GZ_Master1')
        >>> self._setup()
    """
    if getattr(self, 'pblm', None) is None:
        logger.info('Need to setup before measuring hard cases')
        self._setup()
    logger.info('Measuring hard cases')

    pblm = self.pblm

    # Number of cases taken from the front, middle and back of each group
    front = mid = back = 8

    res = pblm.task_combo_res[task_key][self.clf_key][self.data_key]

    logger.info('task_key = %r' % (task_key,))
    if task_key == 'photobomb_state':
        method = res.get_thresholds('mcc', 'maximize')
        logger.info('Using thresholds: ' + ut.repr4(method))
    else:
        method = 'argmax'
        logger.info('Using argmax')

    case_df = res.hardness_analysis(pblm.samples, pblm.infr, method=method)
    case_df = case_df.sort_values(['easiness'])

    failure_cases = case_df[case_df['failed']]
    if len(failure_cases) == 0:
        logger.info('No reviewed failures exist. Do pblm.qt_review_hardcases')

    logger.info('There are {} failure cases'.format(len(failure_cases)))
    logger.info(
        'With average hardness {}'.format(
            ut.repr2(
                ut.stats_dict(failure_cases['hardness']), strkeys=True, precision=2
            )
        )
    )

    cases = []
    # The group keys must be a list: pandas treats a tuple as ONE key,
    # which fails here because there is no ('pred', 'real') column.
    for (pred, real), group in failure_cases.groupby(['pred', 'real']):
        group = group.sort_values(['easiness'])
        flags = ut.flag_percentile_parts(group['easiness'], front, mid, back)
        subgroup = group[flags]

        logger.info(
            'Selected {} r({})-p({}) cases'.format(
                len(subgroup), res.class_names[real], res.class_names[pred]
            )
        )

        for idx, case in subgroup.iterrows():
            edge = tuple(ut.take(case, ['aid1', 'aid2']))
            cases.append(
                {
                    'edge': edge,
                    'real': res.class_names[case['real']],
                    'pred': res.class_names[case['pred']],
                    'failed': case['failed'],
                    'easiness': case['easiness'],
                    'real_conf': case['real_conf'],
                    'probs': res.probs_df.loc[edge].to_dict(),
                    'edge_data': pblm.infr.get_edge_data(edge),
                }
            )

    logger.info('Selected %d cases in total' % (len(cases)))

    # Augment cases with their one-vs-one matches
    infr = pblm.infr
    data_key = self.data_key
    config = pblm.feat_extract_info[data_key][0]['match_config']
    edges = [case['edge'] for case in cases]
    matches = infr._exec_pairwise_match(edges, config)

    def _prep_annot(annot):
        # Load data needed for plot into annot dictionary
        annot['aid']
        annot['rchip']
        annot['kpts']
        # Cast the lazy dict to a real one
        return {k: annot[k] for k in annot.evaluated_keys()}

    for case, match in zip(cases, matches):
        # store its chip fpath and other required info
        match.annot1 = _prep_annot(match.annot1)
        match.annot2 = _prep_annot(match.annot2)
        case['match'] = match

    fpath = join(str(self.dpath), task_key + '_hard_cases.pkl')
    ut.save_data(fpath, cases)
    logger.info('Hard case space on disk: {}'.format(ut.get_file_nBytes_str(fpath)))

    return cases
def draw_hard_cases(self, task_key):
    """
    Render one image per stored hard case of ``task_key``.

    Every case is drawn from its saved match with a heatmask and labelled
    with the real class, the predicted class and the class probabilities;
    the images are written as JPEGs into ``<dpath>/cases_<task_key>``.

    CommandLine:
        python -m wbia Chap4.draw hard_cases GZ_Master1 match_state
        python -m wbia Chap4.draw hard_cases PZ_Master1 match_state
        python -m wbia Chap4.draw hard_cases PZ_Master1 photobomb_state
        python -m wbia Chap4.draw hard_cases GZ_Master1 photobomb_state

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('PZ_MTEST')
        >>> task_key = 'match_state'
        >>> self.draw_hard_cases(task_key)
    """
    cases = self.ensure_results(task_key + '_hard_cases')
    logger.info('Loaded {} {} hard cases'.format(len(cases), task_key))

    out_dpath = join(str(self.dpath), 'cases_{}'.format(task_key))
    ut.ensuredir(out_dpath)
    code_to_nice = self.task_nice_lookup[task_key]

    mpl.rcParams.update(TMP_RC)

    pz_gt_errors = {  # NOQA
        # The true state of these pairs are:
        NEGTV: [(239, 3745), (484, 519), (802, 803)],
        INCMP: [(4652, 5245), (4405, 5245), (4109, 5245), (16192, 16292)],
        POSTV: [(6919, 7192)],
    }

    # Heatmask only; lines, ellipses, orientations and eigs stay hidden
    show_kw = dict(
        vert=False,
        heatmask=True,
        show_lines=False,
        show_ell=False,
        show_ori=False,
        show_eig=False,
        modifysize=True,
    )

    prog = ut.ProgIter(cases, 'draw {} hard case'.format(task_key), bs=False)
    for case in prog:
        aid1, aid2 = case['edge']
        real_name = case['real']
        pred_name = case['pred']
        real_nice, pred_nice = ut.take(code_to_nice, [real_name, pred_name])
        fname = 'fail_{}_{}_{}_{}'.format(real_name, pred_name, aid1, aid2)

        # x-label: nice class names followed by the per-class probabilities
        raw_probs = case['probs']
        nice_probs = ut.odict(
            (nice, raw_probs[code])
            for code, nice in code_to_nice.items()
            if code in raw_probs
        )
        probstr = ut.repr2(nice_probs, precision=2, strkeys=True, nobr=True)
        xlabel = 'real={}, pred={},\n{}'.format(real_nice, pred_nice, probstr)

        fig = pt.figure(fnum=1000, clf=True)
        ax = pt.gca()
        case['match'].show(ax, **show_kw)

        ax.set_xlabel(xlabel)
        ax.get_xaxis().get_label().set_fontsize(24)

        fpath = join(str(out_dpath), fname + '.jpg')
        vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
def write_metrics2(self, task_key='match_state'):
    """
    Write confusion and metric tables using MCC-maximizing thresholds.

    Predictions are made from ``res.probs_df`` with the thresholds from
    ``res.get_thresholds('mcc', 'max')``.  Both tables are written as
    ``confusion2_<task_key>.tex`` / ``eval_metrics2_<task_key>.tex`` in
    ``self.dpath`` and rendered with ``ut.render_latex``.

    Args:
        task_key (str): task whose results are reported.

    Returns:
        tuple: ``(fpath1, fpath2)``, the values returned by
        ``ut.render_latex`` for the confusion and the metrics table.

    CommandLine:
        python -m wbia Chap4.draw metrics PZ_PB_RF_TRAIN match_state
        python -m wbia Chap4.draw metrics2 PZ_Master1 photobomb_state
        python -m wbia Chap4.draw metrics2 GZ_Master1 photobomb_state

        python -m wbia Chap4.draw metrics2 GZ_Master1 photobomb_state
    """
    import re
    from wbia.scripts import sklearn_utils

    results = self.ensure_results('all')
    task_combo_res = results['task_combo_res']
    data_key = results['data_key']
    clf_key = results['clf_key']

    res = task_combo_res[task_key][clf_key][data_key]

    # NOTE(review): measure_hard_cases passes 'maximize' here -- confirm
    # that 'max' is an accepted spelling.
    threshes = res.get_thresholds('mcc', 'max')
    y_pred = sklearn_utils.predict_from_probs(res.probs_df, threshes, force=True)
    y_true = res.target_enc_df

    res.augment_if_needed()
    sample_weight = res.sample_weight
    target_names = res.class_names

    report = sklearn_utils.classification_report2(
        y_true, y_pred, target_names, sample_weight, verbose=False
    )
    metric_df = report['metrics']
    confusion_df = report['confusion']

    logger.info(metric_df)
    logger.info(confusion_df)

    nice_lookup = self.task_nice_lookup[task_key]

    # --- confusion table ---
    # rename() maps the row / column labels; rename_axis() only sets the
    # axis *name* in current pandas, so it cannot be used for this.
    df = confusion_df.rename(index=nice_lookup, columns=nice_lookup)
    df.index.name = None
    df.columns.name = None

    colfmt = '|l|' + 'r' * (len(df) - 1) + '|l|'
    tabular = Tabular(df, colfmt=colfmt, hline=True)
    # Keep the final (sum) row in its own group
    tabular.groupxs = [list(range(len(df) - 1)), [len(df) - 1]]
    latex_str = tabular.as_tabular()

    sum_pred = df.index[-1]
    sum_real = df.columns[-1]
    latex_str = latex_str.replace(sum_pred, r'$\sum$ predicted')
    latex_str = latex_str.replace(sum_real, r'$\sum$ real')
    confusion_tex = ut.align(latex_str, '&', pos=None)
    logger.info(confusion_tex)

    # --- metrics table ---
    df = metric_df.rename(index=nice_lookup, columns={'mcc': 'MCC'})
    df = df.drop(['markedness', 'bookmaker', 'fpr'], axis=1)
    df.index.name = None
    df.columns.name = None
    # The `np.int` alias was removed from NumPy; the builtin is equivalent.
    df['support'] = df['support'].astype(int)
    df.columns = ut.emap(upper_one, df.columns)

    tabular = Tabular(df, colfmt='numeric')
    top, header, mid, bot = tabular.as_parts()
    # Split the final body line off into its own part
    lines = mid[0].split('\n')
    newmid = [lines[0:-1], lines[-1:]]
    tabular.parts = (top, header, newmid, bot)
    latex_str = tabular.as_tabular()
    # Avoid printing a negative zero
    latex_str = re.sub(' -0.00 ', ' 0.00 ', latex_str)
    metrics_tex = latex_str
    logger.info(metrics_tex)

    dpath = str(self.dpath)
    confusion_fname = 'confusion2_{}'.format(task_key)
    metrics_fname = 'eval_metrics2_{}'.format(task_key)

    ut.write_to(join(dpath, confusion_fname + '.tex'), confusion_tex)
    ut.write_to(join(dpath, metrics_fname + '.tex'), metrics_tex)

    fpath1 = ut.render_latex(confusion_tex, dpath=dpath, fname=confusion_fname)
    fpath2 = ut.render_latex(metrics_tex, dpath=dpath, fname=metrics_fname)
    return fpath1, fpath2
def write_metrics(self, task_key='match_state'):
    """
    Write confusion and metric tables using argmax predictions.

    Predictions are the argmax of ``res.clf_probs``.  Both tables are
    written as ``confusion_<task_key>.tex`` /
    ``eval_metrics_<task_key>.tex`` in ``self.dpath`` and rendered with
    ``ut.render_latex``.

    Args:
        task_key (str): task whose results are reported.

    Returns:
        tuple: ``(fpath1, fpath2)``, the values returned by
        ``ut.render_latex`` for the confusion and the metrics table.

    CommandLine:
        python -m wbia Chap4.draw metrics PZ_PB_RF_TRAIN match_state
        python -m wbia Chap4.draw metrics GZ_Master1 photobomb_state

        python -m wbia Chap4.draw metrics PZ_Master1,GZ_Master1 photobomb_state,match_state

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('PZ_Master1')
        >>> task_key = 'match_state'
    """
    import re
    from wbia.scripts import sklearn_utils

    results = self.ensure_results('all')
    task_combo_res = results['task_combo_res']
    data_key = results['data_key']
    clf_key = results['clf_key']

    res = task_combo_res[task_key][clf_key][data_key]
    res.augment_if_needed()
    y_pred = res.clf_probs.argmax(axis=1)
    y_true = res.y_test_enc
    sample_weight = res.sample_weight
    target_names = res.class_names

    report = sklearn_utils.classification_report2(
        y_true, y_pred, target_names, sample_weight, verbose=False
    )
    metric_df = report['metrics']
    confusion_df = report['confusion']

    logger.info(metric_df)
    logger.info(confusion_df)

    nice_lookup = self.task_nice_lookup[task_key]

    # --- confusion table ---
    # rename() maps the row / column labels; rename_axis() only sets the
    # axis *name* in current pandas, so it cannot be used for this.
    df = confusion_df.rename(index=nice_lookup, columns=nice_lookup)
    df.index.name = None
    df.columns.name = None

    colfmt = '|l|' + 'r' * (len(df) - 1) + '|l|'
    tabular = Tabular(df, colfmt=colfmt, hline=True)
    # Keep the final (sum) row in its own group
    tabular.groupxs = [list(range(len(df) - 1)), [len(df) - 1]]
    latex_str = tabular.as_tabular()

    sum_pred = df.index[-1]
    sum_real = df.columns[-1]
    latex_str = latex_str.replace(sum_pred, r'$\sum$ predicted')
    latex_str = latex_str.replace(sum_real, r'$\sum$ real')
    confusion_tex = ut.align(latex_str, '&', pos=None)
    logger.info(confusion_tex)

    # --- metrics table ---
    df = metric_df.rename(index=nice_lookup, columns={'mcc': 'MCC'})
    df = df.drop(['markedness', 'bookmaker', 'fpr'], axis=1)
    df.index.name = None
    df.columns.name = None
    # The `np.int` alias was removed from NumPy; the builtin is equivalent.
    df['support'] = df['support'].astype(int)
    df.columns = ut.emap(upper_one, df.columns)

    tabular = Tabular(df, colfmt='numeric')
    top, header, mid, bot = tabular.as_parts()
    # Split the final body line off into its own part
    lines = mid[0].split('\n')
    newmid = [lines[0:-1], lines[-1:]]
    tabular.parts = (top, header, newmid, bot)
    latex_str = tabular.as_tabular()
    # Avoid printing a negative zero
    latex_str = re.sub(' -0.00 ', ' 0.00 ', latex_str)
    metrics_tex = latex_str
    logger.info(metrics_tex)

    dpath = str(self.dpath)
    confusion_fname = 'confusion_{}'.format(task_key)
    metrics_fname = 'eval_metrics_{}'.format(task_key)

    ut.write_to(join(dpath, confusion_fname + '.tex'), confusion_tex)
    ut.write_to(join(dpath, metrics_fname + '.tex'), metrics_tex)

    fpath1 = ut.render_latex(confusion_tex, dpath=dpath, fname=confusion_fname)
    fpath2 = ut.render_latex(metrics_tex, dpath=dpath, fname=metrics_fname)
    return fpath1, fpath2
def write_sample_info(self):
    """
    Write a text summary of the sample to ``<dpath>/sample_info.txt``.

    The summary holds the number of names and annotations, the number of
    incomparable edges in the graph, how many incomparable-labelled edges
    lie inside versus between positive components, and one histogram per
    subtask.

    CommandLine:
        python -m wbia Chap4.draw sample_info GZ_Master1
    """
    import wbia

    results = self.ensure_results('sample_info')
    subtasks = results['subtasks']
    infr = wbia.AnnotInference.from_netx(results['graph'])

    info = ut.odict()
    info['n_names'] = (infr.pos_graph.number_of_components(),)
    info['n_aids'] = (len(results['pblm_aids']),)
    info['known_n_incomparable'] = infr.incomp_graph.number_of_edges()

    # Edges whose encoded match_state label is the incomparable class
    match_task = subtasks['match_state']
    incmp_code = match_task.class_names.tolist().index(INCMP)
    is_incmp = match_task.encoded_df == incmp_code
    incomp_edges = match_task.encoded_df[is_incmp.values].index.tolist()

    # Label both endpoints of every such edge with its positive component
    nid_pairs = np.array(
        [infr.pos_graph.node_labels(*edge) for edge in incomp_edges]
    )
    nid_pairs = vt.ensure_shape(nid_pairs, (None, 2))
    same_pcc = nid_pairs.T[0] == nid_pairs.T[1]

    info['incomp_info'] = {
        'inside_pcc': same_pcc.sum(),
        'betweeen_pcc': (~same_pcc).sum(),
    }

    for key, subtask in subtasks.items():
        info[key + '_hist'] = subtask.make_histogram()

    ut.write_to(join(str(self.dpath), 'sample_info.txt'), ut.repr4(info))
def write_importance(self, task_key):
    """
    Write the top feature importances of ``task_key`` as a LaTeX table.

    The ten dimensions with the highest importance are formatted as table
    rows.  The bare rows are written to
    ``<dpath>/feat_importance_<task_key>.tex`` and a complete table
    wrapping them is rendered with ``ut.render_latex``.

    Args:
        task_key (str): task whose importances are read from the 'all'
            results.

    Returns:
        the value returned by ``ut.render_latex`` for the rendered table.

    CommandLine:
        python -m wbia Chap4.draw importance GZ_Master1,PZ_Master1 match_state

        python -m wbia Chap4.draw importance GZ_Master1 match_state
        python -m wbia Chap4.draw importance PZ_Master1 match_state

        python -m wbia Chap4.draw importance GZ_Master1 photobomb_state
        python -m wbia Chap4.draw importance PZ_Master1 photobomb_state
    """
    # Print info for latex table
    results = self.ensure_results('all')
    importances = results['importance'][task_key]
    vals = importances.values()
    items = importances.items()
    # Highest importance first
    top_dims = ut.sortedby(items, vals)[::-1]

    lines = []
    num_top = 10
    for k, v in top_dims[:num_top]:
        k = feat_alias(k)
        # Escape underscores so LaTeX accepts the name
        k = k.replace('_', '\\_')
        lines.append('\\tt{{{}}} & ${:.4f}$ \\\\'.format(k, v))
    latex_str = '\n'.join(ut.align_lines(lines, '&'))

    fname = 'feat_importance_{}'.format(task_key)

    logger.info('TOP {} importances for {}'.format(num_top, task_key))
    logger.info('# of dimensions: %d' % (len(importances)))
    logger.info(latex_str)
    # Logger.info requires a message; a bare call raises TypeError.
    logger.info('')

    extra_ = ut.codeblock(
        r"""
        \begin{{table}}[h]
            \centering
            \caption{{Top {}/{} dimensions for {}}}
            \begin{{tabular}}{{lr}}
                \toprule
                Dimension & Importance \\
                \midrule
                {}
                \bottomrule
            \end{{tabular}}
        \end{{table}}
        """
    ).format(num_top, len(importances), task_key.replace('_', '-'), latex_str)

    # Use the string form of the output directory for every consumer,
    # matching the sibling write_* methods.
    dpath = str(self.dpath)
    fpath = ut.render_latex(extra_, dpath=dpath, fname=fname)
    ut.write_to(join(dpath, fname + '.tex'), latex_str)
    return fpath
def draw_prune(self):
    """
    Draw the results of the ``prune`` experiment for the ``match_state`` task.

    Plots the mean MCC against the number of feature dimensions, then takes
    the pruning step with the highest mean MCC and writes a word cloud and a
    LaTeX table of its top feature importances under ``self.dpath``.

    CommandLine:
        python -m wbia Chap4.draw importance GZ_Master1
        python -m wbia Chap4.draw importance PZ_Master1 photobomb_state
        python -m wbia Chap4.draw importance PZ_Master1 match_state
        python -m wbia Chap4.draw prune GZ_Master1,PZ_Master1
        python -m wbia Chap4.draw prune PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap4('PZ_Master1')
        >>> self = Chap4('GZ_Master1')
        >>> self = Chap4('PZ_MTEST')
    """
    task_key = 'match_state'
    expt_name = 'prune'
    results = self.ensure_results(expt_name)

    n_dims = results['n_dims']
    mdis_list = results['mdis_list']
    sub_reports = results['sub_reports']

    ave_mccs = np.array(
        [[r['metrics']['mcc']['ave/sum'] for r in rs] for rs in sub_reports]
    )
    # One mean MCC per row of ``sub_reports``; reused for the plot and for
    # picking the best pruning step below.
    u = ave_mccs.mean(axis=1)

    mpl.rcParams.update(TMP_RC)
    fig = pt.figure(fnum=1, doclf=True)
    pt.multi_plot(
        n_dims,
        {'mean': u},
        rcParams=TMP_RC,
        marker='',
        force_xticks=[min(n_dims)],
        ylabel='MCC',
        xlabel='# feature dimensions',
        ymin=0.5,
        ymax=1,
        xmin=1,
        xmax=n_dims[0],
        fnum=1,
        use_legend=False,
    )
    ax = pt.gca()
    ax.invert_xaxis()
    fig.set_size_inches([W / 2, H])
    fpath = join(str(self.dpath), expt_name + '.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))

    # Select the step with the highest mean MCC.
    idx = u.argmax()

    fig = pt.figure(fnum=2)
    n_to_mid = ut.dzip(n_dims, mdis_list)
    pruned_importance = n_to_mid[n_dims[idx]]
    pt.wordcloud(pruned_importance, ax=fig.axes[0])
    fname = 'wc_{}_pruned.png'.format(task_key)
    fig_fpath = join(str(self.dpath), fname)
    vt.imwrite(fig_fpath, pt.render_figure_to_image(fig, dpi=DPI))

    # Build the latex rows for the most important remaining dimensions.
    vals = pruned_importance.values()
    items = pruned_importance.items()
    top_dims = ut.sortedby(items, vals)[::-1]
    num_top = 10
    lines = []
    for k, v in top_dims[:num_top]:
        k = feat_alias(k)
        k = k.replace('_', '\\_')
        lines.append('\\tt{{{}}} & ${:.4f}$ \\\\'.format(k, v))
    latex_str = '\n'.join(ut.align_lines(lines, '&'))

    # MCC difference between the selected step and the first entry.
    increase = u[idx] - u[0]

    logger.info(latex_str)
    # Logger.info requires a message argument; log an explicit blank line.
    logger.info('')

    extra_ = ut.codeblock(
        r"""
        \begin{{table}}[h]
            \centering
            \caption{{Pruned top {}/{} dimensions for {} increases MCC by {:.4f}}}
            \begin{{tabular}}{{lr}}
                \toprule
                Dimension & Importance \\
                \midrule
                {}
                \bottomrule
            \end{{tabular}}
        \end{{table}}
        """
    ).format(
        num_top,
        len(pruned_importance),
        task_key.replace('_', '-'),
        increase,
        latex_str,
    )

    fname = 'pruned_feat_importance_{}'.format(task_key)
    ut.render_latex(extra_, dpath=self.dpath, fname=fname)
    ut.write_to(join(str(self.dpath), fname + '.tex'), latex_str)

    logger.info(ut.repr4(ut.sort_dict(n_to_mid[n_dims[idx]], 'vals', reverse=True)))
    logger.info(ut.repr4(ut.sort_dict(n_to_mid[n_dims[-1]], 'vals', reverse=True)))
def measure_thresh(self, pblm):
    """
    Plot the number of false positives against the decision threshold for
    the positive and negative classes of the ``match_state`` task, then call
    ``plot_vs('fpr', 'thresholds')`` on the negative-class confusions.
    """
    task_key = 'match_state'
    res = pblm.task_combo_res[task_key][self.clf_key][self.data_key]
    truth_colors = pblm.infr._get_truth_colors()

    pos_cfms = res.confusions(POSTV)
    pt.figure(fnum=1, doclf=True)
    axes = pt.gca()
    axes.plot(
        pos_cfms.thresholds,
        pos_cfms.n_fp,
        label='positive',
        color=truth_colors[POSTV],
    )

    neg_cfms = res.confusions(NEGTV)
    axes.plot(
        neg_cfms.thresholds,
        neg_cfms.n_fp,
        label='negative',
        color=truth_colors[NEGTV],
    )

    # The incomparable class is intentionally not drawn.
    axes.set_xlabel('thresholds')
    axes.set_ylabel('n_fp')
    axes.set_ylim(0, 20)
    axes.legend()

    neg_cfms.plot_vs('fpr', 'thresholds')
def _draw_score_hist(self, freqs, xlabel, fnum):
    """
    Draw a stacked bar histogram of negative and positive score frequencies.

    Args:
        freqs (dict): must provide the keys ``'bins'``, ``'neg_freq'`` and
            ``'pos_freq'``.
        xlabel (str): label of the x axis.
        fnum (int): figure number handed to the plotting call.

    Returns:
        the figure returned by ``pt.multi_plot``, resized to ``[W, H]``.
    """
    bins = freqs['bins']
    neg_freq = freqs['neg_freq']
    pos_freq = freqs['pos_freq']

    # Bar width is taken from the first bin spacing; pad the x-limits by
    # half a bar on either side.
    bar_width = np.diff(bins)[0]
    half = bar_width / 2
    plot_kw = dict(
        label_list=('negative', 'positive'),
        color_list=(pt.FALSE_RED, pt.TRUE_BLUE),
        kind='bar',
        width=bar_width,
        alpha=0.7,
        edgecolor='none',
        xlabel=xlabel,
        ylabel='frequency',
        fnum=fnum,
        pnum=(1, 1, 1),
        rcParams=TMP_RC,
        stacked=True,
        ytickformat='%.2f',
        xlim=(bins[0] - half, bins[-1] + half),
    )
    fig = pt.multi_plot(bins, (neg_freq, pos_freq), **plot_kw)
    pt.adjust_subplots(top=0.8, bottom=0.2, left=0.12, right=0.9)
    fig.set_size_inches([W, H])
    return fig
def draw_rerank(self):
    """
    Draw the CMC curves of the ``rerank`` experiment (ranking alone versus
    ranking followed by the classifier) and write them to
    ``<dpath>/rerank.png``.

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)

    expt_name = 'rerank'
    cdfs, infos = list(zip(*self.ensure_results(expt_name)))
    curves = [cdfs[0], cdfs[1]]  # LNBNN ranking first, classifier second

    fig = pt.figure(fnum=1)
    plot_cmcs(curves, ['ranking', 'rank+clf'], fnum=1)
    fig.set_size_inches([W, H * 0.6])

    # Query / database sizes must agree across both curves.
    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    pt.relative_text(
        'lowerleft',
        'qsize={}, dsize={}'.format(qsizes[0], dsizes[0]),
        ax=pt.gca(),
    )

    fpath = join(str(self.dpath), expt_name + '.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    if ut.get_argflag('--diskshow'):
        ut.startfile(fpath)
    return fpath
def draw_class_score_hist(self):
    """Plots distribution of positive and negative scores"""

    def _normalized_freqs(values, flags, edges):
        # Histogram the flagged / unflagged values separately and scale
        # each histogram so that it sums to one.
        pos = np.histogram(values[flags], edges)[0]
        neg = np.histogram(values[~flags], edges)[0]
        return pos / pos.sum(), neg / neg.sum()

    task_key = 'match_state'
    results = self.ensure_results('all')
    task_combo_res = results['task_combo_res']
    data_key = results['data_key']
    clf_key = results['clf_key']
    res = task_combo_res[task_key][clf_key][data_key]

    # Learned positive-class probabilities
    is_pos = res.target_bin_df[POSTV]
    probs = res.probs_df[POSTV]
    prob_bins = np.linspace(0, 1, 100)
    pos_freq, neg_freq = _normalized_freqs(probs, is_pos, prob_bins)
    score_hist_pos = {
        'bins': prob_bins,
        'pos_freq': pos_freq,
        'neg_freq': neg_freq,
    }

    # LNBNN scores
    lnbnn_xy = results['lnbnn_xy']
    lnbnn_scores = lnbnn_xy['score_lnbnn_1vM'].values
    lnbnn_pos = lnbnn_xy[POSTV].values
    # Get 95% of the data at least
    n_tail = max(1, int(len(lnbnn_scores) * 0.05))
    maxbin = lnbnn_scores[lnbnn_scores.argsort()][-n_tail]
    lnbnn_bins = np.linspace(0, max(maxbin, 10), 100)
    pos_freq, neg_freq = _normalized_freqs(lnbnn_scores, lnbnn_pos, lnbnn_bins)
    score_hist_lnbnn = {
        'bins': lnbnn_bins,
        'pos_freq': pos_freq,
        'neg_freq': neg_freq,
    }

    fig1 = self._draw_score_hist(score_hist_pos, 'positive probability', 1)
    fig2 = self._draw_score_hist(score_hist_lnbnn, 'LNBNN score', 2)

    out_dpath = str(self.dpath)
    fname = 'score_hist_pos_{}.png'.format(data_key)
    vt.imwrite(join(out_dpath, fname), pt.render_figure_to_image(fig1, dpi=DPI))
    fname = 'score_hist_lnbnn.png'
    vt.imwrite(join(out_dpath, fname), pt.render_figure_to_image(fig2, dpi=DPI))
def draw_mcc_thresh(self, task_key):
    """
    Plot MCC as a function of the decision threshold for each class of a
    task and write the figure to ``<dpath>/mcc_thresh_<task_key>.png``.

    CommandLine:
        python -m wbia Chap4.draw mcc_thresh GZ_Master1 match_state
        python -m wbia Chap4.draw mcc_thresh PZ_Master1 match_state
        python -m wbia Chap4.draw mcc_thresh GZ_Master1 photobomb_state
        python -m wbia Chap4.draw mcc_thresh PZ_Master1 photobomb_state
    """
    mpl.rcParams.update(TMP_RC)

    results = self.ensure_results('all')
    data_key = results['data_key']
    clf_key = results['clf_key']
    task_combo_res = results['task_combo_res']

    code_to_nice = self.task_nice_lookup[task_key]

    if task_key == 'photobomb_state':
        classes = ['pb']
    elif task_key == 'match_state':
        classes = [POSTV, NEGTV, INCMP]

    res = task_combo_res[task_key][clf_key][data_key]

    curves = []
    for class_name in classes:
        cfms = res.confusions(class_name)
        # Classes with at most two thresholds are not drawn.
        if len(cfms.thresholds) <= 2:
            continue
        class_nice = code_to_nice[class_name]
        best = cfms.mcc.argmax()
        t = cfms.thresholds[best]
        mcc = cfms.mcc[best]
        curves.append(
            {
                'label': class_nice + ', t={:.2f}, mcc={:.2f}'.format(t, mcc),
                'thresh': cfms.thresholds,
                'mcc': cfms.mcc,
            }
        )

    fig = pt.figure(fnum=1)
    ax = pt.gca()
    for curve in curves:
        ax.plot(curve['thresh'], curve['mcc'], label='%s' % (curve['label']))
    ax.set_xlabel('threshold')
    ax.set_ylabel('MCC')
    ax.legend()
    pt.adjust_subplots(top=0.8, bottom=0.2, left=0.12, right=0.9)
    fig.set_size_inches([W, H])

    fname = 'mcc_thresh_{}.png'.format(task_key)
    fig_fpath = join(str(self.dpath), fname)
    vt.imwrite(fig_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    if ut.get_argflag('--diskshow'):
        ut.startfile(fig_fpath)
def draw_roc(self, task_key):
    """
    Draw ROC curves for a task and write them to
    ``<dpath>/roc_<task_key>.png``.

    For ``match_state`` the learned classifier is compared against the
    LNBNN score; for any other task only the learned ``'pb'`` curve is
    drawn.

    CommandLine:
        python -m wbia Chap4.draw roc GZ_Master1 photobomb_state
        python -m wbia Chap4.draw roc GZ_Master1 match_state
    """
    mpl.rcParams.update(TMP_RC)

    results = self.ensure_results('all')
    data_key = results['data_key']
    clf_key = results['clf_key']
    task_combo_res = results['task_combo_res']
    lnbnn_xy = results['lnbnn_xy']

    def _curve(label, cfms):
        return {'label': label, 'fpr': cfms.fpr, 'tpr': cfms.tpr, 'auc': cfms.auc}

    if task_key == 'match_state':
        lnbnn_scores = lnbnn_xy['score_lnbnn_1vM'].values
        lnbnn_truth = lnbnn_xy[POSTV].values
        target_class = POSTV
        res = task_combo_res[task_key][clf_key][data_key]
        lnbnn_cfms = vt.ConfusionMetrics().fit(lnbnn_scores, lnbnn_truth)
        clf_cfms = res.confusions(target_class)
        roc_curves = [_curve('LNBNN', lnbnn_cfms), _curve('learned', clf_cfms)]

        # Log the learned classifier's operating points at fixed recall.
        at_metric = 'tpr'
        for at_value in [0.25, 0.5, 0.75]:
            info = ut.odict()
            for want_metric in ['fpr', 'n_false_pos', 'n_true_pos']:
                key = '{}_@_{}={:.2f}'.format(want_metric, at_metric, at_value)
                info[key] = clf_cfms.get_metric_at_metric(
                    want_metric, at_metric, at_value
                )
            logger.info(ut.repr4(info, align=True, precision=8))
    else:
        target_class = 'pb'
        res = task_combo_res[task_key][clf_key][data_key]
        roc_curves = [_curve('learned', res.confusions(target_class))]

    fig = pt.figure(fnum=1)
    ax = pt.gca()
    for curve in roc_curves:
        legend_label = '%s AUC=%.2f' % (curve['label'], curve['auc'])
        ax.plot(curve['fpr'], curve['tpr'], label=legend_label)
    ax.set_xlabel('false positive rate')
    ax.set_ylabel('true positive rate')
    ax.legend()
    pt.adjust_subplots(top=0.8, bottom=0.2, left=0.12, right=0.9)
    fig.set_size_inches([W, H])

    fname = 'roc_{}.png'.format(task_key)
    fig_fpath = join(str(self.dpath), fname)
    vt.imwrite(fig_fpath, pt.render_figure_to_image(fig, dpi=DPI))
def draw_wordcloud(self, task_key):
    """
    Render the feature importances of ``task_key`` as a word cloud and write
    it to ``<dpath>/wc_<task_key>.png``. Feature names are passed through
    ``feat_alias`` first.
    """
    raw_importance = self.ensure_results('all')['importance'][task_key]
    importances = ut.map_keys(feat_alias, raw_importance)

    fig = pt.figure(fnum=1)
    pt.wordcloud(importances, ax=fig.axes[0])

    out_fpath = join(str(self.dpath), 'wc_{}.png'.format(task_key))
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
@classmethod
def draw_tagged_pair(cls):
    """
    Draw the first annotation pair in ``PZ_Master1`` whose annotmatch tag
    text contains ``leftrightface`` and write the match visualization as a
    jpg under ``base_dpath``.
    """
    import wbia

    # ibs = wbia.opendb(defaultdb='GZ_Master1')
    ibs = wbia.opendb(defaultdb='PZ_Master1')

    query_tag = 'leftrightface'

    rowids = ibs._get_all_annotmatch_rowids()
    raw_texts = ibs.get_annotmatch_tag_text(rowids)
    texts = [t if t is not None else '' for t in raw_texts]
    tags = [t.split(';') for t in texts]
    logger.info(ut.repr4(ut.dict_hist(ut.flatten(tags))))

    # Keep only the rows whose tag text mentions the query tag.
    has_tag = [query_tag in t.lower() for t in texts]
    edges = ibs.get_annotmatch_aids(ut.compress(rowids, has_tag))

    # The facematch leftright side example
    # edge = (5161, 5245)
    edge = edges[0]

    infr = wbia.AnnotInference(ibs=ibs, aids=edge, verbose=10)
    infr.reset_feedback('annotmatch', apply=True)
    match = infr._exec_pairwise_match([edge])[0]

    if False:
        # Fix the example tags
        infr.add_feedback(
            edge,
            'match',
            tags=['facematch', 'leftrightface'],
            user_id='qt-hack',
            confidence='pretty_sure',
        )
        infr.write_wbia_staging_feedback()
        infr.write_wbia_annotmatch_feedback()

    # THE DEPCACHE IS BROKEN FOR ANNOTMATCH APPARENTLY! >:(
    # Redo matches
    match.annot1._mutable = True
    match.annot2._mutable = True
    for key in ['vecs', 'kpts', '_feats', 'flann']:
        for annot in (match.annot1, match.annot2):
            if key in annot:
                del annot[key]
    match.apply_all({})

    fig = pt.figure(fnum=1, clf=True)
    ax = pt.gca()

    mpl.rcParams.update(TMP_RC)
    show_kw = dict(
        vert=False,
        heatmask=True,
        show_lines=False,
        show_ell=False,
        show_ori=False,
        show_eig=False,
        modifysize=True,
    )
    match.show(ax, **show_kw)

    self = cls()

    fname = 'custom_match_{}_{}_{}'.format(query_tag, *edge)
    dpath = pathlib.Path(ut.truepath(self.base_dpath))
    fpath = join(str(dpath), fname + '.jpg')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
def custom_single_hard_case(self):
    """
    Re-draw one hard-coded ``match_state`` failure case (edge ``(383, 503)``)
    with a custom matching configuration and write it to
    ``<dpath>/cases_match_state/<name>_custom.jpg``.

    Raises:
        ValueError: if ``self.hard_cases['match_state']`` has no case for
            the hard-coded edge.

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *
        >>> defaultdb = 'PZ_PB_RF_TRAIN'
        >>> #defaultdb = 'GZ_Master1'
        >>> defaultdb = 'PZ_MTEST'
        >>> self = Chap4.collect(defaultdb)
        >>> self.dbname = 'PZ_PB_RF_TRAIN'
    """
    task_key = 'match_state'
    edge = (383, 503)

    # Fail with a clear message instead of leaving ``case`` unbound when
    # the edge is not among the recorded hard cases.
    case = next(
        (c for c in self.hard_cases[task_key] if c['edge'] == edge), None
    )
    if case is None:
        raise ValueError(
            'no {} hard case found for edge {!r}'.format(task_key, edge)
        )

    import wbia
    from wbia import core_annots

    ibs = wbia.opendb(self.dbname)

    config = ut.hashdict(
        {
            'augment_orientation': True,
            'ratio_thresh': 0.8,
            'checks': 80,
            'sver_xy_thresh': 0.02,
            'sver_ori_thresh': 3,
            'Knorm': 3,
            'symmetric': True,
        }
    )

    aid1, aid2 = case['edge']
    real_name = case['real']
    pred_name = case['pred']

    code_to_nice = self.task_nice_lookup[task_key]
    real_nice, pred_nice = ut.take(code_to_nice, [real_name, pred_name])
    fname = 'fail_{}_{}_{}_{}'.format(real_nice, pred_nice, aid1, aid2)

    # Build the x-label describing the truth, the prediction and the
    # predicted probabilities.
    probs = case['probs'].to_dict()
    order = list(code_to_nice.values())
    order = ut.setintersect(order, probs.keys())
    probs = ut.map_dict_keys(code_to_nice, probs)
    probstr = ut.repr2(probs, precision=2, strkeys=True, nobr=True, key_order=order)
    xlabel = 'real={}, pred={},\n{}'.format(real_nice, pred_nice, probstr)

    # Recompute the match under the custom config; the match stored on the
    # case itself is not used.
    match_list = ibs.depc.get(
        'pairwise_match', ([aid1], [aid2]), 'match', config=config
    )
    match = match_list[0]
    configured_lazy_annots = core_annots.make_configured_annots(
        ibs, [aid1], [aid2], config, config, preload=True
    )
    match.annot1 = configured_lazy_annots[config][aid1]
    match.annot2 = configured_lazy_annots[config][aid2]
    match.config = config

    fig = pt.figure(fnum=1, clf=True)
    ax = pt.gca()

    mpl.rcParams.update(TMP_RC)
    match.show(
        ax,
        vert=False,
        heatmask=True,
        show_lines=False,
        show_ell=False,
        show_ori=False,
        show_eig=False,
        modifysize=True,
    )
    ax.set_xlabel(xlabel)

    subdir = 'cases_{}'.format(task_key)
    dpath = join(str(self.dpath), subdir)
    fpath = join(str(dpath), fname + '_custom.jpg')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
[docs]@ut.reloadable_class
class Chap3Measures(object):
def measure_baseline(self):
    """
    Measure the ranking CDF of the default pipeline configuration, store it
    under ``self.expt_results['baseline']`` and save it to
    ``<dpath>/baseline.pkl``.

    Ignore:
        >>> from wbia.scripts.thesis import *
        >>> self = Chap3('GZ_Master1')
        >>> self._precollect()
    """
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1], extra_dbsize_fracs=[1]
    )
    # Only the first database sample is evaluated.
    daids, info = daids_list[0], info_list[0]

    pcfg = {}
    cdf = _ranking_cdf(ibs, qaids, daids, pcfg)
    results = [(cdf, ut.update_dict(info.copy(), {'pcfg': pcfg}))]

    expt_name = 'baseline'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_foregroundness_intra(self):
    """
    Measure ranking histograms with ``featweight_enabled`` off and on for
    every sample produced by ``Sampler._intra_enc``, store them under
    ``self.expt_results['foregroundness_intra']`` and save them to
    ``<dpath>/foregroundness_intra.pkl``.
    """
    ibs = self.ibs
    samples = Sampler._intra_enc(ibs, self.aids_pool)

    results = []
    for sample in samples:
        qaids, daids = sample.qaids, sample.daids
        info = {'qsize': len(qaids), 'dsize': len(daids)}
        for cfgdict in ut.all_dict_combinations(
            {'featweight_enabled': [False, True]}
        ):
            hist = _ranking_hist(ibs, qaids, daids, cfgdict)
            info = ut.update_dict(info.copy(), {'pcfg': cfgdict})
            results.append((hist, info))

    expt_name = 'foregroundness_intra'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def draw_foregroundness_intra(self):
    """
    Draw CMC curves for the ``foregroundness_intra`` experiment, one curve
    per ``featweight_enabled`` setting, and write them to
    ``<dpath>/foregroundness_intra.png``.

    CommandLine:
        python -m wbia Chap3.measure foregroundness_intra --dbs=GZ_Master1,PZ_Master1
        python -m wbia Chap3.draw foregroundness_intra --dbs=GZ_Master1,PZ_Master1 --diskshow

    Returns:
        str: path of the written image.
    """
    # The name is fixed; the stack-frame lookup that used to precede this
    # assignment was immediately overwritten and has been removed.
    expt_name = 'foregroundness_intra'

    mpl.rcParams.update(TMP_RC)

    results = self.ensure_results(expt_name)
    hists, infos = list(zip(*results))
    pcfgs = ut.take_column(infos, 'pcfg')
    df = pd.DataFrame.from_records(infos)
    df['hists'] = hists
    df['fg_on'] = ut.take_column(pcfgs, 'featweight_enabled')

    cdfs = []
    labels = []
    for fg, group in df.groupby('fg_on'):
        labels.append('fg=T' if fg else 'fg=F')
        # Sum the zero-padded per-sample histograms and normalize to a CDF.
        hists = vt.pad_vstack(group['hists'], fill_value=0)
        hist = hists.sum(axis=0)
        cdf = np.cumsum(hist) / sum(hist)
        cdfs.append(cdf)
        # NOTE(review): the size text below reflects the last group only;
        # presumably both groups cover the same samples -- confirm.
        qsize = str(group['qsize'].sum())
        u, s = group['dsize'].mean(), group['dsize'].std()
        dsize = ave_str(u, s, precision=1)

    fig = plot_cmcs(cdfs, labels, ymin=0.5)
    fig.set_size_inches([W, H * 0.6])
    nonvaried_text = 'qsize={:s}, dsize={:s}'.format(qsize, dsize)
    pt.relative_text('lowerleft', nonvaried_text, ax=pt.gca())
    fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return fpath
def measure_foregroundness(self):
    """
    Measure ranking CDFs with ``fg_on`` disabled and enabled on a
    ``same_occur`` sample, store them under
    ``self.expt_results['foregroundness']`` and save them to
    ``<dpath>/foregroundness.pkl``.
    """
    ibs = self.ibs
    sample_kw = dict(
        denc_per_name=[1],
        extra_dbsize_fracs=[1],
        method='same_occur',
        # method='same_enc'
    )
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, **sample_kw
    )
    daids, info = daids_list[0], info_list[0]

    # Disabled first, enabled second.
    results = []
    for fg_on in (False, True):
        cfgdict = {'fg_on': fg_on}
        cdf = _ranking_cdf(ibs, qaids, daids, cfgdict)
        results.append((cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})))

    expt_name = 'foregroundness'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_invar(self):
    """
    Measure ranking CDFs for four combinations of ``affine_invariance`` and
    ``query_rotation_heuristic`` (``rotation_invariance`` is always off),
    store them under ``self.expt_results['invar']`` and save them to
    ``<dpath>/invar.pkl``.
    """
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1], extra_dbsize_fracs=[1]
    )
    daids, info = daids_list[0], info_list[0]

    # (affine_invariance, query_rotation_heuristic) in evaluation order.
    # Configurations with rotation_invariance=True are not evaluated.
    flag_pairs = [(True, False), (False, False), (True, True), (False, True)]
    cfgdict_list = [
        {
            'affine_invariance': affine,
            'rotation_invariance': False,
            'query_rotation_heuristic': heuristic,
        }
        for affine, heuristic in flag_pairs
    ]

    results = []
    for cfgdict in cfgdict_list:
        cdf = _ranking_cdf(ibs, qaids, daids, cfgdict)
        results.append((cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})))

    expt_name = 'invar'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_smk(self):
    """
    Measure the name-ranking CDF of the SMK pipeline followed by that of the
    default LNBNN pipeline, store both under ``self.expt_results['smk']``
    and save them to ``<dpath>/smk.pkl``.

    CommandLine:
        python -m wbia Chap3.measure smk --dbs=GZ_Master1,PZ_Master1
        python -m wbia Chap3.draw smk --dbs=GZ_Master1,PZ_Master1 --diskshow
    """
    from wbia.algo.smk.smk_pipeline import SMKRequest

    # ibs = wbia.opendb('PZ_MTEST')
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1], extra_dbsize_fracs=[1]
    )
    daids, info = daids_list[0], info_list[0]
    results = []

    # SMK pipeline
    smk_config = {'nAssign': 1, 'num_words': 8000, 'sv_on': True}
    qreq_ = SMKRequest(ibs, qaids, daids, smk_config)
    qreq_.ensure_data()
    cm_list = [cm.extend_results(qreq_) for cm in qreq_.execute()]
    name_ranks = [cm.get_name_ranks([cm.qnid])[0] for cm in cm_list]
    rank_bins = np.arange(len(qreq_.dnids))
    rank_hist = np.histogram(name_ranks, bins=rank_bins)[0]
    smk_cdf = np.cumsum(rank_hist) / sum(rank_hist)
    results.append((smk_cdf, ut.update_dict(info.copy(), {'pcfg': smk_config})))

    # LNBNN pipeline
    lnbnn_config = {}
    lnbnn_cdf = _ranking_cdf(ibs, qaids, daids, lnbnn_config)
    results.append(
        (lnbnn_cdf, ut.update_dict(info.copy(), {'pcfg': lnbnn_config}))
    )

    expt_name = 'smk'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_nsum(self):
    """
    Measure ranking CDFs for the ``nsum`` and ``csum`` score methods on each
    sampled database, store them under ``self.expt_results['nsum']`` and
    save them to ``<dpath>/nsum.pkl``.

    CommandLine:
        python -m wbia Chap3.measure nsum --dbs=GZ_Master1,PZ_Master1
        python -m wbia Chap3.draw nsum --dbs=GZ_Master1,PZ_Master1 --diskshow

    Ignore:
        from wbia.scripts.thesis import *
        self = Chap3('GZ_Master1')
        self = Chap3('PZ_Master1')
        self = Chap3('PZ_MTEST')
        self._precollect()
    """
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1, 2, 3], extra_dbsize_fracs=[1]
    )

    base = {'query_rotation_heuristic': True}
    cfgdict1 = ut.dict_union(
        base, {'score_method': 'nsum', 'prescore_method': 'nsum'}
    )
    cfgdict2 = ut.dict_union(
        base, {'score_method': 'csum', 'prescore_method': 'csum'}
    )

    # For every database sample the nsum result is stored before the csum
    # result.
    results = []
    for daids, info in zip(daids_list, info_list):
        for cfgdict in (cfgdict1, cfgdict2):
            cdf = _ranking_cdf(ibs, qaids, daids, cfgdict)
            results.append((cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})))

    if False:
        # Disabled debugging code kept for reference.
        self._precollect()
        ibs = self.ibs
        qaids, daids_list, info_list = Sampler._varied_inputs(
            self.ibs, self.aids_pool, denc_per_name=[1, 2, 3], extra_dbsize_fracs=[1]
        )
        # Check dpername issue
        base = {'query_rotation_heuristic': False, 'K': 1, 'sv_on': True}
        cfgdict1 = ut.dict_union(
            base, {'score_method': 'nsum', 'prescore_method': 'nsum'}
        )
        cfgdict2 = ut.dict_union(
            base, {'score_method': 'csum', 'prescore_method': 'csum'}
        )

        qaids = [2491]

        info = {}
        daids = daids_list[0]
        a = ibs.annots(daids)
        daids = a.compress(ut.flag_unique_items(a.nids)).aids

        while True:
            qreq1_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict1)
            qreq2_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict2)
            cm_list1 = qreq1_.execute(use_cache=False)
            cm_list2 = qreq2_.execute(use_cache=False)
            cm1 = cm_list1[0]
            cm2 = cm_list2[0]
            assert cm1 == cm2

        # cm_list1 = [cm.extend_results(qreq1_) for cm in cm_list1]
        # cm_list2 = [cm.extend_results(qreq2_) for cm in cm_list2]
        # cm_list1 = [cm.compress_results() for cm in cm_list1]
        # cm_list2 = [cm.compress_results() for cm in cm_list2]

        name_ranks1 = [cm.get_name_ranks([cm.qnid])[0] for cm in cm_list1]
        name_ranks2 = [cm.get_name_ranks([cm.qnid])[0] for cm in cm_list2]

        idxs = np.where(np.array(name_ranks1) != np.array(name_ranks2))[0]
        logger.info('idxs = %r' % (idxs,))
        logger.info('ranks1 = {}'.format(ut.take(name_ranks1, idxs)))
        logger.info('ranks2 = {}'.format(ut.take(name_ranks2, idxs)))
        if len(idxs) > 0:
            cm1 = cm_list1[idxs[0]]  # NOQA
            cm2 = cm_list2[idxs[0]]  # NOQA

    expt_name = 'nsum'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_dbsize(self):
    """
    Measure ranking CDFs over the database samples produced by
    ``Sampler._varied_inputs`` with ``denc_per_name=[1, 2]`` and
    ``extra_dbsize_fracs=[0, 1.0]``, store them under
    ``self.expt_results['dsize']`` and save them to ``<dpath>/dsize.pkl``.
    """
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1, 2], extra_dbsize_fracs=[0, 1.0]
    )
    cfgdict = {
        'query_rotation_heuristic': True,
    }

    results = []
    for daids, info in zip(daids_list, info_list):
        cdf = _ranking_cdf(ibs, qaids, daids, cfgdict)
        # One copy is enough to keep the sampler's info dict untouched.
        results.append((cdf, ut.update_dict(info.copy(), {'pcfg': cfgdict})))

    # The experiment is stored under the key 'dsize', not 'dbsize'.
    expt_name = 'dsize'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
    # A trailing, unused ``cdfs, infos = zip(*results)`` was removed: it
    # raised ValueError on empty results after the data was already saved.
def measure_kexpt(self):
    """
    Measure ranking CDFs for each ``K`` in ``[1, 2, 4, 6]`` on every sampled
    database, store them under ``self.expt_results['kexpt']`` and save them
    to ``<dpath>/kexpt.pkl``.
    """
    ibs = self.ibs
    qaids, daids_list, info_list = Sampler._varied_inputs(
        self.ibs, self.aids_pool, denc_per_name=[1, 2], extra_dbsize_fracs=[0, 1.0]
    )
    cfg_grid = {
        'query_rotation_heuristic': True,
        'K': [1, 2, 4, 6],
    }

    # Configurations vary in the outer loop, database samples in the inner.
    results = []
    for cfgdict in ut.all_dict_combinations(cfg_grid):
        for daids, info in zip(daids_list, info_list):
            entry_info = ut.update_dict(info.copy(), {'pcfg': cfgdict})
            results.append((_ranking_cdf(ibs, qaids, daids, cfgdict), entry_info))

    expt_name = 'kexpt'
    self.expt_results[expt_name] = results
    ut.save_data(join(self.dpath, expt_name + '.pkl'), results)
def measure_dbstats(self):
    """
    Collect encounter, quality and viewpoint statistics for the annotation
    pool, store them under ``self.expt_results['dbstats']`` and save them to
    ``<dpath>/dbstats.pkl``.

    Returns:
        dict: with keys ``'enc'``, ``'qual'`` and ``'view'``.
    """
    if self.ibs is None:
        self._precollect()

    # self.ibs.print_annot_stats(self.aids_pool)
    annots = self.ibs.annots(self.aids_pool)

    # Group annotations into encounters, then encounters by name id.
    encounters = annots.group(annots.encounter_text)[1]
    nids = ut.take_column(self.ibs._annot_groups(encounters).nids, 0)
    nid_to_enc = ut.group_items(encounters, nids)

    single_encs = {nid: e for nid, e in nid_to_enc.items() if len(e) == 1}
    multi_encs = {nid: e for nid, e in nid_to_enc.items() if len(e) > 1}
    multi_aids = ut.flatten(ut.flatten(multi_encs.values()))

    enc_deltas = []
    for encs_ in nid_to_enc.values():
        # NOTE(review): only the first encounter of each name is timed --
        # confirm this is intended.
        times = encs_[0].image_unixtimes_asfloat
        enc_deltas.append(times.max() - times.min())
    max_enc_timedelta = max(enc_deltas)
    logger.info(
        'max enc timedelta = %r' % (ut.get_unix_timedelta_str(max_enc_timedelta))
    )

    multi_stats = self.ibs.get_annot_stats_dict(multi_aids)

    enc_info = ut.odict()
    enc_info['species_nice'] = self.species_nice
    enc_info['n_singleton_names'] = len(single_encs)
    enc_info['n_resighted_names'] = len(multi_encs)
    enc_info['n_encounter_per_resighted_name'] = ave_str(
        *ut.take(multi_stats['enc_per_name'], ['mean', 'std']), precision=1
    )
    n_annots_per_enc = ut.lmap(len, encounters)
    enc_info['n_annots_per_encounter'] = ave_str(
        np.mean(n_annots_per_enc), np.std(n_annots_per_enc), precision=1
    )
    enc_info['n_annots'] = sum(n_annots_per_enc)

    # Fold the 'UNKNOWN' and None buckets into a single 'None' bucket.
    qual_info = ut.dict_hist(annots.quality_texts)
    qual_info['None'] = qual_info.pop('UNKNOWN', 0)
    qual_info['None'] += qual_info.pop(None, 0)
    qual_info['species_nice'] = self.species_nice

    view_info = ut.dict_hist(annots.viewpoint_code)
    view_info['None'] = view_info.pop('UNKNOWN', 0)
    view_info['None'] += view_info.pop(None, 0)
    view_info['species_nice'] = self.species_nice

    info = {
        'enc': enc_info,
        'qual': qual_info,
        'view': view_info,
    }

    expt_name = 'dbstats'
    self.expt_results[expt_name] = info
    ut.save_data(join(self.dpath, expt_name + '.pkl'), info)
    return info
[docs]@ut.reloadable_class
class Chap3Draw(object):
def draw_baseline(self):
    """
    Draw the CMC curve of the baseline experiment and write it to
    ``<dpath>/<expt_name>.png``, where the experiment name is derived from
    this method's name.

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = ut.get_stack_frame().f_code.co_name.replace('draw_', '')
    cdfs, infos = list(zip(*self.ensure_results(expt_name)))

    # Only the first curve is drawn.
    fig = plot_cmcs([cdfs[0]], ['baseline'], fnum=1)
    fig.set_size_inches([W, H * 0.6])

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    size_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', size_text, ax=pt.gca())

    out_fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return out_fpath
def draw_smk(self):
    """
    Draw the CMC curves of the SMK experiment (labelled ``smk`` and
    ``baseline``) and write them to ``<dpath>/<expt_name>.png``, where the
    experiment name is derived from this method's name.

    CommandLine:
        wbia Chap3.measure smk --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw smk --dbs=GZ_Master1,PZ_Master1

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = ut.get_stack_frame().f_code.co_name.replace('draw_', '')
    cdfs, infos = list(zip(*self.ensure_results(expt_name)))

    fig = plot_cmcs(cdfs, ['smk', 'baseline'], fnum=1, ymin=0.5)
    fig.set_size_inches([W, H * 0.6])

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    size_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', size_text, ax=pt.gca())

    out_fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return out_fpath
def draw_foregroundness(self):
    """
    Draw the CMC curves of the foregroundness experiment (labelled ``fg=F``
    and ``fg=T``) and write them to ``<dpath>/<expt_name>.png``, where the
    experiment name is derived from this method's name.

    CommandLine:
        wbia Chap3.measure foregroundness --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw foregroundness --dbs=GZ_Master1,PZ_Master1

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = ut.get_stack_frame().f_code.co_name.replace('draw_', '')
    cdfs, infos = list(zip(*self.ensure_results(expt_name)))

    fig = plot_cmcs(cdfs, ['fg=F', 'fg=T'], fnum=1, ymin=0.5)
    fig.set_size_inches([W, H * 0.6])

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    size_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', size_text, ax=pt.gca())

    out_fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return out_fpath
def draw_invar(self):
    """
    Draw the CMC curves of the invariance experiment and write them to
    ``<dpath>/<expt_name>.png``, where the experiment name is derived from
    this method's name. Results measured with ``rotation_invariance``
    enabled are skipped, and that key is left out of the curve labels.

    CommandLine:
        wbia Chap3.measure invar --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw invar --dbs=GZ_Master1,PZ_Master1

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = ut.get_stack_frame().f_code.co_name.replace('draw_', '')
    results = self.ensure_results(expt_name)

    ALIAS_KEYS = ut.invert_dict(
        {
            'RI': 'rotation_invariance',
            'AI': 'affine_invariance',
            'QRH': 'query_rotation_heuristic',
        }
    )
    results = [
        (c, i) for c, i in results if not i['pcfg'].get('rotation_invariance', False)
    ]
    cdfs, infos = list(zip(*results))

    # Build label configs on copies. Deleting the key from the stored pcfg
    # dicts would mutate the cached results, and a second draw would then
    # fail with KeyError.
    pcfgs = [
        {k: v for k, v in pcfg.items() if k != 'rotation_invariance'}
        for pcfg in ut.take_column(infos, 'pcfg')
    ]

    labels = [ut.get_cfg_lbl(ut.map_keys(ALIAS_KEYS, pcfg))[1:] for pcfg in pcfgs]
    labels = ut.lmap(label_alias, labels)
    fig = plot_cmcs(cdfs, labels, fnum=1, ymin=0.5)
    fig.set_size_inches([W, H * 0.6])

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    nonvaried_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', nonvaried_text, ax=pt.gca())

    fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return fpath
def draw_nsum(self):
    """
    Draw the CMC curves of the ``nsum`` experiment, labelling each curve
    with its scoring mechanism alias and ``t_dpername``, and write them to
    ``<dpath>/nsum.png``.

    CommandLine:
        wbia Chap3.measure nsum --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw nsum --dbs=GZ_Master1,PZ_Master1

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = 'nsum'
    cdfs, infos = list(zip(*self.ensure_results(expt_name)))

    # Display names for the score methods.
    alias = {
        'nsum': 'fmech',
        'csum': 'amech',
    }
    labels = []
    for info in infos:
        mech = alias[info['pcfg']['score_method']]
        labels.append(mech + ',dpername={}'.format(info['t_dpername']))

    fig = plot_cmcs(cdfs, labels, fnum=1, ymin=0.5)

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    size_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', size_text, ax=pt.gca())
    fig.set_size_inches([W, H * 0.6])

    out_fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return out_fpath
def draw_nsum_simple(self):
    """
    Disabled one-off variant of the ``nsum`` drawing. It raises immediately;
    the code after the ``raise`` is unreachable and kept for reference only.

    CommandLine:
        wbia Chap3.measure nsum --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw nsum --dbs=GZ_Master1,PZ_Master1

    Ignore:
        >>> from wbia.scripts.thesis import *  # NOQA
        >>> self = Chap3('PZ_Master1')

    Raises:
        Exception: always.
    """
    raise Exception('hacked')

    mpl.rcParams.update(TMP_RC)
    # Results are read from a hard-coded pickle rather than ensure_results.
    pkl_fpath = '/home/joncrall/latex/crall-thesis-2017/figures3/PZ_Master1/nsum.pkl'
    cdfs, infos = list(zip(*ut.load_data(pkl_fpath)))

    alias = {
        'nsum': 'fmech',
        'csum': 'amech',
    }
    labels = []
    for info in infos:
        mech = alias[info['pcfg']['score_method']]
        labels.append(mech + ',dpername={}'.format(info['t_dpername']))

    # hack: keep every second entry only
    cdfs = cdfs[::2]
    labels = labels[::2]
    infos = infos[::2]

    fig = plot_cmcs(cdfs, labels, fnum=1, ymin=0.5)

    qsizes = ut.take_column(infos, 'qsize')
    dsizes = ut.take_column(infos, 'dsize')
    assert ut.allsame(qsizes) and ut.allsame(dsizes)
    size_text = 'qsize={}, dsize={}'.format(qsizes[0], dsizes[0])
    pt.relative_text('lowerleft', size_text, ax=pt.gca())
    fig.set_size_inches([W, H * 0.6])

    ut.ensuredir(self.dpath)
    fpath = join(self.dpath, 'nsum_simple.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return fpath
def draw_kexpt(self):
    """
    Draw the CMC curves of the K experiment, one subplot per database size,
    and write them to ``<dpath>/<expt_name>.png``, where the experiment name
    is derived from this method's name.

    CommandLine:
        wbia Chap3.measure kexpt --dbs=GZ_Master1,PZ_Master1
        wbia Chap3.draw kexpt --dbs=GZ_Master1,PZ_Master1 --diskshow

    Returns:
        str: path of the written image.
    """
    mpl.rcParams.update(TMP_RC)
    expt_name = ut.get_stack_frame().f_code.co_name.replace('draw_', '')
    results = self.ensure_results(expt_name)

    cdfs, infos = list(zip(*results))
    pcfgs = ut.take_column(infos, 'pcfg')
    table = pd.DataFrame.from_records(infos)
    table['cdfs'] = cdfs
    table['K'] = ut.take_column(pcfgs, 'K')
    # Rows measured with K == 10 are excluded from the figure.
    table = table[table['K'] != 10]

    fig = pt.figure(fnum=1)
    dsize_groups = list(table.groupby('dsize'))
    next_pnum = pt.make_pnum_nextgen(nCols=1, nSubplots=len(dsize_groups))

    shown_columns = ['K', 'qsize', 'dsize', 't_dpername']
    for _, group in dsize_groups:
        shown = group[shown_columns].rename(columns={'t_dpername': 'dpername'})
        column_order = shown.columns.tolist()
        cfgs = [ut.order_dict_by(row, column_order) for row in shown.to_dict('records')]
        # Parameters shared by all curves go into the corner text; the
        # varying ones become the curve labels.
        nonvaried_kw, varied_kws = ut.partition_varied_cfg_list(cfgs)
        curve_labels = [ut.get_cfg_lbl(kw)[1:] for kw in varied_kws]
        plot_cmcs(group['cdfs'].values, curve_labels, fnum=1, pnum=next_pnum(), ymin=0.5)
        ax = pt.gca()
        pt.relative_text('lowerleft', ut.get_cfg_lbl(nonvaried_kw)[1:], ax=ax)

    pt.adjust_subplots(
        top=0.9, bottom=0.1, left=0.12, right=0.9, hspace=0.3, wspace=0.2
    )
    fig.set_size_inches([W, H * 1.9])

    out_fpath = join(self.dpath, expt_name + '.png')
    vt.imwrite(out_fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return out_fpath
def draw_all(self):
    """
    Draw every available Chapter 3 experiment figure for this database.

    Figures are only drawn when the database name contains 'PZ' or 'GZ'.
    For each known experiment present in ``self.expt_results`` the
    matching ``draw_<name>`` method is called; if it raises ``KeyError``
    the experiment is re-measured via ``measure_<name>`` and drawn again.

    CommandLine:
        python -m wbia Chap3.draw_all --dbs=GZ_Master1,PZ_Master1,GIRM_Master1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *  # NOQA
        >>> dbname = ut.get_argval('--db', default='PZ_MTEST')
        >>> dbnames = ut.get_argval('--dbs', type_=list, default=[dbname])
        >>> for dbname in dbnames:
        >>>     print('dbname = %r' % (dbname,))
        >>>     self = Chap3(dbname)
        >>>     self.draw_all()
    """
    self.ensure_results()

    if not ('PZ' in self.dbname or 'GZ' in self.dbname):
        return

    for expt_name in ('foregroundness', 'invar', 'smk', 'nsum', 'kexpt'):
        if expt_name not in self.expt_results:
            continue
        draw_attr = 'draw_' + expt_name
        try:
            getattr(self, draw_attr)()
        except KeyError:
            # Re-measure the experiment, then retry the drawing once.
            getattr(self, 'measure_' + expt_name)()
            getattr(self, draw_attr)()
def draw_time_distri(self):
    """
    Plot a kernel-density estimate of image timestamps and save it.

    The unixtimes of all images behind ``self.aids_pool`` are collected,
    NaN timestamps are dropped, and a Gaussian KDE is evaluated on 1000
    evenly spaced points between the earliest and latest timestamp. The
    plotted curve is the square root of the max-normalized density, which
    emphasizes smaller values. The number of images (and of NaN
    timestamps, if any) is written onto the figure.

    The bandwidth comes from ``--bw`` when given; otherwise a per-database
    default is used (GIRM: a quarter day; GZ / PZ / humpbacks_fb: 30
    days), and for any other database it is chosen by a randomized
    cross-validated search over 1 to 14 days.

    CommandLine:
        python -m wbia Chap3.draw_time_distri --dbs=GZ_Master1,PZ_Master1,GIRM_MasterV
        python -m wbia Chap3.draw_time_distri --dbs=GIRM_Master1
        python -m wbia Chap3.draw_time_distri --dbs=GZ_Master1
        python -m wbia Chap3.draw_time_distri --dbs=PZ_Master1
        python -m wbia Chap3.draw_time_distri --dbs=humpbacks_fb

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.scripts.thesis import *  # NOQA
        >>> dbname = ut.get_argval('--db', default='PZ_MTEST')
        >>> dbnames = ut.get_argval('--dbs', type_=list, default=[dbname])
        >>> for dbname in dbnames:
        >>>     print('dbname = %r' % (dbname,))
        >>>     self = Chap3(dbname)
        >>>     self.draw_time_distri()

    Returns:
        str: path of the written ``timedist.png``.
    """
    import matplotlib as mpl

    # Import from the public package path; the private module
    # ``sklearn.neighbors.kde`` no longer exists in current scikit-learn.
    from sklearn.neighbors import KernelDensity

    mpl.rcParams.update(TMP_RC)

    if self.ibs is None:
        self._precollect()
    ibs = self.ibs
    annots = ibs.annots(self.aids_pool)
    images = ibs.images(set(annots.gids))
    unixtimes_ = images.unixtime_asfloat

    num_nan = np.isnan(unixtimes_).sum()
    num_total = len(unixtimes_)
    unixtimes = unixtimes_[~np.isnan(unixtimes_)]

    mintime = vt.safe_min(unixtimes)
    maxtime = vt.safe_max(unixtimes)
    unixtime_domain = np.linspace(mintime, maxtime, 1000)

    bw = ut.get_argval('--bw', default=None)
    day = 60 * 60 * 24
    if bw is not None:
        # An explicit command line bandwidth always wins.
        pass
    elif 'GIRM' in self.dbname:
        bw = day / 4
    elif 'GZ' in self.dbname:
        bw = day * 30
    elif 'PZ' in self.dbname:
        bw = day * 30
    elif 'humpbacks_fb' in self.dbname:
        bw = day * 30
    else:
        from sklearn.model_selection import RandomizedSearchCV

        space = np.linspace(day, day * 14, 14).tolist()
        grid_params = {'bandwidth': space}
        searcher = ut.partial(RandomizedSearchCV, n_iter=5, n_jobs=8)
        logger.info('Searching for best bandwidth')
        grid = searcher(
            KernelDensity(kernel='gaussian'), grid_params, cv=2, verbose=0
        )
        grid.fit(unixtimes[:, None])
        bw = grid.best_params_['bandwidth']
        logger.info('bw = %r' % (bw,))
        logger.info('bw(days) = %r' % (bw / day,))

    kde = KernelDensity(kernel='gaussian', bandwidth=bw)
    kde.fit(unixtimes[:, None])
    log_density = kde.score_samples(unixtime_domain[:, None])
    density = np.exp(log_density)

    ydata = density.copy()
    # emphasize smaller values
    ydata /= ydata.max()
    ydata = np.sqrt(ydata)
    xdata = unixtime_domain
    xdata_ts = ut.lmap(ut.unixtime_to_datetimeobj, xdata)

    pt.multi_plot(
        xdata_ts,
        [ydata],
        label_list=['time'],
        alpha=0.7,
        fnum=1,
        pnum=(1, 1, 1),
        ymin=0,
        fill=True,
        marker='',
        xlabel='Date',
        ylabel='# images',
        num_xticks=5,
        use_legend=False,
    )

    # The total is always reported; the NaN count only when non-zero.
    infos = []
    if num_nan > 0:
        infos.append('#nan={}'.format(num_nan))
    infos.append('#total={}'.format(num_total))
    text = '\n'.join(infos)
    pt.relative_text((0.02, 0.02), text, halign='left', valign='top')

    ax = pt.gca()
    fig = pt.gcf()
    # The y axis is a rescaled density, so tick values are not shown.
    ax.set_yticks([])

    if False:
        # Disabled: overlay the database icon on the figure.
        icon = ibs.get_database_icon()
        pt.overlay_icon(
            icon,
            coords=(0, 1),
            bbox_alignment=(0, 1),
            as_artist=1,
            max_asize=(100, 200),
        )

    pt.adjust_subplots(top=0.9, bottom=0.1, left=0.12, right=0.9)
    fig.set_size_inches([W, H * 0.4])
    fpath = join(self.dpath, 'timedist.png')
    vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
    return fpath
@ut.reloadable_class
class Chap3(DBInputs, Chap3Draw, Chap3Measures):
    """
    Entry point for the Chapter 3 experiments.

    Combines the database inputs with the measuring and drawing mixins and
    adds class-level helpers that aggregate results over several
    databases. Aggregate outputs are written under ``base_dpath``.
    """

    base_dpath = ut.truepath('~/latex/crall-thesis-2017/figures3')

    def _setup(self):
        """Collect the inputs needed by the experiments."""
        self._precollect()

    @classmethod
    def run_all(cls):
        """
        Measure every database, then build the aggregate tables and plot.

        CommandLine:
            python -m wbia Chap3.run_all
        """
        agg_dbnames = ['PZ_Master1', 'GZ_Master1', 'GIRM_Master1', 'humpbacks_fb']
        # Databases are processed in reverse of the listed order.
        agg_dbnames = agg_dbnames[::-1]
        for dbname in agg_dbnames:
            self = cls(dbname)
            self.measure_all()
            self.draw_time_distri()
        cls.agg_dbstats()
        cls.draw_agg_baseline()

    def measure_all(self):
        """
        Run all measurements that apply to this database.

        The baseline is always measured; the remaining experiments are
        only run for 'PZ_Master1' and 'GZ_Master1'.

        Example:
            from wbia.scripts.thesis import *
            self = Chap3('PZ_Master1')
            self.measure_all()
            self = Chap3('GZ_Master1')
            self.measure_all()
            self = Chap3('GIRM_Master1')
            self.measure_all()
        """
        if self.ibs is None:
            self._precollect()
        self.measure_baseline()
        if self.dbname in {'PZ_Master1', 'GZ_Master1'}:
            self.measure_foregroundness()
            self.measure_smk()
            self.measure_nsum()
            # self.measure_dbsize()
            self.measure_kexpt()
            self.measure_invar()

    @classmethod
    def agg_dbstats(cls):
        """
        Build LaTeX tables of encounter, quality and viewpoint statistics.

        The 'dbstats' results of each aggregated database are combined
        into three tables, which are logged, rendered with
        ``ut.render_latex`` and written as ``agg-enc.tex``,
        ``agg-view.tex`` and ``agg-qual.tex`` in ``cls.base_dpath``.

        CommandLine:
            python -m wbia Chap3.agg_dbstats
            python -m wbia Chap3.measure_dbstats

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.scripts.thesis import *  # NOQA
            >>> result = Chap3.agg_dbstats()
            >>> print(result)
        """
        agg_dbnames = ['PZ_Master1', 'GZ_Master1', 'GIRM_Master1', 'humpbacks_fb']
        infos = ut.ddict(list)
        for dbname in agg_dbnames:
            self = cls(dbname)
            info = self.ensure_results('dbstats')
            infos['enc'].append(info['enc'])
            infos['qual'].append(info['qual'])
            infos['view'].append(info['view'])
            # labels.append(self.species_nice.capitalize())

        # --- encounter table ---
        alias = {
            'species_nice': 'database',
            'n_singleton_names': 'names (singleton)',
            'n_resighted_names': 'names (resighted)',
            'n_encounter_per_resighted_name': 'encounters per name (resighted)',
            'n_annots_per_encounter': 'annots per encounter',
            'n_annots': 'annots',
        }
        alias = ut.map_vals(upper_one, alias)
        df = pd.DataFrame(infos['enc']).rename(columns=alias)
        df = df.set_index('Database')
        df.index.name = None
        df.index = ut.emap(upper_one, df.index)
        tabular = Tabular(df, colfmt='numeric')
        tabular.theadify = 16
        enc_text = tabular.as_tabular()
        logger.info(enc_text)

        # --- quality table ---
        df = pd.DataFrame(infos['qual'])
        df = df.rename(columns={'species_nice': 'Database'})
        df = df.reindex(
            ut.partial_order(
                df.columns, ['Database', 'excellent', 'good', 'ok', 'poor', 'None']
            ),
            axis=1,
        )
        df = df.set_index('Database')
        df.index.name = None
        df.index = ut.emap(upper_one, df.index)
        # Missing counts become zero so the table can be cast to integers.
        df[pd.isnull(df)] = 0
        # Use the builtin int: the ``np.int`` alias is gone from NumPy.
        df = df.astype(int)
        df.columns = ut.emap(upper_one, df.columns)
        tabular = Tabular(df, colfmt='numeric')
        qual_text = tabular.as_tabular()
        logger.info(qual_text)

        # --- viewpoint table ---
        df = pd.DataFrame(infos['view'])
        df = df.rename(
            columns={
                'species_nice': 'Database',
                'back': 'B',
                'left': 'L',
                'right': 'R',
                'front': 'F',
                'backleft': 'BL',
                'backright': 'BR',
                'frontright': 'FR',
                'frontleft': 'FL',
            }
        )
        order = ut.partial_order(
            df.columns, ['Database', 'BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', 'None']
        )
        df = df.reindex(order, axis=1)
        df = df.set_index('Database')
        df.index.name = None
        df.index = ut.emap(upper_one, df.index)
        df[pd.isnull(df)] = 0
        df = df.astype(int)
        tabular = Tabular(df, colfmt='numeric')
        view_text = tabular.as_tabular()
        logger.info(view_text)

        # Render and write into the class-level directory (not via the
        # last instance created in the loop above).
        out_dpath = cls.base_dpath
        ut.render_latex(
            enc_text,
            dpath=out_dpath,
            fname='agg-enc',
            preamb_extra=['\\usepackage{makecell}'],
        )
        ut.render_latex(
            view_text,
            dpath=out_dpath,
            fname='agg-view',
            preamb_extra=['\\usepackage{makecell}'],
        )
        ut.render_latex(
            qual_text,
            dpath=out_dpath,
            fname='agg-qual',
            preamb_extra=['\\usepackage{makecell}'],
        )
        ut.write_to(join(out_dpath, 'agg-enc.tex'), enc_text)
        ut.write_to(join(out_dpath, 'agg-view.tex'), view_text)
        ut.write_to(join(out_dpath, 'agg-qual.tex'), qual_text)

    @classmethod
    def draw_agg_baseline(cls):
        """
        Plot the first baseline curve of every aggregated database together.

        The figure is written to ``agg-baseline.png`` in ``cls.base_dpath``
        and opened when ``--diskshow`` is given.

        CommandLine:
            python -m wbia Chap3.draw_agg_baseline --diskshow

        Example:
            >>> # SCRIPT
            >>> from wbia.scripts.thesis import *  # NOQA
            >>> Chap3.draw_agg_baseline()
        """
        agg_dbnames = ['GZ_Master1', 'PZ_Master1', 'GIRM_Master1', 'humpbacks_fb']
        cdfs = []
        labels = []
        for dbname in agg_dbnames:
            self = cls(dbname)
            results = self.ensure_results('baseline')
            # Only the first (cdf, config) pair is used.
            baseline_cdf, config = results[0]
            dsize = config['dsize']
            # The label's "qsize" is taken from the 't_n_names' entry.
            qsize = config['t_n_names']
            cdfs.append(baseline_cdf)
            labels.append('{},qsize={},dsize={}'.format(self.species_nice, qsize, dsize))
            # labels.append(self.species_nice.capitalize())

        mpl.rcParams.update(TMP_RC)
        fig = plot_cmcs(cdfs, labels, fnum=1, ymin=0.5)
        fig.set_size_inches([W, H * 1.5])
        fpath = join(cls.base_dpath, 'agg-baseline.png')
        vt.imwrite(fpath, pt.render_figure_to_image(fig, dpi=DPI))
        if ut.get_argflag('--diskshow'):
            ut.startfile(fpath)
class Sampler(object):
    """
    Static helpers that split annotations into query / database samples.

    Each splitter takes a controller ``ibs`` and a list of annotation ids
    and returns query aids together with database aids and/or a pool of
    "confusor" aids used to pad the database.
    """

    @staticmethod
    def _same_occur_split(ibs, aids):
        """
        Pick up to two annotations per name, preferring large occurrences.

        Occurrences are visited from most to fewest encounters. From every
        encounter the annotation ranked last by ``ut.argsort`` of the
        qualities is taken and added to its name until that name has two.
        Names that end with two annotations give one query and one
        database annotation (randomly ordered, fixed seed); names with
        fewer go to the confusor pool.

        Example:
            >>> from wbia.scripts.thesis import *
            >>> self = Chap3('PZ_Master1')
            >>> self._precollect()

        Returns:
            tuple: (qaids, dname_encs, confusor_pool)
        """
        annots = ibs.annots(aids)
        # occurrences = ibs._annot_groups(annots.group(annots.occurrence_text)[1])
        encounters = ibs._annot_groups(annots.group(annots.encounter_text)[1])

        nid_to_splits = ut.ddict(list)

        # Find the biggest occurrences and pick an annotation from that
        # occurrence to be sampled
        occur_to_encs = ut.group_items(
            encounters, ut.take_column(encounters.occurrence_text, 0)
        )
        occur_encs = ut.sortedby(
            list(occur_to_encs.values()), list(map(len, occur_to_encs.values()))
        )[::-1]

        for encs in occur_encs:
            for enc in encs:
                sortx = ut.argsort(enc.qualities)[::-1]
                annot = enc[sortx[0]]
                if len(nid_to_splits[annot.nid]) < 2:
                    nid_to_splits[annot.nid].append(annot.aid)

        rng = ut.ensure_rng(0)
        pyrng = random.Random(rng.randint(sys.maxsize))

        qaids = []
        dname_encs = []
        confusor_pool = []
        for nid, aids_ in nid_to_splits.items():
            if len(aids_) < 2:
                confusor_pool.extend(aids_)
            else:
                pyrng.shuffle(aids_)
                qaids.append(aids_[0])
                dname_encs.append([[aids_[1]]])
        confusor_pool = ut.shuffle(confusor_pool, rng=0)
        return qaids, dname_encs, confusor_pool

    @staticmethod
    def _intra_enc(ibs, aids):
        """
        Make query / database samples from groups of occurrences.

        Occurrences are greedily merged into groups whose name sets are
        pairwise disjoint. The four groups containing the most
        occurrences each become a sample: every annotation goes into the
        database and annotations from encounters with more than one
        annotation are also queries.

        Returns:
            list: up to four ``SplitSample`` objects.
        """
        # Make a query / database set for each occurrence
        annots = ibs.annots(aids)
        # occurrences = ibs._annot_groups(annots.group(annots.occurrence_text)[1])
        encounters = ibs._annot_groups(annots.group(annots.encounter_text)[1])

        occurrences = ut.group_items(
            encounters, ut.take_column(encounters.occurrence_text, 0)
        )
        occurrences = ut.map_vals(ibs._annot_groups, occurrences)

        occur_nids = {o: set(ut.flatten(encs.nids)) for o, encs in occurrences.items()}

        # Need to find multiple disjoint exact covers of the nids
        # Greedy solution because this is NP-hard
        from wbia.algo.graph import nx_dynamic_graph

        G = nx_dynamic_graph.DynConnGraph()
        G.add_nodes_from(occur_nids.keys())
        occur_ids = ut.sortedby(occur_nids.keys(), ut.lmap(len, occur_nids.values()))[
            ::-1
        ]
        # Maps each connected component (as a frozenset of occurrence ids)
        # to the union of the names it contains.
        current_combos = {
            frozenset(G.connected_to(o1)): occur_nids[o1] for o1 in occur_ids
        }
        for o1, o2 in ut.combinations(occur_ids, 2):
            if G.node_label(o1) == G.node_label(o2):
                # Already in the same group.
                continue
            cc1 = frozenset(G.connected_to(o1))
            cc2 = frozenset(G.connected_to(o2))
            nids1 = current_combos[cc1]
            nids2 = current_combos[cc2]
            if nids1.isdisjoint(nids2):
                G.add_edge(o1, o2)
                del current_combos[cc1]
                del current_combos[cc2]
                current_combos[frozenset(cc1.union(cc2))] = nids1.union(nids2)

        # Pick the top few occurrence groups (ranked by the number of
        # occurrences in each group).
        grouped_occurs = list(map(frozenset, G.connected_components()))
        group_size = ut.lmap(len, list(grouped_occurs))
        top_groups = ut.sortedby(grouped_occurs, group_size)[::-1][0:4]

        samples = []
        for occur_group in top_groups:
            encs = ut.flatten(occurrences[o].aids for o in occur_group)
            encs = ut.lmap(ibs.annots, encs)
            qaids = []
            daids = []
            for enc in encs:
                daids.extend(enc.aids)
                # Single-annotation encounters are database-only.
                if len(enc) != 1:
                    qaids.extend(enc.aids)
            sample = SplitSample(qaids, daids)
            samples.append(sample)
        return samples

    @staticmethod
    def _same_enc_split(ibs, aids):
        """
        Pick query / database pairs from within a single encounter.

        Occurrences are visited from most to fewest encounters. For the
        first encounter seen of each name, up to two of its annotations
        are sampled at random (fixed seed). Names with two sampled
        annotations give one query and one database annotation; names
        with only one go to the confusor pool.

        Example:
            >>> from wbia.scripts.thesis import *
            >>> self = Chap3('PZ_Master1')
            >>> self._precollect()

        Returns:
            tuple: (qaids, dname_encs, confusor_pool)
        """
        annots = ibs.annots(aids)
        # occurrences = ibs._annot_groups(annots.group(annots.occurrence_text)[1])
        encounters = ibs._annot_groups(annots.group(annots.encounter_text)[1])

        rng = ut.ensure_rng(0)
        pyrng = random.Random(rng.randint(sys.maxsize))

        nid_to_splits = ut.ddict(list)

        # Find the biggest occurrences and pick an annotation from that
        # occurrence to be sampled
        occur_to_encs = ut.group_items(
            encounters, ut.take_column(encounters.occurrence_text, 0)
        )
        occur_encs = ut.sortedby(
            list(occur_to_encs.values()), list(map(len, occur_to_encs.values()))
        )[::-1]

        for encs in occur_encs:
            for enc in encs:
                nid = enc.nids[0]
                if len(nid_to_splits[nid]) == 0:
                    chosen = pyrng.sample(enc.aids, min(len(enc), 2))
                    nid_to_splits[nid].extend(chosen)

        qaids = []
        dname_encs = []
        confusor_pool = []
        for nid, aids_ in nid_to_splits.items():
            if len(aids_) < 2:
                confusor_pool.extend(aids_)
            else:
                pyrng.shuffle(aids_)
                qaids.append(aids_[0])
                dname_encs.append([[aids_[1]]])
        confusor_pool = ut.shuffle(confusor_pool, rng=0)
        return qaids, dname_encs, confusor_pool

    @staticmethod
    def _rand_splits(ibs, aids, qenc_per_name, denc_per_name_, annots_per_enc):
        """
        Split via ``encounter_crossval``. This can be used for cross validation.

        Only the first split returned by ``encounter_crossval`` is used.

        NOTE(review): ``qenc_per_name`` and ``annots_per_enc`` are accepted
        but not forwarded; both are passed as 1 below. Confirm whether
        that is intended before relying on other values.

        Returns:
            tuple: (qaids, dname_encs, confusor_pool)
        """
        # Find a split of query/database encounters and confusors
        from wbia.init.filter_annots import encounter_crossval

        enc_splits, nid_to_confusors = encounter_crossval(
            ibs,
            aids,
            qenc_per_name=1,
            annots_per_enc=1,
            denc_per_name=denc_per_name_,
            rebalance=True,
            rng=0,
            early=True,
        )
        qname_encs, dname_encs = enc_splits[0]
        qaids = sorted(ut.flatten(ut.flatten(qname_encs)))
        confusor_pool = ut.flatten(ut.flatten(nid_to_confusors.values()))
        confusor_pool = ut.shuffle(confusor_pool, rng=0)
        return qaids, dname_encs, confusor_pool

    @staticmethod
    def _alt_splits(
        ibs, aids, qenc_per_name, denc_per_name_, annots_per_enc, viewpoint_aware=False
    ):
        """
        Split names into query / database encounters plus confusors.

        This cannot be used for cross validation

        Notes:
            (a) single encounter experiments are structured somewhat like this:
                (of course this script is more general than this)

                * For each name with more than one encounter
                * Choose a random encounter, and select the highest quality
                  annotation as the single query annotation.
                * For each other encounter the best annotation that is comparable
                  (close to the same viewpoint) to the query. If no other encounter
                  satisfies this then skip this name (don't add a query or database
                  annotation).
                * Of the remaining encounters choose a random annotation to belong
                  to the database.
                * For each other name, that was not selected to form a
                  query/database pair, add all annotations to the database as
                  distractors.

            (b) with multiple exemplars in the database:

                * Follow the same steps above, but now if there are not at
                  least N valid database encounters, we ignore the query/database
                  pair.
                * Multiple sets of daids are generated (each with a different
                  number of exemplars per query), but the query set remains the
                  same and consistent across different runs of this experiment.

        Returns:
            tuple: (qaids, dname_encs, confname_encs, confusor_pool), where
                ``confname_encs`` is currently always an empty list.
        """
        # Group annotations by encounter
        annots = ibs.annots(aids)
        encounters = ibs._annot_groups(annots.group(annots.encounter_text)[1])
        enc_nids = ut.take_column(encounters.nids, 0)
        nid_to_encs = ut.group_items(encounters, enc_nids)

        rng = ut.ensure_rng(0)
        pyrng = random.Random(rng.randint(sys.maxsize))

        n_need = qenc_per_name + denc_per_name_

        confusor_encs = {}
        sample_splits = {}

        def choose_best(enc, num):
            # Keep at most ``num`` annotations, taken from the end of the
            # quality ordering.
            if len(enc) > num:
                sortx = ut.argsort(enc.qualities)[::-1]
                subenc = enc.take(sortx[0:num])
            else:
                subenc = enc
            return subenc

        def _only_comparable(qsubenc, avail_dencs):
            # Reduce each database encounter to the annotations whose
            # viewpoint is comparable to at least one query viewpoint.
            from vtool import _rhomb_dist

            qviews = set(ut.flatten(qsubenc.viewpoint_code))
            comparable_encs = []
            for denc in avail_dencs:
                comparable = []
                for daid, dview in zip(denc.aids, denc.viewpoint_code):
                    for qview in qviews:
                        dist = _rhomb_dist.VIEW_CODE_DIST[(qview, dview)]
                        if np.isnan(dist) or dist < 2:
                            comparable.append(daid)
                            # One matching query view is enough; stop so
                            # the same daid is never added twice.
                            break
                if comparable:
                    comparable_encs.append(ibs.annots(comparable))
            return comparable_encs

        for nid, encs in nid_to_encs.items():
            if len(encs) < n_need:
                confusor_encs[nid] = encs
            else:
                if viewpoint_aware:
                    # Randomly choose queries
                    avail_qxs = list(range(len(encs)))
                    qencxs = pyrng.sample(avail_qxs, qenc_per_name)
                    qencs = ut.take(encs, qencxs)
                    qsubenc = ibs._annot_groups(
                        [choose_best(enc, annots_per_enc) for enc in qencs]
                    )

                    # Ensure the db annots are comparable to at least one query
                    avail_dencs = ut.take(encs, ut.setdiff(avail_qxs, qencxs))
                    comparable_encs = _only_comparable(qsubenc, avail_dencs)

                    if len(comparable_encs) >= denc_per_name_:
                        # If we still have enough, sample daids
                        dencs = pyrng.sample(comparable_encs, denc_per_name_)
                        dsubenc = ibs._annot_groups(
                            [choose_best(enc, annots_per_enc) for enc in dencs]
                        )
                        sample_splits[nid] = (qsubenc.aids, dsubenc.aids)
                    else:
                        # If we don't, add the name to the confusors
                        confusor_encs[nid] = encs
                else:
                    # For each name choose a query / database encounter.
                    chosen_encs = pyrng.sample(encs, n_need)
                    # Choose high quality annotations from each encounter
                    best_subencs = [
                        choose_best(enc, annots_per_enc) for enc in chosen_encs
                    ]
                    qsubenc = ibs._annot_groups(best_subencs[0:qenc_per_name])
                    dsubenc = ibs._annot_groups(best_subencs[qenc_per_name:])
                    sample_splits[nid] = (qsubenc.aids, dsubenc.aids)

        # make confusor encounters subject to the same constraints
        confusor_pool = []
        confname_encs = []
        for encs in confusor_encs.values():
            # Take the first ``annots_per_enc`` annotations of every
            # encounter of a confusor name.
            confusor_pool.extend(ut.flatten([enc[0:annots_per_enc].aids for enc in encs]))

        qaids = ut.total_flatten(ut.take_column(sample_splits.values(), 0))
        dname_encs = ut.take_column(sample_splits.values(), 1)
        return qaids, dname_encs, confname_encs, confusor_pool

    @staticmethod
    def _varied_inputs(
        ibs,
        aids,
        denc_per_name=[1],
        extra_dbsize_fracs=None,
        method='alt',
        viewpoint_aware=None,
    ):
        """
        Vary num per name and total number of annots

        One query set is built with the chosen ``method``; then one
        database aid list is produced for every combination of an entry of
        ``denc_per_name`` and an entry of ``extra_dbsize_fracs``. Database
        lists are padded with confusors so the base samples share a size
        before the extra confusors are appended.

        Args:
            ibs: controller object.
            aids (list): pool of annotation ids to sample from.
            denc_per_name (list): numbers of database encounters per name
                to generate (never mutated here).
            extra_dbsize_fracs (list): fractions of the left-over confusors
                to append; defaults to ``[1.0]``.
            method (str): 'alt', 'same_occur' or 'same_enc'.
            viewpoint_aware (bool): only allowed with ``method='alt'``.

        Returns:
            tuple: (qaids, daids_list, info_list)

        Raises:
            KeyError: if ``method`` is not recognized.

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.scripts.thesis import *  # NOQA
            >>> self = Chap3('PZ_Master1')
            >>> self._precollect()
            >>> ibs = self.ibs
            >>> aids = self.aids_pool
            >>> print('--------')
            >>> qaids, daids_list, info_list = self._varied_inputs(ibs, aids, [1], [1], method='same_occur')
            >>> print('qsize = %r' % (len(qaids),))
            >>> for info in info_list:
            >>>     print(ut.repr4(info))
            >>> print('--------')
            >>> qaids, daids_list, info_list = self._varied_inputs(ibs, aids, [1], [1], method='same_enc')
            >>> print('qsize = %r' % (len(qaids),))
            >>> for info in info_list:
            >>>     print(ut.repr4(info))
            >>> print('--------')
            >>> qaids, daids_list, info_list = self._varied_inputs(ibs, aids, [1, 2], [0])
            >>> print('qsize = %r' % (len(qaids),))
            >>> for info in info_list:
            >>>     print(ut.repr4(info))
            >>> print('--------')
            >>> qaids, daids_list, info_list = self._varied_inputs(ibs, aids, [3], [0, 1])
            >>> print('qsize = %r' % (len(qaids),))
            >>> for info in info_list:
            >>>     print(ut.repr4(info))

        Ignore:
            ibs, aids = self.ibs, self.aids_pool
            denc_per_name = 1
            denc_per_name = [1]
            extra_dbsize_fracs = None
            extra_dbsize_fracs = [0, .5, 1]
        """
        qenc_per_name = 1
        annots_per_enc = 1
        denc_per_name_ = max(denc_per_name)

        if method == 'alt':
            if viewpoint_aware is None:
                viewpoint_aware = False
            qaids, dname_encs, confname_encs, confusor_pool = Sampler._alt_splits(
                ibs,
                aids,
                qenc_per_name,
                denc_per_name_,
                annots_per_enc,
                viewpoint_aware=viewpoint_aware,
            )
        elif method == 'same_occur':
            assert viewpoint_aware is None, 'cannot specify viewpoint_aware here'
            assert denc_per_name_ == 1
            assert annots_per_enc == 1
            assert qenc_per_name == 1
            qaids, dname_encs, confusor_pool = Sampler._same_occur_split(ibs, aids)
        elif method == 'same_enc':
            assert viewpoint_aware is None, 'cannot specify viewpoint_aware here'
            qaids, dname_encs, confusor_pool = Sampler._same_enc_split(ibs, aids)
        else:
            raise KeyError(method)

        # Vary the number of database encounters in each sample
        target_daids_list = []
        target_info_list_ = []
        for num in denc_per_name:
            dname_encs_ = ut.take_column(dname_encs, slice(0, num))
            dnames_ = ut.lmap(ut.flatten, dname_encs_)
            daids_ = ut.flatten(dnames_)
            target_daids_list.append(daids_)
            name_lens = ut.lmap(len, dnames_)
            dpername = name_lens[0] if ut.allsame(name_lens) else np.mean(name_lens)
            target_info_list_.append(
                ut.odict(
                    [
                        ('qsize', len(qaids)),
                        ('t_n_names', len(dname_encs_)),
                        ('t_dpername', dpername),
                        ('t_denc_pername', num),
                        ('t_dsize', len(daids_)),
                    ]
                )
            )

        # Append confusors to maintain a constant dbsize in each base sample
        dbsize_list = ut.lmap(len, target_daids_list)
        max_dsize = max(dbsize_list)
        n_need = max_dsize - min(dbsize_list)
        n_extra_avail = len(confusor_pool) - n_need
        # assert len(confusor_pool) > n_need, 'not enough confusors'
        padded_daids_list = []
        padded_info_list_ = []
        for num, daids_, info_ in zip(
            denc_per_name, target_daids_list, target_info_list_
        ):
            num_take = max_dsize - len(daids_)
            assert num_take < len(confusor_pool), 'not enough confusors'
            # Padding is taken from the front of the confusor pool.
            pad_aids = confusor_pool[:num_take]
            new_aids = daids_ + pad_aids
            info_ = info_.copy()
            info_['n_pad'] = len(pad_aids)
            info_['pad_dsize'] = len(new_aids)
            padded_info_list_.append(info_)
            padded_daids_list.append(new_aids)

        # Vary the dbsize by appending extra confusors
        if extra_dbsize_fracs is None:
            extra_dbsize_fracs = [1.0]
        extra_fracs = np.array(extra_dbsize_fracs)
        # Use the builtin int: the ``np.int`` alias is gone from NumPy.
        n_extra_list = np.unique(extra_fracs * n_extra_avail).astype(int)
        daids_list = []
        info_list = []
        for n in n_extra_list:
            for daids_, info_ in zip(padded_daids_list, padded_info_list_):
                # Extras are taken from the back of the confusor pool.
                extra_aids = confusor_pool[len(confusor_pool) - n :]
                daids = sorted(daids_ + extra_aids)
                daids_list.append(daids)
                info = info_.copy()
                info['n_extra'] = len(extra_aids)
                info['dsize'] = len(daids)
                info_list.append(info)

        verbose = 0
        if verbose:
            import pandas as pd

            logger.info(pd.DataFrame.from_records(info_list))
            logger.info('#qaids = %r' % (len(qaids),))
            logger.info('num_need = %r' % (n_need,))
            logger.info('max_dsize = %r' % (max_dsize,))
            if False:
                for daids in daids_list:
                    ibs.print_annotconfig_stats(qaids, daids)
        return qaids, daids_list, info_list
class SplitSample(ut.NiceRepr):
    """Pair of query annotation ids and database annotation ids."""

    def __init__(sample, qaids, daids):
        # qaids: query annotation ids; daids: database annotation ids
        sample.qaids, sample.daids = qaids, daids

    def __nice__(sample):
        """Return a short summary with the size of each aid list."""
        num_q = len(sample.qaids)
        num_d = len(sample.daids)
        return 'nQaids={}, nDaids={}'.format(num_q, num_d)
def _ranking_hist(ibs, qaids, daids, cfgdict):
    """
    Run the ranking algorithm and histogram the rank of each query's name.

    Both aid lists are sorted before the query request is built. The
    histogram bin edges are ``0 .. len(qreq_.dnids) - 1``.

    Returns:
        ndarray: number of queries falling into each rank bin.
    """
    # Execute the ranking algorithm
    sorted_qaids = sorted(qaids)
    sorted_daids = sorted(daids)
    qreq_ = ibs.new_query_request(sorted_qaids, sorted_daids, cfgdict=cfgdict)
    matches = qreq_.execute()
    matches = [cm.extend_results(qreq_) for cm in matches]
    # Rank of the query's own name within each match result
    true_ranks = [cm.get_name_ranks([cm.qnid])[0] for cm in matches]
    # Measure rank probabilities
    edges = np.arange(len(qreq_.dnids))
    counts, _edges = np.histogram(true_ranks, bins=edges)
    return counts
def _ranking_cdf(ibs, qaids, daids, cfgdict):
    """
    Return the cumulative rank distribution for a ranking run.

    The rank histogram from ``_ranking_hist`` is accumulated and divided
    by its total.
    """
    counts = _ranking_hist(ibs, qaids, daids, cfgdict)
    total = sum(counts)
    return np.cumsum(counts) / total
def label_alias(k):
    """Shorten a label by replacing 'True' with 'T' and 'False' with 'F'."""
    return k.replace('True', 'T').replace('False', 'F')
def feat_alias(k):
    """
    Return the presentation name for feature dimension ``k``.

    No substitutions are currently active, so ``k`` is returned unchanged.
    """
    # Disabled presentation substitutions, kept for reference:
    #   'weighted_' -> 'wgt_', 'norm_x' -> 'x', 'norm_y' -> 'y'
    presented = k
    return presented
def prepare_cdfs(cdfs, labels):
    """
    Stack the curves into one array and order them best-first.

    Curves are stacked with ``vt.pad_vstack`` using a fill value of 1 and
    then sorted in descending lexicographic order, with the first column
    as the primary key. ``labels`` is reordered to match.

    Returns:
        tuple: (cdfs, labels)
    """
    stacked = vt.pad_vstack(cdfs, fill_value=1)
    # np.lexsort treats its last key as primary, so the columns are
    # reversed to make column 0 primary; the final reversal puts the best
    # curve on top.
    ascending = np.lexsort(stacked.T[::-1])
    order = ascending[::-1]
    return stacked[order], ut.take(labels, order)
def plot_cmcs(cdfs, labels, fnum=1, pnum=(1, 1, 1), ymin=0.4):
    """
    Plot match probability against rank for several curves.

    Curves are ordered by ``prepare_cdfs`` and truncated to at most 20
    ranks. Each legend entry is prefixed with the curve's value at rank 1
    as a percentage.

    Args:
        cdfs: sequence of curves, one per label.
        labels: legend label for each curve.
        fnum: figure number passed to the plotting call.
        pnum: subplot specifier passed to the plotting call.
        ymin: lower limit of the y axis; also determines the tick count.

    Returns:
        the current matplotlib figure (``pt.gcf()``).
    """
    cdfs, labels = prepare_cdfs(cdfs, labels)
    # Truncate to 20 ranks
    rank_limit = 20
    num_ranks = min(cdfs.shape[-1], rank_limit)
    ranks = np.arange(1, num_ranks + 1)
    truncated = cdfs[:, 0:num_ranks]
    legend_labels = []
    for curve, lbl in zip(truncated, labels):
        legend_labels.append('%6.2f%% - %s' % (curve[0] * 100, lbl))

    # One tick per tenth between ymin and 1, inclusive
    tick_count = (10 - int(ymin * 10)) + 1

    plot_kw = dict(
        label_list=legend_labels,
        xlabel='rank',
        ylabel='match probability',
        use_legend=True,
        legend_loc='lower right',
        num_yticks=tick_count,
        ymax=1,
        ymin=ymin,
        ypad=0.005,
        xmin=0.9,
        num_xticks=5,
        xmax=num_ranks + 1 - 0.5,
        pnum=pnum,
        fnum=fnum,
        rcParams=TMP_RC,
    )
    pt.multi_plot(ranks, truncated, **plot_kw)
    return pt.gcf()
def plot_cmcs2(cdfs, labels, fnum=1, **kwargs):
    """
    Create figure ``fnum``, draw the curves with ``plot_cmcs`` and size it.

    Extra keyword arguments are forwarded to ``plot_cmcs``.

    Returns:
        the figure created for ``fnum``.
    """
    figure = pt.figure(fnum=fnum)
    plot_cmcs(cdfs, labels, fnum=fnum, **kwargs)
    margins = dict(top=0.8, bottom=0.2, left=0.12, right=0.9)
    pt.adjust_subplots(**margins)
    figure.set_size_inches([W, H])
    return figure