# -*- coding: utf-8 -*-
"""
displays results from harness
TODO: save a testres variable so reloading and regeneration become easier.
"""
import logging
import numpy as np
import utool as ut
# Rebind `print` and obtain the `rrr` and `profile` helpers from utool for
# this module; `profile` is used below as a decorator on print_results.
print, rrr, profile = ut.inject2(__name__)
# All messages in this module go to the shared 'wbia' logger (not a
# per-module logger named after __name__).
logger = logging.getLogger('wbia')
def get_diffranks(rank_mat, qaids):
    """
    Select the rows of ``rank_mat`` whose entries are not all identical.

    A row is kept when at least one of its values differs from the first
    value in that row.

    FIXME: duplicated

    Args:
        rank_mat (ndarray): matrix iterated row by row; presumably one row
            per query and one column per config - TODO confirm
        qaids (list): one item per row of ``rank_mat``

    Returns:
        tuple: (diff_aids, diff_rank, diff_qxs) - the kept items of
            ``qaids``, the kept rows of ``rank_mat``, and the indices of the
            kept rows.
    """
    # A row "differs" as soon as any entry is unequal to its first entry.
    isdiff_flags = [bool(np.any(row != row[0])) for row in rank_mat]
    diff_qxs = np.where(isdiff_flags)[0]
    diff_rank = rank_mat.compress(isdiff_flags, axis=0)
    diff_aids = ut.compress(qaids, isdiff_flags)
    return diff_aids, diff_rank, diff_qxs
def get_diffmat_str(rank_mat, qaids, nConfig):
    """
    Render the rows that differ between configs as CSV text.

    The differing rows are found with ``get_diffranks``; the table has a
    leading ``qaid`` column followed by one ``cfg<N>_rank`` column per config.

    Args:
        rank_mat (ndarray): matrix passed through to ``get_diffranks``
        qaids (list): one item per row of ``rank_mat``
        nConfig (int): number of ``cfg<N>_rank`` column labels to generate

    Returns:
        the value returned by ``ut.numpy_to_csv`` (presumably a str - TODO
        confirm)
    """
    diff_aids, diff_rank, _ = get_diffranks(rank_mat, qaids)
    # First column holds the ids, the remaining columns hold the ranks.
    stacked = np.vstack((diff_aids, diff_rank.T))
    diff_mat = stacked.T
    col_lbls = ['qaid'] + ['cfg%d_rank' % cfgx for cfgx in range(nConfig)]
    col_type = [int] + [int] * nConfig
    return ut.numpy_to_csv(diff_mat, col_lbls, 'diffmat', col_type)
def print_latexsum(ibs, testres, verbose=True):
    r"""
    Build a LaTeX score table for an experiment, log it, and return it.

    The table has one row per config label and one column per threshold in
    ``testres.get_X_LIST()``; rows are ordered by the first column's score,
    highest first. The table is wrapped with ``ut.latex_newcommand``.

    Args:
        ibs (IBEISController): wbia controller object
        testres (?): object providing ``testnameid``, ``nQuery``,
            ``get_X_LIST``, ``get_nLessX_dict``, ``get_short_cfglbls`` and
            ``get_title_aug``
        verbose (bool): not used by this function

    Returns:
        the value returned by ``ut.latex_newcommand`` (the same value that is
        logged)

    CommandLine:
        python -m wbia.expt.experiment_printres --exec-print_latexsum
        python -m wbia.scripts.gen_cand_expts --exec-gen_script
        python -m wbia --tf print_latexsum -t candidacy --db PZ_Master0 -a controlled --rank-lt-list=1,5,10,100
        python -m wbia --tf print_latexsum -t candidacy --db PZ_MTEST -a controlled --rank-lt-list=1,5,10,100

    Example:
        >>> # SCRIPT
        >>> from wbia.expt.experiment_printres import *  # NOQA
        >>> from wbia.init import main_helpers
        >>> ibs, testres = main_helpers.testdata_expts()
        >>> tabular_str2 = print_latexsum(ibs, testres)
    """
    banner = '=========================='
    logger.info(banner)
    logger.info('[harn] LaTeX: %s' % testres.testnameid)
    logger.info(banner)

    # Column labels: one per rank threshold.
    X_LIST = testres.get_X_LIST()
    col_lbls = [r'#ranks $\leq$ %d' % X for X in X_LIST]
    cfg_score_title = ibs.get_dbname() + ' rank scores'

    # Score matrix: one row per config, one column per threshold.
    nLessX_dict = testres.get_nLessX_dict()
    cfgscores = np.array([nLessX_dict[int(X)] for X in X_LIST]).T

    # Order labels and score rows together by the first column, best first.
    row_lbls = testres.get_short_cfglbls()
    first_col_scores = cfgscores.T[0]
    row_lbls = ut.sortedby(row_lbls, first_col_scores, reverse=True)
    sorted_rows = ut.sortedby(cfgscores.tolist(), first_col_scores, reverse=True)
    cfgscores = np.array(sorted_rows)

    cmdaug = testres.get_title_aug()

    tabular_str = ut.util_latex.make_score_tabular(
        row_lbls,
        col_lbls,
        cfgscores,
        title=cfg_score_title,
        out_of=testres.nQuery,
        bold_best=True,
        flip=False,
        SHORTEN_ROW_LBLS=False,
    )

    raw_cmdname = 'Expmt' + ibs.get_dbname() + '_' + cmdaug + 'Table'
    cmdname = ut.latex_sanitize_command_name(raw_cmdname)
    tabular_str2 = ut.latex_newcommand(cmdname, tabular_str)
    logger.info(tabular_str2)
    return tabular_str2
@profile
def print_results(ibs, testres, **kwargs):
    r"""
    Print a summary of an experiment harness run.

    Rows store different qaids (query annotation ids)
    Cols store different configurations (algorithm parameters)

    The optional sections (column labels, config strings, per-config scores,
    percent identification success) are wrapped with ``ut.argv_flag_dec``;
    presumably each is only emitted when its command-line flag is given - the
    final log line states that ``--print-all`` enables all printouts. The
    best-score summary is always printed.

    TODO: join acfgs

    Args:
        ibs (IBEISController): wbia controller object
        testres (test_result.TestResult):
        **kwargs: forwarded to the flag-decorated printers

    Returns:
        None

    CommandLine:
        python dev.py -e print --db PZ_MTEST \
            -a default:dpername=1,qpername=[1,2] -t default:fg_on=False
        python dev.py -e print -t best --db seals2 --allgt --vz
        python dev.py -e print --db PZ_MTEST --allgt -t custom \
            --print-confusion-stats
        python dev.py -e print --db PZ_MTEST --allgt --noqcache \
            --index 0:10:2 -t custom:rrvsone_on=True --print-confusion-stats
        python dev.py -e print --db PZ_MTEST --allgt --noqcache --qaid4 \
            -t custom:rrvsone_on=True --print-confusion-stats
        python -m wbia print_results -t default --db PZ_MTEST -a ctrl
        python -m wbia print_results -t default --db PZ_MTEST -a ctrl
        python -m wbia print_results --db PZ_MTEST -a default
            -t default:lnbnn_on=True default:lnbnn_on=False,bar_l2_on=True \
            default:lnbnn_on=False,normonly_on=True

    CommandLine:
        python -m wbia.expt.experiment_printres --test-print_results
        utprof.py -m wbia.expt.experiment_printres --test-print_results

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.expt.experiment_printres import *  # NOQA
        >>> from wbia.init import main_helpers
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     'pz_mtest', a='default:dpername=1,qpername=[1,2]',
        >>>     t='default:fg_on=false')
        >>> result = print_results(ibs, testres)
        >>> print(result)
    """
    wanted_attrs = [
        'cfg_list',
        'cfgx2_cmsinfo',
        'testnameid',
        'cfgx2_lbl',
        'cfgx2_qreq_',
    ]
    (cfg_list, cfgx2_cmsinfo, testnameid, cfgx2_lbl, cfgx2_qreq_) = ut.dict_take(
        testres.__dict__, wanted_attrs
    )

    logger.info(' --- PRINT RESULTS ---')

    # Rank thresholds to score against, and the query count of every config.
    X_LIST = testres.get_X_LIST()
    cfgx2_nQuery = [len(cfg_qaids) for cfg_qaids in testres.cfgx2_qaids]
    # Maps a threshold X to a per-config array of scores.
    nLessX_dict = testres.get_nLessX_dict()

    # One summary line per threshold describing the best-scoring config(s).
    best_rankscore_summary = []
    for X, cfgx2_nLessX in nLessX_dict.items():
        max_nLessX = cfgx2_nLessX.max()
        bestX_cfgx_list = np.where(cfgx2_nLessX == max_nLessX)[0]
        prefix = '[cfg*] %d cfg(s) scored ' % len(bestX_cfgx_list)
        # FIXME: only the query count of the first best config is reported
        first_best_cfgx = bestX_cfgx_list[0]
        best_rankscore_summary.append(
            prefix + rankscore_str(X, max_nLessX, cfgx2_nQuery[first_best_cfgx])
        )

    # NOTE(review): this helper is never invoked from this function, and its
    # last statement reads `testres_list`, which is not defined anywhere in
    # the visible code. Kept unchanged.
    @ut.argv_flag_dec
    def intersect_hack():
        failed = testres.rank_mat > 0
        colx2_failed = [np.nonzero(failed_col)[0] for failed_col in failed.T]
        failed_col1_only = np.setdiff1d(colx2_failed[0], colx2_failed[1])
        failed_col1_only_aids = ut.take(testres.qaids, failed_col1_only)

        gt_aids1 = ibs.get_annot_groundtruth(
            failed_col1_only_aids, daid_list=testres.cfgx2_qreq_[0].daids
        )
        gt_aids2 = ibs.get_annot_groundtruth(
            failed_col1_only_aids, daid_list=testres.cfgx2_qreq_[1].daids
        )

        qaids_expt = failed_col1_only_aids
        gt_avl_aids1 = ut.flatten(gt_aids1)
        gt_avl_aids2 = list(set(ut.flatten(gt_aids2)).difference(gt_avl_aids1))

        ibs.print_annotconfig_stats(qaids_expt, gt_avl_aids1)
        ibs.print_annotconfig_stats(qaids_expt, gt_avl_aids2)

        acfg = testres.acfg_list[0]
        import copy

        acfg1 = copy.deepcopy(acfg)
        acfg2 = copy.deepcopy(acfg)
        acfg1['qcfg']['min_pername'] = None
        acfg2['qcfg']['min_pername'] = None
        acfg1['dcfg']['min_pername'] = None
        acfg2['dcfg']['min_gt_per_name'] = None

        acfg1['qcfg']['default_aids'] = qaids_expt
        acfg1['dcfg']['gt_avl_aids'] = gt_avl_aids1
        acfg2['qcfg']['default_aids'] = qaids_expt
        acfg2['dcfg']['gt_avl_aids'] = gt_avl_aids2

        from wbia.init import filter_annots
        from wbia.expt import experiment_helpers

        annots1 = filter_annots.expand_acfgs(ibs, acfg1, verbose=True)
        annots2 = filter_annots.expand_acfgs(ibs, acfg2, verbose=True)

        acfg_name_list = dict(  # NOQA
            acfg_list=[acfg1, acfg2], expanded_aids_list=[annots1, annots2]
        )
        test_cfg_name_list = ['candidacy_k']
        cfgdict_list, pipecfg_list = experiment_helpers.get_pipecfg_list(
            test_cfg_name_list, ibs=ibs
        )

        t1, t2 = testres_list  # NOQA

    # ------------
    # The nested printer names below are kept as-is on purpose: they are
    # handed to ut.argv_flag_dec, which presumably derives the flag name
    # from the function name.

    @ut.argv_flag_dec
    def print_collbl():
        bar = '====================='
        logger.info(bar)
        logger.info('[harn] Col/Config Labels: %s' % testnameid)
        logger.info(bar)
        numbered_lbls = []
        for cfgx, cfglbl in enumerate(cfgx2_lbl):
            numbered_lbls.append('%2d) %s' % (cfgx, cfglbl))
        logger.info('[harn] cfglbl:\n%s' % '\n'.join(numbered_lbls))

    print_collbl()

    # ------------

    @ut.argv_flag_dec
    def print_cfgstr():
        bar = '====================='
        logger.info(bar)
        logger.info('[harn] Config Strings: %s' % testnameid)
        logger.info(bar)
        numbered_cfgstrs = []
        for cfgx, query_cfg in enumerate(cfg_list):
            numbered_cfgstrs.append('%2d) %s' % (cfgx, query_cfg.get_cfgstr()))
        logger.info('\n[harn] cfgstr:\n%s' % '\n'.join(numbered_cfgstrs))

    print_cfgstr(**kwargs)

    @ut.argv_flag_dec()
    def print_colscore():
        bar = '=================='
        logger.info(bar)
        logger.info('[harn] Scores per Config: %s' % testnameid)
        logger.info(bar)
        logger.info('\n[harn] ... sorted scores')
        for X in X_LIST:
            logger.info('\n[harn] Sorted #ranks < %r scores' % X)
            cfgx2_score = nLessX_dict[int(X)]
            # Visit configs from lowest to highest score for this threshold.
            for cfgx in np.array(cfgx2_score).argsort():
                rankstr = rankscore_str(
                    X, cfgx2_score[cfgx], cfgx2_nQuery[cfgx], withlbl=False
                )
                logger.info('[score] %s --- %s' % (rankstr, cfgx2_lbl[cfgx]))

    print_colscore(**kwargs)

    ut.argv_flag_dec(testres.print_percent_identification_success)(**kwargs)

    # Always-printed summary box of the best scores.
    summary_str = '\n'.join(
        [
            '++===========================',
            '|| [cfg*] TestName: %s' % testnameid,
            '||---------------------------',
            ut.joins('\n|| ', best_rankscore_summary),
            'LL===========================',
        ]
    )
    ut.colorprint(summary_str, 'blue')

    logger.info('To enable all printouts add --print-all to the commandline')
def rankscore_str(thresh, nLess, total, withlbl=True):
    """
    Format a rank score of one config as text.

    The result has the form ``<nLess>/<total> = (<percent>%)
    (err=<total - nLess>/<total>)``, optionally prefixed with
    ``:#ranks < <thresh> = ``.

    Args:
        thresh (int): rank threshold; only used in the label prefix
        nLess (int): number of queries counted as a success
        total (int): number of queries that were scored
        withlbl (bool): if True, prepend the ``:#ranks < <thresh> = `` label

    Returns:
        str: the formatted score
    """
    if total > 0:
        percent = 100 * nLess / total
        # Field width for the counts, as chosen by utool.
        width = ut.num2_sigfig(total)
    else:
        # With no queries the ratio is undefined and the division would
        # raise; report 0.0% with a minimal field width instead of crashing
        # the printout.
        percent = 0.0
        width = 1
    fmtsf = '%' + str(width) + 'd'
    score_fmt = fmtsf + '/%d = (%.1f%%) (err=' + fmtsf + '/' + str(total) + ')'
    score = score_fmt % (nLess, total, percent, (total - nLess))
    if withlbl:
        return (':#ranks < %d = ' % thresh) + score
    return score