Source code for wbia.algo.verif.pairfeat

# -*- coding: utf-8 -*-
import logging
import utool as ut
import vtool as vt
import numpy as np
import ubelt as ub
import pandas as pd
from wbia import dtool as dt
from os.path import join
from wbia.algo.graph import nx_utils as nxu
from wbia.core_annots import ChipConfig

print, rrr, profile = ut.inject2(__name__)
logger = logging.getLogger('wbia')


class PairFeatureConfig(dt.Config):
    """
    Config for building pairwise feature dimensions.

    I.e. a config to distill unordered feature correspondences into a
    fixed-length vector.
    """

    _param_info_list = [
        # ut.ParamInfo('indices', slice(0, 5)),
        ut.ParamInfo('indices', []),
        ut.ParamInfo(
            'summary_ops',
            {
                # 'invsum',
                'sum',
                'std',
                'mean',
                'len',
                'med',
            },
        ),
        ut.ParamInfo('local_keys', None),
        ut.ParamInfo(
            'sorters',
            [
                # 'ratio', 'norm_dist', 'match_dist'
                # 'lnbnn', 'lnbnn_norm_dist',
            ],
        ),
        # ut.ParamInfo('bin_key', None, valid_values=[None, 'ratio']),
        ut.ParamInfo('bin_key', 'ratio', valid_values=[None, 'ratio']),
        # ut.ParamInfo('bins', [.5, .6, .7, .8])
        # ut.ParamInfo('bins', None, type_=eval),
        ut.ParamInfo('bins', (0.625,), type_=eval),
        # ut.ParamInfo('need_lnbnn', False),
        # change use_na to True if sklearn gets RFs with nan support
        ut.ParamInfo('use_na', False),
    ]
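
# A minimal construction sketch (not part of the original module); keyword
# names mirror the ParamInfo entries above and the values are illustrative:
#
#     >>> cfg = PairFeatureConfig(
#     >>>     summary_ops={'sum', 'mean', 'len'},
#     >>>     bin_key='ratio',
#     >>>     bins=(0.625,),
#     >>> )
#     >>> print(cfg)
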
class VsOneMatchConfig(dt.Config):
    _param_info_list = vt.matching.VSONE_DEFAULT_CONFIG

class VsOneFeatConfig(dt.Config):
    """keypoint params"""

    _param_info_list = vt.matching.VSONE_FEAT_CONFIG

class MatchConfig(dt.Config):
    _param_info_list = (
        vt.matching.VSONE_DEFAULT_CONFIG
        + vt.matching.VSONE_FEAT_CONFIG
        + ChipConfig._param_info_list
    )
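
# MatchConfig pools the one-vs-one matching params, the keypoint params, and
# the chip params into a single flat namespace. A hedged sketch (values are
# illustrative; both keys come from the vsone defaults):
#
#     >>> cfg = MatchConfig(ratio_thresh=0.8, sv_on=True)
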

class PairwiseFeatureExtractor(object):
    r"""
    Args:
        ibs (wbia.IBEISController): image analysis api
        match_config (dict): config for building feature correspondences
        pairfeat_cfg (dict): config for making the pairwise feat vec
        global_keys (list): global keys to use
        need_lnbnn (bool): use LNBNN for enrichment
        feat_dims (list): subset of feature dimensions (from pruning)
            if None, then all dimensions are used
        use_cache (bool): turns on disk based caching (default = True)
        verbose (int): verbosity flag (default = 1)

    CommandLine:
        python -m wbia.algo.verif.pairfeat PairwiseFeatureExtractor

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.verif.pairfeat import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb('testdb1')
        >>> extr = PairwiseFeatureExtractor(ibs)
        >>> edges = [(1, 2), (2, 3)]
        >>> X = extr.transform(edges)
        >>> featinfo = vt.AnnotPairFeatInfo(X.columns)
        >>> print(featinfo.get_infostr())
    """

    def __init__(
        extr,
        ibs=None,
        config={},
        use_cache=True,
        verbose=1,
        # Nested config props
        match_config=None,
        pairfeat_cfg=None,
        global_keys=None,
        need_lnbnn=None,
        feat_dims=None,
    ):
        extr.verbose = verbose
        extr.use_cache = use_cache
        extr.ibs = ibs
        # Config handling here is a bit irregular: allow `config` to act as a
        # catch-all that can store params in either nested or flat form.
        config = config.copy()
        vars_ = vars()

        def _popconfig(key, default):
            """ensure a param is specified in the func args xor the config"""
            if key in config:
                if vars_.get(key, None) is not None:
                    raise ValueError('{} specified twice'.format(key))
                value = config.pop(key)
            else:
                # See if the local namespace has it
                value = vars_.get(key, None)
                if value is None:
                    value = default
            return value

        # These also sort-of belong to the pair-feat config
        extr.global_keys = _popconfig('global_keys', [])
        extr.need_lnbnn = _popconfig('need_lnbnn', False)
        extr.feat_dims = _popconfig('feat_dims', None)
        extr.match_config = MatchConfig(**_popconfig('match_config', {}))
        extr.pairfeat_cfg = PairFeatureConfig(**_popconfig('pairfeat_cfg', {}))
        # Allow config to store flat versions of these params
        extr.match_config.pop_update(config)
        extr.pairfeat_cfg.pop_update(config)
        if len(config) > 0:
            raise ValueError('Unused config items: ' + ut.repr4(config))
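
    # The catch-all `config` accepts the nested sub-config params in flat
    # form via pop_update. A hedged sketch (values illustrative): these two
    # constructions should be equivalent:
    #
    #     >>> extr1 = PairwiseFeatureExtractor(
    #     >>>     ibs, match_config={'ratio_thresh': 0.8},
    #     >>>     pairfeat_cfg={'summary_ops': {'sum', 'mean'}})
    #     >>> extr2 = PairwiseFeatureExtractor(
    #     >>>     ibs, config={'ratio_thresh': 0.8,
    #     >>>                  'summary_ops': {'sum', 'mean'}})
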
    def transform(extr, edges):
        """
        Converts annotation edges into their corresponding feature vectors.
        By default this is a caching operation.
        """
        if extr.use_cache:
            feats = extr._cached_pairwise_features(edges)
        else:
            # _make_pairwise_features returns (matches, feats)
            matches, feats = extr._make_pairwise_features(edges)
            feats = extr._postprocess_feats(feats)
        return feats
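
    # Usage sketch (assumes the `testdb1` setup from the class doctest):
    #
    #     >>> extr = PairwiseFeatureExtractor(ibs, use_cache=False)
    #     >>> X = extr.transform([(1, 2), (2, 3)])  # bypasses the disk cache
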
    def _exec_pairwise_match(extr, edges, prog_hook=None):
        """
        Performs one-vs-one matching between pairs of annotations.
        This establishes the feature correspondences.

        CommandLine:
            python -m wbia.algo.verif.pairfeat _exec_pairwise_match --show

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.verif.pairfeat import *  # NOQA
            >>> import wbia
            >>> ibs = wbia.opendb('testdb1')
            >>> match_config = dict(histeq=True)
            >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config)
            >>> edges = [(1, 2), (2, 3)]
            >>> prog_hook = None
            >>> match_list = extr._exec_pairwise_match(edges)
            >>> match1, match2 = match_list
            >>> assert match1.annot2 is match2.annot1
            >>> assert match1.annot1 is not match2.annot2
            >>> ut.quit_if_noshow()
            >>> match2.show()
            >>> ut.show_if_requested()
        """
        if extr.verbose:
            logger.info('[extr] executing pairwise one-vs-one matching')
        ibs = extr.ibs
        match_config = extr.match_config
        edges = ut.lmap(tuple, ut.aslist(edges))
        qaids = ut.take_column(edges, 0)
        daids = ut.take_column(edges, 1)
        # The depcache does the pairwise matching procedure
        match_list = ibs.depc.get(
            'pairwise_match', (qaids, daids), 'match', config=match_config
        )
        # Hack: Postprocess matches to re-add wbia annotation info
        # in lazy-dict format
        from wbia import core_annots

        config = ut.hashdict(match_config)
        qannot_cfg = dannot_cfg = config
        preload = True
        configured_lazy_annots = core_annots.make_configured_annots(
            ibs, qaids, daids, qannot_cfg, dannot_cfg, preload=preload
        )
        for qaid, daid, match in zip(qaids, daids, match_list):
            match.annot1 = configured_lazy_annots[config][qaid]
            match.annot2 = configured_lazy_annots[config][daid]
            match.config = config
        return match_list

    def _enrich_matches_lnbnn(extr, matches, other_aids, other_nids, inplace=False):
        """
        Given a set of one-vs-one matches, searches for LNBNN normalizers in a
        larger database to enrich the matches with database-level
        distinctiveness.
        """
        from wbia.algo.hots import nn_weights

        raise NotImplementedError(
            "haven't tested since the re-work. "
            'Need to ensure that things work correctly.'
        )
        ibs = extr.ibs
        cfgdict = {
            'can_match_samename': False,
            'can_match_sameimg': True,
            'K': 3,
            'Knorm': 3,
            'prescore_method': 'csum',
            'score_method': 'csum',
        }
        custom_nid_lookup = ut.dzip(other_aids, other_nids)
        aids = [m.annot2['aid'] for m in matches]
        qreq_ = ibs.new_query_request(
            aids,
            other_aids,
            cfgdict=cfgdict,
            custom_nid_lookup=custom_nid_lookup,
            verbose=extr.verbose >= 2,
        )
        qreq_.load_indexer()
        indexer = qreq_.indexer
        if not inplace:
            matches_ = [match.copy() for match in matches]
        else:
            matches_ = matches
        K = qreq_.qparams.K
        Knorm = qreq_.qparams.Knorm
        normalizer_rule = qreq_.qparams.normalizer_rule

        extr.print('Stacking vecs for batch lnbnn matching')
        offset_list = np.cumsum([0] + [match_.fm.shape[0] for match_ in matches_])
        stacked_vecs = np.vstack(
            [
                match_.matched_vecs2()
                for match_ in ut.ProgIter(matches_, label='stack matched vecs')
            ]
        )

        vecs = stacked_vecs
        num = K + Knorm
        idxs, dists = indexer.batch_knn(vecs, num, chunksize=8192, label='lnbnn scoring')

        idx_list = [idxs[left:right] for left, right in ut.itertwo(offset_list)]
        dist_list = [dists[left:right] for left, right in ut.itertwo(offset_list)]
        iter_ = zip(matches_, idx_list, dist_list)
        prog = ut.ProgIter(iter_, length=len(matches_), label='lnbnn scoring')
        for match_, neighb_idx, neighb_dist in prog:
            qaid = match_.annot2['aid']
            norm_k = nn_weights.get_normk(
                qreq_, qaid, neighb_idx, Knorm, normalizer_rule
            )
            ndist = vt.take_col_per_row(neighb_dist, norm_k)
            vdist = match_.local_measures['match_dist']
            lnbnn_dist = nn_weights.lnbnn_fn(vdist, ndist)
            lnbnn_clip_dist = np.clip(lnbnn_dist, 0, np.inf)
            match_.local_measures['lnbnn_norm_dist'] = ndist
            match_.local_measures['lnbnn'] = lnbnn_dist
            match_.local_measures['lnbnn_clip'] = lnbnn_clip_dist
            match_.fs = lnbnn_dist
        return matches_
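
    # The LNBNN score contrasts each descriptor's match distance against a
    # normalizer distance drawn from the wider database. A minimal sketch of
    # the scoring rule assumed above (nn_weights.lnbnn_fn holds the
    # authoritative definition; this comment is only illustrative):
    #
    #     >>> import numpy as np
    #     >>> def lnbnn_sketch(vdist, ndist):
    #     >>>     # a bigger normalizer/match gap => a more distinctive match
    #     >>>     return ndist - vdist
    #     >>> lnbnn_sketch(np.array([0.2, 0.5]), np.array([0.7, 0.6]))
    #     array([0.5, 0.1])
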
    def _enriched_pairwise_matches(extr, edges, prog_hook=None):
        """
        Adds extra domain specific local and global properties that the match
        object (feature correspondences) doesn't directly provide.

        Example:
            >>> # xdoctest: +REQUIRES(module:wbia_cnn, --slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.verif.pairfeat import *  # NOQA
            >>> import wbia
            >>> ibs = wbia.opendb('testdb1')
            >>> match_config = {
            >>>     'K': 1, 'Knorm': 3, 'affine_invariance': True,
            >>>     'augment_orientation': True, 'checks': 20, 'ratio_thresh': 0.8,
            >>>     'refine_method': 'homog', 'sv_on': True, 'sver_xy_thresh': 0.01,
            >>>     'symmetric': True, 'weight': 'fgweights'
            >>> }
            >>> global_keys = ['gps', 'qual', 'time']
            >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config,
            >>>                                 global_keys=global_keys)
            >>> assert extr.global_keys == global_keys
            >>> edges = [(1, 2), (2, 3)]
            >>> prog_hook = None
            >>> match_list = extr._enriched_pairwise_matches(edges)
            >>> match1, match2 = match_list
            >>> assert match1.annot2 is match2.annot1
            >>> assert match1.annot1 is not match2.annot2
            >>> print('match1.global_measures = {!r}'.format(match1.global_measures))
            >>> assert len(match1.global_measures) == 3, 'global measures'
        """
        # logger.info('extr.global_keys = {!r}'.format(extr.global_keys))
        if extr.global_keys is None:
            raise ValueError('specify global keys')
        # global_keys = ['view_int', 'qual', 'gps', 'time']
        # global_keys = ['view', 'qual', 'gps', 'time']
        matches = extr._exec_pairwise_match(edges, prog_hook=prog_hook)
        if extr.need_lnbnn:
            extr._enrich_matches_lnbnn(matches, inplace=True)
        if extr.verbose:
            logger.info('[extr] enriching match attributes')
        # Ensure matches know about relevant metadata
        for match in matches:
            vt.matching.ensure_metadata_normxy(match.annot1)
            vt.matching.ensure_metadata_normxy(match.annot2)
        for match in ut.ProgIter(matches, label='setup globals'):
            match.add_global_measures(extr.global_keys)
        for match in ut.ProgIter(matches, label='setup locals'):
            match.add_local_measures()
        return matches

    def _make_pairwise_features(extr, edges):
        """
        Construct matches and their pairwise features

        CommandLine:
            python -m wbia.algo.verif.pairfeat _make_pairwise_features

        Doctest:
            >>> # xdoctest: +REQUIRES(module:wbia_cnn)
            >>> from wbia.algo.verif.pairfeat import *
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_mtest_infr()
            >>> extr = PairwiseFeatureExtractor(ibs=infr.ibs)
            >>> match_config = {'K': 1, 'Knorm': 3, 'affine_invariance': True,
            >>>                 'augment_orientation': True, 'checks': 20,
            >>>                 'ratio_thresh': 0.8, 'refine_method': 'homog',
            >>>                 'sv_on': True, 'sver_xy_thresh': 0.01,
            >>>                 'symmetric': True, 'weight': 'fgweights'}
            >>> local_keys = [
            >>>     'fgweights', 'match_dist', 'norm_dist', 'norm_x1', 'norm_x2',
            >>>     'norm_y1', 'norm_y2', 'ratio_score', 'scale1', 'scale2',
            >>>     'sver_err_ori', 'sver_err_scale', 'sver_err_xy',
            >>>     'weighted_norm_dist', 'weighted_ratio_score']
            >>> pairfeat_cfg = {
            >>>     'bin_key': 'ratio',
            >>>     'bins': [0.6, 0.7, 0.8],
            >>>     'indices': [],
            >>>     'local_keys': local_keys,
            >>>     'sorters': [],
            >>>     'summary_ops': {'len', 'mean', 'sum'}
            >>> }
            >>> global_keys = ['gps', 'qual', 'time', 'view']
            >>> ibs = infr.ibs
            >>> extr = PairwiseFeatureExtractor(ibs, match_config=match_config,
            >>>                                 pairfeat_cfg=pairfeat_cfg,
            >>>                                 global_keys=global_keys)
            >>> multi_index = True
            >>> edges = [(1, 2), (2, 3)]
            >>> matches, X = extr._make_pairwise_features(edges)
            >>> featinfo = vt.AnnotPairFeatInfo(X.columns)
            >>> print(featinfo.get_infostr())
            >>> match = matches[0]
            >>> glob_X = match._make_global_feature_vector(global_keys)
            >>> assert len(glob_X) == 19
        """
        edges = ut.lmap(tuple, ut.aslist(edges))
        if len(edges) == 0:
            return [], []

        matches = extr._enriched_pairwise_matches(edges)
        # ---------------
        # Try different feature constructions
        logger.info('[extr] building pairwise features')
        pairfeat_cfg = extr.pairfeat_cfg.copy()
        use_na = pairfeat_cfg.pop('use_na')
        pairfeat_cfg['summary_ops'] = set(pairfeat_cfg['summary_ops'])
        X = pd.DataFrame(
            [
                m.make_feature_vector(**pairfeat_cfg)
                for m in ut.ProgIter(matches, label='making pairwise feats')
            ]
        )
        multi_index = True
        if multi_index:
            # Index features by edges
            uv_index = nxu.ensure_multi_index(edges, ('aid1', 'aid2'))
            X.index = uv_index
        X[pd.isnull(X)] = np.nan
        X[np.isinf(X)] = np.nan
        # Re-order column names to ensure dimensions are consistent
        X = X.reindex(sorted(X.columns), axis=1)
        # hack to fix feature validity
        if 'global(speed)' in X.columns:
            if np.any(np.isinf(X['global(speed)'])):
                flags = np.isinf(X['global(speed)'])
                numer = X.loc[flags, 'global(gps_delta)']
                denom = X.loc[flags, 'global(time_delta)']
                newvals = np.full(len(numer), np.nan)
                newvals[(numer == 0) & (denom == 0)] = 0
                X.loc[flags, 'global(speed)'] = newvals

        aid_pairs_ = [(m.annot1['aid'], m.annot2['aid']) for m in matches]
        assert aid_pairs_ == edges, 'edge ordering changed'

        if not use_na:
            # Fill nan values with very large values to workaround lack of nan
            # support in sklearn master.
            X[pd.isnull(X)] = (2 ** 30) - 1

        return matches, X
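
    # Sketch of inspecting the resulting feature frame; the (aid1, aid2)
    # multi-index and the 'global(...)' column naming follow from the method
    # above, though the exact columns depend on the configured keys:
    #
    #     >>> matches, X = extr._make_pairwise_features([(1, 2), (2, 3)])
    #     >>> assert list(X.index.names) == ['aid1', 'aid2']
    #     >>> print(vt.AnnotPairFeatInfo(X.columns).get_infostr())
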
    def _make_cfgstr(extr, edges):
        ibs = extr.ibs
        edge_uuids = ibs.unflat_map(ibs.get_annot_visual_uuids, edges)
        edge_hashid = ut.hashid_arr(edge_uuids, 'edges')

        _cfg_lbl = ut.partial(ut.repr2, si=True, itemsep='', kvsep=':')
        match_configclass = ibs.depc_annot.configclass_dict['pairwise_match']

        cfgstr = '_'.join(
            [
                edge_hashid,
                _cfg_lbl(extr.match_config),
                _cfg_lbl(extr.pairfeat_cfg),
                'global(' + _cfg_lbl(extr.global_keys) + ')',
                'pairwise_match_version=%r' % (match_configclass().version,),
            ]
        )
        return cfgstr

    def _postprocess_feats(extr, feats):
        # Take the filtered subset of columns
        if extr.feat_dims is not None:
            missing = set(extr.feat_dims).difference(feats.columns)
            if any(missing):
                # logger.info('We have: ' + ut.repr4(feats.columns))
                alt = feats.columns.difference(extr.feat_dims)
                mis_msg = 'Missing feature dims: ' + ut.repr4(missing)
                alt_msg = 'Did you mean? ' + ut.repr4(alt)
                logger.info(mis_msg)
                logger.info(alt_msg)
                raise KeyError(mis_msg)
            feats = feats[extr.feat_dims]
        return feats
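
    # Usage sketch for the pruning hook above; `feat_dims` would normally be
    # the column subset a downstream classifier was trained on (the names
    # here are hypothetical):
    #
    #     >>> extr.feat_dims = ['global(speed)', 'global(time_delta)']
    #     >>> feats = extr._postprocess_feats(feats)
    #     >>> # a KeyError is raised if a requested dimension was never built
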
    def _cached_pairwise_features(extr, edges):
        """
        Creates pairwise features for annotations in a test inference object
        based on the feature dimensions used during learning.

        TODO: need a more systematic way of specifying which feature
        dimensions need to be computed

        Notes:
            Given an edge (u, v), we need to:
            * Check which classifiers we have
            * Check which feat-cols the classifier needs,
              and construct a configuration that can achieve that.
                * Construct the chip/feat config
                * Construct the vsone config
                * Additional LNBNN enriching config
                * Pairwise feature construction config
            * Then we can apply the feature to the classifier

            edges = [(1, 2)]
        """
        edges = list(edges)
        if extr.verbose:
            logger.info(
                '[pairfeat] Requesting {} cached pairwise features'.format(len(edges))
            )

        # TODO: use object properties
        if len(edges) == 0:
            assert extr.feat_dims is not None, 'no edges and unset feat dims'
            index = nxu.ensure_multi_index([], ('aid1', 'aid2'))
            feats = pd.DataFrame(columns=extr.feat_dims, index=index)
            return feats
        else:
            use_cache = not extr.need_lnbnn and len(edges) > 2
            cache_dir = join(extr.ibs.get_cachedir(), 'infr_bulk_cache')
            feat_cfgstr = extr._make_cfgstr(edges)
            cacher = ub.Cacher(
                'bulk_pairfeats_v3',
                feat_cfgstr,
                enabled=use_cache,
                dpath=cache_dir,
                verbose=extr.verbose - 3,
            )

            # if cacher.exists() and extr.verbose > 3:
            #     fpath = cacher.get_fpath()
            #     logger.info('Load match cache size: {}'.format(
            #         ut.get_file_nBytes_str(fpath)))

            try:
                data = cacher.tryload()
            except TypeError:
                data = None
            if data is None:
                data = extr._make_pairwise_features(edges)
                cacher.save(data)

            # if cacher.enabled and extr.verbose > 3:
            #     fpath = cacher.get_fpath()
            #     logger.info('Save match cache size: {}'.format(
            #         ut.get_file_nBytes_str(fpath)))

            matches, feats = data
            feats = extr._postprocess_feats(feats)
        return feats
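
# End-to-end sketch of the cached pathway (mirrors the class doctest). Per
# the `len(edges) > 2` guard above, the bulk disk cache only engages for
# three or more edges; it lives under <ibs.get_cachedir()>/infr_bulk_cache,
# keyed by the cfgstr from _make_cfgstr:
#
#     >>> import wbia
#     >>> ibs = wbia.opendb('testdb1')
#     >>> extr = PairwiseFeatureExtractor(ibs)
#     >>> edges = [(1, 2), (2, 3), (1, 3)]
#     >>> X = extr.transform(edges)  # a repeat call loads from the cache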