Source code for wbia.algo.graph.mixin_matching

# -*- coding: utf-8 -*-
import logging
import numpy as np
import utool as ut
import pandas as pd
import itertools as it
import networkx as nx
import vtool as vt
from os.path import join  # NOQA
from wbia.algo.graph import nx_utils as nxu
from wbia.algo.graph.nx_utils import e_
from wbia.algo.graph.state import POSTV, NEGTV, INCMP, UNREV  # NOQA
from concurrent import futures
import tqdm


print, rrr, profile = ut.inject2(__name__)
logger = logging.getLogger('wbia')


def _cm_breaking_worker(cm_list, review_cfg={}, scoring='annot'):
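    """Convert ranked ChipMatch results into candidate edges by keeping the
    top `ranks_top` database annots for each query annotation."""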

    ranks_top = review_cfg.get('ranks_top', None)
    ranks_bot = review_cfg.get('ranks_bot', None)

    # Construct K-broken graph
    edges = []

    if ranks_bot is None:
        ranks_bot = 0

    scoring = scoring.lower()
    assert scoring in ['annot', 'name']
    assert ranks_bot == 0

    for count, cm in enumerate(cm_list):
        score_list = cm.annot_score_list

        # rank_list = ut.argsort(score_list)[::-1]
        # sortx = ut.argsort(rank_list)

        # top_sortx = sortx[:ranks_top]
        # bot_sortx = sortx[len(sortx) - ranks_bot :]
        # short_sortx = ut.unique(top_sortx + bot_sortx)
        # daid_list = ut.take(cm.daid_list, short_sortx)

        values = sorted(zip(score_list, cm.daid_list))[::-1]
        keep = values[:ranks_top]
        daid_list = ut.take_column(keep, 1)
        for daid in daid_list:
            u, v = (cm.qaid, daid)
            if v < u:
                u, v = v, u
            edges.append((u, v))
    return edges
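

# Illustrative sketch (not part of the original module): the top-k "breaking"
# step above keeps only the highest-scoring database annots per query and
# emits undirected (u, v) edges with u < v. This toy function mirrors that
# logic on synthetic scores without needing a ChipMatch object.
def _example_top_k_breaking(ranks_top=2):
    qaid = 1
    daid_list = [2, 3, 4, 5]
    score_list = [0.1, 0.9, 0.5, 0.7]
    values = sorted(zip(score_list, daid_list))[::-1]
    keep = values[:ranks_top]
    edges = []
    for _, daid in keep:
        u, v = (qaid, daid) if qaid < daid else (daid, qaid)
        edges.append((u, v))
    return edges  # -> [(1, 3), (1, 5)]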


def _make_rankings_worker(args):
    import wbia  # NOQA
    import tqdm  # NOQA

    (
        dbdir,
        qaids_chunk,
        daids,
        cfgdict,
        custom_nid_lookup,
        verbose,
        use_cache,
        invalidate_supercache,
        ranks_top,
    ) = args

    ibs = wbia.opendb(dbdir=dbdir)

    edges = set()
    for qaids in tqdm.tqdm(qaids_chunk):

        qreq_ = ibs.new_query_request(
            [qaids],
            daids,
            cfgdict=cfgdict,
            custom_nid_lookup=custom_nid_lookup,
            verbose=verbose,
        )

        cm_list = qreq_.execute(
            prog_hook=None,
            use_cache=use_cache,
            invalidate_supercache=False,
        )

        new_edges = set(_cm_breaking_worker(cm_list, review_cfg={'ranks_top': ranks_top}))
        edges = edges | new_edges

    return edges
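

# Illustrative sketch (not part of the original module): _make_rankings (below)
# optionally chunks the query aids, submits one _make_rankings_worker call per
# chunk to a ThreadPoolExecutor, and unions the edge sets returned by each
# worker. This minimal stand-in shows the same fan-out/fan-in pattern with a
# dummy worker instead of a real wbia database query.
def _example_chunked_ranking(qaids=tuple(range(20)), batch_size=5, nprocs=4):
    def dummy_worker(chunk):
        # stand-in for _make_rankings_worker: return one edge per query aid
        return {(qaid, qaid + 1) for qaid in chunk}

    qaids = list(qaids)
    chunks = [qaids[i:i + batch_size] for i in range(0, len(qaids), batch_size)]
    edges = set()
    with futures.ThreadPoolExecutor(nprocs) as executor:
        fs_chunk = [executor.submit(dummy_worker, chunk) for chunk in chunks]
        for fs in fs_chunk:
            edges |= fs.result()
    return edges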


@ut.reloadable_class
class AnnotInfrMatching(object):
    """
    Methods for running matching algorithms
    """

    @profile
    def exec_matching(
        infr,
        qaids=None,
        daids=None,
        prog_hook=None,
        cfgdict=None,
        name_method='node',
        use_cache=True,
        invalidate_supercache=False,
        batch_size=None,
        ranks_top=5,
    ):
        """
        Loads chip matches into the inference structure
        Uses graph name labeling and ignores wbia labeling
        """
        return infr._make_rankings(
            qaids,
            daids,
            prog_hook,
            cfgdict,
            name_method,
            use_cache=use_cache,
            invalidate_supercache=invalidate_supercache,
            batch_size=batch_size,
            ranks_top=ranks_top,
        )

    def _set_vsmany_info(infr, qreq_, cm_list):
        infr.vsmany_qreq_ = qreq_
        infr.vsmany_cm_list = cm_list
        infr.cm_list = cm_list
        infr.qreq_ = qreq_

    def _make_rankings(
        infr,
        qaids=None,
        daids=None,
        prog_hook=None,
        cfgdict=None,
        name_method='node',
        use_cache=None,
        invalidate_supercache=None,
        batch_size=None,
        ranks_top=5,
    ):
        # from wbia.algo.graph import graph_iden
        # TODO: expose other ranking algos like SMK
        rank_algo = 'LNBNN'
        infr.print('Exec {} ranking algorithm'.format(rank_algo), 1)
        ibs = infr.ibs
        if qaids is None:
            qaids = infr.aids
        qaids = ut.ensure_iterable(qaids)
        if daids is None:
            daids = infr.aids
        if cfgdict is None:
            cfgdict = {
                # 'can_match_samename': False,
                'can_match_samename': True,
                'can_match_sameimg': True,
                # 'augment_queryside_hack': True,
                'K': 3,
                'Knorm': 3,
                'prescore_method': 'csum',
                'score_method': 'csum',
            }
        cfgdict.update(infr.ranker_params)
        infr.print('Using LNBNN config = %r' % (cfgdict,))

        # hack for using current nids
        if name_method == 'node':
            aids = sorted(set(ut.aslist(qaids) + ut.aslist(daids)))
            custom_nid_lookup = infr.get_node_attrs('name_label', aids)
        elif name_method == 'edge':
            custom_nid_lookup = {
                aid: nid for nid, cc in infr.pos_graph._ccs.items() for aid in cc
            }
        elif name_method == 'wbia':
            custom_nid_lookup = None
        else:
            raise KeyError('Unknown name_method={}'.format(name_method))

        verbose = infr.verbose >= 2

        # <HACK FOR PIE V2>
        if cfgdict.get('pipeline_root', None) in ['PieTwo']:
            from wbia_pie_v2._plugin import distance_to_score

            globals().update(locals())

            edges = []
            for qaid in tqdm.tqdm(qaids):
                daids_ = list(set(daids) - set([qaid]))
                pie_annot_distances = ibs.pie_v2_predict_light_distance(
                    qaid,
                    daids_,
                )
                score_list = [
                    distance_to_score(pie_annot_distance, norm=500.0)
                    for pie_annot_distance in pie_annot_distances
                ]
                values = sorted(zip(score_list, daids_))[::-1]
                keep = values[:ranks_top]
                daid_list = ut.take_column(keep, 1)
                for daid in daid_list:
                    u, v = (qaid, daid)
                    if v < u:
                        u, v = v, u
                    edges.append((u, v))
            edges = set(edges)
            return edges
        # </HACK>

        if batch_size is not None:
            qaids_chunks = list(ut.ichunks(qaids, batch_size))
            num_chunks = len(qaids_chunks)
            arg_iter = list(
                zip(
                    [ibs.dbdir] * num_chunks,
                    qaids_chunks,
                    [daids] * num_chunks,
                    [cfgdict] * num_chunks,
                    [custom_nid_lookup] * num_chunks,
                    [verbose] * num_chunks,
                    [use_cache] * num_chunks,
                    [invalidate_supercache] * num_chunks,
                    [ranks_top] * num_chunks,
                )
            )

            nprocs = 8
            logger.info('Creating %d processes' % (nprocs,))
            executor = futures.ThreadPoolExecutor(nprocs)

            logger.info('Submitting workers')
            fs_chunk = []
            for args in ut.ProgIter(arg_iter, lbl='submit matching threads'):
                fs = executor.submit(_make_rankings_worker, args)
                fs_chunk.append(fs)

            results = []
            try:
                for fs in ut.ProgIter(fs_chunk, lbl='getting matching result'):
                    result = fs.result()
                    results.append(result)
            except Exception:
                raise
            finally:
                executor.shutdown(wait=True)

            assert len(results) == num_chunks
            edges = set(ut.flatten(results))
        else:
            qreq_ = ibs.new_query_request(
                qaids,
                daids,
                cfgdict=cfgdict,
                custom_nid_lookup=custom_nid_lookup,
                verbose=infr.verbose >= 2,
            )
            # cacher = qreq_.get_big_cacher()
            # if not cacher.exists():
            #     pass
            #     # import sys
            #     # sys.exit(1)
            cm_list = qreq_.execute(
                prog_hook=prog_hook,
                use_cache=use_cache,
                invalidate_supercache=invalidate_supercache,
            )
            infr._set_vsmany_info(qreq_, cm_list)
            edges = set(_cm_breaking_worker(cm_list, review_cfg={'ranks_top': ranks_top}))

        return edges
        # return cm_list

    def _make_matches_from(infr, edges, config=None, prog_hook=None):
        from wbia.algo.verif import pairfeat

        if config is None:
            config = infr.verifier_params
        extr = pairfeat.PairwiseFeatureExtractor(infr.ibs, config=config)
        match_list = extr._exec_pairwise_match(edges, prog_hook=prog_hook)
        return match_list

    def exec_vsone_subset(infr, edges, prog_hook=None):
        r"""
        Args:
            prog_hook (None): (default = None)

        CommandLine:
            python -m wbia.algo.graph.core exec_vsone_subset

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> infr = testdata_infr('testdb1')
            >>> infr.ensure_full()
            >>> edges = [(1, 2), (2, 3)]
            >>> result = infr.exec_vsone_subset(edges)
            >>> print(result)
        """
        match_list = infr._make_matches_from(edges, prog_hook=prog_hook)
        # TODO: is this code necessary anymore?
        vsone_matches = {e_(u, v): match for (u, v), match in zip(edges, match_list)}
        infr.vsone_matches.update(vsone_matches)
        edge_to_score = {e: match.fs.sum() for e, match in vsone_matches.items()}
        infr.graph.add_edges_from(edge_to_score.keys())
        infr.set_edge_attrs('score', edge_to_score)
        return match_list

    def lookup_cm(infr, aid1, aid2):
        """
        Get chipmatch object associated with an edge if one exists.
        """
        if infr.cm_list is None:
            return None, aid1, aid2
        # TODO: keep chip matches in dictionary by default?
        aid2_idx = ut.make_index_lookup([cm.qaid for cm in infr.cm_list])

        switch_order = False

        if aid1 in aid2_idx:
            idx = aid2_idx[aid1]
            cm = infr.cm_list[idx]
            if aid2 not in cm.daid2_idx:
                switch_order = True
                # raise KeyError('switch order')
        else:
            switch_order = True

        if switch_order:
            # switch order
            aid1, aid2 = aid2, aid1
            idx = aid2_idx[aid1]
            cm = infr.cm_list[idx]
            if aid2 not in cm.daid2_idx:
                raise KeyError('No ChipMatch for edge (%r, %r)' % (aid1, aid2))
        return cm, aid1, aid2

    @profile
    def apply_match_edges(infr, review_cfg={}):
        """
        Adds results from one-vs-many rankings as edges in the graph
        """
        if infr.cm_list is None:
            infr.print('apply_match_edges - matching has not been run!')
            return
        infr.print('apply_match_edges', 1)

        edges = infr._cm_breaking(review_cfg)
        # Create match-based graph structure
        infr.print('apply_match_edges adding %d edges' % len(edges), 1)
        infr.graph.add_edges_from(edges)
        infr.apply_match_scores()

    def _cm_breaking(infr, cm_list=None, review_cfg={}, scoring='annot'):
        """
        >>> from wbia.algo.graph.core import *  # NOQA
        >>> review_cfg = {}
        """
        if cm_list is None:
            cm_list = infr.cm_list
        return _cm_breaking_worker(
            cm_list=cm_list, review_cfg=review_cfg, scoring=scoring
        )

    def _cm_training_pairs(
        infr,
        qreq_=None,
        cm_list=None,
        top_gt=2,
        mid_gt=2,
        bot_gt=2,
        top_gf=2,
        mid_gf=2,
        bot_gf=2,
        rand_gt=2,
        rand_gf=2,
        rng=None,
    ):
        """
        Constructs training data for a pairwise classifier

        CommandLine:
            python -m wbia.algo.graph.core _cm_training_pairs

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> infr = testdata_infr('PZ_MTEST')
            >>> infr.exec_matching(cfgdict={
            >>>     'can_match_samename': True,
            >>>     'K': 4,
            >>>     'Knorm': 1,
            >>>     'prescore_method': 'csum',
            >>>     'score_method': 'csum'
            >>> })
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> exec(ut.execstr_funckw(infr._cm_training_pairs))
            >>> rng = np.random.RandomState(42)
            >>> aid_pairs = np.array(infr._cm_training_pairs(rng=rng))
            >>> print(len(aid_pairs))
            >>> assert np.sum(aid_pairs.T[0] == aid_pairs.T[1]) == 0
        """
        if qreq_ is None:
            cm_list = infr.cm_list
            qreq_ = infr.qreq_
        ibs = infr.ibs
        aid_pairs = []
        dnids = qreq_.get_qreq_annot_nids(qreq_.daids)
        # dnids = qreq_.get_qreq_annot_nids(qreq_.daids)
        rng = ut.ensure_rng(rng)
        for cm in ut.ProgIter(cm_list, lbl='building pairs'):
            all_gt_aids = cm.get_top_gt_aids(ibs)
            all_gf_aids = cm.get_top_gf_aids(ibs)
            gt_aids = ut.take_percentile_parts(all_gt_aids, top_gt, mid_gt, bot_gt)
            gf_aids = ut.take_percentile_parts(all_gf_aids, top_gf, mid_gf, bot_gf)
            # get unscored examples
            unscored_gt_aids = [
                aid for aid in qreq_.daids[cm.qnid == dnids] if aid not in cm.daid2_idx
            ]
            rand_gt_aids = ut.random_sample(unscored_gt_aids, rand_gt, rng=rng)
            # gf_aids = cm.get_groundfalse_daids()
            _gf_aids = qreq_.daids[cm.qnid != dnids]
            _gf_aids = qreq_.daids.compress(cm.qnid != dnids)
            # gf_aids = ibs.get_annot_groundfalse(cm.qaid, daid_list=qreq_.daids)
            rand_gf_aids = ut.random_sample(_gf_aids, rand_gf, rng=rng).tolist()
            chosen_daids = ut.unique(gt_aids + gf_aids + rand_gf_aids + rand_gt_aids)
            aid_pairs.extend([(cm.qaid, aid) for aid in chosen_daids if cm.qaid != aid])
        return aid_pairs

    def _get_cm_agg_aid_ranking(infr, cc):
        aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
        all_scores = ut.ddict(list)
        for qaid in cc:
            cm = aid_to_cm[qaid]
            # should we be doing nids?
            for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
                all_scores[daid].append(score)

        max_scores = sorted((max(scores), aid) for aid, scores in all_scores.items())[
            ::-1
        ]
        ranked_aids = ut.take_column(max_scores, 1)
        return ranked_aids

    def _get_cm_edge_data(infr, edges, cm_list=None):
        symmetric = True

        if cm_list is None:
            cm_list = infr.cm_list
        # Find scores for the edges that exist in the graph
        edge_to_data = ut.ddict(dict)
        aid_to_cm = {cm.qaid: cm for cm in cm_list}
        for u, v in edges:
            if symmetric:
                u, v = e_(u, v)
            cm1 = aid_to_cm.get(u, None)
            cm2 = aid_to_cm.get(v, None)
            scores = []
            ranks = []
            for cm in ut.filter_Nones([cm1, cm2]):
                for aid in [u, v]:
                    idx = cm.daid2_idx.get(aid, None)
                    if idx is None:
                        continue
                    score = cm.annot_score_list[idx]
                    rank = cm.get_annot_ranks([aid])[0]
                    scores.append(score)
                    ranks.append(rank)
            if len(scores) == 0:
                score = None
                rank = None
            else:
                # Choose whichever one gave the best score
                idx = vt.safe_argmax(scores, nans=False)
                score = scores[idx]
                rank = ranks[idx]
            edge_to_data[(u, v)]['score'] = score
            edge_to_data[(u, v)]['rank'] = rank
        return edge_to_data

    @profile
    def apply_match_scores(infr):
        """
        Applies precomputed matching scores to edges that already exist in the
        graph. Typically you should run infr.apply_match_edges() before running
        this.

        CommandLine:
            python -m wbia.algo.graph.core apply_match_scores --show

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> infr = testdata_infr('PZ_MTEST')
            >>> infr.exec_matching()
            >>> infr.apply_match_edges()
            >>> infr.apply_match_scores()
            >>> infr.get_edge_attrs('score')
        """
        if infr.cm_list is None:
            infr.print('apply_match_scores - no scores to apply!')
            return
        infr.print('apply_match_scores', 1)
        edges = list(infr.graph.edges())
        edge_to_data = infr._get_cm_edge_data(edges)

        # Remove existing attrs
        ut.nx_delete_edge_attr(infr.graph, 'score')
        ut.nx_delete_edge_attr(infr.graph, 'rank')
        ut.nx_delete_edge_attr(infr.graph, 'normscore')

        edges = list(edge_to_data.keys())
        edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
        edge_scores = ut.replace_nones(edge_scores, np.nan)
        edge_scores = np.array(edge_scores)
        edge_ranks = np.array(ut.take_column(edge_to_data.values(), 'rank'))
        # take the inf-norm
        normscores = edge_scores / vt.safe_max(edge_scores, nans=False)

        # Add new attrs
        infr.set_edge_attrs('score', ut.dzip(edges, edge_scores))
        infr.set_edge_attrs('rank', ut.dzip(edges, edge_ranks))

        # Hack away zero probabilities
        # probs = np.vstack([p_nomatch, p_match, p_notcomp]).T + 1e-9
        # probs = vt.normalize(probs, axis=1, ord=1, out=probs)
        # entropy = -(np.log2(probs) * probs).sum(axis=1)
        infr.set_edge_attrs('normscore', dict(zip(edges, normscores)))

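
# Illustrative sketch (not part of the original module): apply_match_scores
# (above) and _make_lnbnn_scores (below) normalize raw LNBNN edge scores by
# the maximum finite score (the "inf-norm"), mapping them into [0, 1] while
# leaving NaNs for edges with no ChipMatch data. A minimal numpy equivalent:
def _example_normalize_scores(edge_scores=(5.0, 2.5, float('nan'), 10.0)):
    edge_scores = np.array(edge_scores, dtype=float)
    max_score = np.nanmax(edge_scores)  # analogue of vt.safe_max(..., nans=False)
    normscores = edge_scores / max_score
    return normscores  # -> [0.5, 0.25, nan, 1.0]
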
class InfrLearning(object):
    def learn_deploy_verifiers(infr, publish=False):
        """
        Uses current knowledge to train verifiers for new unseen pairs.

        Example:
            >>> # DISABLE_DOCTEST
            >>> import wbia
            >>> ibs = wbia.opendb('PZ_MTEST')
            >>> infr = wbia.AnnotInference(ibs, aids='all')
            >>> infr.ensure_mst()
            >>> publish = False
            >>> infr.learn_deploy_verifiers()

        Ignore:
            publish = True
        """
        infr.print('learn_deploy_verifiers')
        from wbia.algo.verif import vsone

        pblm = vsone.OneVsOneProblem(infr, verbose=True)
        pblm.primary_task_key = 'match_state'
        pblm.default_clf_key = 'RF'
        pblm.default_data_key = 'learn(sum,glob)'
        pblm.setup()
        dpath = '.'

        task_key = 'match_state'
        pblm.deploy(dpath, task_key=task_key, publish=publish)

        task_key = 'photobomb_state'
        if task_key in pblm.eval_task_keys:
            pblm.deploy(dpath, task_key=task_key)

    def learn_evaluation_verifiers(infr):
        """
        Creates a cross-validated ensemble of classifiers to evaluate
        verifier error cases and groundtruth errors.

        CommandLine:
            python -m wbia.algo.graph.mixin_matching learn_evaluation_verifiers

        Doctest:
            >>> # xdoctest: +REQUIRES(module:wbia_cnn, --slow)
            >>> import wbia
            >>> infr = wbia.AnnotInference(
            >>>     'PZ_MTEST', aids='all', autoinit='annotmatch',
            >>>     verbose=4)
            >>> verifiers = infr.learn_evaluation_verifiers()
            >>> edges = list(infr.edges())
            >>> verif = verifiers['match_state']
            >>> probs = verif.predict_proba_df(edges)
            >>> print(probs)
        """
        infr.print('learn_evaluation_verifiers')
        from wbia.algo.verif import vsone

        pblm = vsone.OneVsOneProblem(infr, verbose=5)
        pblm.primary_task_key = 'match_state'
        pblm.eval_clf_keys = ['RF']
        pblm.eval_data_keys = ['learn(sum,glob)']
        pblm.setup_evaluation()
        if True:
            pblm.report_evaluation()
        verifiers = pblm._make_evaluation_verifiers(pblm.eval_task_keys)
        return verifiers

    def load_published(infr):
        """
        Downloads, caches, and loads pre-trained verifiers.
        This is the default action.
        """
        from wbia.algo.verif import deploy

        ibs = infr.ibs
        species = ibs.get_primary_database_species(infr.aids)

        infr.print('Loading task_thresh for species: %r' % (species,))
        assert species in infr.task_thresh_dict
        infr.task_thresh = infr.task_thresh_dict[species]
        infr.print('infr.task_thresh: %r' % (infr.task_thresh,))

        infr.print('Loading verifiers for species: %r' % (species,))
        try:
            infr.verifiers = deploy.Deployer().load_published(ibs, species)
            message = 'Loaded verifiers %r' % (infr.verifiers,)
            infr.print(message)
        except TypeError as ex:
            message = 'Error: Failed to load verifiers for %r' % (species,)
            ut.printex(
                ex,
                message,
                iswarning=True,
                tb=True,
            )
            infr.print(message)

    def load_latest_classifiers(infr, dpath):
        from wbia.algo.verif import deploy

        task_clf_fpaths = deploy.Deployer(dpath).find_latest_local()
        classifiers = {}
        for task_key, fpath in task_clf_fpaths.items():
            clf_info = ut.load_data(fpath)
            assert (
                clf_info['metadata']['task_key'] == task_key
            ), 'bad saved clf at fpath={}'.format(fpath)
            classifiers[task_key] = clf_info
        infr.verifiers = classifiers
        # return classifiers

    def photobomb_samples(infr):
        edges = list(infr.edges())
        tags_list = list(infr.gen_edge_values('tags', edges=edges, default=[]))
        flags = ut.filterflags_general_tags(tags_list, has_any=['photobomb'])
        pb_edges = ut.compress(edges, flags)
        return pb_edges

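
# Illustrative sketch (not part of the original classes): photobomb_samples
# (above) selects edges whose 'tags' attribute mentions 'photobomb'. The
# utool helpers it uses behave roughly like this plain-Python version:
def _example_filter_photobomb_edges():
    edges = [(1, 2), (3, 4), (5, 6)]
    tags_list = [['photobomb'], [], ['photobomb', 'scenerymatch']]
    flags = ['photobomb' in tags for tags in tags_list]
    pb_edges = [edge for edge, flag in zip(edges, flags) if flag]
    return pb_edges  # -> [(1, 2), (5, 6)]
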
class _RedundancyAugmentation(object):

    # def rand_neg_check_edges(infr, c1_nodes, c2_nodes):
    #     """
    #     Find enough edges to between two pccs to make them k-negative complete
    #     """
    #     k = infr.params['redun.neg']
    #     existing_edges = nxu.edges_cross(infr.graph, c1_nodes, c2_nodes)
    #     reviewed_edges = {
    #         edge: state
    #         for edge, state in infr.get_edge_attrs(
    #             'decision', existing_edges,
    #             default=UNREV).items()
    #         if state != UNREV
    #     }
    #     n_neg = sum([state == NEGTV for state in reviewed_edges.values()])
    #     if n_neg < k:
    #         # Find k random negative edges
    #         check_edges = existing_edges - set(reviewed_edges)
    #         if len(check_edges) < k:
    #             edges = it.starmap(nxu.e_, it.product(c1_nodes, c2_nodes))
    #             for edge in edges:
    #                 if edge not in reviewed_edges:
    #                     check_edges.add(edge)
    #                     if len(check_edges) == k:
    #                         break
    #     else:
    #         check_edges = {}
    #     return check_edges

    def find_neg_augment_edges(infr, cc1, cc2, k=None):
        """
        Find enough edges between two PCCs to make them k-negative complete.
        The two CCs should be disjoint and not have any positive edges between
        them.

        Args:
            cc1 (set): nodes in one PCC
            cc2 (set): nodes in another positive-disjoint PCC
            k (int): redundancy level (if None uses infr.params['redun.neg'])

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph import demo
            >>> k = 2
            >>> cc1, cc2 = {1}, {2, 3}
            >>> # --- return an augmentation if feasible
            >>> infr = demo.demodata_infr(ccs=[cc1, cc2], ignore_pair=True)
            >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
            >>> assert edges == {(1, 2), (1, 3)}
            >>> # --- if infeasible return a partial augmentation
            >>> infr.add_feedback((1, 2), INCMP)
            >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
            >>> assert edges == {(1, 3)}
        """
        if k is None:
            k = infr.params['redun.neg']
        assert cc1 is not cc2, 'CCs should be disjoint (but they are the same)'
        assert len(cc1.intersection(cc2)) == 0, 'CCs should be disjoint'

        existing_edges = set(nxu.edges_cross(infr.graph, cc1, cc2))
        reviewed_edges = {
            edge: state
            for edge, state in zip(
                existing_edges, infr.edge_decision_from(existing_edges)
            )
            if state != UNREV
        }

        # Find how many negative edges we already have
        num = sum([state == NEGTV for state in reviewed_edges.values()])
        if num < k:
            # Find k random negative edges
            check_edges = existing_edges - set(reviewed_edges)
            # Check the existing but unreviewed edges first
            for edge in check_edges:
                num += 1
                yield edge
                if num >= k:
                    return
            # Check non-existing edges next
            seed = 2827295125
            try:
                seed += sum(cc1) + sum(cc2)
            except Exception:
                pass
            rng = np.random.RandomState(seed)
            cc1 = ut.shuffle(list(cc1), rng=rng)
            cc2 = ut.shuffle(list(cc2), rng=rng)
            cc1 = ut.shuffle(list(cc1), rng=rng)
            for edge in it.starmap(nxu.e_, nxu.diag_product(cc1, cc2)):
                if edge not in existing_edges:
                    num += 1
                    yield edge
                    if num >= k:
                        return

    def find_pos_augment_edges(infr, pcc, k=None):
        """
        # [[1, 0], [0, 2], [1, 2], [3, 1]]
        pos_sub = nx.Graph([[0, 1], [1, 2], [0, 2], [1, 3]])
        """
        if k is None:
            pos_k = infr.params['redun.pos']
        else:
            pos_k = k
        pos_sub = infr.pos_graph.subgraph(pcc)

        # TODO:
        # weight by pairs most likely to be comparable

        # First try to augment only with unreviewed existing edges
        unrev_avail = list(nxu.edges_inside(infr.unreviewed_graph, pcc))
        try:
            check_edges = list(
                nxu.k_edge_augmentation(
                    pos_sub, k=pos_k, avail=unrev_avail, partial=False
                )
            )
        except nx.NetworkXUnfeasible:
            check_edges = None
        if not check_edges:
            # Allow new edges to be introduced
            full_sub = infr.graph.subgraph(pcc).copy()
            new_avail = ut.estarmap(infr.e_, nx.complement(full_sub).edges())
            full_avail = unrev_avail + new_avail
            n_max = (len(pos_sub) * (len(pos_sub) - 1)) // 2
            n_complement = n_max - pos_sub.number_of_edges()
            if len(full_avail) == n_complement:
                # can use the faster algorithm
                check_edges = list(
                    nxu.k_edge_augmentation(pos_sub, k=pos_k, partial=True)
                )
            else:
                # have to use the slow approximate algo
                check_edges = list(
                    nxu.k_edge_augmentation(
                        pos_sub, k=pos_k, avail=full_avail, partial=True
                    )
                )
        check_edges = set(it.starmap(e_, check_edges))
        return check_edges

    @profile
    def find_pos_redun_candidate_edges(infr, k=None, verbose=False):
        r"""
        Searches for augmenting edges that would make PCCs k-positive redundant

        Doctest:
            >>> from wbia.algo.graph.mixin_matching import *  # NOQA
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_infr(ccs=[(1, 2, 3, 4, 5), (7, 8, 9, 10)])
            >>> infr.add_feedback((2, 5), 'match')
            >>> infr.add_feedback((1, 5), 'notcomp')
            >>> infr.params['redun.pos'] = 2
            >>> candidate_edges = list(infr.find_pos_redun_candidate_edges())
            >>> result = ('candidate_edges = ' + ut.repr2(candidate_edges))
            >>> print(result)
            candidate_edges = []
        """
        # Add random edges between existing non-redundant PCCs
        if k is None:
            k = infr.params['redun.pos']
        # infr.find_non_pos_redundant_pccs(k=k, relax=True)
        pcc_gen = list(infr.positive_components())
        prog = ut.ProgIter(pcc_gen, enabled=verbose, freq=1, adjust=False)
        for pcc in prog:
            if not infr.is_pos_redundant(pcc, k=k, relax=True, assume_connected=True):
                for edge in infr.find_pos_augment_edges(pcc, k=k):
                    yield nxu.e_(*edge)

    @profile
    def find_neg_redun_candidate_edges(infr, k=None):
        """
        Get pairs of PCCs that are not complete. Finds edges that might
        complete them.

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph.mixin_matching import *  # NOQA
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_infr(ccs=[(1,), (2,), (3,)], ignore_pair=True)
            >>> edges = list(infr.find_neg_redun_candidate_edges())
            >>> assert len(edges) == 3, 'all should be needed here'
            >>> infr.add_feedback_from(edges, evidence_decision=NEGTV)
            >>> assert len(list(infr.find_neg_redun_candidate_edges())) == 0

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_infr(pcc_sizes=[3] * 20, ignore_pair=True)
            >>> ccs = list(infr.positive_components())
            >>> gen = infr.find_neg_redun_candidate_edges(k=2)
            >>> for edge in gen:
            >>>     # What happens when we make ccs positive
            >>>     print(infr.node_labels(edge))
            >>>     infr.add_feedback(edge, evidence_decision=POSTV)
            >>> import ubelt as ub
            >>> infr = demo.demodata_infr(pcc_sizes=[1] * 30, ignore_pair=True)
            >>> ccs = list(infr.positive_components())
            >>> gen = infr.find_neg_redun_candidate_edges(k=3)
            >>> for chunk in ub.chunks(gen, 2):
            >>>     for edge in chunk:
            >>>         # What happens when we make ccs positive
            >>>         print(infr.node_labels(edge))
            >>>         infr.add_feedback(edge, evidence_decision=POSTV)
            >>> list(gen)
        """
        if k is None:
            k = infr.params['redun.neg']
        # Loop through all pairs
        for cc1, cc2 in infr.find_non_neg_redun_pccs(k=k):
            if len(cc1.intersection(cc2)) > 0:
                # If there is modification of the underlying graph while we
                # iterate, then two ccs may not be disjoint. Skip these cases.
                continue
            for u, v in infr.find_neg_augment_edges(cc1, cc2, k):
                edge = e_(u, v)
                infr.assert_edge(edge)
                yield edge

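
# Illustrative sketch (not part of the original classes): find_pos_augment_edges
# (above) asks for a k-edge-augmentation of the positive subgraph of a PCC,
# i.e. a small set of extra edges that would make it k-edge-connected.
# networkx provides this primitive directly; a minimal standalone example:
def _example_k_edge_augmentation(k=2):
    pos_sub = nx.path_graph(4)  # reviewed-positive edges: 0-1, 1-2, 2-3
    # candidate edges that could be added (here: every missing pair)
    avail = list(nx.complement(pos_sub).edges())
    aug_edges = list(
        nx.algorithms.connectivity.k_edge_augmentation(pos_sub, k=k, avail=avail)
    )
    return aug_edges  # for k=2 a single chord such as (0, 3) suffices
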
class CandidateSearch(_RedundancyAugmentation):
    """Search for candidate edges"""

    @profile
    def find_lnbnn_candidate_edges(
        infr,
        desired_states=[UNREV],
        can_match_samename=False,
        can_match_sameimg=False,
        K=5,
        Knorm=5,
        requery=True,
        prescore_method='csum',
        score_method='csum',
        sv_on=True,
        cfgdict_=None,
        batch_size=None,
    ):
        """
        Example:
            >>> # DISABLE_DOCTEST
            >>> # xdoctest: +REQUIRES(--slow)
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_mtest_infr()
            >>> cand_edges = infr.find_lnbnn_candidate_edges()
            >>> assert len(cand_edges) > 200, len(cand_edges)
        """
        # Refresh the name labels
        # TODO: abstract into a Ranker class

        # do LNBNN query for new edges
        # Use one-vs-many to establish candidate edges to classify
        cfgdict = {
            'resize_dim': 'width',
            'dim_size': 700,
            'requery': requery,
            'can_match_samename': can_match_samename,
            'can_match_sameimg': can_match_sameimg,
            'K': K,
            'Knorm': Knorm,
            'sv_on': sv_on,
            'prescore_method': prescore_method,
            'score_method': score_method,
        }
        if cfgdict_ is not None:
            cfgdict.update(cfgdict_)

        print('[find_lnbnn_candidate_edges] Using cfgdict = %s' % (ut.repr3(cfgdict),))

        ranks_top = infr.params['ranking.ntop']
        response = infr.exec_matching(
            name_method='edge',
            cfgdict=cfgdict,
            batch_size=batch_size,
            ranks_top=ranks_top,
        )

        if cfgdict_ is None:
            # infr.apply_match_edges(review_cfg={'ranks_top': 5})
            lnbnn_results = set(infr._cm_breaking(review_cfg={'ranks_top': ranks_top}))
        else:
            assert response is not None
            lnbnn_results = set(response)

        candidate_edges = {
            edge
            for edge, state in zip(lnbnn_results, infr.edge_decision_from(lnbnn_results))
            if state in desired_states
        }

        infr.print(
            'ranking alg found {}/{} {} edges'.format(
                len(candidate_edges), len(lnbnn_results), desired_states
            ),
            1,
        )
        return candidate_edges

    def ensure_task_probs(infr, edges):
        """
        Ensures that probabilities are assigned to the edges.
        This guarantees that infr.task_probs contains data for edges.
        (Currently only the primary task is actually ensured)

        CommandLine:
            python -m wbia.algo.graph.mixin_matching ensure_task_probs

        Doctest:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph.mixin_matching import *
            >>> import wbia
            >>> infr = wbia.AnnotInference('PZ_MTEST', aids='all',
            >>>                            autoinit='staging')
            >>> edges = list(infr.edges())[0:3]
            >>> infr.load_published()
            >>> assert len(infr.task_probs['match_state']) == 0
            >>> infr.ensure_task_probs(edges)
            >>> assert len(infr.task_probs['match_state']) == 3
            >>> infr.ensure_task_probs(edges)
            >>> assert len(infr.task_probs['match_state']) == 3

        Doctest:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph.mixin_matching import *
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
            >>> edges = list(infr.edges())
            >>> infr.ensure_task_probs(edges)
            >>> assert all([np.isclose(sum(p.values()), 1)
            >>>             for p in infr.task_probs['match_state'].values()])
        """
        if not infr.verifiers:
            raise Exception('Verifiers are needed to predict probabilities')

        # Construct pairwise features on edges in infr
        primary_task = 'match_state'

        match_task = infr.task_probs[primary_task]
        need_flags = [e not in match_task for e in edges]

        if any(need_flags):
            need_edges = ut.compress(edges, need_flags)
            need_edges = list(set(need_edges))
            infr.print(
                'There are {} edges without probabilities'.format(len(need_edges)), 1
            )

            # Only recompute for the needed edges
            task_probs = infr._make_task_probs(need_edges)

            # Store task probs in internal data structure
            # FIXME: this is slow
            for task, probs in task_probs.items():
                probs_dict = probs.to_dict(orient='index')
                if task not in infr.task_probs:
                    infr.task_probs[task] = probs_dict
                else:
                    infr.task_probs[task].update(probs_dict)

                # Set edge task attribute as well
                infr.set_edge_attrs(task, probs_dict)

    @profile
    def ensure_priority_scores(infr, priority_edges):
        """
        Ensures that priority attributes are assigned to the edges.
        This does not change the state of the queue.

        Doctest:
            >>> import wbia
            >>> ibs = wbia.opendb('PZ_MTEST')
            >>> infr = wbia.AnnotInference(ibs, aids='all')
            >>> infr.ensure_mst()
            >>> priority_edges = list(infr.edges())[0:1]
            >>> infr.ensure_priority_scores(priority_edges)

        Doctest:
            >>> import wbia
            >>> ibs = wbia.opendb('PZ_MTEST')
            >>> infr = wbia.AnnotInference(ibs, aids='all')
            >>> infr.ensure_mst()
            >>> # infr.load_published()
            >>> priority_edges = list(infr.edges())
            >>> infr.ensure_priority_scores(priority_edges)

        Doctest:
            >>> from wbia.algo.graph import demo
            >>> infr = demo.demodata_infr(num_pccs=6, p_incon=.5, size_std=2)
            >>> edges = list(infr.edges())
            >>> infr.ensure_priority_scores(edges)
        """
        infr.print('Checking for verifiers: %r' % (infr.verifiers,))

        if infr.verifiers and infr.ibs is not None:
            infr.print(
                'Prioritizing {} edges with one-vs-one probs'.format(len(priority_edges)),
                1,
            )
            infr.print('Using thresholds: %r' % (infr.task_thresh,))
            infr.print(
                'Using infr.params[autoreview.enabled] : %r'
                % (infr.params['autoreview.enabled'],)
            )
            infr.print(
                'Using infr.params[autoreview.prioritize_nonpos]: %r'
                % (infr.params['autoreview.prioritize_nonpos'],)
            )

            infr.ensure_task_probs(priority_edges)

            infr.load_published()

            primary_task = 'match_state'
            match_probs = infr.task_probs[primary_task]
            primary_thresh = infr.task_thresh[primary_task]

            # Read match_probs into a DataFrame
            primary_probs = pd.DataFrame(
                ut.take(match_probs, priority_edges),
                index=nxu.ensure_multi_index(priority_edges, ('aid1', 'aid2')),
            )

            # Convert match-state probabilities into priorities
            prob_match = primary_probs[POSTV]

            # Initialize priorities to probability of matching
            default_priority = prob_match.copy()

            # If the edges are currently between the same individual, then
            # prioritize by non-positive probability (because those edges might
            # expose an inconsistency)
            already_pos = [
                infr.pos_graph.node_label(u) == infr.pos_graph.node_label(v)
                for u, v in priority_edges
            ]
            default_priority[already_pos] = 1 - default_priority[already_pos]

            if infr.params['autoreview.enabled']:
                if infr.params['autoreview.prioritize_nonpos']:
                    # Give positives that pass automatic thresholds high priority
                    _probs = primary_probs[POSTV]
                    flags = _probs > primary_thresh[POSTV]
                    default_priority[flags] = (
                        np.maximum(default_priority[flags], _probs[flags]) + 1
                    )

                    # Give negatives that pass automatic thresholds high priority
                    _probs = primary_probs[NEGTV]
                    flags = _probs > primary_thresh[NEGTV]
                    default_priority[flags] = (
                        np.maximum(default_priority[flags], _probs[flags]) + 1
                    )

                    # Give not-comps that pass automatic thresholds high priority
                    _probs = primary_probs[INCMP]
                    flags = _probs > primary_thresh[INCMP]
                    default_priority[flags] = (
                        np.maximum(default_priority[flags], _probs[flags]) + 1
                    )

            infr.set_edge_attrs('prob_match', prob_match.to_dict())
            infr.set_edge_attrs('default_priority', default_priority.to_dict())

            metric = 'default_priority'
            priority = default_priority
        elif infr.cm_list is not None:
            infr.print(
                'Prioritizing {} edges with one-vs-vsmany scores'.format(
                    len(priority_edges)
                )
            )
            # Not given any deploy classifier, this is the best we can do
            scores = infr._make_lnbnn_scores(priority_edges)
            metric = 'normscore'
            priority = scores
        else:
            infr.print(
                'WARNING: No verifiers to prioritize {} edge(s)'.format(
                    len(priority_edges)
                )
            )
            metric = 'random'
            priority = np.zeros(len(priority_edges)) + 1e-6

        infr.set_edge_attrs(metric, ut.dzip(priority_edges, priority))
        return metric, priority

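    # Illustrative sketch (not part of the original class): the priority rule in
    # ensure_priority_scores, shown on hypothetical data. Priority starts as the
    # positive-match probability; edges whose endpoints already share a PCC are
    # flipped to (1 - p) so potential inconsistencies surface first; edges that
    # clear an automatic-review threshold are boosted above 1. Roughly:
    #
    #     probs = pd.DataFrame({POSTV: [0.9, 0.2], NEGTV: [0.05, 0.7],
    #                           INCMP: [0.05, 0.1]}, index=[(1, 2), (3, 4)])
    #     already_pos = np.array([False, True])
    #     priority = probs[POSTV].copy()
    #     priority[already_pos] = 1 - priority[already_pos]
    #     for state, thresh in {POSTV: 0.8, NEGTV: 0.6, INCMP: 0.9}.items():
    #         flags = probs[state] > thresh
    #         priority[flags] = np.maximum(priority[flags], probs[state][flags]) + 1
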
    def ensure_prioritized(infr, priority_edges):
        priority_edges = list(priority_edges)
        metric, priority = infr.ensure_priority_scores(priority_edges)
        infr.prioritize(metric=metric, edges=priority_edges, scores=priority)

    @profile
    def add_candidate_edges(infr, candidate_edges):
        candidate_edges = list(candidate_edges)
        new_edges = infr.ensure_edges_from(candidate_edges)

        if infr.test_mode:
            infr.apply_edge_truth(new_edges)

        if infr.params['redun.enabled']:
            priority_edges = list(infr.filter_edges_flagged_as_redun(candidate_edges))
            infr.print(
                'Got {} candidate edges, {} are new, '
                'and {} are non-redundant'.format(
                    len(candidate_edges), len(new_edges), len(priority_edges)
                )
            )
        else:
            infr.print(
                'Got {} candidate edges and {} are new'.format(
                    len(candidate_edges), len(new_edges)
                )
            )
            priority_edges = candidate_edges

        if len(priority_edges) > 0:
            infr.ensure_prioritized(priority_edges)
            if hasattr(infr, 'on_new_candidate_edges'):
                # hack callback for demo
                infr.on_new_candidate_edges(infr, new_edges)

        return len(priority_edges)

    @profile
    def refresh_candidate_edges(infr):
        """
        Search for candidate edges.
        Assign each edge a priority and add to queue.
        """
        infr.print('refresh_candidate_edges', 1)
        infr.assert_consistency_invariant()

        if infr.ibs is not None:
            candidate_edges = infr.find_lnbnn_candidate_edges()
        elif hasattr(infr, 'dummy_verif'):
            infr.print('Searching for dummy candidates')
            infr.print(
                'dummy vsone params ='
                + ut.repr4(infr.dummy_verif.dummy_params, nl=1, si=True)
            )
            ranks_top = infr.params['ranking.ntop']
            candidate_edges = infr.dummy_verif.find_candidate_edges(K=ranks_top)
        else:
            raise Exception('No method available to search for candidate edges')

        infr.add_candidate_edges(candidate_edges)
        infr.assert_consistency_invariant()

    @profile
    def _make_task_probs(infr, edges):
        """
        Predict edge probs for each pairwise classifier task
        """
        if infr.verifiers is None:
            raise ValueError('no classifiers exist')
        if not isinstance(infr.verifiers, dict):
            raise NotImplementedError('need to deploy or implement eval prediction')
        task_keys = list(infr.verifiers.keys())
        task_probs = {}
        # infr.print('[make_task_probs] predict {} for {} edges'.format(
        #     ut.conj_phrase(task_keys, 'and'), len(edges)))
        for task_key in task_keys:
            infr.print('predict {} for {} edges'.format(task_key, len(edges)))
            verif = infr.verifiers[task_key]
            probs_df = verif.predict_proba_df(edges)
            task_probs[task_key] = probs_df
        return task_probs

    @profile
    def _make_lnbnn_scores(infr, edges):
        edge_to_data = infr._get_cm_edge_data(edges)
        edges = list(edge_to_data.keys())
        edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
        edge_scores = ut.replace_nones(edge_scores, np.nan)
        edge_scores = np.array(edge_scores)
        # take the inf-norm
        normscores = edge_scores / vt.safe_max(edge_scores, nans=False)
        return normscores
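

# Illustrative sketch (not part of the original classes): _make_task_probs
# (above) returns one pandas DataFrame per task, with one row per edge and one
# column per class label. ensure_task_probs then converts each frame into a
# nested dict with DataFrame.to_dict(orient='index'). On hypothetical data:
def _example_task_probs_to_dict():
    probs_df = pd.DataFrame(
        {POSTV: [0.8, 0.1], NEGTV: [0.15, 0.8], INCMP: [0.05, 0.1]},
        index=[(1, 2), (3, 4)],
    )
    probs_dict = probs_df.to_dict(orient='index')
    # probs_dict[(1, 2)] == {POSTV: 0.8, NEGTV: 0.15, INCMP: 0.05}
    return probs_dict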