Source code for wbia.detecttools.wbiadata

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
from wbia.detecttools.directory import Directory
from wbia.detecttools.wbiadata import common as com
from wbia.detecttools.wbiadata.wbia_image import IBEIS_Image
from tqdm import tqdm


class IBEIS_Data(object):  # NOQA
    def __init__(ibsd, dataset_path, **kwargs):
        # Fill in default values for any configuration kwargs that were not provided
        com._kwargs(kwargs, 'object_min_width', 1)
        com._kwargs(kwargs, 'object_min_height', 1)
        com._kwargs(kwargs, 'mine_negatives', False)
        com._kwargs(kwargs, 'mine_width_min', 50)
        com._kwargs(kwargs, 'mine_width_max', 400)
        com._kwargs(kwargs, 'mine_height_min', 50)
        com._kwargs(kwargs, 'mine_height_max', 400)
        com._kwargs(kwargs, 'mine_max_attempts', 100)
        com._kwargs(kwargs, 'mine_max_keep', 10)
        com._kwargs(kwargs, 'mine_overlap_margin', 0.25)
        com._kwargs(kwargs, 'mine_exclude_categories', [])
        com._kwargs(kwargs, 'mine_patches', False)
        com._kwargs(kwargs, 'mine_patch_width', 32)
        com._kwargs(kwargs, 'mine_patch_height', 32)
        com._kwargs(kwargs, 'mine_patch_stride_suggested', 16)
        com._kwargs(kwargs, 'mine_patch_overlap_margin', 0.5)

        ibsd.dataset_path = dataset_path
        ibsd.absolute_dataset_path = os.path.realpath(dataset_path)

        # Load every XML annotation file under <dataset_path>/Annotations
        direct = Directory(
            os.path.join(ibsd.dataset_path, 'Annotations'),
            include_file_extensions=['xml'],
        )
        ibsd.images = []
        files = direct.files()
        print('Loading Database')
        for i, filename in tqdm(list(enumerate(files))):
            ibsd.images.append(
                IBEIS_Image(filename, ibsd.absolute_dataset_path, **kwargs)
            )
        print(' ...Loaded')

        ibsd.categories_images = []
        ibsd.categories_rois = []

        for image in ibsd.images:
            temp = image.categories(unique=False, patches=True)
            ibsd.categories_rois += temp
            ibsd.categories_images += set(temp)

            if len(image.objects) == 0:
                ibsd.categories_images += ['BACKGROUND']

        ibsd.distribution_images = com.histogram(ibsd.categories_images)
        ibsd.distribution_rois = com.histogram(ibsd.categories_rois)
        ibsd.rois = sum(ibsd.distribution_rois.values())

        ibsd.categories = sorted(set(ibsd.categories_images))

    def __str__(ibsd):
        return '<IBEIS Data Object | %s | %d images | %d categories | %d rois>' % (
            ibsd.absolute_dataset_path,
            len(ibsd.images),
            len(ibsd.categories),
            ibsd.rois,
        )

    def __repr__(ibsd):
        return '<IBEIS Data Object | %s>' % (ibsd.absolute_dataset_path)

    def __len__(ibsd):
        return len(ibsd.images)

    def __getitem__(ibsd, key):
        if isinstance(key, str):
            # String keys are matched as (partial) filenames; the first match wins
            for image in ibsd.images:
                if key in image.filename:
                    return image
            return None
        else:
            return ibsd.images[key]
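
    # A minimal sketch of what com._kwargs presumably does, assuming it simply
    # fills in a default when the key is absent (an assumption inferred from
    # how it is called in __init__ above; the real helper lives in
    # wbia.detecttools.wbiadata.common):
    #
    #     def _kwargs(kwargs, key, value):
    #         if key not in kwargs:
    #             kwargs[key] = value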

    def print_distribution(ibsd):
        def _print_line(category, spacing, images, rois):
            try:
                images = int(images)
                images = '% 5d' % (images,)
            except Exception:
                pass

            try:
                rois = int(rois)
                rois = '% 5d' % (rois,)
            except Exception:
                pass

            print('%s%s\t%s' % (category + ' ' * (spacing - len(category)), images, rois))

        key_list = list(ibsd.distribution_rois.keys())
        key_list += ['TOTAL', 'CATEGORY']
        _max = max([len(category) for category in key_list]) + 3

        _print_line('CATEGORY', _max, 'IMGs', 'ROIs')
        if 'BACKGROUND' in ibsd.distribution_images:
            _print_line('BACKGROUND', _max, ibsd.distribution_images['BACKGROUND'], '')

        for category in sorted(ibsd.distribution_rois):
            _print_line(
                category,
                _max,
                ibsd.distribution_images[category],
                ibsd.distribution_rois[category],
            )

        _print_line('TOTAL', _max, len(ibsd.images), ibsd.rois)
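
    # Illustrative output of print_distribution() (hypothetical counts, not
    # from any real dataset): a header row, an optional BACKGROUND row for
    # images with no annotated objects, one row per category, and a TOTAL row:
    #
    #     CATEGORY        IMGs    ROIs
    #     BACKGROUND         3
    #     zebra_grevys     120     204
    #     zebra_plains      98     171
    #     TOTAL            221     375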

    def parse_dataset(ibsd, category, _type):
        filename = _type + '.txt'
        if category is not None:
            filename = category + '_' + filename
        filepath = os.path.join(ibsd.dataset_path, 'ImageSets', 'Main', filename)
        _dict = {}
        try:
            _file = open(filepath)
            for line in _file:
                line = line.strip().split(' ')
                _dict[line[0]] = int(line[-1])
        except IOError as e:
            print('<%r> %s' % (e, filepath))
        return _dict
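
    # Expected on-disk format for ImageSets/Main/<category>_<type>.txt, an
    # assumption inferred from the parser above (it matches the PASCAL VOC
    # ImageSets convention): one record per line, image identifier first,
    # integer flag as the last whitespace-separated token, e.g.
    #
    #     2014_000002  1
    #     2014_000003 -1
    #
    # parse_dataset() maps each identifier to that trailing integer; callers
    # such as dataset() below treat a non-zero flag as membership in the split.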

    def dataset(
        ibsd,
        positive_category,
        neg_exclude_categories=[],
        max_rois_pos=None,
        max_rois_neg=None,
    ):
        def _parse_dataset_file(category, _type):
            filepath = os.path.join(
                ibsd.dataset_path, 'ImageSets', 'Main', category + '_' + _type + '.txt'
            )
            _dict = {}
            try:
                _file = open(filepath)
                for line in _file:
                    line = line.strip().split(' ')
                    _dict[line[0]] = int(line[-1])
            except IOError as e:
                print('<%r> %s' % (e, filepath))
            return _dict

        positives = []
        negatives = []
        validation = []
        test = []

        train_values = ibsd.parse_dataset(positive_category, 'train')
        # NOTE: the 'trainval' split loaded below immediately replaces the
        # 'train' split loaded above
        train_values = ibsd.parse_dataset(positive_category, 'trainval')
        # val_values = _parse_dataset_file(positive_category, "val")
        test_values = ibsd.parse_dataset(positive_category, 'test')

        pos_rois = 0
        neg_rois = 0

        for image in ibsd.images:
            _train = train_values.get(image.filename, 0)
            # _val = val_values.get(image.filename, 0)
            _test = test_values.get(image.filename, 0)
            temp = image.categories(unique=False)
            flag = False

            if _train != 0:
                for val in temp:
                    if val == positive_category:
                        flag = True
                        pos_rois += 1
                    elif val not in neg_exclude_categories:
                        neg_rois += 1

                if flag:
                    positives.append(image)
                elif val not in neg_exclude_categories:
                    negatives.append(image)

            # if _val != 0:
            #     validation.append(image)

            if _test != 0:
                test.append(image)

        # Setup auto normalize variables for equal positives and negatives
        if max_rois_pos == 'auto' or max_rois_pos == -1:
            max_rois_pos = neg_rois

        if max_rois_neg == 'auto' or max_rois_neg == -1:
            max_rois_neg = pos_rois

        # Remove positives to target; not guaranteed to hit the target exactly, but 'close'
        if max_rois_pos is not None and len(positives) > 0:
            pos_density = float(pos_rois) / len(positives)
            target_num = int(max_rois_pos / pos_density)

            print('Normalizing Positives, Target: %d' % target_num)
            # Remove images to match target
            while len(positives) > target_num:
                positives.pop(com.randInt(0, len(positives) - 1))

            # Recalculate rois left
            pos_rois = 0
            for image in positives:
                temp = image.categories(unique=False)
                for val in temp:
                    if val in positive_category:
                        pos_rois += 1

        # Remove negatives to target; not guaranteed to hit the target exactly, but 'close'
        if max_rois_neg is not None and len(negatives) > 0:
            neg_density = float(neg_rois) / len(negatives)
            target_num = int(max_rois_neg / neg_density)

            print('Normalizing Negatives, Target: %d ' % target_num)
            # Remove images to match target
            while len(negatives) > target_num:
                negatives.pop(com.randInt(0, len(negatives) - 1))

            # Recalculate rois left
            neg_rois = 0
            for image in negatives:
                temp = image.categories(unique=False)
                for val in temp:
                    if val not in positive_category:
                        neg_rois += 1

        print('%s\t%s\t%s\t%s\t%s' % (' ', 'Pos', 'Neg', 'Val', 'Test'))
        print(
            '%s\t%s\t%s\t%s\t%s'
            % ('Images:', len(positives), len(negatives), len(validation), len(test))
        )
        print('%s\t%s\t%s\t%s\t%s' % ('ROIs: ', pos_rois, neg_rois, '', ''))

        return (positives, pos_rois), (negatives, neg_rois), validation, test


if __name__ == '__main__':
    information = {
        'mine_negatives': True,
        'mine_max_keep': 1,
        'mine_exclude_categories': ['zebra_grevys', 'zebra_plains'],
    }

    dataset = IBEIS_Data('test/', **information)
    print(dataset)

    # Access specific information about the dataset
    print('Categories:', dataset.categories)
    print('Number of images:', len(dataset))

    print('')
    dataset.print_distribution()
    print('')

    # Access specific image from dataset using filename or index
    print(dataset['2014_000002'])
    print(dataset['_000002'])  # partial also works (takes first match)

    cont = True
    while cont:
        # Show the detection regions by drawing them on the source image
        print('Enter something to continue, empty to get new image')
        cont = dataset[com.randInt(0, len(dataset) - 1)].show()

    # Get all images using a specific positive set
    (pos, pos_rois), (neg, neg_rois), val, test = dataset.dataset('zebra_grevys')

    print(pos, pos_rois)
    print(neg, neg_rois)
    print(val)
    print(test)

    # Get a specific number of images (-1 for auto normalize to what the other gives)
    # (pos, pos_rois), (neg, neg_rois), val, test = dataset.dataset('zebra_grevys', max_rois_neg=-1)

    print('\nPositives:')
    for _pos in pos:
        print(_pos.image_path())
        print(_pos.bounding_boxes(parts=True))

    print('\nNegatives:')
    for _neg in neg:
        print(_neg.image_path())
        print(_neg.bounding_boxes(parts=True))

    print('\nValidation:')
    for _val in val:
        print(_val.image_path())
        print(_val.bounding_boxes(parts=True))

    print('\nTest:')
    for _test in test:
        print(_test.image_path())
        print(_test.bounding_boxes(parts=True))