Source code for wbia.detecttools.pascaldata

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
from wbia.detecttools.directory import Directory
from wbia.detecttools.pascaldata import common as com
from wbia.detecttools.pascaldata.pascal_image import PASCAL_Image


class PASCAL_Data(object):
    def __init__(pascald, dataset_path, **kwargs):
        com._kwargs(kwargs, 'object_min_width', 32)
        com._kwargs(kwargs, 'object_min_height', 32)
        com._kwargs(kwargs, 'mine_patches', True)
        com._kwargs(kwargs, 'mine_negatives', True)
        com._kwargs(kwargs, 'mine_width_min', 50)
        com._kwargs(kwargs, 'mine_width_max', 400)
        com._kwargs(kwargs, 'mine_height_min', 50)
        com._kwargs(kwargs, 'mine_height_max', 400)
        com._kwargs(kwargs, 'mine_max_attempts', 100)
        com._kwargs(kwargs, 'mine_max_keep', 10)
        com._kwargs(kwargs, 'mine_overlap_margin', 0.25)
        com._kwargs(kwargs, 'mine_exclude_categories', [])

        pascald.dataset_path = dataset_path
        pascald.absolute_dataset_path = os.path.realpath(dataset_path)

        direct = Directory(
            os.path.join(pascald.dataset_path, 'Annotations'),
            include_file_extensions=['xml'],
        )
        pascald.images = []
        files = direct.files()
        print('Loading Database')
        for i, filename in enumerate(files):
            # Print progress roughly every 10% (integer step avoids float modulo)
            if len(files) > 10:
                if i % (len(files) // 10) == 0:
                    print('%0.2f' % (float(i) / len(files)))
            pascald.images.append(
                PASCAL_Image(filename, pascald.absolute_dataset_path, **kwargs)
            )
        print(' ...Loaded')

        pascald.categories_images = []
        pascald.categories_rois = []
        for image in pascald.images:
            temp = image.categories(unique=False, patches=True)
            pascald.categories_rois += temp
            pascald.categories_images += set(temp)
            if len(image.objects) == 0:
                pascald.categories_images += ['BACKGROUND']

        pascald.distribution_images = com.histogram(pascald.categories_images)
        pascald.distribution_rois = com.histogram(pascald.categories_rois)
        pascald.rois = sum(pascald.distribution_rois.values())
        pascald.categories = sorted(set(pascald.categories_images))

    def __str__(pascald):
        return '<IBEIS Data Object | %s | %d images | %d categories | %d rois>' % (
            pascald.absolute_dataset_path,
            len(pascald.images),
            len(pascald.categories),
            pascald.rois,
        )

    def __repr__(pascald):
        return '<IBEIS Data Object | %s>' % (pascald.absolute_dataset_path)

    def __len__(pascald):
        return len(pascald.images)

    def __getitem__(pascald, key):
        if isinstance(key, str):
            # Allow lookup by (partial) filename; first match wins
            for image in pascald.images:
                if key in image.filename:
                    return image
            return None
        else:
            return pascald.images[key]
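    # The `com._kwargs` calls in `__init__` above seed default mining/size
    # parameters. Its implementation lives in `common`; a minimal sketch of the
    # assumed behavior (fill a default only when the caller did not pass one):
    #
    #     def _kwargs(kwargs, key, value):
    #         if key not in kwargs:
    #             kwargs[key] = value
    #
    # i.e. roughly equivalent to `kwargs.setdefault(key, value)`.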
    def print_distribution(pascald):
        def _print_line(category, spacing, images, rois):
            images = str(images)
            rois = str(rois)
            print('%s%s\t%s' % (category + ' ' * (spacing - len(category)), images, rois))

        # Pad the category column to the longest name (list() is needed in Python 3,
        # where dict.keys() cannot be concatenated with a list directly)
        _max = (
            max(
                [
                    len(category)
                    for category in list(pascald.distribution_rois.keys())
                    + ['TOTAL', 'CATEGORY']
                ]
            )
            + 3
        )
        _print_line('CATEGORY', _max, 'IMGs', 'ROIs')
        if 'BACKGROUND' in pascald.distribution_images:
            _print_line('BACKGROUND', _max, pascald.distribution_images['BACKGROUND'], '')
        for category in sorted(pascald.distribution_rois):
            _print_line(
                category,
                _max,
                pascald.distribution_images[category],
                pascald.distribution_rois[category],
            )
        _print_line('TOTAL', _max, len(pascald.images), pascald.rois)
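    # Illustrative output shape of `print_distribution` (category names taken from
    # the example in `__main__` below; the counts are hypothetical):
    #
    #     CATEGORY        IMGs    ROIs
    #     BACKGROUND      12
    #     zebra_grevys    340     512
    #     zebra_plains    280     431
    #     TOTAL           632     943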
    def dataset(
        pascald,
        positive_category,
        neg_exclude_categories=[],
        max_rois_pos=None,
        max_rois_neg=None,
    ):
        def _parse_dataset_file(category, _type):
            filepath = os.path.join(
                pascald.dataset_path,
                'ImageSets',
                'Main',
                category + '_' + _type + '.txt',
            )
            _dict = {}
            try:
                _file = open(filepath)
                for line in _file:
                    line = line.strip().split(' ')
                    _dict[line[0]] = int(line[-1])
            except IOError as e:
                print('<%r> %s' % (e, filepath))
            return _dict

        positives = []
        negatives = []
        validation = []
        test = []
        train_values = _parse_dataset_file(positive_category, 'train')
        # The 'trainval' split supersedes the 'train' values read just above
        train_values = _parse_dataset_file(positive_category, 'trainval')
        val_values = _parse_dataset_file(positive_category, 'val')
        test_values = _parse_dataset_file(positive_category, 'test')

        pos_rois = 0
        neg_rois = 0
        for image in pascald.images:
            filename, ext = os.path.splitext(image.filename)
            _train = train_values.get(filename, 0)
            _val = val_values.get(filename, 0)
            _test = test_values.get(filename, 0)
            temp = image.categories(unique=False)
            flag = False
            if _train != 0:
                for val in temp:
                    if val == positive_category:
                        flag = True
                        pos_rois += 1
                    elif val not in neg_exclude_categories:
                        neg_rois += 1
                if flag:
                    positives.append(image)
                elif val not in neg_exclude_categories:
                    negatives.append(image)
            if _val != 0:
                validation.append(image)
            if _test != 0:
                test.append(image)

        # Setup auto normalize variables for equal positives and negatives
        if max_rois_pos == 'auto' or max_rois_pos == -1:
            max_rois_pos = neg_rois
        if max_rois_neg == 'auto' or max_rois_neg == -1:
            max_rois_neg = pos_rois

        # Remove positives to target; not guaranteed to hit the target exactly, but 'close'
        if max_rois_pos is not None and len(positives) > 0:
            pos_density = float(pos_rois) / len(positives)
            target_num = int(max_rois_pos / pos_density)
            print('Normalizing Positives, Target: %d' % target_num)
            # Remove images to match target
            while len(positives) > target_num:
                positives.pop(com.randInt(0, len(positives) - 1))
            # Recalculate rois left
            pos_rois = 0
            for image in positives:
                temp = image.categories(unique=False)
                for val in temp:
                    if val in positive_category:
                        pos_rois += 1

        # Remove negatives to target; not guaranteed to hit the target exactly, but 'close'
        if max_rois_neg is not None and len(negatives) > 0:
            neg_density = float(neg_rois) / len(negatives)
            target_num = int(max_rois_neg / neg_density)
            print('Normalizing Negatives, Target: %d ' % target_num)
            # Remove images to match target
            while len(negatives) > target_num:
                negatives.pop(com.randInt(0, len(negatives) - 1))
            # Recalculate rois left
            neg_rois = 0
            for image in negatives:
                temp = image.categories(unique=False)
                for val in temp:
                    if val not in positive_category:
                        neg_rois += 1

        print('%s\t%s\t%s\t%s\t%s' % (' ', 'Pos', 'Neg', 'Val', 'Test'))
        print(
            '%s\t%s\t%s\t%s\t%s'
            % ('Images:', len(positives), len(negatives), len(validation), len(test))
        )
        print('%s\t%s\t%s\t%s\t%s' % ('ROIs: ', pos_rois, neg_rois, '', ''))
        return (positives, pos_rois), (negatives, neg_rois), validation, test
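# `_parse_dataset_file` above reads the standard PASCAL VOC ImageSets/Main split
# files: one image id per line followed by a flag (1 = category present,
# -1 = absent, 0 = difficult). A hypothetical `zebra_grevys_train.txt`:
#
#     2014_000002  1
#     2014_000003 -1
#     2014_000004  0
#
# would be parsed into {'2014_000002': 1, '2014_000003': -1, '2014_000004': 0}.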
if __name__ == '__main__':
    information = {
        'mine_negatives': True,
        'mine_max_keep': 1,
        'mine_exclude_categories': ['zebra_grevys', 'zebra_plains'],
    }

    dataset = PASCAL_Data('test/', **information)
    print(dataset)

    # Access specific information about the dataset
    print('Categories:', dataset.categories)
    print('Number of images:', len(dataset))

    print('')
    dataset.print_distribution()
    print('')

    # Access specific image from dataset using filename or index
    print(dataset['2014_000002'])
    print(dataset['_000002'])  # partial also works (takes first match)

    cont = True
    while cont:
        # Show the detection regions by drawing them on the source image
        print('Enter something to continue, empty to get new image')
        cont = dataset[com.randInt(0, len(dataset) - 1)].show()

    # Get all images using a specific positive set
    (pos, pos_rois), (neg, neg_rois), val, test = dataset.dataset('zebra_grevys')
    print(pos, pos_rois)
    print(neg, neg_rois)
    print(val)
    print(test)

    # Get a specific number of images (-1 for auto normalize to what the other gives)
    # (pos, pos_rois), (neg, neg_rois), val, test = dataset.dataset('zebra_grevys', max_rois_neg=-1)

    print('\nPositives:')
    for _pos in pos:
        print(_pos.image_path())
        print(_pos.bounding_boxes(parts=True))

    print('\nNegatives:')
    for _neg in neg:
        print(_neg.image_path())
        print(_neg.bounding_boxes(parts=True))

    print('\nValidation:')
    for _val in val:
        print(_val.image_path())
        print(_val.bounding_boxes(parts=True))

    print('\nTest:')
    for _test in test:
        print(_test.image_path())
        print(_test.bounding_boxes(parts=True))