# Source code for wbia.dbio.ingest_ggr

# -*- coding: utf-8 -*-
#!/usr/bin/env python  # NOQA
"""Converts a GGR-style raw data to IBEIS database."""
import logging
from wbia.detecttools.directory import Directory
from os.path import join, exists
import utool as ut
import wbia

(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')


def _fix_ggr2018_directory_structure(ggr_path):
    """Apply a fixed list of one-off folder corrections to the raw GGR 2018 data.

    Every step follows the same shape: build a source and a destination path
    under ``ggr_path``, copy with ``ut.rsync(src, dst)``, then remove the
    source with ``ut.delete``.  Source paths handed to ``ut.rsync`` are written
    with backslash-escaped spaces and parentheses; the backslashes are stripped
    again before the path is handed to ``ut.delete`` / ``ut.glob``.
    NOTE(review): presumably ``ut.rsync`` shells out to ``rsync`` (hence the
    shell escaping) while ``ut.delete`` takes a plain filesystem path --
    confirm against utool.

    The function is destructive and the steps are order dependent (later steps
    consume folders produced by earlier ones, and the "Conflicts" section swaps
    folders through a temporary name), so statements must not be reordered.
    The path slicing below splits on ``'/'``, i.e. it assumes POSIX-style
    separators.

    Args:
        ggr_path (str): root folder containing the raw GGR 2018 data

    Returns:
        None
    """

    # Manual fixes for bad directories

    # Named folder -> 231/231B (231/ is created first, 231B/ is the rsync target)
    src_uri = join(ggr_path, 'Clarine\\ Plane\\ Kurungu/')
    dst_uri = join(ggr_path, '231/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '231B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Nested named folder -> 232/232B
    src_uri = join(
        ggr_path,
        'Alex\\ Peltier\\ -\\ Plane\\ -\\ Ngurnit/giraffe\\ grevy\\ count\\ feb\\ 18/',
    )
    dst_uri = join(ggr_path, '232/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '232B/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    # The path ends in '/', so split() yields a trailing ''; dropping the last
    # two pieces removes that and the leaf folder, leaving the parent folder,
    # which is deleted as a whole.
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # The PANORAMA sub-folder is deleted outright (nothing is copied from it)
    src_uri = join(
        ggr_path, 'Mint\\ Media\\ Footage', 'Mpala\\ day\\ 1\\ spark', 'PANORAMA/'
    )
    ut.delete(src_uri.replace('\\', ''))

    # The four 'Mint Media Footage' sub-folders below are all merged into 233/233B
    src_uri = join(ggr_path, 'Mint\\ Media\\ Footage', 'Mpala\\ day\\ 1/')
    dst_uri = join(ggr_path, '233/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '233B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, 'Mint\\ Media\\ Footage', 'Mpala\\ day\\ 1\\ spark/')
    dst_uri = join(ggr_path, '233', '233B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # NOTE(review): this folder name is written with a trailing (escaped) space
    # and no space in 'day2' -- presumably matching the on-disk name; verify.
    src_uri = join(ggr_path, 'Mint\\ Media\\ Footage', 'Mpala\\ day2\\ /')
    dst_uri = join(ggr_path, '233', '233B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, 'Mint\\ Media\\ Footage', 'Mpala\\ day\\ 2\\ spark/')
    dst_uri = join(ggr_path, '233', '233B/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    # Last of the four merges: delete the whole 'Mint Media Footage' parent
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # Folders with a ' (...)' suffix are merged into the plain numbered folder
    src_uri = join(ggr_path, '103\\ \\(1\\)/')
    dst_uri = join(ggr_path, '103/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '103\\ \\(ccef473b\\)/')
    dst_uri = join(ggr_path, '103/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '108\\ \\(1\\)/')
    dst_uri = join(ggr_path, '108/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Top-level '226A (...)' folder -> 226/226A
    src_uri = join(ggr_path, '226A\\ \\(Shaba\\ Funan\\ Camp\\)/')
    dst_uri = join(ggr_path, '226/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '226A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Files matching 121/*.* are copied into 121/121A, then each matched file
    # is deleted individually (the 121 folder itself is kept).
    src_uri = join(ggr_path, '121/*.*')
    dst_uri = join(ggr_path, '121', '121A/')
    ut.rsync(src_uri, dst_uri)
    for src_filepath in ut.glob(src_uri.replace('\\', '')):
        ut.delete(src_filepath)

    # Sub-folders with a '(16)' suffix -> plain letter folders
    src_uri = join(ggr_path, '54', '54A\\(16\\)/')
    dst_uri = join(ggr_path, '54', '54A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '54', '54B\\(16\\)/')
    dst_uri = join(ggr_path, '54', '54B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Sub-folders whose names lack the number prefix and/or letter suffix
    src_uri = join(ggr_path, '87', '87/')
    dst_uri = join(ggr_path, '87', '87A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '223', 'A/')
    dst_uri = join(ggr_path, '223', '223A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '223', 'B/')
    dst_uri = join(ggr_path, '223', '223B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Sub-folder carrying a different number than its parent (14/15A -> 14/14A)
    src_uri = join(ggr_path, '14', '15A/')
    dst_uri = join(ggr_path, '14', '14A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Whole folder 73 is merged into folder 85
    src_uri = join(ggr_path, '73/')
    dst_uri = join(ggr_path, '85/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # 117/115A -> 117/117A (117/117A is moved again in the "Conflicts" section)
    src_uri = join(ggr_path, '117', '115A/')
    dst_uri = join(ggr_path, '117', '117A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Sub-folders of 200 with a space before the letter -> no space
    src_uri = join(ggr_path, '200', '200\\ A/')
    dst_uri = join(ggr_path, '200', '200A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '200', '200\\ B/')
    dst_uri = join(ggr_path, '200', '200B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '200', '200\\ F/')
    dst_uri = join(ggr_path, '200', '200F/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # 200/200A is copied (not moved) into 201/201A: the delete is disabled, and
    # the same source is copied again into 202/202A further down.
    src_uri = join(ggr_path, '200', '200A/')
    dst_uri = join(ggr_path, '201/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '201A/')
    ut.rsync(src_uri, dst_uri)
    # ut.delete(src_uri.replace('\\', ''))

    # 201* sub-folders found under 200 are moved to folder 201
    src_uri = join(ggr_path, '200', '201\\ E/')
    dst_uri = join(ggr_path, '201', '201E/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '200', '201\\ F/')
    dst_uri = join(ggr_path, '201', '201F/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Second copy of 200/200A, this time into 202/202A (source kept again)
    src_uri = join(ggr_path, '200', '200A/')
    dst_uri = join(ggr_path, '202/')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '202A/')
    ut.rsync(src_uri, dst_uri)
    # ut.delete(src_uri.replace('\\', ''))

    # 202* sub-folders found under 200 are moved to folder 202
    src_uri = join(ggr_path, '200', '202\\ B/')
    dst_uri = join(ggr_path, '202', '202B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    src_uri = join(ggr_path, '200', '202\\ F/')
    dst_uri = join(ggr_path, '202', '202F/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Flatten an extra nesting level: files matching */*.* are copied up into
    # the letter folder, then the nested folder is deleted.
    src_uri = join(ggr_path, '230', '230A', 'El\\ Karama/*.*')
    dst_uri = join(ggr_path, '230', '230A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    # Drop only the '*.*' piece so the nested folder itself is the delete target
    src_uri = '/'.join(src_uri.split('/')[:-1])
    ut.delete(src_uri)

    src_uri = join(ggr_path, '136', '136B', '136B\\ Grevys\\ Rally/*.*')
    dst_uri = join(ggr_path, '136', '136B/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-1])
    ut.delete(src_uri)

    # Flatten 160/160E/104DUSIT (only if present): each file is copied one level
    # up (the '104DUSIT/' path piece is removed) with '.JPG' renamed to '_.JPG'.
    # NOTE(review): the rename presumably avoids clashing with files already in
    # 160E; the assert aborts if the destination name already exists.
    src_uri = join(ggr_path, '160', '160E', '104DUSIT')
    if exists(src_uri):
        direct = Directory(src_uri, recursive=False)
        filename_list = direct.files()
        for filename in sorted(filename_list):
            dst_uri = filename.replace('104DUSIT/', '').replace('.JPG', '_.JPG')
            assert not exists(dst_uri)
            ut.rsync(filename, dst_uri)
        ut.delete(src_uri)

    # Same flattening for 222/222B/102DUSIT
    src_uri = join(ggr_path, '222', '222B', '102DUSIT')
    if exists(src_uri):
        direct = Directory(src_uri, recursive=False)
        filename_list = direct.files()
        for filename in sorted(filename_list):
            dst_uri = filename.replace('102DUSIT/', '').replace('.JPG', '_.JPG')
            assert not exists(dst_uri)
            ut.rsync(filename, dst_uri)
        ut.delete(src_uri)

    # Errors found by QR codes

    # No conflicts
    # (each destination below is not used as a source by another step here)
    src_uri = join(ggr_path, '5', '5A/')
    dst_uri = join(ggr_path, '5', '5B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # 14/14A was produced from 14/15A by an earlier step of this function
    src_uri = join(ggr_path, '14', '14A/')
    dst_uri = join(ggr_path, '14', '14B/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # 118/118A -> 192/192A, then the whole 118 folder is deleted
    src_uri = join(ggr_path, '118', '118A/')
    dst_uri = join(ggr_path, '192')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '192A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # 119/119A -> 189/189A, then the whole 119 folder is deleted
    src_uri = join(ggr_path, '119', '119A/')
    dst_uri = join(ggr_path, '189')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '189A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # 120/120A -> 190/190A, then the whole 120 folder is deleted
    src_uri = join(ggr_path, '120', '120A/')
    dst_uri = join(ggr_path, '190')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '190A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # 138/138C -> 169/169C (path has no escapes, so it is deleted as-is)
    src_uri = join(ggr_path, '138', '138C/')
    dst_uri = join(ggr_path, '169', '169C/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri)

    # Conflicts - Move first
    # (free up 115/115A and 148/148A before other folders are moved into them)

    src_uri = join(ggr_path, '115', '115A/')
    dst_uri = join(ggr_path, '191')
    ut.ensuredir(dst_uri)
    dst_uri = join(dst_uri, '191A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # 148A is parked under a temporary name so it can be swapped with 149A
    src_uri = join(ggr_path, '148', '148A/')
    dst_uri = join(ggr_path, '149', '149A-temp/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Conflicts - Move second

    # 117/117A takes over the now-vacated 115/115A; the 117 folder is deleted
    src_uri = join(ggr_path, '117', '117A/')
    dst_uri = join(ggr_path, '115', '115A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)

    # 149/149A takes over the now-vacated 148/148A
    src_uri = join(ggr_path, '149', '149A/')
    dst_uri = join(ggr_path, '148', '148A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Conflicts - Move third

    # Finish the swap: the parked former 148A becomes 149/149A
    src_uri = join(ggr_path, '149', '149A-temp/')
    dst_uri = join(ggr_path, '149', '149A/')
    ut.rsync(src_uri, dst_uri)
    ut.delete(src_uri.replace('\\', ''))

    # Conflicts - Merge third

    # 57/57A is merged into 25/25A, then the whole 57 folder is deleted
    src_uri = join(ggr_path, '57', '57A/')
    dst_uri = join(ggr_path, '25', '25A/')
    ut.rsync(src_uri, dst_uri)
    src_uri = src_uri.replace('\\', '')
    src_uri = '/'.join(src_uri.split('/')[:-2])
    ut.delete(src_uri)


def _ggr_numeric_sort_key(direct):
    """Return a sort key that orders numerically-named directories by value."""
    try:
        return (0, int(direct.base()))
    except ValueError:
        # Non-numeric names sort last (keeping their relative order) instead of
        # crashing the sort; the caller logs and skips them.
        return (1, 0)


def convert_ggr2018_to_wbia(
    ggr_path, dbdir=None, purge=True, dry_run=False, apply_updates=True, **kwargs
):
    r"""Convert the raw GGR2 (2018) data to an wbia database.

    The expected layout is ``<ggr_path>/<number>/<number><letter>/<images>``,
    with ``number`` in 1..249 and ``letter`` in A..F.  Folders that do not
    match are logged and skipped.  Images of every valid ``<number><letter>``
    folder are added to the database, assigned to the imageset
    ``'GGR2,<number>,<letter>'`` and given the note
    ``'GGR2,<number>,<letter>,<5-digit index>'``.

    Note that root-level files named in the internal blacklist are deleted
    from ``ggr_path`` and, when ``purge`` is set, ``dbdir`` is deleted,
    regardless of ``dry_run``.

    Args:
        ggr_path (str): Directory to folder *containing* raw GGR 2018 data
        dbdir (str): Output directory
        purge (bool): if True, ``dbdir`` is deleted before it is opened
        dry_run (bool): if True, the folder structure is only validated and
            problems are logged; no images are added to the database
        apply_updates (bool): if True, first run
            ``_fix_ggr2018_directory_structure(ggr_path)`` (destructive)
        **kwargs: accepted for call compatibility; not used

    Returns:
        the controller object returned by ``wbia.opendb(dbdir=dbdir)``

    Raises:
        AssertionError: if ``ggr_path`` does not exist

    CommandLine:
        python -m wbia convert_ggr2018_to_wbia

    Example:
        >>> # SCRIPT
        >>> from wbia.dbio.ingest_ggr import *  # NOQA
        >>> default_ggr_path = join('/', 'data', 'wbia', 'GGR2', 'GGR2018data')
        >>> default_dbdir = join('/', 'data', 'wbia', 'GGR2-IBEIS')
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=default_dbdir)
        >>> ggr_path = ut.get_argval('--ggr', type_=str, default=default_ggr_path)
        >>> result = convert_ggr2018_to_wbia(ggr_path, dbdir=dbdir, purge=False, dry_run=True, apply_updates=False)
        >>> print(result)
    """
    ALLOWED_NUMBERS = range(1, 250)
    ALLOWED_LETTERS = ('A', 'B', 'C', 'D', 'E', 'F')

    ################################################################################

    if apply_updates:
        _fix_ggr2018_directory_structure(ggr_path)

    ################################################################################

    blacklist_filepath_set = {
        join(ggr_path, 'Cameras info.numbers'),
        join(ggr_path, 'Cameras info.xlsx'),
        join(ggr_path, 'GGR_photos_MRC_29.1.18.ods'),
        join(ggr_path, 'Cameras info-2.numbers'),
    }

    # Check root files
    direct = Directory(ggr_path)
    for filepath in direct.files(recursive=False):
        # Explicit test rather than ``assert``: with asserts stripped
        # (python -O) every root file would otherwise be deleted.
        if filepath in blacklist_filepath_set:
            ut.delete(filepath)
        else:
            logger.info('Unresolved root file found in %r' % (filepath,))

    ################################################################################

    if purge:
        ut.delete(dbdir)
    ibs = wbia.opendb(dbdir=dbdir)

    ################################################################################

    # Check folder structure
    if not exists(ggr_path):
        # AssertionError is kept so existing callers see the same exception type
        raise AssertionError('ggr_path does not exist: %r' % (ggr_path,))
    direct = Directory(ggr_path, recursive=0)
    direct1_list = direct.directories()
    # Tolerant key: a non-numeric folder must not abort the sort, because the
    # loop below is what reports and skips such folders.
    direct1_list.sort(key=_ggr_numeric_sort_key)
    for direct1 in direct1_list:
        if not dry_run:
            logger.info('Processing directory: %r' % (direct1,))
        base1 = direct1.base()

        try:
            int(base1)
        except ValueError:
            logger.info('Error found in %r' % (direct1,))
            continue

        # A numbered folder may only contain letter sub-folders, no loose files
        if len(direct1.files(recursive=False)) != 0:
            logger.info('Files found in %r' % (direct1,))
            continue

        seen_letter_list = []

        direct1_ = Directory(direct1.absolute_directory_path, recursive=0)
        direct2_list = direct1_.directories()
        direct2_list.sort(key=lambda x: x.base(), reverse=False)
        for direct2 in direct2_list:
            base2 = direct2.base()

            # Sub-folder name must begin with the parent's number
            if not base2.startswith(base1):
                logger.info(
                    'Folder name heredity conflict %r with %r' % (direct2, direct1)
                )
                continue

            # Name must be '<number><letter>' without spaces
            if len(base2) < 2 or ' ' in base2:
                logger.info('Folder name format error found in %r' % (direct2,))
                continue
            try:
                number = int(base2[:-1])
            except ValueError:
                logger.info('Error found in %r' % (direct2,))
                continue
            letter = base2[-1].upper()
            if number not in ALLOWED_NUMBERS or letter not in ALLOWED_LETTERS:
                logger.info('Folder name format error found in %r' % (direct2,))
                continue
            seen_letter_list.append(letter)

            direct2_ = Directory(
                direct2.absolute_directory_path, recursive=True, images=True
            )
            # A letter folder must be flat: image files only, no sub-folders
            if len(direct2_.directories()) != 0:
                logger.info('Folders exist in file only level %r' % (direct2,))
                continue

            filepath_list = sorted(direct2_.files())

            if not dry_run:
                try:
                    gid_list = ibs.add_images(filepath_list)
                    gid_list = ut.filter_Nones(gid_list)
                    gid_list = sorted(set(gid_list))

                    imageset_text = 'GGR2,%d,%s' % (number, letter)
                    # Notes carry a 1-based, zero-padded index within the folder
                    note_list = [
                        '%s,%05d' % (imageset_text, index)
                        for index in range(1, len(gid_list) + 1)
                    ]
                    ibs.set_image_notes(gid_list, note_list)
                    ibs.set_image_imagesettext(gid_list, [imageset_text] * len(gid_list))
                except Exception:
                    # Record the traceback before dropping into the interactive
                    # debugging shell, so the failure is not lost.
                    logger.exception('Failed to ingest images from %r' % (direct2,))
                    ut.embed()

        # Per-number sanity checks on the set of letters that were accepted
        seen_letter_set = set(seen_letter_list)
        if len(seen_letter_set) != len(seen_letter_list):
            logger.info(
                'Duplicate letters in %r with letters %r' % (direct1, seen_letter_list)
            )
            continue

        if 'A' not in seen_letter_set:
            logger.info('WARNING: A camera not found in %r' % (direct1,))

    return ibs