# -*- coding: utf-8 -*-
import logging
import utool as ut
from os.path import join, dirname, basename
(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')
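# Helper script for scraping injury-keyword imagery from whaleshark.org
# (Wildbook) and ingesting it into a local wbia database, plus assorted
# one-off detection and training utilities.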
def get_injured_sharks():
"""
>>> from wbia.scripts.getshark import * # NOQA
"""
import requests
url = 'http://www.whaleshark.org/getKeywordImages.jsp'
resp = requests.get(url)
assert resp.status_code == 200
keywords = resp.json()['keywords']
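# The keyword index is expected to look roughly like (field names inferred
# from the accesses below; treat this as an assumption, not a spec):
#   {'keywords': [{'indexName': ..., 'readableName': ...}, ...]}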
key_list = ut.take_column(keywords, 'indexName')
key_to_nice = {k['indexName']: k['readableName'] for k in keywords}
injury_patterns = [
'injury',
'net',
'hook',
'trunc',
'damage',
'scar',
'nicks',
'bite',
]
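# Split the keyword index into injury-related keys (substring match against
# the patterns above) and everything else.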
injury_keys = [
key for key in key_list if any(pat in key for pat in injury_patterns)
]
noninjury_keys = ut.setdiff(key_list, injury_keys)
injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys) # NOQA
noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys) # NOQA
key_list = injury_keys
keyed_images = {}
for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
key_url = url + '?indexName={indexName}'.format(indexName=key)
key_resp = requests.get(key_url)
assert key_resp.status_code == 200
key_imgs = key_resp.json()['images']
keyed_images[key] = key_imgs
key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
key_hist = ut.sort_dict(key_hist, 'vals')
logger.info(ut.repr3(key_hist))
nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
logger.info(ut.repr3(nice_key_hist))
key_to_urls = {key: ut.take_column(vals, 'url') for key, vals in keyed_images.items()}
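# Count pairwise URL overlap between keywords; the same image can carry
# several injury tags, so overlapping URLs are collected for inspection.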
overlaps = {}
import itertools
overlap_img_list = []
for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
num_overlap = len(overlap_imgs)
overlaps[(k1, k2)] = num_overlap
overlaps[(k1, k1)] = len(key_to_urls[k1])
if num_overlap > 0:
# logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
overlap_img_list.extend(overlap_imgs)
all_img_urls = list(set(ut.flatten(key_to_urls.values())))
num_all = len(all_img_urls) # NOQA
logger.info('num_all = %r' % (num_all,))
# Determine super-categories
categories = ['nicks', 'scar', 'trunc']
# Force these keys into these categories
key_to_cat = {'scarbite': 'other_injury'}
cat_to_keys = ut.ddict(list)
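# Keywords containing one of the category substrings go to that category
# (a key can match more than one); unmatched keys fall back to 'other_injury'.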
for key in key_to_urls.keys():
flag = 1
if key in key_to_cat:
cat = key_to_cat[key]
cat_to_keys[cat].append(key)
continue
for cat in categories:
if cat in key:
cat_to_keys[cat].append(key)
flag = 0
if flag:
cat = 'other_injury'
cat_to_keys[cat].append(key)
cat_urls = ut.ddict(list)
for cat, keys in cat_to_keys.items():
for key in keys:
cat_urls[cat].extend(key_to_urls[key])
cat_hist = {}
for cat in list(cat_urls.keys()):
cat_urls[cat] = list(set(cat_urls[cat]))
cat_hist[cat] = len(cat_urls[cat])
logger.info(ut.repr3(cat_to_keys))
logger.info(ut.repr3(cat_hist))
key_to_cat = {val: key for key, vals in cat_to_keys.items() for val in vals}
# ingestset = {
# '__class__': 'ImageSet',
# 'images': ut.ddict(dict)
# }
# for key, key_imgs in keyed_images.items():
# for imgdict in key_imgs:
# url = imgdict['url']
# encid = imgdict['correspondingEncounterNumber']
# # Make structure
# encdict = encounters[encid]
# encdict['__class__'] = 'Encounter'
# imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
# imgdict['__class__'] = 'Image'
# cat = key_to_cat[key]
# annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
# annotdict['__class__'] = 'Annotation'
# # Ensure structures exist
# encdict['images'] = encdict.get('images', [])
# imgdict['annots'] = imgdict.get('annots', [])
# # Add an image to this encounter
# encdict['images'].append(imgdict)
# # Add an annotation to this image
# imgdict['annots'].append(annotdict)
# # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
# get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
# resp = requests.get(get_enc_url)
# logger.info(ut.repr3(encdict))
# logger.info(ut.repr3(encounters))
# Download the files to the local disk
# fpath_list =
all_urls = ut.unique(
ut.take_column(
ut.flatten(
ut.dict_subset(keyed_images, ut.flatten(cat_to_keys.values())).values()
),
'url',
)
)
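# Download every categorized image to a local scratch dir, deriving a flat
# filename from each URL suffix (path separators become '--').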
dldir = ut.truepath('~/tmpsharks')
from os.path import commonprefix, basename # NOQA
prefix = commonprefix(all_urls)
suffix_list = [url_[len(prefix) :] for url_ in all_urls]
fname_list = [suffix.replace('/', '--') for suffix in suffix_list]
fpath_list = []
for url, fname in ut.ProgIter(
zip(all_urls, fname_list), lbl='downloading imgs', freq=1
):
fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname, verbose=False)
fpath_list.append(fpath)
# Make sure we keep orig info
# url_to_keys = ut.ddict(list)
url_to_info = ut.ddict(dict)
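# Accumulate per-URL metadata: every field of every imgdict that referenced
# the URL, plus the list of keywords it appeared under.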
for key, imgdict_list in keyed_images.items():
for imgdict in imgdict_list:
url = imgdict['url']
info = url_to_info[url]
for k, v in imgdict.items():
info[k] = info.get(k, [])
info[k].append(v)
info['keys'] = info.get('keys', [])
info['keys'].append(key)
# url_to_keys[url].append(key)
info_list = ut.take(url_to_info, all_urls)
for info in info_list:
if len(set(info['correspondingEncounterNumber'])) > 1:
assert False, 'url with two different encounter nums'
# Combine duplicate tags
hashid_list = [
ut.get_file_uuid(fpath_, stride=8) for fpath_ in ut.ProgIter(fpath_list, bs=True)
]
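# Images whose content hashes to the same UUID are duplicates fetched under
# different keywords/URLs; group them so their metadata can be merged below.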
groupxs = ut.group_indices(hashid_list)[1]
# Group properties by duplicate images
# groupxs = [g for g in groupxs if len(g) > 1]
fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
info_list_ = [
ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
for info_ in ut.apply_grouping(info_list, groupxs)
]
encid_list_ = [
ut.unique(info_['correspondingEncounterNumber'])[0] for info_ in info_list_
]
keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]
clist = ut.ColumnLists(
{
'gpath': fpath_list_,
'url': url_list_,
'encid': encid_list_,
'key': keys_list_,
'cat': cats_list_,
}
)
# for info_ in ut.apply_grouping(info_list, groupxs):
# info = ut.dict_accum(*info_)
# info = ut.map_dict_vals(ut.flatten, info)
# x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
# if len(x) > 1:
# info = info.copy()
# del info['keys']
# logger.info(ut.repr3(info))
flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
clist = clist.compress(flags)
import wbia
ibs = wbia.opendb('WS_Injury', allow_newdir=True)
gid_list = ibs.add_images(clist['gpath'])
clist['gid'] = gid_list
failed_flags = ut.flag_None_items(clist['gid'])
logger.info('# failed %s' % (sum(failed_flags),))
passed_flags = ut.not_list(failed_flags)
clist = clist.compress(passed_flags)
ut.assert_all_not_None(clist['gid'])
# ibs.get_image_uris_original(clist['gid'])
ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)
# ut.zipflat(clist['cat'], clist['key'])
if False:
# Can run detection instead
clist['tags'] = ut.zipflat(clist['cat'])
aid_list = ibs.use_images_as_annotations(
clist['gid'], adjust_percent=0.01, tags_list=clist['tags']
)
aid_list
import wbia.plottool as pt
from wbia import core_annots
pt.qt4ensure()
# annots = ibs.annots()
# aids = [1, 2]
# ibs.depc_annot.get('hog', aids , 'hog')
# ibs.depc_annot.get('chip', aids, 'img')
for aid in ut.InteractiveIter(ibs.get_valid_aids()):
hogs = ibs.depc_annot.d.get_hog_hog([aid])
chips = ibs.depc_annot.d.get_chips_img([aid])
chip = chips[0]
hogimg = core_annots.make_hog_block_image(hogs[0])
pt.clf()
pt.imshow(hogimg, pnum=(1, 2, 1))
pt.imshow(chip, pnum=(1, 2, 2))
fig = pt.gcf()
fig.show()
fig.canvas.draw()
# logger.info(len(groupxs))
# if False:
# groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
# logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
# # FIX
# for fpath, fname in zip(fpath_list, fname_list):
# if ut.checkpath(fpath):
# ut.move(fpath, join(dirname(fpath), fname))
# logger.info('fpath = %r' % (fpath,))
# import wbia
# from wbia.dbio import ingest_dataset
# dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
# self = ingest_dataset.Ingestable2(dbdir)
if False:
# Show overlap matrix
import wbia.plottool as pt
import pandas as pd
import numpy as np
dict_ = overlaps
s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
df = s.unstack()
lhs, rhs = df.align(df.T)
df = lhs.add(rhs, fill_value=0).fillna(0)
label_texts = df.columns.values
def label_ticks(label_texts):
import wbia.plottool as pt
truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
ax = pt.gca()
ax.set_xticks(list(range(len(label_texts))))
ax.set_xticklabels(truncated_labels)
[lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
[lbl.set_horizontalalignment('left') for lbl in ax.get_xticklabels()]
# xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
# pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
ax.set_yticks(list(range(len(label_texts))))
ax.set_yticklabels(truncated_labels)
[lbl.set_horizontalalignment('right') for lbl in ax.get_yticklabels()]
[lbl.set_verticalalignment('center') for lbl in ax.get_yticklabels()]
# [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]
# df = df.sort(axis=0)
# df = df.sort(axis=1)
sortx = np.argsort(df.sum(axis=1).values)[::-1]
df = df.take(sortx, axis=0)
df = df.take(sortx, axis=1)
fig = pt.figure(fnum=1)
fig.clf()
mat = df.values.astype(np.int32)
mat[np.diag_indices(len(mat))] = 0
vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
import matplotlib.colors
norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
pt.plt.colorbar()
pt.plt.grid(False)
label_ticks(label_texts)
fig.tight_layout()
# overlap_df = pd.DataFrame.from_dict(overlap_img_list)
class TmpImage(ut.NiceRepr):
pass
from skimage.feature import hog
from skimage import data, color, exposure
import wbia.plottool as pt
image2 = color.rgb2gray(data.astronaut()) # NOQA
fpath = './GOPR1120.JPG'
import vtool as vt
for fpath in [fpath]:
"""
http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
"""
image = vt.imread(fpath, grayscale=True)
image = pt.color_funcs.to_base01(image)
fig = pt.figure(fnum=2)
fd, hog_image = hog(
image,
orientations=8,
pixels_per_cell=(16, 16),
cells_per_block=(1, 1),
visualise=True,
)
fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)
ax1.axis('off')
ax1.imshow(image, cmap=pt.plt.cm.gray)
ax1.set_title('Input image')
ax1.set_adjustable('box-forced')
# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))
ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
ax2.set_title('Histogram of Oriented Gradients')
ax2.set_adjustable('box-forced')
pt.plt.show()
# for
def detect_sharks(ibs, gids):
# import wbia
# ibs = wbia.opendb('WS_ALL')
config = {
'algo': 'yolo',
'sensitivity': 0.2,
'config_filepath': ut.truepath(
'~/work/WS_ALL/localizer_backup/detect.yolo.2.cfg'
),
'weight_filepath': ut.truepath(
'~/work/WS_ALL/localizer_backup/detect.yolo.2.39000.weights'
),
'class_filepath': ut.truepath(
'~/work/WS_ALL/localizer_backup/detect.yolo.2.cfg.classes'
),
}
depc = ibs.depc_image
# imgsets = ibs.imagesets(text='Injured Sharks')
# images = ibs.images(imgsets.gids[0])
images = ibs.images(gids)
images = images.compress([ext not in ['.gif'] for ext in images.exts])
gid_list = images.gids
# result is a tuple:
# (score, bbox_list, theta_list, conf_list, class_list)
results_list = depc.get_property('localizations', gid_list, None, config=config)
results_list2 = []
multi_gids = []
failed_gids = []
# ibs.set_image_imagesettext(failed_gids, ['Fixme'] * len(failed_gids))
ibs.set_image_imagesettext(multi_gids, ['Fixme2'] * len(multi_gids))
failed_gids
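# Partition the detection results: images with no detection are recorded as
# failed, single detections pass straight through, and for multi-detection
# images only the highest-confidence box is kept (the gid is noted in
# multi_gids). NOTE: the set_image_imagesettext call above runs before
# multi_gids is populated, so as written it tags an empty list.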
for gid, res in zip(gid_list, results_list):
score, bbox_list, theta_list, conf_list, class_list = res
if len(bbox_list) == 0:
failed_gids.append(gid)
elif len(bbox_list) == 1:
results_list2.append((gid, bbox_list, theta_list))
elif len(bbox_list) > 1:
multi_gids.append(gid)
idx = conf_list.argmax()
res2 = (gid, bbox_list[idx : idx + 1], theta_list[idx : idx + 1])
results_list2.append(res2)
ut.dict_hist([t[1].shape[0] for t in results_list])
localized_imgs = ibs.images(ut.take_column(results_list2, 0))
assert all([len(a) == 1 for a in localized_imgs.aids])
old_annots = ibs.annots(ut.flatten(localized_imgs.aids))
# old_tags = old_annots.case_tags
# Override old bboxes
import numpy as np
bboxes = np.array(ut.take_column(results_list2, 1))[:, 0, :]
ibs.set_annot_bboxes(old_annots.aids, bboxes)
if False:
import wbia.plottool as pt
pt.qt4ensure()
inter = pt.MultiImageInteraction(
ibs.get_image_paths(ut.take_column(results_list2, 0)),
bboxes_list=ut.take_column(results_list2, 1),
)
inter.dump_to_disk('shark_loc', num=50, prefix='shark_loc')
inter.start()
inter = pt.MultiImageInteraction(ibs.get_image_paths(failed_gids))
inter.start()
inter = pt.MultiImageInteraction(ibs.get_image_paths(multi_gids))
inter.start()
def train_part_detector():
"""
Problem:
Healthy sharks usually have a mostly whole-body shot, whereas
injured sharks usually have a close-up shot.
This difference in image distribution is likely what the injured-shark net is picking up on.
The goal is to train a detector that looks for things that look
like the distribution of injured-shark images.
We will run this on healthy sharks to find the parts of
"""
import wbia
ibs = wbia.opendb('WS_ALL')
imgset = ibs.imagesets(text='Injured Sharks')
injured_annots = imgset.annots[0] # NOQA
# config = {
# 'dim_size': (224, 224),
# 'resize_dim': 'wh'
# }
from pydarknet import Darknet_YOLO_Detector
data_path = ibs.export_to_xml()
output_path = join(ibs.get_cachedir(), 'training', 'localizer')
ut.ensuredir(output_path)
dark = Darknet_YOLO_Detector()
results = dark.train(data_path, output_path)
del dark
localizer_weight_path, localizer_config_path, localizer_class_path = results
classifier_model_path = ibs.classifier_train()
labeler_model_path = ibs.labeler_train()
output_path = join(ibs.get_cachedir(), 'training', 'detector')
ut.ensuredir(output_path)
ut.copy(localizer_weight_path, join(output_path, 'localizer.weights'))
ut.copy(localizer_config_path, join(output_path, 'localizer.config'))
ut.copy(localizer_class_path, join(output_path, 'localizer.classes'))
ut.copy(classifier_model_path, join(output_path, 'classifier.npy'))
ut.copy(labeler_model_path, join(output_path, 'labeler.npy'))
# ibs.detector_train()
def purge_ensure_one_annot_per_images(ibs):
"""
pip install Pipe
"""
# Purge all but one annotation
images = ibs.images()
# images.aids
groups = images._annot_groups
import numpy as np
# Mark the largest annotation in each image
large_masks = [ut.index_to_boolmask([np.argmax(x)], len(x)) for x in groups.bbox_area]
small_masks = ut.lmap(ut.not_list, large_masks)
# All smaller annotations are candidates for removal
small_aids = ut.zipcompress(groups.aid, small_masks)
small_aids = ut.flatten(small_aids)
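# NOTE: small_aids is only computed here; the actual purge (presumably
# something like ibs.delete_annots(small_aids)) is not invoked in this
# function.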
# Fix any empty images
images = ibs.images()
empty_images = ut.where(np.array(images.num_annotations) == 0)
logger.info('empty_images = %r' % (empty_images,))
# list(map(basename, map(dirname, images.uris_original)))
def VecPipe(func):
import pipe
@pipe.Pipe
def wrapped(sequence):
return map(func, sequence)
# return (None if item is None else func(item) for item in sequence)
return wrapped
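# Usage sketch: `images.uris_original | VecPipe(dirname) | VecPipe(basename)`
# lazily maps dirname then basename over the URIs, yielding each image's
# parent-directory name, which is used below as the animal name.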
name_list = list(images.uris_original | VecPipe(dirname) | VecPipe(basename))
aids_list = images.aids
ut.assert_all_eq(list(aids_list | VecPipe(len)))
annots = ibs.annots(ut.flatten(aids_list))
annots.names = name_list
def shark_misc():
import wbia
ibs = wbia.opendb('WS_ALL')
aid_list = ibs.get_valid_aids()
flag_list = ibs.get_annot_been_adjusted(aid_list)
adjusted_aids = ut.compress(aid_list, flag_list)
return adjusted_aids
# if False:
# # TRY TO FIGURE OUT WHY URLS ARE MISSING IN STEP 1
# encounter_to_parsed1 = parsed1.group_items('encounter')
# encounter_to_parsed2 = parsed2.group_items('encounter')
# url_to_parsed1 = parsed1.group_items('img_url')
# url_to_parsed2 = parsed2.group_items('img_url')
# def set_overlap(set1, set2):
# set1 = set(set1)
# set2 = set(set2)
# return ut.odict([
# ('s1', len(set1)),
# ('s2', len(set2)),
# ('isect', len(set1.intersection(set2))),
# ('union', len(set1.union(set2))),
# ('s1 - s2', len(set1.difference(set2))),
# ('s2 - s1', len(set2.difference(set1))),
# ])
# logger.info('encounter overlap: ' + ut.repr3(set_overlap(encounter_to_parsed1, encounter_to_parsed2)))
# logger.info('url overlap: ' + ut.repr3(set_overlap(url_to_parsed1, url_to_parsed2)))
# url1 = list(url_to_parsed1.keys())
# url2 = list(url_to_parsed2.keys())
# # remove common prefixes
# from os.path import commonprefix, basename # NOQA
# cp1 = commonprefix(url1)
# cp2 = commonprefix(url2)
# #suffix1 = sorted([u[len(cp1):].lower() for u in url1])
# #suffix2 = sorted([u[len(cp2):].lower() for u in url2])
# suffix1 = sorted([u[len(cp1):] for u in url1])
# suffix2 = sorted([u[len(cp2):] for u in url2])
# logger.info('suffix overlap: ' + ut.repr3(set_overlap(suffix1, suffix2)))
# set1 = set(suffix1)
# set2 = set(suffix2)
# only1 = list(set1 - set1.intersection(set2))
# only2 = list(set2 - set1.intersection(set2))
# import numpy as np
# for suf in ut.ProgIter(only2, bs=True):
# dist = np.array(ut.edit_distance(suf, only1))
# idx = ut.argsort(dist)[0:3]
# if dist[idx][0] < 3:
# close = ut.take(only1, idx)
# logger.info('---')
# logger.info('suf = %r' % (join(cp2, suf),))
# logger.info('close = %s' % (ut.repr3([join(cp1, c) for c in close]),))
# logger.info('---')
# break
# # Associate keywords with original images
# #lower_urls = [x.lower() for x in parsed['img_url']]
# url_to_idx = ut.make_index_lookup(parsed1['img_url'])
# parsed1['keywords'] = [[] for _ in range(len(parsed1))]
# for url, keys in url_to_keys.items():
# hack because urls are not in the same format
# url = url.replace('wildbook_data_dir', 'shepherd_data_dir')
# url = url.lower()
# if url in url_to_idx:
# idx = url_to_idx[url]
# parsed1['keywords'][idx].extend(keys)
# healthy_annots = ibs.annots(ibs.imagesets(text='Non-Injured Sharks').aids[0])
# ibs.set_annot_prop('healthy', healthy_annots.aids, [True] * len(healthy_annots))
# ['healthy' in t and len(t) > 0 for t in single_annots.case_tags]
# healthy_tags = []
# ut.find_duplicate_items(cur_img_uuids)
# ut.find_duplicate_items(new_img_uuids)
# cur_uuids = set(cur_img_uuids)
# new_uuids = set(new_img_uuids)
# both_uuids = new_uuids.intersection(cur_uuids)
# only_cur = cur_uuids - both_uuids
# only_new = new_uuids - both_uuids
# logger.info('len(cur_uuids) = %r' % (len(cur_uuids)))
# logger.info('len(new_uuids) = %r' % (len(new_uuids)))
# logger.info('len(both_uuids) = %r' % (len(both_uuids)))
# logger.info('len(only_cur) = %r' % (len(only_cur)))
# logger.info('len(only_new) = %r' % (len(only_new)))
# Ensure that data in both sets are synchronized
# images_both = []
# if False:
# logger.info('Removing small images')
# import numpy as np
# import vtool as vt
# imgsize_list = np.array([vt.open_image_size(gpath) for gpath in parsed['new_fpath']])
# sqrt_area_list = np.sqrt(np.prod(imgsize_list, axis=1))
# areq_flags_list = sqrt_area_list >= 750
# parsed = parsed.compress(areq_flags_list)