-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathpurge_database.py
63 lines (51 loc) · 2.71 KB
/
purge_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import argparse
import json
import os
from tqdm import tqdm
import cv2
def _locate_image(sample):
    """Return ``(folder_name, file_name)`` for one database record.

    Records store their location in one of two layouts:

    * ``image_path`` is empty or itself contains ``'.JPG'``: the file name
      is whichever of ``image_path`` / ``image_name`` holds it, and the
      folder is characters 11-20 of that file name followed by ``"R"``.
    * otherwise ``image_path`` is the folder and ``image_name`` the file.
    """
    image_path = sample['image_path']
    path_holds_file_name = '.JPG' in image_path
    file_name = image_path if path_holds_file_name else sample['image_name']
    if image_path == "" or path_holds_file_name:
        # NOTE(review): the [11:21] slice presumably picks an identifier
        # embedded in the file name - confirm against the naming scheme.
        return file_name[11:21] + "R", file_name
    return image_path, file_name


def purge_image_data(data, images_dir, clean_dir):
    """Copy the images of the kept families from images_dir to clean_dir.

    Args:
        data: Mapping of record id to a record dict with the keys
            'family', 'image_path' and 'image_name'.
        images_dir: Parent directory holding the source images.
        clean_dir: Destination parent directory; sub-folders are created
            on demand.

    Records of other families are ignored, and images that already exist
    in the destination are not rewritten. Images that cannot be read (or
    written) are reported and skipped instead of aborting the whole run.
    """
    kept_families = ('Papilionidae', 'Pieridae', 'Nymphalidae',
                     'Lycaenidae', 'Hesperiidae', 'Riodinidae')
    for data_id in tqdm(data):
        sample = data[data_id]
        if sample['family'] not in kept_families:
            continue

        folder_name, file_name = _locate_image(sample)
        path_to_image = os.path.join(images_dir, folder_name, file_name)
        path_to_clean_dir = os.path.join(clean_dir, folder_name)
        path_to_clean_image = os.path.join(path_to_clean_dir, file_name)

        if os.path.exists(path_to_clean_image):
            continue

        # cv2.imread signals a missing/undecodable file by returning None;
        # handing that to cv2.imwrite would raise and stop the loop.
        img = cv2.imread(path_to_image)
        if img is None:
            tqdm.write("Skipping unreadable image: {}".format(path_to_image))
            continue

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path_to_clean_dir, exist_ok=True)
        if not cv2.imwrite(path_to_clean_image, img):
            tqdm.write("Failed to write image: {}".format(path_to_clean_image))
def purge_json_data(data, json_path):
    """Write the records of the kept families to json_path as JSON.

    Args:
        data: Mapping of record id to a record dict with a 'family' key.
        json_path: Output file path; it is opened in write mode, so any
            existing content is replaced.
    """
    kept_families = ['Papilionidae', 'Pieridae', 'Nymphalidae', 'Lycaenidae', 'Hesperiidae', 'Riodinidae']
    # Keep the original id -> record mapping, dropping every other family.
    purged_data = {
        data_id: data[data_id]
        for data_id in tqdm(data)
        if data[data_id]['family'] in kept_families
    }
    with open(json_path, 'w') as outfile:
        json.dump(purged_data, outfile, indent=4)
if __name__ == '__main__':
    # Command-line entry point: load the (mini) database JSON, copy the
    # images of the kept families into --clean_dir and write the filtered
    # records to --json_path.
    parser = argparse.ArgumentParser()
    parser.add_argument("--mini", help='Use the mini database for testing/debugging.', action='store_true')
    parser.add_argument("--images_dir", help='Parent directory with images.', type=str, required=True)
    parser.add_argument("--clean_dir", help='Clean directory with images.', type=str, required=True)
    parser.add_argument("--json_path", help='Path to json with relevant data.', type=str, required=True)
    args = parser.parse_args()

    infile = 'mini_database' if args.mini else 'database'
    # The path is relative to the current working directory.
    database_path = '../database/{}.json'.format(infile)
    if not os.path.isfile(database_path):
        print("File does not exist!")
        # exit() is a helper injected by the site module for interactive
        # use and returned status 0 here; report the failure to the caller.
        raise SystemExit(1)

    with open(database_path) as json_file:
        data = json.load(json_file)

    purge_image_data(data, args.images_dir, args.clean_dir)
    purge_json_data(data, args.json_path)