Skip to content

Commit 0691422

Browse files
author
Martha Morrissey
authored
speed up tile download with concurrent processing (#182)
* speed up tile download with concurrent processing
* remove time print
* make threadcount optional cli images argument
* clean up
* drop default threads down to 10
* switch to list comprehension, remove prints
* remove time import
1 parent b35b424 commit 0691422

File tree

2 files changed

+14
-5
lines changed

2 files changed

+14
-5
lines changed

label_maker/images.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
# pylint: disable=unused-argument
22
"""Generate an .npz file containing arrays for training machine learning algorithms"""
33

4+
import concurrent.futures
45
from os import makedirs, path as op
56
from random import shuffle
67

78
import numpy as np
89

910
from label_maker.utils import get_image_function
1011

11-
def download_images(dest_folder, classes, imagery, ml_type, background_ratio, imagery_offset=False, **kwargs):
12+
def download_images(dest_folder, classes, imagery, ml_type, background_ratio, threadcount, imagery_offset=False, **kwargs):
1213
"""Download satellite images specified by a URL and a label.npz file
1314
Parameters
1415
------------
@@ -28,6 +29,8 @@ def download_images(dest_folder, classes, imagery, ml_type, background_ratio, im
2829
background_ratio: float
2930
Determines the number of background images to download in single class problems. Ex. A value
3031
of 1 will download an equal number of background images to class images.
32+
threadcount: int
33+
Number of threads to use for faster parallel image download
3134
imagery_offset: list
3235
An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
3336
move the images 15 pixels right and 5 pixels up relative to the requested tile bounds
@@ -72,5 +75,6 @@ def class_test(value):
7275
image_function = get_image_function(imagery)
7376
kwargs['imagery_offset'] = imagery_offset
7477

75-
for tile in tiles:
76-
image_function(tile, imagery, tiles_dir, kwargs)
78+
with concurrent.futures.ThreadPoolExecutor(max_workers=threadcount) as executor:
79+
[executor.submit(image_function, tile, imagery, tiles_dir, kwargs) for tile in tiles]
80+
executor.shutdown(wait=True)

label_maker/main.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def parse_args(args):
5050
subparsers.add_parser('download', parents=[pparser], help='', formatter_class=dhf)
5151
l = subparsers.add_parser('labels', parents=[pparser], help='', formatter_class=dhf)
5252
p = subparsers.add_parser('preview', parents=[pparser], help='', formatter_class=dhf)
53-
subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf)
53+
i = subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf)
5454
subparsers.add_parser('package', parents=[pparser], help='', formatter_class=dhf)
5555

5656
# labels has an optional parameter
@@ -60,6 +60,10 @@ def parse_args(args):
6060
p.add_argument('-n', '--number', default=5, type=int,
6161
help='number of examples images to create per class')
6262

63+
# images has an optional parameter
64+
i.add_argument('-t', '--threadcount', default=10, type=int,
65+
help= 'thread count to use')
66+
6367
# turn namespace into dictionary
6468
parsed_args = vars(parser.parse_args(args))
6569

@@ -109,7 +113,8 @@ def cli():
109113
number = args.get('number')
110114
preview(dest_folder=dest_folder, number=number, **config)
111115
elif cmd == 'images':
112-
download_images(dest_folder=dest_folder, **config)
116+
threadcount = args.get('threadcount')
117+
download_images(dest_folder=dest_folder, threadcount=threadcount, **config)
113118
elif cmd == 'package':
114119
package_directory(dest_folder=dest_folder, **config)
115120

0 commit comments

Comments
 (0)