|
9 | 9 | from label_maker.utils import is_tif
|
10 | 10 |
|
11 | 11 |
|
12 |
| -def package_directory(dest_folder, classes, imagery, ml_type, seed=False, split_names=['train', 'test'], |
13 |
| - split_vals=[0.8, .2], **kwargs): |
| 12 | +def package_directory(dest_folder, classes, imagery, ml_type, seed=False, |
| 13 | + split_names=('train', 'test'), split_vals=(0.8, .2), |
| 14 | + **kwargs): |
14 | 15 | """Generate an .npz file containing arrays for training machine learning algorithms
|
15 | 16 |
|
16 | 17 | Parameters
|
17 | 18 | ------------
|
18 | 19 | dest_folder: str
|
19 | 20 | Folder to save labels, tiles, and final numpy arrays into
|
20 | 21 | classes: list
|
21 |
| - A list of classes for machine learning training. Each class is defined as a dict |
22 |
| - with two required properties: |
| 22 | + A list of classes for machine learning training. Each class is defined |
| 23 | + as a dict with two required properties: |
23 | 24 | - name: class name
|
24 | 25 | - filter: A Mapbox GL Filter.
|
25 | 26 | See the README for more details
|
26 | 27 | imagery: str
|
27 | 28 | Imagery template to download satellite images from.
|
28 | 29 | Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
|
29 | 30 | ml_type: str
|
30 |
| - Defines the type of machine learning. One of "classification", "object-detection", or "segmentation" |
| 31 | + Defines the type of machine learning. One of "classification", |
| 32 | + "object-detection", or "segmentation" |
31 | 33 | seed: int
|
32 | 34 | Random generator seed. Optional, use to make results reproducible.
|
33 |
| - split_vals: list |
34 |
| - Default: [0.8, 0.2] |
35 |
| - Percentage of data to put in each catagory listed in split_names. |
36 |
| - Must be floats and must sum to one. |
37 |
| - split_names: list |
38 |
| - Default: ['train', 'test'] |
| 35 | + split_vals: tuple |
| 36 | + Percentage of data to put in each category listed in split_names. Must |
| 37 | + be floats and must sum to one. Default: (0.8, 0.2) |
| 38 | + split_names: tuple |
| 39 | + Default: ('train', 'test') |
39 | 40 | List of names for each subset of the data.
|
40 | 41 | **kwargs: dict
|
41 |
| - Other properties from CLI config passed as keywords to other utility functions |
| 42 | + Other properties from CLI config passed as keywords to other utility |
| 43 | + functions. |
42 | 44 | """
|
43 | 45 | # if a seed is given, use it
|
44 | 46 | if seed:
|
45 | 47 | np.random.seed(seed)
|
46 | 48 |
|
47 | 49 | if len(split_names) != len(split_vals):
|
48 |
| - raise ValueError('`split_names` and `split_vals` must be the same length. Please update your config.') |
| 50 | + raise ValueError('`split_names` and `split_vals` must be the same ' |
| 51 | + 'length. Please update your config.') |
49 | 52 | if not np.isclose(sum(split_vals), 1):
|
50 | 53 | raise ValueError('`split_vals` must sum to one. Please update your config.')
|
51 | 54 |
|
@@ -105,7 +108,8 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False, split_
|
105 | 108 | split_n_samps = [len(x_vals) * val for val in split_vals]
|
106 | 109 |
|
107 | 110 | if np.any(split_n_samps == 0):
|
108 |
| - raise ValueError('split must not generate zero samples per partition, change ratio of values in config file.') |
| 111 | + raise ValueError('Split must not generate zero samples per partition. ' |
| 112 | + 'Change ratio of values in config file.') |
109 | 113 |
|
110 | 114 | # Convert into a cumulative sum to get indices
|
111 | 115 | split_inds = np.cumsum(split_n_samps).astype(np.integer)
|
|
0 commit comments