From 3d8ce2cf03ba9a168034bd749a2fa897695e3696 Mon Sep 17 00:00:00 2001 From: Jonas Date: Tue, 21 Mar 2017 13:11:16 +0100 Subject: [PATCH 01/15] loosen up requirements --- requirements.txt | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7a40899..6484883 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -usgs==0.1.9 -requests==2.7.0 -python-dateutil==2.5.1 -numpy==1.10.4 -termcolor==1.1.0 -rasterio==0.32.0 +usgs>=0.1.9 +requests>=2.7.0 +python-dateutil>=2.5.1 +numpy>=1.10.4 +termcolor>=1.1.0 +rasterio>=0.32.0 six>=1.8.0 -scipy==0.17.0 -scikit-image==0.12.3 -homura==0.1.3 -boto==2.39.0 -polyline==1.3 -geocoder==1.9.0 -matplotlib==1.5.1 +scipy>=0.17.0 +scikit-image>=0.12.3 +homura>=0.1.3 +boto>=2.39.0 +polyline>=1.3 +geocoder>=1.9.0 +matplotlib>=1.5.1 From 5a778dd4f151c4e8c73f0f83e0298dc91d222593 Mon Sep 17 00:00:00 2001 From: Jonas Solvsteen Date: Wed, 28 Jun 2017 11:38:08 +0200 Subject: [PATCH 02/15] Use Landsat Collection 1 dataset --- landsat/downloader.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/landsat/downloader.py b/landsat/downloader.py index c10f8cc..94c20c8 100644 --- a/landsat/downloader.py +++ b/landsat/downloader.py @@ -96,12 +96,14 @@ def usgs_eros(self, scene, path): error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text raise USGSInventoryAccessMissing(error_text) - download_url = api.download('LANDSAT_8', 'EE', [scene], api_key=api_key) - if download_url: - self.output('Source: USGS EarthExplorer', normal=True, arrow=True) - return self.fetch(download_url[0], path) + response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key) + try: + download_url = response['data'][0] + except IndexError: + raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene) + self.output('Source: USGS EarthExplorer', normal=True, 
arrow=True) + return self.fetch(download_url, path) - raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene) raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene) def google_storage(self, scene, path): From 85d6a933edab68973b5b439d98855433687e7102 Mon Sep 17 00:00:00 2001 From: Jonas Solvsteen Date: Wed, 28 Jun 2017 11:42:55 +0200 Subject: [PATCH 03/15] update USGS to >=0.2.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7a40899..d3b906b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -usgs==0.1.9 +usgs>=0.2.0 requests==2.7.0 python-dateutil==2.5.1 numpy==1.10.4 From 5f3bd079652d89239d294d541787597d02d29b3f Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sat, 19 Aug 2017 21:52:39 +0200 Subject: [PATCH 04/15] Download Collection 1 data from AWS Added download of Collection 1 Landsat 8 data from AWS after 1 May 2017. Example use with pre-collection and Collection 1 Landsat 8 data: - landsat download LC81970232017085LGN00 --bands 432 - landsat download LC08_L1TP_139045_20170304_20170316_01_T1 --bands 432 - landsat download LC81970232017085LGN00 - landsat download LC08_L1TP_139045_20170304_20170316_01_T1 --- landsat/downloader.py | 77 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/landsat/downloader.py b/landsat/downloader.py index 94c20c8..f61ab2b 100644 --- a/landsat/downloader.py +++ b/landsat/downloader.py @@ -68,7 +68,9 @@ def download(self, scenes, bands=None): # for all scenes if bands provided, first check AWS, if the bands exist # download them, otherwise use Google and then USGS. 
try: - # if bands are not provided, directly go to Goodle and then USGS + # if bands are not provided, directly go to Google and then USGS + if not isinstance(bands, list): + bands = [1,2,3,4,5,6,7,8,9,10,11] if not isinstance(bands, list): raise RemoteFileDoesntExist files.append(self.amazon_s3(scene, bands)) @@ -90,6 +92,7 @@ def usgs_eros(self, scene, path): # download from usgs if login information is provided if self.usgs_user and self.usgs_pass: try: + print (self.usgs_user) api_key = api.login(self.usgs_user, self.usgs_pass) except USGSError as e: error_tree = ElementTree.fromstring(str(e.message)) @@ -125,6 +128,7 @@ def google_storage(self, scene, path): sat = self.scene_interpreter(scene) url = self.google_storage_url(sat) + print (url) self.remote_file_exists(url) @@ -138,28 +142,52 @@ def amazon_s3(self, scene, bands): sat = self.scene_interpreter(scene) - # Always grab MTL.txt and QA band if bands are specified + # Always grab QA band if bands are specified + urls = [] + if 'BQA' not in bands: bands.append('QA') + + if len(scene) == 40: + for band in bands: + url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr") + #print (url2) + + # make sure it exist + self.remote_file_exists(url2) + urls.append(url2) + + url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD") + #print (url3) + + # make sure it exist + self.remote_file_exists(url3) + urls.append(url3) + + # Always grab MTL.txt and ANG band if bands are specified if 'MTL' not in bands: bands.append('MTL') - urls = [] + if 'ANG' not in bands and len(scene) == 40: + bands.append('ANG') for band in bands: # get url for the band url = self.amazon_s3_url(sat, band) + #print (url) # make sure it exist self.remote_file_exists(url) urls.append(url) + # create folder path = check_create_folder(join(self.download_dir, scene)) self.output('Source: AWS S3', normal=True, arrow=True) for url in urls: + #print(url) self.fetch(url, path) return path @@ -192,7 +220,7 @@ def fetch(self, url, path): 
self.output('Downloading: %s' % filename, normal=True, arrow=True) - # print(join(path, filename)) + print(join(path, filename)) # raise Exception if exists(join(path, filename)): size = getsize(join(path, filename)) @@ -236,10 +264,35 @@ def amazon_s3_url(self, sat, band): :returns: (String) The URL to a S3 file """ - if band != 'MTL': + if band != 'MTL' and band != 'ANG': filename = '%s_B%s.TIF' % (sat['scene'], band) else: filename = '%s_%s.txt' % (sat['scene'], band) + + #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])) + return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) + + def amazon_s3_url_type(self, sat, band, type): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :type type + TIF.ovr of WRK.IMD + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + file_extentie = '%s_B%s' + type + if band != 'MTL' and band != 'ANG': + filename = file_extentie % (sat['scene'], band) return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) @@ -280,6 +333,12 @@ def scene_interpreter(self, scene): The scene ID. :type scene: String +........ Pre-collection data style: +............LC81970232017085LGN00 + +........ collection 1 datas style: +........ 
LC08_L1TP_139045_20170304_20170316_01_T1 + :returns: dict @@ -300,10 +359,16 @@ def scene_interpreter(self, scene): 'scene': scene } if isinstance(scene, str) and len(scene) == 21: + #LC81970232017085LGN00 anatomy['path'] = scene[3:6] anatomy['row'] = scene[6:9] anatomy['sat'] = 'L' + scene[2:3] - + return anatomy + elif isinstance(scene, str) and len(scene) == 40: + #LC08_L1TP_139045_20170304_20170316_01_T1 + anatomy['path'] = scene[10:13] + anatomy['row'] = scene[13:16] + anatomy['sat'] = '/c1/L' + scene[3:4] return anatomy else: raise IncorrectSceneId('Received incorrect scene') From 3d6105e6154da6b545ee4a720701c3e8f9837e1a Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sat, 19 Aug 2017 21:54:33 +0200 Subject: [PATCH 05/15] Search dict results extended for collection 1 Added extra fields to the dict result of the Landsat search. An important field is the new 'product_id' in the Collection 1 data structure. Example use: - landsat search --limit 2000 --pathrow 199,23 Added to Dict: result['results'] = [{'sceneID': i['sceneID'], 'sat_type': u'L8', 'path2': three_digit(i['path']), 'row2': three_digit(i['row']), 'download_links' : i['download_links'], 'BPF_NAME_OLI' : i['BPF_NAME_OLI'], 'thumbnail': i['browseURL'], 'date': i['acquisitionDate'], 'GROUND_CONTROL_POINTS_VERSION': i['GROUND_CONTROL_POINTS_VERSION'], 'DATE_L1_GENERATED': i['DATE_L1_GENERATED'], --- landsat/search.py | 82 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/landsat/search.py b/landsat/search.py index a7d956e..3891a58 100644 --- a/landsat/search.py +++ b/landsat/search.py @@ -89,10 +89,10 @@ def search(self, paths_rows=None, lat=None, lon=None, address=None, start_date=N """ search_string = self.query_builder(paths_rows, lat, lon, address, start_date, end_date, cloud_min, cloud_max) - + #print (search_string) # Have to manually build the URI to bypass requests URI encoding # The api server doesn't accept 
encoded URIs - + #print('%s?search=%s&limit=%s' % (self.api_url, search_string, limit)) r = requests.get('%s?search=%s&limit=%s' % (self.api_url, search_string, limit)) r_dict = json.loads(r.text) @@ -143,15 +143,85 @@ def search(self, paths_rows=None, lat=None, lon=None, address=None, start_date=N result['total_returned'] = len(r_dict['results']) result['results'] = [{'sceneID': i['sceneID'], 'sat_type': u'L8', - 'path': three_digit(i['path']), - 'row': three_digit(i['row']), + 'path2': three_digit(i['path']), + 'row2': three_digit(i['row']), + 'download_links' : i['download_links'], + 'BPF_NAME_OLI' : i['BPF_NAME_OLI'], 'thumbnail': i['browseURL'], 'date': i['acquisitionDate'], - 'cloud': i['cloudCoverFull']} - for i in r_dict['results']] + 'GROUND_CONTROL_POINTS_VERSION': i['GROUND_CONTROL_POINTS_VERSION'], + 'DATE_L1_GENERATED': i['DATE_L1_GENERATED'], + 'NADIR_OFFNADIR': i['NADIR_OFFNADIR'], + 'data_geometry': i['data_geometry'], + 'sunAzimuth': i['sunAzimuth'], + 'cloudCover': i['cloudCover'], + 'COLLECTION_NUMBER': i['COLLECTION_NUMBER'], + 'sceneCenterLatitude': i['sceneCenterLatitude'], + 'cartURL': i['cartURL'], + 'sunElevation': i['sunElevation'], + 'cloud_coverage': i['cloud_coverage'], + 'CLOUD_COVER_LAND': i['CLOUD_COVER_LAND'], + 'scene_id': i['scene_id'], + 'GROUND_CONTROL_POINTS_MODEL': i['GROUND_CONTROL_POINTS_MODEL'], + 'row': i['row'], + 'imageQuality1': i['imageQuality1'], + 'cloudCoverFull': i['cloudCoverFull'], + 'aws_index': i['aws_index'], + 'browseURL': i['browseURL'], + 'browseAvailable': i['browseAvailable'], + 'BPF_NAME_TIRS': i['BPF_NAME_TIRS'], + 'dayOrNight': i['dayOrNight'], + 'TIRS_SSM_MODEL': i['TIRS_SSM_MODEL'], + 'CPF_NAME': i['CPF_NAME'], + 'FULL_PARTIAL_SCENE': i['FULL_PARTIAL_SCENE'], + 'DATA_TYPE_L1': i['DATA_TYPE_L1'], + 'aws_thumbnail': i['aws_thumbnail'], + 'google_index': i['google_index'], + 'sceneStartTime': i['sceneStartTime'], + 'dateUpdated': i['dateUpdated'], + 'sensor': i['sensor'], + 'lowerRightCornerLatitude': 
i['lowerRightCornerLatitude'], + 'LANDSAT_PRODUCT_ID': i['LANDSAT_PRODUCT_ID'], + 'acquisitionDate': i['acquisitionDate'], + 'PROCESSING_SOFTWARE_VERSION': i['PROCESSING_SOFTWARE_VERSION'], + 'lowerRightCornerLongitude': i['lowerRightCornerLongitude'], + 'lowerLeftCornerLatitude': i['lowerLeftCornerLatitude'], + 'sceneCenterLongitude': i['sceneCenterLongitude'], + 'COLLECTION_CATEGORY': i['COLLECTION_CATEGORY'], + 'upperLeftCornerLongitude': i['upperLeftCornerLongitude'], + 'path': i['path'], + 'lowerLeftCornerLongitude': i['lowerLeftCornerLongitude'], + 'GEOMETRIC_RMSE_MODEL_X': i['GEOMETRIC_RMSE_MODEL_X'], + 'GEOMETRIC_RMSE_MODEL_Y': i['GEOMETRIC_RMSE_MODEL_Y'], + 'sceneStopTime': i['sceneStopTime'], + 'upperLeftCornerLatitude': i['upperLeftCornerLatitude'], + 'upperRightCornerLongitude': i['upperRightCornerLongitude'], + 'product_id': i['product_id'], + 'satellite_name': i['satellite_name'], + 'GEOMETRIC_RMSE_MODEL': i['GEOMETRIC_RMSE_MODEL'], + 'upperRightCornerLatitude': i['upperRightCornerLatitude'], + 'receivingStation': i['receivingStation'], + 'cloud': i['cloudCoverFull']} for i in r_dict['results']] return result +# 'REFLECTIVE_SAMPLES': i['REFLECTIVE_SAMPLES'], +# 'THERMAL_LINES': i['THERMAL_LINES'], +# 'PANCHROMATIC_LINES': i['PANCHROMATIC_LINES'], +# 'GRID_CELL_SIZE_THERMAL': i['GRID_CELL_SIZE_THERMAL'], +# 'REFLECTIVE_LINES': i['REFLECTIVE_LINES'], +# 'THERMAL_SAMPLES': i['THERMAL_SAMPLES'], +# 'PANCHROMATIC_SAMPLES': i['PANCHROMATIC_SAMPLES'], +# 'UTM_ZONE': i['UTM_ZONE'], +# 'GRID_CELL_SIZE_REFLECTIVE': i['GRID_CELL_SIZE_REFLECTIVE'], +# 'GRID_CELL_SIZE_PANCHROMATIC': i['GRID_CELL_SIZE_PANCHROMATIC'], +# 'ORIENTATION': i['ORIENTATION'], +# 'DATUM': i['DATUM'], +# 'RESAMPLING_OPTION': i['RESAMPLING_OPTION'], +# 'RLUT_FILE_NAME': i['RLUT_FILE_NAME'], +# 'ROLL_ANGLE': i['ROLL_ANGLE'], +# 'MAP_PROJECTION_L1': i['MAP_PROJECTION_L1'], + def query_builder(self, paths_rows=None, lat=None, lon=None, address=None, start_date=None, end_date=None, 
cloud_min=None, cloud_max=None): """ Builds the proper search syntax (query) for Landsat API. From ca732c9007106b129a30bac6b19410b3fe86fbb0 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sat, 19 Aug 2017 22:05:40 +0200 Subject: [PATCH 06/15] removed some print statements removed some forgotten print statements --- downloader.py | 381 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 downloader.py diff --git a/downloader.py b/downloader.py new file mode 100644 index 0000000..4d8d63c --- /dev/null +++ b/downloader.py @@ -0,0 +1,381 @@ +# Landsat Util +# License: CC0 1.0 Universal + +from __future__ import print_function, division, absolute_import + +from xml.etree import ElementTree +from os.path import join, exists, getsize + +import requests +from usgs import api, USGSError +from homura import download as fetch + +from .utils import check_create_folder, url_builder +from .mixins import VerbosityMixin +from . 
import settings + + +class RemoteFileDoesntExist(Exception): + """ Exception to be used when the remote file does not exist """ + pass + + +class IncorrectSceneId(Exception): + """ Exception to be used when scene id is incorrect """ + pass + + +class USGSInventoryAccessMissing(Exception): + """ Exception for when User does not have Inventory Service access """ + pass + + +class Downloader(VerbosityMixin): + """ The downloader class """ + + def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None): + self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR + self.google = settings.GOOGLE_STORAGE + self.s3 = settings.S3_LANDSAT + self.usgs_user = usgs_user + self.usgs_pass = usgs_pass + + # Make sure download directory exist + check_create_folder(self.download_dir) + + def download(self, scenes, bands=None): + """ + Download scenese from Google Storage or Amazon S3 if bands are provided + + :param scenes: + A list of scene IDs + :type scenes: + List + :param bands: + A list of bands. Default value is None. + :type scenes: + List + + :returns: + (List) includes downloaded scenes as key and source as value (aws or google) + """ + + if isinstance(scenes, list): + files = [] + + for scene in scenes: + + # for all scenes if bands provided, first check AWS, if the bands exist + # download them, otherwise use Google and then USGS. 
+ try: + # if bands are not provided, directly go to Google and then USGS + if not isinstance(bands, list): + bands = [1,2,3,4,5,6,7,8,9,10,11] + if not isinstance(bands, list): + raise RemoteFileDoesntExist + files.append(self.amazon_s3(scene, bands)) + + except RemoteFileDoesntExist: + try: + files.append(self.google_storage(scene, self.download_dir)) + except RemoteFileDoesntExist: + files.append(self.usgs_eros(scene, self.download_dir)) + + return files + + else: + raise Exception('Expected sceneIDs list') + + def usgs_eros(self, scene, path): + """ Downloads the image from USGS """ + + # download from usgs if login information is provided + if self.usgs_user and self.usgs_pass: + try: + #print (self.usgs_user) + api_key = api.login(self.usgs_user, self.usgs_pass) + except USGSError as e: + error_tree = ElementTree.fromstring(str(e.message)) + error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text + raise USGSInventoryAccessMissing(error_text) + + response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key) + try: + download_url = response['data'][0] + except IndexError: + raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene) + self.output('Source: USGS EarthExplorer', normal=True, arrow=True) + return self.fetch(download_url, path) + + raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene) + + def google_storage(self, scene, path): + """ + Google Storage Downloader. 
+ + :param scene: + The scene id + :type scene: + String + :param path: + The directory path to where the image should be stored + :type path: + String + + :returns: + Boolean + """ + + sat = self.scene_interpreter(scene) + url = self.google_storage_url(sat) + #print (url) + + self.remote_file_exists(url) + + self.output('Source: Google Storage', normal=True, arrow=True) + return self.fetch(url, path) + + def amazon_s3(self, scene, bands): + """ + Amazon S3 downloader + """ + + sat = self.scene_interpreter(scene) + + # Always grab QA band if bands are specified + urls = [] + + if 'BQA' not in bands: + bands.append('QA') + + + if len(scene) == 40: + for band in bands: + url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr") + #print (url2) + + # make sure it exist + self.remote_file_exists(url2) + urls.append(url2) + + url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD") + #print (url3) + + # make sure it exist + self.remote_file_exists(url3) + urls.append(url3) + + # Always grab MTL.txt and ANG band if bands are specified + if 'MTL' not in bands: + bands.append('MTL') + + if 'ANG' not in bands and len(scene) == 40: + bands.append('ANG') + + for band in bands: + # get url for the band + url = self.amazon_s3_url(sat, band) + #print (url) + + # make sure it exist + self.remote_file_exists(url) + urls.append(url) + + + # create folder + path = check_create_folder(join(self.download_dir, scene)) + + self.output('Source: AWS S3', normal=True, arrow=True) + for url in urls: + #print(url) + self.fetch(url, path) + + return path + + def fetch(self, url, path): + """ Downloads the given url. + + :param url: + The url to be downloaded. 
+ :type url: + String + :param path: + The directory path to where the image should be stored + :type path: + String + :param filename: + The filename that has to be downloaded + :type filename: + String + + :returns: + Boolean + """ + + segments = url.split('/') + filename = segments[-1] + + # remove query parameters from the filename + filename = filename.split('?')[0] + + self.output('Downloading: %s' % filename, normal=True, arrow=True) + + #print(join(path, filename)) + # raise Exception + if exists(join(path, filename)): + size = getsize(join(path, filename)) + if size == self.get_remote_file_size(url): + self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1) + + else: + fetch(url, path) + self.output('stored at %s' % path, normal=True, color='green', indent=1) + + return join(path, filename) + + def google_storage_url(self, sat): + """ + Returns a google storage url the contains the scene provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + + :returns: + (String) The URL to a google storage file + """ + filename = sat['scene'] + '.tar.bz' + return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename]) + + def amazon_s3_url(self, sat, band): + """ + Return an amazon s3 url the contains the scene and band provided. 
+ + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + if band != 'MTL' and band != 'ANG': + filename = '%s_B%s.TIF' % (sat['scene'], band) + else: + filename = '%s_%s.txt' % (sat['scene'], band) + + #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])) + return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) + + def amazon_s3_url_type(self, sat, band, type): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :type type + TIF.ovr of WRK.IMD + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + file_extentie = '%s_B%s' + type + if band != 'MTL' and band != 'ANG': + filename = file_extentie % (sat['scene'], band) + + return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) + + def remote_file_exists(self, url): + """ Checks whether the remote file exists. + + :param url: + The url that has to be checked. + :type url: + String + + :returns: + **True** if remote file exists and **False** if it doesn't exist. + """ + status = requests.head(url).status_code + + if status != 200: + raise RemoteFileDoesntExist + + def get_remote_file_size(self, url): + """ Gets the filesize of a remote file. + + :param url: + The url that has to be checked. + :type url: + String + + :returns: + int + """ + headers = requests.head(url).headers + return int(headers['content-length']) + + def scene_interpreter(self, scene): + """ Conver sceneID to rows, paths and dates. + + :param scene: + The scene ID. + :type scene: + String +........ 
Pre-collection data style: +............LC81970232017085LGN00 + +........ collection 1 datas style: +........ LC08_L1TP_139045_20170304_20170316_01_T1 + + + :returns: + dict + + :Example output: + + >>> anatomy = { + 'path': None, + 'row': None, + 'sat': None, + 'scene': scene + } + """ + anatomy = { + 'path': None, + 'row': None, + 'sat': None, + 'scene': scene + } + if isinstance(scene, str) and len(scene) == 21: + #LC81970232017085LGN00 + anatomy['path'] = scene[3:6] + anatomy['row'] = scene[6:9] + anatomy['sat'] = 'L' + scene[2:3] + return anatomy + elif isinstance(scene, str) and len(scene) == 40: + #LC08_L1TP_139045_20170304_20170316_01_T1 + anatomy['path'] = scene[10:13] + anatomy['row'] = scene[13:16] + anatomy['sat'] = '/c1/L' + scene[3:4] + return anatomy + else: + raise IncorrectSceneId('Received incorrect scene') + + +if __name__ == '__main__': + + d = Downloader() + + # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00']) From be5fb60aedd249fd32a122b290efe6319d62b97f Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sat, 19 Aug 2017 22:09:14 +0200 Subject: [PATCH 07/15] delete some print statements delete some forgotten print statements --- landsat/downloader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/landsat/downloader.py b/landsat/downloader.py index f61ab2b..4d8d63c 100644 --- a/landsat/downloader.py +++ b/landsat/downloader.py @@ -92,7 +92,7 @@ def usgs_eros(self, scene, path): # download from usgs if login information is provided if self.usgs_user and self.usgs_pass: try: - print (self.usgs_user) + #print (self.usgs_user) api_key = api.login(self.usgs_user, self.usgs_pass) except USGSError as e: error_tree = ElementTree.fromstring(str(e.message)) @@ -128,7 +128,7 @@ def google_storage(self, scene, path): sat = self.scene_interpreter(scene) url = self.google_storage_url(sat) - print (url) + #print (url) self.remote_file_exists(url) @@ -220,7 +220,7 @@ def fetch(self, 
url, path): self.output('Downloading: %s' % filename, normal=True, arrow=True) - print(join(path, filename)) + #print(join(path, filename)) # raise Exception if exists(join(path, filename)): size = getsize(join(path, filename)) From 5f7c719872b4e38d73ace0a58cdbf393a7e1a23b Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sat, 19 Aug 2017 22:10:24 +0200 Subject: [PATCH 08/15] wrong dir copied in wrong directory --- downloader.py | 381 -------------------------------------------------- 1 file changed, 381 deletions(-) delete mode 100644 downloader.py diff --git a/downloader.py b/downloader.py deleted file mode 100644 index 4d8d63c..0000000 --- a/downloader.py +++ /dev/null @@ -1,381 +0,0 @@ -# Landsat Util -# License: CC0 1.0 Universal - -from __future__ import print_function, division, absolute_import - -from xml.etree import ElementTree -from os.path import join, exists, getsize - -import requests -from usgs import api, USGSError -from homura import download as fetch - -from .utils import check_create_folder, url_builder -from .mixins import VerbosityMixin -from . 
import settings - - -class RemoteFileDoesntExist(Exception): - """ Exception to be used when the remote file does not exist """ - pass - - -class IncorrectSceneId(Exception): - """ Exception to be used when scene id is incorrect """ - pass - - -class USGSInventoryAccessMissing(Exception): - """ Exception for when User does not have Inventory Service access """ - pass - - -class Downloader(VerbosityMixin): - """ The downloader class """ - - def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None): - self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR - self.google = settings.GOOGLE_STORAGE - self.s3 = settings.S3_LANDSAT - self.usgs_user = usgs_user - self.usgs_pass = usgs_pass - - # Make sure download directory exist - check_create_folder(self.download_dir) - - def download(self, scenes, bands=None): - """ - Download scenese from Google Storage or Amazon S3 if bands are provided - - :param scenes: - A list of scene IDs - :type scenes: - List - :param bands: - A list of bands. Default value is None. - :type scenes: - List - - :returns: - (List) includes downloaded scenes as key and source as value (aws or google) - """ - - if isinstance(scenes, list): - files = [] - - for scene in scenes: - - # for all scenes if bands provided, first check AWS, if the bands exist - # download them, otherwise use Google and then USGS. 
- try: - # if bands are not provided, directly go to Google and then USGS - if not isinstance(bands, list): - bands = [1,2,3,4,5,6,7,8,9,10,11] - if not isinstance(bands, list): - raise RemoteFileDoesntExist - files.append(self.amazon_s3(scene, bands)) - - except RemoteFileDoesntExist: - try: - files.append(self.google_storage(scene, self.download_dir)) - except RemoteFileDoesntExist: - files.append(self.usgs_eros(scene, self.download_dir)) - - return files - - else: - raise Exception('Expected sceneIDs list') - - def usgs_eros(self, scene, path): - """ Downloads the image from USGS """ - - # download from usgs if login information is provided - if self.usgs_user and self.usgs_pass: - try: - #print (self.usgs_user) - api_key = api.login(self.usgs_user, self.usgs_pass) - except USGSError as e: - error_tree = ElementTree.fromstring(str(e.message)) - error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text - raise USGSInventoryAccessMissing(error_text) - - response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key) - try: - download_url = response['data'][0] - except IndexError: - raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene) - self.output('Source: USGS EarthExplorer', normal=True, arrow=True) - return self.fetch(download_url, path) - - raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene) - - def google_storage(self, scene, path): - """ - Google Storage Downloader. 
- - :param scene: - The scene id - :type scene: - String - :param path: - The directory path to where the image should be stored - :type path: - String - - :returns: - Boolean - """ - - sat = self.scene_interpreter(scene) - url = self.google_storage_url(sat) - #print (url) - - self.remote_file_exists(url) - - self.output('Source: Google Storage', normal=True, arrow=True) - return self.fetch(url, path) - - def amazon_s3(self, scene, bands): - """ - Amazon S3 downloader - """ - - sat = self.scene_interpreter(scene) - - # Always grab QA band if bands are specified - urls = [] - - if 'BQA' not in bands: - bands.append('QA') - - - if len(scene) == 40: - for band in bands: - url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr") - #print (url2) - - # make sure it exist - self.remote_file_exists(url2) - urls.append(url2) - - url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD") - #print (url3) - - # make sure it exist - self.remote_file_exists(url3) - urls.append(url3) - - # Always grab MTL.txt and ANG band if bands are specified - if 'MTL' not in bands: - bands.append('MTL') - - if 'ANG' not in bands and len(scene) == 40: - bands.append('ANG') - - for band in bands: - # get url for the band - url = self.amazon_s3_url(sat, band) - #print (url) - - # make sure it exist - self.remote_file_exists(url) - urls.append(url) - - - # create folder - path = check_create_folder(join(self.download_dir, scene)) - - self.output('Source: AWS S3', normal=True, arrow=True) - for url in urls: - #print(url) - self.fetch(url, path) - - return path - - def fetch(self, url, path): - """ Downloads the given url. - - :param url: - The url to be downloaded. 
- :type url: - String - :param path: - The directory path to where the image should be stored - :type path: - String - :param filename: - The filename that has to be downloaded - :type filename: - String - - :returns: - Boolean - """ - - segments = url.split('/') - filename = segments[-1] - - # remove query parameters from the filename - filename = filename.split('?')[0] - - self.output('Downloading: %s' % filename, normal=True, arrow=True) - - #print(join(path, filename)) - # raise Exception - if exists(join(path, filename)): - size = getsize(join(path, filename)) - if size == self.get_remote_file_size(url): - self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1) - - else: - fetch(url, path) - self.output('stored at %s' % path, normal=True, color='green', indent=1) - - return join(path, filename) - - def google_storage_url(self, sat): - """ - Returns a google storage url the contains the scene provided. - - :param sat: - Expects an object created by scene_interpreter method - :type sat: - dict - - :returns: - (String) The URL to a google storage file - """ - filename = sat['scene'] + '.tar.bz' - return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename]) - - def amazon_s3_url(self, sat, band): - """ - Return an amazon s3 url the contains the scene and band provided. 
- - :param sat: - Expects an object created by scene_interpreter method - :type sat: - dict - :param filename: - The filename that has to be downloaded from Amazon - :type filename: - String - - :returns: - (String) The URL to a S3 file - """ - if band != 'MTL' and band != 'ANG': - filename = '%s_B%s.TIF' % (sat['scene'], band) - else: - filename = '%s_%s.txt' % (sat['scene'], band) - - #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])) - return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) - - def amazon_s3_url_type(self, sat, band, type): - """ - Return an amazon s3 url the contains the scene and band provided. - - :param sat: - Expects an object created by scene_interpreter method - :type sat: - dict - :type type - TIF.ovr of WRK.IMD - :param filename: - The filename that has to be downloaded from Amazon - :type filename: - String - - :returns: - (String) The URL to a S3 file - """ - file_extentie = '%s_B%s' + type - if band != 'MTL' and band != 'ANG': - filename = file_extentie % (sat['scene'], band) - - return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) - - def remote_file_exists(self, url): - """ Checks whether the remote file exists. - - :param url: - The url that has to be checked. - :type url: - String - - :returns: - **True** if remote file exists and **False** if it doesn't exist. - """ - status = requests.head(url).status_code - - if status != 200: - raise RemoteFileDoesntExist - - def get_remote_file_size(self, url): - """ Gets the filesize of a remote file. - - :param url: - The url that has to be checked. - :type url: - String - - :returns: - int - """ - headers = requests.head(url).headers - return int(headers['content-length']) - - def scene_interpreter(self, scene): - """ Conver sceneID to rows, paths and dates. - - :param scene: - The scene ID. - :type scene: - String -........ 
Pre-collection data style: -............LC81970232017085LGN00 - -........ collection 1 datas style: -........ LC08_L1TP_139045_20170304_20170316_01_T1 - - - :returns: - dict - - :Example output: - - >>> anatomy = { - 'path': None, - 'row': None, - 'sat': None, - 'scene': scene - } - """ - anatomy = { - 'path': None, - 'row': None, - 'sat': None, - 'scene': scene - } - if isinstance(scene, str) and len(scene) == 21: - #LC81970232017085LGN00 - anatomy['path'] = scene[3:6] - anatomy['row'] = scene[6:9] - anatomy['sat'] = 'L' + scene[2:3] - return anatomy - elif isinstance(scene, str) and len(scene) == 40: - #LC08_L1TP_139045_20170304_20170316_01_T1 - anatomy['path'] = scene[10:13] - anatomy['row'] = scene[13:16] - anatomy['sat'] = '/c1/L' + scene[3:4] - return anatomy - else: - raise IncorrectSceneId('Received incorrect scene') - - -if __name__ == '__main__': - - d = Downloader() - - # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00']) From a7e8e13da28889a1e0c4660479b9a087b241110a Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 11:40:35 +0200 Subject: [PATCH 09/15] Add New Googlestorage Bucket for Landsat Collection 1 data --- landsat/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/landsat/settings.py b/landsat/settings.py index cfe9e9a..b1f8f05 100644 --- a/landsat/settings.py +++ b/landsat/settings.py @@ -15,6 +15,7 @@ SATELLITE = 'L8' L8_METADATA_URL = 'http://landsat.usgs.gov/metadata_service/bulk_metadata_files/LANDSAT_8.csv' GOOGLE_STORAGE = 'http://storage.googleapis.com/earthengine-public/landsat/' +GOOGLE_STORAGE_C1 = 'http://storage.googleapis.com/gcp-public-data-landsat/LC08/01/' S3_LANDSAT = 'http://landsat-pds.s3.amazonaws.com/' API_URL = 'https://api.developmentseed.org/satellites/landsat' From cb8bf2a19bbcfde798585f48f3e9ff529195cb7c Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 11:45:26 +0200 Subject: [PATCH 
10/15] Add support for download Landsat Collection 1 data from new Google storage bucket Add support for download Landsat Collection 1 data from new Google storage bucket Only the *.TIF, _MTL.txt and _ANG..TXT are downloadable. Files *.TIF.ovr and _wrk.IMD are forbidden to download. On AWS these file are accessable. --- landsat/downloader.py | 154 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 133 insertions(+), 21 deletions(-) diff --git a/landsat/downloader.py b/landsat/downloader.py index 4d8d63c..6538545 100644 --- a/landsat/downloader.py +++ b/landsat/downloader.py @@ -19,6 +19,11 @@ class RemoteFileDoesntExist(Exception): """ Exception to be used when the remote file does not exist """ pass +class Forbidden403(Exception): + """ Exception to be used when the user is forbidden to use the remote file + This a appears when downloading some of the files from google storage bucket gcp-public-data-landsat + """ + pass class IncorrectSceneId(Exception): """ Exception to be used when scene id is incorrect """ @@ -36,6 +41,7 @@ class Downloader(VerbosityMixin): def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None): self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR self.google = settings.GOOGLE_STORAGE + self.google_C1 = settings.GOOGLE_STORAGE_C1 self.s3 = settings.S3_LANDSAT self.usgs_user = usgs_user self.usgs_pass = usgs_pass @@ -74,10 +80,17 @@ def download(self, scenes, bands=None): if not isinstance(bands, list): raise RemoteFileDoesntExist files.append(self.amazon_s3(scene, bands)) + #files.append(self.google_storage_new(scene, bands)) except RemoteFileDoesntExist: try: - files.append(self.google_storage(scene, self.download_dir)) + if len(scene) == 40: + #Collection 1 data: Product_id + files.append(self.google_storage_new(scene, bands)) + #files.append(self.amazon_s3(scene, bands)) + else: + #Pre-Collection data: scene_id len(scene = 21) + files.append(self.google_storage(scene, 
self.download_dir)) except RemoteFileDoesntExist: files.append(self.usgs_eros(scene, self.download_dir)) @@ -128,13 +141,127 @@ def google_storage(self, scene, path): sat = self.scene_interpreter(scene) url = self.google_storage_url(sat) - #print (url) self.remote_file_exists(url) self.output('Source: Google Storage', normal=True, arrow=True) return self.fetch(url, path) + def google_storage_new(self, scene, bands): + """ + Google downloader new version for collection 1 data: No tar.bz file but a lot files + """ + + sat = self.scene_interpreter(scene) + # Always grab QA band if bands are specified + urls = [] + + if 'BQA' not in bands: + bands.append('QA') + + if len(scene) == 40: + for band in bands: + url2 = self.google_storage_url_type_new(sat, band, ".TIF.ovr") + #print (url2) + + # make sure it exist + self.remote_file_exists(url2) + urls.append(url2) + + url3 = self.google_storage_url_type_new(sat, band, "_wrk.IMD") + #print (url3) + + # make sure it exist + self.remote_file_exists(url3) + urls.append(url3) + + # Always grab MTL.txt and ANG band if bands are specified + if 'MTL' not in bands: + bands.append('MTL') + + if 'ANG' not in bands and len(scene) == 40: + bands.append('ANG') + + for band in bands: + # get url for the band + url = self.google_storage_url_new(sat, band) + #print (url) + + # make sure it exist + self.remote_file_exists(url) + urls.append(url) + + + # create folder + path = check_create_folder(join(self.download_dir, scene)) + + self.output('Source: Google Storage S3', normal=True, arrow=True) + for url in urls: + self.fetch(url, path) + + return path + + def google_storage_url(self, sat): + """ + Returns a google storage url the contains the scene provided. 
+ + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + + :returns: + (String) The URL to a google storage file + """ + filename = sat['scene'] + '.tar.bz' + return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename]) + + def google_storage_url_new(self, sat, band): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + if band != 'MTL' and band != 'ANG': + filename = '%s_B%s.TIF' % (sat['scene'], band) + else: + filename = '%s_%s.txt' % (sat['scene'], band) + return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename]) + + def google_storage_url_type_new(self, sat, band, type): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :type type + TIF.ovr of WRK.IMD + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + file_extentie = '%s_B%s' + type + if band != 'MTL' and band != 'ANG': + filename = file_extentie % (sat['scene'], band) + + return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename]) + def amazon_s3(self, scene, bands): """ Amazon S3 downloader @@ -220,7 +347,6 @@ def fetch(self, url, path): self.output('Downloading: %s' % filename, normal=True, arrow=True) - #print(join(path, filename)) # raise Exception if exists(join(path, filename)): size = getsize(join(path, filename)) @@ -233,21 +359,6 @@ def fetch(self, url, path): return join(path, filename) - def google_storage_url(self, sat): - """ - Returns a google storage url the contains the scene provided. 
- - :param sat: - Expects an object created by scene_interpreter method - :type sat: - dict - - :returns: - (String) The URL to a google storage file - """ - filename = sat['scene'] + '.tar.bz' - return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename]) - def amazon_s3_url(self, sat, band): """ Return an amazon s3 url the contains the scene and band provided. @@ -268,8 +379,6 @@ def amazon_s3_url(self, sat, band): filename = '%s_B%s.TIF' % (sat['scene'], band) else: filename = '%s_%s.txt' % (sat['scene'], band) - - #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])) return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) def amazon_s3_url_type(self, sat, band, type): @@ -309,7 +418,10 @@ def remote_file_exists(self, url): """ status = requests.head(url).status_code - if status != 200: + if status == 403: + print("403: " + url) + elif status != 200: + #print ("File doesnotexits: %s-%s" % (url,status)) raise RemoteFileDoesntExist def get_remote_file_size(self, url): From 6d2cf73819389fdda91d1092ed5944987f5a6408 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 19:59:36 +0200 Subject: [PATCH 11/15] Added a comment Added a comment --- downloader.py | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 504 insertions(+) create mode 100644 downloader.py diff --git a/downloader.py b/downloader.py new file mode 100644 index 0000000..2ea2480 --- /dev/null +++ b/downloader.py @@ -0,0 +1,504 @@ +# Landsat Util +# License: CC0 1.0 Universal + +from __future__ import print_function, division, absolute_import + +from xml.etree import ElementTree +from os.path import join, exists, getsize + +import requests +from usgs import api, USGSError +from homura import download as fetch + +from .utils import check_create_folder, url_builder, geocode +from .mixins import VerbosityMixin +from . 
import settings + + +class RemoteFileDoesntExist(Exception): + """ Exception to be used when the remote file does not exist """ + pass + +class Forbidden403(Exception): + """ Exception to be used when the user is forbidden to use the remote file + This a appears when downloading some of the files from google storage bucket gcp-public-data-landsat + """ + pass + +class IncorrectSceneId(Exception): + """ Exception to be used when scene id is incorrect """ + pass + + +class USGSInventoryAccessMissing(Exception): + """ Exception for when User does not have Inventory Service access """ + pass + + +class Downloader(VerbosityMixin): + """ The downloader class """ + + def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None): + self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR + self.google = settings.GOOGLE_STORAGE + self.google_C1 = settings.GOOGLE_STORAGE_C1 + self.s3 = settings.S3_LANDSAT + self.usgs_user = usgs_user + self.usgs_pass = usgs_pass + + # Make sure download directory exist + check_create_folder(self.download_dir) + + def download(self, scenes, bands=None): + """ + Download scenese from Google Storage or Amazon S3 if bands are provided + + :param scenes: + A list of scene IDs + :type scenes: + List + :param bands: + A list of bands. Default value is None. + :type scenes: + List + + :returns: + (List) includes downloaded scenes as key and source as value (aws or google) + """ + #loc = geocode('1600 Pennsylvania Ave NW, Washington, DC 20500') + #print("DOWNLOAD*********************************") + #print(loc) + #self.assertEqual(round(loc['lat'], 3), 38.898) + #self.assertEqual(round(loc['lon'], 3), -77.037) + #self.assertRaises(ValueError, utils.geocode, 'Pennsylvania Ave NW, Washington, DC') + #self.assertEqual({'lat': 38.8987709, 'lon': -77.0351295}, + # utils.geocode('Pennsylvania Ave NW, Washington, DC', 10.)) + #loc2 = geocode('Pennsylvania Ave NW, Washington, DC', 10.) 
+ #print(loc2) + + if isinstance(scenes, list): + files = [] + + for scene in scenes: + + # for all scenes if bands provided, first check AWS, if the bands exist + # download them, otherwise use Google and then USGS. + try: + # if bands are not provided, directly go to Google and then USGS + if not isinstance(bands, list): + bands = [1,2,3,4,5,6,7,8,9,10,11] + if not isinstance(bands, list): + raise RemoteFileDoesntExist + files.append(self.amazon_s3(scene, bands)) + #files.append(self.google_storage_new(scene, bands)) + + except RemoteFileDoesntExist: + try: + if len(scene) == 40: + #Collection 1 data: Product_id + files.append(self.google_storage_new(scene, bands)) + #files.append(self.amazon_s3(scene, bands)) + else: + #Pre-Collection data: scene_id len(scene = 21) + files.append(self.google_storage(scene, self.download_dir)) + except RemoteFileDoesntExist: + files.append(self.usgs_eros(scene, self.download_dir)) + + return files + + else: + raise Exception('Expected sceneIDs list') + + def usgs_eros(self, scene, path): + """ Downloads the image from USGS """ + + # download from usgs if login information is provided + if self.usgs_user and self.usgs_pass: + try: + #print (self.usgs_user) + api_key = api.login(self.usgs_user, self.usgs_pass) + except USGSError as e: + error_tree = ElementTree.fromstring(str(e.message)) + error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text + raise USGSInventoryAccessMissing(error_text) + + response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key) + try: + download_url = response['data'][0] + except IndexError: + raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene) + self.output('Source: USGS EarthExplorer', normal=True, arrow=True) + return self.fetch(download_url, path) + + raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene) + + def google_storage(self, scene, path): + """ + Google Storage 
Downloader. + + :param scene: + The scene id + :type scene: + String + :param path: + The directory path to where the image should be stored + :type path: + String + + :returns: + Boolean + """ + + sat = self.scene_interpreter(scene) + url = self.google_storage_url(sat) + + self.remote_file_exists(url) + + self.output('Source: Google Storage', normal=True, arrow=True) + return self.fetch(url, path) + + def google_storage_new(self, scene, bands): + """ + Google downloader new version for collection 1 data: No tar.bz file but a lot files + """ + + sat = self.scene_interpreter(scene) + # Always grab QA band if bands are specified + urls = [] + + if 'BQA' not in bands: + bands.append('QA') + + if len(scene) == 40: + for band in bands: + url2 = self.google_storage_url_type_new(sat, band, ".TIF.ovr") + #print (url2) + + # make sure it exist + self.remote_file_exists(url2) + urls.append(url2) + + url3 = self.google_storage_url_type_new(sat, band, "_wrk.IMD") + #print (url3) + + # make sure it exist + self.remote_file_exists(url3) + urls.append(url3) + + # Always grab MTL.txt and ANG band if bands are specified + if 'MTL' not in bands: + bands.append('MTL') + + if 'ANG' not in bands and len(scene) == 40: + bands.append('ANG') + + for band in bands: + # get url for the band + url = self.google_storage_url_new(sat, band) + #print (url) + + # make sure it exist + self.remote_file_exists(url) + urls.append(url) + + + # create folder + path = check_create_folder(join(self.download_dir, scene)) + + self.output('Source: Google Storage S3', normal=True, arrow=True) + for url in urls: + self.fetch(url, path) + + return path + + def google_storage_url(self, sat): + """ + Returns a google storage url the contains the scene provided. 
+ + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + + :returns: + (String) The URL to a google storage file + """ + filename = sat['scene'] + '.tar.bz' + return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename]) + + def google_storage_url_new(self, sat, band): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + if band != 'MTL' and band != 'ANG': + filename = '%s_B%s.TIF' % (sat['scene'], band) + else: + filename = '%s_%s.txt' % (sat['scene'], band) + return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename]) + + def google_storage_url_type_new(self, sat, band, type): + """ + Return an amazon s3 url the contains the scene and band provided. 
+ + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :type type + TIF.ovr of WRK.IMD + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + file_extentie = '%s_B%s' + type + if band != 'MTL' and band != 'ANG': + filename = file_extentie % (sat['scene'], band) + + return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename]) + + def amazon_s3(self, scene, bands): + """ + Amazon S3 downloader + """ + + sat = self.scene_interpreter(scene) + + # Always grab QA band if bands are specified + urls = [] + + if 'BQA' not in bands: + bands.append('QA') + + + if len(scene) == 40: + for band in bands: + url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr") + #print (url2) + + # make sure it exist + self.remote_file_exists(url2) + urls.append(url2) + + url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD") + #print (url3) + + # make sure it exist + self.remote_file_exists(url3) + urls.append(url3) + + # Always grab MTL.txt and ANG band if bands are specified + if 'MTL' not in bands: + bands.append('MTL') + + if 'ANG' not in bands and len(scene) == 40: + bands.append('ANG') + + for band in bands: + # get url for the band + url = self.amazon_s3_url(sat, band) + #print (url) + + # make sure it exist + self.remote_file_exists(url) + urls.append(url) + + + # create folder + path = check_create_folder(join(self.download_dir, scene)) + + self.output('Source: AWS S3', normal=True, arrow=True) + for url in urls: + #print(url) + self.fetch(url, path) + + return path + + def fetch(self, url, path): + """ Downloads the given url. + + :param url: + The url to be downloaded. 
+ :type url: + String + :param path: + The directory path to where the image should be stored + :type path: + String + :param filename: + The filename that has to be downloaded + :type filename: + String + + :returns: + Boolean + """ + + segments = url.split('/') + filename = segments[-1] + + # remove query parameters from the filename + filename = filename.split('?')[0] + + self.output('Downloading: %s' % filename, normal=True, arrow=True) + + # raise Exception + if exists(join(path, filename)): + size = getsize(join(path, filename)) + if size == self.get_remote_file_size(url): + self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1) + + else: + #TODO: Try catch for files that are forbidden: + fetch(url, path) + self.output('stored at %s' % path, normal=True, color='green', indent=1) + + return join(path, filename) + + def amazon_s3_url(self, sat, band): + """ + Return an amazon s3 url the contains the scene and band provided. + + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + if band != 'MTL' and band != 'ANG': + filename = '%s_B%s.TIF' % (sat['scene'], band) + else: + filename = '%s_%s.txt' % (sat['scene'], band) + return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) + + def amazon_s3_url_type(self, sat, band, type): + """ + Return an amazon s3 url the contains the scene and band provided. 
+ + :param sat: + Expects an object created by scene_interpreter method + :type sat: + dict + :type type + TIF.ovr of WRK.IMD + :param filename: + The filename that has to be downloaded from Amazon + :type filename: + String + + :returns: + (String) The URL to a S3 file + """ + file_extentie = '%s_B%s' + type + if band != 'MTL' and band != 'ANG': + filename = file_extentie % (sat['scene'], band) + + return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]) + + def remote_file_exists(self, url): + """ Checks whether the remote file exists. + + :param url: + The url that has to be checked. + :type url: + String + + :returns: + **True** if remote file exists and **False** if it doesn't exist. + """ + status = requests.head(url).status_code + + if status == 403: + print("403: " + url) + elif status != 200: + #print ("File doesnotexits: %s-%s" % (url,status)) + raise RemoteFileDoesntExist + + def get_remote_file_size(self, url): + """ Gets the filesize of a remote file. + + :param url: + The url that has to be checked. + :type url: + String + + :returns: + int + """ + headers = requests.head(url).headers + return int(headers['content-length']) + + def scene_interpreter(self, scene): + """ Conver sceneID to rows, paths and dates. + + :param scene: + The scene ID. + :type scene: + String +........ Pre-collection data style: +............LC81970232017085LGN00 + +........ collection 1 datas style: +........ 
LC08_L1TP_139045_20170304_20170316_01_T1 + + + :returns: + dict + + :Example output: + + >>> anatomy = { + 'path': None, + 'row': None, + 'sat': None, + 'scene': scene + } + """ + anatomy = { + 'path': None, + 'row': None, + 'sat': None, + 'scene': scene + } + if isinstance(scene, str) and len(scene) == 21: + #LC81970232017085LGN00 + anatomy['path'] = scene[3:6] + anatomy['row'] = scene[6:9] + anatomy['sat'] = 'L' + scene[2:3] + return anatomy + elif isinstance(scene, str) and len(scene) == 40: + #LC08_L1TP_139045_20170304_20170316_01_T1 + anatomy['path'] = scene[10:13] + anatomy['row'] = scene[13:16] + anatomy['sat'] = '/c1/L' + scene[3:4] + return anatomy + else: + raise IncorrectSceneId('Received incorrect scene') + + +if __name__ == '__main__': + + d = Downloader() + + # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00']) From d54845c4d6ea5bf1c56d1b273fef916b727af5c8 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 20:03:05 +0200 Subject: [PATCH 12/15] Added test with Collection 1 Data Added test with Collection 1 Data and change a test because of the changed datastructure --- tests/test_download.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_download.py b/tests/test_download.py index 71c2318..41a61b1 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -22,9 +22,12 @@ def setUpClass(cls): cls.temp_folder = mkdtemp() cls.d = Downloader(download_dir=cls.temp_folder) cls.scene = 'LT81360082013127LGN01' - cls.scene_2 = 'LC82050312014229LGN00' + #cls.scene_2 = 'LC82050312014229LGN00' + cls.scene_2 = 'LC81990232017067LGN00' cls.scene_s3 = 'LC80010092015051LGN00' cls.scene_s3_2 = 'LC82050312015136LGN00' + #Collection 1 data: Product_ID + cls_scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1' cls.scene_size = 59239149 @classmethod @@ -73,6 +76,11 @@ def test_download(self, mock_fetch): test_paths = [self.temp_folder + '/' + self.scene + 
'.tar.bz'] self.assertEqual(test_paths, paths) + # When passing product_id AWS should be triggered (Collection 1 data structure) + paths = self.d.download([self.scene4], bands=[11]) + test_paths = [self.temp_folder + '/' + self.scene4 ] + self.assertEqual(test_paths, paths) + @mock.patch('landsat.downloader.Downloader.google_storage') def test_download_google_when_amazon_is_unavailable(self, fake_google): """ Test whether google or amazon are correctly selected based on input """ From ff6e4f952beebfb299f7b3c48183bfbd748a66f6 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 20:32:51 +0200 Subject: [PATCH 13/15] Fixed an error in test_download Fixed an error in test_download --- tests/test_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_download.py b/tests/test_download.py index 41a61b1..3afec71 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -77,8 +77,8 @@ def test_download(self, mock_fetch): self.assertEqual(test_paths, paths) # When passing product_id AWS should be triggered (Collection 1 data structure) - paths = self.d.download([self.scene4], bands=[11]) - test_paths = [self.temp_folder + '/' + self.scene4 ] + paths = self.d.download([self.scene_s4], bands=[11]) + test_paths = [self.temp_folder + '/' + self.scene_s4 ] self.assertEqual(test_paths, paths) @mock.patch('landsat.downloader.Downloader.google_storage') From 1fb8be24e77e53329888fed0bb1c61edaa12a073 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Sun, 20 Aug 2017 20:41:28 +0200 Subject: [PATCH 14/15] Corrected a typo --- tests/test_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_download.py b/tests/test_download.py index 3afec71..c503f22 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -27,7 +27,7 @@ def setUpClass(cls): cls.scene_s3 = 'LC80010092015051LGN00' cls.scene_s3_2 = 
'LC82050312015136LGN00' #Collection 1 data: Product_ID - cls_scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1' + cls.scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1' cls.scene_size = 59239149 @classmethod From d864163f1995b896e01ae0928dcfcbd60b17d1b5 Mon Sep 17 00:00:00 2001 From: Mellian <31151900+jmellian@users.noreply.github.com> Date: Fri, 25 Aug 2017 08:37:41 +0200 Subject: [PATCH 15/15] Update requirements.txt --- requirements.txt | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8e34c93..96e701e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -usgs>=0.2.1 -requests>=2.7.0 -python-dateutil>=2.5.1 -numpy>=1.10.4 -termcolor>=1.1.0 -rasterio>=0.32.0 -six>=1.8.0 -scipy>=0.17.0 -scikit-image>=0.12.3 -homura>=0.1.3 -boto>=2.39.0 -polyline>=1.3 -geocoder>=1.9.0 -matplotlib>=1.5.1 +usgs>=0.1.9,<1.* +requests~=2.7 +python-dateutil>=2.5.1,<3.* +numpy>=1.10.4,<2.* +termcolor~=1.1 +rasterio~=0.32 +six~=1.8 +scipy~=0.17 +scikit-image>=0.12.3,<1.* +homura>=0.1.3,<1.* +boto~=2.39 +polyline~=1.3 +geocoder~=1.9 +matplotlib>=1.5.1,<2.*