From 3d8ce2cf03ba9a168034bd749a2fa897695e3696 Mon Sep 17 00:00:00 2001
From: Jonas <josl@dhi-gras.com>
Date: Tue, 21 Mar 2017 13:11:16 +0100
Subject: [PATCH 01/15] loosen up requirements

---
 requirements.txt | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7a40899..6484883 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,14 @@
-usgs==0.1.9
-requests==2.7.0
-python-dateutil==2.5.1
-numpy==1.10.4
-termcolor==1.1.0
-rasterio==0.32.0
+usgs>=0.1.9
+requests>=2.7.0
+python-dateutil>=2.5.1
+numpy>=1.10.4
+termcolor>=1.1.0
+rasterio>=0.32.0
 six>=1.8.0
-scipy==0.17.0
-scikit-image==0.12.3
-homura==0.1.3
-boto==2.39.0
-polyline==1.3
-geocoder==1.9.0
-matplotlib==1.5.1
+scipy>=0.17.0
+scikit-image>=0.12.3
+homura>=0.1.3
+boto>=2.39.0
+polyline>=1.3
+geocoder>=1.9.0
+matplotlib>=1.5.1

From 5a778dd4f151c4e8c73f0f83e0298dc91d222593 Mon Sep 17 00:00:00 2001
From: Jonas Solvsteen <josl@dhigroup.com>
Date: Wed, 28 Jun 2017 11:38:08 +0200
Subject: [PATCH 02/15] Use Landsat Collection 1 dataset

---
 landsat/downloader.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/landsat/downloader.py b/landsat/downloader.py
index c10f8cc..94c20c8 100644
--- a/landsat/downloader.py
+++ b/landsat/downloader.py
@@ -96,12 +96,14 @@ def usgs_eros(self, scene, path):
                 error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text
                 raise USGSInventoryAccessMissing(error_text)
 
-            download_url = api.download('LANDSAT_8', 'EE', [scene], api_key=api_key)
-            if download_url:
-                self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
-                return self.fetch(download_url[0], path)
+            response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key)
+            try:
+                download_url = response['data'][0]
+            except IndexError:
+                raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)
+            self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
+            return self.fetch(download_url, path)
 
-            raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)
         raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene)
 
     def google_storage(self, scene, path):

From 85d6a933edab68973b5b439d98855433687e7102 Mon Sep 17 00:00:00 2001
From: Jonas Solvsteen <josl@dhigroup.com>
Date: Wed, 28 Jun 2017 11:42:55 +0200
Subject: [PATCH 03/15] update USGS to >=0.2.0

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 7a40899..d3b906b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-usgs==0.1.9
+usgs>=0.2.0
 requests==2.7.0
 python-dateutil==2.5.1
 numpy==1.10.4

From 5f3bd079652d89239d294d541787597d02d29b3f Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sat, 19 Aug 2017 21:52:39 +0200
Subject: [PATCH 04/15] Download Collection 1 data from AWS

Added downloading of Collection 1 Landsat 8 data from AWS after 1 May 2017.
Example use with pre-collection and Collection 1 Landsat 8 scene IDs:
- landsat download LC81970232017085LGN00 --bands 432
- landsat download LC08_L1TP_139045_20170304_20170316_01_T1 --bands 432

- landsat download LC81970232017085LGN00
- landsat download LC08_L1TP_139045_20170304_20170316_01_T1
---
 landsat/downloader.py | 77 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/landsat/downloader.py b/landsat/downloader.py
index 94c20c8..f61ab2b 100644
--- a/landsat/downloader.py
+++ b/landsat/downloader.py
@@ -68,7 +68,9 @@ def download(self, scenes, bands=None):
                 # for all scenes if bands provided, first check AWS, if the bands exist
                 # download them, otherwise use Google and then USGS.
                 try:
-                    # if bands are not provided, directly go to Goodle and then USGS
+                    # if bands are not provided, directly go to Google and then USGS
+                    if not isinstance(bands, list):
+                       bands = [1,2,3,4,5,6,7,8,9,10,11]
                     if not isinstance(bands, list):
                         raise RemoteFileDoesntExist
                     files.append(self.amazon_s3(scene, bands))
@@ -90,6 +92,7 @@ def usgs_eros(self, scene, path):
         # download from usgs if login information is provided
         if self.usgs_user and self.usgs_pass:
             try:
+                print (self.usgs_user)
                 api_key = api.login(self.usgs_user, self.usgs_pass)
             except USGSError as e:
                 error_tree = ElementTree.fromstring(str(e.message))
@@ -125,6 +128,7 @@ def google_storage(self, scene, path):
 
         sat = self.scene_interpreter(scene)
         url = self.google_storage_url(sat)
+        print (url)
 
         self.remote_file_exists(url)
 
@@ -138,28 +142,52 @@ def amazon_s3(self, scene, bands):
 
         sat = self.scene_interpreter(scene)
 
-        # Always grab MTL.txt and QA band if bands are specified
+        # Always grab QA band if bands are specified
+        urls = []
+
         if 'BQA' not in bands:
             bands.append('QA')
 
+
+        if len(scene) == 40:
+           for band in bands:
+               url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr")
+               #print (url2)
+
+               # make sure it exist
+               self.remote_file_exists(url2)
+               urls.append(url2)
+
+               url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD")
+               #print (url3)
+
+               # make sure it exist
+               self.remote_file_exists(url3)
+               urls.append(url3)
+
+        # Always grab MTL.txt and ANG band if bands are specified
         if 'MTL' not in bands:
             bands.append('MTL')
 
-        urls = []
+        if 'ANG' not in bands and len(scene) == 40:
+            bands.append('ANG')
 
         for band in bands:
             # get url for the band
             url = self.amazon_s3_url(sat, band)
+            #print (url)
 
             # make sure it exist
             self.remote_file_exists(url)
             urls.append(url)
+            
 
         # create folder
         path = check_create_folder(join(self.download_dir, scene))
 
         self.output('Source: AWS S3', normal=True, arrow=True)
         for url in urls:
+            #print(url)
             self.fetch(url, path)
 
         return path
@@ -192,7 +220,7 @@ def fetch(self, url, path):
 
         self.output('Downloading: %s' % filename, normal=True, arrow=True)
 
-        # print(join(path, filename))
+        print(join(path, filename))
         # raise Exception
         if exists(join(path, filename)):
             size = getsize(join(path, filename))
@@ -236,10 +264,35 @@ def amazon_s3_url(self, sat, band):
         :returns:
             (String) The URL to a S3 file
         """
-        if band != 'MTL':
+        if band != 'MTL' and band != 'ANG':
             filename = '%s_B%s.TIF' % (sat['scene'], band)
         else:
             filename = '%s_%s.txt' % (sat['scene'], band)
+        
+        #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]))
+        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
+
+    def amazon_s3_url_type(self, sat, band, type):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :type type
+            TIF.ovr of WRK.IMD
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        file_extentie = '%s_B%s' + type
+        if band != 'MTL' and band != 'ANG':
+            filename = file_extentie % (sat['scene'], band)
 
         return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
 
@@ -280,6 +333,12 @@ def scene_interpreter(self, scene):
             The scene ID.
         :type scene:
             String
+........    Pre-collection data style:
+............LC81970232017085LGN00
+
+........    collection 1 datas style:
+........    LC08_L1TP_139045_20170304_20170316_01_T1
+
 
         :returns:
             dict
@@ -300,10 +359,16 @@ def scene_interpreter(self, scene):
             'scene': scene
         }
         if isinstance(scene, str) and len(scene) == 21:
+            #LC81970232017085LGN00
             anatomy['path'] = scene[3:6]
             anatomy['row'] = scene[6:9]
             anatomy['sat'] = 'L' + scene[2:3]
-
+            return anatomy
+        elif isinstance(scene, str) and len(scene) == 40:
+            #LC08_L1TP_139045_20170304_20170316_01_T1
+            anatomy['path'] = scene[10:13]
+            anatomy['row'] = scene[13:16]
+            anatomy['sat'] = '/c1/L' + scene[3:4]
             return anatomy
         else:
             raise IncorrectSceneId('Received incorrect scene')

From 3d6105e6154da6b545ee4a720701c3e8f9837e1a Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sat, 19 Aug 2017 21:54:33 +0200
Subject: [PATCH 05/15] Search dict results extended for collection 1

Added extra fields to the result dict for landsat search.
An important field is the new 'product_id' in the Collection 1 data structure.

example use:
- landsat search  --limit 2000 --pathrow 199,23

Added to Dict:
                result['results'] = [{'sceneID': i['sceneID'],
                                      'sat_type': u'L8',
                                      'path2': three_digit(i['path']),
                                      'row2': three_digit(i['row']),
                                      'download_links' : i['download_links'],
                                      'BPF_NAME_OLI' : i['BPF_NAME_OLI'],
                                      'thumbnail': i['browseURL'],
                                      'date': i['acquisitionDate'],
                                      'GROUND_CONTROL_POINTS_VERSION': i['GROUND_CONTROL_POINTS_VERSION'],
                                      'DATE_L1_GENERATED': i['DATE_L1_GENERATED'],
---
 landsat/search.py | 82 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 76 insertions(+), 6 deletions(-)

diff --git a/landsat/search.py b/landsat/search.py
index a7d956e..3891a58 100644
--- a/landsat/search.py
+++ b/landsat/search.py
@@ -89,10 +89,10 @@ def search(self, paths_rows=None, lat=None, lon=None, address=None, start_date=N
         """
 
         search_string = self.query_builder(paths_rows, lat, lon, address, start_date, end_date, cloud_min, cloud_max)
-
+        #print (search_string)
         # Have to manually build the URI to bypass requests URI encoding
         # The api server doesn't accept encoded URIs
-
+        #print('%s?search=%s&limit=%s' % (self.api_url, search_string, limit))
         r = requests.get('%s?search=%s&limit=%s' % (self.api_url, search_string, limit))
 
         r_dict = json.loads(r.text)
@@ -143,15 +143,85 @@ def search(self, paths_rows=None, lat=None, lon=None, address=None, start_date=N
                 result['total_returned'] = len(r_dict['results'])
                 result['results'] = [{'sceneID': i['sceneID'],
                                       'sat_type': u'L8',
-                                      'path': three_digit(i['path']),
-                                      'row': three_digit(i['row']),
+                                      'path2': three_digit(i['path']),
+                                      'row2': three_digit(i['row']),
+                                      'download_links' : i['download_links'],
+                                      'BPF_NAME_OLI' : i['BPF_NAME_OLI'],
                                       'thumbnail': i['browseURL'],
                                       'date': i['acquisitionDate'],
-                                      'cloud': i['cloudCoverFull']}
-                                     for i in r_dict['results']]
+                                      'GROUND_CONTROL_POINTS_VERSION': i['GROUND_CONTROL_POINTS_VERSION'],
+                                      'DATE_L1_GENERATED': i['DATE_L1_GENERATED'],
+                                      'NADIR_OFFNADIR': i['NADIR_OFFNADIR'],
+                                      'data_geometry': i['data_geometry'],
+                                      'sunAzimuth': i['sunAzimuth'],
+                                      'cloudCover': i['cloudCover'],
+                                      'COLLECTION_NUMBER': i['COLLECTION_NUMBER'],
+                                      'sceneCenterLatitude': i['sceneCenterLatitude'],
+                                      'cartURL': i['cartURL'],
+                                      'sunElevation': i['sunElevation'],
+                                      'cloud_coverage': i['cloud_coverage'],
+                                      'CLOUD_COVER_LAND': i['CLOUD_COVER_LAND'],
+                                      'scene_id': i['scene_id'],
+                                      'GROUND_CONTROL_POINTS_MODEL': i['GROUND_CONTROL_POINTS_MODEL'],
+                                      'row': i['row'],
+                                      'imageQuality1': i['imageQuality1'],
+                                      'cloudCoverFull': i['cloudCoverFull'],
+                                      'aws_index': i['aws_index'],
+                                      'browseURL': i['browseURL'],
+                                      'browseAvailable': i['browseAvailable'],
+                                      'BPF_NAME_TIRS': i['BPF_NAME_TIRS'],
+                                      'dayOrNight': i['dayOrNight'],
+                                      'TIRS_SSM_MODEL': i['TIRS_SSM_MODEL'],
+                                      'CPF_NAME': i['CPF_NAME'],
+                                      'FULL_PARTIAL_SCENE': i['FULL_PARTIAL_SCENE'],
+                                      'DATA_TYPE_L1': i['DATA_TYPE_L1'],
+                                      'aws_thumbnail': i['aws_thumbnail'],
+                                      'google_index': i['google_index'],
+                                      'sceneStartTime': i['sceneStartTime'],
+                                      'dateUpdated': i['dateUpdated'],
+                                      'sensor': i['sensor'],
+                                      'lowerRightCornerLatitude': i['lowerRightCornerLatitude'],
+                                      'LANDSAT_PRODUCT_ID': i['LANDSAT_PRODUCT_ID'],
+                                      'acquisitionDate': i['acquisitionDate'],
+                                      'PROCESSING_SOFTWARE_VERSION': i['PROCESSING_SOFTWARE_VERSION'],
+                                      'lowerRightCornerLongitude': i['lowerRightCornerLongitude'],
+                                      'lowerLeftCornerLatitude': i['lowerLeftCornerLatitude'],
+                                      'sceneCenterLongitude': i['sceneCenterLongitude'],
+                                      'COLLECTION_CATEGORY': i['COLLECTION_CATEGORY'],
+                                      'upperLeftCornerLongitude': i['upperLeftCornerLongitude'],
+                                      'path': i['path'],
+                                      'lowerLeftCornerLongitude': i['lowerLeftCornerLongitude'],
+                                      'GEOMETRIC_RMSE_MODEL_X': i['GEOMETRIC_RMSE_MODEL_X'],
+                                      'GEOMETRIC_RMSE_MODEL_Y': i['GEOMETRIC_RMSE_MODEL_Y'],
+                                      'sceneStopTime': i['sceneStopTime'],
+                                      'upperLeftCornerLatitude': i['upperLeftCornerLatitude'],
+                                      'upperRightCornerLongitude': i['upperRightCornerLongitude'],
+                                      'product_id': i['product_id'],
+                                      'satellite_name': i['satellite_name'],
+                                      'GEOMETRIC_RMSE_MODEL': i['GEOMETRIC_RMSE_MODEL'],
+                                      'upperRightCornerLatitude': i['upperRightCornerLatitude'],
+                                      'receivingStation': i['receivingStation'],
+                                      'cloud': i['cloudCoverFull']} for i in r_dict['results']]
 
         return result
 
+#                                      'REFLECTIVE_SAMPLES': i['REFLECTIVE_SAMPLES'],
+#                                      'THERMAL_LINES': i['THERMAL_LINES'],
+#                                      'PANCHROMATIC_LINES': i['PANCHROMATIC_LINES'],
+#                                      'GRID_CELL_SIZE_THERMAL': i['GRID_CELL_SIZE_THERMAL'],
+#                                      'REFLECTIVE_LINES': i['REFLECTIVE_LINES'],
+#                                      'THERMAL_SAMPLES': i['THERMAL_SAMPLES'],
+#                                      'PANCHROMATIC_SAMPLES': i['PANCHROMATIC_SAMPLES'],
+#                                      'UTM_ZONE': i['UTM_ZONE'],
+#                                      'GRID_CELL_SIZE_REFLECTIVE': i['GRID_CELL_SIZE_REFLECTIVE'],
+#                                      'GRID_CELL_SIZE_PANCHROMATIC': i['GRID_CELL_SIZE_PANCHROMATIC'],
+#                                      'ORIENTATION': i['ORIENTATION'],
+#                                      'DATUM': i['DATUM'],
+#                                      'RESAMPLING_OPTION': i['RESAMPLING_OPTION'],
+#                                      'RLUT_FILE_NAME': i['RLUT_FILE_NAME'],
+#                                      'ROLL_ANGLE': i['ROLL_ANGLE'],
+#                                      'MAP_PROJECTION_L1': i['MAP_PROJECTION_L1'],
+
     def query_builder(self, paths_rows=None, lat=None, lon=None, address=None, start_date=None, end_date=None,
                       cloud_min=None, cloud_max=None):
         """ Builds the proper search syntax (query) for Landsat API.

From ca732c9007106b129a30bac6b19410b3fe86fbb0 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sat, 19 Aug 2017 22:05:40 +0200
Subject: [PATCH 06/15] removed some print statements

removed some forgotten print statements
---
 downloader.py | 381 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 381 insertions(+)
 create mode 100644 downloader.py

diff --git a/downloader.py b/downloader.py
new file mode 100644
index 0000000..4d8d63c
--- /dev/null
+++ b/downloader.py
@@ -0,0 +1,381 @@
+# Landsat Util
+# License: CC0 1.0 Universal
+
+from __future__ import print_function, division, absolute_import
+
+from xml.etree import ElementTree
+from os.path import join, exists, getsize
+
+import requests
+from usgs import api, USGSError
+from homura import download as fetch
+
+from .utils import check_create_folder, url_builder
+from .mixins import VerbosityMixin
+from . import settings
+
+
+class RemoteFileDoesntExist(Exception):
+    """ Exception to be used when the remote file does not exist """
+    pass
+
+
+class IncorrectSceneId(Exception):
+    """ Exception to be used when scene id is incorrect """
+    pass
+
+
+class USGSInventoryAccessMissing(Exception):
+    """ Exception for when User does not have Inventory Service access """
+    pass
+
+
+class Downloader(VerbosityMixin):
+    """ The downloader class """
+
+    def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None):
+        self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR
+        self.google = settings.GOOGLE_STORAGE
+        self.s3 = settings.S3_LANDSAT
+        self.usgs_user = usgs_user
+        self.usgs_pass = usgs_pass
+
+        # Make sure download directory exist
+        check_create_folder(self.download_dir)
+
+    def download(self, scenes, bands=None):
+        """
+        Download scenese from Google Storage or Amazon S3 if bands are provided
+
+        :param scenes:
+            A list of scene IDs
+        :type scenes:
+            List
+        :param bands:
+            A list of bands. Default value is None.
+        :type scenes:
+            List
+
+        :returns:
+            (List) includes downloaded scenes as key and source as value (aws or google)
+        """
+
+        if isinstance(scenes, list):
+            files = []
+
+            for scene in scenes:
+
+                # for all scenes if bands provided, first check AWS, if the bands exist
+                # download them, otherwise use Google and then USGS.
+                try:
+                    # if bands are not provided, directly go to Google and then USGS
+                    if not isinstance(bands, list):
+                       bands = [1,2,3,4,5,6,7,8,9,10,11]
+                    if not isinstance(bands, list):
+                        raise RemoteFileDoesntExist
+                    files.append(self.amazon_s3(scene, bands))
+
+                except RemoteFileDoesntExist:
+                    try:
+                        files.append(self.google_storage(scene, self.download_dir))
+                    except RemoteFileDoesntExist:
+                        files.append(self.usgs_eros(scene, self.download_dir))
+
+            return files
+
+        else:
+            raise Exception('Expected sceneIDs list')
+
+    def usgs_eros(self, scene, path):
+        """ Downloads the image from USGS """
+
+        # download from usgs if login information is provided
+        if self.usgs_user and self.usgs_pass:
+            try:
+                #print (self.usgs_user)
+                api_key = api.login(self.usgs_user, self.usgs_pass)
+            except USGSError as e:
+                error_tree = ElementTree.fromstring(str(e.message))
+                error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text
+                raise USGSInventoryAccessMissing(error_text)
+
+            response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key)
+            try:
+                download_url = response['data'][0]
+            except IndexError:
+                raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)
+            self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
+            return self.fetch(download_url, path)
+
+        raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene)
+
+    def google_storage(self, scene, path):
+        """
+        Google Storage Downloader.
+
+        :param scene:
+            The scene id
+        :type scene:
+            String
+        :param path:
+            The directory path to where the image should be stored
+        :type path:
+            String
+
+        :returns:
+            Boolean
+        """
+
+        sat = self.scene_interpreter(scene)
+        url = self.google_storage_url(sat)
+        #print (url)
+
+        self.remote_file_exists(url)
+
+        self.output('Source: Google Storage', normal=True, arrow=True)
+        return self.fetch(url, path)
+
+    def amazon_s3(self, scene, bands):
+        """
+        Amazon S3 downloader
+        """
+
+        sat = self.scene_interpreter(scene)
+
+        # Always grab QA band if bands are specified
+        urls = []
+
+        if 'BQA' not in bands:
+            bands.append('QA')
+
+
+        if len(scene) == 40:
+           for band in bands:
+               url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr")
+               #print (url2)
+
+               # make sure it exist
+               self.remote_file_exists(url2)
+               urls.append(url2)
+
+               url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD")
+               #print (url3)
+
+               # make sure it exist
+               self.remote_file_exists(url3)
+               urls.append(url3)
+
+        # Always grab MTL.txt and ANG band if bands are specified
+        if 'MTL' not in bands:
+            bands.append('MTL')
+
+        if 'ANG' not in bands and len(scene) == 40:
+            bands.append('ANG')
+
+        for band in bands:
+            # get url for the band
+            url = self.amazon_s3_url(sat, band)
+            #print (url)
+
+            # make sure it exist
+            self.remote_file_exists(url)
+            urls.append(url)
+            
+
+        # create folder
+        path = check_create_folder(join(self.download_dir, scene))
+
+        self.output('Source: AWS S3', normal=True, arrow=True)
+        for url in urls:
+            #print(url)
+            self.fetch(url, path)
+
+        return path
+
+    def fetch(self, url, path):
+        """ Downloads the given url.
+
+        :param url:
+            The url to be downloaded.
+        :type url:
+            String
+        :param path:
+            The directory path to where the image should be stored
+        :type path:
+            String
+        :param filename:
+            The filename that has to be downloaded
+        :type filename:
+            String
+
+        :returns:
+            Boolean
+        """
+
+        segments = url.split('/')
+        filename = segments[-1]
+
+        # remove query parameters from the filename
+        filename = filename.split('?')[0]
+
+        self.output('Downloading: %s' % filename, normal=True, arrow=True)
+
+        #print(join(path, filename))
+        # raise Exception
+        if exists(join(path, filename)):
+            size = getsize(join(path, filename))
+            if size == self.get_remote_file_size(url):
+                self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1)
+
+        else:
+            fetch(url, path)
+        self.output('stored at %s' % path, normal=True, color='green', indent=1)
+
+        return join(path, filename)
+
+    def google_storage_url(self, sat):
+        """
+        Returns a google storage url the contains the scene provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+
+        :returns:
+            (String) The URL to a google storage file
+        """
+        filename = sat['scene'] + '.tar.bz'
+        return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename])
+
+    def amazon_s3_url(self, sat, band):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        if band != 'MTL' and band != 'ANG':
+            filename = '%s_B%s.TIF' % (sat['scene'], band)
+        else:
+            filename = '%s_%s.txt' % (sat['scene'], band)
+        
+        #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]))
+        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
+
+    def amazon_s3_url_type(self, sat, band, type):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :type type
+            TIF.ovr of WRK.IMD
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        file_extentie = '%s_B%s' + type
+        if band != 'MTL' and band != 'ANG':
+            filename = file_extentie % (sat['scene'], band)
+
+        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
+
+    def remote_file_exists(self, url):
+        """ Checks whether the remote file exists.
+
+        :param url:
+            The url that has to be checked.
+        :type url:
+            String
+
+        :returns:
+            **True** if remote file exists and **False** if it doesn't exist.
+        """
+        status = requests.head(url).status_code
+
+        if status != 200:
+            raise RemoteFileDoesntExist
+
+    def get_remote_file_size(self, url):
+        """ Gets the filesize of a remote file.
+
+        :param url:
+            The url that has to be checked.
+        :type url:
+            String
+
+        :returns:
+            int
+        """
+        headers = requests.head(url).headers
+        return int(headers['content-length'])
+
+    def scene_interpreter(self, scene):
+        """ Conver sceneID to rows, paths and dates.
+
+        :param scene:
+            The scene ID.
+        :type scene:
+            String
+........    Pre-collection data style:
+............LC81970232017085LGN00
+
+........    collection 1 datas style:
+........    LC08_L1TP_139045_20170304_20170316_01_T1
+
+
+        :returns:
+            dict
+
+        :Example output:
+
+        >>> anatomy = {
+                'path': None,
+                'row': None,
+                'sat': None,
+                'scene': scene
+            }
+        """
+        anatomy = {
+            'path': None,
+            'row': None,
+            'sat': None,
+            'scene': scene
+        }
+        if isinstance(scene, str) and len(scene) == 21:
+            #LC81970232017085LGN00
+            anatomy['path'] = scene[3:6]
+            anatomy['row'] = scene[6:9]
+            anatomy['sat'] = 'L' + scene[2:3]
+            return anatomy
+        elif isinstance(scene, str) and len(scene) == 40:
+            #LC08_L1TP_139045_20170304_20170316_01_T1
+            anatomy['path'] = scene[10:13]
+            anatomy['row'] = scene[13:16]
+            anatomy['sat'] = '/c1/L' + scene[3:4]
+            return anatomy
+        else:
+            raise IncorrectSceneId('Received incorrect scene')
+
+
+if __name__ == '__main__':
+
+    d = Downloader()
+
+    # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00'])

From be5fb60aedd249fd32a122b290efe6319d62b97f Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sat, 19 Aug 2017 22:09:14 +0200
Subject: [PATCH 07/15] delete some print statements

delete some forgotten print statements
---
 landsat/downloader.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/landsat/downloader.py b/landsat/downloader.py
index f61ab2b..4d8d63c 100644
--- a/landsat/downloader.py
+++ b/landsat/downloader.py
@@ -92,7 +92,7 @@ def usgs_eros(self, scene, path):
         # download from usgs if login information is provided
         if self.usgs_user and self.usgs_pass:
             try:
-                print (self.usgs_user)
+                #print (self.usgs_user)
                 api_key = api.login(self.usgs_user, self.usgs_pass)
             except USGSError as e:
                 error_tree = ElementTree.fromstring(str(e.message))
@@ -128,7 +128,7 @@ def google_storage(self, scene, path):
 
         sat = self.scene_interpreter(scene)
         url = self.google_storage_url(sat)
-        print (url)
+        #print (url)
 
         self.remote_file_exists(url)
 
@@ -220,7 +220,7 @@ def fetch(self, url, path):
 
         self.output('Downloading: %s' % filename, normal=True, arrow=True)
 
-        print(join(path, filename))
+        #print(join(path, filename))
         # raise Exception
         if exists(join(path, filename)):
             size = getsize(join(path, filename))

From 5f7c719872b4e38d73ace0a58cdbf393a7e1a23b Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sat, 19 Aug 2017 22:10:24 +0200
Subject: [PATCH 08/15] wrong dir

downloader.py was copied into the wrong directory.
---
 downloader.py | 381 --------------------------------------------------
 1 file changed, 381 deletions(-)
 delete mode 100644 downloader.py

diff --git a/downloader.py b/downloader.py
deleted file mode 100644
index 4d8d63c..0000000
--- a/downloader.py
+++ /dev/null
@@ -1,381 +0,0 @@
-# Landsat Util
-# License: CC0 1.0 Universal
-
-from __future__ import print_function, division, absolute_import
-
-from xml.etree import ElementTree
-from os.path import join, exists, getsize
-
-import requests
-from usgs import api, USGSError
-from homura import download as fetch
-
-from .utils import check_create_folder, url_builder
-from .mixins import VerbosityMixin
-from . import settings
-
-
-class RemoteFileDoesntExist(Exception):
-    """ Exception to be used when the remote file does not exist """
-    pass
-
-
-class IncorrectSceneId(Exception):
-    """ Exception to be used when scene id is incorrect """
-    pass
-
-
-class USGSInventoryAccessMissing(Exception):
-    """ Exception for when User does not have Inventory Service access """
-    pass
-
-
-class Downloader(VerbosityMixin):
-    """ The downloader class """
-
-    def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None):
-        self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR
-        self.google = settings.GOOGLE_STORAGE
-        self.s3 = settings.S3_LANDSAT
-        self.usgs_user = usgs_user
-        self.usgs_pass = usgs_pass
-
-        # Make sure download directory exist
-        check_create_folder(self.download_dir)
-
-    def download(self, scenes, bands=None):
-        """
-        Download scenese from Google Storage or Amazon S3 if bands are provided
-
-        :param scenes:
-            A list of scene IDs
-        :type scenes:
-            List
-        :param bands:
-            A list of bands. Default value is None.
-        :type scenes:
-            List
-
-        :returns:
-            (List) includes downloaded scenes as key and source as value (aws or google)
-        """
-
-        if isinstance(scenes, list):
-            files = []
-
-            for scene in scenes:
-
-                # for all scenes if bands provided, first check AWS, if the bands exist
-                # download them, otherwise use Google and then USGS.
-                try:
-                    # if bands are not provided, directly go to Google and then USGS
-                    if not isinstance(bands, list):
-                       bands = [1,2,3,4,5,6,7,8,9,10,11]
-                    if not isinstance(bands, list):
-                        raise RemoteFileDoesntExist
-                    files.append(self.amazon_s3(scene, bands))
-
-                except RemoteFileDoesntExist:
-                    try:
-                        files.append(self.google_storage(scene, self.download_dir))
-                    except RemoteFileDoesntExist:
-                        files.append(self.usgs_eros(scene, self.download_dir))
-
-            return files
-
-        else:
-            raise Exception('Expected sceneIDs list')
-
-    def usgs_eros(self, scene, path):
-        """ Downloads the image from USGS """
-
-        # download from usgs if login information is provided
-        if self.usgs_user and self.usgs_pass:
-            try:
-                #print (self.usgs_user)
-                api_key = api.login(self.usgs_user, self.usgs_pass)
-            except USGSError as e:
-                error_tree = ElementTree.fromstring(str(e.message))
-                error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text
-                raise USGSInventoryAccessMissing(error_text)
-
-            response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key)
-            try:
-                download_url = response['data'][0]
-            except IndexError:
-                raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)
-            self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
-            return self.fetch(download_url, path)
-
-        raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene)
-
-    def google_storage(self, scene, path):
-        """
-        Google Storage Downloader.
-
-        :param scene:
-            The scene id
-        :type scene:
-            String
-        :param path:
-            The directory path to where the image should be stored
-        :type path:
-            String
-
-        :returns:
-            Boolean
-        """
-
-        sat = self.scene_interpreter(scene)
-        url = self.google_storage_url(sat)
-        #print (url)
-
-        self.remote_file_exists(url)
-
-        self.output('Source: Google Storage', normal=True, arrow=True)
-        return self.fetch(url, path)
-
-    def amazon_s3(self, scene, bands):
-        """
-        Amazon S3 downloader
-        """
-
-        sat = self.scene_interpreter(scene)
-
-        # Always grab QA band if bands are specified
-        urls = []
-
-        if 'BQA' not in bands:
-            bands.append('QA')
-
-
-        if len(scene) == 40:
-           for band in bands:
-               url2 = self.amazon_s3_url_type(sat, band, ".TIF.ovr")
-               #print (url2)
-
-               # make sure it exist
-               self.remote_file_exists(url2)
-               urls.append(url2)
-
-               url3 = self.amazon_s3_url_type(sat, band, "_wrk.IMD")
-               #print (url3)
-
-               # make sure it exist
-               self.remote_file_exists(url3)
-               urls.append(url3)
-
-        # Always grab MTL.txt and ANG band if bands are specified
-        if 'MTL' not in bands:
-            bands.append('MTL')
-
-        if 'ANG' not in bands and len(scene) == 40:
-            bands.append('ANG')
-
-        for band in bands:
-            # get url for the band
-            url = self.amazon_s3_url(sat, band)
-            #print (url)
-
-            # make sure it exist
-            self.remote_file_exists(url)
-            urls.append(url)
-            
-
-        # create folder
-        path = check_create_folder(join(self.download_dir, scene))
-
-        self.output('Source: AWS S3', normal=True, arrow=True)
-        for url in urls:
-            #print(url)
-            self.fetch(url, path)
-
-        return path
-
-    def fetch(self, url, path):
-        """ Downloads the given url.
-
-        :param url:
-            The url to be downloaded.
-        :type url:
-            String
-        :param path:
-            The directory path to where the image should be stored
-        :type path:
-            String
-        :param filename:
-            The filename that has to be downloaded
-        :type filename:
-            String
-
-        :returns:
-            Boolean
-        """
-
-        segments = url.split('/')
-        filename = segments[-1]
-
-        # remove query parameters from the filename
-        filename = filename.split('?')[0]
-
-        self.output('Downloading: %s' % filename, normal=True, arrow=True)
-
-        #print(join(path, filename))
-        # raise Exception
-        if exists(join(path, filename)):
-            size = getsize(join(path, filename))
-            if size == self.get_remote_file_size(url):
-                self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1)
-
-        else:
-            fetch(url, path)
-        self.output('stored at %s' % path, normal=True, color='green', indent=1)
-
-        return join(path, filename)
-
-    def google_storage_url(self, sat):
-        """
-        Returns a google storage url the contains the scene provided.
-
-        :param sat:
-            Expects an object created by scene_interpreter method
-        :type sat:
-            dict
-
-        :returns:
-            (String) The URL to a google storage file
-        """
-        filename = sat['scene'] + '.tar.bz'
-        return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename])
-
-    def amazon_s3_url(self, sat, band):
-        """
-        Return an amazon s3 url the contains the scene and band provided.
-
-        :param sat:
-            Expects an object created by scene_interpreter method
-        :type sat:
-            dict
-        :param filename:
-            The filename that has to be downloaded from Amazon
-        :type filename:
-            String
-
-        :returns:
-            (String) The URL to a S3 file
-        """
-        if band != 'MTL' and band != 'ANG':
-            filename = '%s_B%s.TIF' % (sat['scene'], band)
-        else:
-            filename = '%s_%s.txt' % (sat['scene'], band)
-        
-        #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]))
-        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
-
-    def amazon_s3_url_type(self, sat, band, type):
-        """
-        Return an amazon s3 url the contains the scene and band provided.
-
-        :param sat:
-            Expects an object created by scene_interpreter method
-        :type sat:
-            dict
-        :type type
-            TIF.ovr of WRK.IMD
-        :param filename:
-            The filename that has to be downloaded from Amazon
-        :type filename:
-            String
-
-        :returns:
-            (String) The URL to a S3 file
-        """
-        file_extentie = '%s_B%s' + type
-        if band != 'MTL' and band != 'ANG':
-            filename = file_extentie % (sat['scene'], band)
-
-        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
-
-    def remote_file_exists(self, url):
-        """ Checks whether the remote file exists.
-
-        :param url:
-            The url that has to be checked.
-        :type url:
-            String
-
-        :returns:
-            **True** if remote file exists and **False** if it doesn't exist.
-        """
-        status = requests.head(url).status_code
-
-        if status != 200:
-            raise RemoteFileDoesntExist
-
-    def get_remote_file_size(self, url):
-        """ Gets the filesize of a remote file.
-
-        :param url:
-            The url that has to be checked.
-        :type url:
-            String
-
-        :returns:
-            int
-        """
-        headers = requests.head(url).headers
-        return int(headers['content-length'])
-
-    def scene_interpreter(self, scene):
-        """ Conver sceneID to rows, paths and dates.
-
-        :param scene:
-            The scene ID.
-        :type scene:
-            String
-........    Pre-collection data style:
-............LC81970232017085LGN00
-
-........    collection 1 datas style:
-........    LC08_L1TP_139045_20170304_20170316_01_T1
-
-
-        :returns:
-            dict
-
-        :Example output:
-
-        >>> anatomy = {
-                'path': None,
-                'row': None,
-                'sat': None,
-                'scene': scene
-            }
-        """
-        anatomy = {
-            'path': None,
-            'row': None,
-            'sat': None,
-            'scene': scene
-        }
-        if isinstance(scene, str) and len(scene) == 21:
-            #LC81970232017085LGN00
-            anatomy['path'] = scene[3:6]
-            anatomy['row'] = scene[6:9]
-            anatomy['sat'] = 'L' + scene[2:3]
-            return anatomy
-        elif isinstance(scene, str) and len(scene) == 40:
-            #LC08_L1TP_139045_20170304_20170316_01_T1
-            anatomy['path'] = scene[10:13]
-            anatomy['row'] = scene[13:16]
-            anatomy['sat'] = '/c1/L' + scene[3:4]
-            return anatomy
-        else:
-            raise IncorrectSceneId('Received incorrect scene')
-
-
-if __name__ == '__main__':
-
-    d = Downloader()
-
-    # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00'])

From a7e8e13da28889a1e0c4660479b9a087b241110a Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 11:40:35 +0200
Subject: [PATCH 09/15] Add New Googlestorage Bucket for Landsat Collection 1
 data

---
 landsat/settings.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/landsat/settings.py b/landsat/settings.py
index cfe9e9a..b1f8f05 100644
--- a/landsat/settings.py
+++ b/landsat/settings.py
@@ -15,6 +15,7 @@
 SATELLITE = 'L8'
 L8_METADATA_URL = 'http://landsat.usgs.gov/metadata_service/bulk_metadata_files/LANDSAT_8.csv'
 GOOGLE_STORAGE = 'http://storage.googleapis.com/earthengine-public/landsat/'
+GOOGLE_STORAGE_C1 = 'http://storage.googleapis.com/gcp-public-data-landsat/LC08/01/'
 S3_LANDSAT = 'http://landsat-pds.s3.amazonaws.com/'
 API_URL = 'https://api.developmentseed.org/satellites/landsat'
 

From cb8bf2a19bbcfde798585f48f3e9ff529195cb7c Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 11:45:26 +0200
Subject: [PATCH 10/15] Add support for download Landsat Collection 1 data from
 new Google storage bucket

Add support for downloading Landsat Collection 1 data from the new Google Storage bucket.
Only the *.TIF, _MTL.txt and _ANG.txt files are downloadable. The *.TIF.ovr and _wrk.IMD files are forbidden to download. On AWS these files are accessible.
---
 landsat/downloader.py | 154 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 133 insertions(+), 21 deletions(-)

diff --git a/landsat/downloader.py b/landsat/downloader.py
index 4d8d63c..6538545 100644
--- a/landsat/downloader.py
+++ b/landsat/downloader.py
@@ -19,6 +19,11 @@ class RemoteFileDoesntExist(Exception):
     """ Exception to be used when the remote file does not exist """
     pass
 
+class Forbidden403(Exception):
+    """ Exception to be used when the user is forbidden to use the remote file 
+        This a appears when downloading some of the files from google storage bucket gcp-public-data-landsat
+    """
+    pass
 
 class IncorrectSceneId(Exception):
     """ Exception to be used when scene id is incorrect """
@@ -36,6 +41,7 @@ class Downloader(VerbosityMixin):
     def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None):
         self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR
         self.google = settings.GOOGLE_STORAGE
+        self.google_C1 = settings.GOOGLE_STORAGE_C1
         self.s3 = settings.S3_LANDSAT
         self.usgs_user = usgs_user
         self.usgs_pass = usgs_pass
@@ -74,10 +80,17 @@ def download(self, scenes, bands=None):
                     if not isinstance(bands, list):
                         raise RemoteFileDoesntExist
                     files.append(self.amazon_s3(scene, bands))
+                    #files.append(self.google_storage_new(scene, bands))
 
                 except RemoteFileDoesntExist:
                     try:
-                        files.append(self.google_storage(scene, self.download_dir))
+                        if len(scene) == 40:
+                           #Collection 1 data: Product_id
+                           files.append(self.google_storage_new(scene, bands))
+                           #files.append(self.amazon_s3(scene, bands))
+                        else:
+                           #Pre-Collection data: scene_id len(scene = 21)
+                           files.append(self.google_storage(scene, self.download_dir))
                     except RemoteFileDoesntExist:
                         files.append(self.usgs_eros(scene, self.download_dir))
 
@@ -128,13 +141,127 @@ def google_storage(self, scene, path):
 
         sat = self.scene_interpreter(scene)
         url = self.google_storage_url(sat)
-        #print (url)
 
         self.remote_file_exists(url)
 
         self.output('Source: Google Storage', normal=True, arrow=True)
         return self.fetch(url, path)
 
+    def google_storage_new(self, scene, bands):
+        """
+        Google downloader new version for collection 1 data: No tar.bz file but a lot files
+        """
+
+        sat = self.scene_interpreter(scene)
+        # Always grab QA band if bands are specified
+        urls = []
+
+        if 'BQA' not in bands:
+            bands.append('QA')
+
+        if len(scene) == 40:
+           for band in bands:
+               url2 = self.google_storage_url_type_new(sat, band, ".TIF.ovr")
+               #print (url2)
+
+               # make sure it exist
+               self.remote_file_exists(url2)
+               urls.append(url2)
+
+               url3 = self.google_storage_url_type_new(sat, band, "_wrk.IMD")
+               #print (url3)
+
+               # make sure it exist
+               self.remote_file_exists(url3)
+               urls.append(url3)
+
+        # Always grab MTL.txt and ANG band if bands are specified
+        if 'MTL' not in bands:
+            bands.append('MTL')
+
+        if 'ANG' not in bands and len(scene) == 40:
+            bands.append('ANG')
+
+        for band in bands:
+            # get url for the band
+            url = self.google_storage_url_new(sat, band)
+            #print (url)
+
+            # make sure it exist
+            self.remote_file_exists(url)
+            urls.append(url)
+            
+
+        # create folder
+        path = check_create_folder(join(self.download_dir, scene))
+
+        self.output('Source: Google Storage S3', normal=True, arrow=True)
+        for url in urls:
+            self.fetch(url, path)
+
+        return path
+
+    def google_storage_url(self, sat):
+        """
+        Returns a google storage url the contains the scene provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+
+        :returns:
+            (String) The URL to a google storage file
+        """
+        filename = sat['scene'] + '.tar.bz'
+        return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename])
+
+    def google_storage_url_new(self, sat, band):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        if band != 'MTL' and band != 'ANG':
+            filename = '%s_B%s.TIF' % (sat['scene'], band)
+        else:
+            filename = '%s_%s.txt' % (sat['scene'], band)
+        return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename])
+
+    def google_storage_url_type_new(self, sat, band, type):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :type type
+            TIF.ovr of WRK.IMD
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        file_extentie = '%s_B%s' + type
+        if band != 'MTL' and band != 'ANG':
+            filename = file_extentie % (sat['scene'], band)
+
+        return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename])
+
     def amazon_s3(self, scene, bands):
         """
         Amazon S3 downloader
@@ -220,7 +347,6 @@ def fetch(self, url, path):
 
         self.output('Downloading: %s' % filename, normal=True, arrow=True)
 
-        #print(join(path, filename))
         # raise Exception
         if exists(join(path, filename)):
             size = getsize(join(path, filename))
@@ -233,21 +359,6 @@ def fetch(self, url, path):
 
         return join(path, filename)
 
-    def google_storage_url(self, sat):
-        """
-        Returns a google storage url the contains the scene provided.
-
-        :param sat:
-            Expects an object created by scene_interpreter method
-        :type sat:
-            dict
-
-        :returns:
-            (String) The URL to a google storage file
-        """
-        filename = sat['scene'] + '.tar.bz'
-        return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename])
-
     def amazon_s3_url(self, sat, band):
         """
         Return an amazon s3 url the contains the scene and band provided.
@@ -268,8 +379,6 @@ def amazon_s3_url(self, sat, band):
             filename = '%s_B%s.TIF' % (sat['scene'], band)
         else:
             filename = '%s_%s.txt' % (sat['scene'], band)
-        
-        #print (url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename]))
         return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
 
     def amazon_s3_url_type(self, sat, band, type):
@@ -309,7 +418,10 @@ def remote_file_exists(self, url):
         """
         status = requests.head(url).status_code
 
-        if status != 200:
+        if status == 403:
+            print("403: " + url)
+        elif status != 200:
+            #print ("File doesnotexits: %s-%s" % (url,status))
             raise RemoteFileDoesntExist
 
     def get_remote_file_size(self, url):

From 6d2cf73819389fdda91d1092ed5944987f5a6408 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 19:59:36 +0200
Subject: [PATCH 11/15] Added a comment

Added a comment
---
 downloader.py | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 504 insertions(+)
 create mode 100644 downloader.py

diff --git a/downloader.py b/downloader.py
new file mode 100644
index 0000000..2ea2480
--- /dev/null
+++ b/downloader.py
@@ -0,0 +1,504 @@
+# Landsat Util
+# License: CC0 1.0 Universal
+
+from __future__ import print_function, division, absolute_import
+
+from xml.etree import ElementTree
+from os.path import join, exists, getsize
+
+import requests
+from usgs import api, USGSError
+from homura import download as fetch
+
+from .utils import check_create_folder, url_builder, geocode
+from .mixins import VerbosityMixin
+from . import settings
+
+
+class RemoteFileDoesntExist(Exception):
+    """ Exception to be used when the remote file does not exist """
+    pass
+
+class Forbidden403(Exception):
+    """ Exception to be used when the user is forbidden to use the remote file 
+        This a appears when downloading some of the files from google storage bucket gcp-public-data-landsat
+    """
+    pass
+
+class IncorrectSceneId(Exception):
+    """ Exception to be used when scene id is incorrect """
+    pass
+
+
+class USGSInventoryAccessMissing(Exception):
+    """ Exception for when User does not have Inventory Service access """
+    pass
+
+
+class Downloader(VerbosityMixin):
+    """ The downloader class """
+
+    def __init__(self, verbose=False, download_dir=None, usgs_user=None, usgs_pass=None):
+        self.download_dir = download_dir if download_dir else settings.DOWNLOAD_DIR
+        self.google = settings.GOOGLE_STORAGE
+        self.google_C1 = settings.GOOGLE_STORAGE_C1
+        self.s3 = settings.S3_LANDSAT
+        self.usgs_user = usgs_user
+        self.usgs_pass = usgs_pass
+
+        # Make sure download directory exist
+        check_create_folder(self.download_dir)
+
+    def download(self, scenes, bands=None):
+        """
+        Download scenese from Google Storage or Amazon S3 if bands are provided
+
+        :param scenes:
+            A list of scene IDs
+        :type scenes:
+            List
+        :param bands:
+            A list of bands. Default value is None.
+        :type scenes:
+            List
+
+        :returns:
+            (List) includes downloaded scenes as key and source as value (aws or google)
+        """
+        #loc = geocode('1600 Pennsylvania Ave NW, Washington, DC 20500')
+        #print("DOWNLOAD*********************************")
+        #print(loc)
+        #self.assertEqual(round(loc['lat'], 3), 38.898)
+        #self.assertEqual(round(loc['lon'], 3), -77.037)
+        #self.assertRaises(ValueError, utils.geocode, 'Pennsylvania Ave NW, Washington, DC')
+        #self.assertEqual({'lat': 38.8987709, 'lon': -77.0351295},
+        #                 utils.geocode('Pennsylvania Ave NW, Washington, DC', 10.))
+        #loc2 = geocode('Pennsylvania Ave NW, Washington, DC', 10.)
+        #print(loc2)
+
+        if isinstance(scenes, list):
+            files = []
+
+            for scene in scenes:
+
+                # for all scenes if bands provided, first check AWS, if the bands exist
+                # download them, otherwise use Google and then USGS.
+                try:
+                    # if bands are not provided, directly go to Google and then USGS
+                    if not isinstance(bands, list):
+                       bands = [1,2,3,4,5,6,7,8,9,10,11]
+                    if not isinstance(bands, list):
+                        raise RemoteFileDoesntExist
+                    files.append(self.amazon_s3(scene, bands))
+                    #files.append(self.google_storage_new(scene, bands))
+
+                except RemoteFileDoesntExist:
+                    try:
+                        if len(scene) == 40:
+                           #Collection 1 data: Product_id
+                           files.append(self.google_storage_new(scene, bands))
+                           #files.append(self.amazon_s3(scene, bands))
+                        else:
+                           #Pre-Collection data: scene_id len(scene = 21)
+                           files.append(self.google_storage(scene, self.download_dir))
+                    except RemoteFileDoesntExist:
+                        files.append(self.usgs_eros(scene, self.download_dir))
+
+            return files
+
+        else:
+            raise Exception('Expected sceneIDs list')
+
+    def usgs_eros(self, scene, path):
+        """ Downloads the image from USGS """
+
+        # download from usgs if login information is provided
+        if self.usgs_user and self.usgs_pass:
+            try:
+                #print (self.usgs_user)
+                api_key = api.login(self.usgs_user, self.usgs_pass)
+            except USGSError as e:
+                error_tree = ElementTree.fromstring(str(e.message))
+                error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring", api.NAMESPACES).text
+                raise USGSInventoryAccessMissing(error_text)
+
+            response = api.download('LANDSAT_8_C1', 'EE', [scene], api_key=api_key)
+            try:
+                download_url = response['data'][0]
+            except IndexError:
+                raise RemoteFileDoesntExist('%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)
+            self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
+            return self.fetch(download_url, path)
+
+        raise RemoteFileDoesntExist('%s is not available on AWS S3 or Google Storage' % scene)
+
+    def google_storage(self, scene, path):
+        """
+        Google Storage Downloader.
+
+        :param scene:
+            The scene id
+        :type scene:
+            String
+        :param path:
+            The directory path to where the image should be stored
+        :type path:
+            String
+
+        :returns:
+            Boolean
+        """
+
+        sat = self.scene_interpreter(scene)
+        url = self.google_storage_url(sat)
+
+        self.remote_file_exists(url)
+
+        self.output('Source: Google Storage', normal=True, arrow=True)
+        return self.fetch(url, path)
+
+    def google_storage_new(self, scene, bands):
+        """
+        Google downloader new version for collection 1 data: No tar.bz file but a lot files
+        """
+
+        sat = self.scene_interpreter(scene)
+        # Always grab QA band if bands are specified
+        urls = []
+
+        if 'BQA' not in bands:
+            bands.append('QA')
+
+        if len(scene) == 40:
+           for band in bands:
+               url2 = self.google_storage_url_type_new(sat, band, ".TIF.ovr")
+               #print (url2)
+
+               # make sure it exist
+               self.remote_file_exists(url2)
+               urls.append(url2)
+
+               url3 = self.google_storage_url_type_new(sat, band, "_wrk.IMD")
+               #print (url3)
+
+               # make sure it exist
+               self.remote_file_exists(url3)
+               urls.append(url3)
+
+        # Always grab MTL.txt and ANG band if bands are specified
+        if 'MTL' not in bands:
+            bands.append('MTL')
+
+        if 'ANG' not in bands and len(scene) == 40:
+            bands.append('ANG')
+
+        for band in bands:
+            # get url for the band
+            url = self.google_storage_url_new(sat, band)
+            #print (url)
+
+            # make sure it exist
+            self.remote_file_exists(url)
+            urls.append(url)
+            
+
+        # create folder
+        path = check_create_folder(join(self.download_dir, scene))
+
+        self.output('Source: Google Storage S3', normal=True, arrow=True)
+        for url in urls:
+            self.fetch(url, path)
+
+        return path
+
+    def google_storage_url(self, sat):
+        """
+        Returns a google storage url the contains the scene provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+
+        :returns:
+            (String) The URL to a google storage file
+        """
+        filename = sat['scene'] + '.tar.bz'
+        return url_builder([self.google, sat['sat'], sat['path'], sat['row'], filename])
+
+    def google_storage_url_new(self, sat, band):
+        """
+        Return an amazon s3 url the contains the scene and band provided.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param filename:
+            The filename that has to be downloaded from Amazon
+        :type filename:
+            String
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        if band != 'MTL' and band != 'ANG':
+            filename = '%s_B%s.TIF' % (sat['scene'], band)
+        else:
+            filename = '%s_%s.txt' % (sat['scene'], band)
+        return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename])
+
+    def google_storage_url_type_new(self, sat, band, type):
+        """
+        Return the url, under self.google_C1, of a band's auxiliary file.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param band:
+            The band the file belongs to; must not be 'MTL' or 'ANG'
+        :param type:
+            Suffix that follows '<scene>_B<band>': '.TIF.ovr' or '_wrk.IMD'
+        :type type:
+            String
+        :returns:
+            (String) The URL to the file
+        :raises ValueError:
+            If band is 'MTL' or 'ANG' (this used to be an UnboundLocalError)
+        """
+        if band in ('MTL', 'ANG'):
+            raise ValueError('No %s file can be named for band %s' % (type, band))
+        filename = '%s_B%s%s' % (sat['scene'], band, type)
+        return url_builder([self.google_C1, sat['path'], sat['row'], sat['scene'], filename])
+
+    def amazon_s3(self, scene, bands):
+        """
+        Amazon S3 downloader
+
+        Checks every file with remote_file_exists, then downloads them all
+        into a folder named after the scene. The QA band and the MTL file
+        are always included; for 40 character scene ids the ANG file and a
+        '.TIF.ovr' and '_wrk.IMD' file per image band are added as well.
+
+        :param scene:
+            The scene ID (21 characters) or product id (40 characters)
+        :type scene:
+            String
+        :param bands:
+            The bands to download; the list itself is left untouched
+        :type bands:
+            list
+
+        :returns:
+            The scene folder, as returned by check_create_folder
+        """
+        sat = self.scene_interpreter(scene)
+        is_collection_1 = len(scene) == 40
+        urls = []
+
+        # Copy first: appending to the caller's list re-added 'QA' per scene
+        bands = list(bands)
+        if 'QA' not in bands and 'BQA' not in bands:
+            bands.append('QA')
+
+        if is_collection_1:
+            # amazon_s3_url_type cannot name a file for MTL/ANG, skip those
+            for band in [b for b in bands if b not in ('MTL', 'ANG')]:
+                for suffix in ('.TIF.ovr', '_wrk.IMD'):
+                    url = self.amazon_s3_url_type(sat, band, suffix)
+                    self.remote_file_exists(url)
+                    urls.append(url)
+
+        # Always grab MTL.txt, and ANG.txt for 40 character scene ids
+        if 'MTL' not in bands:
+            bands.append('MTL')
+        if is_collection_1 and 'ANG' not in bands:
+            bands.append('ANG')
+
+        for band in bands:
+            url = self.amazon_s3_url(sat, band)
+            self.remote_file_exists(url)
+            urls.append(url)
+
+        # create folder
+        path = check_create_folder(join(self.download_dir, scene))
+        self.output('Source: AWS S3', normal=True, arrow=True)
+        for url in urls:
+            self.fetch(url, path)
+
+        return path
+
+    def fetch(self, url, path):
+        """ Downloads the given url.
+
+        Nothing is downloaded when a local copy with the same size as the
+        remote file is already present; a missing copy or one with another
+        size is handed to the downloader.
+
+        :param url:
+            The url to be downloaded.
+        :type url:
+            String
+        :param path:
+            The directory path to where the image should be stored
+        :type path:
+            String
+
+        :returns:
+            (String) The path of the file inside the given directory
+        """
+
+        # the filename is the last url segment without its query parameters
+        filename = url.split('/')[-1]
+        filename = filename.split('?')[0]
+        target = join(path, filename)
+
+        self.output('Downloading: %s' % filename, normal=True, arrow=True)
+
+        if exists(target) and getsize(target) == self.get_remote_file_size(url):
+            self.output('%s already exists on your system' % filename, normal=True, color='green', indent=1)
+        else:
+            # Also taken when a local copy exists with a different size; that
+            # case used to skip the download and still report success.
+            # NOTE(review): assumes the module-level fetch resumes or replaces
+            # such a partial file - confirm.
+            # TODO: Try catch for files that are forbidden
+            fetch(url, path)
+
+        self.output('stored at %s' % path, normal=True, color='green', indent=1)
+
+        return target
+
+    def amazon_s3_url(self, sat, band):
+        """
+        Return the url, under self.s3, of one band or text file of a scene.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param band:
+            'MTL' or 'ANG' for '<scene>_<band>.txt', any other value for
+            '<scene>_B<band>.TIF'
+        :type band:
+            String or int
+
+        :returns:
+            (String) The URL to a S3 file
+        """
+        is_text_file = band == 'MTL' or band == 'ANG'
+        template = '%s_%s.txt' if is_text_file else '%s_B%s.TIF'
+        filename = template % (sat['scene'], band)
+        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
+
+    def amazon_s3_url_type(self, sat, band, type):
+        """
+        Return the url, under self.s3, of a band's auxiliary file.
+
+        :param sat:
+            Expects an object created by scene_interpreter method
+        :type sat:
+            dict
+        :param band:
+            The band the file belongs to; must not be 'MTL' or 'ANG'
+        :param type:
+            Suffix that follows '<scene>_B<band>': '.TIF.ovr' or '_wrk.IMD'
+        :type type:
+            String
+        :returns:
+            (String) The URL to a S3 file
+        :raises ValueError:
+            If band is 'MTL' or 'ANG' (this used to be an UnboundLocalError)
+        """
+        if band in ('MTL', 'ANG'):
+            raise ValueError('No %s file can be named for band %s' % (type, band))
+        filename = '%s_B%s%s' % (sat['scene'], band, type)
+        return url_builder([self.s3, sat['sat'], sat['path'], sat['row'], sat['scene'], filename])
+
+    def remote_file_exists(self, url):
+        """ Checks whether the remote file exists.
+
+        :param url:
+            The url that has to be checked with a HEAD request.
+        :type url:
+            String
+
+        :returns:
+            None. A 403 answer is only printed, it does not raise.
+        :raises RemoteFileDoesntExist:
+            When the answer has any status other than 200 or 403.
+        """
+        code = requests.head(url).status_code
+        if code not in (200, 403):
+            raise RemoteFileDoesntExist
+        if code == 403:
+            print("403: " + url)
+
+    def get_remote_file_size(self, url):
+        """ Gets the filesize of a remote file.
+
+        :param url:
+            The url of the remote file; it is queried with a HEAD request.
+        :type url:
+            String
+
+        :returns:
+            int, the value of the content-length response header
+        """
+        response = requests.head(url)
+        return int(response.headers['content-length'])
+
+    def scene_interpreter(self, scene):
+        """ Convert a scene identifier to its path, row and satellite.
+
+        Two identifier layouts are accepted:
+
+            Pre-collection scene ID (21 characters):
+            LC81970232017085LGN00
+
+            Collection 1 product id (40 characters):
+            LC08_L1TP_139045_20170304_20170316_01_T1
+
+        :param scene:
+            The scene ID.
+        :type scene:
+            String
+
+        :returns:
+            dict
+
+        :raises IncorrectSceneId:
+            When scene is not a str of 21 or 40 characters.
+
+        :Example output (for LC81970232017085LGN00):
+
+            {
+                'path': '197',
+                'row': '023',
+                'sat': 'L8',
+                'scene': 'LC81970232017085LGN00'
+            }
+        """
+        is_text = isinstance(scene, str)
+
+        if is_text and len(scene) == 21:
+            # layout: LC81970232017085LGN00
+            path, row = scene[3:6], scene[6:9]
+            sat = 'L' + scene[2:3]
+        elif is_text and len(scene) == 40:
+            # layout: LC08_L1TP_139045_20170304_20170316_01_T1
+            path, row = scene[10:13], scene[13:16]
+            sat = '/c1/L' + scene[3:4]
+        else:
+            raise IncorrectSceneId('Received incorrect scene')
+
+        # keys: path, row, sat and the scene id exactly as it was passed in
+        return {'path': path, 'row': row, 'sat': sat, 'scene': scene}
+
+
+if __name__ == '__main__':
+    # Manual entry point: only builds a Downloader without arguments.
+    d = Downloader()
+    # Example call, left disabled:
+    # d.download(['LC81990242015046LGN00', 'LC80030172015001LGN00'])

From d54845c4d6ea5bf1c56d1b273fef916b727af5c8 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 20:03:05 +0200
Subject: [PATCH 12/15] Added test with  Collection 1 Data

Added a test with Collection 1 data and changed an existing test because of the changed data structure
---
 tests/test_download.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/test_download.py b/tests/test_download.py
index 71c2318..41a61b1 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -22,9 +22,12 @@ def setUpClass(cls):
         cls.temp_folder = mkdtemp()
         cls.d = Downloader(download_dir=cls.temp_folder)
         cls.scene = 'LT81360082013127LGN01'
-        cls.scene_2 = 'LC82050312014229LGN00'
+        #cls.scene_2 = 'LC82050312014229LGN00'
+        cls.scene_2 = 'LC81990232017067LGN00'
         cls.scene_s3 = 'LC80010092015051LGN00'
         cls.scene_s3_2 = 'LC82050312015136LGN00'
+        #Collection 1 data: Product_ID
+        cls_scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1'
         cls.scene_size = 59239149
 
     @classmethod
@@ -73,6 +76,11 @@ def test_download(self, mock_fetch):
         test_paths = [self.temp_folder + '/' + self.scene + '.tar.bz']
         self.assertEqual(test_paths, paths)
 
+        # When passing product_id AWS should be triggered (Collection 1 data structure)
+        paths = self.d.download([self.scene4], bands=[11])
+        test_paths = [self.temp_folder + '/' + self.scene4 ]
+        self.assertEqual(test_paths, paths)
+
     @mock.patch('landsat.downloader.Downloader.google_storage')
     def test_download_google_when_amazon_is_unavailable(self, fake_google):
         """ Test whether google or amazon are correctly selected based on input """

From ff6e4f952beebfb299f7b3c48183bfbd748a66f6 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 20:32:51 +0200
Subject: [PATCH 13/15] Fixed an error in test_download

Fixed an error in test_download
---
 tests/test_download.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_download.py b/tests/test_download.py
index 41a61b1..3afec71 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -77,8 +77,8 @@ def test_download(self, mock_fetch):
         self.assertEqual(test_paths, paths)
 
         # When passing product_id AWS should be triggered (Collection 1 data structure)
-        paths = self.d.download([self.scene4], bands=[11])
-        test_paths = [self.temp_folder + '/' + self.scene4 ]
+        paths = self.d.download([self.scene_s4], bands=[11])
+        test_paths = [self.temp_folder + '/' + self.scene_s4 ]
         self.assertEqual(test_paths, paths)
 
     @mock.patch('landsat.downloader.Downloader.google_storage')

From 1fb8be24e77e53329888fed0bb1c61edaa12a073 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Sun, 20 Aug 2017 20:41:28 +0200
Subject: [PATCH 14/15] Corrected a typo

---
 tests/test_download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_download.py b/tests/test_download.py
index 3afec71..c503f22 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -27,7 +27,7 @@ def setUpClass(cls):
         cls.scene_s3 = 'LC80010092015051LGN00'
         cls.scene_s3_2 = 'LC82050312015136LGN00'
         #Collection 1 data: Product_ID
-        cls_scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1'
+        cls.scene_s4 = 'LC08_L1TP_139045_20170304_20170316_01_T1'
         cls.scene_size = 59239149
 
     @classmethod

From d864163f1995b896e01ae0928dcfcbd60b17d1b5 Mon Sep 17 00:00:00 2001
From: Mellian <31151900+jmellian@users.noreply.github.com>
Date: Fri, 25 Aug 2017 08:37:41 +0200
Subject: [PATCH 15/15] Update requirements.txt

---
 requirements.txt | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 8e34c93..96e701e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,14 @@
-usgs>=0.2.1
-requests>=2.7.0
-python-dateutil>=2.5.1
-numpy>=1.10.4
-termcolor>=1.1.0
-rasterio>=0.32.0
-six>=1.8.0
-scipy>=0.17.0
-scikit-image>=0.12.3
-homura>=0.1.3
-boto>=2.39.0
-polyline>=1.3
-geocoder>=1.9.0
-matplotlib>=1.5.1
+usgs>=0.1.9,<1
+requests~=2.7
+python-dateutil>=2.5.1,<3
+numpy>=1.10.4,<2
+termcolor~=1.1
+rasterio~=0.32
+six~=1.8
+scipy~=0.17
+scikit-image>=0.12.3,<1
+homura>=0.1.3,<1
+boto~=2.39
+polyline~=1.3
+geocoder~=1.9
+matplotlib>=1.5.1,<2