-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathscp_ICESat2_files.py
386 lines (366 loc) · 15.7 KB
/
scp_ICESat2_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
#!/usr/bin/env python
u"""
scp_ICESat2_files.py
Written by Tyler Sutterley (03/2024)
Copies ICESat-2 HDF5 data between a local host and a remote host
Can switch between pushing to and pulling from the remote host
PUSH to remote: s.put(local_file, remote_file)
PULL from remote: s.get(remote_file,local_path=local_file)
CALLING SEQUENCE:
python scp_ICESat2_files.py --host <host> --user <username> \
--product ATL06 --release 003 --granule 10 11 12 --cycle 1 2 \
--remote <path_to_remote> --verbose --mode 0o775
COMMAND LINE OPTIONS:
-h, --help: list the command line options
--host X: Remote server host
--user X: Remote server username
-D X, --directory X: Local working directory
--remote X: Remote working directory
--product X: ICESat-2 data product to copy
--release X: ICESat-2 data release to copy
--version X: ICESat-2 data version to copy
--granule X: ICESat-2 granule regions to copy
--cycle X: ICESat-2 cycle to copy
--track X: ICESat-2 tracks to copy
-C, --clobber: overwrite existing data in transfer
-V, --verbose: output information about each synced file
--push: Transfer files from local computer to remote server
-L, --list: only list files to be transferred
-M X, --mode X: permission mode of directories and files copied
PYTHON DEPENDENCIES:
paramiko: Native Python SSHv2 protocol library
http://www.paramiko.org/
https://github.com/paramiko/paramiko
scp: scp module for paramiko
https://github.com/jbardin/scp.py
UPDATE HISTORY:
Updated 03/2024: use pathlib to define and operate on paths
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: use f-strings for ascii and verbose outputs
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 10/2021: using python logging for handling verbose output
Updated 10/2020: using argparse to set parameters
Updated 05/2020: adjust regular expression to run ATL07 sea ice products
Updated 09/2019: sort subdirectories.
Updated 07/2019: using Python3 compliant division. regex for file versions
Written 05/2019
"""
from __future__ import print_function, division
import sys
import os
import re
import io
import scp
import getpass
import logging
import pathlib
import argparse
import builtins
import paramiko
import posixpath
import numpy as np
# PURPOSE: create argument parser
def arguments():
    """
    Build the command-line argument parser for the program

    Returns
    -------
    cli: argparse.ArgumentParser
        parser with the remote server, working directory,
        ICESat-2 selection and transfer options registered
    """
    # ICESat-2 products that can be selected (name: description)
    PRODUCTS = {
        'ATL03': 'Global Geolocated Photon Data',
        'ATL04': 'Normalized Relative Backscatter',
        'ATL06': 'Land Ice Height',
        'ATL07': 'Sea Ice Height',
        'ATL08': 'Land and Vegetation Height',
        'ATL09': 'Atmospheric Layer Characteristics',
        'ATL10': 'Sea Ice Freeboard',
        'ATL12': 'Ocean Surface Height',
        'ATL13': 'Inland Water Surface Height',
    }
    # valid granule regions and reference ground tracks
    granule_regions = range(1, 15)
    ground_tracks = range(1, 1388)
    # program description shown in the help message
    description = (
        "Copies ICESat-2 HDF5 data from between a local host and\n"
        "remote host\n"
    )
    cli = argparse.ArgumentParser(description=description)

    # credentials for the remote server
    cli.add_argument('--host', '-H',
        default='', type=str,
        help='Hostname of the remote server')
    cli.add_argument('--user', '-U',
        default='', type=str,
        help='Remote server username')

    # local and remote working directories
    cli.add_argument('--directory', '-D',
        default=pathlib.Path.cwd(),
        type=pathlib.Path,
        help='Local working directory')
    cli.add_argument('--remote', '-R',
        default='', type=str,
        help='Remote working directory')

    # ICESat-2 selection: product, release, versions, granule regions,
    # orbital cycles and reference ground tracks
    cli.add_argument('--product', '-p',
        default='ATL06', choices=PRODUCTS.keys(),
        metavar='PRODUCTS', type=str,
        help='ICESat-2 data product to copy')
    cli.add_argument('--release', '-r',
        default='006', type=str,
        help='ICESat-2 data release to copy')
    cli.add_argument('--version', '-v',
        nargs='+', type=int,
        help='ICESat-2 data versions to copy')
    cli.add_argument('--granule', '-g',
        default=granule_regions, choices=granule_regions,
        metavar='REGION', nargs='+', type=int,
        help='ICESat-2 granule regions to copy')
    cli.add_argument('--cycle', '-c',
        nargs='+', type=int,
        help='ICESat-2 orbital cycles to copy')
    cli.add_argument('--track', '-t',
        default=ground_tracks, choices=ground_tracks,
        metavar='RGT', nargs='+', type=int,
        help='ICESat-2 Reference Ground Tracks (RGTs) to copy')

    # boolean switches controlling the transfer:
    # direction, list-only, verbosity and overwriting
    switches = (
        ('--push', '-P',
            'Transfer files from local computer to remote server'),
        ('--list', '-L',
            'Only print files that could be transferred'),
        ('--verbose', '-V',
            'Verbose output of run'),
        ('--clobber', '-C',
            'Overwrite existing data'),
    )
    for long_flag, short_flag, text in switches:
        cli.add_argument(long_flag, short_flag,
            action='store_true', default=False,
            help=text)

    # permissions mode of the directories and files (number in octal)
    cli.add_argument('--mode', '-M',
        default=0o775, type=lambda x: int(x,base=8),
        help='Permissions mode of output directories and files')
    return cli
# This is the main part of the program that calls the individual functions
def main():
    """
    Parse the command line, connect to the remote server and copy files

    The hostname, username and identity file are completed from
    ``~/.ssh/config`` when that file exists. Values entered on the
    command line take precedence over the configuration file.
    """
    # Read the system arguments listed after the program
    parser = arguments()
    args, _ = parser.parse_known_args()

    # verbosity settings
    # configured before connecting: a log record emitted during login
    # would otherwise implicitly configure the root logger and make
    # the later basicConfig call a no-op
    if args.verbose or args.list:
        logging.getLogger("paramiko").setLevel(logging.INFO)
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.CRITICAL)

    # use entered host and username
    client_kwds = {'hostname': args.host, 'username': args.user}
    # use ssh configuration file to extract hostname, user and identityfile
    user_config_file = pathlib.Path.home().joinpath('.ssh', 'config')
    if user_config_file.exists():
        # read ssh configuration file and parse with paramiko
        ssh_config = paramiko.SSHConfig()
        with user_config_file.open(mode='r') as f:
            ssh_config.parse(f)
        # lookup hostname from list of hosts
        user_config = ssh_config.lookup(args.host)
        client_kwds['hostname'] = user_config['hostname']
        # get username if not entered from command-line
        # (--user defaults to an empty string and the ssh option is "user")
        if not args.user and 'user' in user_config:
            client_kwds['username'] = user_config['user']
        # use identityfile if in ssh configuration file
        if 'identityfile' in user_config:
            client_kwds['key_filename'] = user_config['identityfile']
    # fall back to the local login name if no username was found
    if not client_kwds['username']:
        client_kwds['username'] = getpass.getuser()

    # open HOST ssh client for USER (and use password if no IDENTITYFILE)
    client = attempt_login(**client_kwds)
    try:
        # open secure FTP client
        client_ftp = client.open_sftp()
        try:
            # print username for remote client
            logging.info(
                f'{client_kwds["username"]}@{client_kwds["hostname"]}:\n')
            # run program
            scp_ICESat2_files(client, client_ftp, args.directory,
                args.remote, args.product, args.release, args.version,
                args.granule, args.cycle, args.track, PUSH=args.push,
                LIST=args.list, CLOBBER=args.clobber, MODE=args.mode)
        finally:
            # close the secure FTP server
            client_ftp.close()
    finally:
        # close the ssh client (even if the transfer raised an error)
        client.close()
# PURPOSE: try logging onto the server and catch authentication errors
def attempt_login(**client_kwds):
    """
    Open an ssh client, falling back to interactive password entry

    The first attempt uses the keywords exactly as given (typically an
    identity file). If authentication fails, the ``key_filename``
    keyword is dropped and the user is prompted for a password until
    the login succeeds or the user declines to try again.

    Parameters
    ----------
    **client_kwds: dict
        keyword arguments passed to ``paramiko.SSHClient.connect``
        must include ``hostname`` and ``username``

    Returns
    -------
    client: paramiko.SSHClient
        connected ssh client

    Raises
    ------
    SystemExit
        with a non-zero status if authentication failed and the user
        chose not to enter a different password
    """
    # work on a copy so that the keywords of the caller are not modified
    kwds = client_kwds.copy()
    # open HOST ssh client
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    # add initial attempt
    attempts = 1
    # use identification file
    try:
        client.connect(**kwds)
    except paramiko.ssh_exception.AuthenticationException:
        pass
    else:
        return client
    # add attempt
    attempts += 1
    # phrase for entering password
    phrase = f'Password for {kwds["username"]}@{kwds["hostname"]}: '
    # remove key_filename from keywords
    kwds.pop('key_filename', None)
    # enter password securely from command-line
    tryagain = True
    while tryagain:
        kwds['password'] = getpass.getpass(phrase)
        try:
            # keywords must be unpacked as a mapping (**): unpacking with
            # a single star would pass the dictionary keys as positionals
            client.connect(**kwds)
        except paramiko.ssh_exception.AuthenticationException:
            pass
        else:
            # do not keep the password in memory longer than necessary
            kwds.pop('password')
            return client
        # retry with new password
        logging.critical(f'Authentication Failed (Attempt {attempts:d})')
        tryagain = builtins.input('Try Different Password? (Y/N): ') in ('Y','y')
        # add attempt
        attempts += 1
    # exit program if not trying again
    # non-zero status so that calling scripts can detect the failed login
    sys.exit(1)
# PURPOSE: copies ICESat-2 HDF5 files between a remote host and a local host
def scp_ICESat2_files(client, client_ftp, DIRECTORY, REMOTE, PRODUCT,
    RELEASE, VERSIONS, GRANULES, CYCLES, TRACKS, CLOBBER=False,
    PUSH=False, LIST=False, MODE=0o775):
    """
    Copy ICESat-2 HDF5 files between a local and a remote directory

    Files are searched for within date subdirectories (``N.N.N``) of
    the source directory and copied to the same subdirectory of the
    destination directory.

    Parameters
    ----------
    client: obj
        ssh client used for the scp transfers
    client_ftp: obj
        secure FTP client used for listing and querying remote paths
    DIRECTORY: str or pathlib.Path
        local working directory (created if it does not exist)
    REMOTE: str
        remote working directory
    PRODUCT: str
        ICESat-2 data product to copy
    RELEASE: str
        ICESat-2 data release to copy
    VERSIONS: list or NoneType
        ICESat-2 data versions to copy (all if empty)
    GRANULES: list or NoneType
        ICESat-2 granule regions to copy (all if empty)
    CYCLES: list or NoneType
        ICESat-2 orbital cycles to copy (all if empty)
    TRACKS: list or NoneType
        ICESat-2 reference ground tracks to copy (all if empty)
    CLOBBER: bool, default False
        overwrite existing data in transfer
    PUSH: bool, default False
        transfer files from local computer to remote server
        (default is to pull files from the remote server)
    LIST: bool, default False
        only list files that would be transferred
    MODE: int, default 0o775
        permissions mode of created directories and files
    """
    # check if directory exists and recursively create if not
    DIRECTORY = pathlib.Path(DIRECTORY).expanduser().absolute()
    DIRECTORY.mkdir(mode=MODE, parents=True, exist_ok=True)

    # build a regular expression of zero-padded alternatives
    # or a generic pattern of digits if no values were entered
    def _alternation(values, width):
        if values:
            return r'|'.join(f'{v:0{width}d}' for v in values)
        return rf'\d{{{width}}}'

    # find ICESat-2 HDF5 files in the subdirectory for product and release
    regex_track = _alternation(TRACKS, 4)
    regex_cycle = _alternation(CYCLES, 2)
    regex_granule = _alternation(GRANULES, 2)
    regex_version = _alternation(VERSIONS, 2)
    # compile regular expression operator for finding subdirectories
    # and extracting date information from the subdirectory
    rx1 = re.compile(r'(\d+)\.(\d+)\.(\d+)',re.VERBOSE)
    # compile regular expression operator for extracting data from files
    # the suffix dot is escaped so that only true ".h5" files are matched
    args = (PRODUCT,regex_track,regex_cycle,regex_granule,RELEASE,regex_version)
    regex_pattern = (r'(processed_)?({0})(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})'
        r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?)\.h5$')
    # the flag belongs to the compile call (not to the string formatting)
    rx2 = re.compile(regex_pattern.format(*args), re.VERBOSE)

    # if pushing from local directory to remote directory
    if PUSH:
        # find all local subdirectories
        # (skipping plain files that happen to match the date pattern)
        SUBDIRECTORY = [s for s in DIRECTORY.iterdir()
            if rx1.match(s.name) and s.is_dir()]
        # for each subdirectory to run
        for local_dir in sorted(SUBDIRECTORY):
            # find files within local directory
            remote_path = posixpath.join(REMOTE, local_dir.name)
            file_list = sorted(f for f in local_dir.iterdir()
                if rx2.match(f.name))
            # nothing to transfer from this subdirectory
            if not file_list:
                continue
            # check if data directory exists and recursively create if not
            # (once for each subdirectory rather than once for each file)
            remote_makedirs(client_ftp, remote_path, LIST=LIST, MODE=MODE)
            for local_file in file_list:
                # push file from local to remote
                remote_file = posixpath.join(remote_path, local_file.name)
                scp_push_file(client, client_ftp, local_file, remote_file,
                    CLOBBER=CLOBBER, LIST=LIST, MODE=MODE)
    else:
        # find all remote subdirectories
        SUBDIRECTORY = [s for s in client_ftp.listdir(REMOTE) if rx1.match(s)]
        # for each subdirectory to run
        for sub in sorted(SUBDIRECTORY):
            # find remote files
            remote_path = posixpath.join(REMOTE,sub)
            file_list = [f for f in client_ftp.listdir(remote_path)
                if rx2.match(f)]
            for fi in sorted(file_list):
                # pull file from remote to local
                local_file = DIRECTORY.joinpath(sub, fi)
                remote_file = posixpath.join(remote_path, fi)
                scp_pull_file(client, client_ftp, local_file, remote_file,
                    CLOBBER=CLOBBER, LIST=LIST, MODE=MODE)
# PURPOSE: recursively create directories on remote server
def remote_makedirs(client_ftp, remote_dir, LIST=False, MODE=0o775):
    """
    Recursively create a directory on the remote server

    Parameters
    ----------
    client_ftp: obj
        secure FTP client providing ``stat`` and ``mkdir``
    remote_dir: str
        remote directory to create (absolute or relative posix path)
    LIST: bool, default False
        only list files: do not create any remote directories
    MODE: int, default 0o775
        permissions mode of the created directories
    """
    # nothing is created (or queried) if only listing files
    if LIST:
        return
    # start from the root for absolute paths
    # and from the remote working directory for relative paths
    if remote_dir.startswith(posixpath.sep):
        remote_path = posixpath.sep
    else:
        remote_path = ''
    # for each component of the path
    for s in remote_dir.split(posixpath.sep):
        # skip empty components (leading, trailing or repeated separators)
        if not s:
            continue
        remote_path = posixpath.join(remote_path, s)
        # create the directory if it does not presently exist
        try:
            client_ftp.stat(remote_path)
        except FileNotFoundError:
            client_ftp.mkdir(remote_path, MODE)
# PURPOSE: push a local file to a remote host checking if file exists
# and if the local file is newer than the remote file (reprocessed)
# set the permissions mode of the remote transferred file to MODE
def scp_push_file(client, client_ftp, local_file, remote_file,
    CLOBBER=False, LIST=False, MODE=0o775):
    """
    Push a local file to the remote host if it is new or updated

    Parameters
    ----------
    client: obj
        ssh client used for the scp transfer
    client_ftp: obj
        secure FTP client used to query and modify the remote file
    local_file: pathlib.Path
        path to the local file
    remote_file: str
        path to the file on the remote host
    CLOBBER: bool, default False
        transfer the file even if the remote copy is up to date
    LIST: bool, default False
        only list the file: do not transfer
    MODE: int, default 0o775
        permissions mode of the transferred file
    """
    # check if local file is newer than the remote file
    TEST = False
    OVERWRITE = 'clobber'
    # query the remote file directly rather than listing its directory:
    # a single request for each file, and a missing remote directory
    # (such as when only listing files) is treated as a missing file
    try:
        remote_mtime = client_ftp.stat(remote_file).st_mtime
    except FileNotFoundError:
        # file does not exist remotely
        TEST = True
        OVERWRITE = 'new'
    else:
        local_mtime = local_file.stat().st_mtime
        # if local file is newer: overwrite the remote file
        if (even(local_mtime) > even(remote_mtime)):
            TEST = True
            OVERWRITE = 'overwrite'
    # if file does not exist remotely, is to be overwritten, or CLOBBER is set
    if TEST or CLOBBER:
        logging.info(f'{str(local_file)} --> ')
        logging.info(f'\t{str(remote_file)} ({OVERWRITE})\n')
        # if not only listing files
        if not LIST:
            # copy local files to remote server
            with scp.SCPClient(client.get_transport(), socket_timeout=20) as s:
                s.put(local_file, remote_file, preserve_times=True)
            # change the permissions level of the transported file to MODE
            client_ftp.chmod(remote_file, MODE)
# PURPOSE: pull file from a remote host checking if file exists locally
# and if the remote file is newer than the local file (reprocessed)
# set the permissions mode of the local transferred file to MODE
def scp_pull_file(client, client_ftp, local_file, remote_file,
    CLOBBER=False, LIST=False, MODE=0o775):
    """
    Pull a file from the remote host if it is new or updated

    Parameters
    ----------
    client: obj
        ssh client used for the scp transfer
    client_ftp: obj
        secure FTP client used to query the remote file
    local_file: pathlib.Path
        path to the local file
    remote_file: str
        path to the file on the remote host
    CLOBBER: bool, default False
        transfer the file even if the local copy is up to date
    LIST: bool, default False
        only list the file: do not transfer
    MODE: int, default 0o775
        permissions mode of the transferred file and created directories
    """
    # decide if the file needs transferring and for what reason
    if not local_file.exists():
        # no local copy of the file
        needed, reason = True, 'new'
    else:
        # compare modification times of the local and remote copies
        local_mtime = local_file.stat().st_mtime
        remote_mtime = client_ftp.stat(remote_file).st_mtime
        if even(remote_mtime) > even(local_mtime):
            # remote file is newer than the local file
            needed, reason = True, 'overwrite'
        else:
            needed, reason = False, 'clobber'
    # local copy is up to date and not forcing the transfer
    if not (needed or CLOBBER):
        return
    logging.info(f'{str(remote_file)} -->')
    logging.info(f'\t{str(local_file)} ({reason})\n')
    # stop here if only listing files
    if LIST:
        return
    # recursively create the local directory if not existing
    local_file.parent.mkdir(mode=MODE, parents=True, exist_ok=True)
    # copy the remote file to the local computer
    transport = client.get_transport()
    with scp.SCPClient(transport, socket_timeout=20) as s:
        s.get(remote_file, local_path=local_file, preserve_times=True)
    # set the permissions level of the transferred file to MODE
    local_file.chmod(mode=MODE)
# PURPOSE: rounds a number to an even number less than or equal to original
def even(i):
    """
    Round a number down to the nearest even integer

    Parameters
    ----------
    i: int or float
        number to be rounded

    Returns
    -------
    int
        largest even integer less than or equal to ``i``
    """
    # number of complete pairs within the value (floor division)
    pairs = int(i // 2)
    return pairs * 2
# run main program when executed as a script (not when imported as a module)
if __name__ == '__main__':
    main()