#!/usr/bin/env python u""" scp_ICESat2_files.py Written by Tyler Sutterley (05/2020) Copies ICESat-2 HDF5 data from between a local host and a remote host can switch between pushing and pulling to/from remote PUSH to remote: s.put(local_file, remote_file) PULL from remote: s.get(remote_file,local_path=local_file) CALLING SEQUENCE: python scp_ICESat2_files.py --host=<host> --user=<username> \ --product=ATL06 --release=205 --granule=10,11,12 --cycle=1,2 \ --remote=<path_to_remote> --verbose --mode=0o775 COMMAND LINE OPTIONS: -h, --help: list the command line options --host=X: Remote server host --user=X: Remote server username -D X, --directory=X: Local working directory --remote=X: Remote working directory --product=X: ICESat-2 data product to copy --release=X: ICESat-2 data release to copy --version=X: ICESat-2 data version to copy --granule=X: ICESat-2 granule regions to copy --cycle=X: ICESat-2 cycle to copy --track=X: ICESat-2 tracks to copy -C, --clobber: overwrite existing data in transfer -V, --verbose: output information about each synced file -M X, --mode=X: permission mode of directories and files synced --push: Transfer files from local computer to remote server -L, --list: only list files to be transferred PYTHON DEPENDENCIES: paramiko: Native Python SSHv2 protocol library http://www.paramiko.org/ https://github.com/paramiko/paramiko scp: scp module for paramiko https://github.com/jbardin/scp.py UPDATE HISTORY: Updated 05/2020: adjust regular expression to run ATL07 sea ice products Updated 09/2019: sort subdirectories. Updated 07/2019: using Python3 compliant division. regex for file versions Written 05/2019 """ from __future__ import print_function, division import sys import os import re import io import scp import getopt import getpass import logging import paramiko import posixpath import numpy as np #-- PURPOSE: help module to describe the optional input command-line parameters def usage(): print('\nHelp: {0}'.format(os.path.basename(sys.argv[0]))) print(' --host=X\t\tRemote server host') print(' --user=X\t\tRemote server user') print(' -D X, --directory=X\tLocal working directory') print(' --remote=X\t\tRemote working directory') print(' --product=X\t\tICESat-2 data product to copy') print(' --release=X\t\tICESat-2 data release to copy') print(' --version=X\t\tICESat-2 data version to copy') print(' --granule=X\t\tICESat-2 granule regions to copy') print(' --cycle=X\t\tICESat-2 cycles to copy') print(' --track=X\t\tICESat-2 tracks to copy') print(' -C, --clobber\t\tOverwrite existing data in transfer') print(' -V, --verbose\t\tOutput information about each created file') print(' -M X, --mode=X\t\tPermission mode of directories and files created') print(' --push\t\t\tTransfer files from local computer to remote server') print(' -L, --list\t\tOnly list files to be transferred\n') #-- Main program that calls scp_ICESat2_files() def main(): #-- Read the system arguments listed after the program long_options = ['help','host=','user=','directory=','remote=','product=', 'release=','version=','granule=','cycle=','track=','verbose','clobber', 'mode=','push','list'] optlist,arglist = getopt.getopt(sys.argv[1:],'hD:VCM:L',long_options) #-- command line parameters HOST = '' USER = None IDENTITYFILE = None #-- working data directories DIRECTORY = os.getcwd() REMOTE = '' #-- ICESat-2 parameters PRODUCT = 'ATL06' RELEASE = '002' VERSIONS = None CYCLES = np.arange(1,6) GRANULES = None TRACKS = None VERBOSE = False CLOBBER = False #-- permissions mode of the local directories and files (number in octal) MODE = 0o775 PUSH = False LIST = False for opt, arg in optlist: if opt in ('-h','--help'): usage() sys.exit() elif opt in ("--host"): HOST = arg elif opt in ("--user"): USER = arg elif opt in ("-D","--directory"): DIRECTORY = os.path.expanduser(arg) elif opt in ("--remote"): REMOTE = arg elif opt in ("--product"): PRODUCT = arg elif opt in ("--release"): RELEASE = arg elif opt in ("--version"): VERSIONS = np.array(arg.split(','), dtype=np.int) elif opt in ("--granule"): GRANULES = np.array(arg.split(','), dtype=np.int) elif opt in ("--cycle"): CYCLES = np.sort(arg.split(',')).astype(np.int) elif opt in ("--track"): TRACKS = np.sort(arg.split(',')).astype(np.int) elif opt in ("-V","--verbose"): VERBOSE = True elif opt in ("-C","--clobber"): CLOBBER = True elif opt in ("-M","--mode"): MODE = int(arg, 8) elif opt in ("--push"): PUSH = True elif opt in ("-L","--list"): LIST = True #-- use ssh configuration file to extract hostname, user and identityfile user_config_file = os.path.join(os.environ['HOME'],".ssh","config") if os.path.exists(user_config_file): #-- read ssh configuration file and parse with paramiko ssh_config = paramiko.SSHConfig() with open(user_config_file) as f: ssh_config.parse(f) #-- lookup hostname from list of hosts user_config = ssh_config.lookup(HOST) HOST = user_config['hostname'] #-- get username if not entered from command-line if USER is None and 'username' in user_config.keys(): USER = user_config['username'] #-- use identityfile if in ssh configuration file if 'identityfile' in user_config.keys(): IDENTITYFILE = user_config['identityfile'] #-- open HOST ssh client for USER (and use password if no IDENTITYFILE) client = attempt_login(HOST, USER, IDENTITYFILE=IDENTITYFILE) #-- open secure FTP client client_ftp = client.open_sftp() #-- verbosity settings if VERBOSE or LIST: logging.getLogger("paramiko").setLevel(logging.WARNING) print('{0}@{1}:\n'.format(USER, HOST)) #-- run program scp_ICESat2_files(client, client_ftp, DIRECTORY, REMOTE, PRODUCT, RELEASE, VERSIONS, GRANULES, CYCLES, TRACKS, CLOBBER=CLOBBER, VERBOSE=VERBOSE, PUSH=PUSH, LIST=LIST, MODE=MODE) #-- close the secure FTP server client_ftp.close() #-- close the ssh client client = None #-- PURPOSE: try logging onto the server and catch authentication errors def attempt_login(HOST, USER, IDENTITYFILE=None): #-- open HOST ssh client client = paramiko.SSHClient() client.load_system_host_keys() tryagain = True attempts = 1 #-- use identification file if IDENTITYFILE: try: client.connect(HOST, username=USER, key_filename=IDENTITYFILE) except paramiko.ssh_exception.AuthenticationException: pass else: return client attempts += 1 #-- enter password securely from command-line while tryagain: PASSWORD = getpass.getpass('Password for {0}@{1}: '.format(USER,HOST)) try: client.connect(HOST, username=USER, password=PASSWORD) except paramiko.ssh_exception.AuthenticationException: pass else: del PASSWORD return client #-- retry with new password print('Authentication Failed (Attempt {0:d})'.format(attempts)) tryagain = builtins.input('Try Different Password? (Y/N): ') in ('Y','y') attempts += 1 #-- exit program if not trying again sys.exit() #-- PURPOSE: copies ICESat-2 HDF5 files between a remote host and a local host def scp_ICESat2_files(client, client_ftp, DIRECTORY, REMOTE, PRODUCT, RELEASE, VERSIONS, GRANULES, CYCLES, TRACKS, CLOBBER=False, VERBOSE=False, PUSH=False, LIST=False, MODE=0o775): #-- find ICESat-2 HDF5 files in the subdirectory for product and release TRACKS = np.arange(1,1388) if not np.any(TRACKS) else TRACKS CYCLES = np.arange(1,3) if not np.any(CYCLES) else CYCLES GRANULES = np.arange(1,15) if not np.any(GRANULES) else GRANULES VERSIONS = np.arange(1,10) if not np.any(VERSIONS) else VERSIONS regex_track = '|'.join(['{0:04d}'.format(T) for T in TRACKS]) regex_cycle = '|'.join(['{0:02d}'.format(C) for C in CYCLES]) regex_granule = '|'.join(['{0:02d}'.format(G) for G in GRANULES]) regex_version = '|'.join(['{0:02d}'.format(V) for V in VERSIONS]) #-- compile regular expression operator for finding subdirectories #-- and extracting date information from the subdirectory rx1 = re.compile('(\d+)\.(\d+)\.(\d+)',re.VERBOSE) #-- compile regular expression operator for extracting data from files args = (PRODUCT,regex_track,regex_cycle,regex_granule,RELEASE,regex_version) regex_pattern = ('(processed_)?({0})(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' '(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).h5$') rx2 = re.compile(regex_pattern.format(*args,re.VERBOSE)) #-- if pushing from local directory to remote directory if PUSH: #-- find all local subdirectories SUBDIRECTORY = [s for s in os.listdir(DIRECTORY) if rx1.match(s)] #-- for each subdirectory to run for sub in sorted(SUBDIRECTORY): #-- find files within local directory local_dir = os.path.join(DIRECTORY,sub) remote_path = os.path.join(DIRECTORY,sub) file_list = [f for f in os.listdir(local_dir) if rx2.match(f)] for fi in sorted(file_list): #-- check if data directory exists and recursively create if not remote_makedirs(client_ftp, remote_path, LIST=LIST, MODE=MODE) #-- push file from local to remote scp_push_file(client, client_ftp, fi, local_dir, remote_path, CLOBBER=CLOBBER, VERBOSE=VERBOSE, LIST=LIST, MODE=MODE) else: #-- find all remote subdirectories SUBDIRECTORY = [s for s in client_ftp.listdir(REMOTE) if rx1.match(s)] #-- for each subdirectory to run for sub in sorted(SUBDIRECTORY): #-- local and remote directories local_dir = os.path.join(DIRECTORY,sub) remote_path = posixpath.join(REMOTE,sub) #-- find remote files for hemisphere file_list=[f for f in client_ftp.listdir(remote_path) if rx2.match(f)] for fi in sorted(file_list): #-- check if data directory exists and recursively create if not if not os.access(local_dir, os.F_OK) and not LIST: os.makedirs(local_dir, MODE) #-- push file from local to remote scp_pull_file(client, client_ftp, fi, local_dir, remote_path, CLOBBER=CLOBBER, VERBOSE=VERBOSE, LIST=LIST, MODE=MODE) #-- PURPOSE: recursively create directories on remote server def remote_makedirs(client_ftp, remote_dir, LIST=False, MODE=0o775): dirs = remote_dir.split(posixpath.sep) remote_path = dirs[0] if dirs[0] else posixpath.sep for s in dirs: if (s not in client_ftp.listdir(remote_path)) and not LIST: client_ftp.mkdir(posixpath.join(remote_path,s), MODE) remote_path = posixpath.join(remote_path,s) #-- PURPOSE: push a local file to a remote host checking if file exists #-- and if the local file is newer than the remote file (reprocessed) #-- set the permissions mode of the remote transferred file to MODE def scp_push_file(client, client_ftp, transfer_file, local_dir, remote_dir, CLOBBER=False, VERBOSE=False, LIST=False, MODE=0o775): #-- local and remote versions of file local_file = os.path.join(local_dir,transfer_file) remote_file = posixpath.join(remote_dir,transfer_file) #-- check if local file is newer than the remote file TEST = False OVERWRITE = 'clobber' if (transfer_file in client_ftp.listdir(remote_dir)): local_mtime = os.stat(local_file).st_mtime remote_mtime = client_ftp.stat(remote_file).st_mtime #-- if local file is newer: overwrite the remote file if (even(local_mtime) > even(remote_mtime)): TEST = True OVERWRITE = 'overwrite' else: TEST = True OVERWRITE = 'new' #-- if file does not exist remotely, is to be overwritten, or CLOBBER is set if TEST or CLOBBER: if VERBOSE or LIST: print('{0} --> '.format(local_file)) print('\t{0} ({1})\n'.format(remote_file,OVERWRITE)) #-- if not only listing files if not LIST: #-- copy local files to remote server with scp.SCPClient(client.get_transport(), socket_timeout=20) as s: s.put(local_file, remote_file, preserve_times=True) #-- change the permissions level of the transported file to MODE client_ftp.chmod(remote_file, MODE) #-- PURPOSE: pull file from a remote host checking if file exists locally #-- and if the remote file is newer than the local file (reprocessed) #-- set the permissions mode of the local transferred file to MODE def scp_pull_file(client, client_ftp, transfer_file, local_dir, remote_dir, CLOBBER=False, VERBOSE=False, LIST=False, MODE=0o775): #-- local and remote versions of file local_file = os.path.join(local_dir,transfer_file) remote_file = posixpath.join(remote_dir,transfer_file) #-- check if remote file is newer than the local file TEST = False OVERWRITE = 'clobber' if os.access(local_file, os.F_OK): local_mtime = os.stat(local_file).st_mtime remote_mtime = client_ftp.stat(remote_file).st_mtime #-- if remote file is newer: overwrite the local file if (even(remote_mtime) > even(local_mtime)): TEST = True OVERWRITE = 'overwrite' else: TEST = True OVERWRITE = 'new' #-- if file does not exist locally, is to be overwritten, or CLOBBER is set if TEST or CLOBBER: if VERBOSE or LIST: print('{0} --> '.format(remote_file)) print('\t{0} ({1})\n'.format(local_file,OVERWRITE)) #-- if not only listing files if not LIST: #-- copy local files from remote server with scp.SCPClient(client.get_transport(), socket_timeout=20) as s: s.get(remote_file, local_path=local_file, preserve_times=True) #-- change the permissions level of the transported file to MODE os.chmod(local_file, MODE) #-- PURPOSE: rounds a number to an even number less than or equal to original def even(i): return 2*int(i//2) #-- run main program if __name__ == '__main__': main()