""" Methods for importing Helios data. """ from datetime import date, time, datetime, timedelta import os import pathlib import urllib.error from urllib.error import URLError from collections import OrderedDict import warnings import astropy.constants as constants import astropy.units as u import numpy as np import pandas as pd from bs4 import BeautifulSoup import requests from heliopy import config from heliopy.data import util from heliopy.data import cdasrest data_dir = config['download_dir'] use_hdf = config['use_hdf'] helios_dir = os.path.join(data_dir, 'helios') # new http base_url remote_base_url = 'http://helios-data.ssl.berkeley.edu/data/' def _check_probe(probe): probe = str(probe) assert probe == '1' or probe == '2', 'Probe number must be 1 or 2' return probe def _dist_file_dir(probe, year, doy): return os.path.join(helios_dir, 'helios{}'.format(probe), 'dist', '{}'.format(year), '{}'.format(int(doy))) def _loaddistfile(probe, year, doy, hour, minute, second): """ Method to load a Helios distribution file. Returns opened file and location of file if file exists. If file doesn't exist raises an OSError. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. year : int Year doy : int Day of year hour : int Hour. minute : int Minute second : int Second Returns ------- f : file Opened distribution function file filename : string Filename of opened file """ probe = _check_probe(probe) # Work out location of file yearstring = str(year)[-2:] filedir = _dist_file_dir(probe, year, doy) filename = os.path.join(filedir, 'h' + probe + 'y' + yearstring + 'd' + str(doy).zfill(3) + 'h' + str(hour).zfill(2) + 'm' + str(minute).zfill(2) + 's' + str(second).zfill(2) + '_') # Try to open distribution file for extension in ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1']: try: f = open(filename + extension) filename += extension except OSError: continue if 'f' not in locals(): raise OSError('Could not find file with name ' + filename[:-1]) else: return f, filename def _dist_filename_to_hms(path): """Given distribution filename, extract hour, minute, second""" # year = int(path[-21:-19]) + 1900 # doy = int(path[-18:-15]) hour = int(path[-14:-12]) minute = int(path[-11:-9]) second = int(path[-8:-6]) return hour, minute, second def integrated_dists(probe, starttime, endtime, verbose=False): """ Returns the integrated distributions from experiments i1a and i1b in Helios distribution function files. The distributions are integrated over all angles and given as a function of proton velocity. Parameters ---------- probe : int Helios probe to import data from. Must be 1 or 2. starttime : datetime Start of interval endtime : datetime End of interval verbose : bool, optional If ``True``, print information whilst loading. Default is ``False``. Returns ------- distinfo : Series Infromation stored in the top of distribution function files. """ extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1'] distlist = {'a': [], 'b': []} starttime_orig = starttime # Loop through each day while starttime < endtime: year = starttime.year doy = starttime.strftime('%j') # Directory for today's distribution files dist_dir = _dist_file_dir(probe, year, doy) # Locaiton of hdf file to save to/load from hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\ 'integrated_dists.hdf' hdffile = os.path.join(dist_dir, hdffile) todays_dists = {'a': [], 'b': []} # Check if data is already saved if os.path.isfile(hdffile): for key in todays_dists: todays_dists[key] = pd.read_hdf(hdffile, key=key) distlist[key].append(todays_dists[key]) starttime += timedelta(days=1) continue # If not saved, generate a derived file else: # Get every distribution function file present for this day for f in os.listdir(dist_dir): path = os.path.join(dist_dir, f) # Check for distribution function if path[-5:] in extensions: hour, minute, second = _dist_filename_to_hms(path) try: a, b = integrated_dists_single(probe, year, doy, hour, minute, second) except RuntimeError as err: strerr = 'No ion distribution function data in file' if str(err) == strerr: continue raise err t = datetime.combine(starttime.date(), time(hour, minute, second)) if verbose: print(t) dists = {'a': a, 'b': b} for key in dists: dist = dists[key] dist['Time'] = t dist = dist.set_index(['Time', 'v'], drop=True) todays_dists[key].append(dist) # Go through a and b and concat all the data for key in todays_dists: todays_dists[key] = pd.concat(todays_dists[key]) if use_hdf: todays_dists[key].to_hdf(hdffile, key=key, mode='a') distlist[key].append(todays_dists[key]) starttime += timedelta(days=1) for key in distlist: distlist[key] = util.timefilter(distlist[key], starttime_orig, endtime) return distlist def integrated_dists_single(probe, year, doy, hour, minute, second): """ Returns the integrated distributions from experiments i1a and i1b in Helios distribution function files. The distributions are integrated over all angles and given as a function of proton velocity. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. year : int Year doy : int Day of year hour : int Hour minute : int Minute. second : int Second Returns ------- i1a : DataFrame i1a integrated distribution function. i1b : DataFrame i1b integrated distribution function. """ probe = _check_probe(probe) f, _ = _loaddistfile(probe, year, doy, hour, minute, second) for line in f: if line[0:19] == ' 1-D i1a integrated': break # i1a distribution function i1adf = f.readline().split() f.readline() i1avs = f.readline().split() f.readline() # i1b distribution file i1bdf = f.readline().split() f.readline() i1bvs = f.readline().split() i1a = pd.DataFrame({'v': i1avs, 'df': i1adf}, dtype=float) i1b = pd.DataFrame({'v': i1bvs, 'df': i1bdf}, dtype=float) f.close() return i1a, i1b def electron_dist_single(probe, year, doy, hour, minute, second, remove_advect=False): """ Read in 2D electron distribution function. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. year : int Year doy : int Day of year hour : int Hour. minute : int Minute second : int Second remove_advect : bool, optional If ``False``, the distribution is returned in the spacecraft frame. If ``True``, the distribution is returned in the solar wind frame, by subtracting the spacecraft velocity from the velcoity of each bin. Note this significantly slows down reading in the distribution. Returns ------- dist : DataFrame 2D electron distribution function """ probe = _check_probe(probe) f, filename = _loaddistfile(probe, year, doy, hour, minute, second) startline = None for i, line in enumerate(f): # Find start of electron distribution function if line[0:4] == ' 2-D': startline = i + 2 # Throw away next line (just has max of distribution) f.readline() # Throw away next line (just has table headings) if f.readline()[0:27] == ' no electron data available': return None break nlines = None for i, line in enumerate(f): if 'Degree, Pizzo correction' in line: break nlines = i + 1 if startline is None: return None ########################################## # Read and process electron distribution # ########################################## # Arguments for reading in data readargs = {'usecols': [0, 1, 2, 3, 4, 5], 'names': ['Az', 'E_bin', 'pdf', 'counts', 'vx', 'vy'], 'delim_whitespace': True, 'skiprows': startline, 'nrows': nlines} # Read in data dist = pd.read_csv(filename, **readargs) if dist.empty: return None # Remove spacecraft abberation # Assumes that spacecraft motion is always in the ecliptic (x-y) # plane if remove_advect: params = distparams_single(probe, year, doy, hour, minute, second) dist['vx'] += params['helios_vr'] dist['vy'] += params['helios_v'] # Convert to SI units dist[['vx', 'vy']] *= 1e3 dist['pdf'] *= 1e12 # Calculate spherical coordinates of energy bins dist['|v|'], _, dist['phi'] =\ util._cart2sph(dist['vx'], dist['vy'], 0) # Calculate bin energy assuming particles are electrons dist['E_electron'] = 0.5 * constants.m_e.value *\ ((dist['|v|']) ** 2) # Convert to multi-index using Azimuth and energy bin dist = dist.set_index(['E_bin', 'Az']) f.close() return dist def distparams(probe, starttime, endtime, verbose=False): """ Read in distribution parameters found in the header of distribution files. Parameters ---------- probe : int Helios probe to import data from. Must be 1 or 2. starttime : datetime Start of interval endtime : datetime End of interval verbose : bool, optional If ``True``, print information whilst loading. Default is ``False``. Returns ------- distinfo : Series Infromation stored in the top of distribution function files """ extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1'] paramlist = [] starttime_orig = starttime # Loop through each day while starttime < endtime: year = starttime.year doy = starttime.strftime('%j') # Directory for today's distribution files dist_dir = _dist_file_dir(probe, year, doy) # Locaiton of hdf file to save to/load from hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\ 'distparams.hdf' hdffile = os.path.join(dist_dir, hdffile) if os.path.isfile(hdffile): todays_params = pd.read_hdf(hdffile) elif not os.path.isdir(dist_dir): starttime += timedelta(days=1) continue else: todays_params = [] # Get every distribution function file present for this day for f in os.listdir(dist_dir): path = os.path.join(dist_dir, f) # Check for distribution function if path[-5:] in extensions: hour, minute, second = _dist_filename_to_hms(path) if verbose: print(starttime.date(), hour, minute, second) p = distparams_single(probe, year, doy, hour, minute, second) todays_params.append(p) todays_params = pd.concat(todays_params, ignore_index=True, axis=1).T todays_params = todays_params.set_index('Time', drop=False) # Convert columns to numeric types todays_params = todays_params.apply(pd.to_numeric, errors='ignore') todays_params['Time'] = pd.to_datetime(todays_params['Time']) if use_hdf: todays_params.to_hdf(hdffile, key='distparams', mode='w') paramlist.append(todays_params) starttime += timedelta(days=1) return util.timefilter(paramlist, starttime_orig, endtime) def distparams_single(probe, year, doy, hour, minute, second): """ Read in parameters from a single distribution function measurement. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. year : int Year doy : int Day of year hour : int Hour minute : int Minute second : int Second Returns ------- distparams : Series Distribution parameters from top of distribution function file. """ probe = _check_probe(probe) f, _ = _loaddistfile(probe, year, doy, hour, minute, second) _, month, day = util.doy2ymd(year, doy) dtime = datetime(year, month, day, hour, minute, second) distparams = pd.Series(dtime, index=['Time']) # Ignore the Pizzo et. al. correction at top of file for _ in range(0, 3): f.readline() # Line of flags flags = f.readline().split() distparams['imode'] = int(flags[0]) # Alternating energy/azimuth shift on? distparams['ishift'] = bool(flags[1]) # Possibly H2 abberation shift? distparams['iperihelion_shift'] = bool(flags[2]) # Indicates a HDM file which contained bad data (frames), but could be # handled as NDM file distparams['minus'] = int(flags[3]) # 0 = no instrument, 1 = i1a, 2 = I3 distparams['ion_instrument'] = int(flags[4]) distparams['data_rate'] = 1 if ('hdm' in f.name) else 0 # 2 lines of Helios location information location = f.readline().split() distparams['r_sun'] = float(location[0]) # Heliospheric distance (AU) distparams['clong'] = float(location[1]) # Carrington longitude (deg) distparams['clat'] = float(location[2]) # Carrington lattitude (deg) distparams['carrot'] = int(f.readline().split()[0]) # Carrington cycle # 2 lines of Earth location information earth_loc = f.readline().split() # Heliospheric distance (AU) distparams['earth_rsun'] = float(earth_loc[0]) # Carrington longitude (deg) distparams['earth_clong'] = float(earth_loc[1]) # Carrington lattitude (deg) distparams['earth_clat'] = float(earth_loc[2]) earth_loc = f.readline().split() # Angle between Earth and Helios (deg) distparams['earth_he_angle'] = float(earth_loc[0]) # Carrington rotation distparams['earth_carrot'] = int(earth_loc[1]) # Helios velocity information helios_v = f.readline().split() # Helios radial velocity (km/s) distparams['helios_vr'] = float(helios_v[0]) * 1731 # Helios tangential velocity (km/s) distparams['helios_v'] = float(helios_v[1]) * 1731 # i1a integrated ion parameters i1a_proton_params = f.readline().split() # Proton number density (cm^-3) distparams['np_i1a'] = float(i1a_proton_params[0]) # Proton velocity (km/s) distparams['vp_i1a'] = float(i1a_proton_params[1]) # Proton temperature (K) distparams['Tp_i1a'] = float(i1a_proton_params[2]) i1a_proton_params = f.readline().split() # Proton azimuth flow angle (deg) distparams['v_az_i1a'] = float(i1a_proton_params[0]) # Proton elevation flow angle (deg) distparams['v_el_i1a'] = float(i1a_proton_params[1]) assert distparams['v_az_i1a'] < 360,\ 'Flow azimuth must be less than 360 degrees' # i1a integrated alpha parameters (possibly all zero?) i1a_alpha_params = f.readline().split() # Alpha number density (cm^-3) distparams['na_i1a'] = float(i1a_alpha_params[0]) # Alpha velocity (km/s) distparams['va_i1a'] = float(i1a_alpha_params[1]) # Alpha temperature (K) distparams['Ta_i1a'] = float(i1a_alpha_params[2]) # i1b integrated ion parameters i1b_proton_params = f.readline().split() # Proton number density (cm^-3) distparams['np_i1b'] = float(i1b_proton_params[0]) # Proton velocity (km/s) distparams['vp_i1b'] = float(i1b_proton_params[1]) # Proton temperature (K) distparams['Tp_i1b'] = float(i1b_proton_params[2]) # Magnetic field (out by a factor of 10 in data files for some reason) B = f.readline().split() distparams['Bx'] = float(B[0]) / 10 distparams['By'] = float(B[1]) / 10 distparams['Bz'] = float(B[2]) / 10 sigmaB = f.readline().split() distparams['sigmaBx'] = float(sigmaB[0]) / 10 distparams['sigmaBy'] = float(sigmaB[1]) / 10 distparams['sigmaBz'] = float(sigmaB[2]) / 10 # Replace bad values with nans to_replace = {'Tp_i1a': [-1.0, 0], 'np_i1a': [-1.0, 0], 'vp_i1a': [-1.0, 0], 'Tp_i1b': [-1.0, 0], 'np_i1b': [-1.0, 0], 'vp_i1b': [-1.0, 0], 'sigmaBx': -0.01, 'sigmaBy': -0.01, 'sigmaBz': -0.01, 'Bx': 0.0, 'By': 0.0, 'Bz': 0.0, 'v_az_i1a': [-1, 0], 'v_el_i1a': [-1, 0], 'na_i1a': [-1, 0], 'va_i1a': [-1, 0], 'Ta_i1a': [-1, 0]} distparams = distparams.replace(to_replace, np.nan) f.close() return distparams def electron_dists(probe, starttime, endtime, remove_advect=False, verbose=False): """ Return 2D electron distributions between *starttime* and *endtime* Parameters ---------- probe : int Helios probe to import data from. Must be 1 or 2. starttime : datetime Start of interval endtime : datetime End of interval remove_advect : bool, optional If *False*, the distribution is returned in the spacecraft frame. If *True*, the distribution is returned in the solar wind frame, by subtracting the spacecraft velocity from the velcoity of each bin. Note this significantly slows down reading in the distribution. verbose : bool, optional If ``True``, print dates when loading files. Default is ``False``. Returns ------- dists : DataFrame Electron distribution functions """ extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1'] distlist = [] # Loop through each day starttime_orig = starttime while starttime < endtime: year = starttime.year doy = starttime.strftime('%j') if verbose: print('Loading electron dists from year', year, 'doy', doy) # Directory for today's distribution files dist_dir = _dist_file_dir(probe, year, doy) print(dist_dir) # If directory doesn't exist, print error and continue if not os.path.exists(dist_dir): print('No electron distributions available for year', year, 'doy', doy) starttime += timedelta(days=1) continue # Locaiton of hdf file to save to/load from hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\ 'electron_dists.hdf' hdffile = os.path.join(dist_dir, hdffile) if os.path.isfile(hdffile): todays_dist = pd.read_hdf(hdffile) distlist.append(todays_dist) starttime += timedelta(days=1) continue todays_dist = [] # Get every distribution function file present for this day for f in os.listdir(dist_dir): path = os.path.join(dist_dir, f) # Check for distribution function if path[-5:] in extensions: hour, minute, second = _dist_filename_to_hms(path) try: d = electron_dist_single(probe, year, doy, hour, minute, second) except RuntimeError as err: strerr = 'No electron distribution function data in file' if str(err) == strerr: continue raise err if d is None: continue t = datetime.combine(starttime.date(), time(hour, minute, second)) d['Time'] = t if verbose: print(t) todays_dist.append(d) if todays_dist == []: starttime += timedelta(days=1) continue todays_dist = pd.concat(todays_dist) todays_dist = todays_dist.set_index('Time', append=True) if use_hdf: todays_dist.to_hdf(hdffile, key='electron_dists', mode='w') distlist.append(todays_dist) starttime += timedelta(days=1) if distlist == []: raise RuntimeError('No electron data available for times ' + str(starttime_orig) + ' to ' + str(endtime)) return util.timefilter(distlist, starttime_orig, endtime) def ion_dists(probe, starttime, endtime, remove_advect=False, verbose=False): """ Return 3D ion distributions between *starttime* and *endtime* Parameters ---------- probe : int Helios probe to import data from. Must be 1 or 2. starttime : datetime Start of interval endtime : datetime End of interval remove_advect : bool, optional If *False*, the distribution is returned in the spacecraft frame. If *True*, the distribution is returned in the solar wind frame, by subtracting the spacecraft velocity from the velcoity of each bin. Note this significantly slows down reading in the distribution. verbose : bool, optional If ``True``, print dates when loading files. Default is ``False``. Returns ------- distinfo : Series Infromation stored in the top of distribution function files. """ extensions = ['hdm.0', 'hdm.1', 'ndm.0', 'ndm.1'] distlist = [] # Loop through each day starttime_orig = starttime while starttime < endtime: year = starttime.year doy = int(starttime.strftime('%j')) if verbose: print('Loading ion dists from year', year, 'doy', doy) # Directory for today's distribution files dist_dir = _dist_file_dir(probe, year, doy) # If directory doesn't exist, print error and continue if not os.path.exists(dist_dir): print('No ion distributions available for year', year, 'doy', doy) starttime += timedelta(days=1) continue # Locaiton of hdf file to save to/load from hdffile = 'h' + probe + str(year) + str(doy).zfill(3) +\ 'ion_dists.hdf' hdffile = os.path.join(dist_dir, hdffile) if os.path.isfile(hdffile): todays_dist = pd.read_hdf(hdffile) distlist.append(todays_dist) starttime += timedelta(days=1) continue todays_dist = [] # Get every distribution function file present for this day for f in os.listdir(dist_dir): path = os.path.join(dist_dir, f) # Check for distribution function if path[-5:] in extensions: hour, minute, second = _dist_filename_to_hms(path) try: d = ion_dist_single(probe, year, doy, hour, minute, second) except RuntimeError as err: strerr = 'No ion distribution function data in file' if str(err) == strerr: continue raise err t = datetime.combine(starttime.date(), time(hour, minute, second)) d['Time'] = t if verbose: print(t) todays_dist.append(d) if todays_dist == []: starttime += timedelta(days=1) continue todays_dist = pd.concat(todays_dist) todays_dist = todays_dist.set_index('Time', append=True) if use_hdf: todays_dist.to_hdf(hdffile, key='ion_dist', mode='w') distlist.append(todays_dist) starttime += timedelta(days=1) if distlist == []: raise RuntimeError('No data available for times ' + str(starttime_orig) + ' to ' + str(endtime)) return util.timefilter(distlist, starttime_orig, endtime) def ion_dist_single(probe, year, doy, hour, minute, second, remove_advect=False): """ Read in ion distribution function. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. year : int Year doy : int Day of year hour : int Hour minute : int Minute. second : int Second remove_advect : bool, optional If *False*, the distribution is returned in the spacecraft frame. If *True*, the distribution is returned in the solar wind frame, by subtracting the spacecraft velocity from the velcoity of each bin. Note this significantly slows down reading in the distribution. Returns ------- dist : DataFrame 3D ion distribution function """ probe = _check_probe(probe) f, filename = _loaddistfile(probe, year, doy, hour, minute, second) nionlines = None # Number of lines in ion distribution linesread = 0 # Stores the total number of lines read in the file # Loop through file to find end of ion distribution function for i, line in enumerate(f): # Find start of proton distribution function if line[0:23] == 'Maximum of distribution': ionstartline = i + 1 # Find number of lines in ion distribution function if line[0:4] == ' 2-D': nionlines = i - ionstartline break linesread += i # Bizzare case where there are two proton distributions in one file, # or there's no electron data available for i, line in enumerate(f): if line[0:23] == 'Maximum of distribution' or\ line[0:30] == ' 1.2 Degree, Pizzo correction' or\ line[0:30] == ' -1.2 Degree, Pizzo correction': warnings.warn("More than one ion distribution function found", RuntimeWarning) # NOTE: Bodge linesread -= 1 break f.close() # If there's no electron data to get number of lines, set end of ion # distribution function to end of file if nionlines is None: nionlines = i - ionstartline + 1 ##################################### # Read and process ion distribution # ##################################### # If no ion data in file if nionlines < 1: raise RuntimeError('No ion distribution function data in file') # Arguments for reading in data readargs = {'usecols': [0, 1, 2, 3, 4, 5, 6, 7], 'names': ['Az', 'El', 'E_bin', 'pdf', 'counts', 'vx', 'vy', 'vz'], 'delim_whitespace': True, 'skiprows': ionstartline, 'nrows': nionlines} # Read in data dist = pd.read_csv(filename, **readargs) # Remove spacecraft abberation # Assumes that spacecraft motion is always in the ecliptic (x-y) # plane if remove_advect: params = distparams_single(probe, year, doy, hour, minute, second) dist['vx'] += params['helios_vr'] dist['vy'] += params['helios_v'] # Convert to SI units dist[['vx', 'vy', 'vz']] *= 1e3 dist['pdf'] *= 1e12 # Calculate magnitude, elevation and azimuth of energy bins dist['|v|'], dist['theta'], dist['phi'] =\ util._cart2sph(dist['vx'], dist['vy'], dist['vz']) # Calculate bin energy assuming particles are protons dist['E_proton'] = 0.5 * constants.m_p.value * ((dist['|v|']) ** 2) # Convert to multi-index using azimuth, elevation, and energy bins dist = dist.set_index(['E_bin', 'El', 'Az']) return dist class _CoreFitDownloader(util.Downloader): def __init__(self, probe): self.probe = _check_probe(probe) self.units = OrderedDict([ ('B instrument', u.dimensionless_unscaled), ('Bx', u.nT), ('By', u.nT), ('Bz', u.nT), ('sigma B', u.nT), ('Ion instrument', u.dimensionless_unscaled), ('Status', u.dimensionless_unscaled), ('Tp_par', u.K), ('Tp_perp', u.K), ('carrot', u.dimensionless_unscaled), ('r_sun', u.AU), ('clat', u.deg), ('clong', u.deg), ('earth_he_angle', u.deg), ('n_p', u.cm**-3), ('vp_x', u.km / u.s), ('vp_y', u.km / u.s), ('vp_z', u.km / u.s), ('vth_p_par', u.km / u.s), ('vth_p_perp', u.km / u.s)]) def intervals(self, starttime, endtime): return self.intervals_daily(starttime, endtime) def fname(self, interval): year = interval.start.strftime('%Y') doy = interval.start.strftime('%j') return f'h{self.probe}_{year}_{doy.zfill(3)}_corefit.csv' def local_dir(self, interval): year = interval.start.strftime('%Y') return pathlib.Path('helios') / 'corefit' / year def download(self, interval): local_dir = self.local_path(interval).parent local_dir.mkdir(parents=True, exist_ok=True) year = interval.start.strftime('%Y') remote_dir = (pathlib.Path('E1_experiment') / 'New_proton_corefit_data_2017' / 'ascii' / f'helios{self.probe}' / f'{year}') remote_url = '{}{}'.format(remote_base_url, remote_dir) try: util._download_remote(remote_url, self.fname(interval), local_dir) except urllib.error.HTTPError: raise util.NoDataError def load_local_file(self, interval): return pd.read_csv(self.local_path(interval), parse_dates=['Time']) def corefit(probe, starttime, endtime): """ Read in merged data set Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. starttime : datetime Interval start time endtime : datetime Interval end time try_download : bool, optional If ``False`` don't try to download data if it is missing locally. Returns ------- data : :class:`~sunpy.timeseries.TimeSeries` Data set """ dl = _CoreFitDownloader(probe) return dl.load(starttime, endtime) class _4hzDownloader(util.Downloader): def __init__(self, probe): self.probe = _check_probe(probe) self.units = OrderedDict([('Bx', u.nT), ('By', u.nT), ('Bz', u.nT), ('|B|', u.nT)]) def intervals(self, starttime, endtime): return self.intervals_daily(starttime, endtime) def fname(self, interval): year = int(interval.start.strftime('%Y')) doy = int(interval.start.strftime('%j')) return 'he{}1s{}{:03}.asc'.format(self.probe, year - 1900, doy) def local_dir(self, interval): year = interval.start.strftime('%Y') return pathlib.Path('helios') / 'mag4hz' / year def download(self, interval): remote_dir = ('E2_experiment/' 'Data_Cologne_Nov2016_bestdata/' 'HR%20-%20High%20Resolution%204Hz%20Data/' f'helios{self.probe}') remote_url = f'{remote_base_url}/{remote_dir}' local_fname = self.fname(interval) remote_fname = None # Because the filename contains a number between 0 and 24 at the end, # get a list of all the filenames and compare them to the filename # we want def get_file_list(url, ext='', params={}): response = requests.get(url, params=params) if response.ok: response_text = response.text else: return response.raise_for_status() soup = BeautifulSoup(response_text, 'html.parser') complete_file_list = [node.get('href') for node in soup.find_all('a') if node.get('href').endswith(ext)] return complete_file_list ext = 'asc' remote_file_list = get_file_list(remote_url, ext) for filename in remote_file_list: if local_fname[:-4] in filename: remote_fname = filename break if remote_fname is None: raise util.NoDataError dl_dir = self.local_path(interval).parent util._download_remote(remote_url, remote_fname, dl_dir) # Rename to a sensible and deterministic file name downloaded_path = (dl_dir / remote_fname) new_path = self.local_path(interval) downloaded_path.rename(new_path) def load_local_file(self, interval): # Read in data headings = ['Time', 'Bx', 'By', 'Bz'] cols = [0, 4, 5, 6] data = pd.read_csv(self.local_path(interval), names=headings, header=None, usecols=cols, delim_whitespace=True) # Convert date info to datetime data['Time'] = pd.to_datetime(data['Time'], format='%Y-%m-%dT%H:%M:%S') data = data.set_index('Time', drop=True) return data def mag_4hz(probe, starttime, endtime): """ Read in 4Hz magnetic field data. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. starttime : datetime Interval start time endtime : datetime Interval end time try_download : bool, optional If ``False`` don't try to download data if it is missing locally. Returns ------- data : :class:`~sunpy.timeseries.TimeSeries` 4Hz magnetic field data set """ dl = _4hzDownloader(probe) return dl.load(starttime, endtime) class _NessDownloader(util.Downloader): def __init__(self, probe): self.probe = _check_probe(probe) self.units = OrderedDict([('probe', u.dimensionless_unscaled), ('naverage', u.dimensionless_unscaled), ('Bx', u.nT), ('By', u.nT), ('Bz', u.nT), ('|B|', u.nT), ('sigma_Bx', u.nT), ('sigma_By', u.nT), ('sigma_Bz', u.nT)]) def intervals(self, starttime, endtime): return self.intervals_daily(starttime, endtime) def fname(self, interval): year = int(interval.start.strftime('%Y')) doy = int(interval.start.strftime('%j')) return 'h{}{}{:03}.asc'.format(self.probe, year - 1900, doy) def local_dir(self, interval): year = interval.start.strftime('%Y') return pathlib.Path('helios') / 'mag4hz' / year def download(self, interval): remote_dir = (pathlib.Path('E3_experiment') / 'helios{}_6sec_ness'.format(self.probe) / interval.start.strftime('%Y')) remote_url = f'{remote_base_url}{remote_dir}' try: util._download_remote(remote_url, self.fname(interval), self.local_path(interval).parent) except URLError: raise util.NoDataError def load_local_file(self, interval): # Read in data headings = ['probe', 'year', 'doy', 'hour', 'minute', 'second', 'naverage', 'Bx', 'By', 'Bz', '|B|', 'sigma_Bx', 'sigma_By', 'sigma_Bz'] colspecs = [(1, 2), (2, 4), (4, 7), (7, 9), (9, 11), (11, 13), (13, 15), (15, 22), (22, 29), (29, 36), (36, 42), (42, 48), (48, 54), (54, 60)] data = pd.read_fwf(self.local_path(interval), names=headings, header=None, colspecs=colspecs) # Process data data['year'] += 1900 # Convert date info to datetime data['Time'] = pd.to_datetime(data['year'], format='%Y') + \ pd.to_timedelta(data['doy'] - 1, unit='d') + \ pd.to_timedelta(data['hour'], unit='h') + \ pd.to_timedelta(data['minute'], unit='m') + \ pd.to_timedelta(data['second'], unit='s') data = data.drop(['year', 'doy', 'hour', 'minute', 'second'], axis=1) data = data.set_index('Time', drop=False) return data def mag_ness(probe, starttime, endtime): """ Read in 6 second magnetic field data. Parameters ---------- probe : int, string Helios probe to import data from. Must be 1 or 2. starttime : datetime Interval start time endtime : datetime Interval end time try_download : bool, optional If ``False`` don't try to download data if it is missing locally. Returns ------- data : DataFrame 6 second magnetic field data set """ dl = _NessDownloader(probe) return dl.load(starttime, endtime) def _docstring(identifier, extra): return cdasrest._docstring(identifier, 'M', extra) def _helios(starttime, endtime, identifier, units=None, warn_missing_units=True): """ Generic method for downloading Helios data from CDAWeb. """ dl = cdasrest.CDASDwonloader('helios', identifier, 'helios', units=units, warn_missing_units=warn_missing_units) return dl.load(starttime, endtime) def merged(probe, starttime, endtime): identifier = f'HELIOS{probe}_40SEC_MAG-PLASMA' return _helios(starttime, endtime, identifier, warn_missing_units=False) merged.__doc__ = _docstring( 'HELIOS1_40SEC_MAG-PLASMA', 'merged magnetic field and plasma data.')