#!/usr/bin/env python # -*- coding: utf-8 -*- # vim: tabstop=4 shiftwidth=4 softtabstop=4 # # Copyright (C) 2014-2017 GEM Foundation and G. Weatherill # # OpenQuake is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # OpenQuake is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with OpenQuake. If not, see <http://www.gnu.org/licenses/>. """ Module to get GMPE residuals - total, inter and intra {'GMPE': {'IMT1': {'Total': [], 'Inter event': [], 'Intra event': []}, 'IMT2': { ... }}} """ from __future__ import print_function import sys import re import h5py import warnings import numpy as np from datetime import datetime from math import sqrt, ceil from scipy.special import erf from scipy.stats import scoreatpercentile, norm from scipy.linalg import solve from copy import deepcopy from collections import OrderedDict from openquake.hazardlib.gsim import get_available_gsims from openquake.hazardlib.gsim.gmpe_table import GMPETable from openquake.hazardlib.gsim.base import GMPE import smtk.intensity_measures as ims from openquake.hazardlib import imt from smtk.strong_motion_selector import SMRecordSelector from smtk.trellis.trellis_plots import _get_gmpe_name from smtk.sm_table import GroundMotionTable from smtk.sm_utils import SCALAR_XY, get_interpolated_period,\ convert_accel_units GSIM_LIST = get_available_gsims() GSIM_KEYS = set(GSIM_LIST.keys()) # SCALAR_IMTS = ["PGA", "PGV", "PGD", "CAV", "Ia"] SCALAR_IMTS = ["PGA", "PGV"] STDDEV_KEYS = ["Mean", "Total", "Inter event", "Intra event"] def _check_gsim_list(gsim_list): 
""" Checks the list of GSIM models and returns an instance of the openquake.hazardlib.gsim class. Raises error if GSIM is not supported in OpenQuake :param list gsim_list: List of GSIM names (str) :returns: Ordered dictionary of GMPE names and instances """ output_gsims = [] for gsim in gsim_list: if isinstance(gsim, GMPE): # Is an instantated GMPE, so pass directly to list output_gsims.append((_get_gmpe_name(gsim), gsim)) elif gsim.startswith("GMPETable"): # Get filename match = re.match(r'^GMPETable\(([^)]+?)\)$', gsim) filepath = match.group(1).split("=")[1] gmpe_table = GMPETable(gmpe_table=filepath[1:-1]) output_gsims.append((_get_gmpe_name(gmpe_table), gmpe_table)) elif not (gsim in GSIM_LIST): raise ValueError('%s Not supported by OpenQuake' % gsim) else: output_gsims.append((gsim, GSIM_LIST[gsim]())) return OrderedDict(output_gsims) def get_geometric_mean(fle): """ Retreive geometric mean of the ground motions from the file - or calculate if not in file :param fle: Instance of :class: h5py.File """ # periods = fle["IMS/X/Spectra/Response/Periods"].value if not ("H" in fle["IMS"].keys()): # Horizontal spectra not in record x_spc = fle["IMS/X/Spectra/Response/Acceleration/damping_05"].values y_spc = fle["IMS/Y/Spectra/Response/Acceleration/damping_05"].values periods = fle["IMS/X/Spectra/Response/Periods"].values sa_geom = np.sqrt(x_spc * y_spc) else: if "Geometric" in fle["IMS/H/Spectra/Response/Acceleration"].keys(): sa_geom = fle[ "IMS/H/Spectra/Response/Acceleration/Geometric/damping_05" ].value periods = fle["IMS/X/Spectra/Periods"].values idx = periods > 0 periods = periods[idx] sa_geom = sa_geom[idx] else: # Horizontal spectra not in record x_spc = fle[ "IMS/X/Spectra/Response/Acceleration/damping_05"].values y_spc = fle[ "IMS/Y/Spectra/Response/Acceleration/damping_05"].values sa_geom = np.sqrt(x_spc * y_spc) return sa_geom def get_gmrotd50(fle): """ Retrieve GMRotD50 from file (or calculate if not present) :param fle: Instance of :class: h5py.File 
""" periods = fle["IMS/X/Spectra/Response/Periods"].value periods = periods[periods > 0.] if not ("H" in fle["IMS"].keys()): # Horizontal spectra not in record x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_gmrotd50 = ims.gmrotdpp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0)[0] else: if "GMRotD50" in fle["IMS/H/Spectra/Response/Acceleration"].keys(): sa_gmrotd50 = fle[ "IMS/H/Spectra/Response/Acceleration/GMRotD50/damping_05" ].value else: # Horizontal spectra not in record - calculate from time series x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_gmrotd50 = ims.gmrotdpp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0)[0] return sa_gmrotd50 def get_gmroti50(fle): """ Retreive GMRotI50 from file (or calculate if not present) :param fle: Instance of :class: h5py.File """ periods = fle["IMS/X/Spectra/Response/Periods"].value periods = periods[periods > 0.] 
if not ("H" in fle["IMS"].keys()): # Horizontal spectra not in record x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_gmroti50 = ims.gmrotipp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0)[0] else: if "GMRotI50" in fle["IMS/H/Spectra/Response/Acceleration"].keys(): sa_gmroti50 = fle[ "IMS/H/Spectra/Response/Acceleration/GMRotI50/damping_05" ].value else: # Horizontal spectra not in record - calculate from time series x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_gmroti50 = ims.gmrotipp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0) # Assumes Psuedo-spectral acceleration sa_gmroti50 = sa_gmroti50["PSA"] return sa_gmroti50 def get_rotd50(fle): """ Retrieve RotD50 from file (or calculate if not present) :param fle: Instance of :class: h5py.File """ periods = fle["IMS/H/Spectra/Response/Periods"].value periods = periods[periods > 0.] 
if not ("H" in fle["IMS"].keys()): # Horizontal spectra not in record x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_rotd50 = ims.rotdpp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0)[0] else: if "RotD50" in fle["IMS/H/Spectra/Response/Acceleration"].keys(): sa_rotd50 = fle[ "IMS/H/Spectra/Response/Acceleration/RotD50/damping_05" ].value else: # Horizontal spectra not in record - calculate from time series x_acc = ["Time Series/X/Original Record/Acceleration"] y_acc = ["Time Series/Y/Original Record/Acceleration"] sa_rotd50 = ims.rotdpp(x_acc.value, x_acc.attrs["Time-step"], y_acc.value, y_acc.attrs["Time-step"], periods, 50.0)[0] return sa_rotd50 SPECTRA_FROM_FILE = {"Geometric": get_geometric_mean, "GMRotI50": get_gmroti50, "GMRotD50": get_gmrotd50, "RotD50": get_rotd50} def get_scalar(fle, i_m, component="Geometric"): """ Retrieves the scalar IM from the database :param fle: Instance of :class: h5py.File :param str i_m: Intensity measure :param str component: Horizontal component of IM """ if not ("H" in fle["IMS"].keys()): x_im = fle["IMS/X/Scalar/" + i_m].value[0] y_im = fle["IMS/Y/Scalar/" + i_m].value[0] return SCALAR_XY[component](x_im, y_im) else: if i_m in fle["IMS/H/Scalar"].keys(): return fle["IMS/H/Scalar/" + i_m].value[0] else: raise ValueError("Scalar IM %s not in record database" % i_m) # The following methods are used for the MultivariateLLH function def _build_matrices(contexts, gmpe, imtx): """ Constructs the R and Z_G matrices (based on the implementation in the supplement to Mak et al (2017) """ neqs = len(contexts) nrecs = sum([ctxt["Num. 
Sites"] for ctxt in contexts]) r_mat = np.zeros(nrecs, dtype=float) z_g_mat = np.zeros([nrecs, neqs], dtype=float) expected_mat = np.zeros(nrecs, dtype=float) # Get observations observations = np.zeros(nrecs) i = 0 # Determine the total number of records and pass the log of the # obserations to the observations dictionary for ctxt in contexts: n_s = ctxt["Num. Sites"] observations[i:(i + n_s)] = np.log(ctxt["Observations"][imtx]) i += n_s i = 0 for j, ctxt in enumerate(contexts): if not("Intra event" in ctxt["Expected"][gmpe][imtx]) and\ not("Inter event" in ctxt["Expected"][gmpe][imtx]): # Only the total sigma exists # Total sigma is used as intra-event sigma (from S. Mak) n_r = len(ctxt["Expected"][gmpe][imtx]["Total"]) r_mat[i:(i + n_r)] = ctxt["Expected"][gmpe][imtx]["Total"] expected_mat[i:(i + n_r)] = ctxt["Expected"][gmpe][imtx]["Mean"] # Inter-event sigma is set to 0 i += n_r continue n_r = len(ctxt["Expected"][gmpe][imtx]["Intra event"]) r_mat[i:(i + n_r)] = ctxt["Expected"][gmpe][imtx]["Intra event"] # Get expected mean expected_mat[i:(i + n_r)] = ctxt["Expected"][gmpe][imtx]["Mean"] if len(ctxt["Expected"][gmpe][imtx]["Inter event"]) == 1: # Single inter event residual z_g_mat[i:(i + n_r), j] =\ ctxt["Expected"][gmpe][imtx]["Inter event"][0] else: # inter-event residual given at a vector z_g_mat[i:(i + n_r), j] =\ ctxt["Expected"][gmpe][imtx]["Inter event"] i += n_r v_mat = np.diag(r_mat ** 2.) + z_g_mat.dot(z_g_mat.T) return observations, v_mat, expected_mat, neqs, nrecs def get_multivariate_ll(contexts, gmpe, imt): """ Returns the multivariate loglikelihood, as described om equation 7 of Mak et al. (2017) """ observations, v_mat, expected_mat, neqs, nrecs = _build_matrices( contexts, gmpe, imt) sign, logdetv = np.linalg.slogdet(v_mat) b_mat = observations - expected_mat return (float(nrecs) * np.log(2.0 * np.pi) + logdetv + (b_mat.T.dot(solve(v_mat, b_mat)))) / 2. def bootstrap_llh(ij, contexts, gmpes, imts): """ Applyies the cluster bootstrap. 
A set of events, equal in length to that of the original data, is sampled randomly from the list of contexts. All of the sigmas for that specific event are transfered to the sample """ # Sample contexts timer_on = datetime.now() neqs = len(contexts) isamp = np.random.randint(0, neqs, neqs) new_contexts = [contexts[i] for i in isamp] outputs = np.zeros([len(gmpes), len(imts)]) for i, gmpe in enumerate(gmpes): for j, imtx in enumerate(imts): outputs[i, j] = get_multivariate_ll(new_contexts, gmpe, imtx) print("Bootstrap completed in {:.2f} seconds".format( (datetime.now() - timer_on).total_seconds())) return outputs class Residuals(object): """ Class to derive sets of residuals for a list of ground motion residuals according to the GMPEs """ def __init__(self, gmpe_list, imts): """ :param list gmpe_list: List of GMPE names (using the standard openquake strings) :param list imts: List of Intensity Measures """ self.gmpe_list = _check_gsim_list(gmpe_list) self.number_gmpes = len(self.gmpe_list) self.types = OrderedDict([(gmpe, {}) for gmpe in self.gmpe_list]) self.residuals = [] self.modelled = [] self.imts = imts self.unique_indices = {} self.gmpe_sa_limits = {} self.gmpe_scalars = {} for gmpe in self.gmpe_list: gmpe_dict_1 = OrderedDict([]) gmpe_dict_2 = OrderedDict([]) self.unique_indices[gmpe] = {} # Get the period range and the coefficient types # gmpe_i = GSIM_LIST[gmpe]() gmpe_i = self.gmpe_list[gmpe] for c in dir(gmpe_i): if 'COEFFS' in c: pers = [sa.period for sa in getattr(gmpe_i, c).sa_coeffs] min_per, max_per = (min(pers), max(pers)) self.gmpe_sa_limits[gmpe] = (min_per, max_per) for c in dir(gmpe_i): if 'COEFFS' in c: self.gmpe_scalars[gmpe] = list( getattr(gmpe_i, c).non_sa_coeffs.keys()) for imtx in self.imts: if "SA(" in imtx: period = imt.from_string(imtx).period if period < min_per or period > max_per: print("IMT %s outside period range for GMPE %s" % (imtx, gmpe)) gmpe_dict_1[imtx] = None gmpe_dict_2[imtx] = None continue gmpe_dict_1[imtx] = {} 
gmpe_dict_2[imtx] = {} self.unique_indices[gmpe][imtx] = [] self.types[gmpe][imtx] = [] for res_type in \ self.gmpe_list[gmpe].DEFINED_FOR_STANDARD_DEVIATION_TYPES: gmpe_dict_1[imtx][res_type] = [] gmpe_dict_2[imtx][res_type] = [] self.types[gmpe][imtx].append(res_type) gmpe_dict_2[imtx]["Mean"] = [] self.residuals.append([gmpe, gmpe_dict_1]) self.modelled.append([gmpe, gmpe_dict_2]) self.residuals = OrderedDict(self.residuals) self.modelled = OrderedDict(self.modelled) self.number_records = None self.contexts = None def get_residuals(self, database, nodal_plane_index=1, component="Geometric", normalise=True): """ Calculate the residuals for a set of ground motion records :param database: a record database. It can be either a :class:`smtk.sm_database.GroundMotionDatabase` or a :class:`smtk.sm_table.GroundMotionTable` """ # FIXME: this is hacky. One should merge sm and gm databases into # a single storage interface backed by a common storage type: calculate_observations = True if isinstance(database, GroundMotionTable): contexts = database.get_contexts(self.imts, nodal_plane_index, component) calculate_observations = False else: contexts = database.get_contexts(nodal_plane_index) # Fetch now outside the loop for efficiency the IMTs which need # acceleration units conversion from cm/s/s to g. 
Conversion will be # done inside the loop: accel_imts = tuple([imtx for imtx in self.imts if (imtx == "PGA" or "SA(" in imtx)]) # Contexts is in either case a list of dictionaries self.contexts = [] for context in contexts: # Get the observed strong ground motions if database is a # sm_database.GroundMotionDatabase (if a # sm_table.GroundMotionTable, observations are already calculated) if calculate_observations: context = self.get_observations(SMRecordSelector(database), context, component) # convert all IMTS with acceleration units, which are supposed to # be in cm/s/s, to g: for a_imt in accel_imts: context['Observations'][a_imt] = \ convert_accel_units(context['Observations'][a_imt], 'cm/s/s', 'g') # Get the expected ground motions context = self.get_expected_motions(context) context = self.calculate_residuals(context, normalise) for gmpe in self.residuals.keys(): for imtx in self.residuals[gmpe].keys(): if not context["Residual"][gmpe][imtx]: continue for res_type in self.residuals[gmpe][imtx].keys(): if res_type == "Inter event": inter_ev = \ context["Residual"][gmpe][imtx][res_type] if np.all( np.fabs(inter_ev - inter_ev[0]) < 1.0E-12): # Single inter-event residual self.residuals[gmpe][imtx][res_type].append( inter_ev[0]) # Append indices self.unique_indices[gmpe][imtx].append( np.array([0])) else: # Inter event residuals per-site e.g. 
Chiou # & Youngs (2008; 2014) case self.residuals[gmpe][imtx][res_type].extend( inter_ev.tolist()) self.unique_indices[gmpe][imtx].append( np.arange(len(inter_ev))) else: self.residuals[gmpe][imtx][res_type].extend( context["Residual"][gmpe][imtx][res_type].tolist()) self.modelled[gmpe][imtx][res_type].extend( context["Expected"][gmpe][imtx][res_type].tolist()) self.modelled[gmpe][imtx]["Mean"].extend( context["Expected"][gmpe][imtx]["Mean"].tolist()) self.contexts.append(context) for gmpe in self.residuals.keys(): for imtx in self.residuals[gmpe].keys(): if not self.residuals[gmpe][imtx]: continue for res_type in self.residuals[gmpe][imtx].keys(): self.residuals[gmpe][imtx][res_type] = np.array( self.residuals[gmpe][imtx][res_type]) self.modelled[gmpe][imtx][res_type] = np.array( self.modelled[gmpe][imtx][res_type]) self.modelled[gmpe][imtx]["Mean"] = np.array( self.modelled[gmpe][imtx]["Mean"]) def get_observations(self, sm_record_selector, context, component="Geometric"): """ Get the obsered ground motions from the database. *NOTE*: IMTs in acceleration units (e.g. PGA, SA) are supposed to return their values in cm/s/s (which is by default the unit in which they are stored) """ select_records = \ sm_record_selector.select_from_event_id(context["EventID"]) observations = OrderedDict([(imtx, []) for imtx in self.imts]) selection_string = "IMS/H/Spectra/Response/Acceleration/" for record in select_records: fle = h5py.File(record.datafile, "r") for imtx in self.imts: if imtx in SCALAR_IMTS: observations[imtx].append( get_scalar(fle, imtx, component)) elif "SA(" in imtx: target_period = imt.from_string(imtx).period spectrum = fle[selection_string + component + "/damping_05"].value periods = fle["IMS/H/Spectra/Response/Periods"].value observations[imtx].append(get_interpolated_period( target_period, periods, spectrum)) else: raise "IMT %s is unsupported!" 
% imtx fle.close() for imtx in self.imts: observations[imtx] = np.array(observations[imtx]) context["Observations"] = observations context["Num. Sites"] = len(select_records) return context # def get_observations(self, sm_record_selector, context, # component="Geometric"): # """ # Get the obsered ground motions from the database # """ # select_records = \ # sm_record_selector.select_from_event_id(context["EventID"]) # observations = OrderedDict([(imtx, []) for imtx in self.imts]) # selection_string = "IMS/H/Spectra/Response/Acceleration/" # for record in select_records: # fle = h5py.File(record.datafile, "r") # for imtx in self.imts: # if imtx in SCALAR_IMTS: # if imtx == "PGA": # observations[imtx].append( # get_scalar(fle, imtx, component) / 981.0) # else: # observations[imtx].append( # get_scalar(fle, imtx, component)) # # elif "SA(" in imtx: # target_period = imt.from_string(imtx).period # spectrum = fle[selection_string + component + # "/damping_05"].value # periods = fle["IMS/H/Spectra/Response/Periods"].value # observations[imtx].append(get_interpolated_period( # target_period, periods, spectrum) / 981.0) # else: # raise "IMT %s is unsupported!" % imtx # fle.close() # for imtx in self.imts: # observations[imtx] = np.array(observations[imtx]) # context["Observations"] = observations # context["Num. Sites"] = len(select_records) # return context def get_expected_motions(self, context): """ Calculate the expected ground motions from the context """ # TODO Rake hack will be removed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
        if not context["Rupture"].rake:
            # Default to strike-slip (rake = 0) when rake is missing
            context["Rupture"].rake = 0.0
        expected = OrderedDict([(gmpe, {}) for gmpe in self.gmpe_list])
        # Period range for GSIM
        for gmpe in self.gmpe_list:
            expected[gmpe] = OrderedDict([(imtx, {}) for imtx in self.imts])
            for imtx in self.imts:
                gsim = self.gmpe_list[gmpe]
                if "SA(" in imtx:
                    period = imt.from_string(imtx).period
                    if period < self.gmpe_sa_limits[gmpe][0] or\
                            period > self.gmpe_sa_limits[gmpe][1]:
                        # SA period outside the GMPE's supported range -
                        # mark as None so downstream loops skip it
                        expected[gmpe][imtx] = None
                        continue
                mean, stddev = gsim.get_mean_and_stddevs(
                    context["Sites"],
                    context["Rupture"],
                    context["Distances"],
                    imt.from_string(imtx),
                    self.types[gmpe][imtx])
                expected[gmpe][imtx]["Mean"] = mean
                # stddev is returned in the same order as the requested
                # standard deviation types
                for i, res_type in enumerate(self.types[gmpe][imtx]):
                    expected[gmpe][imtx][res_type] = stddev[i]
        context["Expected"] = expected
        return context

    def calculate_residuals(self, context, normalise=True):
        """
        Calculate the residual terms (Total, and - where the GMPE defines
        them - Inter event and Intra event) for a single context

        :param dict context: event context with "Observations" and
            "Expected" already populated
        :param bool normalise: normalise the random-effects residuals by
            their respective standard deviations
        """
        # Calculate residual
        residual = {}
        for gmpe in self.gmpe_list:
            residual[gmpe] = OrderedDict([])
            for imtx in self.imts:
                residual[gmpe][imtx] = {}
                obs = np.log(context["Observations"][imtx])
                if not context["Expected"][gmpe][imtx]:
                    # IMT not supported by this GMPE - propagate None
                    residual[gmpe][imtx] = None
                    continue
                mean = context["Expected"][gmpe][imtx]["Mean"]
                total_stddev = context["Expected"][gmpe][imtx]["Total"]
                # Total normalised residual: (ln(obs) - ln(pred)) / sigma
                residual[gmpe][imtx]["Total"] = (obs - mean) / total_stddev
                if "Inter event" in self.residuals[gmpe][imtx].keys():
                    inter, intra = self._get_random_effects_residuals(
                        obs,
                        mean,
                        context["Expected"][gmpe][imtx]["Inter event"],
                        context["Expected"][gmpe][imtx]["Intra event"],
                        normalise)
                    residual[gmpe][imtx]["Inter event"] = inter
                    residual[gmpe][imtx]["Intra event"] = intra
        context["Residual"] = residual
        return context

    def _get_random_effects_residuals(self, obs, mean, inter, intra,
                                      normalise=True):
        """
        Calculates the random effects residuals using the inter-event
        residual formula described in Abrahamson & Youngs (1992) Eq. 10
        """
        nvals = float(len(mean))
        inter_res = ((inter ** 2.) * sum(obs - mean)) /\
            (nvals * (inter ** 2.) + (intra ** 2.))
        # Intra-event residual is the remainder after removing the
        # inter-event term from the total misfit
        intra_res = obs - (mean + inter_res)
        if normalise:
            return inter_res / inter, intra_res / intra
        return inter_res, intra_res

    def get_residual_statistics(self):
        """
        Retrieves the mean and standard deviation values of the residuals
        for every GMPE and IMT
        """
        statistics = OrderedDict([(gmpe, OrderedDict([]))
                                  for gmpe in self.gmpe_list])
        for gmpe in self.gmpe_list:
            for imtx in self.imts:
                if not self.residuals[gmpe][imtx]:
                    continue
                statistics[gmpe][imtx] = \
                    self.get_residual_statistics_for(gmpe, imtx)
        return statistics

    def get_residual_statistics_for(self, gmpe, imtx):
        """
        Retrieves the mean and standard deviation values of the residuals
        for a given gmpe and imtx

        :param gmpe: (string) the gmpe. It must be in the list of this
            object's gmpes
        :param imtx: (string) the imt. It must be in the imts defined for
            the given `gmpe`
        """
        residuals = self.residuals[gmpe][imtx]
        return {res_type: {"Mean": np.nanmean(residuals[res_type]),
                           "Std Dev": np.nanstd(residuals[res_type])}
                for res_type in self.types[gmpe][imtx]}

    def pretty_print(self, filename=None, sep=","):
        """
        Print the information to screen or to file

        :param filename: output file path; if None, write to stdout
        :param str sep: column separator
        """
        if filename:
            fid = open(filename, "w")
        else:
            fid = sys.stdout
        fid.write("Ground Motion Residuals\n")
        # Print headers
        event = self.contexts[0]
        header_set = []
        header_set.extend([key for key in event["Distances"].__dict__])
        header_set.extend([key for key in event["Sites"].__dict__])
        header_set.extend(["{:s}-Obs.".format(imtx) for imtx in self.imts])
        for imtx in self.imts:
            for gmpe in self.gmpe_list:
                if not event["Expected"][gmpe][imtx]:
                    continue
                for key in event["Expected"][gmpe][imtx].keys():
                    header_set.append(
                        "{:s}-{:s}-{:s}-Exp.".format(imtx, gmpe, key))
        for imtx in self.imts:
            for gmpe in self.gmpe_list:
                if not event["Residual"][gmpe][imtx]:
                    continue
                for key in event["Residual"][gmpe][imtx].keys():
                    header_set.append(
                        "{:s}-{:s}-{:s}-Res.".format(imtx, gmpe, key))
        # Hook for subclasses to append extra columns
        header_set = self._extend_header_set(header_set)
        fid.write("%s\n" % sep.join(header_set))
        for event in self.contexts:
            self._pprint_event(fid, event, sep)
        if filename:
            fid.close()

    def _pprint_event(self, fid, event, sep):
        """
        Pretty print the information for each event
        """
        # Print rupture info
        rupture_str = sep.join([
            "{:s}{:s}{:s}".format(key, sep, str(val))
            for key, val in event["Rupture"].__dict__.items()])
        fid.write("Rupture: %s %s %s\n" % (str(event["EventID"]), sep,
                                           rupture_str))
        # For each record
        for i in range(event["Num. Sites"]):
            data = []
            # Distances
            for key in event["Distances"].__dict__:
                data.append("{:.4f}".format(
                    getattr(event["Distances"], key)[i]))
            # Sites
            for key in event["Sites"].__dict__:
                data.append("{:.4f}".format(getattr(event["Sites"], key)[i]))
            # Observations
            for imtx in self.imts:
                data.append("{:.8e}".format(event["Observations"][imtx][i]))
            # Expected
            for imtx in self.imts:
                for gmpe in self.gmpe_list:
                    if not event["Expected"][gmpe][imtx]:
                        continue
                    for key in event["Expected"][gmpe][imtx].keys():
                        data.append("{:.8e}".format(
                            event["Expected"][gmpe][imtx][key][i]))
            # Residuals
            for imtx in self.imts:
                for gmpe in self.gmpe_list:
                    if not event["Expected"][gmpe][imtx]:
                        continue
                    for key in event["Residual"][gmpe][imtx].keys():
                        data.append("{:.8e}".format(
                            event["Residual"][gmpe][imtx][key][i]))
            # Hook for subclasses to append extra values
            self._extend_data_print(data, event, i)
            fid.write("%s\n" % sep.join(data))

    def _extend_header_set(self, header_set):
        """
        Additional headers to add to the pretty print - does nothing here
        but overwritten in subclasses
        """
        return header_set

    def _extend_data_print(self, data, event, i):
        """
        Additional data to add to the pretty print - also does nothing here
        but overwritten in subclasses
        """
        return data

    def _get_magnitudes(self):
        """
        Returns an array of magnitudes equal in length to the number of
        residuals (one magnitude value per record, repeated per event)
        """
        magnitudes = np.array([])
        for ctxt in self.contexts:
            magnitudes = np.hstack([
                magnitudes,
                ctxt["Rupture"].mag * np.ones(len(ctxt["Distances"].repi))])
        return magnitudes

    def get_likelihood_values(self):
        """
        Returns the likelihood values for Total, plus inter- and intra-event
        residuals according to Equation 9 of Scherbaum et al (2004)

        :returns: tuple of (likelihood values dict, statistics dict with
            the median likelihood added per residual type)
        """
        statistics = self.get_residual_statistics()
        lh_values = OrderedDict([(gmpe, OrderedDict([]))
                                 for gmpe in self.gmpe_list])
        for gmpe in self.gmpe_list:
            for imtx in self.imts:
                if not self.residuals[gmpe][imtx]:
                    print("IMT %s not found in Residuals for %s"
                          % (imtx, gmpe))
                    continue
                lh_values[gmpe][imtx] = {}
                values = self._get_likelihood_values_for(gmpe, imtx)
                for res_type, data in values.items():
                    l_h, median_lh = data
                    lh_values[gmpe][imtx][res_type] = l_h
                    statistics[gmpe][imtx][res_type]["Median LH"] =\
                        median_lh
        return lh_values, statistics

    def _get_likelihood_values_for(self, gmpe, imt):
        """
        Returns the likelihood values for Total, plus inter- and intra-event
        residuals according to Equation 9 of Scherbaum et al (2004) for the
        given gmpe and the given intensity measure type.
        `gmpe` must be in this object gmpe(s) list and imt must be defined
        for the given gmpe: this two conditions are not checked for here.

        :return: a dict mapping the residual type(s) (string) to the tuple
            lh, median_lh where the first is the array of likelihood values
            and the latter is the median of those values
        """
        ret = {}
        for res_type in self.types[gmpe][imt]:
            zvals = np.fabs(self.residuals[gmpe][imt][res_type])
            # LH = 1 - erf(|z| / sqrt(2)): two-tailed exceedance
            # probability of the normalised residual
            l_h = 1.0 - erf(zvals / sqrt(2.))
            median_lh = np.nanpercentile(l_h, 50.0)
            ret[res_type] = l_h, median_lh
        return ret

    def get_loglikelihood_values(self, imts):
        """
        Returns the loglikelihood fit of the GMPEs to data using the
        loglikehood (LLH) function described in Scherbaum et al. (2009)
        Scherbaum, F., Delavaud, E., Riggelsen, C. (2009) "Model Selection
        in Seismic Hazard Analysis: An Information-Theoretic Perspective",
        Bulletin of the Seismological Society of America, 99(6), 3234-3247

        :param imts:
            List of intensity measures for LLH calculation
        """
        log_residuals = OrderedDict([(gmpe, np.array([]))
                                     for gmpe in self.gmpe_list])
        imt_list = [(imtx, None) for imtx in imts]
        imt_list.append(("All", None))
        llh = OrderedDict([(gmpe, OrderedDict(imt_list))
                           for gmpe in self.gmpe_list])
        for gmpe in self.gmpe_list:
            for imtx in imts:
                if not (imtx in self.imts) or not self.residuals[gmpe][imtx]:
                    print("IMT %s not found in Residuals for %s"
                          % (imtx, gmpe))
                    continue
                # Get log-likelihood distance for IMT
                asll = np.log2(norm.pdf(self.residuals[gmpe][imtx]["Total"],
                               0.,
                               1.0))
                log_residuals[gmpe] = np.hstack([
                    log_residuals[gmpe],
                    asll])
                llh[gmpe][imtx] = -(1.0 / float(len(asll))) * np.sum(asll)
            # "All" entry aggregates over every IMT for this GMPE
            llh[gmpe]["All"] = -(1. / float(len(log_residuals[gmpe]))) *\
                np.sum(log_residuals[gmpe])
        # Get weights: data support for each model, 2^-LLH, normalised
        weights = np.array([2.0 ** -llh[gmpe]["All"]
                            for gmpe in self.gmpe_list])
        weights = weights / np.sum(weights)
        model_weights = OrderedDict([
            (gmpe, weights[iloc])
            for iloc, gmpe in enumerate(self.gmpe_list)]
        )
        return llh, model_weights

    # Mak et al multivariate LLH functions
    def get_multivariate_loglikelihood_values(self, sum_imts=False):
        """
        Calculates the multivariate LLH for a set of GMPEs and IMTS
        according to the approach described in Mak et al. (2017)

        Mak, S., Clements, R. A. and Schorlemmer, D. (2017) "Empirical
        Evaluation of Hierarchical Ground-Motion Models: Score Uncertainty
        and Model Weighting", Bulletin of the Seismological Society of
        America, 107(2), 949-965

        :param sum_imts:
            If True then retuns a single multivariate LLH value summing
            the values from all imts, otherwise returns sepearate
            multivariate LLH for each imt.
        """
        multi_llh_values = OrderedDict([(gmpe, {})
                                        for gmpe in self.gmpe_list])
        # Get number of events and records
        for gmpe in self.gmpe_list:
            print("GMPE = {:s}".format(gmpe))
            for j, imtx in enumerate(self.imts):
                if self.residuals[gmpe][imtx] is None:
                    # IMT missing for this GMPE
                    multi_llh_values[gmpe][imtx] = 0.0
                else:
                    multi_llh_values[gmpe][imtx] = get_multivariate_ll(
                        self.contexts, gmpe, imtx)
            if sum_imts:
                # Collapse the per-IMT values into a single scalar,
                # skipping NaNs
                total_llh = 0.0
                for imtx in self.imts:
                    if np.isnan(multi_llh_values[gmpe][imtx]):
                        continue
                    total_llh += multi_llh_values[gmpe][imtx]
                multi_llh_values[gmpe] = total_llh
        return multi_llh_values

    def bootstrap_multivariate_llhvalues(self, number_bootstraps,
                                         sum_imts=False, parallelize=False,
                                         concurrent_tasks=8):
        """
        Bootstrap the analysis using cluster sampling, as describe in
        Mak et al. 2017. OpenQuake's :class:
        `openquake.baselib.parallel.Starmap` utility is invoked to
        parallelise the calculations by bootstrap

        :param int number_bootstraps: number of bootstrap samples
        :param bool sum_imts: passed through to get_distinctiveness
        :param bool parallelize: not implemented yet - raises if True
        :param int concurrent_tasks: reserved for the parallel case
        """
        # Setup multivariate log-likelihood dict
        # NOTE(review): nmods and multi_llh_values are built here but
        # never used afterwards - presumably left over from a planned
        # Starmap-based implementation
        multi_llh_values = []
        nmods = []
        for i, gmpe in enumerate(self.gmpe_list):
            for j, imtx in enumerate(self.imts):
                nmods.append((i, j))
                multi_llh_values.append((gmpe, imtx))
        outputs = np.zeros([len(self.gmpe_list),
                            len(self.imts),
                            number_bootstraps])
        if parallelize:
            raise NotImplementedError("Parellelisation not turned on yet!")
        else:
            for j in range(number_bootstraps):
                print("Bootstrap {:g} of {:g}".format(j + 1,
                                                      number_bootstraps))
                outputs[:, :, j] = bootstrap_llh(j, self.contexts,
                                                 self.gmpe_list, self.imts)
        distinctiveness = self.get_distinctiveness(outputs,
                                                   number_bootstraps,
                                                   sum_imts)
        return distinctiveness, outputs

    def get_distinctiveness(self, outputs, number_bootstraps, sum_imts):
        """
        Return the distinctiveness index as described in equation 9 of
        Mak et al. (2017)

        :param outputs: array of bootstrap LLH values, shape
            [n_gmpes, n_imts, n_bootstraps]
        :param number_bootstraps: number of bootstrap samples taken
        :param sum_imts: if True aggregate over IMTs into a 2-D index
        """
        ngmpes = len(self.gmpe_list)
        nbs = float(number_bootstraps)
        nimts = float(len(self.imts))
        if sum_imts:
            distinctiveness = np.zeros([ngmpes, ngmpes])
            # Get only one index for each GMPE
            # NOTE(review): the inner loop variable shadows the outer
            # "gmpe" name - harmless (only i and j are used) but confusing
            for i, gmpe in enumerate(self.gmpe_list):
                for j, gmpe in enumerate(self.gmpe_list):
                    if i == j:
                        continue
                    data_i = outputs[i, :, :]
                    data_j = outputs[j, :, :]
                    distinctiveness[i, j] = float(
                        np.sum(data_i < data_j) - np.sum(data_j < data_i)) /\
                        (nbs * nimts)
            return distinctiveness
        else:
            distinctiveness = np.zeros([ngmpes, ngmpes, len(self.imts)])
            for i, gmpe in enumerate(self.gmpe_list):
                for j, gmpe in enumerate(self.gmpe_list):
                    if i == j:
                        continue
                    for k in range(len(self.imts)):
                        data_i = outputs[i, k, :]
                        data_j = outputs[j, k, :]
                        distinctiveness[i, j, k] =\
                            float(np.sum(data_i < data_j) -
                                  np.sum(data_j < data_i)) / nbs
            return distinctiveness

    def get_edr_values(self, bandwidth=0.01, multiplier=3.0):
        """
        Calculates the EDR values for each GMPE according to the Euclidean
        Distance Ranking method of Kale & Akkar (2013)

        Kale, O., and Akkar, S. (2013) "A New Procedure for Selecting and
        Ranking Ground Motion Predicion Equations (GMPEs): The Euclidean
        Distance-Based Ranking Method", Bulletin of the Seismological
        Society of America, 103(2A), 1069 - 1084.

        :param float bandwidth:
            Discretisation width
        :param float multiplier:
            "Multiplier of standard deviation (equation 8 of Kale and Akkar)
        """
        edr_values = OrderedDict([(gmpe, {}) for gmpe in self.gmpe_list])
        for gmpe in self.gmpe_list:
            obs, expected, stddev = self._get_edr_gmpe_information(gmpe)
            results = self._get_edr(obs,
                                    expected,
                                    stddev,
                                    bandwidth,
                                    multiplier)
            edr_values[gmpe]["MDE Norm"] = results[0]
            edr_values[gmpe]["sqrt Kappa"] = results[1]
            edr_values[gmpe]["EDR"] = results[2]
        return edr_values

    def _get_edr_gmpe_information(self, gmpe):
        """
        Extract the observed ground motions, expected and total standard
        deviation for the GMPE (aggregating over all IMS)
        """
        obs = np.array([], dtype=float)
        expected = np.array([], dtype=float)
        stddev = np.array([], dtype=float)
        for imtx in self.imts:
            for context in self.contexts:
                obs = np.hstack([obs, np.log(context["Observations"][imtx])])
                expected = np.hstack([
                    expected,
                    context["Expected"][gmpe][imtx]["Mean"]])
                stddev = np.hstack([
                    stddev,
                    context["Expected"][gmpe][imtx]["Total"]])
        return obs, expected, stddev

    def _get_edr(self, obs, expected, stddev, bandwidth=0.01,
                 multiplier=3.0):
        """
        Calculated the Euclidean Distanced-Based Rank for a set of
        observed and expected values from a particular GMPE
        """
        nvals = len(obs)
        min_d = bandwidth / 2.
kappa = self._get_edr_kappa(obs, expected) mu_d = obs - expected d1c = np.fabs(obs - (expected - (multiplier * stddev))) d2c = np.fabs(obs - (expected + (multiplier * stddev))) dc_max = ceil(np.max(np.array([np.max(d1c), np.max(d2c)]))) num_d = len(np.arange(min_d, dc_max, bandwidth)) mde = np.zeros(nvals) for iloc in range(0, num_d): d_val = (min_d + (float(iloc) * bandwidth)) * np.ones(nvals) d_1 = d_val - min_d d_2 = d_val + min_d p_1 = norm.cdf((d_1 - mu_d) / stddev) -\ norm.cdf((-d_1 - mu_d) / stddev) p_2 = norm.cdf((d_2 - mu_d) / stddev) -\ norm.cdf((-d_2 - mu_d) / stddev) mde += (p_2 - p_1) * d_val inv_n = 1.0 / float(nvals) mde_norm = np.sqrt(inv_n * np.sum(mde ** 2.)) edr = np.sqrt(kappa * inv_n * np.sum(mde ** 2.)) return mde_norm, np.sqrt(kappa), edr def _get_edr_kappa(self, obs, expected): """ Returns the correction factor kappa """ mu_a = np.mean(obs) mu_y = np.mean(expected) b_1 = np.sum((obs - mu_a) * (expected - mu_y)) /\ np.sum((obs - mu_a) ** 2.) b_0 = mu_y - b_1 * mu_a y_c = expected - ((b_0 + b_1 * obs) - obs) de_orig = np.sum((obs - expected) ** 2.) de_corr = np.sum((obs - y_c) ** 2.) return de_orig / de_corr GSIM_MODEL_DATA_TESTS = { "Residuals": lambda residuals, config: residuals.get_residual_statistics(), "Likelihood": lambda residuals, config: residuals.get_likelihood_values(), "LLH": lambda residuals, config: residuals.get_loglikelihood_values( config.get("LLH IMTs", [imt for imt in residuals.imts])), "MultivariateLLH": lambda residuals, config: residuals.get_multivariate_loglikelihood_values(), "EDR": lambda residuals, config: residuals.get_edr_values( config.get("bandwidth", 0.01), config.get("multiplier", 3.0)) } # Deprecated functions for GMPE to data testing - kept here for backward # compatibility class Likelihood(Residuals): """ Implements the likelihood function of Scherbaum et al. (2004) Scherbaum, F., Cotton, F. and Smit, P. 
(2004) "On the Use of Response Spectral-Reference Data for the Selection and Ranking of Ground-Motion Models for Seismic Hazard Analysis in Regions of Moderate Seismicity: The Case of Rock Motion". Bulletin of the Seismological Society of America, 94(6), 2164-2185 Now deprecated """ def __init__(self, gmpe_list, imts): warnings.warn("Likelihood tool is deprecated. Use function " "get_likelihood_values() in main Residuals class", DeprecationWarning, stacklevel=2) super().__init__(gmpe_list, imts) class LLH(Residuals): """ Implements the average sample log-likelihood estimator from Scherbaum et al (2009). Scherbaum, F., Delavaud, E., Riggelsen, C. (2009) "Model Selection in Seismic Hazard Analysis: An Information-Theoretic Perspective", Bulletin of the Seismological Society of America, 99(6), 3234-3247 """ def __init__(self, gmpe_list, imts): warnings.warn("Likelihood tool is deprecated. Use function " "get_loglikelihood_values(imts) in main Residuals class", DeprecationWarning, stacklevel=2) super().__init__(gmpe_list, imts) class MultivariateLLH(Residuals): """ Multivariate formulation of the LLH function as proposed by Mak et al. (2017) Mak, S., Clements, R. A. and Schorlemmer, D. (2017) "Empirical Evaluation of Hierarchical Ground-Motion Models: Score Uncertainty and Model Weighting", Bulletin of the Seismological Society of America, 107(2), 949-965 """ def __init__(self, gmpe_list, imts): warnings.warn("Multivariate LLH tool is deprecated. Use " "function get_multivariate_loglikelihood_values() " "in the main Residuals class", DeprecationWarning, stacklevel=2) super().__init__(gmpe_list, imts) def get_likelihood_values(self, sum_imts=False): """ Calculates the multivariate LLH for a set of GMPEs and IMTS according to the approach described in Mak et al. (2017) """ self.get_multivariate_loglikelihood_values(sum_imts) class EDR(Residuals): """ Implements the Euclidean Distance-Based Ranking Method for GMPE selection by Kale & Akkar (2013) Kale, O., and Akkar, S. 
(2013) "A New Procedure for Selecting and Ranking Ground Motion Predicion Equations (GMPEs): The Euclidean Distance-Based Ranking Method", Bulletin of the Seismological Society of America, 103(2A), 1069 - 1084. """ def __init__(self, gmpe_list, imts): warnings.warn("EDR tool is deprecated. Use function get_edr_values() " "in the main Residuals class", DeprecationWarning, stacklevel=2) super().__init__(gmpe_list, imts) class SingleStationAnalysis(object): """ Class to analyse residual sets recorded at specific stations """ def __init__(self, site_id_list, gmpe_list, imts): """ """ self.site_ids = site_id_list self.input_gmpe_list = deepcopy(gmpe_list) self.gmpe_list = _check_gsim_list(gmpe_list) self.imts = imts self.site_residuals = [] self.types = OrderedDict([(gmpe, {}) for gmpe in self.gmpe_list]) for gmpe in self.gmpe_list: # if not gmpe in GSIM_LIST: # raise ValueError("%s not supported in OpenQuake" % gmpe) for imtx in self.imts: self.types[gmpe][imtx] = [] for res_type in \ self.gmpe_list[gmpe].DEFINED_FOR_STANDARD_DEVIATION_TYPES: self.types[gmpe][imtx].append(res_type) def get_site_residuals(self, database, component="Geometric"): """ Calculates the total, inter-event and within-event residuals for each site """ # imt_dict = dict([(imtx, {}) for imtx in self.imts]) for site_id in self.site_ids: print(site_id) selector = SMRecordSelector(database) site_db = selector.select_from_site_id(site_id, as_db=True) resid = Residuals(self.input_gmpe_list, self.imts) resid.get_residuals(site_db, normalise=False, component=component) setattr( resid, "site_analysis", self._set_empty_dict()) setattr( resid, "site_expected", self._set_empty_dict()) self.site_residuals.append(resid) def _set_empty_dict(self): """ Sets an empty set of nested dictionaries for each GMPE and each IMT """ return OrderedDict([ (gmpe, dict([(imtx, {}) for imtx in self.imts])) for gmpe in self.gmpe_list]) def residual_statistics(self, pretty_print=False, filename=None): """ Get single-station 
        residual statistics for each site
        """
        output_resid = []
        # Work on deep copies so the per-site statistics are attached
        # without mutating the originally computed residual sets
        for t_resid in self.site_residuals:
            resid = deepcopy(t_resid)
            for gmpe in self.gmpe_list:
                for imtx in self.imts:
                    # No residuals computed for this GMPE/IMT pair - skip
                    if not resid.residuals[gmpe][imtx]:
                        continue
                    n_events = len(resid.residuals[gmpe][imtx]["Total"])
                    resid.site_analysis[gmpe][imtx]["events"] = n_events
                    resid.site_analysis[gmpe][imtx]["Total"] = np.copy(
                        t_resid.residuals[gmpe][imtx]["Total"])
                    resid.site_analysis[gmpe][imtx]["Expected Total"] = \
                        np.copy(t_resid.modelled[gmpe][imtx]["Total"])
                    if not ("Intra event" in t_resid.residuals[gmpe][imtx]):
                        # GMPE has no within-event term - skip
                        continue
                    resid.site_analysis[gmpe][imtx]["Intra event"] = np.copy(
                        t_resid.residuals[gmpe][imtx]["Intra event"])
                    resid.site_analysis[gmpe][imtx]["Inter event"] = np.copy(
                        t_resid.residuals[gmpe][imtx]["Inter event"])
                    # dS2S_s: site term, i.e. the average within-event
                    # residual at this site (Rodriguez-Marek et al., 2011,
                    # Equation 8)
                    delta_s2ss = self._get_delta_s2ss(
                        resid.residuals[gmpe][imtx]["Intra event"],
                        n_events)
                    # dWo,es: within-event residuals with the site term
                    # removed
                    delta_woes = \
                        resid.site_analysis[gmpe][imtx]["Intra event"] - \
                        delta_s2ss
                    resid.site_analysis[gmpe][imtx]["dS2ss"] = delta_s2ss
                    resid.site_analysis[gmpe][imtx]["dWo,es"] = delta_woes
                    # Single-station phi for this site (Equation 11)
                    resid.site_analysis[gmpe][imtx]["phi_ss,s"] = \
                        self._get_single_station_phi(
                            resid.residuals[gmpe][imtx]["Intra event"],
                            delta_s2ss,
                            n_events)
                    # Get expected values too
                    resid.site_analysis[gmpe][imtx]["Expected Inter"] =\
                        np.copy(t_resid.modelled[gmpe][imtx]["Inter event"])
                    resid.site_analysis[gmpe][imtx]["Expected Intra"] =\
                        np.copy(t_resid.modelled[gmpe][imtx]["Intra event"])
            output_resid.append(resid)
        # Replace the stored residual sets with the annotated copies before
        # computing the station-averaged statistics
        self.site_residuals = output_resid
        return self.get_total_phi_ss(pretty_print, filename)

    def _get_delta_s2ss(self, intra_event, n_events):
        """
        Returns the average within-event residual for the site from
        Rodriguez-Marek et al. (2011) Equation 8

        :param intra_event:
            Array of within-event residuals for the site
        :param int n_events:
            Number of events contributing to the site
        """
        return (1. / float(n_events)) * np.sum(intra_event)

    def _get_single_station_phi(self, intra_event, delta_s2ss, n_events):
        """
        Returns the single-station phi for the specific station
        Rodriguez-Marek et al.
        (2011) Equation 11

        :param intra_event:
            Array of within-event residuals for the site
        :param float delta_s2ss:
            Site term (average within-event residual) for the site
        :param int n_events:
            Number of events contributing to the site
        """
        # Sample variance of the site-corrected within-event residuals
        # (n - 1 denominator)
        phiss = np.sum((intra_event - delta_s2ss) ** 2.) / float(n_events - 1)
        return np.sqrt(phiss)

    def get_total_phi_ss(self, pretty_print=None, filename=None):
        """
        Returns the station averaged single-station phi
        Rodriguez-Marek et al. (2011) Equation 10

        :param pretty_print:
            If truthy, report per-site and total values to ``filename``
            (or stdout when ``filename`` is None)
        :param filename:
            Destination path of the pretty-printed report
        :returns:
            Tuple of (phi_ss, phi_s2ss) nested dicts keyed by GMPE and IMT
        """
        if pretty_print:
            if filename:
                fid = open(filename, "w")
            else:
                fid = sys.stdout
        phi_ss = self._set_empty_dict()
        phi_s2ss = self._set_empty_dict()
        # NOTE(review): n_sites is computed but never used below
        n_sites = float(len(self.site_residuals))
        for gmpe in self.gmpe_list:
            if pretty_print:
                print("%s" % gmpe, file=fid)
            for imtx in self.imts:
                if pretty_print:
                    print("%s" % imtx, file=fid)
                # Random-effects residuals missing for this GMPE/IMT: the
                # entry is left as an empty dict and reported to stdout
                if not ("Intra event" in self.site_residuals[0].site_analysis[
                        gmpe][imtx]):
                    print("GMPE %s and IMT %s do not have defined "
                          "random effects residuals" % (str(gmpe), str(imtx)))
                    continue
                n_events = []
                numerator_sum = 0.0
                d2ss = []
                # Accumulate the numerator of Equation 10 over all sites
                for iloc, resid in enumerate(self.site_residuals):
                    d2ss.append(resid.site_analysis[gmpe][imtx]["dS2ss"])
                    n_events.append(resid.site_analysis[gmpe][imtx]["events"])
                    numerator_sum += np.sum((
                        resid.site_analysis[gmpe][imtx]["Intra event"] -
                        resid.site_analysis[gmpe][imtx]["dS2ss"]) ** 2.)
                    if pretty_print:
                        print("Site ID, %s, dS2Ss, %12.8f, "
                              "phiss_s, %12.8f, Num Records, %s" % (
                                  self.site_ids[iloc],
                                  resid.site_analysis[gmpe][imtx]["dS2ss"],
                                  resid.site_analysis[gmpe][imtx]["phi_ss,s"],
                                  resid.site_analysis[gmpe][imtx]["events"]),
                              file=fid)
                # Mean and standard deviation of the site terms (phi_s2s)
                d2ss = np.array(d2ss)
                phi_s2ss[gmpe][imtx] = {"Mean": np.mean(d2ss),
                                        "StdDev": np.std(d2ss)}
                phi_ss[gmpe][imtx] = np.sqrt(
                    numerator_sum /
                    float(np.sum(np.array(n_events)) - 1))
        if pretty_print:
            print("TOTAL RESULTS FOR GMPE", file=fid)
            for gmpe in self.gmpe_list:
                print("%s" % gmpe, file=fid)
                for imtx in self.imts:
                    print("%s, phi_ss, %12.8f, phi_s2ss(Mean),"
                          " %12.8f, phi_s2ss(Std. Dev), %12.8f" % (imtx,
                          phi_ss[gmpe][imtx], phi_s2ss[gmpe][imtx]["Mean"],
                          phi_s2ss[gmpe][imtx]["StdDev"]), file=fid)
            # NOTE(review): the file is only closed on the pretty_print
            # path; the summary print above will raise if any GMPE/IMT was
            # skipped (its phi_ss entry stays an empty dict) - confirm
            if filename:
                fid.close()
        return phi_ss, phi_s2ss