#!/usr/bin/env python # -*- coding: utf-8 -*- # vim: tabstop=4 shiftwidth=4 softtabstop=4 # # Copyright (C) 2014-2017 GEM Foundation and G. Weatherill # # OpenQuake is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # OpenQuake is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with OpenQuake. If not, see <http://www.gnu.org/licenses/>. """ Module managing GMPE+IMT residual plot data. This module avoids the use of classes and inhertances as simple functions accomplish the task without unnecessary overhead. All non-private functions should return the same dicts (see docstrings for details) """ import numpy as np from scipy.stats import linregress def _tojson(*numpy_objs): '''Utility function which returns a list where each element of numpy_objs is converted to its python equivalent (float or list)''' ret = [] # problem: browsers might not be happy with JSON 'NAN', so convert # NaNs to None. Unfortunately, the conversion must be done element wise # in numpy (seems not to exist a pandas na filter): for obj in numpy_objs: isscalar = np.isscalar(obj) nan_indices = None if isscalar else \ np.argwhere(np.isnan(obj)).flatten() # note: numpy.float64(N).tolist() returns a python float, so: obj = None if isscalar and np.isnan(obj) else obj.tolist() if nan_indices is not None: for idx in nan_indices: obj[idx] = None ret.append(obj) return ret # tuple(_.tolist() for _ in numpy_objs) def residuals_density_distribution(residuals, gmpe, imt, bin_width=0.5, as_json=False): '''Returns the density distribution of the given gmpe and imt :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Residuals :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'mean' (float) and 'Std Dev' (float) representing the mean and standard deviation of the data ''' statistics = residuals.get_residual_statistics_for(gmpe, imt) plot_data = {} data = residuals.residuals[gmpe][imt] for res_type in data.keys(): vals, bins = _get_histogram_data(data[res_type], bin_width=bin_width) mean = statistics[res_type]["Mean"] stddev = statistics[res_type]["Std Dev"] x = bins[:-1] y = vals if as_json: mean, stddev, x, y = _tojson(mean, stddev, x, y) plot_data[res_type] = \ {'x': x, 'y': y, 'mean': mean, 'stddev': stddev, 'xlabel': "Z (%s)" % imt, 'ylabel': "Frequency"} return plot_data def _get_histogram_data(data, bin_width=0.5): """ Retreives the histogram of the residuals """ # ignore nans otherwise max and min raise: bins = np.arange(np.floor(np.nanmin(data)), np.ceil(np.nanmax(data)) + bin_width, bin_width) # work on finite numbers to prevent np.histogram raising: vals = np.histogram(data[np.isfinite(data)], bins, density=True)[0] return vals.astype(float), bins def likelihood(residuals, gmpe, imt, bin_width=0.1, as_json=False): '''Returns the likelihood of the given gmpe and imt :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Likelihood :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'median' (float) representing the median of the data ''' plot_data = {} data = residuals._get_likelihood_values_for(gmpe, imt) for res_type in data.keys(): lh_vals, median_lh = data[res_type] vals, bins = _get_lh_histogram_data(lh_vals, bin_width=bin_width) x = bins[:-1] y = vals if as_json: median_lh, x, y = _tojson(median_lh, x, y) plot_data[res_type] = \ {'x': x, 'y': y, 'median': median_lh, 'xlabel': "LH (%s)" % imt, 'ylabel': "Frequency"} return plot_data def _get_lh_histogram_data(lh_values, bin_width=0.1): """ Retreives the histogram of the likelihoods """ bins = np.arange(0.0, 1.0 + bin_width, bin_width) # work on finite numbers to prevent np.histogram raising: vals = np.histogram(lh_values[np.isfinite(lh_values)], bins, density=True)[0] return vals.astype(float), bins def residuals_with_magnitude(residuals, gmpe, imt, as_json=False): '''Returns the residuals of the given gmpe and imt vs. magnitude :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Residuals :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'slope' (float), 'intercept' (float) and 'pvalue' (float) representing the linear regression of the data ''' plot_data = {} data = residuals.residuals[gmpe][imt] for res_type in data.keys(): x = _get_magnitudes(residuals, gmpe, imt, res_type) slope, intercept, _, pval, _ = _nanlinregress(x, data[res_type]) y = data[res_type] if as_json: x, y, slope, intercept, pval = \ _tojson(x, y, slope, intercept, pval) plot_data[res_type] = \ {'x': x, 'y': y, 'slope': slope, 'intercept': intercept, 'pvalue': pval, 'xlabel': "Magnitude", 'ylabel': "Z (%s)" % imt} return plot_data def _get_magnitudes(residuals, gmpe, imt, res_type): """ Returns an array of magnitudes equal in length to the number of residuals """ magnitudes = np.array([]) for i, ctxt in enumerate(residuals.contexts): if res_type == "Inter event": nval = np.ones( len(residuals.unique_indices[gmpe][imt][i]) ) else: nval = np.ones(len(ctxt["Distances"].repi)) magnitudes = np.hstack([magnitudes, ctxt["Rupture"].mag * nval]) return magnitudes def residuals_with_vs30(residuals, gmpe, imt, as_json=False): '''Returns the residuals of the given gmpe and imt vs. vs30 :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Residuals :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'slope' (float), 'intercept' (float) and 'pvalue' (float) representing the linear regression of the data ''' plot_data = {} data = residuals.residuals[gmpe][imt] for res_type in data.keys(): x = _get_vs30(residuals, gmpe, imt, res_type) slope, intercept, _, pval, _ = _nanlinregress(x, data[res_type]) y = data[res_type] if as_json: x, y, slope, intercept, pval = \ _tojson(x, y, slope, intercept, pval) plot_data[res_type] = \ {'x': x, 'y': y, 'slope': slope, 'intercept': intercept, 'pvalue': pval, 'xlabel': "Vs30 (m/s)", 'ylabel': "Z (%s)" % imt} return plot_data def _get_vs30(residuals, gmpe, imt, res_type): """ """ vs30 = np.array([]) for i, ctxt in enumerate(residuals.contexts): if res_type == "Inter event": vs30 = np.hstack([vs30, ctxt["Sites"].vs30[ residuals.unique_indices[gmpe][imt][i]]]) else: vs30 = np.hstack([vs30, ctxt["Sites"].vs30]) return vs30 def residuals_with_distance(residuals, gmpe, imt, distance_type="rjb", as_json=False): '''Returns the residuals of the given gmpe and imt vs. distance :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Residuals :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'slope' (float), 'intercept' (float) and 'pvalue' (float) representing the linear regression of the data ''' plot_data = {} data = residuals.residuals[gmpe][imt] for res_type in data.keys(): x = _get_distances(residuals, gmpe, imt, res_type, distance_type) slope, intercept, _, pval, _ = _nanlinregress(x, data[res_type]) y = data[res_type] if as_json: x, y, slope, intercept, pval = \ _tojson(x, y, slope, intercept, pval) plot_data[res_type] = \ {'x': x, 'y': y, 'slope': slope, 'intercept': intercept, 'pvalue': pval, 'xlabel': "%s Distance (km)" % distance_type, 'ylabel': "Z (%s)" % imt} return plot_data def _get_distances(residuals, gmpe, imt, res_type, distance_type): """ """ distances = np.array([]) for i, ctxt in enumerate(residuals.contexts): # Get the distances if res_type == "Inter event": ctxt_dist = getattr(ctxt["Distances"], distance_type)[ residuals.unique_indices[gmpe][imt][i]] distances = np.hstack([distances, ctxt_dist]) else: distances = np.hstack([ distances, getattr(ctxt["Distances"], distance_type) ]) return distances def residuals_with_depth(residuals, gmpe, imt, as_json=False): '''Returns the residuals of the given gmpe and imt vs. depth :param residuals: Residuals as instance of :class: smtk.gmpe_residuals.Residuals :param gmpe: (string) the gmpe/gsim :param imt: (string) the intensity measure type :param as_json: when True, converts all numpy numeric values (scalar and arrays) to their python equivalent. False by default :return: a dict mapping each residual type (string, e.g. 'Intra event') to a dict with (at least) the mandatory keys 'x', 'y', 'xlabel', 'ylabel' representing the plot data. Additional keys: 'slope' (float), 'intercept' (float) and 'pvalue' (float) representing the linear regression of the data ''' plot_data = {} data = residuals.residuals[gmpe][imt] for res_type in data.keys(): x = _get_depths(residuals, gmpe, imt, res_type) slope, intercept, _, pval, _ = _nanlinregress(x, data[res_type]) y = data[res_type] if as_json: x, y, slope, intercept, pval = \ _tojson(x, y, slope, intercept, pval) plot_data[res_type] = \ {'x': x, 'y': y, 'slope': slope, 'intercept': intercept, 'pvalue': pval, 'xlabel': "Hypocentral Depth (km)", 'ylabel': "Z (%s)" % imt} return plot_data def _get_depths(residuals, gmpe, imt, res_type): """ Returns an array of magnitudes equal in length to the number of residuals """ depths = np.array([]) for i, ctxt in enumerate(residuals.contexts): if res_type == "Inter event": nvals = np.ones(len(residuals.unique_indices[gmpe][imt][i])) else: nvals = np.ones(len(ctxt["Distances"].repi)) # TODO This hack needs to be fixed!!! if not ctxt["Rupture"].hypo_depth: depths = np.hstack([depths, 10.0 * nvals]) else: depths = np.hstack([depths, ctxt["Rupture"].hypo_depth * nvals]) return depths def _nanlinregress(x, y): '''Calls scipy linregress only on finite numbers of x and y''' finite = np.isfinite(x) & np.isfinite(y) if not finite.any(): # empty arrays passed to linreg raise ValueError: # force returning an object with nans: return linregress([np.nan], [np.nan]) return linregress(x[finite], y[finite])