python source code of energy

import numpy as np

from astropy import units as u

from .regressor_classifier_base import RegressorClassifierBase
from sklearn.ensemble import RandomForestRegressor


class EnergyRegressor(RegressorClassifierBase):
    """This class collects one regressor for every camera type -- given
    by `cam_id_list` -- to get an estimate for the energy of an
    air-shower.  The class interfaces with `scikit-learn` in that it
    relays all function calls not exeplicitly defined here through
    `.__getattr__` to any regressor in its collection.  This gives us
    the full power and convenience of scikit-learn and we only have to
    bother about the high-level interface.

    The fitting approach is to train on single images; then separately
    predict an image's energy and combine the different predictions
    for all the telscopes in each event in one single energy estimate.

    TODO: come up with a better merging approach to combine the
          estimators of the various camera types

    Parameters
    ----------
    regressor : scikit-learn regressor
        the regressor you want to use to estimate the shower energies
    cam_id_list : list of strings
        list of identifiers to differentiate the various sources of
        the images; could be the camera IDs or even telescope IDs. We
        will train one regressor for each of the identifiers.
    unit : astropy.Quantity
        scikit-learn regressors don't work with astropy unit. so, tell
        in advance in which unit we want to deal here.
    kwargs
        arguments to be passed on to the constructor of the regressors

    """

    def __init__(
        self, regressor=RandomForestRegressor, cam_id_list="cam", unit=u.TeV, **kwargs
    ):
        super().__init__(model=regressor, cam_id_list=cam_id_list, unit=unit, **kwargs)

    def predict_by_event(self, event_list):
        """expects a list of events where every "event" is a dictionary
        mapping telescope identifiers to the list of feature-lists by
        the telescopes of that type in this event. Refer to the Note
        section in `.reshuffle_event_list` for an description how `event_list`
        is supposed to look like.  The singular estimate for the event
        is simply the mean of the various estimators of the event.

        event_list : list of "events"
            cf. `.reshuffle_event_list` under Notes

        Returns
        -------
        dict :
            dictionary that contains various statistical modes (mean,
            median, standard deviation) of the predicted quantity of
            every telescope for all events.

        Raises
        ------
        KeyError:
            if there is a telescope identifier in `event_list` that is not a
            key in the regressor dictionary

        """

        predict_mean = []
        predict_median = []
        predict_std = []
        for evt in event_list:
            predicts = []
            weights = []
            for cam_id, tels in evt.items():
                try:
                    t_res = self.model_dict[cam_id].predict(tels).tolist()
                    predicts += t_res
                except KeyError:
                    # QUESTION if there is no trained classifier for
                    # `cam_id`, raise an error or just pass this
                    # camera type?
                    raise KeyError(
                        "cam_id '{}' in event_list but no model defined: {}".format(
                            cam_id, [k for k in self.model_dict]
                        )
                    )

                try:
                    # if a `namedtuple` is provided, we can weight the different images
                    # using some of the provided features
                    weights += [t.sum_signal_cam / t.impact_dist for t in tels]
                except AttributeError:
                    # otherwise give every image the same weight
                    weights += [1] * len(tels)

            predict_mean.append(np.average(predicts, weights=weights))
            predict_median.append(np.median(predicts))
            predict_std.append(np.std(predicts))

        return {
            "mean": np.array(predict_mean) * self.unit,
            "median": np.array(predict_median) * self.unit,
            "std": np.array(predict_std) * self.unit,
        }

    def predict_by_telescope_type(self, event_list):
        """same as `predict_dict` only that it returns a list of dictionaries
        with an estimate for the target quantity for every telescope
        type separately.

        more for testing- and performance-measuring-purpouses -- to
        see how the different telescope types behave throughout the
        energy ranges and if a better (energy-dependant) combination
        of the separate telescope-wise estimators (compared to the
        mean) can be achieved.

        """

        predict_list_dict = []
        for evt in event_list:
            res_dict = {}
            for cam_id, tels in evt.items():
                t_res = self.model_dict[cam_id].predict(tels).tolist()
                res_dict[cam_id] = np.mean(t_res) * self.unit
            predict_list_dict.append(res_dict)

        return predict_list_dict

    @classmethod
    def load(cls, path, cam_id_list, unit=u.TeV):
        """this is only here to overwrite the unit argument with an astropy
        quantity

        Parameters
        ----------
        path : string
            the path where the pre-trained, pickled regressors are
            stored `path` is assumed to contain a `{cam_id}` keyword
            to be replaced by each camera identifier in `cam_id_list`
            (or at least a naked `{}`).
        cam_id_list : list
            list of camera identifiers like telescope ID or camera ID
            and the assumed distinguishing feature in the filenames of
            the various pickled regressors.
        unit : astropy.Quantity
            scikit-learn regressor do not work with units. so append
            this one to the predictions. assuming that the models
            where trained with consistent units. (default: u.TeV)

        Returns
        -------
        EnergyRegressor:
            a ready-to-use instance of this class to predict any
            quantity you have trained for

        """
        return super().load(path, cam_id_list, unit)