Python pandas.DataFrame() Examples

The following code examples show how to use pandas.DataFrame(). They are extracted from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.

Example 1
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 6 votes vote down vote up
def add_group_component(self, components, name, group):
        """Add a component named ``name`` that aggregates the components in group.

        Every row of ``components`` whose 'component' value is in ``group`` is
        copied, relabelled as ``name`` and placed after the existing rows.

        Parameters
        ----------
        components: pd.DataFrame with a 'component' column.
        name: Name of new group component.
        group: Iterable of component names that form the group.

        Returns
        -------
        pd.DataFrame holding the original rows followed by the relabelled
        copies. The index is not reset, so copied rows keep their labels.
        """
        # Function-scope import so the block does not rely on a module alias.
        import pandas as pd

        new_comp = components[components['component'].isin(set(group))].copy()
        new_comp['component'] = name
        # DataFrame.append was removed in pandas 2.0; pd.concat produces the
        # same result and is available on old and new pandas alike.
        return pd.concat([components, new_comp])
Example 2
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 6 votes vote down vote up
def predictive_samples(self, df):
        """Draw samples from the posterior predictive distribution.

        The input is copied before being handed to ``setup_dataframe`` so
        the caller's object is not the one passed on.

        Parameters
        ----------
        df: Dataframe with dates for predictions (column ds), and capacity
            (column cap) if logistic growth.

        Returns
        -------
        Dictionary with keys "trend", "seasonal", and "yhat" containing
        posterior predictive samples for that component. "seasonal" is the sum
        of seasonalities, holidays, and added regressors.
        """
        prepared = self.setup_dataframe(df.copy())
        return self.sample_posterior_predictive(prepared)
Example 3
Project: jdata   Author: learn2Pro   File: xgb_feature.py    (license) View Source Project 6 votes vote down vote up
def get_actions(start_date, end_date):
    """Return the combined action records with ``start_date <= time < end_date``.

    The result is cached as a pickle under ``./cache``. When the cache file
    for this date pair already exists it is loaded instead of being rebuilt.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # NOTE(security): unpickling can execute arbitrary code; this is only
        # safe while the contents of ./cache are trusted.
        # Pickle streams are binary, so open with 'rb' and close the handle.
        with open(dump_path, 'rb') as cache_file:
            actions = pickle.load(cache_file)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        # 'wb' is required on Python 3 and is also correct on Python 2.
        with open(dump_path, 'wb') as cache_file:
            pickle.dump(actions, cache_file)
    return actions
Example 4
Project: JData   Author: Xls1994   File: gen_feat.py    (license) View Source Project 6 votes vote down vote up
def get_actions(start_date, end_date):
    """Return the combined action records with ``start_date <= time < end_date``.

    The result is cached as a CSV file under ``./cache``. When the cache
    file for this date pair already exists it is read back instead of being
    rebuilt.

    NOTE(review): on a cache hit the frame is whatever ``pd.read_csv``
    parses (no dtype or date handling is applied here) -- confirm callers
    accept both the freshly built and the re-read form.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.csv' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pd.read_csv(dump_path)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        actions.to_csv(dump_path, index=False)
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2, where the statement form was the only one that worked.
    print('action combination finish...')
    return actions
Example 5
Project: JData   Author: edvardHua   File: gen_feat.py    (license) View Source Project 6 votes vote down vote up
def get_actions(start_date, end_date):
    """Return the combined action records with ``start_date <= time < end_date``.

    Results are cached per date pair as a pickle file in ``./cache``; an
    existing cache file is loaded instead of recomputing the frame.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # NOTE(security): pickle.load runs code embedded in the file; keep
        # ./cache restricted to files this program wrote itself.
        # Binary mode is mandatory for pickle on Python 3; the context
        # manager guarantees the handle is closed.
        with open(dump_path, 'rb') as cached:
            actions = pickle.load(cached)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        with open(dump_path, 'wb') as cached:
            pickle.dump(actions, cached)
    return actions
Example 6
Project: WNTR   Author: USEPA   File: fragility_curve.py    (license) View Source Project 6 votes vote down vote up
def sample_damage_state(self, Pr):
        """
        Sample the damage state of each element from a uniform random draw.

        One uniform random number is drawn per row of ``Pr``. The columns of
        ``Pr`` are visited in order, and an element is assigned a column's
        name whenever its draw is below that column's probability, so later
        columns overwrite earlier ones. Elements whose draw is below no
        column stay ``None``.

        Parameters
        ----------
        Pr : pd.DataFrame
            Probability of exceeding a damage state

        Returns
        -------
        damage_state : pd.Series
            The damage state of each element
        """
        n_elements = Pr.shape[0]
        draws = pd.Series(np.random.uniform(size=n_elements), index=Pr.index)
        damage_state = pd.Series([None] * n_elements, index=Pr.index)

        for state_name in Pr.columns:
            exceeded = draws < Pr[state_name]
            damage_state[exceeded] = state_name

        return damage_state
Example 7
Project: pastas   Author: pastas   File: stressmodels.py    (license) View Source Project 6 votes vote down vote up
def get_stress(self, p=None, tindex=None):
        """Return the stress or stresses of the time series object.

        If the time series object has multiple stresses each column
        represents a stress. The ``p`` argument is accepted but not used
        here.

        Parameters
        ----------
        p: unused.
        tindex: optional selector; when given, ``self.stress[tindex]`` is
            returned instead of the full object.

        Returns
        -------
        stress: pd.DataFrame
            Pandas dataframe of the stress(es)

        """
        if tindex is None:
            return self.stress
        return self.stress[tindex]
Example 8
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 5 votes vote down vote up
def predict(self, df=None):
        """Produce a forecast with the prophet model.

        Parameters
        ----------
        df: pd.DataFrame with dates for predictions (column ds), and capacity
            (column cap) if logistic growth. If not provided, predictions are
            made on the history.

        Returns
        -------
        A pd.DataFrame with the forecast components.

        Raises
        ------
        ValueError if df is given but has no rows.
        """
        if df is None:
            df = self.history.copy()
        elif df.shape[0] == 0:
            raise ValueError('Dataframe has no rows.')
        else:
            df = self.setup_dataframe(df.copy())

        df['trend'] = self.predict_trend(df)
        seasonal_components = self.predict_seasonal_components(df)
        intervals = self.predict_uncertainty(df)

        # Only ds and trend are always carried over; cap and floor are
        # kept when present / enabled.
        optional = (('cap', 'cap' in df), ('floor', self.logistic_floor))
        keep = ['ds', 'trend'] + [label for label, wanted in optional if wanted]

        # Place the uncertainty intervals and seasonal components next to
        # the retained columns, then derive yhat.
        forecast = pd.concat((df[keep], intervals, seasonal_components), axis=1)
        forecast['yhat'] = forecast['trend'] + forecast['seasonal']
        return forecast
Example 9
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 5 votes vote down vote up
def sample_model(self, df, seasonal_features, iteration):
        """Simulate observations from the extrapolated generative model.

        The seasonal term is the product of the feature matrix with the
        ``beta`` parameters of the chosen iteration, scaled by ``y_scale``.
        Gaussian noise with standard deviation ``sigma_obs`` (also scaled by
        ``y_scale``) is drawn with one value per row of ``df``.

        Parameters
        ----------
        df: Prediction dataframe.
        seasonal_features: pd.DataFrame of seasonal features.
        iteration: Int sampling iteration to use parameters from.

        Returns
        -------
        pd.DataFrame with columns yhat, trend and seasonal.
        """
        trend = self.sample_predictive_trend(df, iteration)

        beta = self.params['beta'][iteration]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and exists on every pandas version.
        seasonal = np.matmul(seasonal_features.values, beta) * self.y_scale

        sigma = self.params['sigma_obs'][iteration]
        noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale

        return pd.DataFrame({
            'yhat': trend + seasonal + noise,
            'trend': trend,
            'seasonal': seasonal,
        })
Example 10
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 5 votes vote down vote up
def make_future_dataframe(self, periods, freq='D', include_history=True):
        """Build a dataframe of dates continuing past the end of the history.

        Parameters
        ----------
        periods: Int number of periods to forecast forward.
        freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
        include_history: Boolean to include the historical dates in the data
            frame for predictions.

        Returns
        -------
        pd.DataFrame with a single column ds that extends forward from the
        latest date in self.history_dates for the requested number of
        periods, preceded by the historical dates when include_history is
        true.
        """
        history_end = self.history_dates.max()
        # Request one period more than needed: the generated range may
        # begin on history_end itself, which is dropped below.
        candidates = pd.date_range(
            start=history_end, periods=periods + 1, freq=freq)
        future = candidates[candidates > history_end][:periods]

        if not include_history:
            return pd.DataFrame({'ds': future})

        combined = np.concatenate((np.array(self.history_dates), future))
        return pd.DataFrame({'ds': combined})
Example 11
Project: JDcontest   Author: zsyandjyhouse   File: get_actions.py    (license) View Source Project 5 votes vote down vote up
def get_actions(start_time, end_time):
    """Load action records and keep those with start_time <= time <= end_time.

    Reads at most the first 100000 rows of ``../JData/JData_Action_All.csv``
    and parses the ``time`` column with the format ``%Y-%m-%d %H:%M:%S``
    before filtering.

    :param start_time: inclusive lower bound on the ``time`` column
    :param end_time: inclusive upper bound on the ``time`` column
    :return: actions: pd.DataFrame
    """
    data_dir = "../JData/"
    action_file = "JData_Action_All.csv"
    # Alternative input file noted in the original: JData_Action_before_327.csv
    action_all = pd.read_csv(data_dir + action_file, nrows=100000)
    action_all['time'] = pd.to_datetime(
        action_all['time'], format='%Y-%m-%d %H:%M:%S')
    in_window = (action_all.time >= start_time) & (action_all.time <= end_time)
    return action_all[in_window]
Example 12
Project: WNTR   Author: USEPA   File: fragility_curve.py    (license) View Source Project 5 votes vote down vote up
def cdf_probability(self, x):
        """
        Return the CDF probability for each state, based on the value of x

        For every element in ``x.index`` and every state yielded by
        ``self.states()``, the state's distribution for that element is
        evaluated at ``x[element]``. When the state has no entry for the
        element, the distribution stored under the key 'Default' is used.

        Parameters
        ----------
        x : pd.Series
            Control variable for each element

        Returns
        -------
        Pr : pd.DataFrame
            Probability of exceeding a damage state, indexed like ``x`` with
            one column per state name
        """
        state_names = [name for name, state in self.states()]

        Pr = pd.DataFrame(index=x.index, columns=state_names)

        for element in Pr.index:
            for state_name, state in self.states():
                try:
                    dist = state.distribution[element]
                except KeyError:
                    # Only a missing element entry triggers the fallback; a
                    # bare except would also hide KeyboardInterrupt and
                    # unrelated programming errors.
                    dist = state.distribution['Default']
                Pr.loc[element, state_name] = dist.cdf(x[element])

        return Pr
Example 13
Project: SimpleSQLite   Author: thombashi   File: core.py    (license) View Source Project 5 votes vote down vote up
def select_as_dataframe(
            self, table_name, column_list=None, where=None, extra=None):
        """
        Fetch data from the database and return it as a
        :py:class:`pandas.DataFrame` instance.

        When ``column_list`` is omitted, the attribute names of the table
        are looked up and used as the columns. An empty data frame is
        returned when the underlying select yields ``None``.

        :param str table_name: |arg_select_table_name|
        :param list column_list: |arg_select_as_xx_column_list|
        :param str where: |arg_select_where|
        :param str extra: |arg_select_extra|
        :return: Table data as a :py:class:`pandas.DataFrame` instance.
        :rtype: pandas.DataFrame
        :raises simplesqlite.NullDatabaseConnectionError:
            |raises_check_connection|
        :raises simplesqlite.TableNotFoundError:
            |raises_verify_table_existence|
        :raises simplesqlite.OperationalError: |raises_operational_error|

        :Example:
            :ref:`example-select-as-dataframe`

        .. note::
            ``pandas`` package required to execute this method.
        """

        # Imported here so pandas is only needed when this method is called.
        import pandas

        if column_list is None:
            column_list = self.get_attr_name_list(table_name)

        select_clause = ",".join(SqlQuery.to_attr_str_list(column_list))
        cursor = self.select(
            select=select_clause,
            table_name=table_name, where=where, extra=extra)

        if cursor is not None:
            return pandas.DataFrame(cursor.fetchall(), columns=column_list)

        return pandas.DataFrame()
Example 14
Project: Eskapade   Author: KaveIO   File: histogrammar_filler.py    (license) View Source Project 4 votes vote down vote up
def construct_empty_hist(self, columns):
        """Create an (empty) histogram of the right type.

        Create a multi-dim histogram by iterating through the columns in
        reverse order and passing a single-dim hist as input to the next
        column. The innermost value is an ``hg.Count``; each column wraps
        the histogram built so far in an ``hg.SparselyBin`` (numbers and
        timestamps) or an ``hg.Categorize`` (everything else).

        The returned object also gets the attributes ``datatype``,
        ``n_dim`` and ``n_bins`` set on it.

        :param list columns: histogram columns
        :returns: created histogram; a bare ``hg.Count`` when ``columns``
            is empty, otherwise the wrapper built for the first column
        :rtype: histogrammar.Count
        """

        # innermost "value" of the nested histogram
        hist = hg.Count()

        # create a multi-dim histogram by iterating through the columns in reverse order
        # and passing a single-dim hist as input to the next column
        for col in reversed(columns):
            # histogram type depends on the data type
            dt = np.dtype(self.var_dtype[col])

            # processing function, e.g. only accept booleans during filling;
            # a per-column entry in self.quantity takes precedence over the
            # default looked up by dtype in hf.QUANTITY
            f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
            if len(columns) == 1:
                # df[col] is a pd.Series
                # fnc is bound as a default argument so the lambda keeps this
                # iteration's function
                quant = lambda x, fnc=f: fnc(x)
            else:
                # df[columns] is a pd.DataFrame
                # fix column to col (bound as default argument for the same reason)
                quant = lambda x, fnc=f, clm=col: fnc(x[clm])

            # classify by instantiating the dtype's scalar type
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)

            if is_number or is_timestamp:
                # numbers and timestamps are put in a sparse binned histogram;
                # column-specific bin specs win over the unit defaults
                bs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
                hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'], quantity=quant, value=hist)
            else:
                # strings and booleans are treated as categories
                hist = hg.Categorize(quantity=quant, value=hist)

        # FIXME stick data types and number of dimension to histogram
        dta = [self.var_dtype[col] for col in columns]
        # a single column stores its dtype directly, several store a list
        hist.datatype = dta[0] if len(columns) == 1 else dta
        hist.n_dim = len(columns)

        # NOTE(review): the property object below is assigned to an instance
        # attribute; property descriptors are normally only invoked when set
        # on the class, so reading hist.n_bins presumably yields the property
        # object itself -- confirm how callers use n_bins.
        @property
        def n_bins(self):
            if hasattr(self, 'num'):
                return self.num
            elif hasattr(self, 'size'):
                return self.size
            else:
                raise RuntimeError('Cannot retrieve number of bins from hgr hist')
        hist.n_bins = n_bins

        return hist
Example 15
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 4 votes vote down vote up
def make_all_seasonality_features(self, df):
        """Assemble the full regression feature matrix.

        Combines seasonality features, holiday features, and added
        regressors column-wise. When none of these produce any features, a
        single placeholder column named 'zeros' with prior scale 1.0 is
        used instead.

        Parameters
        ----------
        df: pd.DataFrame with dates for computing seasonality features and any
            added regressors.

        Returns
        -------
        pd.DataFrame with regression features.
        list of prior scales for each column of the features dataframe.
        """
        feature_blocks = []
        scales = []

        # One block of columns per configured seasonality; every column of
        # the block shares that seasonality's prior scale.
        for seasonality, settings in self.seasonalities.items():
            block = self.make_seasonality_features(
                df['ds'],
                settings['period'],
                settings['fourier_order'],
                seasonality,
            )
            feature_blocks.append(block)
            scales.extend([settings['prior_scale']] * block.shape[1])

        # Holiday block, with the prior scales supplied by the helper.
        if self.holidays is not None:
            holiday_block, holiday_priors = self.make_holiday_features(df['ds'])
            feature_blocks.append(holiday_block)
            scales.extend(holiday_priors)

        # Each extra regressor contributes its own column of df unchanged.
        for regressor, settings in self.extra_regressors.items():
            feature_blocks.append(pd.DataFrame(df[regressor]))
            scales.append(settings['prior_scale'])

        # Placeholder so there is always at least one feature column.
        if not feature_blocks:
            placeholder = pd.DataFrame({'zeros': np.zeros(df.shape[0])})
            feature_blocks.append(placeholder)
            scales.append(1.)

        return pd.concat(feature_blocks, axis=1), scales
Example 16
Project: prophet   Author: facebook   File: forecaster.py    (license) View Source Project 4 votes vote down vote up
def predict_seasonal_components(self, df):
        """Predict seasonality components, holidays, and added regressors.

        Each feature column is mapped to the component named by the part of
        its column name before '_delim_'. Aggregate components are added on
        top: 'seasonal' (all columns), 'seasonalities', 'holidays' (only
        when holidays are configured) and 'extra_regressors'. For every
        component the mean over the parameter samples and the lower/upper
        percentiles implied by ``self.interval_width`` are computed.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        pd.DataFrame with, for every component, the columns
        ``<component>``, ``<component>_lower`` and ``<component>_upper``.
        """
        seasonal_features, _ = self.make_all_seasonality_features(df)
        lower_p = 100 * (1.0 - self.interval_width) / 2
        upper_p = 100 * (1.0 + self.interval_width) / 2

        components = pd.DataFrame({
            'col': np.arange(seasonal_features.shape[1]),
            'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
        })
        # Add total for all regression components.
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # drop-in equivalent (the index is left untouched, as before).
        components = pd.concat([components, pd.DataFrame({
            'col': np.arange(seasonal_features.shape[1]),
            'component': 'seasonal',
        })])
        # Add totals for seasonality, holiday, and extra regressors
        components = self.add_group_component(
            components, 'seasonalities', self.seasonalities.keys())
        if self.holidays is not None:
            components = self.add_group_component(
                components, 'holidays', self.holidays['holiday'].unique())
        components = self.add_group_component(
            components, 'extra_regressors', self.extra_regressors.keys())
        # Remove the placeholder
        components = components[components['component'] != 'zeros']

        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same ndarray on every pandas version.
        X = seasonal_features.values
        data = {}
        for component, features in components.groupby('component'):
            cols = features.col.tolist()
            # Restrict parameters and features to this component's columns.
            comp_beta = self.params['beta'][:, cols]
            comp_features = X[:, cols]
            # Rows index the rows of X, columns the rows of params['beta'].
            comp = (
                np.matmul(comp_features, comp_beta.transpose())
                * self.y_scale  # noqa W503
            )
            data[component] = np.nanmean(comp, axis=1)
            data[component + '_lower'] = np.nanpercentile(comp, lower_p,
                                                            axis=1)
            data[component + '_upper'] = np.nanpercentile(comp, upper_p,
                                                            axis=1)
        return pd.DataFrame(data)