Python pandas.DataFrame() Examples
The following code examples show how to use pandas.DataFrame(). They are extracted from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.
Example 1
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 6 votes |
def add_group_component(self, components, name, group):
    """Add a component named ``name`` that contains all components in ``group``.

    Rows of ``components`` whose 'component' value is in ``group`` are copied,
    relabelled as ``name`` and placed after the existing rows.

    Parameters
    ----------
    components: Dataframe with components; needs a 'component' column.
    name: Name of new group component.
    group: Iterable of component names that form the group.

    Returns
    -------
    Dataframe with the original rows followed by the rows of the new group.
    """
    # Local import: this block did not previously reference the pandas
    # module by name, so it must not rely on a module-level alias.
    import pandas as pd

    new_comp = components[components['component'].isin(set(group))].copy()
    new_comp['component'] = name
    # DataFrame.append was removed in pandas 2.0. pd.concat is the direct
    # replacement and, like append's default, keeps the original row labels.
    return pd.concat([components, new_comp])
Example 2
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 6 votes |
def predictive_samples(self, df):
    """Draw samples from the posterior predictive distribution.

    Parameters
    ----------
    df: Dataframe with dates for predictions (column ds), and capacity
        (column cap) if logistic growth.

    Returns
    -------
    Dictionary with keys "trend", "seasonal", and "yhat" containing
    posterior predictive samples for that component. "seasonal" is the sum
    of seasonalities, holidays, and added regressors.
    """
    # A copy is handed to setup_dataframe rather than the caller's object.
    prepared = self.setup_dataframe(df.copy())
    return self.sample_posterior_predictive(prepared)
Example 3
Project: jdata Author: learn2Pro File: xgb_feature.py (license) View Source Project | 6 votes |
def get_actions(start_date, end_date):
    """Return the combined action records with start_date <= time < end_date.

    The result is cached as a pickle file under ``./cache/``, keyed by the two
    dates. When that file exists it is loaded and returned as-is; otherwise
    the three action sources are concatenated, filtered and written to the
    cache.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # Pickle streams are binary, so the file is opened with 'rb'; the
        # with-block closes the handle instead of leaking it.
        # NOTE(review): pickle.load can execute code embedded in the file;
        # this is only safe while the cache directory is trusted.
        with open(dump_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    action_1 = get_actions_1()
    action_2 = get_actions_2()
    action_3 = get_actions_3()
    actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
    actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
    with open(dump_path, 'wb') as cache_file:
        pickle.dump(actions, cache_file)
    return actions
Example 4
Project: JData Author: Xls1994 File: gen_feat.py (license) View Source Project | 6 votes |
def get_actions(start_date, end_date):
    """Return the combined action records with start_date <= time < end_date.

    The result is cached as a CSV file under ``./cache/``, keyed by the two
    dates. When that file exists it is read back with ``pd.read_csv``;
    otherwise the three action sources are concatenated, filtered and written
    to the cache without the index.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.csv' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pd.read_csv(dump_path)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        actions.to_csv(dump_path, index=False)
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('action combination finish...')
    return actions
Example 5
Project: JData Author: edvardHua File: gen_feat.py (license) View Source Project | 6 votes |
def get_actions(start_date, end_date):
    """Return the merged actions whose time lies in [start_date, end_date).

    A pickle cache at ``./cache/all_action_<start>_<end>.pkl`` is used when
    present. On a cache miss the three action sources are concatenated,
    filtered on ``actions.time`` and the result is pickled to that path.

    :param start_date: inclusive lower bound for ``actions.time``
    :param end_date: exclusive upper bound for ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # Binary mode is required for pickle data, and the context manager
        # guarantees the handle is closed.
        # NOTE(review): unpickling runs code from the file; keep the cache
        # directory trusted.
        with open(dump_path, 'rb') as cached:
            actions = pickle.load(cached)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        with open(dump_path, 'wb') as cached:
            pickle.dump(actions, cached)
    return actions
Example 6
Project: WNTR Author: USEPA File: fragility_curve.py (license) View Source Project | 6 votes |
def sample_damage_state(self, Pr):
    """
    Sample the damage state of every element using one uniform random draw
    per element.

    Parameters
    -----------
    Pr : pd.DataFrame
        Probability of exceeding a damage state

    Returns
    -------
    damage_state : pd.Series
        The damage state of each element (None where no state is exceeded)
    """
    n_elements = Pr.shape[0]
    draws = pd.Series(data=np.random.uniform(size=n_elements), index=Pr.index)
    damage_state = pd.Series(data=[None] * n_elements, index=Pr.index)

    # Columns are visited in order and each assignment overwrites earlier
    # labels, so an element ends up with the last column whose probability
    # is above its draw.
    for state_name in Pr.columns:
        exceeded = draws < Pr[state_name]
        damage_state[exceeded] = state_name

    return damage_state
Example 7
Project: pastas Author: pastas File: stressmodels.py (license) View Source Project | 6 votes |
def get_stress(self, p=None, tindex=None):
    """Return the stress or stresses of the time series object.

    If the time series object has multiple stresses each column represents a
    stress. ``p`` is accepted but not used by this implementation.

    Returns
    -------
    stress: pd.DataFrame
        Pandas dataframe of the stress(es); indexed by ``tindex`` when one
        is given.
    """
    if tindex is None:
        return self.stress
    return self.stress[tindex]
Example 8
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 5 votes |
def predict(self, df=None):
    """Predict using the prophet model.

    Parameters
    ----------
    df: pd.DataFrame with dates for predictions (column ds), and capacity
        (column cap) if logistic growth. If not provided, predictions are
        made on the history.

    Returns
    -------
    A pd.DataFrame with the forecast components.

    Raises
    ------
    ValueError if a dataframe is provided but has no rows.
    """
    if df is not None:
        if df.shape[0] == 0:
            raise ValueError('Dataframe has no rows.')
        df = self.setup_dataframe(df.copy())
    else:
        df = self.history.copy()

    df['trend'] = self.predict_trend(df)
    seasonal_components = self.predict_seasonal_components(df)
    intervals = self.predict_uncertainty(df)

    # Keep only ds and trend, plus cap / floor when they apply.
    keep = ['ds', 'trend']
    keep += (['cap'] if 'cap' in df else [])
    keep += (['floor'] if self.logistic_floor else [])

    # Assemble the forecast frame from the kept columns and the components.
    forecast = pd.concat((df[keep], intervals, seasonal_components), axis=1)
    forecast['yhat'] = forecast['trend'] + forecast['seasonal']
    return forecast
Example 9
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 5 votes |
def sample_model(self, df, seasonal_features, iteration):
    """Simulate observations from the extrapolated generative model.

    Parameters
    ----------
    df: Prediction dataframe.
    seasonal_features: pd.DataFrame of seasonal features.
    iteration: Int sampling iteration to use parameters from.

    Returns
    -------
    Dataframe with trend, seasonality, and yhat, each like df['t'].
    """
    trend = self.sample_predictive_trend(df, iteration)

    beta = self.params['beta'][iteration]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
    # same ndarray and is available in both old and new pandas releases.
    seasonal = np.matmul(seasonal_features.values, beta) * self.y_scale

    # Observation noise for this iteration, one draw per row of df.
    sigma = self.params['sigma_obs'][iteration]
    noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale

    return pd.DataFrame({
        'yhat': trend + seasonal + noise,
        'trend': trend,
        'seasonal': seasonal,
    })
Example 10
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 5 votes |
def make_future_dataframe(self, periods, freq='D', include_history=True):
    """Build a dataframe of dates extending past the end of the history.

    Parameters
    ----------
    periods: Int number of periods to forecast forward.
    freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
    include_history: Boolean to include the historical dates in the data
        frame for predictions.

    Returns
    -------
    pd.DataFrame with a single 'ds' column that extends forward from the end
    of self.history_dates for the requested number of periods.
    """
    last_date = self.history_dates.max()
    # Ask for one extra period: the range starts at last_date, which is
    # dropped below when it shows up in the generated dates.
    candidates = pd.date_range(start=last_date, periods=periods + 1, freq=freq)
    # Keep strictly-later dates only, then trim to the requested count.
    future = candidates[candidates > last_date][:periods]

    if include_history:
        future = np.concatenate((np.array(self.history_dates), future))

    return pd.DataFrame({'ds': future})
Example 11
Project: JDcontest Author: zsyandjyhouse File: get_actions.py (license) View Source Project | 5 votes |
def get_actions(start_time, end_time):
    """
    Read the first 100000 rows of the action log and return the rows whose
    parsed ``time`` lies in the closed interval [start_time, end_time].

    :param start_time: inclusive lower bound for the ``time`` column
    :param end_time: inclusive upper bound for the ``time`` column
    :return: actions: pd.DataFrame
    """
    data_dir = "../JData/"
    action_file = "JData_Action_All.csv"

    action_all = pd.read_csv(data_dir + action_file, nrows=100000)
    action_all['time'] = pd.to_datetime(
        action_all['time'], format='%Y-%m-%d %H:%M:%S')

    in_window = (action_all.time >= start_time) & (action_all.time <= end_time)
    return action_all[in_window]
Example 12
Project: WNTR Author: USEPA File: fragility_curve.py (license) View Source Project | 5 votes |
def cdf_probability(self, x):
    """
    Return the CDF probability for each state, based on the value of x

    Parameters
    -----------
    x : pd.Series
        Control variable for each element

    Returns
    --------
    Pr : pd.DataFrame
        Probability of exceeding a damage state; one row per entry of
        ``x.index`` and one column per state name
    """
    # Materialise the (name, state) pairs once: they are needed for the
    # column header and again for every element.
    states = list(self.states())
    state_names = [name for name, _ in states]

    Pr = pd.DataFrame(index=x.index, columns=state_names)
    for element in Pr.index:
        for state_name, state in states:
            try:
                dist = state.distribution[element]
            except LookupError:
                # No element-specific distribution: use the shared default.
                # Only lookup failures are handled, so unrelated errors
                # (and KeyboardInterrupt) are no longer swallowed.
                dist = state.distribution['Default']
            Pr.loc[element, state_name] = dist.cdf(x[element])

    return Pr
Example 13
Project: SimpleSQLite Author: thombashi File: core.py (license) View Source Project | 5 votes |
def select_as_dataframe(
        self, table_name, column_list=None, where=None, extra=None):
    """
    Fetch rows from a table and return them as a
    :py:class:`pandas.DataFrame` instance.

    When ``column_list`` is omitted, the attribute names of the table are
    used as the columns.

    :param str table_name: |arg_select_table_name|
    :param list column_list: |arg_select_as_xx_column_list|
    :param str where: |arg_select_where|
    :param str extra: |arg_select_extra|
    :return:
        Table data as a :py:class:`pandas.DataFrame` instance
        (an empty one when the select yields no result object).
    :rtype: pandas.DataFrame
    :raises simplesqlite.NullDatabaseConnectionError:
        |raises_check_connection|
    :raises simplesqlite.TableNotFoundError:
        |raises_verify_table_existence|
    :raises simplesqlite.OperationalError: |raises_operational_error|

    :Example:
        :ref:`example-select-as-dataframe`

    .. note::
        ``pandas`` package required to execute this method.
    """

    # Imported here so pandas is only needed when this method is called.
    import pandas

    if column_list is None:
        column_list = self.get_attr_name_list(table_name)

    select_clause = ",".join(SqlQuery.to_attr_str_list(column_list))
    cursor = self.select(
        select=select_clause, table_name=table_name,
        where=where, extra=extra)

    if cursor is None:
        return pandas.DataFrame()

    rows = cursor.fetchall()
    return pandas.DataFrame(rows, columns=column_list)
Example 14
Project: Eskapade Author: KaveIO File: histogrammar_filler.py (license) View Source Project | 4 votes |
def construct_empty_hist(self, columns):
    """Create an (empty) histogram of the right type.

    Builds a multi-dimensional histogram by iterating through the columns in
    reverse order and passing the histogram built so far as the ``value`` of
    the histogram created for the next column.

    :param list columns: histogram columns
    :returns: created histogram; the bare ``hg.Count`` when ``columns`` is
        empty, otherwise the last ``hg.SparselyBin`` / ``hg.Categorize``
        that was constructed
    :rtype: histogrammar.Count
    """
    # innermost level: a plain counter
    hist = hg.Count()

    # create a multi-dim histogram by iterating through the columns in reverse order
    # and passing a single-dim hist as input to the next column
    for col in reversed(columns):
        # histogram type depends on the data type
        dt = np.dtype(self.var_dtype[col])

        # processing function, e.g. only accept booleans during filling;
        # a per-column entry in self.quantity wins over the dtype default
        f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
        # f and col are bound as lambda defaults so each lambda keeps the
        # values of this iteration instead of the loop's final values
        if len(columns) == 1:
            # single column: the lambda applies f directly to its argument
            # (presumably df[col], a pd.Series -- confirm against the filler)
            quant = lambda x, fnc=f: fnc(x)
        else:
            # several columns: the lambda first selects column col from its
            # argument (presumably df[columns], a pd.DataFrame -- confirm)
            quant = lambda x, fnc=f, clm=col: fnc(x[clm])

        # classify the column by instantiating its numpy scalar type
        is_number = isinstance(dt.type(), np.number)
        is_timestamp = isinstance(dt.type(), np.datetime64)

        if is_number or is_timestamp:
            # numbers and timestamps are put in a sparse binned histogram;
            # bin specs fall back to the unit specs for the matching kind
            bs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
            hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'], quantity=quant, value=hist)
        else:
            # strings and booleans are treated as categories
            hist = hg.Categorize(quantity=quant, value=hist)

    # FIXME stick data types and number of dimension to histogram
    dta = [self.var_dtype[col] for col in columns]
    # a single column stores its dtype directly, several store the list
    hist.datatype = dta[0] if len(columns) == 1 else dta
    hist.n_dim = len(columns)

    # NOTE(review): the property object below is stored as a plain instance
    # attribute. Python only invokes properties found on the class, so
    # reading hist.n_bins presumably yields the property object itself
    # rather than a bin count -- confirm how callers use it.
    @property
    def n_bins(self):
        if hasattr(self, 'num'):
            return self.num
        elif hasattr(self, 'size'):
            return self.size
        else:
            raise RuntimeError('Cannot retrieve number of bins from hgr hist')

    hist.n_bins = n_bins

    return hist
Example 15
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 4 votes |
def make_all_seasonality_features(self, df):
    """Assemble the dataframe of all regression features.

    Includes seasonality features, holiday features, and added regressors.
    When none of these are configured, a single all-zero column named
    'zeros' is used with a prior scale of 1.

    Parameters
    ----------
    df: pd.DataFrame with dates for computing seasonality features and any
        added regressors.

    Returns
    -------
    pd.DataFrame with regression features.
    list of prior scales for each column of the features dataframe.
    """
    feature_blocks = []
    prior_scales = []

    # Seasonality features: one prior scale per generated column.
    for name, props in self.seasonalities.items():
        block = self.make_seasonality_features(
            df['ds'], props['period'], props['fourier_order'], name)
        feature_blocks.append(block)
        prior_scales += [props['prior_scale']] * block.shape[1]

    # Holiday features
    if self.holidays is not None:
        holiday_block, holiday_priors = self.make_holiday_features(df['ds'])
        feature_blocks.append(holiday_block)
        prior_scales.extend(holiday_priors)

    # Additional regressors: each contributes exactly one column.
    for name, props in self.extra_regressors.items():
        feature_blocks.append(pd.DataFrame(df[name]))
        prior_scales.append(props['prior_scale'])

    # Placeholder so there is always at least one feature column.
    if not feature_blocks:
        placeholder = pd.DataFrame({'zeros': np.zeros(df.shape[0])})
        feature_blocks.append(placeholder)
        prior_scales.append(1.)

    return pd.concat(feature_blocks, axis=1), prior_scales
Example 16
Project: prophet Author: facebook File: forecaster.py (license) View Source Project | 4 votes |
def predict_seasonal_components(self, df):
    """Predict seasonality components, holidays, and added regressors.

    For every component (each individual one, the 'seasonal' total and the
    group totals) the feature columns belonging to it are multiplied with
    the matching 'beta' parameters of every sample; the mean across samples
    and the lower/upper percentiles given by ``self.interval_width`` are
    reported.

    Parameters
    ----------
    df: Prediction dataframe.

    Returns
    -------
    Dataframe with seasonal components: for each component the columns
    ``<component>``, ``<component>_lower`` and ``<component>_upper``.
    """
    seasonal_features, _ = self.make_all_seasonality_features(df)
    lower_p = 100 * (1.0 - self.interval_width) / 2
    upper_p = 100 * (1.0 + self.interval_width) / 2

    n_features = seasonal_features.shape[1]
    # Map every feature column index to the component named by the part of
    # its column label before '_delim_'.
    components = pd.DataFrame({
        'col': np.arange(n_features),
        'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
    })
    # Add total for all regression components.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent and keeps the row labels just as append did.
    components = pd.concat([components, pd.DataFrame({
        'col': np.arange(n_features),
        'component': 'seasonal',
    })])
    # Add totals for seasonality, holiday, and extra regressors
    components = self.add_group_component(
        components, 'seasonalities', self.seasonalities.keys())
    if self.holidays is not None:
        components = self.add_group_component(
            components, 'holidays', self.holidays['holiday'].unique())
    components = self.add_group_component(
        components, 'extra_regressors', self.extra_regressors.keys())
    # Remove the placeholder
    components = components[components['component'] != 'zeros']

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray on old and new pandas.
    X = seasonal_features.values
    data = {}
    for component, features in components.groupby('component'):
        cols = features.col.tolist()
        comp_beta = self.params['beta'][:, cols]
        comp_features = X[:, cols]
        # One column per parameter sample, scaled back by y_scale.
        comp = np.matmul(comp_features, comp_beta.transpose()) * self.y_scale
        data[component] = np.nanmean(comp, axis=1)
        data[component + '_lower'] = np.nanpercentile(comp, lower_p, axis=1)
        data[component + '_upper'] = np.nanpercentile(comp, upper_p, axis=1)
    return pd.DataFrame(data)