Python pandas.concat() Examples
The following are 30 code examples of pandas.concat(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
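As a quick orientation before the project examples, here is a minimal, self-contained sketch of the two pd.concat() patterns that recur below: stacking rows and aligning columns side by side. The small DataFrames are invented for illustration.

import pandas as pd

df_a = pd.DataFrame({"x": [1, 2], "y": [3, 4]})
df_b = pd.DataFrame({"x": [5, 6], "y": [7, 8]})

# Stack rows; ignore_index=True renumbers the combined index 0..3.
rows = pd.concat([df_a, df_b], ignore_index=True)

# Align on the index and place the frames side by side.
cols = pd.concat([df_a, df_b.add_suffix("_b")], axis=1)

print(rows.shape)  # (4, 2)
print(cols.shape)  # (2, 4)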
Example #1
Source File: pre_submission.py From MPContribs with MIT License | 7 votes

def get_table(results, letter):
    y = "Δ{}".format(letter)
    df = Table(
        RecursiveDict([("δ", results[0]), (y, results[1]), (y + "ₑᵣᵣ", results[2])])
    )
    x0, x1 = map(float, df["δ"].iloc[[0, -1]])
    pad = 0.15 * (x1 - x0)
    mask = (results[3] > x0 - pad) & (results[3] < x1 + pad)
    x, fit = results[3][mask], results[4][mask]
    df.set_index("δ", inplace=True)
    df2 = pd.DataFrame(RecursiveDict([("δ", x), (y + " Fit", fit)]))
    df2.set_index("δ", inplace=True)
    cols = ["δ", y, y + "ₑᵣᵣ", y + " Fit"]
    return (
        pd.concat([df, df2], sort=True)
        .sort_index()
        .reset_index()
        .rename(columns={"index": "δ"})
        .fillna("")[cols]
    )
Example #2
Source File: dataloader_m.py From models with MIT License | 7 votes

def prepro_pos_table(pos_tables):
    """Extracts unique positions and sorts them."""
    if not isinstance(pos_tables, list):
        pos_tables = [pos_tables]

    pos_table = None
    for next_pos_table in pos_tables:
        if pos_table is None:
            pos_table = next_pos_table
        else:
            pos_table = pd.concat([pos_table, next_pos_table])
    pos_table = pos_table.groupby('chromo').apply(
        lambda df: pd.DataFrame({'pos': np.unique(df['pos'])}))
    pos_table.reset_index(inplace=True)
    pos_table = pos_table[['chromo', 'pos']]
    pos_table.sort_values(['chromo', 'pos'], inplace=True)
    return pos_table
Example #3
Source File: cbc_hb.py From lifestyles with MIT License | 6 votes

def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10):
    all_attributes = pd.get_dummies(profiles).columns
    profiles_dummies = pd.get_dummies(profiles, drop_first=True)
    choices = pd.concat({profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True)
                         for profile in comparisons.columns}, axis=1)

    respondants = selections.columns
    n_attributes_in_model = profiles_dummies.shape[1]
    n_participants = selections.shape[1]

    with pm.Model():
        # https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf
        # need to include the covariance matrix as a parent of `partsworth`
        alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model,
                          testval=np.random.randn(n_attributes_in_model))
        partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model),
                                 shape=(n_participants, n_attributes_in_model))
        cs = [_create_observation_variable(selection, choices, partsworth[i, :])
              for i, (_, selection) in enumerate(selections.iteritems())]
        trace = pm.sample(sample)

    return transform_trace_to_individual_summary_statistics(
        trace, respondants, profiles_dummies.columns, all_attributes)
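The dict form of pd.concat() used to build `choices` above is easy to miss: keying the inputs with a dict produces a MultiIndex on the columns, one top level per key. A minimal sketch of that pattern, with invented frames:

import pandas as pd

left = pd.DataFrame({"price": [1, 2], "qty": [10, 20]})
right = pd.DataFrame({"price": [3, 4], "qty": [30, 40]})

# A dict of frames yields MultiIndex columns:
# ('A', 'price'), ('A', 'qty'), ('B', 'price'), ('B', 'qty')
combined = pd.concat({"A": left, "B": right}, axis=1)
print(combined["A"]["price"])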
Example #4
Source File: DataReader.py From tensorflow-DeepFM with MIT License | 6 votes

def gen_feat_dict(self):
    if self.dfTrain is None:
        dfTrain = pd.read_csv(self.trainfile)
    else:
        dfTrain = self.dfTrain
    if self.dfTest is None:
        dfTest = pd.read_csv(self.testfile)
    else:
        dfTest = self.dfTest
    df = pd.concat([dfTrain, dfTest])

    self.feat_dict = {}
    tc = 0
    for col in df.columns:
        if col in self.ignore_cols:
            continue
        if col in self.numeric_cols:
            # map to a single index
            self.feat_dict[col] = tc
            tc += 1
        else:
            us = df[col].unique()
            self.feat_dict[col] = dict(zip(us, range(tc, len(us)+tc)))
            tc += len(us)
    self.feat_dim = tc
Example #5
Source File: runTests.py From svviz with MIT License | 6 votes

def saveTimingInfo(summary):
    timingsPath = "test_timings.csv"
    git_version = subprocess.check_output(["git", "describe"]).strip()

    new_row = summary[["timing"]].T
    new_row["date"] = [datetime.datetime.now()]
    new_row["version"] = git_version

    if os.path.exists(timingsPath):
        timings = pandas.read_csv(timingsPath, index_col=0)
        timings = pandas.concat([timings, new_row])
    else:
        timings = new_row

    timings.to_csv(timingsPath)
    print(timings)
Example #6
Source File: chart.py From ipygee with MIT License | 6 votes

def concat(*plots):
    """ Concatenate plots. The type of the resulting plot will be the type
    of the first parsed plot
    """
    first = plots[0]
    if isinstance(first, DateTimeLine):
        chart = DateTimeLine()
    else:
        chart = Line()

    y_data = {}
    for plot in plots:
        p_data = plot.y_data
        for serie, data in p_data.items():
            y_data[serie] = data
            chart.add(serie, data)

    chart.y_data = y_data
    return chart
Example #7
Source File: test_statistics.py From esmlab with Apache License 2.0 | 6 votes

def test_weighted_mean(dim, level, wgts_name):
    res = esmlab.weighted_mean(dset, dim=dim, weights=wgts[wgts_name])
    df = dset.to_dataframe()
    df_w = wgts.to_dataframe()[wgts_name]
    if not dim:
        res = res.to_array().data
        d = pd.concat([df, df_w], axis=1)
        expected = d.apply(
            lambda x: np.ma.average(np.ma.MaskedArray(x, mask=np.isnan(x)), weights=d.t_s_wgts)
        )[['da1', 'da2']]
        expected = expected.to_xarray().data
        np.testing.assert_allclose(res, expected)
    else:
        expected = df.groupby(level=level).apply(
            wavg, weights=wgts[wgts_name].data, col_names=['da1', 'da2']
        )
        res = res.to_dataframe()
        assert_frame_equal(res.sort_index(), expected.sort_index())
Example #8
Source File: observation_history.py From tensortrade with Apache License 2.0 | 6 votes

def observe(self) -> np.array:
    """Returns the rows to be observed by the agent."""
    rows = self.rows.copy()

    if len(rows) < self.window_size:
        size = self.window_size - len(rows)
        padding = np.zeros((size, rows.shape[1]))
        padding = pd.DataFrame(padding, columns=self.rows.columns)
        rows = pd.concat([padding, rows], ignore_index=True, sort=False)

    if isinstance(rows, pd.DataFrame):
        rows = rows.fillna(0, axis=1)
        rows = rows.values

    rows = np.nan_to_num(rows)

    return rows
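The zero-padding idiom in observe(), prepending rows of zeros so a partially filled window still has a fixed shape, works on its own; a minimal sketch with an invented window size and price columns:

import numpy as np
import pandas as pd

window_size = 5
rows = pd.DataFrame({"open": [1.0, 2.0], "close": [1.5, 2.5]})

# Prepend zero rows so the observation always has window_size rows.
if len(rows) < window_size:
    padding = pd.DataFrame(np.zeros((window_size - len(rows), rows.shape[1])),
                           columns=rows.columns)
    rows = pd.concat([padding, rows], ignore_index=True, sort=False)

print(rows)  # five rows: three zero rows followed by the two real rows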
Example #9
Source File: test_introspective_rationale_explainer.py From interpret-text with MIT License | 6 votes

def test_bert_explain_local(self):
    train_data = get_ssts_dataset('train')
    test_data = get_ssts_dataset('test')
    X_train = train_data[TEXT_COL]
    X_test = test_data[TEXT_COL]
    preprocessor = BertPreprocessor()
    df_train = pd.concat([train_data[LABEL_COL], preprocessor.preprocess(X_train)], axis=1)
    df_test = pd.concat([test_data[LABEL_COL], preprocessor.preprocess(X_test)], axis=1)
    model_config = BERT_MODEL_CONFIG
    explainer = IntrospectiveRationaleExplainer(classifier_type=CLASSIFIER_TYPE_BERT, cuda=CUDA)
    explainer.build_model_config(model_config)
    explainer.set_preprocessor(preprocessor)
    explainer.load()
    explainer.fit(df_train, df_test)
    local_explanation = explainer.explain_local(SENTENCE)
    # BERT adds [CLS] at the beginning of a sentence and [SEP] at the end of
    # each sentence, but we remove them.
    assert len(local_explanation.local_importance_values) == len(SENTENCE.split())
Example #10
Source File: ciftify_postPINT2_sub2sub.py From ciftify with MIT License | 6 votes

def calc_allroiidx_distances(vertices_df, roi, surfL, surfR, pvertex_colname):
    ''' loop over all subjects calculating distances for one roi '''
    ## determine the surface for measurement
    hemi = vertices_df.loc[vertices_df.roiidx==roi,'hemi'].values[0]
    if hemi == "L": surf = surfL
    if hemi == "R": surf = surfR
    ## subset the dataframe
    roidf = vertices_df.loc[vertices_df.roiidx==roi,:]
    ## run all the subjects and return into a tupley thing of results
    all_dfs = (calc_subdistances_distances(roidf, surf, subid, pvertex_colname)
               for subid in vertices_df.subid.unique())
    ## concatenate all the results
    roi_sub2sub = pd.concat(all_dfs, ignore_index=True)
    return(roi_sub2sub)
Example #11
Source File: analyze_estimates.py From performance_tracker with GNU General Public License v3.0 | 6 votes

def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    schedule_direction.loc[:, "datetime_utc"] = pd.to_datetime(
        schedule_direction["datetime"], utc=True
    )
    estimated_trips.loc[:, "datetime_utc"] = pd.to_datetime(
        estimated_trips["datetime"], utc=True
    )
    schedule_direction = schedule_direction.set_index(
        pd.DatetimeIndex(schedule_direction["datetime_utc"])
    ).sort_index()
    matched_estimates = [
        match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        for stop_id, stop_estimates in estimated_trips.groupby(["stop_id"])
    ]
    matched_estimates = [x for x in matched_estimates if x is not None]
    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates
Example #12
Source File: ecg_delineate.py From NeuroKit with MIT License | 6 votes

def _ecg_delineate_check(waves, rpeaks):
    """This function replaces delineated features with np.nan if their
    standardized distance from the R-peaks is more than 3."""
    df = pd.DataFrame.from_dict(waves)
    features_columns = df.columns

    df = pd.concat([df, pd.DataFrame({"ECG_R_Peaks": rpeaks})], axis=1)

    # loop through all columns to calculate the z distance
    for column in features_columns:  # pylint: disable=W0612
        df = _calculate_abs_z(df, features_columns)

    # Replace with nan if distance > 3
    for col in features_columns:
        for i in range(len(df)):
            if df["Dist_R_" + col][i] > 3:
                df[col][i] = np.nan

    # Return df without distance columns
    df = df[features_columns]
    waves = df.to_dict("list")
    return waves
Example #13
Source File: movielens_preprocess.py From striatum with BSD 2-Clause "Simplified" License | 6 votes

def movie_preprocessing(movie):
    movie_col = list(movie.columns)
    movie_tag = [doc.split('|') for doc in movie['tag']]
    tag_table = {token: idx for idx, token in enumerate(set(itertools.chain.from_iterable(movie_tag)))}
    movie_tag = pd.DataFrame(movie_tag)
    tag_table = pd.DataFrame(tag_table.items())
    tag_table.columns = ['Tag', 'Index']

    # use one-hot encoding for movie genres (here called tag)
    tag_dummy = np.zeros([len(movie), len(tag_table)])

    for i in range(len(movie)):
        for j in range(len(tag_table)):
            if tag_table['Tag'][j] in list(movie_tag.iloc[i, :]):
                tag_dummy[i, j] = 1

    # combine the tag_dummy one-hot encoding table to original movie files
    movie = pd.concat([movie, pd.DataFrame(tag_dummy)], 1)
    movie_col.extend(['tag' + str(i) for i in range(len(tag_table))])
    movie.columns = movie_col
    movie = movie.drop('tag', 1)
    return movie
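As an aside, the hand-rolled tag loop above can also be expressed with pandas built-ins: Series.str.get_dummies() splits on the separator and one-hot encodes in one call, and pd.concat() appends the indicator columns. A sketch of that alternative, with an invented frame (the column names are illustrative, not from the project):

import pandas as pd

movie = pd.DataFrame({"title": ["A", "B"], "tag": ["Action|Sci-Fi", "Drama"]})

# '|'-separated tags become one indicator column per tag, appended via concat.
dummies = movie["tag"].str.get_dummies(sep="|")
movie = pd.concat([movie.drop(columns="tag"), dummies], axis=1)
print(movie)  # columns: title, Action, Drama, Sci-Fi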
Example #14
Source File: test_introspective_rationale_explainer.py From interpret-text with MIT License | 6 votes

def test_rnn_explain_local(self):
    train_data = get_ssts_dataset('train')
    test_data = get_ssts_dataset('test')
    all_data = pd.concat([train_data, test_data])
    X_train = train_data[TEXT_COL]
    X_test = test_data[TEXT_COL]
    preprocessor = GlovePreprocessor(count_threshold=TOKEN_COUNT_THRESHOLD, token_cutoff=MAX_SENT_COUNT)
    preprocessor.build_vocab(all_data[TEXT_COL])
    df_train = pd.concat([train_data[LABEL_COL], preprocessor.preprocess(X_train)], axis=1)
    df_test = pd.concat([test_data[LABEL_COL], preprocessor.preprocess(X_test)], axis=1)
    model_config = RNN_MODEL_CONFIG
    explainer = IntrospectiveRationaleExplainer(classifier_type=CLASSIFIER_TYPE_RNN, cuda=CUDA)
    explainer.build_model_config(model_config)
    explainer.set_preprocessor(preprocessor)
    explainer.load()
    explainer.fit(df_train, df_test)
    local_explanation = explainer.explain_local(SENTENCE)
    assert len(local_explanation.local_importance_values) == len(SENTENCE.split())
Example #15
Source File: dataproc.py From pydiogment with BSD 3-Clause "New" or "Revised" License | 6 votes

def balance_dataset(data):
    # define column names
    column_names = list(data.columns)

    # assert equal number of samples per class
    samples_pro_emotion = {e: len(data[data.emotion == e]) for e in data.emotion.unique()}
    balanced_data = pd.concat([data[data.emotion == e].sample(min(samples_pro_emotion.values()))
                               for e in data.emotion.unique()],
                              axis=0, keys=list(data.columns))

    # split data
    X = balanced_data.iloc[:, :-1]
    y = balanced_data.iloc[:, -1:].astype('category')
    # print("%25s : %s" % ("Data with balanced sets", str(balanced_data.shape)))
    return balanced_data, X, y, column_names
Example #16
Source File: misc.py From steppy-toolkit with MIT License | 6 votes

def transform(self, numerical_feature_list, categorical_feature_list):
    """
    Args:
        numerical_feature_list: list of numerical features
        categorical_feature_list: list of categorical features

    Returns:
        Dictionary with following keys:
            features: DataFrame with concatenated features
            feature_names: list of features names
            categorical_features: list of categorical feature names
    """
    features = numerical_feature_list + categorical_feature_list
    for feature in features:
        feature = self._format_target(feature)
        feature.set_index(self.id_column, drop=True, inplace=True)
    features = pd.concat(features, axis=1).astype(np.float32).reset_index()
    outputs = dict()
    outputs['features'] = features
    outputs['feature_names'] = list(features.columns)
    outputs['categorical_features'] = self._get_feature_names(categorical_feature_list)
    return outputs
Example #17
Source File: action_detector_diagnosis.py From DETAD with MIT License | 5 votes

def _limit_prediction(self):
    """Of each class J, limit the predictions to the top scoring
    (N_j * self.limit_factor) predictions, where N_j is the number of
    ground truth instances for class J.
    """
    ground_truth_gbvn = self.ground_truth.groupby('label')
    prediction_gbvn = self.prediction.groupby('label')
    filtered_prediction_df_list = []
    for label, this_ground_truth in ground_truth_gbvn:
        try:
            # Check if there is at least one prediction for this class.
            this_prediction = prediction_gbvn.get_group(label)
        except Exception as e:
            continue
        # pick the top (len(this_ground_truth)*self.limit_factor) predictions
        filtered_prediction_df_list += [this_prediction.nlargest(
            n=int(len(this_ground_truth)*self.limit_factor), columns='score')]
    filtered_prediction = pd.concat(filtered_prediction_df_list, ignore_index=True)
    # reset prediction ids
    filtered_prediction['prediction-id'] = range(len(filtered_prediction))
    return filtered_prediction
Example #18
Source File: utils.py From thewarden with MIT License | 5 votes

def df_unpack(df, column, fillna=None):
    ret = None
    if fillna is None:
        ret = pd.concat([df, pd.DataFrame(
            (d for idx, d in df[column].iteritems()))], axis=1)
        del ret[column]
    else:
        ret = pd.concat([df, pd.DataFrame(
            (d for idx, d in df[column].iteritems())).fillna(fillna)], axis=1)
        del ret[column]
    return ret
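A hypothetical usage of the df_unpack() pattern (the frame, column name, and values below are invented) showing how pd.concat() with axis=1 splices the unpacked dict keys in as real columns:

import pandas as pd

df = pd.DataFrame({
    "ticker": ["BTC", "ETH"],
    "quote": [{"usd": 40000, "eur": 36000}, {"usd": 2500}],
})

# Expand the dict column into real columns, then drop the original,
# mirroring df_unpack(df, "quote", fillna=0).
unpacked = pd.concat([df, pd.DataFrame(d for d in df["quote"]).fillna(0)], axis=1)
del unpacked["quote"]
print(unpacked)  # columns: ticker, usd, eur (the missing eur is filled with 0)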
Example #19
Source File: storageExt.py From FINE with MIT License | 5 votes

def getDataForTimeSeriesAggregation(self):
    """ Function for getting the required data if a time series aggregation is requested. """
    weightDict, data = {}, []
    I = [(self.fullChargeOpRateFix, self.fullChargeOpRateMax, 'chargeRate_', self.chargeTsaWeight),
         (self.fullDischargeOpRateFix, self.fullDischargeOpRateMax, 'dischargeRate_', self.dischargeTsaWeight),
         (self.fullStateOfChargeOpRateFix, self.fullStateOfChargeOpRateMax, '_SOCRate_', self.stateOfChargeTsaWeight),
         (self.fullOpexPerChargeOpTimeSeries, None, '_opexPerChargeOp_', self.opexChargeOpTsaWeight)]
    for rateFix, rateMax, rateName, rateWeight in I:
        weightDict, data = self.prepareTSAInput(rateFix, rateMax, rateName, rateWeight, weightDict, data)
    return (pd.concat(data, axis=1), weightDict) if data else (None, {})
Example #20
Source File: conversion.py From FINE with MIT License | 5 votes

def getDataForTimeSeriesAggregation(self):
    """ Function for getting the required data if a time series aggregation is requested. """
    weightDict, data = {}, []
    weightDict, data = self.prepareTSAInput(self.fullOperationRateFix, self.fullOperationRateMax,
                                            '_operationRate_', self.tsaWeight, weightDict, data)
    return (pd.concat(data, axis=1), weightDict) if data else (None, {})
Example #21
Source File: transmission.py From FINE with MIT License | 5 votes

def getDataForTimeSeriesAggregation(self):
    """ Function for getting the required data if a time series aggregation is requested. """
    weightDict, data = {}, []
    weightDict, data = self.prepareTSAInput(self.fullOperationRateFix, self.fullOperationRateMax,
                                            '_operationRate_', self.tsaWeight, weightDict, data)
    return (pd.concat(data, axis=1), weightDict) if data else (None, {})
Example #22
Source File: storage.py From FINE with MIT License | 5 votes

def getDataForTimeSeriesAggregation(self):
    """ Function for getting the required data if a time series aggregation is requested. """
    weightDict, data = {}, []
    I = [(self.fullChargeOpRateFix, self.fullChargeOpRateMax, 'chargeRate_', self.chargeTsaWeight),
         (self.fullDischargeOpRateFix, self.fullDischargeOpRateMax, 'dischargeRate_', self.dischargeTsaWeight)]
    for rateFix, rateMax, rateName, rateWeight in I:
        weightDict, data = self.prepareTSAInput(rateFix, rateMax, rateName, rateWeight, weightDict, data)
    return (pd.concat(data, axis=1), weightDict) if data else (None, {})
Example #23
Source File: utils.py From FINE with MIT License | 5 votes

def buildFullTimeSeries(df, periodsOrder, axis=1):
    data = []
    for p in periodsOrder:
        data.append(df.loc[p])
    return pd.concat(data, axis=axis, ignore_index=True)
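buildFullTimeSeries() stitches typical-period slices back into one series; a minimal sketch of the same idea with an invented two-period frame, concatenating along columns as the function's default axis=1 does:

import pandas as pd

# Two typical periods (0 and 1), each a 2x2 block of time-series data.
periods = {0: pd.DataFrame([[1, 2], [3, 4]]), 1: pd.DataFrame([[5, 6], [7, 8]])}
df = pd.concat(periods)  # row index level 0 is the period

# Rebuild the full series for the chronological period order 1, 0, 1.
periodsOrder = [1, 0, 1]
full = pd.concat([df.loc[p] for p in periodsOrder], axis=1, ignore_index=True)
print(full)  # 2 rows x 6 columns, columns renumbered 0..5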
Example #24
Source File: annotators.py From EarthSim with BSD 3-Clause "New" or "Revised" License | 5 votes

def annotated_points(self):
    element = self.point_stream.element
    groups = []
    for g, idx in self._group_data.items():
        df = element.iloc[idx].dframe()
        df[self.column] = g
        groups.append(df)
    data = pd.concat(groups).sort_values(self.column) if groups else []
    return element.clone(data, vdims=self.column).opts(
        plot={'color_index': self.column}, style={'cmap': 'Category20'})
Example #25
Source File: 13_house_price.py From deep-learning-note with MIT License | 5 votes

def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./data/HousePrice/submission.csv', index=False)
Example #26
Source File: ledger.py From tensortrade with Apache License 2.0 | 5 votes

def as_frame(self, sort_by_order_seq=False) -> pd.DataFrame:
    if not sort_by_order_seq:
        return pd.DataFrame(self.transactions)

    df = pd.DataFrame(self.transactions)
    frames = []
    for poid in df.poid.unique():
        frames += [df.loc[df.poid == poid, :]]
    return pd.concat(frames, ignore_index=True, axis=0)
Example #27
Source File: test_statistics.py From esmlab with Apache License 2.0 | 5 votes

def test_weighted_std(dim, level, wgts_name):
    res = esmlab.weighted_std(dset, dim=dim, weights=wgts[wgts_name])
    df = dset.to_dataframe()
    df_w = wgts.to_dataframe()[wgts_name]
    if not dim:
        d = pd.concat([df, df_w], axis=1)
        df_w_mean = d.apply(
            lambda x: np.ma.average(np.ma.MaskedArray(x, mask=np.isnan(x)), weights=d.t_s_wgts)
        )[['da1', 'da2']]
        temp_df = (df - df_w_mean) ** 2
        temp_df = temp_df.multiply(df_w, axis='index').sum()
        total_weights_da1 = df_w[df['da1'].notnull()].sum()
        total_weights_da2 = df_w[df['da2'].notnull()].sum()
        res = res.to_array().to_pandas()
        expected = res.copy(True)
        expected['da1'] = np.sqrt(temp_df['da1'] / total_weights_da1)
        expected['da2'] = np.sqrt(temp_df['da2'] / total_weights_da2)
        np.testing.assert_allclose(res, expected)
    else:
        df_w_mean = df.groupby(level=level).apply(
            wavg, weights=wgts[wgts_name].data, col_names=['da1', 'da2']
        )
        temp_df = (df - df_w_mean) ** 2
        temp_df = temp_df.multiply(df_w, axis='index').sum(level=level)
        total_weights_da1 = df_w[df['da1'].notnull()].sum(level=level)
        total_weights_da2 = df_w[df['da2'].notnull()].sum(level=level)
        res = res.to_dataframe()
        expected = pd.DataFrame(columns=res.columns)
        expected['da1'] = np.sqrt(temp_df['da1'] / total_weights_da1)
        expected['da2'] = np.sqrt(temp_df['da2'] / total_weights_da2)
        assert_frame_equal(res.sort_index(), expected.sort_index())
Example #28
Source File: nba_scraper.py From nba_scraper with GNU General Public License v3.0 | 5 votes

def scrape_date_range(
    date_from, date_to, data_format="pandas", data_dir=f"{Path.home()}/nbadata.csv"
):
    """
    Function scrapes all regular-season nba games between two dates

    Inputs:
    date_from   - Date to scrape from
    date_to     - Date to scrape to
    data_format - the format of the data the user wants returned.
                  This is either a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is
                  chosen. If no filepath is passed then it will attempt to
                  write to the user's home directory

    Outputs:
    nba_df - If pandas is chosen then this function will return this pandas
             dataframe object. If csv then a csv file will be written but
             None will be returned
    """
    check_format(data_format)
    check_valid_dates(date_from, date_to)
    game_ids = sf.get_date_games(date_from, date_to)
    scraped_games = []
    for game in game_ids:
        print(f"Scraping game id: {game}")
        scraped_games.append(sf.main_scrape(game))

    if data_format == "pandas":
        return pd.concat(scraped_games)
    else:
        pd.concat(scraped_games).to_csv(data_dir, index=False)
        return None
Example #29
Source File: nba_scraper.py From nba_scraper with GNU General Public License v3.0 | 5 votes

def scrape_season(season, data_format="pandas", data_dir=f"{Path.home()}/nbadata.csv"):
    """
    This function scrapes an entire season and either returns it as a pandas
    dataframe or writes it to file as a csv file

    Inputs:
    season      - season to be scraped must be an integer
    data_format - the format of the data the user wants returned.
                  This is either a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is
                  chosen. If no filepath is passed then it will attempt to
                  write to the user's home directory

    Outputs:
    nba_df - If pandas is chosen then this function will return this pandas
             dataframe object. If csv then a csv file will be written but
             None will be returned
    """
    check_format(data_format)
    scraped_games = []
    game_ids = list(range(int(f"2{season-2001}00001"), int(f"2{season-2001}01231")))
    for game in game_ids:
        if game == 21201214:
            print(f"Game {game} is not available")
            continue
        else:
            print(f"Scraping game id: 00{game}")
            scraped_games.append(sf.main_scrape(f"00{game}"))

    if len(scraped_games) == 0:
        return

    nba_df = pd.concat(scraped_games)

    if data_format == "pandas":
        return nba_df
    else:
        nba_df.to_csv(f"{data_dir}/nba{season}.csv", index=False)
        return None
Example #30
Source File: stacking_val_predict.py From argus-freesound with MIT License | 5 votes

def calc_lwlrap_on_val():
    probs_df_lst = []
    for fold in config.folds:
        fold_probs_path = PREDICTION_DIR / f'fold_{fold}' / 'val' / 'probs.csv'
        probs_df = pd.read_csv(fold_probs_path)
        probs_df.set_index('fname', inplace=True)
        probs_df_lst.append(probs_df)

    probs_df = pd.concat(probs_df_lst, axis=0)
    train_curated_df = pd.read_csv(config.train_curated_csv_path)

    lwlrap = LwlrapBase(config.classes)
    for i, row in train_curated_df.iterrows():
        target = np.zeros(len(config.classes))
        for label in row.labels.split(','):
            target[config.class2index[label]] = 1.
        pred = probs_df.loc[row.fname].values
        lwlrap.accumulate(target[np.newaxis], pred[np.newaxis])

    result = {
        'overall_lwlrap': lwlrap.overall_lwlrap(),
        'per_class_lwlrap': {cls: lwl for cls, lwl in
                             zip(config.classes, lwlrap.per_class_lwlrap())}
    }
    print(result)

    with open(PREDICTION_DIR / 'val_lwlrap.json', 'w') as file:
        json.dump(result, file, indent=2)