Python pandas.concat() Examples

The following are 29 code examples showing how to use pandas.concat(). They are extracted from open source projects; the project, author, file, and license are listed above each example.

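At its core, pandas.concat() stacks a sequence of DataFrame or Series objects along an axis: axis=0 (the default) appends rows, while axis=1 aligns the objects on their index and places them side by side. A minimal sketch of both calls, with illustrative frame names and values:

import pandas as pd

a = pd.DataFrame({"x": [1, 2], "y": [3, 4]})
b = pd.DataFrame({"x": [5], "y": [6]})

# axis=0 (default): stack rows; ignore_index=True renumbers the result 0..n-1
rows = pd.concat([a, b], ignore_index=True)

# axis=1: align on the index and place the frames side by side
cols = pd.concat([a, a.add_suffix("2")], axis=1)

Most of the examples below are variations on these two calls.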
Example 1
Project: svviz   Author: svviz   File: runTests.py    License: MIT License
def saveTimingInfo(summary):
    timingsPath = "test_timings.csv"
    git_version = subprocess.check_output(["git", "describe"]).strip()
    
    new_row = summary[["timing"]].T
    new_row["date"] = [datetime.datetime.now()]
    new_row["version"] = git_version


    if os.path.exists(timingsPath):
        timings = pandas.read_csv(timingsPath, index_col=0)
        timings = pandas.concat([timings, new_row])
    else:
        timings = new_row

    timings.to_csv(timingsPath)

    print(timings) 
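
Example 1 shows the read-or-create-then-append pattern: build a one-row frame and concat it onto the existing log. Since DataFrame.append was removed in pandas 2.0, pd.concat is the idiomatic way to grow a frame. A minimal sketch of the same pattern (the path and row contents are illustrative):

import os
import pandas as pd

def append_row(path, row):
    new_row = pd.DataFrame([row])  # one-row frame from a dict
    if os.path.exists(path):
        log = pd.concat([pd.read_csv(path), new_row], ignore_index=True)
    else:
        log = new_row
    log.to_csv(path, index=False)

append_row("timings.csv", {"date": "2020-01-01", "seconds": 12.3})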
Example 2
Project: MPContribs   Author: materialsproject   File: pre_submission.py    License: MIT License
def get_table(results, letter):
    y = "Δ{}".format(letter)
    df = Table(
        RecursiveDict([("δ", results[0]), (y, results[1]), (y + "ₑᵣᵣ", results[2])])
    )
    x0, x1 = map(float, df["δ"].iloc[[0, -1]])
    pad = 0.15 * (x1 - x0)
    mask = (results[3] > x0 - pad) & (results[3] < x1 + pad)
    x, fit = results[3][mask], results[4][mask]
    df.set_index("δ", inplace=True)
    df2 = pd.DataFrame(RecursiveDict([("δ", x), (y + " Fit", fit)]))
    df2.set_index("δ", inplace=True)
    cols = ["δ", y, y + "ₑᵣᵣ", y + " Fit"]
    return (
        pd.concat([df, df2], sort=True)
        .sort_index()
        .reset_index()
        .rename(columns={"index": "δ"})
        .fillna("")[cols]
    ) 
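
Example 2 concatenates two frames that share an index ("δ") but have different columns: sort=True sorts the resulting column union, and cells with no source value come out as NaN, which the example then blanks with fillna(""). A minimal sketch of that behavior (column names are illustrative):

import pandas as pd

data = pd.DataFrame({"y": [1.0, 2.0]}, index=[0.0, 1.0])
fit = pd.DataFrame({"y_fit": [1.1, 2.1]}, index=[0.5, 1.5])
merged = pd.concat([data, fit], sort=True).sort_index().fillna("")
# four rows, columns y and y_fit, empty strings where a frame had no value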
Example 3
Project: tensorflow-DeepFM   Author: ChenglongChen   File: DataReader.py    License: MIT License
def gen_feat_dict(self):
        if self.dfTrain is None:
            dfTrain = pd.read_csv(self.trainfile)
        else:
            dfTrain = self.dfTrain
        if self.dfTest is None:
            dfTest = pd.read_csv(self.testfile)
        else:
            dfTest = self.dfTest
        df = pd.concat([dfTrain, dfTest])
        self.feat_dict = {}
        tc = 0
        for col in df.columns:
            if col in self.ignore_cols:
                continue
            if col in self.numeric_cols:
                # map to a single index
                self.feat_dict[col] = tc
                tc += 1
            else:
                us = df[col].unique()
                self.feat_dict[col] = dict(zip(us, range(tc, len(us)+tc)))
                tc += len(us)
        self.feat_dim = tc 
Example 4
Project: models   Author: kipoi   File: dataloader_m.py    License: MIT License
def prepro_pos_table(pos_tables):
    """Extracts unique positions and sorts them."""
    if not isinstance(pos_tables, list):
        pos_tables = [pos_tables]

    pos_table = None
    for next_pos_table in pos_tables:
        if pos_table is None:
            pos_table = next_pos_table
        else:
            pos_table = pd.concat([pos_table, next_pos_table])
        pos_table = pos_table.groupby('chromo').apply(
            lambda df: pd.DataFrame({'pos': np.unique(df['pos'])}))
        pos_table.reset_index(inplace=True)
        pos_table = pos_table[['chromo', 'pos']]
        pos_table.sort_values(['chromo', 'pos'], inplace=True)
    return pos_table 
Example 5
Project: ipygee   Author: fitoprincipe   File: chart.py    License: MIT License
def concat(*plots):
    """ Concatenate plots. The type of the resulting plot will be the type
        of the first parsed plot
    """
    first = plots[0]
    if isinstance(first, DateTimeLine):
        chart = DateTimeLine()
    else:
        chart = Line()

    y_data = {}
    for plot in plots:
        p_data = plot.y_data
        for serie, data in p_data.items():
            y_data[serie] = data
            chart.add(serie, data)

    chart.y_data = y_data
    return chart 
Example 6
Project: steppy-toolkit   Author: minerva-ml   File: misc.py    License: MIT License
def transform(self, numerical_feature_list, categorical_feature_list):
        """
        Args:
            numerical_feature_list: list of numerical features
            categorical_feature_list: list of categorical features

        Returns:
            Dictionary with following keys:
                features: DataFrame with concatenated features
                feature_names: list of feature names
                categorical_features: list of categorical feature names
        """
        features = numerical_feature_list + categorical_feature_list
        for feature in features:
            feature = self._format_target(feature)
            feature.set_index(self.id_column, drop=True, inplace=True)
        features = pd.concat(features, axis=1).astype(np.float32).reset_index()

        outputs = dict()
        outputs['features'] = features
        outputs['feature_names'] = list(features.columns)
        outputs['categorical_features'] = self._get_feature_names(categorical_feature_list)
        return outputs 
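
Example 6 depends on every feature frame sharing the same id index before the axis=1 concat: rows are matched by index label, not by position. A minimal sketch of that alignment (column names are illustrative):

import pandas as pd

f1 = pd.DataFrame({"id": [1, 2], "a": [0.1, 0.2]}).set_index("id")
f2 = pd.DataFrame({"id": [2, 1], "b": [9.0, 8.0]}).set_index("id")
features = pd.concat([f1, f2], axis=1).reset_index()
# id 1 pairs with b=8.0 and id 2 with b=9.0, despite the reversed row order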
Example 7
Project: esmlab   Author: NCAR   File: test_statistics.py    License: Apache License 2.0
def test_weighted_mean(dim, level, wgts_name):
    res = esmlab.weighted_mean(dset, dim=dim, weights=wgts[wgts_name])
    df = dset.to_dataframe()
    df_w = wgts.to_dataframe()[wgts_name]
    if not dim:
        res = res.to_array().data
        d = pd.concat([df, df_w], axis=1)
        expected = d.apply(
            lambda x: np.ma.average(np.ma.MaskedArray(x, mask=np.isnan(x)), weights=d.t_s_wgts)
        )[['da1', 'da2']]
        expected = expected.to_xarray().data
        np.testing.assert_allclose(res, expected)
    else:

        expected = df.groupby(level=level).apply(
            wavg, weights=wgts[wgts_name].data, col_names=['da1', 'da2']
        )

        res = res.to_dataframe()
        assert_frame_equal(res.sort_index(), expected.sort_index()) 
Example 8
Project: tensortrade   Author: tensortrade-org   File: observation_history.py    License: Apache License 2.0
def observe(self) -> np.array:
        """Returns the rows to be observed by the agent."""
        rows = self.rows.copy()

        if len(rows) < self.window_size:
            size = self.window_size - len(rows)
            padding = np.zeros((size, rows.shape[1]))
            padding = pd.DataFrame(padding, columns=self.rows.columns)
            rows = pd.concat([padding, rows], ignore_index=True, sort=False)

        if isinstance(rows, pd.DataFrame):
            rows = rows.fillna(0, axis=1)
            rows = rows.values

        rows = np.nan_to_num(rows)

        return rows 
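
Example 8 left-pads the observation window with a zero-filled frame so the result always has window_size rows; ignore_index=True discards both frames' own indexes so the rows are numbered contiguously. A minimal sketch (sizes and column names are illustrative):

import numpy as np
import pandas as pd

rows = pd.DataFrame({"price": [101.0, 102.0]})
window_size = 5
padding = pd.DataFrame(np.zeros((window_size - len(rows), rows.shape[1])),
                       columns=rows.columns)
window = pd.concat([padding, rows], ignore_index=True)  # 3 zero rows, then the data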
Example 9
Project: interpret-text   Author: interpretml   File: test_introspective_rationale_explainer.py    License: MIT License
def test_bert_explain_local(self):
        train_data = get_ssts_dataset('train')
        test_data = get_ssts_dataset('test')
        X_train = train_data[TEXT_COL]
        X_test = test_data[TEXT_COL]
        preprocessor = BertPreprocessor()

        df_train = pd.concat([train_data[LABEL_COL], preprocessor.preprocess(X_train)], axis=1)
        df_test = pd.concat([test_data[LABEL_COL], preprocessor.preprocess(X_test)], axis=1)
        model_config = BERT_MODEL_CONFIG
        explainer = IntrospectiveRationaleExplainer(classifier_type=CLASSIFIER_TYPE_BERT, cuda=CUDA)
        explainer.build_model_config(model_config)
        explainer.set_preprocessor(preprocessor)
        explainer.load()
        explainer.fit(df_train, df_test)

        local_explanation = explainer.explain_local(SENTENCE)
        # BERT adds [CLS] at the beginning of a sentence and [SEP] at the end of each sentence, but we remove them.
        assert len(local_explanation.local_importance_values) == len(SENTENCE.split()) 
Example 10
Project: interpret-text   Author: interpretml   File: test_introspective_rationale_explainer.py    License: MIT License
def test_rnn_explain_local(self):
        train_data = get_ssts_dataset('train')
        test_data = get_ssts_dataset('test')
        all_data = pd.concat([train_data, test_data])
        X_train = train_data[TEXT_COL]
        X_test = test_data[TEXT_COL]
        preprocessor = GlovePreprocessor(count_threshold=TOKEN_COUNT_THRESHOLD, token_cutoff=MAX_SENT_COUNT)
        preprocessor.build_vocab(all_data[TEXT_COL])

        df_train = pd.concat([train_data[LABEL_COL], preprocessor.preprocess(X_train)], axis=1)
        df_test = pd.concat([test_data[LABEL_COL], preprocessor.preprocess(X_test)], axis=1)
        model_config = RNN_MODEL_CONFIG
        explainer = IntrospectiveRationaleExplainer(classifier_type=CLASSIFIER_TYPE_RNN, cuda=CUDA)
        explainer.build_model_config(model_config)
        explainer.set_preprocessor(preprocessor)
        explainer.load()
        explainer.fit(df_train, df_test)

        local_explanation = explainer.explain_local(SENTENCE)
        assert len(local_explanation.local_importance_values) == len(SENTENCE.split()) 
Example 11
Project: ciftify   Author: edickie   File: ciftify_postPINT2_sub2sub.py    License: MIT License
def calc_allroiidx_distances(vertices_df, roi, surfL, surfR, pvertex_colname):
    '''
    loop over all subjects calculating distances for one roi
    '''
    ## determine the surface for measurement
    hemi = vertices_df.loc[vertices_df.roiidx==roi,'hemi'].values[0]
    if hemi == "L": surf = surfL
    if hemi == "R": surf = surfR

    ## subset the dataframe
    roidf = vertices_df.loc[vertices_df.roiidx==roi,:]

    ## run all the subjects, collecting the per-subject results in a generator
    all_dfs = (calc_subdistances_distances(roidf, surf, subid, pvertex_colname) for subid in vertices_df.subid.unique())
    ## concatenate all the results
    roi_sub2sub = pd.concat(all_dfs, ignore_index=True)
    return roi_sub2sub
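
Note that Example 11 feeds a generator expression straight to pd.concat: concat accepts any iterable (or mapping) of frames, so there is no need to build an intermediate list. A minimal sketch:

import pandas as pd

parts = (pd.DataFrame({"v": [i]}) for i in range(3))
combined = pd.concat(parts, ignore_index=True)  # rows 0, 1, 2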
Example 12
Project: performance_tracker   Author: metro-ontime   File: analyze_estimates.py    License: GNU General Public License v3.0
def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    schedule_direction.loc[:,"datetime_utc"] = pd.to_datetime(schedule_direction["datetime"], utc=True)
    estimated_trips.loc[:,"datetime_utc"] = pd.to_datetime(estimated_trips["datetime"], utc=True)
    schedule_direction = schedule_direction.set_index(pd.DatetimeIndex(schedule_direction["datetime_utc"])).sort_index()
    matched_estimates = [
        match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        for stop_id, stop_estimates in estimated_trips.groupby(["stop_id"])
    ]
    matched_estimates = [x for x in matched_estimates if x is not None]
    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates 
Example 13
Project: NeuroKit   Author: neuropsychology   File: ecg_delineate.py    License: MIT License
def _ecg_delineate_check(waves, rpeaks):
    """This function replaces the delineated features with np.nan if its standardized distance from R-peaks is more than
    3."""
    df = pd.DataFrame.from_dict(waves)
    features_columns = df.columns

    df = pd.concat([df, pd.DataFrame({"ECG_R_Peaks": rpeaks})], axis=1)

    # loop through all columns to calculate the z distance
    for column in features_columns:  # pylint: disable=W0612
        df = _calculate_abs_z(df, features_columns)

    # Replace with nan if distance > 3
    for col in features_columns:
        for i in range(len(df)):
            if df["Dist_R_" + col][i] > 3:
                df[col][i] = np.nan

    # Return df without distance columns
    df = df[features_columns]
    waves = df.to_dict("list")
    return waves 
Example 14
Project: striatum   Author: ntucllab   File: movielens_preprocess.py    License: BSD 2-Clause "Simplified" License
def movie_preprocessing(movie):
    movie_col = list(movie.columns)
    movie_tag = [doc.split('|') for doc in movie['tag']]
    tag_table = {token: idx for idx, token in enumerate(set(itertools.chain.from_iterable(movie_tag)))}
    movie_tag = pd.DataFrame(movie_tag)
    tag_table = pd.DataFrame(tag_table.items())
    tag_table.columns = ['Tag', 'Index']

    # use one-hot encoding for movie genres (here called tag)
    tag_dummy = np.zeros([len(movie), len(tag_table)])

    for i in range(len(movie)):
        for j in range(len(tag_table)):
            if tag_table['Tag'][j] in list(movie_tag.iloc[i, :]):
                tag_dummy[i, j] = 1

    # combine the tag_dummy one-hot encoding table to original movie files
    movie = pd.concat([movie, pd.DataFrame(tag_dummy)], axis=1)
    movie_col.extend(['tag' + str(i) for i in range(len(tag_table))])
    movie.columns = movie_col
    movie = movie.drop('tag', axis=1)
    return movie 
Example 15
Project: pydiogment   Author: SuperKogito   File: dataproc.py    License: BSD 3-Clause "New" or "Revised" License
def balance_dataset(data):
    # define column names
    column_names = list(data.columns)

    # ensure an equal number of samples per class
    samples_pro_emotion = {e: len(data[data.emotion == e]) for e in data.emotion.unique()}
    balanced_data = pd.concat([data[data.emotion == e].sample(min(samples_pro_emotion.values()))
                               for e in data.emotion.unique()],
                               axis=0,
                               keys=list(data.columns))

    # split data
    X = balanced_data.iloc[:, :-1]
    y = balanced_data.iloc[:, -1:].astype('category')
    # print("%25s : %s" % ("Data with balanced sets", str(balanced_data.shape)))
    return balanced_data, X, y, column_names 
Example 16
Project: lifestyles   Author: CamDavidsonPilon   File: cbc_hb.py    License: MIT License
def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10):
    all_attributes = pd.get_dummies(profiles).columns
    profiles_dummies = pd.get_dummies(profiles, drop_first=True)
    choices = pd.concat({profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True) for profile in comparisons.columns}, axis=1)

    respondants = selections.columns
    n_attributes_in_model = profiles_dummies.shape[1]
    n_participants = selections.shape[1]

    with pm.Model():

        # https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf
        # need to include the covariance matrix as a parent of `partsworth`
        alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model, testval=np.random.randn(n_attributes_in_model))
        partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model), shape=(n_participants, n_attributes_in_model))

        cs = [_create_observation_variable(selection, choices, partsworth[i, :]) for i, (_, selection) in enumerate(selections.items())]

        trace = pm.sample(sample)
    return transform_trace_to_individual_summary_statistics(trace, respondants, profiles_dummies.columns, all_attributes) 
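
Example 16 passes a mapping instead of a list: the dict keys become the outermost level of a column MultiIndex (because axis=1), which keeps each profile's dummy columns distinguishable. A minimal sketch (keys and column names are illustrative):

import pandas as pd

block = pd.DataFrame({"feat": [1, 2]})
wide = pd.concat({"A": block, "B": block}, axis=1)
# wide.columns is a MultiIndex: ('A', 'feat'), ('B', 'feat')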
Example 17
Project: face-attendance-machine   Author: matiji66   File: facerec_from_webcam_faster.py    License: Apache License 2.0
def vote_class(face_encoding, tolerance=0.3, topN=5):
    """
    When several known faces fall within `tolerance`, take the topN closest
    matches and vote on the final class; distances are not weighted here.
    :param face_encoding: face encoding
    :param tolerance: distance threshold; smaller means more similar
    :param topN: maximum number of candidates taking part in the vote
    :return: detected name
    """
    myprint('vote start ', time.time())
    # compute the distances to all known encodings
    distance_ = face_recognition.face_distance(known_face_encodings, face_encoding)
    df = pd.DataFrame(distance_, columns=["dis"])  # convert to a DataFrame
    topDF = df[df['dis'] <= tolerance].nsmallest(topN, columns=['dis'])  # filter the result set
    namedf = NAME_DF.loc[topDF.index]  # look up the names that correspond to the face distances
    con = pd.concat([topDF, namedf], axis=1)  # concat name and distance
    # print('con', con)
    group = con.groupby(["name"])['dis'].sum()
    gp = group.reset_index()
    print('vote -- ', gp)
    if len(gp) == 0:
        print("------unknown -----")
        return "Unknown", 10
    import numpy as np  # TODO  optimize
    arr = np.array(gp)
    name1 = arr[0, 0]
    dis1 = arr[0, 1]
    print("get top one:", name1, dis1)
    myprint('vote end', time.time())
    return name1, dis1 
Example 18
Project: MPContribs   Author: materialsproject   File: ALS_import.py    License: MIT License
def treat_xmcd(scan_groups, scan_params, process_dict):
    # Handles only a single scan for now. Splitting into groups is done beforehand (alpha).
    xmcd_frame = pd.DataFrame()
    xmcd_data = process_xmcd(scan_groups, scan_params, process_dict)
    xmcd_frame = pd.concat([xmcd_frame, xmcd_data])
    return xmcd_frame 
Example 19
Project: MPContribs   Author: materialsproject   File: mspScan.py    License: MIT License
def read_scans(subdir, datacounter="Counter 1"):
    """Reads a list of scanfiles from a directory into one dataframe"""
    # No multifile support yet. Is important for averaging spectra.
    scandata_f_list = [
        read_scan(os.path.join(subdir, scanfile))
        for scanfile in os.listdir(subdir)
        if not os.path.isdir(os.path.join(subdir, scanfile))
    ]
    scandata_f = pd.concat(scandata_f_list).reset_index()
    scandata_f = prepare_scan(scandata_f, datacounter=datacounter)
    return scandata_f 
Example 20
Project: deep-learning-note   Author: wdxtub   File: 13_house_price.py    License: MIT License
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None, num_epochs, lr, weight_decay, batch_size)
    utils.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./data/HousePrice/submission.csv', index=False) 
Example 21
Project: nba_scraper   Author: mcbarlowe   File: nba_scraper.py    License: GNU General Public License v3.0
def scrape_date_range(
    date_from, date_to, data_format="pandas", data_dir=f"{Path.home()}/nbadata.csv"
):
    """
    Function scrapes all `regular-season` nba games between two dates

    Inputs:
    date_from   - Date to scrape from
    date_to     - Date to scrape to
    data_format - the format of the data the user wants returned. This is either
                  a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is chosen.
                  If no filepath is passed then it will attempt to write to the
                  user's home directory

    Outputs:
    nba_df     - If pandas is chosen then this function will
                 return this pandas dataframe object. If csv then
                 a csv file will be written but None will be returned
    """
    check_format(data_format)
    check_valid_dates(date_from, date_to)

    game_ids = sf.get_date_games(date_from, date_to)
    scraped_games = []

    for game in game_ids:
        print(f"Scraping game id: {game}")
        scraped_games.append(sf.main_scrape(game))

    if data_format == "pandas":
        return pd.concat(scraped_games)
    else:
        pd.concat(scraped_games).to_csv(data_dir, index=False)
        return None 
Example 22
Project: nba_scraper   Author: mcbarlowe   File: nba_scraper.py    License: GNU General Public License v3.0
def scrape_wnba_game(game_ids, data_format="pandas", data_dir=f"{Path.home()}/"):
    """
    function scrapes wnba games and returns them in the data format requested
    by the user.

    Inputs:
    game_ids    - list of nba game ids to scrape
    data_format - the format of the data the user wants returned. This is either
                  a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is chosen.
                  If no filepath is passed then it will attempt to write to the
                  user's home directory

    Outputs:
    wnba_df     - If pandas is chosen then this function will
                 return this pandas dataframe object. If csv then
                 a csv file will be written but None will be returned
    """

    check_format(data_format)

    scraped_games = []
    for game in game_ids:
        print(f"Scraping game id: 0{game}")
        scraped_games.append(wsf.wnba_main_scrape(f"0{game}"))
    if len(scraped_games) == 0:
        return
    wnba_df = pd.concat(scraped_games)

    if data_format == "pandas":
        return wnba_df
    else:
        wnba_df.to_csv(f"{data_dir}/{game_ids[0]}.csv", index=False)
        return None 
Example 23
Project: nba_scraper   Author: mcbarlowe   File: nba_scraper.py    License: GNU General Public License v3.0
def scrape_game(game_ids, data_format="pandas", data_dir=f"{Path.home()}/"):
    """
    function scrapes nba games and returns them in the data format requested
    by the user.

    Inputs:
    game_ids    - list of nba game ids to scrape
    data_format - the format of the data the user wants returned. This is either
                  a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is chosen.
                  If no filepath is passed then it will attempt to write to the
                  user's home directory

    Outputs:
    nba_df     - If pandas is chosen then this function will
                 return this pandas dataframe object. If csv then
                 a csv file will be written but None will be returned
    """
    check_format(data_format)

    scraped_games = []
    for game in game_ids:
        if game == 21201214:
            print(f"Game {game} is not available")
            continue
        else:
            print(f"Scraping game id: 00{game}")
            scraped_games.append(sf.main_scrape(f"00{game}"))
    if len(scraped_games) == 0:
        return
    nba_df = pd.concat(scraped_games)

    if data_format == "pandas":
        return nba_df
    else:
        nba_df.to_csv(f"{data_dir}/{game_ids[0]}.csv", index=False)
        return None 
Example 24
Project: nba_scraper   Author: mcbarlowe   File: nba_scraper.py    License: GNU General Public License v3.0
def scrape_season(season, data_format="pandas", data_dir=f"{Path.home()}"):
    """
    This function scrapes an entire season and either returns it as a pandas
    dataframe or writes it to file as a csv file

    Inputs:
    season      - season to be scraped must be an integer
    data_format - the format of the data the user wants returned. This is either
                  a pandas dataframe or a csv file
    data_dir    - a filepath which to write the csv file if that option is chosen.
                  If no filepath is passed then it will attempt to write to the
                  user's home directory

    Outputs:
    nba_df     - If pandas is chosen then this function will
                 return this pandas dataframe object. If csv then
                 a csv file will be written but None will be returned
    """
    check_format(data_format)

    scraped_games = []
    game_ids = list(range(int(f"2{season-2001}00001"), int(f"2{season-2001}01231")))

    for game in game_ids:
        if game == 21201214:
            print(f"Game {game} is not available")
            continue
        else:
            print(f"Scraping game id: 00{game}")
            scraped_games.append(sf.main_scrape(f"00{game}"))

    if len(scraped_games) == 0:
        return

    nba_df = pd.concat(scraped_games)

    if data_format == "pandas":
        return nba_df
    else:
        nba_df.to_csv(f"{data_dir}/nba{season}.csv", index=False)
        return None 
Example 25
def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                       learning_rate, weight_decay, batch_size):
    """Conducts k-fold cross validation for the model."""
    assert k > 1
    fold_size = X_train.shape[0] // k

    train_loss_sum = 0.0
    test_loss_sum = 0.0
    for test_idx in range(k):
        X_val_test = X_train[test_idx * fold_size: (test_idx + 1) * fold_size, :]
        y_val_test = y_train[test_idx * fold_size: (test_idx + 1) * fold_size]
        val_train_defined = False
        for i in range(k):
            if i != test_idx:
                X_cur_fold = X_train[i * fold_size: (i + 1) * fold_size, :]
                y_cur_fold = y_train[i * fold_size: (i + 1) * fold_size]
                if not val_train_defined:
                    X_val_train = X_cur_fold
                    y_val_train = y_cur_fold
                    val_train_defined = True
                else:
                    X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0)
                    y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0)
        net = get_net()
        train_loss = train(net, X_val_train, y_val_train, epochs, verbose_epoch,
                           learning_rate, weight_decay, batch_size)
        train_loss_sum += train_loss
        test_loss = get_rmse_log(net, X_val_test, y_val_test)
        print("Test loss: %f" % test_loss)
        test_loss_sum += test_loss
    return train_loss_sum / k, test_loss_sum / k

# The sets of parameters can be fine-tuned with k-fold cross-validation;
# modified settings may yield better results. 
Example 26
def learn(epochs, verbose_epoch, X_train, y_train, test, learning_rate,
          weight_decay, batch_size):
    """Trains the model and predicts on the test data set."""
    net = get_net()
    _ = train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
              weight_decay, batch_size)
    preds = net(X_test).asnumpy()
    test['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test['Id'], test['SalePrice']], axis=1)
    submission.to_csv('submission.csv', index=False) 
Example 27
Project: OpenFermion-Cirq   Author: quantumlib   File: result.py    License: Apache License 2.0
def extend(self,
               results: Iterable[OptimizationResult]) -> None:
        new_data_frame = pandas.DataFrame(
                {'optimal_value': result.optimal_value,
                 'optimal_parameters': result.optimal_parameters,
                 'num_evaluations': result.num_evaluations,
                 'cost_spent': result.cost_spent,
                 'time': result.time,
                 'seed': result.seed,
                 'status': result.status,
                 'message': result.message}
                for result in results)
        self.data_frame = pandas.concat([self.data_frame, new_data_frame])
        self.results.extend(results) 
Example 28
Project: bioservices   Author: cokelaer   File: fasta.py    License: GNU General Public License v3.0
def _get_df(self):
        import pandas as pd
        df = pd.concat([self.fasta[id_].df for id_ in self.fasta.keys()])
        df.reset_index(inplace=True)
        return df 
Example 29
Project: cs294-112_hws   Author: xuwd11   File: plot.py    License: MIT License
def plot_data(data, value="AverageReturn"):
    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)

    sns.set(style="darkgrid", font_scale=1.5)
    sns.tsplot(data=data, time="Iteration", value=value, unit="Unit", condition="Condition")
    plt.legend(loc='best').draggable()
    plt.show()