Python pandas.DataFrames() Examples

The following are 22 code examples of pandas.DataFrames(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: checks.py    From bulwark with GNU Lesser General Public License v3.0 6 votes vote down vote up
def is_same_as(df, df_to_compare, **kwargs):
    """Check that ``df`` and ``df_to_compare`` are equal frames.

    The comparison itself is delegated to ``tm.assert_frame_equal``; a
    mismatch reported by it is re-raised with a fixed, short message.

    Args:
        df (pd.DataFrame): The frame under test.
        df_to_compare (pd.DataFrame): The frame it is compared against.
        **kwargs (dict): Forwarded unchanged to ``assert_frame_equal``.

    Returns:
        The very same ``df`` object that was passed in.

    Raises:
        AssertionError: If the frames differ. The original error is kept
            as the chained cause.

    """
    try:
        tm.assert_frame_equal(df, df_to_compare, **kwargs)
    except AssertionError as mismatch:
        message = "DataFrames are not equal"
        raise AssertionError(message) from mismatch
    else:
        return df
Example #2
Source File: correlations.py    From pysystemtrade with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, corr_list, column_names, fit_dates):
        """
        Store correlation estimates together with their labels on the instance

        The arguments are stored exactly as passed; nothing is validated or
        copied here.

        :param corr_list: stored as ``self.corr_list``
            (presumably one correlation estimate per fitting period - confirm
            against callers)

        :param column_names: stored as ``self.columns``

        :param fit_dates: stored as ``self.fit_dates``

        """

        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Example #3
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 6 votes vote down vote up
def get_capped_forecast(self, instrument_code, rule_variation_name):
        """
        Fetch the capped forecast for one instrument and trading rule variation

        Pure pass-through to the ``forecastScaleCap`` stage of the parent
        system.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :param rule_variation_name: name of the trading rule variation
        :type str:

        :returns: whatever the upstream stage returns
                  (documented there as a Tx1 pd.DataFrame)

        """
        scale_cap_stage = self.parent.forecastScaleCap
        return scale_cap_stage.get_capped_forecast(instrument_code,
                                                   rule_variation_name)
Example #4
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 6 votes vote down vote up
def get_forecast_weights(self, instrument_code):
        """
        Fetch the forecast weights for one instrument

        Pure pass-through to the ``combForecast`` stage of the parent system.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :returns: whatever the upstream stage returns
                  (previously documented here as a dict of Tx1 pd.DataFrames
                  - confirm against combForecast)

        """
        combine_stage = self.parent.combForecast
        return combine_stage.get_forecast_weights(instrument_code)
Example #5
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 6 votes vote down vote up
def get_daily_returns_volatility(self, instrument_code):
        """
        Daily return (not %) volatility for an instrument

        Taken from the parent system's ``rawdata`` stage when the system has
        one; otherwise computed with ``robust_vol_calc`` on the daily price
        differences.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :returns: the volatility estimate from whichever source was used
                  (documented upstream as a Tx1 pd.DataFrame)

        """

        parent_system = self.parent

        if not hasattr(parent_system, "rawdata"):
            # No rawdata stage available: estimate from price changes instead
            daily_price = self.get_daily_price(instrument_code)
            return robust_vol_calc(daily_price.diff())

        return parent_system.rawdata.daily_returns_volatility(instrument_code)
Example #6
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 6 votes vote down vote up
def get_aligned_forecast(self, instrument_code, rule_variation_name):
        """
        Capped forecast re-indexed onto the daily price timestamps

        The forecast is reindexed to the index of the daily price and gaps
        created by the reindexing are forward filled.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :param rule_variation_name: name of the trading rule variation
        :type str:

        :returns: the forecast, carrying the index of the daily price

        """
        daily_price = self.get_daily_price(instrument_code)
        capped = self.get_capped_forecast(instrument_code, rule_variation_name)

        return capped.reindex(daily_price.index).ffill()
Example #7
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 5 votes vote down vote up
def get_forecast_diversification_multiplier(self, instrument_code):
        """
        Fetch the forecast diversification multiplier (f.d.m.) for an instrument

        Pure pass-through to the ``combForecast`` stage of the parent system.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :returns: whatever the upstream stage returns
                  (previously documented here as a dict of Tx1 pd.DataFrames
                  - confirm against combForecast)

        """
        combine_stage = self.parent.combForecast
        return combine_stage.get_forecast_diversification_multiplier(
            instrument_code)
Example #8
Source File: conftest.py    From kartothek with MIT License 5 votes vote down vote up
def meta_partitions_evaluation_dataframe(metadata_version):
    """
    Build four MetaPartitions for testing.

    Every partition carries exactly one single-row, in-memory pd.DataFrame
    (columns P, L, HORIZON, PRED) under the data key "PRED"; only ``label``,
    ``data`` and ``metadata_version`` are passed to ``MetaPartition``.

    """
    # (label, P, L, HORIZON, PRED) -- one row per partition, in return order
    partition_specs = [
        ("cluster_1_1", 1, 1, 1, 10),
        ("cluster_1_2", 1, 1, 2, 20),
        ("cluster_2_1", 2, 2, 1, 10),
        ("cluster_2_2", 2, 2, 2, 20),
    ]

    partitions = []
    for label, p_val, l_val, horizon, pred in partition_specs:
        frame = pd.DataFrame(
            OrderedDict(
                [
                    ("P", [p_val]),
                    ("L", [l_val]),
                    ("HORIZON", [horizon]),
                    ("PRED", [pred]),
                ]
            )
        )
        partitions.append(
            MetaPartition(
                label=label, data={"PRED": frame}, metadata_version=metadata_version
            )
        )
    return partitions
Example #9
Source File: conftest.py    From kartothek with MIT License 5 votes vote down vote up
def meta_partitions_dataframe_function(metadata_version):
    """
    Build the list of test MetaPartitions for ``metadata_version``.

    Thin wrapper around ``_get_meta_partitions_with_dataframe``; per the
    original description the partitions hold in-memory pd.DataFrames
    without external references, i.e. files are empty.

    """
    partitions = _get_meta_partitions_with_dataframe(metadata_version)
    return partitions
Example #10
Source File: conftest.py    From kartothek with MIT License 5 votes vote down vote up
def meta_partitions_dataframe(metadata_version):
    """
    Build the list of test MetaPartitions for ``metadata_version``.

    Same as calling ``_get_meta_partitions_with_dataframe`` directly, except
    that the call runs inside ``cm_frozen_time(TIME_TO_FREEZE)``. Per the
    original description the partitions hold in-memory pd.DataFrames
    without external references, i.e. files are empty.

    """
    with cm_frozen_time(TIME_TO_FREEZE):
        partitions = _get_meta_partitions_with_dataframe(metadata_version)
    return partitions
Example #11
Source File: theta.py    From sktime with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def compute_pred_int(self, y_pred, alpha=DEFAULT_ALPHA):
        """
        Compute prediction intervals around ``y_pred``.

        The interval half-widths come from ``self._compute_pred_errors``.
        Each interval is a pd.DataFrame with a "lower" (``y_pred - error``)
        and an "upper" (``y_pred + error``) column.

        Returns a single pd.DataFrame when exactly one interval was
        computed, otherwise a list of pd.DataFrames (one per error entry).
        """
        errors = self._compute_pred_errors(alpha=alpha)

        # A lone pd.Series means a single alpha; normalise it to a
        # one-element list so both cases share the loop below.
        error_list = [errors] if isinstance(errors, pd.Series) else errors

        intervals = []
        for err in error_list:
            bounds = {"lower": y_pred - err, "upper": y_pred + err}
            intervals.append(pd.DataFrame(bounds))

        # Unwrap the single-interval case; otherwise hand back the list
        if len(intervals) != 1:
            return intervals
        return intervals[0]
Example #12
Source File: forecast_combine.py    From pysystemtrade with GNU General Public License v3.0 5 votes vote down vote up
def get_all_forecasts(self, instrument_code, rule_variation_list=None):
        """
        Forecasts for one instrument, for a set of trading rule variations

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :param rule_variation_list: rule variations to get forecasts for;
            when None, ``self.get_trading_rule_list(instrument_code)`` is used
        :type list: list of str

        :returns: result of ``self.get_forecasts_given_rule_list``
                  (TxN pd.DataFrame; columns rule_variation_name)

        >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping
        >>> from systems.basesystem import System
        >>> (fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping()
        >>> system1=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
        >>> system1.combForecast.get_all_forecasts("EDOLLAR",["ewmac8"]).tail(2)
                      ewmac8
        2015-12-10 -0.190583
        2015-12-11  0.871231
        >>>
        >>> system2=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
        >>> system2.combForecast.get_all_forecasts("EDOLLAR").tail(2)
                     ewmac16    ewmac8
        2015-12-10  3.134462 -0.190583
        2015-12-11  3.606243  0.871231
        """

        rules_to_use = rule_variation_list
        if rules_to_use is None:
            rules_to_use = self.get_trading_rule_list(instrument_code)

        return self.get_forecasts_given_rule_list(instrument_code, rules_to_use)
Example #13
Source File: forecast_combine.py    From pysystemtrade with GNU General Public License v3.0 5 votes vote down vote up
def get_capped_forecast(self, instrument_code, rule_variation_name):
        """
        Fetch the capped forecast for one instrument and trading rule variation

        Pure pass-through to the ``forecastScaleCap`` stage of the parent
        system.

        KEY INPUT

        :param instrument_code: instrument to look up
        :type str:

        :param rule_variation_name: name of the trading rule variation
        :type str:

        :returns: whatever the upstream stage returns; the example below
                  shows a Tx1 pd.DataFrame with column rule_variation_name

        >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping
        >>> from systems.basesystem import System
        >>> (fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping()
        >>> system=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
        >>> system.combForecast.get_capped_forecast("EDOLLAR","ewmac8").tail(2)
                      ewmac8
        2015-12-10 -0.190583
        2015-12-11  0.871231
        """

        scale_cap_stage = self.parent.forecastScaleCap
        return scale_cap_stage.get_capped_forecast(instrument_code,
                                                   rule_variation_name)
Example #14
Source File: accounts_inputs.py    From pysystemtrade with GNU General Public License v3.0 5 votes vote down vote up
def get_daily_price(self, instrument_code):
        """
        Fetch the daily price of an instrument from the parent system's data

        Pure pass-through to ``self.parent.data.daily_prices``.
        NOTE(review): the earlier description mentioned caching; no caching
        is visible in this method itself - confirm whether a decorator
        provides it.

        :param instrument_code: instrument to look up
        :type str:

        :returns: whatever ``daily_prices`` returns
                  (documented upstream as a Tx1 pd.DataFrame)

        """
        data_source = self.parent.data
        return data_source.daily_prices(instrument_code)
Example #15
Source File: struct.py    From quantipy with MIT License 5 votes vote down vote up
def set_qp_multiindex(df, x, y):
    '''
    Apply Quantipy's Question/Values layout to a pd.DataFrame by putting
    a two-level multiindex on both axes. The frame is modified in place.

    Parameters
    ----------
    df : pd.DataFrame

    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. ``x`` labels the rows; the columns are
        labelled with ``y``, or with ``x`` when ``y`` is None or '@'.

    Returns
    -------
    df : pd.DataFrame (Quantipy convention, multiindexed)
        The same object that was passed in.
    '''
    level_names = ['Question', 'Values']
    df.index = pd.MultiIndex.from_product([[x], df.index], names=level_names)

    # No y (None) and the '@' placeholder both fall back to x for the columns
    column_key = x if (y is None or y == '@') else y
    df.columns = pd.MultiIndex.from_product([[column_key], df.columns],
                                            names=level_names)
    return df
Example #16
Source File: functions.py    From quantipy with MIT License 5 votes vote down vote up
def set_qp_multiindex(df, x, y):
    '''
    Apply Quantipy's Question/Values layout to a pd.DataFrame by putting
    a two-level multiindex on both axes. The frame is modified in place.

    Parameters
    ----------
    df : pd.DataFrame

    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. ``x`` labels the rows. The columns are
        labelled ``(x, <column>)`` when ``y`` is None, ``(y, <column>)``
        for a regular ``y``, and the single pair ``(x, '@')`` when ``y``
        is '@' (the frame must then have exactly one column, otherwise
        pandas rejects the length mismatch).

    Returns
    -------
    df : pd.DataFrame (Quantipy convention, multiindexed)
        The same object that was passed in.
    '''
    axis_labels = ['Question', 'Values']
    df.index = pd.MultiIndex.from_product([[x], df.index], names=axis_labels)
    if y is None:
        df.columns = pd.MultiIndex.from_product([[x], df.columns], names=axis_labels)
    elif y == '@':
        # Every entry given to from_product must be list-like; a bare '@'
        # string is not and makes pandas raise TypeError, so wrap it.
        df.columns = pd.MultiIndex.from_product([[x], ['@']], names=axis_labels)
    else:
        df.columns = pd.MultiIndex.from_product([[y], df.columns], names=axis_labels)

    return df
Example #17
Source File: functions.py    From quantipy with MIT License 5 votes vote down vote up
def apply_viewdf_layout(df, x, y):
    '''
    Apply Quantipy's Question/Values layout to a pd.DataFrame by putting
    a two-level multiindex on both axes. The frame is modified in place.

    Parameters
    ----------
    df : pd.DataFrame

    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. ``x`` labels the rows. The columns are
        labelled ``(x, <column>)`` when ``y`` is None, ``(y, <column>)``
        for a regular ``y``, and the single pair ``(x, '@')`` when ``y``
        is '@' (the frame must then have exactly one column, otherwise
        pandas rejects the length mismatch).

    Returns
    -------
    df : pd.DataFrame (multiindexed)
        The same object that was passed in.
    '''
    axis_labels = ['Question', 'Values']
    df.index = pd.MultiIndex.from_product([[x], df.index], names=axis_labels)
    if y is None:
        df.columns = pd.MultiIndex.from_product([[x], df.columns], names=axis_labels)
    elif y == '@':
        # Every entry given to from_product must be list-like; a bare '@'
        # string is not and makes pandas raise TypeError, so wrap it.
        df.columns = pd.MultiIndex.from_product([[x], ['@']], names=axis_labels)
    else:
        df.columns = pd.MultiIndex.from_product([[y], df.columns], names=axis_labels)

    return df

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
Example #18
Source File: load_data.py    From CrypTen with MIT License 5 votes vote down vote up
def read_data(data_dir, dates):
    """Builds dataframes for model and func benchmarks. Assumes directory is structured as
     DATA_PATH
        |_2020-02-20
            |_func_benchmarks.csv
            |_model_benchmarks.csv

    Each per-date CSV gets a "date" column holding the subdirectory name;
    the per-date frames are then stacked (original row indexes are kept).
    The func frame is additionally passed through ``compute_runtime_gap``
    and ``add_error_bars``.

    Args:
        data_dir (str or pathlib.Path): path containing the date subdirectories
        dates (list of str): names of the date subdirectories to read

    Returns: tuple of pd.DataFrames containing func and model benchmarks with dates
    """
    func_frames, model_frames = [], []

    for date in dates:
        path = os.path.join(data_dir, date)
        tmp_func_df = pd.read_csv(os.path.join(path, "func_benchmarks.csv"))
        tmp_model_df = pd.read_csv(os.path.join(path, "model_benchmarks.csv"))
        tmp_func_df["date"], tmp_model_df["date"] = date, date
        func_frames.append(tmp_func_df)
        model_frames.append(tmp_model_df)

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; stack everything with a single concat.
    # With no dates the frames stay None, exactly as before.
    func_df = pd.concat(func_frames) if func_frames else None
    model_df = pd.concat(model_frames) if model_frames else None

    func_df = compute_runtime_gap(func_df)
    func_df = add_error_bars(func_df)
    return func_df, model_df
Example #19
Source File: transform_problem.py    From estimagic with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _check_params(params):
    """Check params has a unique index and contains no columns to be created internally.

    Args:
        params (pd.DataFrame or list of pd.DataFrames): See :ref:`params`.

    Raises:
        AssertionError: The index contains duplicates.
        ValueError: The DataFrame contains internal columns.

    """
    assert (
        not params.index.duplicated().any()
    ), "No duplicates allowed in the index of params."

    invalid_names = [
        "_fixed",
        "_fixed_value",
        "_is_fixed_to_value",
        "_is_fixed_to_other",
    ]
    invalid_present_columns = []
    for col in params.columns:
        if col in invalid_names or col.startswith("_internal"):
            invalid_present_columns.append(col)

    if len(invalid_present_columns) > 0:
        msg = (
            "Column names starting with '_internal' and as well as any other of the "
            f"following columns are not allowed in params:\n{invalid_names}."
            f"This is violated for:\n{invalid_present_columns}."
        )
        raise ValueError(msg) 
Example #20
Source File: data_processing.py    From AIAlpha with MIT License 4 votes vote down vote up
def make_train_test(self, df_x, df_y, window, csv_path, has_y=False, binary_y=False, save_csv=False):
        """
        Builds the target values and splits features and targets into train and test

        Target construction:
          - ``has_y`` true: a copy of ``df_y`` with its column renamed to 'y_values'
          - ``has_y`` false, ``window == 0``: the 'close' column of ``df_x``
          - ``has_y`` false, ``window != 0``: ``log(close / close.shift(-window))``
            with NaN rows dropped; the last ``window`` rows of ``df_x`` are cut off

        The split point is ``int(len(y_values) * self.split)``.
        NOTE(review): the test part starts one row AFTER the split point, so the
        row at the split point lands in neither train nor test - confirm this
        gap is intended.

        :param df_x: dataframe of x variables (needs a 'close' column unless has_y)
        :type df_x: pd.DataFrame
        :param df_y: dataframe of y values, only read when has_y is true
        :type df_y: pd.DataFrame
        :param window: the prediction window
        :type window: int
        :param csv_path: subdirectory of data/processed_data/ the CSVs are written to
        :type csv_path: str
        :param has_y: whether the target is supplied separately in df_y
        :type has_y: boolean
        :param binary_y: replace every target value by its sign (-1, 0 or 1)
        :type binary_y: boolean
        :param save_csv: write all six results to CSV files
        :type save_csv: boolean
        :return: fulldata, y_values, train_x, train_y, test_x, test_y
        :rtype: tuple of pandas objects
        """
        if has_y:
            y_values = df_y.copy()
            y_values.columns = ['y_values']
            fulldata = df_x.copy()
        elif window == 0:
            # A single column selection is a Series: there are no columns to rename
            y_values = df_x['close'].copy()
            fulldata = df_x.copy()
        else:
            y_values = np.log(df_x['close'].copy()/df_x['close'].copy().shift(-window)).dropna()
            fulldata = df_x.iloc[:-window, :].copy()
        if binary_y:
            # np.sign maps <0 / 0 / >0 to -1 / 0 / 1 for a Series as well as for a
            # single-column DataFrame; the former ['y_values'] lookup raised
            # KeyError whenever the target was a Series.
            y_values = np.sign(y_values)
        print(y_values.shape)
        print(fulldata.shape)

        split_at = int(len(y_values)*self.split)
        train_y = y_values.iloc[:split_at]
        test_y = y_values.iloc[split_at+1:]

        train_x = fulldata.iloc[:split_at, :]
        test_x = fulldata.iloc[split_at+1:len(y_values), :]

        print(train_y.shape)
        print(train_x.shape)

        if save_csv:
            train_x.to_csv(f'data/processed_data/{csv_path}/train_x.csv')
            train_y.to_csv(f'data/processed_data/{csv_path}/train_y.csv', header=['y_values'])
            test_x.to_csv(f'data/processed_data/{csv_path}/test_x.csv')
            test_y.to_csv(f'data/processed_data/{csv_path}/test_y.csv', header=['y_values'])
            fulldata.to_csv(f'data/processed_data/{csv_path}/full_x.csv')
            y_values.to_csv(f'data/processed_data/{csv_path}/full_y.csv', header=['y_values'])
        return fulldata, y_values, train_x, train_y, test_x, test_y
Example #21
Source File: pdutils.py    From pysystemtrade with GNU General Public License v3.0 4 votes vote down vote up
def find_dates_when_label_changes(original_data, new_data, col_names=dict(data='PRICE',
                                                                                        label='PRICE_CONTRACT')):
    """
    For two pd.DataFrames that each include a label column, find the date from which the labels
     of the original and the new data agree

    Only the label column (``col_names['label']``) is read. The labels are compared from the
     first index value of ``new_data`` onwards, forward filling gaps on either side.

    >>> s1=pd.DataFrame(dict(PRICE=[1,2,3,np.nan], PRICE_CONTRACT = ["a", "a", "b", "b"]), index=['a1','a2','a3','a4'])
    >>> s2=pd.DataFrame(dict(PRICE=[  2,3,4], PRICE_CONTRACT = [          "b", "b", "b"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    ('a3', 'a2')
    >>> s2=pd.DataFrame(dict(PRICE=[  2,3,4], PRICE_CONTRACT = [          "a", "b", "b"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    ('a2', 'a1')
    >>> s2=pd.DataFrame(dict(PRICE=[  2,3,4], PRICE_CONTRACT = [          "c", "c", "c"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    mismatch_on_last_day
    >>> find_dates_when_label_changes(s1, s1)
    original index matches new
    >>> s2=pd.DataFrame(dict(PRICE=[1, 2,3,4], PRICE_CONTRACT = ["a","c", "c", "c"]), index=['a1','a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    mismatch_on_last_day

    :param original_data: existing data, with a label column
    :param new_data: new data, with a label column
    :param col_names: dict of str; only the 'label' entry is used here
    :return: tuple (first date after the mismatch, last date of the mismatch), or one of the
             objects ``mismatch_on_last_day`` / ``original_index_matches_new`` if no such pair exists
    """
    label_col = col_names['label']

    # Old and new labels side by side, in index order
    labels = pd.concat([original_data[label_col], new_data[label_col]], axis=1)
    labels.columns = ['current', 'new']
    labels = labels.sort_index()

    first_new_date = new_data.index[0]

    current_labels = labels['current'][first_new_date:].ffill()
    new_labels = labels['new'][first_new_date:].ffill()

    # Last date on which the labels disagree, and the first date after it
    match_data = find_dates_when_series_starts_matching(current_labels, new_labels)

    if match_data is mismatch_on_last_day:
        # None of the new data is usable
        return mismatch_on_last_day

    if match_data is not all_labels_match:
        first_date_after_mismatch, last_date_of_mismatch = match_data
        return first_date_after_mismatch, last_date_of_mismatch

    # From here on every label in the new period matches
    if new_data.index[0] == original_data.index[0]:
        # Both start on the same date, so the whole of the original data is covered
        return original_index_matches_new

    # All of the new data is usable; the "mismatch" date is the last original
    # date before the new data begins
    earlier_dates = original_data.index[original_data.index < first_new_date]
    return first_new_date, earlier_dates[-1]
Example #22
Source File: transform_problem.py    From estimagic with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def _pre_process_arguments(
    params, algorithm, algo_options, logging, dashboard, dash_options
):
    """Process user supplied arguments without affecting the optimization problem.

    Args:
        params (pd.DataFrame): See :ref:`params`. Passed through
            ``_set_params_defaults_if_missing`` and validated by
            ``_check_params``.
        algorithm (str): Identifier of the optimization algorithm, handed to
            ``_process_algorithm``. See :ref:`list_of_algorithms` for
            supported values.
        algo_options (dict):
            algorithm specific configurations for the optimization
        logging: returned as ``database_path`` when ``dashboard`` is truthy
            (presumably a str or pathlib.Path pointing to the database -
            confirm against callers).
        dashboard (bool): Whether to create and show a dashboard, default is False.
            See :ref:`dashboard` for details.
        dash_options (dict, optional): Options passed to the dashboard;
            ``None`` means "use the defaults". Supported keys are:
                - port (int): port where to display the dashboard
                - no_browser (bool): whether to display the dashboard in a browser
                - rollover (int): how many iterations to keep in the monitoring plots

    Returns:
        optim_kwargs (dict): dictionary collecting the arguments that are going to be
            passed to _internal_minimize
        params (pd.DataFrame): The expanded params DataFrame with all needed columns.
            See :ref:`params`.
        dash_options (dict): the supplied dashboard options merged over the defaults.
        database_path (str or pathlib.Path or None): path to the database;
            None when no dashboard is requested.

    """
    standard_dash_options = {"no_browser": False, "port": None, "rollover": 500}
    # dash_options is documented as optional: treat None as "no overrides"
    # instead of failing on the ** unpacking below.
    user_dash_options = {} if dash_options is None else dash_options
    # important for the user options to be last for standards to be overwritten
    dash_options = {**standard_dash_options, **user_dash_options}

    origin, algo_name = _process_algorithm(algorithm)
    optim_kwargs = {
        "origin": origin,
        "algo_name": algo_name,
        "algo_options": algo_options,
    }

    params = _set_params_defaults_if_missing(params)
    _check_params(params)

    database_path = logging if dashboard else None

    return optim_kwargs, params, dash_options, database_path