Python pandas.MultiIndex() Examples

The following code examples show how to use pandas.MultiIndex(). They are extracted from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.

Example 1
Project: fireant   Author: kayak   File: notebooks.py    (license) View Source Project 8 votes vote down vote up
def _set_display_options(self, dataframe, display_schema):
    """
    Replaces the dimension options with those that the user has specified manually e.g. change 'm' to 'mobile'

    :param dataframe: Data frame whose index level(s) are addressed by the dimension keys.
    :param display_schema: Mapping with a 'dimensions' dict; a dimension may carry a
        'display_options' dict mapping raw values to display labels.
    :return: A relabelled copy of the data frame; the input is not modified.
    """
    dataframe = dataframe.copy()

    for key, dimension in display_schema['dimensions'].items():
        if 'display_options' not in dimension:
            continue

        options = dimension['display_options']

        # Looking the level up first keeps the KeyError for dimensions that are not part of the index.
        level_values = dataframe.index.get_level_values(key)
        if not len(level_values):
            continue

        if isinstance(dataframe.index, pd.MultiIndex):
            # Relabel the level values in their stored order so the existing codes keep pointing at the
            # right entries (the order of appearance in the rows may differ from the level order).
            names = list(dataframe.index.names)
            level_number = names.index(key) if key in names else key
            relabelled = [options.get(value, value)
                          for value in dataframe.index.levels[level_number]]
            # set_levels returns a new index; the inplace flag no longer exists in current pandas.
            dataframe.index = dataframe.index.set_levels(relabelled, level=key)

        else:
            # Map every row (not only the unique values) so repeated index values keep the right length,
            # and carry the index name over.
            dataframe.index = pd.Index([options.get(value, value) for value in level_values],
                                       name=dataframe.index.name)

    return dataframe
Example 2
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 6 votes vote down vote up
def test_three_iterations_no_metadata(self):
    """Summarise three iterations at two depths for three samples, without metadata."""
    sample_ids = ['S1', 'S2', 'S3']
    depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2, 3]],
                                            names=['depth', 'iter'])
    alpha = pd.DataFrame(data=[[1, 2, 3, 4, 5, 6]] * 3,
                         columns=depth_iter, index=sample_ids)

    # No counts provided because no metadata
    observed = _compute_summary(alpha, 'sample-id')

    # Every sample has the same values, so the expected statistics only depend on the depth.
    stats_by_depth = {
        1: [1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
        200: [4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.],
    }
    rows = [[sample, depth, 1] + stats_by_depth[depth]
            for sample in sample_ids
            for depth in (1, 200)]
    header = ['sample-id', 'depth', 'count',
              'min', '2%', '9%', '25%', '50%',
              '75%', '91%', '98%', 'max']
    expected = pd.DataFrame(data=rows, columns=header)
    pdt.assert_frame_equal(expected, observed)
Example 3
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 6 votes vote down vote up
def test_two_iterations_with_metadata_were_values_are_identical(self):
    """Summarise a single metadata group ('milo') over two iterations, with counts supplied."""
    depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2]],
                                            names=['depth', 'iter'])
    group = ['milo']
    alpha = pd.DataFrame(data=[[3, 6, 9, 9]], columns=depth_iter, index=group)
    group_sizes = pd.DataFrame(data=[[3, 3, 3, 3]], columns=depth_iter, index=group)

    observed = _compute_summary(alpha, 'pet', counts=group_sizes)

    header = ['pet', 'depth', 'min', '2%', '9%',
              '25%', '50%', '75%', '91%', '98%',
              'max', 'count']
    rows = [
        ['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
        # At depth 200 both iterations are 9, so every statistic is 9.
        ['milo', 200, 9., 9., 9., 9., 9., 9., 9., 9., 9., 3],
    ]
    expected = pd.DataFrame(data=rows, columns=header)
    pdt.assert_frame_equal(expected, observed)
Example 4
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 6 votes vote down vote up
def test_some_duplicates_in_category(self):
    """Re-index by 'pet' when two of the three samples share a category value."""
    column_tuples = [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')]
    columns = pd.MultiIndex.from_tuples(column_tuples, names=['depth', 'iter'])
    rows = [[1, 2, 3, 4, 'russ'],
            [5, 6, 7, 8, 'milo'],
            [9, 10, 11, 12, 'russ']]
    data = pd.DataFrame(data=rows, columns=columns, index=['S1', 'S2', 'S3'])

    obs = _reindex_with_metadata('pet', ['pet'], data)

    # The expected columns keep the 'pet' / '' level entries even though no column uses them.
    # NOTE(review): `labels=` was renamed to `codes=` in later pandas releases - confirm the
    # pandas version this suite targets before changing it.
    exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                            names=['depth', 'iter'])
    exp_ind = pd.Index(['milo', 'russ'], name='pet')

    expected_values = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                                   columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(expected_values, obs[0])

    expected_counts = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
                                   columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(expected_counts, obs[1])
Example 5
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 6 votes vote down vote up
def test_all_identical(self):
    """Re-index by 'pet' when all three samples share the same category value."""
    column_tuples = [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')]
    columns = pd.MultiIndex.from_tuples(column_tuples, names=['depth', 'iter'])
    rows = [[1, 2, 3, 4, 'russ'],
            [5, 6, 7, 8, 'russ'],
            [9, 10, 11, 12, 'russ']]
    data = pd.DataFrame(data=rows, columns=columns, index=['S1', 'S2', 'S3'])

    obs = _reindex_with_metadata('pet', ['pet'], data)

    # The expected columns keep the 'pet' / '' level entries even though no column uses them.
    # NOTE(review): `labels=` was renamed to `codes=` in later pandas releases - confirm the
    # pandas version this suite targets before changing it.
    exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                            names=['depth', 'iter'])
    exp_ind = pd.Index(['russ'], name='pet')

    expected_values = pd.DataFrame(data=[[5, 6, 7, 8]],
                                   columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(expected_values, obs[0])

    expected_counts = pd.DataFrame(data=[[3, 3, 3, 3]],
                                   columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(expected_counts, obs[1])
Example 6
Project: feagen   Author: ianlini   File: data_handlers.py    (license) View Source Project 6 votes vote down vote up
def write_data(self, result_dict):
    """Validate every result and store it in ``self.hdf_store``, then flush the store.

    Each value must be a pandas DataFrame or Series without missing values;
    anything else raises ``ValueError``. A DataFrame whose index and columns are
    both MultiIndex is stored with the store's default format, everything else
    with ``format='table'``.
    """
    for key, result in six.iteritems(result_dict):
        if isinstance(result, pd.DataFrame):
            has_nan = result.isnull().any().any()
        elif isinstance(result, pd.Series):
            has_nan = result.isnull().any()
        else:
            raise ValueError("PandasHDFDataHandler doesn't support type "
                             "{} (in key {})".format(type(result), key))
        if has_nan:
            raise ValueError("data {} have nan".format(key))

        message = "Writing generated data {} to hdf5 file".format(key)
        with SimpleTimer(message, end_in_new_line=False):
            doubly_multi_indexed = (isinstance(result, pd.DataFrame)
                                    and isinstance(result.index, pd.MultiIndex)
                                    and isinstance(result.columns, pd.MultiIndex))
            if doubly_multi_indexed:
                self.hdf_store.put(key, result)
            else:
                self.hdf_store.put(key, result, format='table')
    self.hdf_store.flush(fsync=True)
Example 7
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 6 votes vote down vote up
def merge_interictal_preictal(interictal, preictal):
    """
    Merges the *interictal* and *preictal* data frames to a single data frame. Also sorts the multilevel index.

    Note that both input frames are sorted in place as a side effect.

    :param interictal: A data frame containing the interictal samples.
    :param preictal: A data frame containing the preictal samples.
    :return: A data frame containing both interictal and preictal data. The multilevel index of the data frame
             is sorted.
    """

    logging.info("Merging interictal and preictal datasets")
    try:
        # DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...) is the
        # equivalent call.
        for frame in (preictal, interictal):
            frame.sort_index(level='segment', inplace=True)
            if isinstance(frame.columns, pd.MultiIndex):
                frame.sort_index(axis=1, level=0, inplace=True)
    except TypeError:
        # Best effort: the concatenated frame is still sorted on 'segment' below.
        logging.warning("TypeError when trying to merge interictal and preictal sets.")

    dataset = pd.concat((interictal, preictal))
    dataset.sort_index(level='segment', inplace=True)
    return dataset
Example 8
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 6 votes vote down vote up
def test_k_fold_segment_split():
    """ Test function for the k-fold segment split """
    interictal_classes = np.zeros(120)
    preictal_classes = np.ones(120)
    classes = np.concatenate((interictal_classes, preictal_classes,))
    segments = np.arange(12)
    i = np.arange(240)

    index = pd.MultiIndex.from_product([segments, np.arange(20)], names=('segment', 'start_sample'))

    dataframe = pd.DataFrame({'Preictal': classes, 'i': i}, index=index)

    # Two cross validators built with the same seed must produce identical folds.
    # (With 6 folds over 12 segments, each held-out fold is expected to contain 2 segments;
    # that property is not asserted here.)
    cv1 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
    cv2 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)

    for (training_fold1, test_fold1), (training_fold2, test_fold2) in zip(cv1, cv2):
        # Compare across the two validators; comparing a fold with itself always passes.
        assert np.all(training_fold1 == training_fold2) and np.all(test_fold1 == test_fold2)
Example 9
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 6 votes vote down vote up
def load_preictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
    """
    Convenience function for loading preictal dataframes. Sets the 'Preictal' column to 1.
    :param feature_folder: The folder to load the feature data from.
    :param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
    :param kwargs: keyword arguments to use for loading the features.
    :return: A DataFrame of preictal data with a 'Preictal' column set to 1, sorted on the 'segment' level.
    """
    preictal = load_feature_files(feature_folder,
                                  class_name="preictal",
                                  sliding_frames=sliding_frames,
                                  **kwargs)
    preictal['Preictal'] = 1
    # DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...) is the equivalent call.
    preictal.sort_index(level='segment', inplace=True)
    if isinstance(preictal.columns, pd.MultiIndex):
        preictal.sort_index(axis=1, level=0, inplace=True)
    return preictal
Example 10
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 6 votes vote down vote up
def load_interictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
    """
    Convenience function for loading interictal dataframes. Sets the 'Preictal' column to 0.
    :param feature_folder: The folder to load the feature data from.
    :param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
    :param kwargs: keyword arguments to use for loading the features.
    :return: A DataFrame of interictal data with a 'Preictal' column set to 0, sorted on the 'segment' level.
    """

    # Request the interictal class; asking for "preictal" here would label preictal data as class 0.
    interictal = load_feature_files(feature_folder,
                                    class_name="interictal",
                                    sliding_frames=sliding_frames,
                                    **kwargs)
    interictal['Preictal'] = 0
    # DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...) is the equivalent call.
    interictal.sort_index(level='segment', inplace=True)
    if isinstance(interictal.columns, pd.MultiIndex):
        interictal.sort_index(axis=1, level=0, inplace=True)
    return interictal
Example 11
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 6 votes vote down vote up
def create_sliding_frames(dataframe, frame_length=12):
    """
    Wrapper for the extend_data_with_sliding_frames function which works with numpy arrays.
    This version does the data-frame conversion for us.

    :param dataframe: The dataframe to extend.
    :param frame_length: The frame length to use in the resulting extended data frame.
    :return: A new data frame where the original dataframe has been extended with sliding frames.
             The result gets a fresh default row index.
    """
    # Forward frame_length so the extended array and the column labels built below agree.
    # NOTE(review): assumes the helper takes the frame length as its second positional
    # argument - confirm against its definition.
    extended_array = extend_data_with_sliding_frames(dataframe.values, frame_length)
    # We should preserve the columns of the dataframe, otherwise
    # concatenating different dataframes along the row-axis will give
    # wrong results
    window_columns = dataframe.columns
    column_index = pd.MultiIndex.from_product([range(frame_length),
                                               window_columns],
                                              names=['window', 'feature'])
    return pd.DataFrame(data=extended_array,
                        columns=column_index)
Example 12
Project: meterstick   Author: google   File: metrics_test.py    (license) View Source Project 6 votes vote down vote up
def testTwoDimensionalCumulativeDistribution(self):
    """CumulativeDistribution of X over the (Y, Z) pairs with unit weights."""
    df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                       "Y": [1, 2, 0, 1, 1, 1, 1],
                       "Z": [1, 0, 0, 0, 0, 0, 0]})
    weights = np.array([1, 1, 1, 1, 1, 1, 1])
    metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
    output = metric(df, weights)
    # from_tuples builds the same index as the explicit levels/labels form, without the
    # `labels=` keyword that current pandas no longer accepts.
    correct = pd.DataFrame(
        np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
        columns=[""],
        index=pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 1), (2, 0)],
                                        names=["Y", "Z"]))
    # np.all reduces the 2-D value comparison in one step, unlike the builtin all().
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Example 13
Project: meterstick   Author: google   File: metrics_test.py    (license) View Source Project 6 votes vote down vote up
def testShuffledTwoDimensionalCumulativeDistribution(self):
    """CumulativeDistribution must give the same result when the rows are shuffled."""
    df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                       "Y": [1, 2, 0, 1, 1, 1, 1],
                       "Z": [1, 0, 0, 0, 0, 0, 0]})
    weights = np.array([1, 1, 1, 1, 1, 1, 1])
    metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
    # Only the rows are permuted (unseeded); the weights are all ones, so their order is irrelevant.
    output = metric(df.iloc[np.random.permutation(7)], weights)
    # from_tuples builds the same index as the explicit levels/labels form, without the
    # `labels=` keyword that current pandas no longer accepts.
    correct = pd.DataFrame(
        np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
        columns=[""],
        index=pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 1), (2, 0)],
                                        names=["Y", "Z"]))
    # np.all reduces the 2-D value comparison in one step, unlike the builtin all().
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Example 14
Project: meterstick   Author: google   File: core_test.py    (license) View Source Project 6 votes vote down vote up
def testRelativeToSplitJackknife(self):
    """Sum(X) split by Y, compared against Z == 0, with jackknife standard errors."""
    data = pd.DataFrame(
        {"X": [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8],
         "Y": [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3],
         "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]})

    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Z", 0)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).split_by("Y").relative_to(
        comparison).with_standard_errors(se_method).calculate(metric).run()

    # from_tuples builds the same index as the explicit levels/labels form, without the
    # `labels=` keyword that current pandas no longer accepts.
    rowindex = pd.MultiIndex.from_tuples(
        [(1, 1), (2, 1), (3, 1)],
        names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([[-3.0, np.sqrt(5 * np.var([0, -1, -2, -3, -4, -5]))],
                  [-3.0, np.sqrt(5 * np.var([3, 2, 1, -8, -7, -6]))],
                  [-3.0, np.sqrt(5 * np.var([6, 5, 4, -11, -10, -9]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 15
Project: meterstick   Author: google   File: core_test.py    (license) View Source Project 6 votes vote down vote up
def testDataframeRelativeTo(self):
    """Distribution of X over Z, expressed as the absolute difference relative to Y == 0."""
    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    output = core.Analyze(df).relative_to(comparisons.AbsoluteDifference(
        "Y", 0)).calculate(metric).run()

    # from_tuples builds the same index as the explicit levels/labels form, without the
    # `labels=` keyword that current pandas no longer accepts.
    correct = pd.DataFrame(
        np.array([-0.2, 0.2]),
        columns=["X Distribution Absolute Difference"],
        index=pd.MultiIndex.from_tuples([(1., 0.), (1., 1.)],
                                        names=["Y", "Z"]))

    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Example 16
Project: meterstick   Author: google   File: core_test.py    (license) View Source Project 6 votes vote down vote up
def testSplitDataframe(self):
    """Distribution of X over Z, computed separately for each value of Y."""
    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    output = core.Analyze(df).split_by(["Y"]).calculate(metric).run()

    # The expected index is the full Y x Z product; from_product builds it without the
    # `labels=` keyword that current pandas no longer accepts.
    correct = pd.DataFrame(
        np.array([0.2, 0.8, 0.0, 1.0]),
        columns=["X Distribution"],
        index=pd.MultiIndex.from_product([[0.0, 1.0], [0.0, 1.0]],
                                         names=["Y", "Z"]))

    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Example 17
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: common.py    (license) View Source Project 6 votes vote down vote up
def _isnull_old(obj):
    """Detect missing values. Treat None, NaN, INF, -INF as null.

    Parameters
    ----------
    obj: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean

    Raises
    ------
    NotImplementedError
        If ``obj`` is a MultiIndex.
    """
    if lib.isscalar(obj):
        return lib.checknull_old(obj)

    # hack (for now) because MI registers as ndarray
    if isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")

    if isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike_old(obj)

    if isinstance(obj, ABCGeneric):
        # Rebuild the container around the null mask of its internal data.
        null_mask = obj._data.isnull(func=_isnull_old)
        return obj._constructor(null_mask)

    if isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike_old(np.asarray(obj))

    # Any other object counts as null only when it is None.
    return obj is None
Example 18
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_base.py    (license) View Source Project 6 votes vote down vote up
def test_equals_op_multiindex(self):
    """Element-wise ``==`` between a MultiIndex and other indexes (GH9785)."""
    from pandas.compat import StringIO
    csv_text = 'a,b,c\n1,2,3\n4,5,6'
    df = pd.read_csv(StringIO(csv_text), index_col=[0, 1])
    tm.assert_numpy_array_equal(df.index == df.index,
                                np.array([True, True]))

    # Same length: the comparison is element-wise.
    same = MultiIndex.from_tuples([(1, 2), (4, 5)])
    tm.assert_numpy_array_equal(df.index == same, np.array([True, True]))
    last_differs = MultiIndex.from_tuples([(1, 2), (4, 6)])
    tm.assert_numpy_array_equal(df.index == last_differs,
                                np.array([True, False]))

    # Different length: the comparison must raise.
    longer = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        df.index == longer

    flat = Index(['foo', 'bar', 'baz'])
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        df.index == flat
    # A flat index against a MultiIndex of the same length compares unequal everywhere.
    tm.assert_numpy_array_equal(flat == longer,
                                np.array([False, False, False]))
Example 19
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_reshape.py    (license) View Source Project 6 votes vote down vote up
def test_stack_ints(self):
    """Stacking several integer column levels at once equals stacking them one by one."""
    column_tuples = list(itertools.product(range(3), repeat=3))
    df = DataFrame(np.random.randn(30, 27),
                   columns=MultiIndex.from_tuples(column_tuples))

    # Positive and negative level numbers must select the same two levels.
    for levels in ([1, 2], [-2, -1]):
        assert_frame_equal(df.stack(level=levels),
                           df.stack(level=1).stack(level=1))

    # Same check once the levels carry integer names.
    df_named = df.copy()
    df_named.columns.set_names(range(3), inplace=True)
    assert_frame_equal(df_named.stack(level=[1, 2]),
                       df_named.stack(level=1).stack(level=1))
Example 20
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_reshape.py    (license) View Source Project 6 votes vote down vote up
def test_unstack_level_binding(self):
    """Unstacking two levels and stacking one back keeps values bound to their levels (GH9856)."""
    level_values = [[u('foo'), u('bar')],
                    [u('one'), u('two')],
                    [u('a'), u('b')]]
    level_names = [u('first'), u('second'), u('third')]
    mi = pd.MultiIndex(levels=level_values,
                       labels=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]],
                       names=level_names)
    result = pd.Series(0, index=mi).unstack([1, 2]).stack(0)

    nan = np.nan
    expected_values = np.array([[nan, 0],
                                [0, nan],
                                [nan, 0],
                                [0, nan]],
                               dtype=np.float64)
    expected_mi = pd.MultiIndex(levels=[['foo', 'bar'], ['one', 'two']],
                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                names=['first', 'second'])
    expected = pd.DataFrame(expected_values,
                            index=expected_mi,
                            columns=pd.Index(['a', 'b'], name='third'))

    assert_frame_equal(result, expected)
Example 21
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_reshape.py    (license) View Source Project 6 votes vote down vote up
def test_unstack_to_series(self):
    """Unstacking a frame gives a Series; check round trip, NA handling and repeated unstack."""
    # check reversibility
    unstacked = self.frame.unstack()
    self.assertTrue(isinstance(unstacked, Series))
    assert_frame_equal(unstacked.unstack().T, self.frame)

    # check NA handling
    data = DataFrame({'x': [1, 2, np.NaN], 'y': [3.0, 4, np.NaN]})
    data.index = Index(['a', 'b', 'c'])
    result = data.unstack()

    expected_index = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']],
                                labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
    expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=expected_index)
    assert_series_equal(result, expected)

    # check composability of unstack: four unstacks in a row give back the original frame
    original = data.copy()
    rounds = 4
    while rounds:
        data = data.unstack()
        rounds -= 1
    assert_frame_equal(original, data)
Example 22
Project: powerplantmatching   Author: FRESNA   File: plot.py    (license) View Source Project 6 votes vote down vote up
def bar_fueltype_and_country_totals(dfs, keys, figsize=(12,8)):
    """Draw one bar chart per country on a grid of subplots.

    The frame comes from ``lookup(dfs, keys)``; countries are the first column
    level for MultiIndex columns, otherwise the columns themselves.

    Returns the figure and the axes object that ``ax`` is bound to at the end
    (the last axes drawn on whenever at least one country was plotted).
    """
    df = lookup(dfs, keys)
    if isinstance(df.columns, pd.MultiIndex):
        countries = df.columns.levels[0]
    else:
        countries = df.columns
    grid_shape = gather_nrows_ncols(len(countries))
    fig, ax = plt.subplots(*grid_shape, figsize=figsize)

    # Wrap the result in an array when the grid sum is not above 2, so `.flat` is always available.
    ax_iter = ax.flat if sum(grid_shape) > 2 else np.array(ax).flat
    for country in countries:
        ax = next(ax_iter)
        df[country].plot.bar(ax=ax, sharex=True, rot=55, legend=None)
        ax.ticklabel_format(axis='y', style='sci', scilimits=(-2,2))
        ax.set_title(country)
        fig.tight_layout(pad=0.5)
    return fig, ax
Example 23
Project: PyBloqs   Author: manahl   File: table.py    (license) View Source Project 6 votes vote down vote up
def _get_header_iterable(self):
    """Reformats all but the last header rows.

    Returns a list with one entry per column level of ``self.df`` (columns whose
    first-level value equals ORG_ROW_NAMES are dropped first). Every level except
    the last is a list of runs of consecutive equal labels; the last level is the
    plain list of labels. For single-level columns the result is a one-element list.
    """
    df_clean = self.df.loc[:, self.df.columns.get_level_values(0) != ORG_ROW_NAMES]
    if not isinstance(df_clean.columns, pd.MultiIndex):
        return [df_clean.columns.tolist()]

    # Materialise the transposed tuples: on Python 3 zip() is an iterator without len().
    header_rows = list(zip(*df_clean.columns.tolist()))
    last_row = len(header_rows) - 1
    header_values = []
    for i, row in enumerate(header_rows):
        if i < last_row:
            # Not the last header row, aggregate repeated items, e.g. [['aa', 'aa', 'aa'], ['bb', 'bb', 'bb']]
            header_values.append([list(g) for _, g in itertools.groupby(row)])
        else:
            # For the last header row keep all elements in single list, e.g. ['a', 'b', 'c', 'a', 'b', 'c']
            header_values.append(list(row))
    return header_values
Example 24
Project: fireant   Author: kayak   File: postprocessors.py    (license) View Source Project 6 votes vote down vote up
def _perform_operation(self, dataframe, key, schema, value_func, operation):
    """
    Apply ``operation`` to the metric selected by ``value_func`` and store the result as a
    new column of ``dataframe`` (modified in place; nothing is returned).

    :param dataframe: Data frame to extend. MultiIndex columns are treated as
        (reference, metric) pairs.
    :param key: Suffix appended to the metric name to form the new column name.
    :param schema: Passed through to ``value_func``.
    :param value_func: Callable ``(dataframe, schema, reference=...)`` returning the metric series.
    :param operation: Callable applied to the metric series (per group for a MultiIndex index).
    """
    # Check for references. get_level_values repeats a reference once per column, so
    # de-duplicate to run the operation only once per reference.
    references = (dataframe.columns.get_level_values(0).unique().tolist()
                  if isinstance(dataframe.columns, pd.MultiIndex)
                  else [None])

    for reference in references:
        metric_df = value_func(dataframe, schema, reference=reference)

        operation_key = ('{}_{}'.format(metric_df.name, key)
                         if reference is None
                         else (reference, '{}_{}'.format(metric_df.name[1], key)))

        if isinstance(dataframe.index, pd.MultiIndex):
            # Apply the operation within each group formed by every index level but the first.
            group_levels = list(range(1, len(dataframe.index.levels)))
            dataframe[operation_key] = metric_df.groupby(level=group_levels).apply(operation)

        else:
            dataframe[operation_key] = operation(metric_df)
Example 25
Project: fireant   Author: kayak   File: datatables.py    (license) View Source Project 6 votes vote down vote up
def _render_data(self, dataframe, display_schema):
    """
    Render every row of the data frame as a dict.

    The first ``n`` dimensions of the display schema (``n`` = number of index levels) are
    handed to ``_render_dimension_data`` with the row's index tuple; the remaining dimensions
    are handed to ``_render_metric_data`` with the row itself.

    :return: A list with one dict per data frame row.
    """
    index = dataframe.index
    if isinstance(index, pd.MultiIndex):
        n_row_dims = len(index.levels)
    else:
        n_row_dims = 1

    all_dimensions = list(display_schema['dimensions'].items())
    row_dimensions = all_dimensions[:n_row_dims]
    column_dimensions = all_dimensions[n_row_dims:]

    rendered = []
    for idx, df_row in dataframe.iterrows():
        # A single-level index yields scalars; normalise to a tuple.
        idx_tuple = idx if isinstance(idx, tuple) else (idx,)

        row = {}
        row.update(self._render_dimension_data(idx_tuple, row_dimensions))
        row.update(self._render_metric_data(df_row, column_dimensions,
                                            display_schema['metrics'],
                                            display_schema.get('references')))
        rendered.append(row)

    return rendered
Example 26
Project: xarray_filters   Author: ContinuumIO   File: multi_index.py    (license) View Source Project 6 votes vote down vote up
def create_multi_index(arr):
    '''Build a pandas.MultiIndex from the coordinates of a DataArray

    Parameters
    ----------

    arr: xarray.DataArray

    Returns
    -------

    index: pandas.MultiIndex holding the cartesian product of the
           coordinate values of every dimension in arr.dims, with the
           dimension names as level names

    '''
    coord_values = []
    for dim in arr.dims:
        # Each dimension's coordinate is read as an attribute of the array.
        coord_values.append(getattr(arr, dim).values)
    return pd.MultiIndex.from_product(tuple(coord_values), names=arr.dims)
Example 27
Project: perfume   Author: leifwalsh   File: test_perfume.py    (license) View Source Project 6 votes vote down vote up
def setUp(self):
    """Build ``self.samples``: 20 rows of increasing timestamps under (fn1|fn2, begin|end) columns."""
    samples = []
    t = 1.0
    for _ in range(20):
        sample = []
        # Record the current time, then advance by the gap to the next recorded timestamp.
        for step in (1.1, 0.2, 1.5, 0.1):
            sample.append(t)
            t += step
        samples.append(sample)
    # from_product yields the same levels and codes as the explicit levels/labels form,
    # without the `labels=` keyword that current pandas no longer accepts.
    self.samples = pd.DataFrame(
        data=samples,
        columns=pd.MultiIndex.from_product(
            [['fn1', 'fn2'], ['begin', 'end']]
        )
    )
Example 28
Project: crop-seq   Author: epigen   File: analysis.py    (license) View Source Project 6 votes vote down vote up
def read_seurat_hdf5(hdf5_file):
    """Read the "seurat_matrix" group of an HDF5 file into a data frame.

    The stored matrix is transposed after loading. The resulting column names are
    split on "|" into condition, replicate, cell and gRNA, and a gene name is derived
    from the gRNA name; these five fields become the levels of the column MultiIndex.

    :param hdf5_file: Path of the HDF5 file to read.
    :return: The transposed matrix as a pandas DataFrame with MultiIndex columns.
    """
    import h5py
    with h5py.File(hdf5_file, 'r') as handle:
        # Index with [()] to read the whole dataset; the `.value` attribute no longer
        # exists in current h5py.
        cols = handle["seurat_matrix/columns"][()]
        rows = handle["seurat_matrix/rows"][()]
        df = handle["seurat_matrix/matrix"][()]
    seurat_matrix = pd.DataFrame(df, index=cols, columns=rows).T

    # add info as multiindex columns
    # Split once and build real lists: on Python 3 map() returns a one-shot iterator, which
    # would already be exhausted when reused for the gene names and the index.
    fields = list(seurat_matrix.columns.str.split("|"))
    condition = [f[0] for f in fields]
    replicate = [f[1] for f in fields]
    cell = [f[2] for f in fields]
    grna = [f[3] for f in fields]
    # The gene is the second "_"-separated token of the gRNA name, or else its first four characters.
    gene = [parts[1] if len(parts) > 1 else parts[0][:4]
            for parts in (name.split("_") for name in grna)]
    seurat_matrix.columns = pd.MultiIndex.from_arrays(
        [condition, replicate, cell, grna, gene],
        names=['condition', 'replicate', 'cell', 'grna', 'gene'])

    return seurat_matrix
Example 29
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_observed_otus(self):
    """Rarefaction data for 'observed_otus' at depths 1 and 200 with a single iteration."""
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    sample_ids = ['S1', 'S2', 'S3']
    table = biom.Table(counts, ['O1', 'O2'], sample_ids)

    observed = _compute_rarefaction_data(feature_table=table,
                                         min_depth=1,
                                         max_depth=200,
                                         steps=2,
                                         iterations=1,
                                         phylogeny=None,
                                         metrics=['observed_otus'])

    depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                            names=['depth', 'iter'])
    expected = pd.DataFrame(data=[[1, 2]] * 3,
                            columns=depth_iter,
                            index=sample_ids)
    pdt.assert_frame_equal(observed['observed_otus'], expected)
Example 30
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_multiple_metrics(self):
    """Rarefaction data computed for two metrics at once yields one frame per metric."""
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    sample_ids = ['S1', 'S2', 'S3']
    table = biom.Table(counts, ['O1', 'O2'], sample_ids)

    observed = _compute_rarefaction_data(feature_table=table,
                                         min_depth=1,
                                         max_depth=200,
                                         steps=2,
                                         iterations=1,
                                         phylogeny=None,
                                         metrics=['observed_otus', 'shannon'])

    depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                            names=['depth', 'iter'])

    expected_otus = pd.DataFrame(data=[[1, 2]] * 3,
                                 columns=depth_iter,
                                 index=sample_ids)
    pdt.assert_frame_equal(observed['observed_otus'], expected_otus)

    expected_shannon = pd.DataFrame(
        data=[[0., 0.811278124459], [0., 1.], [0., 1.]],
        columns=depth_iter,
        index=sample_ids)
    pdt.assert_frame_equal(observed['shannon'], expected_shannon)
Example 31
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_one_iteration_no_metadata(self):
        """Summarize a single-iteration table when no metadata is supplied.

        With only one iteration per depth, every summary statistic of a
        (sample, depth) row is expected to equal the single input value.
        """
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                                names=['depth', 'iter'])
        rarefied = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
                                columns=depth_iter,
                                index=['S1', 'S2', 'S3'])

        # No counts provided because no metadata
        summary = _compute_summary(rarefied, 'sample-id')

        stat_names = ['min', '2%', '9%', '25%', '50%', '75%', '91%', '98%',
                      'max']
        # One row per (sample, depth): id, depth, count, then the same value
        # repeated for each of the nine statistics.
        rows = [[sample, depth, 1] + [value] * len(stat_names)
                for sample in ('S1', 'S2', 'S3')
                for depth, value in ((1, 1.), (200, 2.))]
        expected = pd.DataFrame(
            data=rows,
            columns=['sample-id', 'depth', 'count'] + stat_names)
        pdt.assert_frame_equal(expected, summary)
Example 32
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_two_iterations_no_metadata(self):
        """Summarize a two-iteration table when no metadata is supplied.

        Each sample has values (1, 2) at depth 1 and (3, 4) at depth 200; the
        expected statistics per (sample, depth) row are spelled out below.
        """
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2]],
                                                names=['depth', 'iter'])
        rarefied = pd.DataFrame(
            data=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
            columns=depth_iter,
            index=['S1', 'S2', 'S3'])

        # No counts provided because no metadata
        summary = _compute_summary(rarefied, 'sample-id')

        # Expected statistics, in column order min .. max, for each depth.
        stats_by_depth = (
            (1, [1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2.]),
            (200, [3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4.]),
        )
        rows = [[sample, depth, 1] + stats
                for sample in ('S1', 'S2', 'S3')
                for depth, stats in stats_by_depth]
        expected = pd.DataFrame(
            data=rows,
            columns=['sample-id', 'depth', 'count',
                     'min', '2%', '9%', '25%', '50%',
                     '75%', '91%', '98%', 'max'])
        pdt.assert_frame_equal(expected, summary)
Example 33
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_unique_metadata_groups(self):
        """Reindex by a metadata column whose value is unique per sample.

        The result of ``_reindex_with_metadata`` is indexed positionally:
        element 0 is compared with the input rows re-keyed (and ordered) by
        'pet', element 1 with an all-ones frame of the same layout.
        """
        input_columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')],
            names=['depth', 'iter'])
        samples = pd.DataFrame(
            data=[[1, 2, 3, 4, 'russ'],
                  [5, 6, 7, 8, 'milo'],
                  [9, 10, 11, 12, 'peanut']],
            columns=input_columns,
            index=['S1', 'S2', 'S3'])

        result = _reindex_with_metadata('pet', ['pet'], samples)

        # The 'pet' level value stays in `levels` but is not referenced by
        # any label, so only the four (depth, iter) columns are expected.
        expected_columns = pd.MultiIndex(
            levels=[[1, 200, 'pet'], [1, 2, '']],
            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=['depth', 'iter'])
        pets = pd.Index(['milo', 'peanut', 'russ'], name='pet')

        expected_values = pd.DataFrame(
            data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
            columns=expected_columns, index=pets)
        pdt.assert_frame_equal(expected_values, result[0])

        # Presumably the number of samples per group -- confirm against
        # _reindex_with_metadata.
        expected_sizes = pd.DataFrame(
            data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
            columns=expected_columns, index=pets)
        pdt.assert_frame_equal(expected_sizes, result[1])
Example 34
Project: q2-diversity   Author: qiime2   File: test_alpha_rarefaction.py    (license) View Source Project 5 votes vote down vote up
def test_multiple_categories(self):
        """Reindex the same table by each of two metadata columns in turn.

        ``_reindex_with_metadata`` is called once for 'pet' and once for
        'toy'; for each call elements 0 and 1 of the result are compared with
        hand-written frames indexed by that column's values.
        """
        input_columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', ''), ('toy', '')],
            names=['depth', 'iter'])
        samples = pd.DataFrame(
            data=[[1, 2, 3, 4, 'russ', 'stick'],
                  [5, 6, 7, 8, 'milo', 'yeti'],
                  [9, 10, 11, 12, 'peanut', 'stick']],
            columns=input_columns,
            index=['S1', 'S2', 'S3'])

        # Metadata level values remain in `levels` but no label points at
        # them, so only the four (depth, iter) columns are expected.
        expected_columns = pd.MultiIndex(
            levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=['depth', 'iter'])

        # --- grouped by 'pet': every sample is its own group ---
        by_pet = _reindex_with_metadata('pet', ['pet', 'toy'], samples)
        pets = pd.Index(['milo', 'peanut', 'russ'], name='pet')

        expected_values = pd.DataFrame(
            data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
            columns=expected_columns, index=pets)
        pdt.assert_frame_equal(expected_values, by_pet[0])

        expected_sizes = pd.DataFrame(
            data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
            columns=expected_columns, index=pets)
        pdt.assert_frame_equal(expected_sizes, by_pet[1])

        # --- grouped by 'toy': 'stick' covers S1 and S3, 'yeti' only S2 ---
        by_toy = _reindex_with_metadata('toy', ['pet', 'toy'], samples)
        toys = pd.Index(['stick', 'yeti'], name='toy')

        expected_values = pd.DataFrame(
            data=[[5, 6, 7, 8], [5, 6, 7, 8]],
            columns=expected_columns, index=toys)
        pdt.assert_frame_equal(expected_values, by_toy[0])

        # Presumably the per-group sample counts (2 for 'stick', 1 for
        # 'yeti') -- confirm against _reindex_with_metadata.
        expected_sizes = pd.DataFrame(
            data=[[2, 2, 2, 2], [1, 1, 1, 1]],
            columns=expected_columns, index=toys)
        pdt.assert_frame_equal(expected_sizes, by_toy[1])
Example 35
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 5 votes vote down vote up
def normalize_segment_names(dataframe, inplace=False):
    """
    Rebuilds the index of the dataframe so its segment part uses canonical segment names.

    Every index entry is unpacked as a (filename, frame) pair; the filename is passed through
    fileutils.get_segment_name while the frame part and the index level names are kept as they are.
    :param dataframe: The dataframe with segment names
    :param inplace: If True, the index of the given data frame is replaced directly.
    :return: The data frame carrying the rebuilt index. If inplace is True this is the object that was passed in,
             otherwise it is a copy.
    """

    renamed_entries = []
    for filename, frame in dataframe.index.get_values():
        renamed_entries.append((fileutils.get_segment_name(filename), frame))

    target = dataframe if inplace else dataframe.copy()
    target.index = pd.MultiIndex.from_tuples(renamed_entries, names=target.index.names)
    return target
Example 36
Project: kaggle-seizure-prediction   Author: sics-lm   File: dataset.py    (license) View Source Project 5 votes vote down vote up
def reshape_frames(dataframe, frame_length=12):
    """
    Returns a new dataframe with the given frame length.

    Consecutive groups of *frame_length* rows (windows) are flattened into a single row, so the result has
    ``n_windows // frame_length`` rows and ``window_width * frame_length`` columns.
    :param dataframe: A pandas DataFrame with one window per row.
    :param frame_length: The desired number of windows for each feature frame. Must divide the number of windows in
                         *dataframe* evenly.
    :return: A new pandas DataFrame with the desired window frame width. The columns of the new data-frame are a
             ('window', 'feature') multi-index so that future concatenation of data frames align properly. Columns
             are ordered window-major, with the features of each window in their original order.
    :raises ValueError: If the number of windows is not divisible by *frame_length*.
    """

    n_windows, window_width = dataframe.shape

    if n_windows % frame_length != 0:
        raise ValueError("The dataframe has {} windows which"
                         " is not divisible by the frame"
                         " length {}".format(n_windows, frame_length))

    # Floor division keeps n_frames an int: reshape() only accepts integer
    # dimensions, and true division returns a float on Python 3.
    n_frames = n_windows // frame_length
    frame_width = window_width * frame_length

    column_index = pd.MultiIndex.from_product([range(frame_length),
                                               dataframe.columns],
                                              names=['window', 'feature'])
    reshaped_frame = pd.DataFrame(data=dataframe.values.reshape(n_frames,
                                                                frame_width),
                                  columns=column_index)
    # The former `reshaped_frame.sortlevel(axis=1)` call discarded its result,
    # so it never affected the returned frame; it is dropped here to keep the
    # returned column order exactly as callers have always received it.
    # Callers that need lexsorted columns can apply `.sort_index(axis=1)`.
    return reshaped_frame
Example 37
Project: catalyst   Author: enigmampc   File: earnings_estimates.py    (license) View Source Project 5 votes vote down vote up
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Select, for each simulation date and sid, the upcoming release.

        Rows whose event date is on or after their simulation date are kept,
        and the first such row of every (simulation date, sid) group is
        picked.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        next_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a next event.
        """
        simulation_dates = stacked_last_per_qtr.index.get_level_values(
            SIMULATION_DATES
        )
        is_upcoming = (
            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >= simulation_dates
        )
        per_date_and_sid = stacked_last_per_qtr.loc[is_upcoming].groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # Here we take advantage of the fact that `stacked_last_per_qtr` is
        # sorted by event date: the first row of a group is its next release.
        return per_date_and_sid.nth(0).index
Example 38
Project: catalyst   Author: enigmampc   File: earnings_estimates.py    (license) View Source Project 5 votes vote down vote up
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Select, for each simulation date and sid, the most recent release.

        Rows whose event date is on or before their simulation date are kept,
        and the last such row of every (simulation date, sid) group is
        picked.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        previous_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a previous event.
        """
        simulation_dates = stacked_last_per_qtr.index.get_level_values(
            SIMULATION_DATES
        )
        already_released = (
            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <= simulation_dates
        )
        per_date_and_sid = stacked_last_per_qtr.loc[already_released].groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # Here we take advantage of the fact that `stacked_last_per_qtr` is
        # sorted by event date: the last row of a group is its latest release.
        return per_date_and_sid.nth(-1).index
Example 39
Project: jupyter-handsontables   Author: techmuch   File: __init__.py    (license) View Source Project 5 votes vote down vote up
def validate(self, obj, value):
        """Validate ``value`` through the parent class, then optionally sort its columns.

        When this object's 'lexsort' metadata is truthy and the validated
        frame has MultiIndex columns, the columns are sorted on level 0;
        otherwise the validated value is returned untouched.
        """
        validated = super(PandasDataFrame, self).validate(obj, value)
        if not self.get_metadata('lexsort'):
            return validated
        if not isinstance(validated.columns, pd.MultiIndex):
            return validated
        return validated.sortlevel(0, axis=1)
Example 40
Project: jupyter-handsontables   Author: techmuch   File: __init__.py    (license) View Source Project 5 votes vote down vote up
def validate(self, obj, value):
        """Validate ``value`` through the parent class, then optionally sort its columns.

        When this object's 'lexsort' metadata is truthy and the validated
        frame has MultiIndex columns, the columns are sorted on level 0;
        otherwise the validated value is returned untouched.
        """
        validated = super(PandasDataFrame, self).validate(obj, value)
        if not self.get_metadata('lexsort'):
            return validated
        if not isinstance(validated.columns, pd.MultiIndex):
            return validated
        return validated.sortlevel(0, axis=1)
Example 41
Project: meterstick   Author: google   File: metrics_test.py    (license) View Source Project 5 votes vote down vote up
def testTwoDimensionalDistribution(self):
    """Distribution of "X" over the ("Y", "Z") pairs with unit weights.

    The metric output must equal a one-column frame (column name "")
    indexed by the (Y, Z) combinations listed below.
    """
    data = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                         "Y": [1, 2, 0, 1, 1, 1, 1],
                         "Z": [1, 0, 0, 0, 0, 0, 0]})
    unit_weights = np.array([1, 1, 1, 1, 1, 1, 1])

    distribution = metrics.Distribution("X", ["Y", "Z"])
    actual = distribution(data, unit_weights)

    expected_shares = np.array([1 / 14., 1 / 14., 1 / 14., 11 / 14.])
    # Labels spell out the (Y, Z) pairs (1, 1), (2, 0), (0, 0), (1, 0).
    expected_index = pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                                   labels=[[1, 2, 0, 1], [1, 0, 0, 0]],
                                   names=["Y", "Z"])
    expected = pd.DataFrame(expected_shares,
                            columns=[""],
                            index=expected_index)
    self.assertTrue(actual.equals(expected))
Example 42
Project: meterstick   Author: google   File: core_test.py    (license) View Source Project 5 votes vote down vote up
def testShuffledDataframeRelativeToJackknife(self):
    """Jackknife SE of an absolute difference on randomly shuffled rows.

    The input rows are permuted before analysis; output and expectation are
    both brought into the same (Y, Z) order before being compared, so the
    check holds up to row order.
    """

    def by_y_then_z(frame):
        # Canonical form: rows sorted by Y then Z, with (Y, Z) as the index.
        flat = frame.reset_index()
        return flat.sort_values(by=["Y", "Z"]).set_index(["Y", "Z"])

    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    se_method = standard_errors.Jackknife()

    shuffled = df.iloc[np.random.permutation(11)]
    analysis = core.Analyze(shuffled).relative_to(
        comparisons.AbsoluteDifference("Y", 0))
    analysis = analysis.with_standard_errors(se_method).calculate(metric)
    output = by_y_then_z(analysis.run())

    expected_index = pd.MultiIndex(levels=[[1.], [0., 1.]],
                                   labels=[[0, 0], [0, 1]],
                                   names=["Y", "Z"])
    correct = by_y_then_z(pd.DataFrame(
        np.array([[-0.2, 0.18100283490],
                  [0.2, 0.18100283490]]),
        columns=["X Distribution Absolute Difference",
                 "X Distribution Absolute Difference Jackknife SE"],
        index=expected_index))

    # Labels must match exactly; values only up to a 1e-10 tolerance.
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Example 43
Project: weightedcalcs   Author: jsvine   File: core.py    (license) View Source Project 5 votes vote down vote up
def groupby_deco(func):
    """Let a method accept either a plain object or a ``DataFrameGroupBy``.

    The decorated method keeps its ``(self, thing, *args, **kwargs)``
    signature. When ``thing`` is a ``DataFrameGroupBy`` the method is applied
    to every group; if that yields a Series with a MultiIndex, the result is
    unstacked before being returned. Any other ``thing`` is passed straight
    through to the method.

    The wrapper carries the wrapped method's metadata (``__name__``,
    ``__doc__``, ...) via ``functools.wraps``.
    """
    # Local import so the block does not depend on a file-level import.
    import functools

    @functools.wraps(func)
    def func_wrapper(self, thing, *args, **kwargs):
        if not isinstance(thing, pd.core.groupby.DataFrameGroupBy):
            return func(self, thing, *args, **kwargs)

        agg = thing.apply(lambda x: func(self, x, *args, **kwargs))
        is_series = isinstance(agg, pd.core.series.Series)
        has_multiindex = isinstance(agg.index, pd.MultiIndex)
        if is_series and has_multiindex:
            return agg.unstack()
        return agg
    return func_wrapper
Example 44
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: common.py    (license) View Source Project 5 votes vote down vote up
def _isnull_new(obj):
    """Dispatch a null check on ``obj`` according to its type.

    Scalars go to ``lib.checknull``; Series, ndarrays, Index objects, lists
    and anything exposing ``__array__`` go to ``_isnull_ndarraylike``;
    ``ABCGeneric`` containers are rebuilt from their null-checked data. A
    MultiIndex raises NotImplementedError. Anything else is null only if it
    is ``None``.
    """
    if lib.isscalar(obj):
        return lib.checknull(obj)

    # hack (for now) because MI registers as ndarray
    if isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")

    if isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike(obj)

    if isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isnull(func=isnull))

    if isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike(np.asarray(obj))

    return obj is None
Example 45
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_timeseries.py    (license) View Source Project 5 votes vote down vote up
def test_get_level_values_box(self):
        """Values read from a datetime level come back as Timestamp objects."""
        from pandas import MultiIndex

        day_level = date_range('1/1/2000', periods=4)
        # Each of the four dates is paired with both 0 and 1.
        day_labels = [0, 0, 1, 1, 2, 2, 3, 3]
        flag_labels = [0, 1, 0, 1, 0, 1, 0, 1]

        mi = MultiIndex(levels=[day_level, [0, 1]],
                        labels=[day_labels, flag_labels])

        first_day = mi.get_level_values(0)[0]
        self.assertTrue(isinstance(first_day, Timestamp))
Example 46
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_base.py    (license) View Source Project 5 votes vote down vote up
def setUp(self):
        """Build the name -> index fixture mapping, then run ``setup_indices``."""
        indices = {
            'unicodeIndex': tm.makeUnicodeIndex(100),
            'strIndex': tm.makeStringIndex(100),
            'dateIndex': tm.makeDateIndex(100),
            'periodIndex': tm.makePeriodIndex(100),
            'tdIndex': tm.makeTimedeltaIndex(100),
            'intIndex': tm.makeIntIndex(100),
            'rangeIndex': tm.makeIntIndex(100),
            'floatIndex': tm.makeFloatIndex(100),
            'boolIndex': Index([True, False]),
            'catIndex': tm.makeCategoricalIndex(100),
            'empty': Index([]),
        }
        # Two-level index built from ('foo', 1), ('bar', 2), ('baz', 3).
        pairs = lzip(['foo', 'bar', 'baz'], [1, 2, 3])
        indices['tuples'] = MultiIndex.from_tuples(pairs)

        self.indices = indices
        self.setup_indices()
Example 47
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_base.py    (license) View Source Project 5 votes vote down vote up
def test_construction_list_mixed_tuples(self):
        # 10697
        # if we are constructing from a mixed list of tuples, make sure that we
        # are independent of the sorting order
        idx1 = Index([('A', 1), 'B'])
        self.assertIsInstance(idx1, Index) and self.assertNotInstance(
            idx1, MultiIndex)
        idx2 = Index(['B', ('A', 1)])
        self.assertIsInstance(idx2, Index) and self.assertNotInstance(
            idx2, MultiIndex) 
Example 48
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_base.py    (license) View Source Project 5 votes vote down vote up
def test_str_attribute(self):
        """Exercise the ``.str`` accessor on Index objects (GH9068).

        Covers the strip family, the error raised for indexes that cannot use
        the accessor, ``split`` with and without ``expand``, and the boolean
        result of ``startswith``.
        """
        # GH9068
        padded = Index([' jack', 'jill ', ' jesse ', 'frank'])
        for name in ('strip', 'rstrip', 'lstrip'):
            plain_method = getattr(str, name)
            stripped = Index([plain_method(x) for x in padded.values])
            tm.assert_index_equal(
                getattr(Index.str, name)(padded.str), stripped)

        # create a few instances that are not able to use .str accessor
        unsupported = [Index(range(5)), tm.makeDateIndex(10),
                       MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]),
                       PeriodIndex(start='2000', end='2010', freq='A')]
        for bad_index in unsupported:
            with self.assertRaisesRegexp(AttributeError,
                                         'only use .str accessor'):
                bad_index.str.repeat(2)

        # split: lists of tokens by default, a MultiIndex when expanded
        phrases = Index(['a b c', 'd e', 'f'])
        token_lists = Index([['a', 'b', 'c'], ['d', 'e'], ['f']])
        tm.assert_index_equal(phrases.str.split(), token_lists)
        tm.assert_index_equal(phrases.str.split(expand=False), token_lists)

        token_levels = MultiIndex.from_tuples([('a', 'b', 'c'),
                                               ('d', 'e', np.nan),
                                               ('f', np.nan, np.nan)])
        tm.assert_index_equal(phrases.str.split(expand=True), token_levels)

        # test boolean case, should return np.array instead of boolean Index
        codes = Index(['a1', 'a2', 'b1', 'b2'])
        starts_with_a = np.array([True, True, False, False])
        tm.assert_numpy_array_equal(codes.str.startswith('a'), starts_with_a)
        self.assertIsInstance(codes.str.startswith('a'), np.ndarray)

        # the boolean array can be used directly as a mask on a Series
        ser = Series(range(4), index=codes)
        selected = Series(range(2), index=['a1', 'a2'])
        tm.assert_series_equal(ser[ser.index.str.startswith('a')], selected)
Example 49
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_base.py    (license) View Source Project 5 votes vote down vote up
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
        """Reindexing against an empty typed index takes the target's dtype.

        GH7774: the source index holds strings, yet the reindexed result must
        report the dtype of the (empty) target, level by level for a
        MultiIndex target.
        """
        # GH7774
        source = pd.Index(list('abc'))

        empty_targets = [
            (pd.Int64Index([]), np.int64),
            (pd.Float64Index([]), np.float64),
            (pd.DatetimeIndex([]), np.datetime64),
        ]
        for target, dtype_type in empty_targets:
            self.assertEqual(source.reindex(target)[0].dtype.type,
                             dtype_type)

        # Empty two-level target: levels passed first, empty labels second.
        empty_multi = pd.MultiIndex(
            [pd.Int64Index([]), pd.Float64Index([])], [[], []])
        reindexed = source.reindex(empty_multi)[0]
        self.assertEqual(reindexed.levels[0].dtype.type, np.int64)
        self.assertEqual(reindexed.levels[1].dtype.type, np.float64)
Example 50
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda   Author: SignalMedia   File: test_reshape.py    (license) View Source Project 5 votes vote down vote up
def test_pivot_index_none(self):
        """Pivot a frame without passing ``index`` so the existing index is used.

        Covers ``pivot`` with an explicit ``values`` column and with ``values``
        omitted (which is expected to give two-level columns).
        """
        # gh-3962
        data = {
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        # The 'index' column becomes the frame's index before pivoting.
        frame = DataFrame(data).set_index('index')
        result = frame.pivot(columns='columns', values='values')
        expected = DataFrame({
            'One': {'A': 1., 'B': 2., 'C': 3.},
            'Two': {'A': 1., 'B': 2., 'C': 3.}
        })

        expected.index.name, expected.columns.name = 'index', 'columns'
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns='columns')

        # Without `values`, the column name 'values' is expected as an
        # unnamed outer column level.
        expected.columns = pd.MultiIndex.from_tuples([('values', 'One'),
                                                      ('values', 'Two')],
                                                     names=[None, 'columns'])
        expected.index.name = 'index'
        # Names are excluded from the frame comparison and checked
        # explicitly on the next two lines.
        assert_frame_equal(result, expected, check_names=False)
        self.assertEqual(result.index.name, 'index',)
        self.assertEqual(result.columns.names, (None, 'columns'))
        # Drop the outer level again so `expected` can be reused below.
        expected.columns = expected.columns.droplevel(0)

        # NOTE(review): this second `data` dict is not referenced by any
        # statement visible below -- `frame` is still built from the first
        # dict -- and its 'index' entry has 7 items against 6 in the other
        # columns. Confirm whether a new frame was meant to be built here.
        data = {
            'index': range(7),
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        result = frame.pivot(columns='columns', values='values')

        expected.columns.name = 'columns'
        assert_frame_equal(result, expected)