Python pandas.Index() Examples

The following are 30 code examples showing how to use pandas.Index(). They are extracted from open source projects; the project, author, source file, and license are listed above each example so you can trace it back to the original code.


You may also want to check out the other available functions and classes of the pandas module.

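Before the project extracts, here is a minimal standalone sketch of the pandas.Index basics that the examples below rely on. The names idx and df are illustrative only and do not come from any of the projects listed.

import pandas as pd

# Build a named Index and use it to label a DataFrame.
idx = pd.Index(["a", "b", "c"], name="key")
df = pd.DataFrame({"value": [1, 2, 3]}, index=idx)

# Index objects are immutable, array-like containers with set-like operations.
assert idx.name == "key"
assert "b" in idx
print(idx.union(pd.Index(["c", "d"])))  # Index(['a', 'b', 'c', 'd'], dtype='object')
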
Example 1
Project: tensortrade   Author: tensortrade-org   File: portfolio.py    License: Apache License 2.0
def on_next(self, data: dict):
        if not self._keys:
            self._keys = self.find_keys(data)

        index = pd.Index([self.clock.step], name="step")
        performance_data = {k: data[k] for k in self._keys}
        performance_data['base_symbol'] = self.base_instrument.symbol
        performance_step = pd.DataFrame(performance_data, index=index)

        net_worth = data['net_worth']

        if self._performance is None:
            self._performance = performance_step
            self._initial_net_worth = net_worth
            self._net_worth = net_worth
        else:
            self._performance = self._performance.append(performance_step)
            self._net_worth = net_worth

        if self._performance_listener:
            self._performance_listener(performance_step) 
Example 2
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_index_names_dedup(self, index_class):

        index_names = ['dedup', None, 'index', int(1)]
        expected = [
            ['dedup_1', 'dedup_2'],
            [None, None],
            ['index_1', 'index_2'],
            ['1_1', '1_2'],
        ]

        for i, name in enumerate(index_names):

            index_A = pd.Index(self.a.index).rename(name)
            df_A = pd.DataFrame(self.a, index=index_A)

            pairs = index_class.index((df_A))

            assert pairs.names == expected[i]
            assert df_A.index.name == name 
Example 3
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_duplicated_index_names_dedup(self, index_class):

        # make an index for each dataframe with a new index name
        index_a = pd.Index(self.a.index, name='index')
        df_a = pd.DataFrame(self.a, index=index_a)

        # make the index
        pairs = index_class.index(df_a)
        assert pairs.names == ['index_1', 'index_2']

        # check for inplace editing (not the intention)
        assert df_a.index.name == 'index'

        # make the index
        index_class.suffixes = ['_a', '_b']
        pairs = index_class.index(df_a)
        assert pairs.names == ['index_a', 'index_b']

        # check for inplace editing (not the intention)
        assert df_a.index.name == 'index' 
Example 4
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_index_names_link(self, index_class):

        # tuples with the name of the first and second index
        index_names = [('index1', 'index2'),
                       ('index1', None), (None, 'index2'), (None, None),
                       (10, 'index2'), (10, 11)]

        for name_a, name_b in index_names:

            # make an index for each dataframe with a new index name
            index_a = pd.Index(self.a.index, name=name_a)
            df_a = pd.DataFrame(self.a, index=index_a)

            index_b = pd.Index(self.b.index, name=name_b)
            df_b = pd.DataFrame(self.b, index=index_b)

            pairs = index_class.index((df_a, df_b))
            assert pairs.names == [name_a, name_b]

            # check for inplace editing (not the intention)
            assert df_a.index.name == name_a
            assert df_b.index.name == name_b 
Example 5
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_index_names_pandas023(self, index_class):
        # Pandas changes the behaviour of MultiIndex names.
        # https://github.com/pandas-dev/pandas/pull/18882
        # https://github.com/J535D165/recordlinkage/issues/55
        # This test tests compatibility.

        # make an index for each dataframe with a new index name
        index_a = pd.Index(self.a.index, name='index')
        df_a = pd.DataFrame(self.a, index=index_a)

        index_b = pd.Index(self.b.index, name='index')
        df_b = pd.DataFrame(self.b, index=index_b)

        # make the index
        pairs_link = index_class._link_index(df_a, df_b)

        if pairs_link.names[0] is not None:
            assert pairs_link.names[0] != pairs_link.names[1]

        # make the index
        pairs_dedup = index_class._dedup_index(df_a)

        if pairs_link.names[0] is not None:
            assert pairs_dedup.names[0] != pairs_dedup.names[1] 
Example 6
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_lower_triangular(self, index_class):

        # make an index for each dataframe with a new index name
        index_a = pd.Index(self.a.index, name='index')
        df_a = pd.DataFrame(self.a, index=index_a)
        pairs = index_class.index(df_a)

        # expected
        levels = [df_a.index.values, df_a.index.values]
        codes = np.tril_indices(len(df_a.index), k=-1)

        full_pairs = pd.MultiIndex(levels=levels,
                                   codes=codes,
                                   verify_integrity=False)

        # all pairs are in the lower triangle of the matrix.
        assert len(pairs.difference(full_pairs)) == 0 
Example 7
Project: mmvec   Author: biocore   File: test_visualizers.py    License: BSD 3-Clause "New" or "Revised" License
def setUp(self):
        _ranks = pd.DataFrame([[4.1, 1.3, 2.1], [0.1, 0.3, 0.2],
                               [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]],
                              index=pd.Index([c for c in 'ABCD'], name='id'),
                              columns=['m1', 'm2', 'm3']).T
        self.ranks = Artifact.import_data('FeatureData[Conditional]', _ranks)
        self.taxa = CategoricalMetadataColumn(pd.Series([
            'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '
            'o__Desulfobacterales; f__Desulfobulbaceae; g__; s__',
            'k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta',
            'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
            'o__Rickettsiales; f__mitochondria; g__Lardizabala; s__biternata',
            'k__Archaea; p__Euryarchaeota; c__Methanomicrobia; '
            'o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina'],
            index=pd.Index([c for c in 'ABCD'], name='feature-id'),
            name='Taxon'))
        self.metabolites = CategoricalMetadataColumn(pd.Series([
            'amino acid', 'carbohydrate', 'drug metabolism'],
            index=pd.Index(['m1', 'm2', 'm3'], name='feature-id'),
            name='Super Pathway')) 
Example 8
Project: mmvec   Author: biocore   File: test_heatmap.py    License: BSD 3-Clause "New" or "Revised" License
def setUp(self):
        self.taxa = pd.Series([
            'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '
            'o__Desulfobacterales; f__Desulfobulbaceae; g__; s__',
            'k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta',
            'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
            'o__Rickettsiales; f__mitochondria; g__Lardizabala; s__biternata',
            'k__Archaea; p__Euryarchaeota; c__Methanomicrobia; '
            'o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina',
            'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
            'o__Rickettsiales; f__mitochondria; g__Pavlova; s__lutheri',
            'k__Archaea; p__[Parvarchaeota]; c__[Parvarchaea]; o__WCHD3-30',
            'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
            'o__Sphingomonadales; f__Sphingomonadaceae'],
            index=pd.Index([c for c in 'ABCDEFG'], name='feature-id'),
            name='Taxon')
        self.exp = pd.Series(
            ['s__', 'o__Streptophyta', 's__biternata', 'g__Methanosarcina',
             's__lutheri', 'o__WCHD3-30', 'f__Sphingomonadaceae'],
            index=pd.Index([c for c in 'ABCDEFG'], name='feature-id'),
            name='Taxon') 
Example 9
Project: arctic   Author: man-group   File: numpy_records.py    License: GNU Lesser General Public License v2.1
def _index_to_records(self, df):
        metadata = {}
        index = df.index
        index_tz = None

        if isinstance(index, MultiIndex):
            ix_vals, index_names, index_tz = _multi_index_to_records(index, len(df) == 0)
        else:
            ix_vals = [index.values]
            index_names = list(index.names)
            if index_names[0] is None:
                index_names = ['index']
                log.info("Index has no name, defaulting to 'index'")
            if isinstance(index, DatetimeIndex) and index.tz is not None:
                index_tz = get_timezone(index.tz)

        if index_tz is not None:
            metadata['index_tz'] = index_tz
        metadata['index'] = index_names

        return index_names, ix_vals, metadata 
Example 10
Project: arctic   Author: man-group   File: numpy_records.py    License: GNU Lesser General Public License v2.1
def _index_from_records(self, recarr):
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn 
Example 11
Project: arctic   Author: man-group   File: test_chunkstore.py    License: GNU Lesser General Public License v2.1
def test_update(chunkstore_lib):
    df = DataFrame(data={'data': [1, 2, 3]},
                   index=pd.Index(data=[dt(2016, 1, 1),
                                        dt(2016, 1, 2),
                                        dt(2016, 1, 3)], name='date'))
    df2 = DataFrame(data={'data': [20, 30, 40]},
                    index=pd.Index(data=[dt(2016, 1, 2),
                                         dt(2016, 1, 3),
                                         dt(2016, 1, 4)], name='date'))

    equals = DataFrame(data={'data': [1, 20, 30, 40]},
                       index=pd.Index(data=[dt(2016, 1, 1),
                                            dt(2016, 1, 2),
                                            dt(2016, 1, 3),
                                            dt(2016, 1, 4)], name='date'))

    chunkstore_lib.write('chunkstore_test', df, chunk_size='D')
    chunkstore_lib.update('chunkstore_test', df2)
    assert_frame_equal(chunkstore_lib.read('chunkstore_test'), equals)
    assert(chunkstore_lib.get_info('chunkstore_test')['len'] == len(equals))
    assert(chunkstore_lib.get_info('chunkstore_test')['chunk_count'] == len(equals)) 
Example 12
Project: arctic   Author: man-group   File: test_chunkstore.py    License: GNU Lesser General Public License v2.1
def test_update_no_overlap(chunkstore_lib):
    df = DataFrame(data={'data': [1, 2, 3]},
                   index=pd.Index(data=[dt(2016, 1, 1),
                                        dt(2016, 1, 2),
                                        dt(2016, 1, 3)], name='date'))
    df2 = DataFrame(data={'data': [20, 30, 40]},
                    index=pd.Index(data=[dt(2015, 1, 2),
                                         dt(2015, 1, 3),
                                         dt(2015, 1, 4)], name='date'))

    equals = DataFrame(data={'data': [20, 30, 40, 1, 2, 3]},
                       index=pd.Index(data=[dt(2015, 1, 2),
                                            dt(2015, 1, 3),
                                            dt(2015, 1, 4),
                                            dt(2016, 1, 1),
                                            dt(2016, 1, 2),
                                            dt(2016, 1, 3)], name='date'))

    chunkstore_lib.write('chunkstore_test', df, chunk_size='D')
    chunkstore_lib.update('chunkstore_test', df2)
    assert_frame_equal(chunkstore_lib.read('chunkstore_test'), equals) 
Example 13
Project: arctic   Author: man-group   File: test_chunkstore.py    License: GNU Lesser General Public License v2.1
def test_update_chunk_range(chunkstore_lib):
    df = DataFrame(data={'data': [1, 2, 3]},
                   index=pd.Index(data=[dt(2015, 1, 1),
                                        dt(2015, 1, 2),
                                        dt(2015, 1, 3)], name='date'))
    df2 = DataFrame(data={'data': [30]},
                    index=pd.Index(data=[dt(2015, 1, 2)],
                                   name='date'))
    equals = DataFrame(data={'data': [30, 3]},
                       index=pd.Index(data=[dt(2015, 1, 2),
                                            dt(2015, 1, 3)],
                                      name='date'))

    chunkstore_lib.write('chunkstore_test', df, chunk_size='M')
    chunkstore_lib.update('chunkstore_test', df2, chunk_range=DateRange(dt(2015, 1, 1), dt(2015, 1, 2)))
    assert_frame_equal(chunkstore_lib.read('chunkstore_test'), equals) 
Example 14
Project: arctic   Author: man-group   File: test_chunkstore.py    License: GNU Lesser General Public License v2.1
def test_append_before(chunkstore_lib):
    df = DataFrame(data={'data': [1, 2, 3]},
                   index=pd.Index(data=[dt(2016, 1, 1),
                                        dt(2016, 1, 2),
                                        dt(2016, 1, 3)], name='date'))
    df2 = DataFrame(data={'data': [20, 30, 40]},
                    index=pd.Index(data=[dt(2015, 1, 2),
                                         dt(2015, 1, 3),
                                         dt(2015, 1, 4)], name='date'))

    equals = DataFrame(data={'data': [20, 30, 40, 1, 2, 3]},
                       index=pd.Index(data=[dt(2015, 1, 2),
                                            dt(2015, 1, 3),
                                            dt(2015, 1, 4),
                                            dt(2016, 1, 1),
                                            dt(2016, 1, 2),
                                            dt(2016, 1, 3)], name='date'))

    chunkstore_lib.write('chunkstore_test', df, chunk_size='D')
    chunkstore_lib.append('chunkstore_test', df2)
    assert_frame_equal(chunkstore_lib.read('chunkstore_test'), equals) 
Example 15
Project: arctic   Author: man-group   File: test_chunkstore.py    License: GNU Lesser General Public License v2.1
def test_update_series(chunkstore_lib):
    df = Series(data=[1, 2, 3],
                index=pd.Index(data=[dt(2016, 1, 1),
                                     dt(2016, 1, 2),
                                     dt(2016, 1, 3)], name='date'),
                name='data')
    df2 = Series(data=[20, 30, 40],
                 index=pd.Index(data=[dt(2016, 1, 2),
                                      dt(2016, 1, 3),
                                      dt(2016, 1, 4)], name='date'),
                 name='data')

    equals = Series(data=[1, 20, 30, 40],
                    index=pd.Index(data=[dt(2016, 1, 1),
                                         dt(2016, 1, 2),
                                         dt(2016, 1, 3),
                                         dt(2016, 1, 4)], name='date'),
                    name='data')

    chunkstore_lib.write('chunkstore_test', df, chunk_size='D')
    chunkstore_lib.update('chunkstore_test', df2)
    assert_series_equal(chunkstore_lib.read('chunkstore_test'), equals) 
Example 16
Project: arctic   Author: man-group   File: test_utils.py    License: GNU Lesser General Public License v2.1
def create_test_data(size=5, index=True, multiindex=True, random_data=True, random_ids=True, date_offset=0, use_hours=False, cols=1):
    data = {}
    for i in range(cols):
        if random_data:
            data['data' + str(i)] = [random.random() * random.randint(-100, 100) for _ in range(size)]
        else:
            data['data' + str(i)] = range(size)
    dates = [dt(2016, 1, 1) + timedelta(days=0 if use_hours else n+date_offset,
                                        hours=n+date_offset if use_hours else 0) for n in range(size)]
    if index:
        if multiindex:
            index_col_names = ['date', 'id']
            idx = [(date, random.randint(1, size)) for date in dates] if random_ids else [(date, 1) for date in dates]
            index = MultiIndex.from_tuples(idx, names=index_col_names) if idx else MultiIndex([[]]*2, [[]]*2, names=index_col_names)
            return DataFrame(data=data, index=index)
        return DataFrame(data=data, index=Index(data=dates, name='date'))
    data.update({'date': dates})
    return DataFrame(data=data) 
Example 17
Project: recruit   Author: Frank-qlu   File: test_frame.py    License: Apache License 2.0
def test_constructor_ndarray(self, float_frame):
        # no index or columns
        sp = SparseDataFrame(float_frame.values)

        # 1d
        sp = SparseDataFrame(float_frame['A'].values, index=float_frame.index,
                             columns=['A'])
        tm.assert_sp_frame_equal(sp, float_frame.reindex(columns=['A']))

        # raise on level argument
        pytest.raises(TypeError, float_frame.reindex, columns=['A'],
                      level=1)

        # wrong length index / columns
        with pytest.raises(ValueError, match="^Index length"):
            SparseDataFrame(float_frame.values, index=float_frame.index[:-1])

        with pytest.raises(ValueError, match="^Column length"):
            SparseDataFrame(float_frame.values,
                            columns=float_frame.columns[:-1])

Example 18
Project: recruit   Author: Frank-qlu   File: test_integer.py    License: Apache License 2.0
def test_astype_index(self, all_data, dropna):
        # as an int/uint index to Index

        all_data = all_data[:10]
        if dropna:
            other = all_data[~all_data.isna()]
        else:
            other = all_data

        dtype = all_data.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        result = idx.astype(dtype)
        expected = idx.astype(object).astype(dtype)
        tm.assert_index_equal(result, expected) 
Example 19
Project: recruit   Author: Frank-qlu   File: test_integer.py    License: Apache License 2.0
def test_preserve_dtypes(op):
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    df = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # op
    result = getattr(df.C, op)()
    assert isinstance(result, int)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame({
        "B": np.array([1.0, 3.0]),
        "C": integer_array([1, 3], dtype="Int64")
    }, index=pd.Index(['a', 'b'], name='A'))
    tm.assert_frame_equal(result, expected) 
Example 20
Project: recruit   Author: Frank-qlu   File: test_missing.py    License: Apache License 2.0
def test_nan_handling(self):

        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8))
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0],
                                                       dtype=np.int8))

        # Adding nan to categories should make assigned nan point to the
        # category!
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8)) 
Example 21
Project: recruit   Author: Frank-qlu   File: test_indexing.py    License: Apache License 2.0
def test_categories_assigments(self):
        s = Categorical(["a", "b", "c", "a"])
        exp = np.array([1, 2, 3, 1], dtype=np.int64)
        s.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(s.__array__(), exp)
        tm.assert_index_equal(s.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            s.categories = [1, 2, 3, 4]

        # shorten
        with pytest.raises(ValueError):
            s.categories = [1, 2]

Example 22
Project: recruit   Author: Frank-qlu   File: test_api.py    License: Apache License 2.0
def test_ordered_api(self):
        # GH 9347
        cat1 = Categorical(list('acb'), ordered=False)
        tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c']))
        assert not cat1.ordered

        cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False)
        tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a']))
        assert not cat2.ordered

        cat3 = Categorical(list('acb'), ordered=True)
        tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
        assert cat3.ordered

        cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True)
        tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
        assert cat4.ordered 
Example 23
Project: FINE   Author: FZJ-IEK3-VSA   File: standardIO.py    License: MIT License
def getDualValues(pyM):
    """
    Get dual values of an optimized pyomo instance.

    :param pyM: optimized pyomo instance
    :type pyM: pyomo Concrete Model

    :return: Pandas Series with dual values
    """
    return pd.Series(list(pyM.dual.values()), index=pd.Index(list(pyM.dual.keys()))) 
Example 24
Project: recordlinkage   Author: J535D165   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def index_split(index, chunks):
    """Function to split pandas.Index and pandas.MultiIndex objects.

    Split :class:`pandas.Index` and :class:`pandas.MultiIndex` objects
    into chunks. This function is based on :func:`numpy.array_split`.

    Parameters
    ----------
    index : pandas.Index, pandas.MultiIndex
        A pandas.Index or pandas.MultiIndex to split into chunks.
    chunks : int
        The number of parts to split the index into.

    Returns
    -------
    list
        A list with chunked pandas.Index or pandas.MultiIndex objects.

    """

    Ntotal = index.shape[0]
    Nsections = int(chunks)
    if Nsections <= 0:
        raise ValueError('number sections must be larger than 0.')
    Neach_section, extras = divmod(Ntotal, Nsections)
    section_sizes = ([0] + extras * [Neach_section + 1] +
                     (Nsections - extras) * [Neach_section])
    div_points = numpy.array(section_sizes).cumsum()

    sub_ind = []
    for i in range(Nsections):
        st = div_points[i]
        end = div_points[i + 1]
        sub_ind.append(index[st:end])

    return sub_ind 
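
For orientation, a small usage sketch of index_split as defined above, with a made-up index (this snippet is not part of the recordlinkage test suite):

import pandas

idx = pandas.Index(range(7), name='rec')
parts = index_split(idx, 3)
# Chunk sizes follow numpy.array_split: 3, 2, 2 for 7 elements in 3 sections.
print([list(p) for p in parts])  # [[0, 1, 2], [3, 4], [5, 6]]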
Example 25
Project: recordlinkage   Author: J535D165   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def frame_indexing(frame, multi_index, level_i, indexing_type='label'):
    """Index dataframe based on one level of MultiIndex.

    Arguments
    ---------
    frame : pandas.DataFrame
        The dataframe to select records from.
    multi_index : pandas.MultiIndex
        A pandas MultiIndex where one of the levels is used to sample the
        dataframe.
    level_i : int, str
        The level of the multiIndex to index on.
    indexing_type : str
        The type of indexing. The value can be 'label' or 'position'.
        Default 'label'.

    """

    if indexing_type == "label":
        data = frame.loc[multi_index.get_level_values(level_i)]
        data.index = multi_index
    elif indexing_type == "position":
        data = frame.iloc[multi_index.get_level_values(level_i)]
        data.index = multi_index
    else:
        raise ValueError("indexing_type needs to be 'label' or 'position'")

    return data 
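
As with index_split, a brief sketch of how frame_indexing might be called, using a made-up frame and pair index (the record ids and names are illustrative only):

import pandas

df = pandas.DataFrame({'name': ['ann', 'bob']},
                      index=pandas.Index([10, 20], name='rec_id'))
pairs = pandas.MultiIndex.from_tuples([(10, 20), (20, 10)],
                                      names=['rec_id_1', 'rec_id_2'])
# Rows are looked up by level 0 of the pair index and re-labelled with the
# full MultiIndex, so both sides of each candidate pair align row-wise.
left = frame_indexing(df, pairs, 0)
print(left['name'].tolist())  # ['ann', 'bob']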
Example 26
Project: recordlinkage   Author: J535D165   File: base.py    License: BSD 3-Clause "New" or "Revised" License
def _verify_integrety(self, x):

        if isinstance(x.index, pandas.Index):

            if not x.index.is_unique:
                raise ValueError('index of DataFrame is not unique')

        elif isinstance(x.index, pandas.MultiIndex):
            raise ValueError(
                'expected pandas.Index instead of pandas.MultiIndex'
            ) 
Example 27
Project: recordlinkage   Author: J535D165   File: base.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self, features=[], n_jobs=1, indexing_type='label',
                 **kwargs):

        logging.info("comparing - initialize {} class".format(
            self.__class__.__name__)
        )

        self.features = []
        self.add(features)

        # public
        if n_jobs == -1:
            self.n_jobs = cpu_count()
        else:
            self.n_jobs = n_jobs
        self.indexing_type = indexing_type  # label or position

        # logging
        self._i = 1
        self._i_max = None
        self._n = []
        self._eta = []
        self._output_log_total = True

        # private
        self._compare_functions = []

        if isinstance(features, (pandas.MultiIndex, pandas.Index)):
            warnings.warn(
                "It seems you are using the older version of the Compare API, "
                "see the documentation about how to update to the new API. "
                "http://recordlinkage.readthedocs.io/"
                "en/latest/ref-compare.html",
                DeprecationWarning
            ) 
Example 28
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_add_linking(self):

        indexer1 = Full()
        indexer2 = Block(left_on='var_arange', right_on='var_arange')
        expected = indexer1.index(self.a,
                                  self.b).union(indexer2.index(self.a, self.b))

        indexer = recordlinkage.Index()
        indexer.add(
            [Full(),
             Block(left_on='var_arange', right_on='var_arange')])

        result = indexer.index(self.a, self.b)

        pdt.assert_index_equal(result, expected) 
Example 29
Project: recordlinkage   Author: J535D165   File: test_indexing.py    License: BSD 3-Clause "New" or "Revised" License
def test_add_dedup(self):

        indexer1 = Full()
        indexer2 = Block(left_on='var_arange', right_on='var_arange')
        expected = indexer1.index(self.a).union(indexer2.index(self.a))

        indexer = recordlinkage.Index()
        indexer.add(
            [Full(),
             Block(left_on='var_arange', right_on='var_arange')])

        result = indexer.index(self.a)

        pdt.assert_index_equal(result, expected) 
Example 30
Project: whynot   Author: zykls   File: fico.py    License: MIT License
def _find_nearest_indices(idx: Index, targets: Array) -> Array:
    """Convert 1d array into array of nearest index values."""
    idx = np.asarray(idx)
    idx_ = np.expand_dims(idx, -1)
    targets = np.expand_dims(targets, 0)
    i = (np.abs(idx_ - targets)).argmin(axis=0)
    return idx[i]
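
A quick illustration of the behaviour, assuming Index is pandas.Index and Array is a numpy array type alias (the sample values are made up):

import numpy as np
import pandas as pd

idx = pd.Index([300, 400, 500])
targets = np.array([310, 495])
# Each target is mapped to the nearest value that actually occurs in the index.
print(_find_nearest_indices(idx, targets))  # [300 500]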