Python pandas.core.reshape.merge.merge() Examples

The following are 30 code examples of pandas.core.reshape.merge.merge(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.reshape.merge, or try the search function.
Example #1
Source File: test_merge.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_merge_index_singlekey_inner(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        # inner join
        result = merge(left, right, left_on='key', right_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected)

        result = merge(right, left, right_on='key', left_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected.loc[:, result.columns]) 
Example #2
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_index_singlekey_right_vs_left(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=False)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=False)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=True)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=True)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) 
Example #3
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_merge_index_singlekey_right_vs_left(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=False)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=False)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=True)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=True)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) 
Example #4
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_merge_index_singlekey_inner(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        # inner join
        result = merge(left, right, left_on='key', right_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected)

        result = merge(right, left, right_on='key', left_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected.loc[:, result.columns]) 
Example #5
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_other_timedelta_unit(self, unit):
        # GH 13389
        df1 = pd.DataFrame({'entity_id': [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name='days')

        dtype = "m8[{}]".format(unit)
        df2 = s.astype(dtype).to_frame('days')
        assert df2['days'].dtype == 'm8[ns]'

        result = df1.merge(df2, left_on='entity_id', right_index=True)

        exp = pd.DataFrame({'entity_id': [101, 102],
                            'days': np.array(['nat', 'nat'],
                                             dtype=dtype)},
                           columns=['entity_id', 'days'])
        tm.assert_frame_equal(result, exp) 
Example #6
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_overlapping_columns_error_message(self):
        """Duplicate labels merge when disjoint; overlapping ones raise MergeError."""
        def make_frame():
            return DataFrame({"key": [1, 2, 3],
                              "v1": [4, 5, 6],
                              "v2": [7, 8, 9]})

        df = make_frame()
        df.columns = ["key", "foo", "foo"]
        df2 = make_frame()
        df2.columns = ["key", "bar", "bar"]

        # duplicated labels that do not collide across frames are accepted
        expected = DataFrame({"key": [1, 2, 3],
                              "v1": [4, 5, 6],
                              "v2": [7, 8, 9],
                              "v3": [4, 5, 6],
                              "v4": [7, 8, 9]})
        expected.columns = ["key", "foo", "foo", "bar", "bar"]
        assert_frame_equal(merge(df, df2), expected)

        # #2649, #10639
        # now both frames carry the duplicated 'foo' label
        df2.columns = ["key1", "foo", "foo"]
        msg = (r"Data columns not unique: Index\(\[u?'foo', u?'foo'\],"
               r" dtype='object'\)")
        with pytest.raises(MergeError, match=msg):
            merge(df, df2)
Example #7
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_merge_different_column_key_names(self):
        left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
                          'value': [1, 2, 3, 4]})
        right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'],
                           'value': [5, 6, 7, 8]})

        merged = left.merge(right, left_on='lkey', right_on='rkey',
                            how='outer', sort=True)

        exp = pd.Series(['bar', 'baz', 'foo', 'foo', 'foo', 'foo', np.nan],
                        name='lkey')
        tm.assert_series_equal(merged['lkey'], exp)

        exp = pd.Series(['bar', np.nan, 'foo', 'foo', 'foo', 'foo', 'qux'],
                        name='rkey')
        tm.assert_series_equal(merged['rkey'], exp)

        exp = pd.Series([2, 3, 1, 1, 4, 4, np.nan], name='value_x')
        tm.assert_series_equal(merged['value_x'], exp)

        exp = pd.Series([6, np.nan, 5, 8, 5, 8, 7], name='value_y')
        tm.assert_series_equal(merged['value_y'], exp) 
Example #8
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_join_key_dtype_cast(self):
        """Check the dtype of the join key after outer merges (#8596)."""
        # integer keys are expected to stay int64
        merged = merge(DataFrame({"key": [1], "v1": [10]}),
                       DataFrame({"key": [2], "v1": [20]}),
                       how="outer")
        assert merged["key"].dtype == "int64"

        # GH13169
        # boolean keys are asserted as object here; this really should be bool
        merged = merge(DataFrame({"key": [True], "v1": [1]}),
                       DataFrame({"key": [False], "v1": [0]}),
                       how="outer")
        assert merged["key"].dtype == "object"

        # keys given as arrays appear in the result as 'key_0'
        merged = merge(DataFrame({"val": [1]}), DataFrame({"val": [2]}),
                       left_on=np.array([1]), right_on=np.array([2]),
                       how="outer")
        assert merged["key_0"].dtype == "int64"
Example #9
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
    # GH 21220
    a = pd.DataFrame({"A": [1, 2, 3, 4]},
                     index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]],
                     names=['outer', 'inner']))
    b = pd.Series([1, 2, 3, 4],
                  index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
                  names=['outer', 'inner']), name=nm)
    expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]},
                            index=pd.MultiIndex.from_product([['a', 'b'], [1]],
                            names=['outer', 'inner']))
    if nm is not None:
        result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
                          left_index=left_index, right_index=right_index)
        tm.assert_frame_equal(result, expected)
    else:
        msg = "Cannot merge a Series without a name"
        with pytest.raises(ValueError, match=msg):
            result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
                              left_index=left_index, right_index=right_index) 
Example #10
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_datetime64tz_empty(self):
        """Merging an empty tz-aware frame keeps every column's dtype.

        See https://github.com/pandas-dev/pandas/issues/25014
        """
        dtz = pd.DatetimeTZDtype(tz="UTC")
        right = pd.DataFrame(
            {"date": [pd.Timestamp("2018", tz=dtz.tz)],
             "value": [4.0],
             "date2": [pd.Timestamp("2019", tz=dtz.tz)]},
            columns=["date", "value", "date2"])
        left = right[:0]  # zero rows, same columns and dtypes

        result = left.merge(right, on="date")

        # output column -> dtype of the (empty) expected column
        out_columns = ["value_x", "date2_x", "date", "value_y", "date2_y"]
        out_dtypes = {"value_x": float, "date2_x": dtz, "date": dtz,
                      "value_y": float, "date2_y": dtz}
        expected = pd.DataFrame(
            {name: pd.Series(dtype=out_dtypes[name]) for name in out_columns},
            columns=out_columns)
        tm.assert_frame_equal(result, expected)
Example #11
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_different_column_key_names(self):
        left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
                          'value': [1, 2, 3, 4]})
        right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'],
                           'value': [5, 6, 7, 8]})

        merged = left.merge(right, left_on='lkey', right_on='rkey',
                            how='outer', sort=True)

        exp = pd.Series(['bar', 'baz', 'foo', 'foo', 'foo', 'foo', np.nan],
                        name='lkey')
        tm.assert_series_equal(merged['lkey'], exp)

        exp = pd.Series(['bar', np.nan, 'foo', 'foo', 'foo', 'foo', 'qux'],
                        name='rkey')
        tm.assert_series_equal(merged['rkey'], exp)

        exp = pd.Series([2, 3, 1, 1, 4, 4, np.nan], name='value_x')
        tm.assert_series_equal(merged['value_x'], exp)

        exp = pd.Series([6, np.nan, 5, 8, 5, 8, 7], name='value_y')
        tm.assert_series_equal(merged['value_y'], exp) 
Example #12
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_merge_join_key_dtype_cast(self):
        """Outer merges: assert the resulting dtype of the join key (#8596)."""
        def outer_key_dtype(first, second, column, **kwargs):
            # dtype of ``column`` after an outer merge of the two frames
            return merge(first, second, how="outer", **kwargs)[column].dtype

        int_left = DataFrame({"key": [1], "v1": [10]})
        int_right = DataFrame({"key": [2], "v1": [20]})
        assert outer_key_dtype(int_left, int_right, "key") == "int64"

        bool_left = DataFrame({"key": [True], "v1": [1]})
        bool_right = DataFrame({"key": [False], "v1": [0]})
        # GH13169
        # this really should be bool
        assert outer_key_dtype(bool_left, bool_right, "key") == "object"

        val_left = DataFrame({"val": [1]})
        val_right = DataFrame({"val": [2]})
        # array-valued keys surface under the generated name 'key_0'
        assert outer_key_dtype(val_left, val_right, "key_0",
                               left_on=np.array([1]),
                               right_on=np.array([2])) == "int64"
Example #13
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_other_datetime_unit(self):
        """Merge against an all-missing datetime column for several units.

        Regression test for GH 13389.
        """
        entities = pd.DataFrame({"entity_id": [101, 102]})
        missing_days = pd.Series([None, None], index=[101, 102], name="days")

        for unit in ("D", "h", "m", "s", "ms", "us", "ns"):
            dtype = "datetime64[{}]".format(unit)

            days_frame = missing_days.astype(dtype).to_frame("days")
            # coerces to datetime64[ns], thus should not be affected
            assert days_frame["days"].dtype == "datetime64[ns]"

            merged = entities.merge(days_frame, left_on="entity_id",
                                    right_index=True)

            wanted = pd.DataFrame(
                {"entity_id": [101, 102],
                 "days": np.array(["nat", "nat"], dtype="datetime64[ns]")},
                columns=["entity_id", "days"])
            tm.assert_frame_equal(merged, wanted)
Example #14
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_all_na_column(self, series_of_dtype,
                                 series_of_dtype_all_na):
        # GH 25183
        df_left = pd.DataFrame(
            {'key': series_of_dtype, 'value': series_of_dtype_all_na},
            columns=['key', 'value'])
        df_right = pd.DataFrame(
            {'key': series_of_dtype, 'value': series_of_dtype_all_na},
            columns=['key', 'value'])
        expected = pd.DataFrame({
            'key': series_of_dtype,
            'value_x': series_of_dtype_all_na,
            'value_y': series_of_dtype_all_na,
        }, columns=['key', 'value_x', 'value_y'])
        actual = df_left.merge(df_right, on='key')
        assert_frame_equal(actual, expected) 
Example #15
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merging_with_bool_or_int_cateorical_column(self, category_column,
                                                        categories,
                                                        expected_categories,
                                                        ordered):
        # GH 17187
        # merging with a boolean/int categorical column
        df1 = pd.DataFrame({'id': [1, 2, 3, 4],
                            'cat': category_column})
        df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered))
        df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
        result = df1.merge(df2)
        expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
                                 'num': [1, 9]})
        expected['cat'] = expected['cat'].astype(
            CDT(categories, ordered=ordered))
        assert_frame_equal(expected, result) 
Example #16
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_index_with_more_values(self, how):
        """Index-vs-column merge where the left side has extra keys (GH 24212).

        pd.merge gets [-1, -1, 0, 1] as right_indexer; -1 must be treated as
        a missing value rather than as "last element".
        """
        df1 = pd.DataFrame([[1, 2], [2, 4], [3, 6], [4, 8]],
                           columns=["a", "b"])
        df1 = df1.set_index("a", drop=False)
        df2 = pd.DataFrame([[3, 30], [4, 40]], columns=["a", "c"])
        df2 = df2.set_index("a")

        merged = pd.merge(df1, df2, how=how,
                          left_index=True, right_on="a")

        # rows without a partner on the right carry NaN in 'c'
        wanted = pd.DataFrame(
            [[1, 2, np.nan],
             [2, 4, np.nan],
             [3, 6, 30.0],
             [4, 8, 40.0]],
            columns=["a", "b", "c"]).set_index("a", drop=False)
        assert_frame_equal(merged, wanted)
Example #17
Source File: test_multi.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_multikey(self, left, right, join_type):
        on_cols = ['key1', 'key2']
        result = (left.join(right, on=on_cols, how=join_type)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type)

        tm.assert_frame_equal(result, expected)

        result = (left.join(right, on=on_cols, how=join_type, sort=True)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type, sort=True)

        tm.assert_frame_equal(result, expected) 
Example #18
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_self_join_multiple_categories(self):
        # GH 16767
        # non-duplicates should work with multiple categories
        m = 5
        df = pd.DataFrame({
            'a': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] * m,
            'b': ['t', 'w', 'x', 'y', 'z'] * 2 * m,
            'c': [letter
                  for each in ['m', 'n', 'u', 'p', 'o']
                  for letter in [each] * 2 * m],
            'd': [letter
                  for each in ['aa', 'bb', 'cc', 'dd', 'ee',
                               'ff', 'gg', 'hh', 'ii', 'jj']
                  for letter in [each] * m]})

        # change them all to categorical variables
        df = df.apply(lambda x: x.astype('category'))

        # self-join should equal ourselves
        result = pd.merge(df, df, on=list(df.columns))

        assert_frame_equal(result, df) 
Example #19
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_ints_floats_warning(self):
        # GH 16572
        # merge will produce a warning when merging on int and
        # float columns where the float values are not exactly
        # equal to their int representation
        A = DataFrame({'X': [1, 2, 3]})
        B = DataFrame({'Y': [1.1, 2.5, 3.0]})
        expected = DataFrame({'X': [3], 'Y': [3.0]})

        with tm.assert_produces_warning(UserWarning):
            result = A.merge(B, left_on='X', right_on='Y')
            assert_frame_equal(result, expected)

        with tm.assert_produces_warning(UserWarning):
            result = B.merge(A, left_on='Y', right_on='X')
            assert_frame_equal(result, expected[['Y', 'X']])

        # test no warning if float has NaNs
        B = DataFrame({'Y': [np.nan, np.nan, 3.0]})

        with tm.assert_produces_warning(None):
            result = B.merge(A, left_on='Y', right_on='X')
            assert_frame_equal(result, expected[['Y', 'X']]) 
Example #20
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_other_datetime_unit(self):
        """For each datetime unit, merge an all-NaT column (GH 13389)."""
        df1 = pd.DataFrame({"entity_id": [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name="days")

        candidate_dtypes = [
            "datetime64[D]", "datetime64[h]", "datetime64[m]",
            "datetime64[s]", "datetime64[ms]", "datetime64[us]",
            "datetime64[ns]",
        ]
        for dtype in candidate_dtypes:
            df2 = s.astype(dtype).to_frame("days")
            # coerces to datetime64[ns], thus should not be affected
            assert df2["days"].dtype == "datetime64[ns]"

            result = df1.merge(df2, left_on="entity_id", right_index=True)

            nat_days = np.array(["nat", "nat"], dtype="datetime64[ns]")
            exp = pd.DataFrame({"entity_id": [101, 102], "days": nat_days},
                               columns=["entity_id", "days"])
            tm.assert_frame_equal(result, exp)
Example #21
Source File: test_merge.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_overlapping_columns_error_message(self):
        """Disjoint duplicate labels merge; colliding ones raise ``ValueError``."""
        base = {"key": [1, 2, 3],
                "v1": [4, 5, 6],
                "v2": [7, 8, 9]}

        df = DataFrame(base)
        df.columns = ["key", "foo", "foo"]
        df2 = DataFrame(base)
        df2.columns = ["key", "bar", "bar"]

        expected = DataFrame({"key": [1, 2, 3],
                              "v1": [4, 5, 6],
                              "v2": [7, 8, 9],
                              "v3": [4, 5, 6],
                              "v4": [7, 8, 9]})
        expected.columns = ["key", "foo", "foo", "bar", "bar"]
        assert_frame_equal(merge(df, df2), expected)

        # #2649, #10639
        # both frames now share the duplicated 'foo' label
        df2.columns = ["key1", "foo", "foo"]
        pytest.raises(ValueError, merge, df, df2)
Example #22
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_incompat_infer_boolean_object(self):
        # GH21119: bool + object bool merge OK
        df1 = DataFrame({'key': Series([True, False], dtype=object)})
        df2 = DataFrame({'key': [True, False]})

        expected = DataFrame({'key': [True, False]}, dtype=object)
        result = pd.merge(df1, df2, on='key')
        assert_frame_equal(result, expected)
        result = pd.merge(df2, df1, on='key')
        assert_frame_equal(result, expected)

        # with missing value
        df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)})
        df2 = DataFrame({'key': [True, False]})

        expected = DataFrame({'key': [True, False]}, dtype=object)
        result = pd.merge(df1, df2, on='key')
        assert_frame_equal(result, expected)
        result = pd.merge(df2, df1, on='key')
        assert_frame_equal(result, expected) 
Example #23
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def tests_merge_categorical_unordered_equal(self):
        # GH-19551
        df1 = DataFrame({
            'Foo': Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C']),
            'Left': ['A0', 'B0', 'C0'],
        })

        df2 = DataFrame({
            'Foo': Categorical(['C', 'B', 'A'], categories=['C', 'B', 'A']),
            'Right': ['C1', 'B1', 'A1'],
        })
        result = pd.merge(df1, df2, on=['Foo'])
        expected = DataFrame({
            'Foo': pd.Categorical(['A', 'B', 'C']),
            'Left': ['A0', 'B0', 'C0'],
            'Right': ['A1', 'B1', 'C1'],
        })
        assert_frame_equal(result, expected) 
Example #24
Source File: test_merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_merge_left_notempty_right_empty(self):
        """Merge a populated left frame with an empty right frame (GH 10824).

        Builds the frames and two helper checkers: ``check1`` compares the
        inner/right merges and ``check2`` the left/outer merges against a
        given expectation.
        """
        # GH 10824
        left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                            columns=['a', 'b', 'c'])
        right = pd.DataFrame([], columns=['x', 'y', 'z'])

        # expectation for left/outer merges: left rows kept, right columns NaN
        exp_out = pd.DataFrame({'a': [1, 4, 7],
                                'b': [2, 5, 8],
                                'c': [3, 6, 9],
                                'x': np.array([np.nan] * 3, dtype=object),
                                'y': np.array([np.nan] * 3, dtype=object),
                                'z': np.array([np.nan] * 3, dtype=object)},
                               columns=['a', 'b', 'c', 'x', 'y', 'z'])
        exp_in = exp_out[0:0]  # make empty DataFrame keeping dtype
        # result will have object dtype
        exp_in.index = exp_in.index.astype(object)

        def check1(exp, kwarg):
            # inner and right merges are compared with ``exp``
            result = pd.merge(left, right, how='inner', **kwarg)
            tm.assert_frame_equal(result, exp)
            result = pd.merge(left, right, how='right', **kwarg)
            tm.assert_frame_equal(result, exp)

        def check2(exp, kwarg):
            # left and outer merges are compared with ``exp``
            result = pd.merge(left, right, how='left', **kwarg)
            tm.assert_frame_equal(result, exp)
            result = pd.merge(left, right, how='outer', **kwarg)
            tm.assert_frame_equal(result, exp)

            # NOTE(review): this loop is indented inside ``check2``, and the
            # enclosing test never calls ``check1`` or ``check2`` itself, so
            # as written none of the assertions above execute. If ``check2``
            # were called, the loop would call ``check2`` again from within
            # itself. The loop presumably belongs at the test-body level --
            # TODO confirm, and verify the expectations hold once it runs.
            for kwarg in [dict(left_index=True, right_index=True),
                          dict(left_index=True, right_on='x'),
                          dict(left_on='a', right_index=True),
                          dict(left_on='a', right_on='x')]:
                check1(exp_in, kwarg)
                check2(exp_out, kwarg) 
Example #25
Source File: test_merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_merge_inner_join_empty(self):
        """Index-on-index merge of an empty frame with a populated one (GH 15328).

        The result is expected to be an empty frame that still has the
        int64 column ``a``.
        """
        populated = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
        merged = pd.merge(pd.DataFrame(), populated,
                          left_index=True, right_index=True)

        wanted = pd.DataFrame({"a": []}, index=[], dtype="int64")
        assert_frame_equal(merged, wanted)
Example #26
Source File: test_merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_index_and_on_parameters_confusion(self):
        """Lists passed as ``left_index``/``right_index`` must raise ValueError."""
        keys = ['key1', 'key2']
        # (left_index, right_index) combinations expected to be rejected
        bad_flags = [
            (False, ['key1', 'key2']),
            (['key1', 'key2'], False),
            (['key1', 'key2'], keys),
        ]
        for left_flag, right_flag in bad_flags:
            pytest.raises(ValueError, merge, self.df, self.df2, how='left',
                          left_index=left_flag, right_index=right_flag)
Example #27
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_on_indexes(self, left_df, right_df, how, sort, expected):
        result = pd.merge(left_df, right_df,
                          left_index=True,
                          right_index=True,
                          how=how,
                          sort=sort)
        tm.assert_frame_equal(result, expected) 
Example #28
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_on_int_array(self):
        # GH 23020
        df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
                           'B': 1})
        result = pd.merge(df, df, on='A')
        expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
                                 'B_x': 1,
                                 'B_y': 1})
        assert_frame_equal(result, expected) 
Example #29
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_dtype_on_categorical_dates(self):
        """Merge on categorical date keys, outer and inner (GH 16900).

        dates should not be coerced to ints
        """
        def categorical_dates(rows, value_column):
            # frame with a categorical 'date' column and one float column
            frame = pd.DataFrame(rows, columns=["date", value_column])
            frame["date"] = frame["date"].astype("category")
            return frame

        df = categorical_dates([[date(2001, 1, 1), 1.1],
                                [date(2001, 1, 2), 1.3]], "num2")
        df2 = categorical_dates([[date(2001, 1, 1), 1.3],
                                 [date(2001, 1, 3), 1.4]], "num4")
        out_columns = ["date", "num2", "num4"]

        # outer: every date from either side
        expected_outer = pd.DataFrame(
            [[pd.Timestamp("2001-01-01"), 1.1, 1.3],
             [pd.Timestamp("2001-01-02"), 1.3, np.nan],
             [pd.Timestamp("2001-01-03"), np.nan, 1.4]],
            columns=out_columns)
        assert_frame_equal(pd.merge(df, df2, how="outer", on=["date"]),
                           expected_outer)

        # inner: only the shared date
        expected_inner = pd.DataFrame(
            [[pd.Timestamp("2001-01-01"), 1.1, 1.3]],
            columns=["date", "num2", "num4"])
        assert_frame_equal(pd.merge(df, df2, how="inner", on=["date"]),
                           expected_inner)
Example #30
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_other_columns(self, left, right):
        """Non-key categorical columns should stay categorical after a merge."""
        # non-merge columns should preserve if possible
        categorical_z = right.Z.astype("category")
        right = right.assign(Z=categorical_z)

        merged = pd.merge(left, right, on="X")

        observed = merged.dtypes.sort_index()
        wanted = Series(
            [CategoricalDtype(), np.dtype("O"), CategoricalDtype()],
            index=["X", "Y", "Z"])
        assert_series_equal(observed, wanted)

        # categories are preserved
        for source, column in ((left, "X"), (right, "Z")):
            original_values = getattr(source, column).values
            merged_values = getattr(merged, column).values
            assert original_values.is_dtype_equal(merged_values)