Python pandas.core.reshape.merge.merge() Examples
The following are 30 code examples of pandas.core.reshape.merge.merge().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.reshape.merge, or try the search function.
Example #1
Source File: test_merge.py From recruit with Apache License 2.0 | 7 votes |
def test_merge_index_singlekey_inner(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) # inner join result = merge(left, right, left_on='key', right_index=True, how='inner') expected = left.join(right, on='key').loc[result.index] assert_frame_equal(result, expected) result = merge(right, left, right_on='key', left_index=True, how='inner') expected = left.join(right, on='key').loc[result.index] assert_frame_equal(result, expected.loc[:, result.columns])
Example #2
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_index_singlekey_right_vs_left(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) merged1 = merge(left, right, left_on='key', right_index=True, how='left', sort=False) merged2 = merge(right, left, right_on='key', left_index=True, how='right', sort=False) assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) merged1 = merge(left, right, left_on='key', right_index=True, how='left', sort=True) merged2 = merge(right, left, right_on='key', left_index=True, how='right', sort=True) assert_frame_equal(merged1, merged2.loc[:, merged1.columns])
Example #3
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_merge_index_singlekey_right_vs_left(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) merged1 = merge(left, right, left_on='key', right_index=True, how='left', sort=False) merged2 = merge(right, left, right_on='key', left_index=True, how='right', sort=False) assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) merged1 = merge(left, right, left_on='key', right_index=True, how='left', sort=True) merged2 = merge(right, left, right_on='key', left_index=True, how='right', sort=True) assert_frame_equal(merged1, merged2.loc[:, merged1.columns])
Example #4
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_merge_index_singlekey_inner(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) # inner join result = merge(left, right, left_on='key', right_index=True, how='inner') expected = left.join(right, on='key').loc[result.index] assert_frame_equal(result, expected) result = merge(right, left, right_on='key', left_index=True, how='inner') expected = left.join(right, on='key').loc[result.index] assert_frame_equal(result, expected.loc[:, result.columns])
Example #5
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_other_timedelta_unit(self, unit): # GH 13389 df1 = pd.DataFrame({'entity_id': [101, 102]}) s = pd.Series([None, None], index=[101, 102], name='days') dtype = "m8[{}]".format(unit) df2 = s.astype(dtype).to_frame('days') assert df2['days'].dtype == 'm8[ns]' result = df1.merge(df2, left_on='entity_id', right_index=True) exp = pd.DataFrame({'entity_id': [101, 102], 'days': np.array(['nat', 'nat'], dtype=dtype)}, columns=['entity_id', 'days']) tm.assert_frame_equal(result, exp)
Example #6
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_overlapping_columns_error_message(self):
    """Check merging frames whose non-key column labels are duplicated.

    With disjoint duplicate labels ('foo'/'bar') the merge result is
    compared with ``expected``; once both frames carry 'foo' twice and
    share no key, ``merge`` is expected to raise ``MergeError``.
    """
    df = DataFrame({'key': [1, 2, 3],
                    'v1': [4, 5, 6],
                    'v2': [7, 8, 9]})
    df2 = DataFrame({'key': [1, 2, 3],
                     'v1': [4, 5, 6],
                     'v2': [7, 8, 9]})

    df.columns = ['key', 'foo', 'foo']
    df2.columns = ['key', 'bar', 'bar']
    expected = DataFrame({'key': [1, 2, 3],
                          'v1': [4, 5, 6],
                          'v2': [7, 8, 9],
                          'v3': [4, 5, 6],
                          'v4': [7, 8, 9]})
    expected.columns = ['key', 'foo', 'foo', 'bar', 'bar']
    assert_frame_equal(merge(df, df2), expected)

    # #2649, #10639
    df2.columns = ['key1', 'foo', 'foo']
    msg = (r"Data columns not unique: Index\(\[u?'foo', u?'foo'\],"
           r" dtype='object'\)")
    with pytest.raises(MergeError, match=msg):
        merge(df, df2)
Example #7
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 4]}) right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'], 'value': [5, 6, 7, 8]}) merged = left.merge(right, left_on='lkey', right_on='rkey', how='outer', sort=True) exp = pd.Series(['bar', 'baz', 'foo', 'foo', 'foo', 'foo', np.nan], name='lkey') tm.assert_series_equal(merged['lkey'], exp) exp = pd.Series(['bar', np.nan, 'foo', 'foo', 'foo', 'foo', 'qux'], name='rkey') tm.assert_series_equal(merged['rkey'], exp) exp = pd.Series([2, 3, 1, 1, 4, 4, np.nan], name='value_x') tm.assert_series_equal(merged['value_x'], exp) exp = pd.Series([6, np.nan, 5, 8, 5, 8, 7], name='value_y') tm.assert_series_equal(merged['value_y'], exp)
Example #8
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_join_key_dtype_cast(self):
    """Pin the dtype of the join key after an outer merge.

    Covers an int key column, a bool key column, and array keys passed
    through ``left_on``/``right_on``.
    """
    # #8596
    df1 = DataFrame({'key': [1], 'v1': [10]})
    df2 = DataFrame({'key': [2], 'v1': [20]})
    df = merge(df1, df2, how='outer')
    assert df['key'].dtype == 'int64'

    df1 = DataFrame({'key': [True], 'v1': [1]})
    df2 = DataFrame({'key': [False], 'v1': [0]})
    df = merge(df1, df2, how='outer')

    # GH13169
    # this really should be bool
    assert df['key'].dtype == 'object'

    # keys supplied as arrays end up in a column named 'key_0'
    df1 = DataFrame({'val': [1]})
    df2 = DataFrame({'val': [2]})
    lkey = np.array([1])
    rkey = np.array([2])
    df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
    assert df['key_0'].dtype == 'int64'
Example #9
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_series(on, left_on, right_on, left_index, right_index, nm): # GH 21220 a = pd.DataFrame({"A": [1, 2, 3, 4]}, index=pd.MultiIndex.from_product([['a', 'b'], [0, 1]], names=['outer', 'inner'])) b = pd.Series([1, 2, 3, 4], index=pd.MultiIndex.from_product([['a', 'b'], [1, 2]], names=['outer', 'inner']), name=nm) expected = pd.DataFrame({"A": [2, 4], "B": [1, 3]}, index=pd.MultiIndex.from_product([['a', 'b'], [1]], names=['outer', 'inner'])) if nm is not None: result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index) tm.assert_frame_equal(result, expected) else: msg = "Cannot merge a Series without a name" with pytest.raises(ValueError, match=msg): result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index)
Example #10
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_on_datetime64tz_empty(self):
    """Merge an empty slice with its parent frame on a tz-aware key.

    The expected result is an empty frame whose columns carry the float
    and tz-aware dtypes listed in ``expected``.
    """
    # https://github.com/pandas-dev/pandas/issues/25014
    dtz = pd.DatetimeTZDtype(tz='UTC')
    right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)],
                          'value': [4.0],
                          'date2': [pd.Timestamp('2019', tz=dtz.tz)]},
                         columns=['date', 'value', 'date2'])
    left = right[:0]  # zero rows, same columns
    result = left.merge(right, on='date')
    expected = pd.DataFrame({
        'value_x': pd.Series(dtype=float),
        'date2_x': pd.Series(dtype=dtz),
        'date': pd.Series(dtype=dtz),
        'value_y': pd.Series(dtype=float),
        'date2_y': pd.Series(dtype=dtz),
    }, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y'])
    tm.assert_frame_equal(result, expected)
Example #11
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 4]}) right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'], 'value': [5, 6, 7, 8]}) merged = left.merge(right, left_on='lkey', right_on='rkey', how='outer', sort=True) exp = pd.Series(['bar', 'baz', 'foo', 'foo', 'foo', 'foo', np.nan], name='lkey') tm.assert_series_equal(merged['lkey'], exp) exp = pd.Series(['bar', np.nan, 'foo', 'foo', 'foo', 'foo', 'qux'], name='rkey') tm.assert_series_equal(merged['rkey'], exp) exp = pd.Series([2, 3, 1, 1, 4, 4, np.nan], name='value_x') tm.assert_series_equal(merged['value_x'], exp) exp = pd.Series([6, np.nan, 5, 8, 5, 8, 7], name='value_y') tm.assert_series_equal(merged['value_y'], exp)
Example #12
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_merge_join_key_dtype_cast(self):
    """Pin the dtype of the join key after an outer merge.

    Covers an int key column, a bool key column, and array keys passed
    through ``left_on``/``right_on``.
    """
    # #8596
    df1 = DataFrame({'key': [1], 'v1': [10]})
    df2 = DataFrame({'key': [2], 'v1': [20]})
    df = merge(df1, df2, how='outer')
    assert df['key'].dtype == 'int64'

    df1 = DataFrame({'key': [True], 'v1': [1]})
    df2 = DataFrame({'key': [False], 'v1': [0]})
    df = merge(df1, df2, how='outer')

    # GH13169
    # this really should be bool
    assert df['key'].dtype == 'object'

    # keys supplied as arrays end up in a column named 'key_0'
    df1 = DataFrame({'val': [1]})
    df2 = DataFrame({'val': [2]})
    lkey = np.array([1])
    rkey = np.array([2])
    df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
    assert df['key_0'].dtype == 'int64'
Example #13
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_other_datetime_unit(self):
    """Merge against an all-NaT datetime column for several numpy units.

    For every unit the right-hand column is built with ``astype`` and the
    merged result is compared with a ``datetime64[ns]`` all-NaT column.
    """
    # GH 13389
    df1 = pd.DataFrame({'entity_id': [101, 102]})
    s = pd.Series([None, None], index=[101, 102], name='days')

    for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                  'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                  'datetime64[ns]']:

        df2 = s.astype(dtype).to_frame('days')
        # coerces to datetime64[ns], thus should not be affected
        assert df2['days'].dtype == 'datetime64[ns]'

        result = df1.merge(df2, left_on='entity_id', right_index=True)

        exp = pd.DataFrame({'entity_id': [101, 102],
                            'days': np.array(['nat', 'nat'],
                                             dtype='datetime64[ns]')},
                           columns=['entity_id', 'days'])
        tm.assert_frame_equal(result, exp)
Example #14
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): # GH 25183 df_left = pd.DataFrame( {'key': series_of_dtype, 'value': series_of_dtype_all_na}, columns=['key', 'value']) df_right = pd.DataFrame( {'key': series_of_dtype, 'value': series_of_dtype_all_na}, columns=['key', 'value']) expected = pd.DataFrame({ 'key': series_of_dtype, 'value_x': series_of_dtype_all_na, 'value_y': series_of_dtype_all_na, }, columns=['key', 'value_x', 'value_y']) actual = df_left.merge(df_right, on='key') assert_frame_equal(actual, expected)
Example #15
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merging_with_bool_or_int_cateorical_column(self, category_column, categories, expected_categories, ordered): # GH 17187 # merging with a boolean/int categorical column df1 = pd.DataFrame({'id': [1, 2, 3, 4], 'cat': category_column}) df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered)) df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]}) result = df1.merge(df2) expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories, 'num': [1, 9]}) expected['cat'] = expected['cat'].astype( CDT(categories, ordered=ordered)) assert_frame_equal(expected, result)
Example #16
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_on_index_with_more_values(self, how):
    """Merge a left index against a right column when the left has more keys.

    ``how`` is passed straight through to ``pd.merge``.
    """
    # GH 24212
    # pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
    # interpreted as a missing value instead of the last element
    df1 = pd.DataFrame([[1, 2], [2, 4], [3, 6], [4, 8]],
                       columns=['a', 'b'])
    df2 = pd.DataFrame([[3, 30], [4, 40]], columns=['a', 'c'])
    # df1 keeps 'a' as a column as well as the index; df2 does not
    df1 = df1.set_index('a', drop=False)
    df2 = df2.set_index('a')
    result = pd.merge(df1, df2, left_index=True, right_on='a', how=how)
    expected = pd.DataFrame([[1, 2, np.nan],
                             [2, 4, np.nan],
                             [3, 6, 30.0],
                             [4, 8, 40.0]],
                            columns=['a', 'b', 'c'])
    expected = expected.set_index('a', drop=False)
    assert_frame_equal(result, expected)
Example #17
Source File: test_multi.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_on_multikey(self, left, right, join_type): on_cols = ['key1', 'key2'] result = (left.join(right, on=on_cols, how=join_type) .reset_index(drop=True)) expected = pd.merge(left, right.reset_index(), on=on_cols, how=join_type) tm.assert_frame_equal(result, expected) result = (left.join(right, on=on_cols, how=join_type, sort=True) .reset_index(drop=True)) expected = pd.merge(left, right.reset_index(), on=on_cols, how=join_type, sort=True) tm.assert_frame_equal(result, expected)
Example #18
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_self_join_multiple_categories(self): # GH 16767 # non-duplicates should work with multiple categories m = 5 df = pd.DataFrame({ 'a': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] * m, 'b': ['t', 'w', 'x', 'y', 'z'] * 2 * m, 'c': [letter for each in ['m', 'n', 'u', 'p', 'o'] for letter in [each] * 2 * m], 'd': [letter for each in ['aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg', 'hh', 'ii', 'jj'] for letter in [each] * m]}) # change them all to categorical variables df = df.apply(lambda x: x.astype('category')) # self-join should equal ourselves result = pd.merge(df, df, on=list(df.columns)) assert_frame_equal(result, df)
Example #19
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_on_ints_floats_warning(self): # GH 16572 # merge will produce a warning when merging on int and # float columns where the float values are not exactly # equal to their int representation A = DataFrame({'X': [1, 2, 3]}) B = DataFrame({'Y': [1.1, 2.5, 3.0]}) expected = DataFrame({'X': [3], 'Y': [3.0]}) with tm.assert_produces_warning(UserWarning): result = A.merge(B, left_on='X', right_on='Y') assert_frame_equal(result, expected) with tm.assert_produces_warning(UserWarning): result = B.merge(A, left_on='Y', right_on='X') assert_frame_equal(result, expected[['Y', 'X']]) # test no warning if float has NaNs B = DataFrame({'Y': [np.nan, np.nan, 3.0]}) with tm.assert_produces_warning(None): result = B.merge(A, left_on='Y', right_on='X') assert_frame_equal(result, expected[['Y', 'X']])
Example #20
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_other_datetime_unit(self):
    """Merge against an all-NaT datetime column for several numpy units.

    For every unit the right-hand column is built with ``astype`` and the
    merged result is compared with a ``datetime64[ns]`` all-NaT column.
    """
    # GH 13389
    df1 = pd.DataFrame({'entity_id': [101, 102]})
    s = pd.Series([None, None], index=[101, 102], name='days')

    for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                  'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                  'datetime64[ns]']:

        df2 = s.astype(dtype).to_frame('days')
        # coerces to datetime64[ns], thus should not be affected
        assert df2['days'].dtype == 'datetime64[ns]'

        result = df1.merge(df2, left_on='entity_id', right_index=True)

        exp = pd.DataFrame({'entity_id': [101, 102],
                            'days': np.array(['nat', 'nat'],
                                             dtype='datetime64[ns]')},
                           columns=['entity_id', 'days'])
        tm.assert_frame_equal(result, exp)
Example #21
Source File: test_merge.py From vnpy_crypto with MIT License | 6 votes |
def test_overlapping_columns_error_message(self):
    """Check merging frames whose non-key column labels are duplicated.

    With disjoint duplicate labels ('foo'/'bar') the merge result is
    compared with ``expected``; once both frames carry 'foo' twice and
    share no key, ``merge`` is expected to raise ``ValueError``.
    """
    df = DataFrame({'key': [1, 2, 3],
                    'v1': [4, 5, 6],
                    'v2': [7, 8, 9]})
    df2 = DataFrame({'key': [1, 2, 3],
                     'v1': [4, 5, 6],
                     'v2': [7, 8, 9]})

    df.columns = ['key', 'foo', 'foo']
    df2.columns = ['key', 'bar', 'bar']
    expected = DataFrame({'key': [1, 2, 3],
                          'v1': [4, 5, 6],
                          'v2': [7, 8, 9],
                          'v3': [4, 5, 6],
                          'v4': [7, 8, 9]})
    expected.columns = ['key', 'foo', 'foo', 'bar', 'bar']
    assert_frame_equal(merge(df, df2), expected)

    # #2649, #10639
    df2.columns = ['key1', 'foo', 'foo']
    # context-manager form keeps the failing call readable
    with pytest.raises(ValueError):
        merge(df, df2)
Example #22
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def test_merge_incompat_infer_boolean_object(self):
    """Merge a bool key column with an object column holding bools.

    Both operand orders are checked, first without and then with a NaN
    in the object column; the expected key dtype is object each time.
    """
    # GH21119: bool + object bool merge OK
    df1 = DataFrame({'key': Series([True, False], dtype=object)})
    df2 = DataFrame({'key': [True, False]})

    expected = DataFrame({'key': [True, False]}, dtype=object)
    result = pd.merge(df1, df2, on='key')
    assert_frame_equal(result, expected)
    result = pd.merge(df2, df1, on='key')
    assert_frame_equal(result, expected)

    # with missing value
    df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)})
    df2 = DataFrame({'key': [True, False]})

    expected = DataFrame({'key': [True, False]}, dtype=object)
    result = pd.merge(df1, df2, on='key')
    assert_frame_equal(result, expected)
    result = pd.merge(df2, df1, on='key')
    assert_frame_equal(result, expected)
Example #23
Source File: test_merge.py From recruit with Apache License 2.0 | 6 votes |
def tests_merge_categorical_unordered_equal(self): # GH-19551 df1 = DataFrame({ 'Foo': Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C']), 'Left': ['A0', 'B0', 'C0'], }) df2 = DataFrame({ 'Foo': Categorical(['C', 'B', 'A'], categories=['C', 'B', 'A']), 'Right': ['C1', 'B1', 'A1'], }) result = pd.merge(df1, df2, on=['Foo']) expected = DataFrame({ 'Foo': pd.Categorical(['A', 'B', 'C']), 'Left': ['A0', 'B0', 'C0'], 'Right': ['A1', 'B1', 'C1'], }) assert_frame_equal(result, expected)
Example #24
Source File: test_merge.py From vnpy_crypto with MIT License | 5 votes |
def test_merge_left_notempty_right_empty(self):
    """Merge a populated left frame with an empty right frame.

    'inner' and 'right' merges are compared with the empty ``exp_in``;
    'left' and 'outer' merges with ``exp_out``, for four combinations of
    index/column keys.
    """
    # GH 10824
    left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                        columns=['a', 'b', 'c'])
    right = pd.DataFrame([], columns=['x', 'y', 'z'])

    exp_out = pd.DataFrame({'a': [1, 4, 7],
                            'b': [2, 5, 8],
                            'c': [3, 6, 9],
                            'x': np.array([np.nan] * 3, dtype=object),
                            'y': np.array([np.nan] * 3, dtype=object),
                            'z': np.array([np.nan] * 3, dtype=object)},
                           columns=['a', 'b', 'c', 'x', 'y', 'z'])
    exp_in = exp_out[0:0]  # make empty DataFrame keeping dtype
    # result will have object dtype
    exp_in.index = exp_in.index.astype(object)

    def check(exp, hows, kwarg):
        # run one merge per join type and compare each with ``exp``
        for how in hows:
            result = pd.merge(left, right, how=how, **kwarg)
            tm.assert_frame_equal(result, exp)

    for kwarg in [dict(left_index=True, right_index=True),
                  dict(left_index=True, right_on='x'),
                  dict(left_on='a', right_index=True),
                  dict(left_on='a', right_on='x')]:
        check(exp_in, ('inner', 'right'), kwarg)
        check(exp_out, ('left', 'outer'), kwarg)
Example #25
Source File: test_merge.py From vnpy_crypto with MIT License | 5 votes |
def test_merge_inner_join_empty(self):
    """Index-on-index merge with an empty frame yields an empty int64 column."""
    # GH 15328
    df_empty = pd.DataFrame()
    df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64')
    result = pd.merge(df_empty, df_a, left_index=True, right_index=True)
    expected = pd.DataFrame({'a': []}, index=[], dtype='int64')
    assert_frame_equal(result, expected)
Example #26
Source File: test_merge.py From vnpy_crypto with MIT License | 5 votes |
def test_index_and_on_parameters_confusion(self):
    """Passing key lists as ``left_index``/``right_index`` must raise ValueError.

    Uses the ``self.df`` and ``self.df2`` frames prepared outside this method.
    """
    # list given as right_index only
    with pytest.raises(ValueError):
        merge(self.df, self.df2, how='left',
              left_index=False, right_index=['key1', 'key2'])
    # list given as left_index only
    with pytest.raises(ValueError):
        merge(self.df, self.df2, how='left',
              left_index=['key1', 'key2'], right_index=False)
    # lists given on both sides
    with pytest.raises(ValueError):
        merge(self.df, self.df2, how='left',
              left_index=['key1', 'key2'],
              right_index=['key1', 'key2'])
Example #27
Source File: test_merge.py From recruit with Apache License 2.0 | 5 votes |
def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): result = pd.merge(left_df, right_df, left_index=True, right_index=True, how=how, sort=sort) tm.assert_frame_equal(result, expected)
Example #28
Source File: test_merge.py From recruit with Apache License 2.0 | 5 votes |
def test_merge_on_int_array(self): # GH 23020 df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), 'B': 1}) result = pd.merge(df, df, on='A') expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), 'B_x': 1, 'B_y': 1}) assert_frame_equal(result, expected)
Example #29
Source File: test_merge.py From recruit with Apache License 2.0 | 5 votes |
def test_dtype_on_categorical_dates(self):
    """Outer and inner merges on a categorical column of ``date`` objects.

    The expected 'date' column is written with ``pd.Timestamp`` values.
    """
    # GH 16900
    # dates should not be coerced to ints

    df = pd.DataFrame(
        [[date(2001, 1, 1), 1.1],
         [date(2001, 1, 2), 1.3]],
        columns=['date', 'num2']
    )
    df['date'] = df['date'].astype('category')

    df2 = pd.DataFrame(
        [[date(2001, 1, 1), 1.3],
         [date(2001, 1, 3), 1.4]],
        columns=['date', 'num4']
    )
    df2['date'] = df2['date'].astype('category')

    expected_outer = pd.DataFrame([
        [pd.Timestamp('2001-01-01'), 1.1, 1.3],
        [pd.Timestamp('2001-01-02'), 1.3, np.nan],
        [pd.Timestamp('2001-01-03'), np.nan, 1.4]],
        columns=['date', 'num2', 'num4']
    )
    result_outer = pd.merge(df, df2, how='outer', on=['date'])
    assert_frame_equal(result_outer, expected_outer)

    expected_inner = pd.DataFrame(
        [[pd.Timestamp('2001-01-01'), 1.1, 1.3]],
        columns=['date', 'num2', 'num4']
    )
    result_inner = pd.merge(df, df2, how='inner', on=['date'])
    assert_frame_equal(result_inner, expected_inner)
Example #30
Source File: test_merge.py From recruit with Apache License 2.0 | 5 votes |
def test_other_columns(self, left, right):
    """Check column dtypes after merging on 'X' with a categorical 'Z'.

    ``right.Z`` is cast to category before the merge; the merged dtypes
    are compared with ``expected`` and the categorical values of ``X`` and
    ``Z`` are checked against the inputs with ``is_dtype_equal``.
    """
    # non-merge columns should preserve if possible
    right = right.assign(Z=right.Z.astype('category'))

    merged = pd.merge(left, right, on='X')
    result = merged.dtypes.sort_index()
    expected = Series([CategoricalDtype(),
                       np.dtype('O'),
                       CategoricalDtype()],
                      index=['X', 'Y', 'Z'])
    assert_series_equal(result, expected)

    # categories are preserved
    assert left.X.values.is_dtype_equal(merged.X.values)
    assert right.Z.values.is_dtype_equal(merged.Z.values)