Python pandas.merge() Examples

The following are 30 code examples of pandas.merge(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas , or try the search function .
Example #1
Source File: movielens_preprocess.py    From striatum with BSD 2-Clause "Simplified" License 9 votes vote down vote up
def main():
    # read and preprocess the movie data
    movie = pd.read_table('movies.dat', sep='::', names=['movie_id', 'movie_name', 'tag'], engine='python')
    movie = movie_preprocessing(movie)

    # read the ratings data and merge it with movie data
    rating = pd.read_table("ratings.dat", sep="::",
                           names=["user_id", "movie_id", "rating", "timestamp"], engine='python')
    data = pd.merge(rating, movie, on="movie_id")

    # extract feature from our data set
    streaming_batch, user_feature, actions, reward_list = feature_extraction(data)
    streaming_batch.to_csv("streaming_batch.csv", sep='\t', index=False)
    user_feature.to_csv("user_feature.csv", sep='\t')
    pd.DataFrame(actions, columns=['movie_id']).to_csv("actions.csv", sep='\t', index=False)
    reward_list.to_csv("reward_list.csv", sep='\t', index=False)

    action_context = movie[movie['movie_id'].isin(actions)]
    action_context.to_csv("action_context.csv", sep='\t', index = False) 
Example #2
Source File: test_merge.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_merge_index_singlekey_inner(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        # inner join
        result = merge(left, right, left_on='key', right_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected)

        result = merge(right, left, right_on='key', left_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected.loc[:, result.columns]) 
Example #3
Source File: test_merge.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_merge_datetime64tz_with_dst_transition(self):
        # GH 18885
        df1 = pd.DataFrame(pd.date_range(
            '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'),
            columns=['date'])
        df1['value'] = 1
        df2 = pd.DataFrame({
            'date': pd.to_datetime([
                '2017-10-29 03:00:00', '2017-10-29 04:00:00',
                '2017-10-29 05:00:00'
            ]),
            'value': 2
        })
        df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert(
            'Europe/Madrid')
        result = pd.merge(df1, df2, how='outer', on='date')
        expected = pd.DataFrame({
            'date': pd.date_range(
                '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'),
            'value_x': [1] * 4 + [np.nan] * 3,
            'value_y': [np.nan] * 4 + [2] * 3
        })
        assert_frame_equal(result, expected) 
Example #4
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_datetime64tz_empty(self):
        # https://github.com/pandas-dev/pandas/issues/25014
        dtz = pd.DatetimeTZDtype(tz='UTC')
        right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)],
                              'value': [4.0],
                              'date2': [pd.Timestamp('2019', tz=dtz.tz)]},
                             columns=['date', 'value', 'date2'])
        left = right[:0]
        result = left.merge(right, on='date')
        expected = pd.DataFrame({
            'value_x': pd.Series(dtype=float),
            'date2_x': pd.Series(dtype=dtz),
            'date': pd.Series(dtype=dtz),
            'value_y': pd.Series(dtype=float),
            'date2_y': pd.Series(dtype=dtz),
        }, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y'])
        tm.assert_frame_equal(result, expected) 
Example #5
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_incompat_infer_boolean_object(self):
        # GH21119: bool + object bool merge OK
        df1 = DataFrame({'key': Series([True, False], dtype=object)})
        df2 = DataFrame({'key': [True, False]})

        expected = DataFrame({'key': [True, False]}, dtype=object)
        result = pd.merge(df1, df2, on='key')
        assert_frame_equal(result, expected)
        result = pd.merge(df2, df1, on='key')
        assert_frame_equal(result, expected)

        # with missing value
        df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)})
        df2 = DataFrame({'key': [True, False]})

        expected = DataFrame({'key': [True, False]}, dtype=object)
        result = pd.merge(df1, df2, on='key')
        assert_frame_equal(result, expected)
        result = pd.merge(df2, df1, on='key')
        assert_frame_equal(result, expected) 
Example #6
Source File: preprocess_for_nn.py    From mercari-price-suggestion with MIT License 6 votes vote down vote up
def transform(self, df):
        df["name"] = df["name"].apply(number_preprocess)
        df["item_description"] = df["item_description"].apply(number_preprocess)

        for cat in self.cat_cols:
            df[cat] = df[cat].apply(lambda x: x if x in self.cat_vocab[cat] else "rarecategory")
            df[cat] = self.le[cat].transform(df[cat])

        df["name"] = df["name"].apply(lambda x: cut(x, self.voc))
        df["item_description"] = df["item_description"].apply(lambda x: cut(x, self.voc))

        df['seq_item_description'] = self.tok_raw.texts_to_sequences(df["item_description"].values)
        df['seq_name'] = self.tok_raw.texts_to_sequences(df["name"].values)

        for col in ["name_ori", "item_description_ori"]:
            f_col = col + "_freq"
            df = pd.merge(df, self.freqs[col], how="left", on=col)
            df[f_col] = df[f_col].fillna(0)
            df[f_col] = df[f_col] / (self.max_freqs[col] + 1)

        return df 
Example #7
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_index_with_more_values(self, how):
        # GH 24212
        # pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
        # interpreted as a missing value instead of the last element
        df1 = pd.DataFrame([[1, 2], [2, 4], [3, 6], [4, 8]],
                           columns=['a', 'b'])
        df2 = pd.DataFrame([[3, 30], [4, 40]],
                           columns=['a', 'c'])
        df1.set_index('a', drop=False, inplace=True)
        df2.set_index('a', inplace=True)
        result = pd.merge(df1, df2, left_index=True, right_on='a', how=how)
        expected = pd.DataFrame([[1, 2, np.nan],
                                 [2, 4, np.nan],
                                 [3, 6, 30.0],
                                 [4, 8, 40.0]],
                                columns=['a', 'b', 'c'])
        expected.set_index('a', drop=False, inplace=True)
        assert_frame_equal(result, expected) 
Example #8
Source File: pricing.py    From thewarden with MIT License 6 votes vote down vote up
def df_fx(self, currency, fx_provider):
        try:
            # First get the df from this currency
            if currency != 'USD':
                fx = PriceData(currency, fx_provider)
                fx.df = fx.df.rename(columns={'close': 'fx_close'})
                fx.df["fx_close"] = pd.to_numeric(fx.df.fx_close,
                                                  errors='coerce')
                # Merge the two dfs:
                merge_df = pd.merge(self.df, fx.df, on='date', how='inner')
                merge_df['close'] = merge_df['close'].astype(float)
                merge_df['close_converted'] = merge_df['close'] * merge_df[
                    'fx_close']
                return (merge_df)
            else:  # If currency is USD no conversion is needed - prices are all in USD
                self.df['fx_close'] = 1
                self.df['close_converted'] = self.df['close'].astype(float)
                return (self.df)
        except Exception as e:
            self.errors.append(e)
            return (None) 
Example #9
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals):
        # GH 9780, GH 15800
        # Raise a ValueError when a user tries to merge on
        # dtypes that are incompatible (e.g., obj and int/float)

        df1 = DataFrame({'A': df1_vals})
        df2 = DataFrame({'A': df2_vals})

        msg = ("You are trying to merge on {lk_dtype} and "
               "{rk_dtype} columns. If you wish to proceed "
               "you should use pd.concat".format(lk_dtype=df1['A'].dtype,
                                                 rk_dtype=df2['A'].dtype))
        msg = re.escape(msg)
        with pytest.raises(ValueError, match=msg):
            pd.merge(df1, df2, on=['A'])

        # Check that error still raised when swapping order of dataframes
        msg = ("You are trying to merge on {lk_dtype} and "
               "{rk_dtype} columns. If you wish to proceed "
               "you should use pd.concat".format(lk_dtype=df2['A'].dtype,
                                                 rk_dtype=df1['A'].dtype))
        msg = re.escape(msg)
        with pytest.raises(ValueError, match=msg):
            pd.merge(df2, df1, on=['A']) 
Example #10
Source File: reshaping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge(self, data, na_value):
        # GH-20743
        df1 = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
                            'key': [0, 1, 2]})
        df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})

        res = pd.merge(df1, df2)
        exp = pd.DataFrame(
            {'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
             'ext': data._from_sequence([data[0], data[0], data[1]],
                                        dtype=data.dtype)})
        self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])

        res = pd.merge(df1, df2, how='outer')
        exp = pd.DataFrame(
            {'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4],
             'key': [0, 0, 1, 2, 3],
             'ext': data._from_sequence(
                 [data[0], data[0], data[1], data[2], na_value],
                 dtype=data.dtype)})
        self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) 
Example #11
Source File: reshaping.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_extension_array(self, data):
        # GH 23020
        a, b = data[:2]
        key = type(data)._from_sequence([a, b], dtype=data.dtype)

        df = pd.DataFrame({"key": key, "val": [1, 2]})
        result = pd.merge(df, df, on='key')
        expected = pd.DataFrame({"key": key,
                                 "val_x": [1, 2],
                                 "val_y": [1, 2]})
        self.assert_frame_equal(result, expected)

        # order
        result = pd.merge(df.iloc[[1, 0]], df, on='key')
        expected = expected.iloc[[1, 0]].reset_index(drop=True)
        self.assert_frame_equal(result, expected) 
Example #12
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_other_timedelta_unit(self, unit):
        # GH 13389
        df1 = pd.DataFrame({'entity_id': [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name='days')

        dtype = "m8[{}]".format(unit)
        df2 = s.astype(dtype).to_frame('days')
        assert df2['days'].dtype == 'm8[ns]'

        result = df1.merge(df2, left_on='entity_id', right_index=True)

        exp = pd.DataFrame({'entity_id': [101, 102],
                            'days': np.array(['nat', 'nat'],
                                             dtype=dtype)},
                           columns=['entity_id', 'days'])
        tm.assert_frame_equal(result, exp) 
Example #13
Source File: test_multi.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_on_multikey(self, left, right, join_type):
        on_cols = ['key1', 'key2']
        result = (left.join(right, on=on_cols, how=join_type)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type)

        tm.assert_frame_equal(result, expected)

        result = (left.join(right, on=on_cols, how=join_type, sort=True)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type, sort=True)

        tm.assert_frame_equal(result, expected) 
Example #14
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_other_datetime_unit(self):
        # GH 13389
        df1 = pd.DataFrame({'entity_id': [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name='days')

        for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                      'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                      'datetime64[ns]']:

            df2 = s.astype(dtype).to_frame('days')
            # coerces to datetime64[ns], thus sholuld not be affected
            assert df2['days'].dtype == 'datetime64[ns]'

            result = df1.merge(df2, left_on='entity_id', right_index=True)

            exp = pd.DataFrame({'entity_id': [101, 102],
                                'days': np.array(['nat', 'nat'],
                                                 dtype='datetime64[ns]')},
                               columns=['entity_id', 'days'])
            tm.assert_frame_equal(result, exp) 
Example #15
Source File: test_multi.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_single_common_level(self):
        index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
                                                ('K1', 'X2')],
                                               names=['key', 'X'])

        left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                             'B': ['B0', 'B1', 'B2']},
                            index=index_left)

        index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
                                                 ('K2', 'Y2'), ('K2', 'Y3')],
                                                names=['key', 'Y'])

        right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
                              'D': ['D0', 'D1', 'D2', 'D3']},
                             index=index_right)

        result = left.join(right)
        expected = (pd.merge(left.reset_index(), right.reset_index(),
                             on=['key'], how='inner')
                    .set_index(['key', 'X', 'Y']))

        tm.assert_frame_equal(result, expected) 
Example #16
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_join_key_dtype_cast(self):
        # #8596

        df1 = DataFrame({'key': [1], 'v1': [10]})
        df2 = DataFrame({'key': [2], 'v1': [20]})
        df = merge(df1, df2, how='outer')
        assert df['key'].dtype == 'int64'

        df1 = DataFrame({'key': [True], 'v1': [1]})
        df2 = DataFrame({'key': [False], 'v1': [0]})
        df = merge(df1, df2, how='outer')

        # GH13169
        # this really should be bool
        assert df['key'].dtype == 'object'

        df1 = DataFrame({'val': [1]})
        df2 = DataFrame({'val': [2]})
        lkey = np.array([1])
        rkey = np.array([2])
        df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
        assert df['key_0'].dtype == 'int64' 
Example #17
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_misspecified(self):
        msg = "Must pass right_on or right_index=True"
        with pytest.raises(pd.errors.MergeError, match=msg):
            merge(self.left, self.right, left_index=True)
        msg = "Must pass left_on or left_index=True"
        with pytest.raises(pd.errors.MergeError, match=msg):
            merge(self.left, self.right, right_index=True)

        msg = ('Can only pass argument "on" OR "left_on" and "right_on", not'
               ' a combination of both')
        with pytest.raises(pd.errors.MergeError, match=msg):
            merge(self.left, self.left, left_on='key', on='key')

        msg = r"len\(right_on\) must equal len\(left_on\)"
        with pytest.raises(ValueError, match=msg):
            merge(self.df, self.df2, left_on=['key1'],
                  right_on=['key1', 'key2']) 
Example #18
Source File: test_merge.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_merge_index_singlekey_right_vs_left(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=False)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=False)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=True)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=True)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) 
Example #19
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_left_merge_empty_dataframe(self):
        left = DataFrame({'key': [1], 'value': [2]})
        right = DataFrame({'key': []})

        result = merge(left, right, on='key', how='left')
        assert_frame_equal(result, left)

        result = merge(right, left, on='key', how='right')
        assert_frame_equal(result, left) 
Example #20
Source File: test_join.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_join_inner_multiindex(self):
        key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
                'qux', 'snap']
        key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
                'three', 'one']

        data = np.random.randn(len(key1))
        data = DataFrame({'key1': key1, 'key2': key2,
                          'data': data})

        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        to_join = DataFrame(np.random.randn(10, 3), index=index,
                            columns=['j_one', 'j_two', 'j_three'])

        joined = data.join(to_join, on=['key1', 'key2'], how='inner')
        expected = merge(data, to_join.reset_index(),
                         left_on=['key1', 'key2'],
                         right_on=['first', 'second'], how='inner',
                         sort=False)

        expected2 = merge(to_join, data,
                          right_on=['key1', 'key2'], left_index=True,
                          how='inner', sort=False)
        assert_frame_equal(joined, expected2.reindex_like(joined))

        expected2 = merge(to_join, data, right_on=['key1', 'key2'],
                          left_index=True, how='inner', sort=False)

        expected = expected.drop(['first', 'second'], axis=1)
        expected.index = joined.index

        assert joined.index.is_monotonic
        assert_frame_equal(joined, expected)

        # _assert_same_contents(expected, expected2.loc[:, expected.columns]) 
Example #21
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_index_and_on_parameters_confusion(self):
        msg = ("right_index parameter must be of type bool, not"
               r" <(class|type) 'list'>")
        with pytest.raises(ValueError, match=msg):
            merge(self.df, self.df2, how='left',
                  left_index=False, right_index=['key1', 'key2'])
        msg = ("left_index parameter must be of type bool, not "
               r"<(class|type) 'list'>")
        with pytest.raises(ValueError, match=msg):
            merge(self.df, self.df2, how='left',
                  left_index=['key1', 'key2'], right_index=False)
        with pytest.raises(ValueError, match=msg):
            merge(self.df, self.df2, how='left',
                  left_index=['key1', 'key2'], right_index=['key1', 'key2']) 
Example #22
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_inner_join_empty(self):
        # GH 15328
        df_empty = pd.DataFrame()
        df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64')
        result = pd.merge(df_empty, df_a, left_index=True, right_index=True)
        expected = pd.DataFrame({'a': []}, index=[], dtype='int64')
        assert_frame_equal(result, expected) 
Example #23
Source File: test_join.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_join_multi_to_multi(self, join_type):
        # GH 20475
        leftindex = MultiIndex.from_product([list('abc'), list('xy'), [1, 2]],
                                            names=['abc', 'xy', 'num'])
        left = DataFrame({'v1': range(12)}, index=leftindex)

        rightindex = MultiIndex.from_product([list('abc'), list('xy')],
                                             names=['abc', 'xy'])
        right = DataFrame({'v2': [100 * i for i in range(1, 7)]},
                          index=rightindex)

        result = left.join(right, on=['abc', 'xy'], how=join_type)
        expected = (left.reset_index()
                        .merge(right.reset_index(),
                               on=['abc', 'xy'], how=join_type)
                        .set_index(['abc', 'xy', 'num'])
                    )
        assert_frame_equal(expected, result)

        msg = (r'len\(left_on\) must equal the number of levels in the index'
               ' of "right"')
        with pytest.raises(ValueError, match=msg):
            left.join(right, on='xy', how=join_type)

        with pytest.raises(ValueError, match=msg):
            right.join(left, on=['abc', 'xy'], how=join_type) 
Example #24
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_common(self):
        joined = merge(self.df, self.df2)
        exp = merge(self.df, self.df2, on=['key1', 'key2'])
        tm.assert_frame_equal(joined, exp) 
Example #25
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_overlap(self):
        merged = merge(self.left, self.left, on='key')
        exp_len = (self.left['key'].value_counts() ** 2).sum()
        assert len(merged) == exp_len
        assert 'v1_x' in merged
        assert 'v1_y' in merged 
Example #26
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_copy(self):
        left = DataFrame({'a': 0, 'b': 1}, index=lrange(10))
        right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10))

        merged = merge(left, right, left_index=True,
                       right_index=True, copy=True)

        merged['a'] = 6
        assert (left['a'] == 0).all()

        merged['d'] = 'peekaboo'
        assert (right['d'] == 'bar').all() 
Example #27
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_nocopy(self):
        left = DataFrame({'a': 0, 'b': 1}, index=lrange(10))
        right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10))

        merged = merge(left, right, left_index=True,
                       right_index=True, copy=False)

        merged['a'] = 6
        assert (left['a'] == 6).all()

        merged['d'] = 'peekaboo'
        assert (right['d'] == 'peekaboo').all() 
Example #28
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_intelligently_handle_join_key(self):
        # #733, be a bit more 1337 about not returning unconsolidated DataFrame

        left = DataFrame({'key': [1, 1, 2, 2, 3],
                          'value': lrange(5)}, columns=['value', 'key'])
        right = DataFrame({'key': [1, 1, 2, 3, 4, 5],
                           'rvalue': lrange(6)})

        joined = merge(left, right, on='key', how='outer')
        expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5],
                              'value': np.array([0, 0, 1, 1, 2, 3, 4,
                                                 np.nan, np.nan]),
                              'rvalue': [0, 1, 0, 1, 2, 2, 3, 4, 5]},
                             columns=['value', 'key', 'rvalue'])
        assert_frame_equal(joined, expected) 
Example #29
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals):
        # GH 16572
        # Check that float column is not cast to object if
        # merging on float and int columns
        A = DataFrame({'X': int_vals})
        B = DataFrame({'Y': float_vals})
        expected = DataFrame(exp_vals)

        result = A.merge(B, left_on='X', right_on='Y')
        assert_frame_equal(result, expected)

        result = B.merge(A, left_on='Y', right_on='X')
        assert_frame_equal(result, expected[['Y', 'X']]) 
Example #30
Source File: test_merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals):
        # these are explicity allowed incompat merges, that pass thru
        # the result type is dependent on if the values on the rhs are
        # inferred, otherwise these will be coereced to object

        df1 = DataFrame({'A': df1_vals})
        df2 = DataFrame({'A': df2_vals})

        result = pd.merge(df1, df2, on=['A'])
        assert is_object_dtype(result.A.dtype)
        result = pd.merge(df2, df1, on=['A'])
        assert is_object_dtype(result.A.dtype)