Python numpy.random.randn() Examples

The following are 30 code examples of numpy.random.randn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy.random, or try the search function.
Example #1
Source File: test_multilevel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_mixed_depth_pop(self):
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        df1 = df.copy()
        df2 = df.copy()
        result = df1.pop('a')
        expected = df2.pop(('a', '', ''))
        tm.assert_series_equal(expected, result, check_names=False)
        tm.assert_frame_equal(df1, df2)
        assert result.name == 'a'

        expected = df1['top']
        df1 = df1.drop(['top'], axis=1)
        result = df2.pop('top')
        tm.assert_frame_equal(expected, result)
        tm.assert_frame_equal(df1, df2) 
Example #2
Source File: test_join.py    From recruit with Apache License 2.0 6 votes vote down vote up
def setup_method(self, method):
    """Populate ``self`` with the frames used by the join tests.

    Sets ``df``, ``df2``, ``target`` and ``source``. ``method`` is accepted
    but not used here.
    """
    # aggregate multiple columns
    unfiltered = DataFrame({'key1': get_test_data(),
                            'key2': get_test_data(),
                            'data1': np.random.randn(N),
                            'data2': np.random.randn(N)})

    # exclude a couple keys for fun
    self.df = unfiltered[unfiltered['key2'] > 1]

    fifth = N // 5
    self.df2 = DataFrame({'key1': get_test_data(n=fifth),
                          'key2': get_test_data(ngroups=NGROUPS // 2,
                                                n=fifth),
                          'value': np.random.randn(fifth)})

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    merged_columns = {'MergedA': data['A'], 'MergedD': data['D']}
    self.source = DataFrame(merged_columns, index=data['C'])
Example #3
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_kurt_edge_cases(self):
    """Rolling kurtosis on degenerate inputs and on a fixed sample.

    Covers a constant series, a window of three observations, and a
    five-point sample with hard-coded expected values.
    """
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias no longer
    # exists in NumPy 2.0.
    all_nan = Series([np.nan] * 5)

    # yields all NaN (0 variance)
    constant = Series([1] * 5)
    tm.assert_series_equal(all_nan, constant.rolling(window=5).kurt())

    # yields all NaN (window too small)
    noise = Series(np.random.randn(5))
    tm.assert_series_equal(all_nan, noise.rolling(window=3).kurt())

    # yields [NaN, NaN, NaN, 1.224307, 2.671499]
    sample = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164,
                     0.41941401])
    expected = Series([np.nan, np.nan, np.nan, 1.224307, 2.671499])
    tm.assert_series_equal(expected, sample.rolling(window=4).kurt())
Example #4
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_skew_edge_cases(self):
    """Rolling skewness on degenerate inputs and on a fixed sample.

    Covers a constant series, a window of two observations, and a
    five-point sample with hard-coded expected values.
    """
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias no longer
    # exists in NumPy 2.0.
    all_nan = Series([np.nan] * 5)

    # yields all NaN (0 variance)
    constant = Series([1] * 5)
    tm.assert_series_equal(all_nan, constant.rolling(window=5).skew())

    # yields all NaN (window too small)
    noise = Series(np.random.randn(5))
    tm.assert_series_equal(all_nan, noise.rolling(window=2).skew())

    # yields [NaN, NaN, NaN, 0.177994, 1.548824]
    sample = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164,
                     0.41941401])
    expected = Series([np.nan, np.nan, np.nan, 0.177994, 1.548824])
    tm.assert_series_equal(expected, sample.rolling(window=4).skew())
Example #5
Source File: test_multi.py    From recruit with Apache License 2.0 6 votes vote down vote up
def setup_method(self):
    """Populate ``self`` with the fixtures for the multi-index join tests.

    Sets ``index`` (two-level MultiIndex), ``to_join`` (a 10x3 frame on
    that index) and ``data`` (a flat frame with two key columns).
    """
    first_level = ['foo', 'bar', 'baz', 'qux']
    second_level = ['one', 'two', 'three']
    self.index = MultiIndex(levels=[first_level, second_level],
                            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                            names=['first', 'second'])

    join_columns = ['j_one', 'j_two', 'j_three']
    self.to_join = DataFrame(np.random.randn(10, 3), index=self.index,
                             columns=join_columns)

    # a little relevant example with NAs
    key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
            'qux', 'snap']
    key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
            'three', 'one']

    values = np.random.randn(len(key1))
    self.data = DataFrame({'key1': key1, 'key2': key2, 'data': values})
Example #6
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_errors(self):
        x = np.random.randn(1, 2, 3)
        assert_raises_regex(np.AxisError, 'source.*out of bounds',
                            np.moveaxis, x, 3, 0)
        assert_raises_regex(np.AxisError, 'source.*out of bounds',
                            np.moveaxis, x, -4, 0)
        assert_raises_regex(np.AxisError, 'destination.*out of bounds',
                            np.moveaxis, x, 0, 5)
        assert_raises_regex(ValueError, 'repeated axis in `source`',
                            np.moveaxis, x, [0, 0], [0, 1])
        assert_raises_regex(ValueError, 'repeated axis in `destination`',
                            np.moveaxis, x, [0, 1], [1, 1])
        assert_raises_regex(ValueError, 'must have the same number',
                            np.moveaxis, x, 0, [0, 1])
        assert_raises_regex(ValueError, 'must have the same number',
                            np.moveaxis, x, [0, 1], [0]) 
Example #7
Source File: test_multilevel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_count_level_series(self):
    """Compare ``Series.count(level=...)`` with a groupby count per level."""
    index = MultiIndex(levels=[['foo', 'bar', 'baz'],
                               ['one', 'two', 'three', 'four']],
                       codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]])

    s = Series(np.random.randn(len(index)), index=index)

    for level in (0, 1):
        counted = s.count(level=level)
        grouped = s.groupby(level=level).count()
        # Align the groupby result to the count result's index and
        # zero-fill whatever labels it lacks before comparing.
        tm.assert_series_equal(
            counted.astype('f8'),
            grouped.reindex(counted.index).fillna(0))
Example #8
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_flex_binary_frame(self, method):
        series = self.frame[1]

        res = getattr(series.rolling(window=10), method)(self.frame)
        res2 = getattr(self.frame.rolling(window=10), method)(series)
        exp = self.frame.apply(lambda x: getattr(
            series.rolling(window=10), method)(x))

        tm.assert_frame_equal(res, exp)
        tm.assert_frame_equal(res2, exp)

        frame2 = self.frame.copy()
        frame2.values[:] = np.random.randn(*frame2.shape)

        res3 = getattr(self.frame.rolling(window=10), method)(frame2)
        exp = DataFrame({k: getattr(self.frame[k].rolling(
            window=10), method)(frame2[k]) for k in self.frame})
        tm.assert_frame_equal(res3, exp) 
Example #9
Source File: test_concat.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_concat_dict(self):
    """Compare ``concat`` on a dict of frames with explicit ``keys`` calls.

    Checks both axes against a list built in sorted-key order, then a
    caller-supplied subset of keys.
    """
    frames = {name: DataFrame(np.random.randn(4, 3))
              for name in ('foo', 'bar', 'baz', 'qux')}

    sorted_keys = sorted(frames)
    in_sorted_order = [frames[name] for name in sorted_keys]

    result = concat(frames)
    tm.assert_frame_equal(result,
                          concat(in_sorted_order, keys=sorted_keys))

    result = concat(frames, axis=1)
    tm.assert_frame_equal(result,
                          concat(in_sorted_order, keys=sorted_keys, axis=1))

    keys = ['baz', 'foo', 'bar']
    result = concat(frames, keys=keys)
    tm.assert_frame_equal(result,
                          concat([frames[name] for name in keys], keys=keys))
Example #10
Source File: test_multilevel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_sort_index_level_large_cardinality(self):
    """Sort a 4000-row, three-level frame by level 0 for two int dtypes."""

    def build_and_sort(dtype):
        # Fresh index and frame per dtype, mirroring two independent runs.
        index = MultiIndex.from_arrays([np.arange(4000)] * 3)
        frame = DataFrame(np.random.randn(4000), index=index, dtype=dtype)
        return frame, frame.sort_index(level=0)

    # #2684 (int64)
    # it works!
    _, by_level = build_and_sort(np.int64)
    assert by_level.index.lexsort_depth == 3

    # #2684 (int32)
    # it works!
    frame, by_level = build_and_sort(np.int32)
    assert (by_level.dtypes.values == frame.dtypes.values).all()
    assert by_level.index.lexsort_depth == 3
Example #11
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_window_with_args(self):
        # make sure that we are aggregating window functions correctly with arg
        r = Series(np.random.randn(100)).rolling(window=10, min_periods=1,
                                                 win_type='gaussian')
        expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1)
        expected.columns = ['<lambda>', '<lambda>']
        result = r.aggregate([lambda x: x.mean(std=10),
                              lambda x: x.mean(std=.01)])
        tm.assert_frame_equal(result, expected)

        def a(x):
            return x.mean(std=10)

        def b(x):
            return x.mean(std=0.01)

        expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1)
        expected.columns = ['a', 'b']
        result = r.aggregate([a, b])
        tm.assert_frame_equal(result, expected) 
Example #12
Source File: test_multilevel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_std_var_pass_ddof(self):
    """Compare level-wise ``var``/``std`` with ``ddof`` against groupby."""
    outer = np.arange(5).repeat(10)
    inner = np.tile(np.arange(10), 5)
    index = MultiIndex.from_arrays([outer, inner])
    df = DataFrame(np.random.randn(len(index), 5), index=index)

    ddof = 4
    for meth in ['var', 'std']:
        alt = lambda x: getattr(x, meth)(ddof=ddof)

        # Same comparison for a single column and for the whole frame.
        targets = ((df[0], tm.assert_series_equal),
                   (df, tm.assert_frame_equal))
        for obj, assert_equal in targets:
            result = getattr(obj, meth)(level=0, ddof=ddof)
            expected = obj.groupby(level=0).agg(alt)
            assert_equal(result, expected)
Example #13
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_cmov_window_corner(self):
        # GH 8238
        # all nan
        vals = pd.Series([np.nan] * 10)
        result = vals.rolling(5, center=True, win_type='boxcar').mean()
        assert np.isnan(result).all()

        # empty
        vals = pd.Series([])
        result = vals.rolling(5, center=True, win_type='boxcar').mean()
        assert len(result) == 0

        # shorter than window
        vals = pd.Series(np.random.randn(5))
        result = vals.rolling(10, win_type='boxcar').mean()
        assert np.isnan(result).all()
        assert len(result) == 5 
Example #14
Source File: test_concat.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_crossed_dtypes_weird_corner(self):
        columns = ['A', 'B', 'C', 'D']
        df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='f8'),
                         'B': np.array([1, 2, 3, 4], dtype='i8'),
                         'C': np.array([1, 2, 3, 4], dtype='f8'),
                         'D': np.array([1, 2, 3, 4], dtype='i8')},
                        columns=columns)

        df2 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8'),
                         'B': np.array([1, 2, 3, 4], dtype='f8'),
                         'C': np.array([1, 2, 3, 4], dtype='i8'),
                         'D': np.array([1, 2, 3, 4], dtype='f8')},
                        columns=columns)

        appended = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate([df1.values, df2.values], axis=0),
                             columns=columns)
        tm.assert_frame_equal(appended, expected)

        df = DataFrame(np.random.randn(1, 3), index=['a'])
        df2 = DataFrame(np.random.randn(1, 4), index=['b'])
        result = concat(
            [df, df2], keys=['one', 'two'], names=['first', 'second'])
        assert result.index.names == ('first', 'second') 
Example #15
Source File: test_concat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_concat_single_with_key(self):
        df = DataFrame(np.random.randn(10, 4))

        result = concat([df], keys=['foo'])
        expected = concat([df, df], keys=['foo', 'bar'])
        tm.assert_frame_equal(result, expected[:10]) 
Example #16
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unicode_repr_level_names(self):
        index = MultiIndex.from_tuples([(0, 0), (1, 1)],
                                       names=[u('\u0394'), 'i1'])

        s = Series(lrange(2), index=index)
        df = DataFrame(np.random.randn(2, 4), index=index)
        repr(s)
        repr(df) 
Example #17
Source File: test_multi.py    From recruit with Apache License 2.0 5 votes vote down vote up
def right():
    """Return the multi-indexed right-hand frame for multi-index join tests.

    The frame has 10 rows of random values, columns ``j_one``, ``j_two``,
    ``j_three`` and a two-level index named ``key1``/``key2``.
    """
    outer_labels = ['foo', 'bar', 'baz', 'qux']
    inner_labels = ['one', 'two', 'three']
    index = MultiIndex(levels=[outer_labels, inner_labels],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['key1', 'key2'])

    values = np.random.randn(10, 3)
    return DataFrame(values, index=index,
                     columns=['j_one', 'j_two', 'j_three'])
Example #18
Source File: test_multi.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_left_merge_na_buglet(self):
        left = DataFrame({'id': list('abcde'), 'v1': randn(5),
                          'v2': randn(5), 'dummy': list('abcde'),
                          'v3': randn(5)},
                         columns=['id', 'v1', 'v2', 'dummy', 'v3'])
        right = DataFrame({'id': ['a', 'b', np.nan, np.nan, np.nan],
                           'sv3': [1.234, 5.678, np.nan, np.nan, np.nan]})

        result = merge(left, right, on='id', how='left')

        rdf = right.drop(['id'], axis=1)
        expected = left.join(rdf)
        tm.assert_frame_equal(result, expected) 
Example #19
Source File: test_concat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_concat_exclude_none(self):
    """``None`` entries are skipped by ``concat``; all-``None`` raises."""
    df = DataFrame(np.random.randn(10, 4))

    head, tail = df[:5], df[5:]
    rebuilt = concat([head, None, None, tail])
    tm.assert_frame_equal(rebuilt, df)

    with pytest.raises(ValueError, match="All objects passed were None"):
        concat([None, None])
Example #20
Source File: test_concat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_concat_series_axis1_same_names_ignore_index(self):
        dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1]
        s1 = Series(randn(len(dates)), index=dates, name='value')
        s2 = Series(randn(len(dates)), index=dates, name='value')

        result = concat([s1, s2], axis=1, ignore_index=True)
        expected = Index([0, 1])

        tm.assert_index_equal(result.columns, expected) 
Example #21
Source File: test_concat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_concat_multiindex_rangeindex(self):
        # GH13542
        # when multi-index levels are RangeIndex objects
        # there is a bug in concat with objects of len 1

        df = DataFrame(np.random.randn(9, 2))
        df.index = MultiIndex(levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
                              codes=[np.repeat(np.arange(3), 3),
                                     np.tile(np.arange(3), 3)])

        res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
        exp = df.iloc[[2, 3, 4, 5], :]
        tm.assert_frame_equal(res, exp) 
Example #22
Source File: test_concat.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_concat_copy(self):
    """Inspect block ``.values.base`` after ``concat`` with ``copy`` on/off."""
    df = DataFrame(np.random.randn(4, 3))
    df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
    df3 = DataFrame({5: 'foo'}, index=range(4))

    def check_shared(result, float_shares_input):
        # Per block kind, assert what ``.values.base`` must be.
        for blk in result._data.blocks:
            if blk.is_float:
                if float_shares_input:
                    assert (blk.values.base
                            is df._data.blocks[0].values.base)
                else:
                    assert blk.values.base is None
            elif blk.is_integer:
                assert blk.values.base is df2._data.blocks[0].values.base
            elif blk.is_object:
                assert blk.values.base is not None

    # These are actual copies.
    result = concat([df, df2, df3], axis=1, copy=True)
    for blk in result._data.blocks:
        assert blk.values.base is None

    # These are the same.
    result = concat([df, df2, df3], axis=1, copy=False)
    check_shared(result, float_shares_input=True)

    # Float block was consolidated.
    df4 = DataFrame(np.random.randn(4, 1))
    result = concat([df, df2, df3, df4], axis=1, copy=False)
    check_shared(result, float_shares_input=False)
Example #23
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_drop_preserve_names(self):
        index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                        [1, 2, 3, 1, 2, 3]],
                                       names=['one', 'two'])

        df = DataFrame(np.random.randn(6, 3), index=index)

        result = df.drop([(0, 2)])
        assert result.index.names == ('one', 'two') 
Example #24
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_multilevel_consolidate(self):
        index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), (
            'bar', 'one'), ('bar', 'two')])
        df = DataFrame(np.random.randn(4, 4), index=index, columns=index)
        df['Totals', ''] = df.sum(1)
        df = df._consolidate() 
Example #25
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unstack_number_of_levels_larger_than_int32(self):
    """``unstack`` on a 2**16 x 2**16 index must raise an int32 overflow."""
    # GH 20601
    size = 2 ** 16
    df = DataFrame(np.random.randn(size, 2),
                   index=[np.arange(size), np.arange(size)])

    with pytest.raises(ValueError, match='int32 overflow'):
        df.unstack()
Example #26
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unstack_unobserved_keys(self):
        # related to #2278 refactoring
        levels = [[0, 1], [0, 1, 2, 3]]
        codes = [[0, 0, 1, 1], [0, 2, 0, 2]]

        index = MultiIndex(levels, codes)

        df = DataFrame(np.random.randn(4, 2), index=index)

        result = df.unstack()
        assert len(result.columns) == 4

        recons = result.stack()
        tm.assert_frame_equal(recons, df) 
Example #27
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_unstack_multiple_no_empty_columns(self):
        index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0), (
            1, 'baz', 1), (1, 'qux', 1)])

        s = Series(np.random.randn(4), index=index)

        unstacked = s.unstack([1, 2])
        expected = unstacked.dropna(axis=1, how='all')
        tm.assert_frame_equal(unstacked, expected) 
Example #28
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_delevel_infer_dtype(self):
        tuples = [tuple
                  for tuple in cart_product(
                      ['foo', 'bar'], [10, 20], [1.0, 1.1])]
        index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
        df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                       index=index)
        deleveled = df.reset_index()
        assert is_integer_dtype(deleveled['prm1'])
        assert is_float_dtype(deleveled['prm2']) 
Example #29
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_dataframe_constructor(self):
        multi = DataFrame(np.random.randn(4, 4),
                          index=[np.array(['a', 'a', 'b', 'b']),
                                 np.array(['x', 'y', 'x', 'y'])])
        assert isinstance(multi.index, MultiIndex)
        assert not isinstance(multi.columns, MultiIndex)

        multi = DataFrame(np.random.randn(4, 4),
                          columns=[['a', 'a', 'b', 'b'],
                                   ['x', 'y', 'x', 'y']])
        assert isinstance(multi.columns, MultiIndex) 
Example #30
Source File: test_multilevel.py    From recruit with Apache License 2.0 5 votes vote down vote up
def setup_method(self, method):
    """Populate ``self`` with the fixtures for the multi-level tests.

    Sets ``frame``, ``single_level``, ``series``, ``tdf`` and ``ymd``.
    ``method`` is accepted but not used here.
    """
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    self.frame = DataFrame(np.random.randn(10, 3), index=index,
                           columns=Index(['A', 'B', 'C'], name='exp'))

    self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
                                   codes=[[0, 1, 2, 3]], names=['first'])

    # create test series object
    arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    # list(zip(...)) stands in for the former ``lzip`` compat helper.
    index = MultiIndex.from_tuples(list(zip(*arrays)))
    s = Series(randn(8), index=index)
    # Blank the fourth element by position; ``.iloc`` makes the positional
    # intent explicit, and ``np.nan`` replaces the ``np.NaN`` alias that
    # no longer exists in NumPy 2.0.
    s.iloc[3] = np.nan
    self.series = s

    self.tdf = tm.makeTimeDataFrame(100)
    self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month,
                                 lambda x: x.day]).sum()

    # use Int64Index, to make sure things work
    # ``set_levels``/``set_names`` return new indexes, so assign the result
    # back rather than relying on ``set_levels(inplace=True)``, which is
    # gone in pandas 2.0.
    int_levels = [lev.astype('i8') for lev in self.ymd.index.levels]
    self.ymd.index = (self.ymd.index
                      .set_levels(int_levels)
                      .set_names(['year', 'month', 'day']))