Python pandas.isna() Examples

The following are 30 code examples of pandas.isna(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas , or try the search function

Example #1

Source File: test_missing.py From recruit with Apache License 2.0

6 votes

def test_nan_stays_float():

    # GH 7031
    idx0 = pd.MultiIndex(levels=[["A", "B"], []],
                         codes=[[1, 0], [-1, -1]],
                         names=[0, 1])
    idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
                         codes=[[0], [0]],
                         names=[0, 1])
    idxm = idx0.join(idx1, how='outer')
    assert pd.isna(idx0.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(idxm.get_level_values(1)[:-1]).all()

    df0 = pd.DataFrame([[1, 2]], index=idx0)
    df1 = pd.DataFrame([[3, 4]], index=idx1)
    dfm = df0 - df1
    assert pd.isna(df0.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()

Example #2

Source File: numpy_.py From recruit with Apache License 2.0

6 votes

def fillna(self, value=None, method=None, limit=None):
        # TODO(_values_for_fillna): remove this
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == 'pad' else backfill_1d
                new_values = func(self._ndarray, limit=limit,
                                  mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values

Example #3

Source File: test_missing.py From recruit with Apache License 2.0

6 votes

def test_interpolate_index_values(self):
        s = Series(np.nan, index=np.sort(np.random.rand(30)))
        s[::3] = np.random.randn(10)

        vals = s.index.values.astype(float)

        result = s.interpolate(method='index')

        expected = s.copy()
        bad = isna(expected.values)
        good = ~bad
        expected = Series(np.interp(vals[bad], vals[good],
                                    s.values[good]),
                          index=s.index[bad])

        assert_series_equal(result[bad], expected)

        # 'values' is synonymous with 'index' for the method kwarg
        other_result = s.interpolate(method='values')

        assert_series_equal(other_result, result)
        assert_series_equal(other_result[bad], expected)

Example #4

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_argsort(self, datetime_series):
        self._check_accum_op('argsort', datetime_series, check_dtype=False)
        argsorted = datetime_series.argsort()
        assert issubclass(argsorted.dtype.type, np.integer)

        # GH 2967 (introduced bug in 0.11-dev I think)
        s = Series([Timestamp('201301%02d' % (i + 1)) for i in range(5)])
        assert s.dtype == 'datetime64[ns]'
        shifted = s.shift(-1)
        assert shifted.dtype == 'datetime64[ns]'
        assert isna(shifted[4])

        result = s.argsort()
        expected = Series(lrange(5), dtype='int64')
        assert_series_equal(result, expected)

        result = shifted.argsort()
        expected = Series(lrange(4) + [-1], dtype='int64')
        assert_series_equal(result, expected)

Example #5

Source File: test_iloc.py From recruit with Apache License 2.0

6 votes

def test_iloc_getitem_dups(self):

        # no dups in panel (bug?)
        self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
                          {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
                          objs=['series', 'frame'], typs=['ints', 'uints'])

        # GH 6766
        df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
        df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
        df = concat([df1, df2], axis=1)

        # cross-sectional indexing
        result = df.iloc[0, 0]
        assert isna(result)

        result = df.iloc[0, :]
        expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
                          name=0)
        tm.assert_series_equal(result, expected)

Example #6

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_cov(self, datetime_series):
        # full overlap
        tm.assert_almost_equal(datetime_series.cov(datetime_series),
                               datetime_series.std() ** 2)

        # partial overlap
        tm.assert_almost_equal(datetime_series[:15].cov(datetime_series[5:]),
                               datetime_series[5:15].std() ** 2)

        # No overlap
        assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.cov(cp))

        # min_periods
        assert isna(datetime_series[:15].cov(datetime_series[5:],
                    min_periods=12))

        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.cov(ts2, min_periods=12))

Example #7

Source File: test_analytics.py From recruit with Apache License 2.0

6 votes

def test_clip_types_and_nulls(self):

        sers = [Series([np.nan, 1.0, 2.0, 3.0]), Series([None, 'a', 'b', 'c']),
                Series(pd.to_datetime(
                    [np.nan, 1, 2, 3], unit='D'))]

        for s in sers:
            thresh = s[2]
            with tm.assert_produces_warning(FutureWarning):
                lower = s.clip_lower(thresh)
            with tm.assert_produces_warning(FutureWarning):
                upper = s.clip_upper(thresh)
            assert lower[notna(lower)].min() == thresh
            assert upper[notna(upper)].max() == thresh
            assert list(isna(s)) == list(isna(lower))
            assert list(isna(s)) == list(isna(upper))

Example #8

Source File: test_stat_reductions.py From recruit with Apache License 2.0

6 votes

def test_sem(self):
        string_series = tm.makeStringSeries().rename('series')
        datetime_series = tm.makeTimeSeries().rename('ts')

        alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
        self._check_stat_op('sem', alt, string_series)

        result = datetime_series.sem(ddof=4)
        expected = np.std(datetime_series.values,
                          ddof=4) / np.sqrt(len(datetime_series.values))
        tm.assert_almost_equal(result, expected)

        # 1 - element series with ddof=1
        s = datetime_series.iloc[[0]]
        result = s.sem(ddof=1)
        assert pd.isna(result)

Example #9

Source File: test_datetimelike.py From recruit with Apache License 2.0

6 votes

def test_gap_upsample(self):
        low = tm.makeTimeSeries()
        low[5:25] = np.nan
        _, ax = self.plt.subplots()
        low.plot(ax=ax)

        idxh = date_range(low.index[0], low.index[-1], freq='12h')
        s = Series(np.random.randn(len(idxh)), idxh)
        s.plot(secondary_y=True)
        lines = ax.get_lines()
        assert len(lines) == 1
        assert len(ax.right_ax.get_lines()) == 1

        line = lines[0]
        data = line.get_xydata()
        if (self.mpl_ge_3_0_0 or not self.mpl_ge_2_0_1
                or (self.mpl_ge_2_1_0 and not self.mpl_ge_2_2_2)):
            # 2.0.0, 2.2.0 (exactly) or >= 3.0.0
            data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan)

        assert isinstance(data, np.ma.core.MaskedArray)
        mask = data.mask
        assert mask[5:25, 1].all()

Example #10

Source File: test_asof.py From recruit with Apache License 2.0

6 votes

def test_all_nans(self):
        # GH 15713
        # series is all nans
        result = Series([np.nan]).asof([0])
        expected = Series([np.nan])
        tm.assert_series_equal(result, expected)

        # testing non-default indexes
        N = 50
        rng = date_range('1/1/1990', periods=N, freq='53s')

        dates = date_range('1/1/1990', periods=N * 3, freq='25s')
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range('1/1/1990', periods=N * 3, freq='25s')[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name='test')
        tm.assert_series_equal(result, expected)

Example #11

Source File: test_datetime_index.py From recruit with Apache License 2.0

6 votes

def test_ohlc_5min():
    def _ohlc(group):
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s')
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample('5min', closed='right',
                            label='right').ohlc()

    assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all()

    exp = _ohlc(ts[1:31])
    assert (resampled.loc['1/1/2000 00:05'] == exp).all()

    exp = _ohlc(ts['1/1/2000 5:55:01':])
    assert (resampled.loc['1/1/2000 6:00:00'] == exp).all()

Example #12

Source File: test_datetime_index.py From recruit with Apache License 2.0

6 votes

def test_resample_how_ohlc(series):
    s = series
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3

    def _ohlc(group):
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    expected = DataFrame(
        s.groupby(grouplist).agg(_ohlc).values.tolist(),
        index=date_range('1/1/2000', periods=4, freq='5min', name='index'),
        columns=['open', 'high', 'low', 'close'])

    result = s.resample('5min', closed='right', label='right').ohlc()
    assert_frame_equal(result, expected)

Example #13

Source File: graph.py From AutoSmart with GNU General Public License v3.0

6 votes

def recognize_binary_col(self,data,cat_cols):
        def func(ss):
            ss = ss.unique()
            if len(ss) == 3:
                if pd.isna(ss).sum() == 1:
                    return True
            if len(ss) == 2:
                return True
            return False
        
        binary_cols = []
        
        res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(data[col]) for col in cat_cols)
        
        for col,is_binary in zip(cat_cols,res):
            if is_binary:
                binary_cols.append(col)
        
        return binary_cols

Example #14

Source File: base_china_stock_finance_recorder.py From zvt with MIT License

6 votes

def fill_timestamp_with_jq(self, security_item, the_data):
        # get report published date from jq
        try:
            q = query(
                indicator.pubDate
            ).filter(
                indicator.code == to_jq_entity_id(security_item),
            )

            df = get_fundamentals(q, statDate=to_jq_report_period(the_data.report_date))
            if not df.empty and pd.isna(df).empty:
                the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
                self.logger.info(
                    'jq fill {} {} timestamp:{} for report_date:{}'.format(self.data_schema, security_item.id,
                                                                           the_data.timestamp,
                                                                           the_data.report_date))
                self.session.commit()
        except Exception as e:
            self.logger.error(e)

Example #15

Source File: test_strings.py From recruit with Apache License 2.0

6 votes

def test_iter(self):
        # GH3638
        strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
        ds = Series(strs)

        for s in ds.str:
            # iter must yield a Series
            assert isinstance(s, Series)

            # indices of each yielded Series should be equal to the index of
            # the original Series
            tm.assert_index_equal(s.index, ds.index)

            for el in s:
                # each element of the series is either a basestring/str or nan
                assert isinstance(el, compat.string_types) or isna(el)

        # desired behavior is to iterate until everything would be nan on the
        # next iter so make sure the last element of the iterator was 'l' in
        # this case since 'wikitravel' is the longest string
        assert s.dropna().values.item() == 'l'

Example #16

Source File: test_stat_reductions.py From recruit with Apache License 2.0

6 votes

def test_var_std(self):
        string_series = tm.makeStringSeries().rename('series')
        datetime_series = tm.makeTimeSeries().rename('ts')

        alt = lambda x: np.std(x, ddof=1)
        self._check_stat_op('std', alt, string_series)

        alt = lambda x: np.var(x, ddof=1)
        self._check_stat_op('var', alt, string_series)

        result = datetime_series.std(ddof=4)
        expected = np.std(datetime_series.values, ddof=4)
        tm.assert_almost_equal(result, expected)

        result = datetime_series.var(ddof=4)
        expected = np.var(datetime_series.values, ddof=4)
        tm.assert_almost_equal(result, expected)

        # 1 - element series with ddof=1
        s = datetime_series.iloc[[0]]
        result = s.var(ddof=1)
        assert pd.isna(result)

        result = s.std(ddof=1)
        assert pd.isna(result)

Example #17

Source File: test_multi.py From vnpy_crypto with MIT License

6 votes

def test_nan_stays_float(self):

        # GH 7031
        idx0 = pd.MultiIndex(levels=[["A", "B"], []],
                             labels=[[1, 0], [-1, -1]],
                             names=[0, 1])
        idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
                             labels=[[0], [0]],
                             names=[0, 1])
        idxm = idx0.join(idx1, how='outer')
        assert pd.isna(idx0.get_level_values(1)).all()
        # the following failed in 0.14.1
        assert pd.isna(idxm.get_level_values(1)[:-1]).all()

        df0 = pd.DataFrame([[1, 2]], index=idx0)
        df1 = pd.DataFrame([[3, 4]], index=idx1)
        dfm = df0 - df1
        assert pd.isna(df0.index.get_level_values(1)).all()
        # the following failed in 0.14.1
        assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()

Example #18

Source File: test_block_internals.py From recruit with Apache License 2.0

6 votes

def test_strange_column_corruption_issue(self):
        # (wesm) Unclear how exactly this is related to internal matters
        df = DataFrame(index=[0, 1])
        df[0] = np.nan
        wasCol = {}
        # uncommenting these makes the results match
        # for col in xrange(100, 200):
        #    wasCol[col] = 1
        #    df[col] = np.nan

        for i, dt in enumerate(df.index):
            for col in range(100, 200):
                if col not in wasCol:
                    wasCol[col] = 1
                    df[col] = np.nan
                df[col][dt] = i

        myid = 100

        first = len(df.loc[pd.isna(df[myid]), [myid]])
        second = len(df.loc[pd.isna(df[myid]), [myid]])
        assert first == second == 0

Example #19

Source File: test_sparse.py From recruit with Apache License 2.0

6 votes

def test_fillna_frame(self, data_missing):
        # Have to override to specify that fill_value will change.
        fill_value = data_missing[1]

        result = pd.DataFrame({
            "A": data_missing,
            "B": [1, 2]
        }).fillna(fill_value)

        if pd.isna(data_missing.fill_value):
            dtype = SparseDtype(data_missing.dtype, fill_value)
        else:
            dtype = data_missing.dtype

        expected = pd.DataFrame({
            "A": data_missing._from_sequence([fill_value, fill_value],
                                             dtype=dtype),
            "B": [1, 2],
        })

        self.assert_frame_equal(result, expected)

Example #20

Source File: test_sparse.py From recruit with Apache License 2.0

6 votes

def test_isna(self, data_missing):
        expected_dtype = SparseDtype(bool,
                                     pd.isna(data_missing.dtype.fill_value))
        expected = SparseArray([True, False], dtype=expected_dtype)

        result = pd.isna(data_missing)
        self.assert_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=expected_dtype)
        self.assert_series_equal(result, expected)

Example #21

Source File: missing.py From vnpy_crypto with MIT License

5 votes

def test_isna(self, data_missing):
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        self.assert_series_equal(result, expected)

Example #22

Source File: tigeristry.py From pygraphistry with BSD 3-Clause "New" or "Revised" License

5 votes

def __json_to_graphistry(self, graphistry, json, bindings):        
        edges_df = pd.DataFrame({'from_id': [], 'to_id': []})
        edge_key = bindings['edges']
        edges = [x for x in json if edge_key in x]      
        if len(edges) > 0 and (edge_key in edges[0]):
            edges = edges[0][edge_key]
            edges_df = pd.DataFrame(edges)
            try:
                edges_df = edges_df.drop(columns=['attributes'])
                attrs = [x['attributes'] for x in edges]
                edges_df = pd.merge( edges_df, pd.DataFrame(attrs), left_index=True, right_index=True )
            except:
                self.__log('Failed to extract edge attrs')
            g = graphistry.bind(source='from_id', destination='to_id').edges(edges_df)
        
        nodes_df = pd.DataFrame({'type': [], 'node_id': []})
        node_key = bindings['nodes']
        nodes = [x for x in json if node_key in x]
        if len(nodes) > 0 and (node_key in nodes[0]):
            nodes = nodes[0][node_key]
            nodes_df = pd.DataFrame(nodes)
            try:
                nodes_df = nodes_df.drop(columns=['attributes'])
                attrs = [x['attributes'] for x in nodes]
                nodes_df = pd.merge( nodes_df, pd.DataFrame(attrs), left_index=True, right_index=True )
            except:
                self.__log('Failed to extract node attrs')
        else:        
            nodes_df = pd.DataFrame({'node_id': edges_df['from_id'].append(edges_df['to_id'])}) \
              .drop_duplicates().reset_index(drop=True)           
            from_types = nodes_df.merge(edges_df[['from_id', 'from_type']].rename(columns={'from_id': 'node_id', 'from_type': 'type'}), on='node_id', how='left')
            to_types = nodes_df.merge(edges_df[['to_id', 'to_type']].rename(columns={'to_id': 'node_id', 'to_type': 'type'}), on='node_id', how='left')
            nodes_df = nodes_df.merge(
                pd.DataFrame({'type': 
                              from_types.merge(to_types, left_index=True, right_index=True)\
                                .apply(
                                  lambda row: row['type_x'] if not pd.isna(row['type_x']) else row['type_y'],
                                  axis=1)}),
                left_index=True, right_index=True)              
        g = g.bind(node='node_id').nodes(nodes_df)
        return g

Example #23

Source File: test_reductions.py From recruit with Apache License 2.0

5 votes

def test_idxmin(self):
        # test idxmin
        # _check_stat_op approach can not be used here because of isna check.
        string_series = tm.makeStringSeries().rename('series')

        # add some NaNs
        string_series[5:15] = np.NaN

        # skipna or no
        assert string_series[string_series.idxmin()] == string_series.min()
        assert pd.isna(string_series.idxmin(skipna=False))

        # no NaNs
        nona = string_series.dropna()
        assert nona[nona.idxmin()] == nona.min()
        assert (nona.index.values.tolist().index(nona.idxmin()) ==
                nona.values.argmin())

        # all NaNs
        allna = string_series * np.nan
        assert pd.isna(allna.idxmin())

        # datetime64[ns]
        s = Series(pd.date_range('20130102', periods=6))
        result = s.idxmin()
        assert result == 0

        s[0] = np.nan
        result = s.idxmin()
        assert result == 1

Example #24

Source File: sparse.py From recruit with Apache License 2.0

5 votes

def isna(self):
        from pandas import isna
        # If null fill value, we want SparseDtype[bool, true]
        # to preserve the same memory usage.
        dtype = SparseDtype(bool, self._null_fill_value)
        return type(self)._simple_new(isna(self.sp_values),
                                      self.sp_index, dtype)

Example #25

Source File: test_reductions.py From recruit with Apache License 2.0

5 votes

def test_ops_consistency_on_empty(self, method):

        # GH#7869
        # consistency on empty

        # float
        result = getattr(Series(dtype=float), method)()
        assert pd.isna(result)

        # timedelta64[ns]
        result = getattr(Series(dtype='m8[ns]'), method)()
        assert result is pd.NaT

Example #26

Source File: test_reductions.py From recruit with Apache License 2.0

5 votes

def test_minmax_nat_datetime64(self, op):
        # Return NaT
        obj = DatetimeIndex([])
        assert pd.isna(getattr(obj, op)())

        obj = DatetimeIndex([pd.NaT])
        assert pd.isna(getattr(obj, op)())

        obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT])
        assert pd.isna(getattr(obj, op)())

Example #27

Source File: test_reductions.py From recruit with Apache License 2.0

5 votes

def test_minmax_timedelta64(self):

        # monotonic
        idx1 = TimedeltaIndex(['1 days', '2 days', '3 days'])
        assert idx1.is_monotonic

        # non-monotonic
        idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT'])
        assert not idx2.is_monotonic

        for idx in [idx1, idx2]:
            assert idx.min() == Timedelta('1 days')
            assert idx.max() == Timedelta('3 days')
            assert idx.argmin() == 0
            assert idx.argmax() == 2

        for op in ['min', 'max']:
            # Return NaT
            obj = TimedeltaIndex([])
            assert pd.isna(getattr(obj, op)())

            obj = TimedeltaIndex([pd.NaT])
            assert pd.isna(getattr(obj, op)())

            obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
            assert pd.isna(getattr(obj, op)())

Example #28

Source File: test_constructors.py From recruit with Apache License 2.0

5 votes

def test_frame_datetime64_mixed_index_ctor_1681(self):
        dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
        ts = Series(dr)

        # it works!
        d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
        assert d['B'].isna().all()

Example #29

Source File: test_constructors.py From recruit with Apache License 2.0

5 votes

def test_constructor_multi_index(self):
        # GH 4078
        # construction error with mi and all-nan frame
        tuples = [(2, 3), (3, 3), (3, 3)]
        mi = MultiIndex.from_tuples(tuples)
        df = DataFrame(index=mi, columns=mi)
        assert pd.isna(df).values.ravel().all()

        tuples = [(3, 3), (2, 3), (3, 3)]
        mi = MultiIndex.from_tuples(tuples)
        df = DataFrame(index=mi, columns=mi)
        assert pd.isna(df).values.ravel().all()

Example #30

Source File: test_block_internals.py From recruit with Apache License 2.0

5 votes

def test_stale_cached_series_bug_473(self):

        # this is chained, but ok
        with option_context('chained_assignment', None):
            Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'),
                          columns=('e', 'f', 'g', 'h'))
            repr(Y)
            Y['e'] = Y['e'].astype('object')
            Y['g']['c'] = np.NaN
            repr(Y)
            result = Y.sum()  # noqa
            exp = Y['g'].sum()  # noqa
            assert pd.isna(Y['g']['c'])