Python pandas.isna() Examples
The following are 30 code examples of pandas.isna().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_missing.py From recruit with Apache License 2.0 | 6 votes |
def test_nan_stays_float():
    """Regression test for GH 7031.

    The second level of a MultiIndex built with only ``-1`` codes must be
    reported as NA, both on the index itself and after it has gone through
    an outer join or frame arithmetic.
    """
    left_idx = pd.MultiIndex(
        levels=[["A", "B"], []],
        codes=[[1, 0], [-1, -1]],
        names=[0, 1],
    )
    right_idx = pd.MultiIndex(
        levels=[["C"], ["D"]],
        codes=[[0], [0]],
        names=[0, 1],
    )

    # Index-level check.
    joined = left_idx.join(right_idx, how='outer')
    left_level = left_idx.get_level_values(1)
    assert pd.isna(left_level).all()
    # the following failed in 0.14.1
    joined_level = joined.get_level_values(1)
    assert pd.isna(joined_level[:-1]).all()

    # Same check after the indexes have been aligned by frame subtraction.
    left_frame = pd.DataFrame([[1, 2]], index=left_idx)
    right_frame = pd.DataFrame([[3, 4]], index=right_idx)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
Example #2
Source File: numpy_.py From recruit with Apache License 2.0 | 6 votes |
def fillna(self, value=None, method=None, limit=None):
    """Return a copy of this array with missing entries filled.

    ``value`` and ``method`` are first normalised by
    ``validate_fillna_kwargs``.  An array-like ``value`` must have the same
    length as ``self``; only its entries at missing positions are used.

    Raises
    ------
    ValueError
        If an array-like ``value`` has a different length than ``self``.
    """
    # TODO(_values_for_fillna): remove this
    value, method = validate_fillna_kwargs(value, method)

    missing = self.isna()

    if is_array_like(value):
        n_value, n_self = len(value), len(self)
        if n_value != n_self:
            raise ValueError("Length of 'value' does not match. Got ({}) "
                             " expected {}".format(n_value, n_self))
        value = value[missing]

    # Nothing to fill: hand back a plain copy.
    if not missing.any():
        return self.copy()

    if method is None:
        # fill with value
        filled = self.copy()
        filled[missing] = value
        return filled

    # Any method other than 'pad' is treated as a backfill.
    filler = pad_1d if method == 'pad' else backfill_1d
    filled = filler(self._ndarray, limit=limit, mask=missing)
    return self._from_sequence(filled, dtype=self.dtype)
Example #3
Source File: test_missing.py From recruit with Apache License 2.0 | 6 votes |
def test_interpolate_index_values(self):
    """``interpolate(method='index')`` must agree with ``np.interp`` on the index."""
    sorted_index = np.sort(np.random.rand(30))
    s = Series(np.nan, index=sorted_index)
    # Every third position gets a real value; the rest stay missing.
    s[::3] = np.random.randn(10)

    x = s.index.values.astype(float)

    by_index = s.interpolate(method='index')

    missing = isna(s.values)
    present = ~missing
    interpolated = np.interp(x[missing], x[present], s.values[present])
    expected = Series(interpolated, index=s.index[missing])

    assert_series_equal(by_index[missing], expected)

    # 'values' is synonymous with 'index' for the method kwarg
    by_values = s.interpolate(method='values')

    assert_series_equal(by_values, by_index)
    assert_series_equal(by_values[missing], expected)
Example #4
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_argsort(self, datetime_series):
    """Check ``Series.argsort`` dtype and its handling of a trailing NaT."""
    self._check_accum_op('argsort', datetime_series, check_dtype=False)
    order = datetime_series.argsort()
    assert issubclass(order.dtype.type, np.integer)

    # GH 2967 (introduced bug in 0.11-dev I think)
    stamps = [Timestamp('201301%02d' % day) for day in range(1, 6)]
    s = Series(stamps)
    assert s.dtype == 'datetime64[ns]'

    # Shifting by -1 leaves a missing value in the last slot.
    shifted = s.shift(-1)
    assert shifted.dtype == 'datetime64[ns]'
    assert isna(shifted[4])

    expected_full = Series(lrange(5), dtype='int64')
    assert_series_equal(s.argsort(), expected_full)

    # The missing entry is reported as position -1.
    expected_shifted = Series(lrange(4) + [-1], dtype='int64')
    assert_series_equal(shifted.argsort(), expected_shifted)
Example #5
Source File: test_iloc.py From recruit with Apache License 2.0 | 6 votes |
def test_iloc_getitem_dups(self):
    """``iloc`` with repeated positions and with duplicate column labels."""
    # no dups in panel (bug?)
    positions = [0, 1, 1, 3]
    reference_keys = {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}
    self.check_result('list int (dups)', 'iloc', positions, 'ix',
                      reference_keys,
                      objs=['series', 'frame'], typs=['ints', 'uints'])

    # GH 6766
    left = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
    right = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
    combined = concat([left, right], axis=1)

    # cross-sectional indexing
    top_left = combined.iloc[0, 0]
    assert isna(top_left)

    first_row = combined.iloc[0, :]
    expected = Series([np.nan, 1, 3, 3],
                      index=['A', 'B', 'A', 'B'],
                      name=0)
    tm.assert_series_equal(first_row, expected)
Example #6
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_cov(self, datetime_series):
    """Exercise ``Series.cov`` for full, partial, empty and all-NA overlaps."""
    head = datetime_series[:15]
    tail = datetime_series[5:]

    # full overlap
    variance = datetime_series.std() ** 2
    tm.assert_almost_equal(datetime_series.cov(datetime_series), variance)

    # partial overlap
    overlap_variance = datetime_series[5:15].std() ** 2
    tm.assert_almost_equal(head.cov(tail), overlap_variance)

    # No overlap
    evens = datetime_series[::2]
    odds = datetime_series[1::2]
    assert np.isnan(evens.cov(odds))

    # all NA
    all_na = datetime_series[:10].copy()
    all_na[:] = np.nan
    assert isna(all_na.cov(all_na))

    # min_periods
    assert isna(head.cov(tail, min_periods=12))

    # Same min_periods check after both sides are reindexed to the full index.
    full_index = datetime_series.index
    head_full = head.reindex(full_index)
    tail_full = tail.reindex(full_index)
    assert isna(head_full.cov(tail_full, min_periods=12))
Example #7
Source File: test_analytics.py From recruit with Apache License 2.0 | 6 votes |
def test_clip_types_and_nulls(self):
    """``clip_lower`` / ``clip_upper`` warn, clip, and keep NA positions."""
    float_ser = Series([np.nan, 1.0, 2.0, 3.0])
    object_ser = Series([None, 'a', 'b', 'c'])
    datetime_ser = Series(pd.to_datetime([np.nan, 1, 2, 3], unit='D'))

    for ser in (float_ser, object_ser, datetime_ser):
        bound = ser[2]

        # Both methods are expected to emit a FutureWarning.
        with tm.assert_produces_warning(FutureWarning):
            clipped_below = ser.clip_lower(bound)
        with tm.assert_produces_warning(FutureWarning):
            clipped_above = ser.clip_upper(bound)

        assert clipped_below[notna(clipped_below)].min() == bound
        assert clipped_above[notna(clipped_above)].max() == bound

        # Missing entries must stay exactly where they were.
        na_positions = list(isna(ser))
        assert na_positions == list(isna(clipped_below))
        assert na_positions == list(isna(clipped_above))
Example #8
Source File: test_stat_reductions.py From recruit with Apache License 2.0 | 6 votes |
def test_sem(self):
    """Compare ``Series.sem`` with a NumPy reference, including ``ddof``."""
    string_series = tm.makeStringSeries().rename('series')
    datetime_series = tm.makeTimeSeries().rename('ts')

    def alt(x):
        # Reference implementation: sample std over sqrt(n).
        return np.std(x, ddof=1) / np.sqrt(len(x))

    self._check_stat_op('sem', alt, string_series)

    values = datetime_series.values
    expected = np.std(values, ddof=4) / np.sqrt(len(values))
    tm.assert_almost_equal(datetime_series.sem(ddof=4), expected)

    # 1 - element series with ddof=1
    single = datetime_series.iloc[[0]]
    assert pd.isna(single.sem(ddof=1))
Example #9
Source File: test_datetimelike.py From recruit with Apache License 2.0 | 6 votes |
def test_gap_upsample(self):
    """A gap of NaNs in a plotted series must show up as masked line data."""
    low = tm.makeTimeSeries()
    low[5:25] = np.nan
    _, ax = self.plt.subplots()
    low.plot(ax=ax)

    # Second series at 12h frequency, drawn on the secondary y axis.
    high_index = date_range(low.index[0], low.index[-1], freq='12h')
    high = Series(np.random.randn(len(high_index)), high_index)
    high.plot(secondary_y=True)

    primary_lines = ax.get_lines()
    assert len(primary_lines) == 1
    assert len(ax.right_ax.get_lines()) == 1

    xy = primary_lines[0].get_xydata()
    needs_masking = (
        self.mpl_ge_3_0_0
        or not self.mpl_ge_2_0_1
        or (self.mpl_ge_2_1_0 and not self.mpl_ge_2_2_2)
    )
    if needs_masking:
        # 2.0.0, 2.2.0 (exactly) or >= 3.0.0
        xy = np.ma.MaskedArray(xy, mask=isna(xy), fill_value=np.nan)

    assert isinstance(xy, np.ma.core.MaskedArray)
    gap_mask = xy.mask
    assert gap_mask[5:25, 1].all()
Example #10
Source File: test_asof.py From recruit with Apache License 2.0 | 6 votes |
def test_all_nans(self):
    """``Series.asof`` on an all-NaN series returns NaN for every lookup."""
    # GH 15713
    # series is all nans
    looked_up = Series([np.nan]).asof([0])
    tm.assert_series_equal(looked_up, Series([np.nan]))

    # testing non-default indexes
    N = 50
    rng = date_range('1/1/1990', periods=N, freq='53s')
    dates = date_range('1/1/1990', periods=N * 3, freq='25s')

    looked_up = Series(np.nan, index=rng).asof(dates)
    tm.assert_series_equal(looked_up, Series(np.nan, index=dates))

    # testing scalar input
    first_date = date_range('1/1/1990', periods=N * 3, freq='25s')[0]
    scalar_result = Series(np.nan, index=rng).asof(first_date)
    assert isna(scalar_result)

    # test name is propagated
    named = Series(np.nan, index=[1, 2, 3, 4], name='test')
    expected = Series(np.nan, index=[4, 5], name='test')
    tm.assert_series_equal(named.asof([4, 5]), expected)
Example #11
Source File: test_datetime_index.py From recruit with Apache License 2.0 | 6 votes |
def test_ohlc_5min():
    """Check 5-minute right-closed, right-labelled OHLC resampling.

    A 10-second series is resampled to 5-minute bars and three bars are
    compared against values computed by hand from the raw series.
    """

    def _ohlc(group):
        """Return ``[open, high, low, close]`` for ``group`` (NaNs if all NA)."""
        if isna(group).all():
            return np.repeat(np.nan, 4)
        # Use .iloc: integer keys on a datetime-indexed Series are only
        # treated as positions through a deprecated fallback.
        return [group.iloc[0], group.max(), group.min(), group.iloc[-1]]

    rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s')
    ts = Series(np.random.randn(len(rng)), index=rng)

    resampled = ts.resample('5min', closed='right',
                            label='right').ohlc()

    # With closed='right' the first bar holds only the very first sample.
    assert (resampled.loc['1/1/2000 00:00'] == ts.iloc[0]).all()

    # Second bar: the next 30 samples (00:00:10 through 00:05:00).
    exp = _ohlc(ts[1:31])
    assert (resampled.loc['1/1/2000 00:05'] == exp).all()

    # Last bar: everything after 05:55:00.
    exp = _ohlc(ts['1/1/2000 5:55:01':])
    assert (resampled.loc['1/1/2000 6:00:00'] == exp).all()
Example #12
Source File: test_datetime_index.py From recruit with Apache License 2.0 | 6 votes |
def test_resample_how_ohlc(series):
    """Compare ``resample(...).ohlc()`` with a manual groupby aggregation.

    ``series`` is a fixture supplied by the test suite; it is resampled at
    5-minute frequency, so it is presumably datetime-indexed (TODO confirm
    against the fixture definition).
    """
    s = series

    # Hand-built group labels: first sample alone, then two groups of five,
    # then everything else.
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3

    def _ohlc(group):
        """Return ``[open, high, low, close]`` for ``group`` (NaNs if all NA)."""
        if isna(group).all():
            return np.repeat(np.nan, 4)
        # Use .iloc: integer keys on a non-integer-indexed Series are only
        # treated as positions through a deprecated fallback.
        return [group.iloc[0], group.max(), group.min(), group.iloc[-1]]

    expected = DataFrame(
        s.groupby(grouplist).agg(_ohlc).values.tolist(),
        index=date_range('1/1/2000', periods=4, freq='5min', name='index'),
        columns=['open', 'high', 'low', 'close'])

    result = s.resample('5min', closed='right', label='right').ohlc()

    assert_frame_equal(result, expected)
Example #13
Source File: graph.py From AutoSmart with GNU General Public License v3.0 | 6 votes |
def recognize_binary_col(self, data, cat_cols):
    """Return the columns of ``cat_cols`` that look binary in ``data``.

    A column counts as binary when it has exactly two distinct values, or
    exactly three distinct values of which exactly one is missing.
    """

    def _is_binary(column):
        distinct = column.unique()
        n_distinct = len(distinct)
        if n_distinct == 2:
            return True
        # Three distinct entries only qualify when one of them is NA.
        return bool(n_distinct == 3 and pd.isna(distinct).sum() == 1)

    runner = Parallel(n_jobs=CONSTANT.JOBS, require='sharedmem')
    flags = runner(delayed(_is_binary)(data[col]) for col in cat_cols)

    return [col for col, flag in zip(cat_cols, flags) if flag]
Example #14
Source File: base_china_stock_finance_recorder.py From zvt with MIT License | 6 votes |
def fill_timestamp_with_jq(self, security_item, the_data):
    """Set ``the_data.timestamp`` from the publication date returned by jq.

    Queries ``indicator.pubDate`` for ``security_item`` and the report period
    of ``the_data``.  When a row without missing values comes back, the first
    ``pubDate`` is stored as the timestamp, the change is logged and the
    session is committed.  Any exception is logged and swallowed, so a failed
    lookup simply leaves ``the_data`` untouched.
    """
    # get report published date from jq
    try:
        q = query(
            indicator.pubDate
        ).filter(
            indicator.code == to_jq_entity_id(security_item),
        )

        df = get_fundamentals(q, statDate=to_jq_report_period(the_data.report_date))

        # The previous check was `not df.empty and pd.isna(df).empty`.
        # pd.isna(df) has the same shape as df, so that condition could never
        # be true and the timestamp was never filled.  The presumed intent is
        # "a result exists and contains no missing values".
        if not df.empty and not pd.isna(df).values.any():
            the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
            self.logger.info(
                'jq fill {} {} timestamp:{} for report_date:{}'.format(self.data_schema,
                                                                        security_item.id,
                                                                        the_data.timestamp,
                                                                        the_data.report_date))
            self.session.commit()
    except Exception as e:
        # Best effort: a failed lookup must not abort the recorder.
        self.logger.error(e)
Example #15
Source File: test_strings.py From recruit with Apache License 2.0 | 6 votes |
def test_iter(self):
    """Iterating ``Series.str`` yields one Series per character position."""
    # GH3638
    words = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
    ds = Series(words)

    for s in ds.str:
        # iter must yield a Series
        assert isinstance(s, Series)

        # indices of each yielded Series should be equal to the index of
        # the original Series
        tm.assert_index_equal(s.index, ds.index)

        # each element of the series is either a basestring/str or nan
        assert all(isinstance(el, compat.string_types) or isna(el)
                   for el in s)

    # desired behavior is to iterate until everything would be nan on the
    # next iter so make sure the last element of the iterator was 'l' in
    # this case since 'wikitravel' is the longest string
    last_chars = s.dropna()
    assert last_chars.values.item() == 'l'
Example #16
Source File: test_stat_reductions.py From recruit with Apache License 2.0 | 6 votes |
def test_var_std(self):
    """Compare ``Series.std`` / ``Series.var`` with NumPy references."""
    string_series = tm.makeStringSeries().rename('series')
    datetime_series = tm.makeTimeSeries().rename('ts')

    def sample_std(x):
        return np.std(x, ddof=1)

    def sample_var(x):
        return np.var(x, ddof=1)

    self._check_stat_op('std', sample_std, string_series)
    self._check_stat_op('var', sample_var, string_series)

    # Non-default ddof must match NumPy as well.
    values = datetime_series.values
    tm.assert_almost_equal(datetime_series.std(ddof=4),
                           np.std(values, ddof=4))
    tm.assert_almost_equal(datetime_series.var(ddof=4),
                           np.var(values, ddof=4))

    # 1 - element series with ddof=1
    single = datetime_series.iloc[[0]]
    assert pd.isna(single.var(ddof=1))
    assert pd.isna(single.std(ddof=1))
Example #17
Source File: test_multi.py From vnpy_crypto with MIT License | 6 votes |
def test_nan_stays_float(self):
    """Regression test for GH 7031.

    The second level of a MultiIndex built with only ``-1`` labels must be
    reported as NA, both on the index itself and after it has gone through
    an outer join or frame arithmetic.
    """
    # NOTE(review): newer pandas releases spell this keyword ``codes=``
    # instead of ``labels=`` -- confirm which pandas version this suite pins
    # before changing it.
    left_idx = pd.MultiIndex(
        levels=[["A", "B"], []],
        labels=[[1, 0], [-1, -1]],
        names=[0, 1],
    )
    right_idx = pd.MultiIndex(
        levels=[["C"], ["D"]],
        labels=[[0], [0]],
        names=[0, 1],
    )

    # Index-level check.
    joined = left_idx.join(right_idx, how='outer')
    left_level = left_idx.get_level_values(1)
    assert pd.isna(left_level).all()
    # the following failed in 0.14.1
    joined_level = joined.get_level_values(1)
    assert pd.isna(joined_level[:-1]).all()

    # Same check after the indexes have been aligned by frame subtraction.
    left_frame = pd.DataFrame([[1, 2]], index=left_idx)
    right_frame = pd.DataFrame([[3, 4]], index=right_idx)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
Example #18
Source File: test_block_internals.py From recruit with Apache License 2.0 | 6 votes |
def test_strange_column_corruption_issue(self):
    """Columns created and filled inside a nested loop must contain no NA.

    Columns 100..199 are added lazily on the first row iteration and then
    written through chained indexing (``df[col][dt] = i``).  Afterwards the
    rows where column 100 is NA are selected twice; both selections must be
    empty and agree with each other.
    """
    # (wesm) Unclear how exactly this is related to internal matters
    df = DataFrame(index=[0, 1])
    df[0] = np.nan
    wasCol = {}  # columns that have already been created
    # uncommenting these makes the results match
    # for col in xrange(100, 200):
    #    wasCol[col] = 1
    #    df[col] = np.nan

    for i, dt in enumerate(df.index):
        for col in range(100, 200):
            if col not in wasCol:
                wasCol[col] = 1
                df[col] = np.nan
            # Chained assignment is deliberate: it is what this test exercises.
            df[col][dt] = i

    myid = 100

    # Evaluate the same selection twice; the results must match.
    first = len(df.loc[pd.isna(df[myid]), [myid]])
    second = len(df.loc[pd.isna(df[myid]), [myid]])
    assert first == second == 0
Example #19
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_fillna_frame(self, data_missing):
    """``DataFrame.fillna`` on a sparse column, allowing the dtype to change."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(fill_value)

    # A null sparse fill value is replaced by the value used for filling.
    dtype = data_missing.dtype
    if pd.isna(data_missing.fill_value):
        dtype = SparseDtype(dtype, fill_value)

    filled_column = data_missing._from_sequence([fill_value, fill_value],
                                                dtype=dtype)
    expected = pd.DataFrame({
        "A": filled_column,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #20
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self, data_missing):
    """``isna`` on sparse data returns a sparse boolean result."""
    null_fill = pd.isna(data_missing.dtype.fill_value)
    expected_dtype = SparseDtype(bool, null_fill)
    expected_array = SparseArray([True, False], dtype=expected_dtype)

    # Top-level function on the array itself.
    self.assert_equal(pd.isna(data_missing), expected_array)

    # Method on a Series wrapping the array.
    as_series = pd.Series(data_missing)
    self.assert_series_equal(as_series.isna(), pd.Series(expected_array))

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    expected_empty = pd.Series([], dtype=expected_dtype)
    self.assert_series_equal(emptied.isna(), expected_empty)
Example #21
Source File: missing.py From vnpy_crypto with MIT License | 5 votes |
def test_isna(self, data_missing):
    """``isna`` flags the first of the two ``data_missing`` entries only."""
    na_flags = np.array([True, False])

    # Top-level function on the array itself.
    tm.assert_numpy_array_equal(pd.isna(data_missing), na_flags)

    # Method on a Series wrapping the array.
    as_series = pd.Series(data_missing)
    self.assert_series_equal(as_series.isna(), pd.Series(na_flags))

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    expected_empty = pd.Series([], dtype=bool)
    self.assert_series_equal(emptied.isna(), expected_empty)
Example #22
Source File: tigeristry.py From pygraphistry with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __json_to_graphistry(self, graphistry, json, bindings):
    """Build edge and node tables from ``json`` and bind them to a plotter.

    Parameters
    ----------
    graphistry
        Object exposing ``bind(...)`` whose result exposes ``edges(df)``,
        ``bind(...)`` and ``nodes(df)``.
    json
        Iterable of containers.  The first one containing
        ``bindings['edges']`` supplies the edge records; the first one
        containing ``bindings['nodes']`` supplies the node records.
    bindings
        Mapping with the keys ``'edges'`` and ``'nodes'``.

    Returns
    -------
    The object returned by the final ``.nodes(nodes_df)`` call.

    Notes
    -----
    Flattening the per-record ``'attributes'`` is best effort: on failure a
    message is logged and the table is used as far as it was built.  When no
    node records are present, nodes are derived from the edge endpoints and
    typed from ``from_type`` / ``to_type``.
    """
    # ---- edges -------------------------------------------------------
    edges_df = pd.DataFrame({'from_id': [], 'to_id': []})
    edge_key = bindings['edges']
    edges = [x for x in json if edge_key in x]
    if edges:
        edges = edges[0][edge_key]
        edges_df = pd.DataFrame(edges)
        try:
            edges_df = edges_df.drop(columns=['attributes'])
            attrs = [x['attributes'] for x in edges]
            edges_df = pd.merge(
                edges_df,
                pd.DataFrame(attrs),
                left_index=True, right_index=True
            )
        except Exception:
            # Was a bare ``except:``; KeyboardInterrupt / SystemExit must
            # not be swallowed here.
            self.__log('Failed to extract edge attrs')
    g = graphistry.bind(source='from_id', destination='to_id').edges(edges_df)

    # ---- nodes -------------------------------------------------------
    nodes_df = pd.DataFrame({'type': [], 'node_id': []})
    node_key = bindings['nodes']
    nodes = [x for x in json if node_key in x]
    if nodes:
        nodes = nodes[0][node_key]
        nodes_df = pd.DataFrame(nodes)
        try:
            nodes_df = nodes_df.drop(columns=['attributes'])
            attrs = [x['attributes'] for x in nodes]
            nodes_df = pd.merge(
                nodes_df,
                pd.DataFrame(attrs),
                left_index=True, right_index=True
            )
        except Exception:
            self.__log('Failed to extract node attrs')
    else:
        # No explicit nodes: derive them from the edge endpoints.
        # pd.concat replaces Series.append, which newer pandas no longer has.
        endpoint_ids = pd.concat([edges_df['from_id'], edges_df['to_id']])
        nodes_df = pd.DataFrame({'node_id': endpoint_ids}) \
            .drop_duplicates().reset_index(drop=True)

        from_types = nodes_df.merge(
            edges_df[['from_id', 'from_type']].rename(
                columns={'from_id': 'node_id', 'from_type': 'type'}),
            on='node_id', how='left')
        to_types = nodes_df.merge(
            edges_df[['to_id', 'to_type']].rename(
                columns={'to_id': 'node_id', 'to_type': 'type'}),
            on='node_id', how='left')

        # NOTE(review): the two lookups are paired by row position; this
        # looks like it assumes each node matches at most one edge per
        # side -- confirm against real responses.
        paired = from_types.merge(to_types, left_index=True, right_index=True)
        # Prefer the type seen as an edge source, else the one seen as target.
        node_types = paired.apply(
            lambda row: row['type_x'] if not pd.isna(row['type_x']) else row['type_y'],
            axis=1)
        nodes_df = nodes_df.merge(
            pd.DataFrame({'type': node_types}),
            left_index=True, right_index=True)

    g = g.bind(node='node_id').nodes(nodes_df)
    return g
Example #23
Source File: test_reductions.py From recruit with Apache License 2.0 | 5 votes |
def test_idxmin(self):
    """Check ``Series.idxmin`` with NaNs, without NaNs and on datetimes."""
    # test idxmin
    # _check_stat_op approach can not be used here because of isna check.
    string_series = tm.makeStringSeries().rename('series')

    # add some NaNs
    # (np.nan, not the np.NaN alias: the alias is gone in NumPy 2.0 and the
    # rest of this test already uses np.nan)
    string_series[5:15] = np.nan

    # skipna or no
    assert string_series[string_series.idxmin()] == string_series.min()
    assert pd.isna(string_series.idxmin(skipna=False))

    # no NaNs
    nona = string_series.dropna()
    assert nona[nona.idxmin()] == nona.min()
    assert (nona.index.values.tolist().index(nona.idxmin()) ==
            nona.values.argmin())

    # all NaNs
    allna = string_series * np.nan
    assert pd.isna(allna.idxmin())

    # datetime64[ns]
    s = Series(pd.date_range('20130102', periods=6))
    result = s.idxmin()
    assert result == 0

    # With the earliest entry missing, the next position holds the minimum.
    s[0] = np.nan
    result = s.idxmin()
    assert result == 1
Example #24
Source File: sparse.py From recruit with Apache License 2.0 | 5 votes |
def isna(self):
    """Return a new array of the same type flagging missing stored values.

    The result reuses ``self.sp_index`` and has a boolean ``SparseDtype``
    whose fill value is ``self._null_fill_value``.
    """
    from pandas import isna as _isna

    # If null fill value, we want SparseDtype[bool, true]
    # to preserve the same memory usage.
    result_dtype = SparseDtype(bool, self._null_fill_value)
    stored_flags = _isna(self.sp_values)
    return type(self)._simple_new(stored_flags, self.sp_index, result_dtype)
Example #25
Source File: test_reductions.py From recruit with Apache License 2.0 | 5 votes |
def test_ops_consistency_on_empty(self, method):
    """Reductions on empty Series return NA (float) or NaT (timedelta)."""
    # GH#7869
    # consistency on empty

    # float
    empty_float = Series(dtype=float)
    assert pd.isna(getattr(empty_float, method)())

    # timedelta64[ns]
    empty_timedelta = Series(dtype='m8[ns]')
    assert getattr(empty_timedelta, method)() is pd.NaT
Example #26
Source File: test_reductions.py From recruit with Apache License 2.0 | 5 votes |
def test_minmax_nat_datetime64(self, op):
    """``op`` on an empty or all-NaT DatetimeIndex must return NaT."""
    # Return NaT
    for values in ([], [pd.NaT], [pd.NaT, pd.NaT, pd.NaT]):
        obj = DatetimeIndex(values)
        assert pd.isna(getattr(obj, op)())
Example #27
Source File: test_reductions.py From recruit with Apache License 2.0 | 5 votes |
def test_minmax_timedelta64(self):
    """min/max/argmin/argmax on TimedeltaIndex, with and without NaT."""
    # monotonic
    idx1 = TimedeltaIndex(['1 days', '2 days', '3 days'])
    # is_monotonic_increasing replaces the removed ``is_monotonic`` alias.
    assert idx1.is_monotonic_increasing

    # non-monotonic
    idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT'])
    assert not idx2.is_monotonic_increasing

    # Missing entries are skipped by all four reductions.
    for idx in [idx1, idx2]:
        assert idx.min() == Timedelta('1 days')
        assert idx.max() == Timedelta('3 days')
        assert idx.argmin() == 0
        assert idx.argmax() == 2

    for op in ['min', 'max']:
        # Return NaT
        for values in ([], [pd.NaT], [pd.NaT, pd.NaT, pd.NaT]):
            obj = TimedeltaIndex(values)
            assert pd.isna(getattr(obj, op)())
Example #28
Source File: test_constructors.py From recruit with Apache License 2.0 | 5 votes |
def test_frame_datetime64_mixed_index_ctor_1681(self):
    """Building a frame from a Series under a different index must not fail.

    The Series carries its default index while the frame is given the date
    range as index, so column ``B`` is expected to be entirely NA.
    """
    fridays = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
    stamps = Series(fridays)

    # it works!
    frame = DataFrame({'A': 'foo', 'B': stamps}, index=fridays)
    column_b = frame['B']
    assert column_b.isna().all()
Example #29
Source File: test_constructors.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_multi_index(self):
    """An empty frame on a MultiIndex with repeated tuples is all NA."""
    # GH 4078
    # construction error with mi and all-nan frame
    tuple_sets = (
        [(2, 3), (3, 3), (3, 3)],
        [(3, 3), (2, 3), (3, 3)],
    )
    for tuples in tuple_sets:
        mi = MultiIndex.from_tuples(tuples)
        frame = DataFrame(index=mi, columns=mi)
        assert pd.isna(frame).values.ravel().all()
Example #30
Source File: test_block_internals.py From recruit with Apache License 2.0 | 5 votes |
def test_stale_cached_series_bug_473(self):
    """A value written through ``Y['g']['c']`` must be visible on re-read.

    The statement order (repr, dtype change of another column, chained
    write, repr, reductions) is the scenario under test and is kept as is.
    """
    # this is chained, but ok
    with option_context('chained_assignment', None):
        Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'),
                      columns=('e', 'f', 'g', 'h'))
        repr(Y)
        Y['e'] = Y['e'].astype('object')
        # np.nan, not the np.NaN alias: the alias is gone in NumPy 2.0.
        Y['g']['c'] = np.nan
        repr(Y)
        # Results are unused on purpose; the calls themselves are the test.
        result = Y.sum()  # noqa
        exp = Y['g'].sum()  # noqa
        assert pd.isna(Y['g']['c'])