Python pandas.isnull() Examples
The following are 30 code examples of pandas.isnull().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: stata.py From Computable with MIT License | 6 votes |
def _write_data_dates(self): convert_dates = self._convert_dates data = self.datarows byteorder = self._byteorder TYPE_MAP = self.TYPE_MAP MISSING_VALUES = self.MISSING_VALUES typlist = self.typlist for row in data: #row = row.squeeze().tolist() # needed for structured arrays for i, var in enumerate(row): typ = ord(typlist[i]) #NOTE: If anyone finds this terribly slow, there is # a vectorized way to convert dates, see genfromdta for going # from int to datetime and reverse it. will copy data though if i in convert_dates: var = _datetime_to_stata_elapsed(var, self.fmtlist[i]) if typ <= 244: # we've got a string if len(var) < typ: var = _pad_bytes(var, typ) self._write(var) else: if isnull(var): # this only matters for floats var = MISSING_VALUES[typ] self._file.write(struct.pack(byteorder+TYPE_MAP[typ], var))
Example #2
Source File: sensitivity_analysis.py From DETAD with MIT License | 6 votes |
def compute_mAP_N(result, this_cls_pred, this_cls_gt):
    """Return the mean normalized average precision over all tIoU thresholds.

    Args:
        result: object exposing ``tiou_thresholds``, ``matched_gt_id_cols``
            (one column name per threshold) and
            ``average_num_instance_per_class``.
        this_cls_pred: predictions for one class; for each threshold the
            column ``result.matched_gt_id_cols[tidx]`` is null for an
            unmatched prediction.
        this_cls_gt: ground truth for the same class, with a ``'gt-id'``
            column.

    Returns:
        The mean over thresholds of ``interpolated_prec_rec`` applied to the
        cumulative precision/recall curves.
    """
    n_thresholds = len(result.tiou_thresholds)
    n_predictions = len(this_cls_pred)
    ap = np.zeros(n_thresholds)
    tp = np.zeros((n_thresholds, n_predictions))
    fp = np.zeros((n_thresholds, n_predictions))

    for tidx in range(n_thresholds):
        # A prediction with no matched ground-truth id is a false positive,
        # every other prediction is a true positive.
        unmatched = pd.isnull(
            this_cls_pred[result.matched_gt_id_cols[tidx]]).values
        fp[tidx, unmatched] = 1
        tp[tidx, ~unmatched] = 1

    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    tp_cumsum = np.cumsum(tp, axis=1).astype(np.float64)
    fp_cumsum = np.cumsum(fp, axis=1).astype(np.float64)
    recall_cumsum = tp_cumsum / len(np.unique(this_cls_gt['gt-id']))
    # Precision is normalized by the average number of instances per class.
    scaled_recall = recall_cumsum * result.average_num_instance_per_class
    precision_cumsum = scaled_recall / (scaled_recall + fp_cumsum)

    for tidx in range(n_thresholds):
        ap[tidx] = interpolated_prec_rec(precision_cumsum[tidx, :],
                                         recall_cumsum[tidx, :])

    return ap.mean()

# Initialize true positive and false positive vectors.
Example #3
Source File: common.py From naru with Apache License 2.0 | 6 votes |
def SetDistribution(self, distinct_values):
    """Record the complete set of values this column will ever see.

    The non-null distinct values are stored sorted in
    ``self.all_distinct_values``; if any null is present a single null
    (``NaT`` for datetime64 input, ``NaN`` otherwise) is placed first.
    ``self.distribution_size`` is set to the resulting length and must match
    any size that was already set. Returns ``self``.
    """
    assert self.all_distinct_values is None
    # pd.isnull is true for both np.nan and np.datetime64('NaT').
    null_mask = pd.isnull(distinct_values)
    has_null = np.any(null_mask)
    # NOTE: np.sort would put NaT first and NaN last; nulls are stripped
    # here and re-added at the front so the position is always the same.
    domain = np.sort(np.unique(distinct_values[~null_mask]))
    if has_null:
        if np.issubdtype(distinct_values.dtype, np.datetime64):
            null_value = np.datetime64('NaT')
        else:
            null_value = np.nan
        domain = np.insert(domain, 0, null_value)
    expected_size = self.distribution_size
    if expected_size is not None:
        assert len(domain) == expected_size
    self.all_distinct_values = domain
    self.distribution_size = len(domain)
    return self
Example #4
Source File: compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _compute_frequency(self, col): # https://github.com/pydata/pandas/issues/3729 na_value = 'NAN' value_count = col.fillna(na_value) c = value_count.groupby(by=value_count).transform('count') c = c.astype(numpy.float64) if self.normalise: c = c / len(col) # replace missing values c[col.isnull()] = self.missing_value return c
Example #5
Source File: string.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def jarowinkler_similarity(s1, s2):
    """Return a Series of Jaro-Winkler scores for the paired items of s1, s2.

    Pairs for which the scorer raises yield NaN when either side is null;
    any other failure is re-raised.
    """
    pairs = pandas.Series(list(zip(s1, s2)))

    from jellyfish import jaro_winkler

    def _score(pair):
        left, right = pair[0], pair[1]
        try:
            return jaro_winkler(left, right)
        except Exception as err:
            # Only a null operand is an acceptable reason for failure.
            if not (pandas.isnull(left) or pandas.isnull(right)):
                raise err
            return np.nan

    return pairs.apply(_score)
Example #6
Source File: string.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def levenshtein_similarity(s1, s2):
    """Return a Series of Levenshtein similarities for paired items of s1, s2.

    The similarity is ``1 - distance / max(len(a), len(b))``. Pairs for
    which the computation raises yield NaN when either side is null; any
    other failure is re-raised.
    """
    pairs = pandas.Series(list(zip(s1, s2)))

    from jellyfish import levenshtein_distance

    def _score(pair):
        left, right = pair[0], pair[1]
        try:
            distance = levenshtein_distance(left, right)
            longest = np.max([len(left), len(right)])
            return 1 - distance / longest
        except Exception as err:
            # Only a null operand is an acceptable reason for failure.
            if not (pandas.isnull(left) or pandas.isnull(right)):
                raise err
            return np.nan

    return pairs.apply(_score)
Example #7
Source File: compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _compute_vectorized(self, s_left, s_right): # Values or agree/disagree if self.agree_value == 'value': compare = s_left.copy() compare[s_left != s_right] = self.disagree_value else: compare = pandas.Series(self.disagree_value, index=s_left.index) compare[s_left == s_right] = self.agree_value # Only when disagree value is not identical with the missing value if self.disagree_value != self.missing_value: compare[(s_left.isnull() | s_right.isnull())] = self.missing_value return compare
Example #8
Source File: string.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def damerau_levenshtein_similarity(s1, s2):
    """Return a Series of Damerau-Levenshtein similarities for s1, s2 pairs.

    The similarity is ``1 - distance / max(len(a), len(b))``. Pairs for
    which the computation raises yield NaN when either side is null; any
    other failure is re-raised.
    """
    pairs = pandas.Series(list(zip(s1, s2)))

    from jellyfish import damerau_levenshtein_distance

    def _score(pair):
        left, right = pair[0], pair[1]
        try:
            distance = damerau_levenshtein_distance(left, right)
            longest = np.max([len(left), len(right)])
            return 1 - distance / longest
        except Exception as err:
            # Only a null operand is an acceptable reason for failure.
            if not (pandas.isnull(left) or pandas.isnull(right)):
                raise err
            return np.nan

    return pairs.apply(_score)
Example #9
Source File: type_detection.py From sato with Apache License 2.0 | 6 votes |
def detect_integer(e):
    """Return True when ``e`` is recognised as an integer, else False.

    Empty strings and nulls are never integers. ``integer_regex`` is tried
    first; if matching raises (presumably because ``e`` is not a string —
    TODO confirm), ``float(e).is_integer()`` is tried; if that raises too,
    ``locale.atoi`` is tried under each locale in ``locales``.

    The locale fallback previously referenced the non-existent
    ``locale.LC_all`` and therefore always failed silently. It now uses
    ``locale.LC_ALL``, tries every locale independently and restores the
    process locale afterwards.
    """
    if e == '' or pd.isnull(e):
        return False

    try:
        if integer_regex.match(e):
            return True
    except Exception:
        try:
            if float(e).is_integer():
                return True
        except Exception:
            # setlocale is process-global: remember the current setting so
            # the probing below does not leak into the rest of the program.
            previous = locale.setlocale(locale.LC_ALL)
            try:
                for l in locales:
                    try:
                        locale.setlocale(locale.LC_ALL, l)
                        if float(locale.atoi(e)).is_integer():
                            return True
                    except Exception:
                        # unsupported locale or unparsable value: try next
                        continue
            finally:
                try:
                    locale.setlocale(locale.LC_ALL, previous)
                except locale.Error:
                    pass

    return False
Example #10
Source File: type_detection.py From sato with Apache License 2.0 | 6 votes |
def detect_decimal(e):
    """Return True when ``e`` is recognised as a decimal number, else False.

    Empty strings and nulls are never decimals. A value is accepted when it
    matches ``decimal_regex``, when ``Decimal(e)`` succeeds, or when
    ``locale.atof`` can parse it under any locale in ``locales``.

    The locale fallback previously referenced the non-existent
    ``locale.LC_all`` and re-parsed ``e`` with ``Decimal`` (which had just
    failed), so it could never succeed. It now uses ``locale.LC_ALL``,
    tries every locale independently, always returns a bool and restores
    the process locale afterwards.
    """
    if e == '' or pd.isnull(e):
        return False

    if decimal_regex.match(e):
        return True

    try:
        Decimal(e)
        return True
    except Exception:
        pass

    # setlocale is process-global: remember the current setting so the
    # probing below does not leak into the rest of the program.
    previous = locale.setlocale(locale.LC_ALL)
    try:
        for l in locales:
            try:
                locale.setlocale(locale.LC_ALL, l)
                locale.atof(e)
            except Exception:
                # unsupported locale or unparsable value: try the next one
                continue
            return True
    finally:
        try:
            locale.setlocale(locale.LC_ALL, previous)
        except locale.Error:
            pass

    return False
Example #11
Source File: test_integer.py From recruit with Apache License 2.0 | 6 votes |
def test_conversions(data_missing):
    """Casting the ``data_missing`` fixture to object keeps values and types."""
    frame = pd.DataFrame({'A': data_missing})

    # astype to an object Series
    as_object = frame['A'].astype('object')
    tm.assert_series_equal(
        as_object,
        pd.Series(np.array([np.nan, 1], dtype=object), name='A'))

    # Convert to an object ndarray. The arrays must be exactly equal,
    # including the type conversion of each scalar.
    result = frame['A'].astype('object').values
    expected = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for got, want in zip(result, expected):
        if pd.isnull(got):
            assert pd.isnull(want)
            continue
        assert got == want
        if is_integer(got):
            # PY2 can be int or long
            assert is_integer(want)
        else:
            assert type(got) == type(want)
Example #12
Source File: __init__.py From psst with MIT License | 6 votes |
def solve(self, solver='glpk', verbose=False, keepfiles=False, resolve=False, **kwargs):
    """Solve the model and store the results on the instance.

    Args:
        solver: solver name passed to ``solve_model``. ``'xpress'`` forces
            ``resolve`` to True.
        verbose: forwarded to ``solve_model``.
        keepfiles: forwarded to ``solve_model``.
        resolve: when True, every non-null unit-commitment value from the
            first solve is fixed on ``self._model.UnitOn`` and the model is
            solved a second time with ``is_mip=False``.
        **kwargs: forwarded to ``solve_model``.

    Side effects: sets ``self._results`` and, at the end,
    ``self._status = 'solved'``.
    """
    if solver == 'xpress':
        resolve = True

    solve_model(self._model, solver=solver, verbose=verbose, keepfiles=keepfiles, **kwargs)
    self._results = PSSTResults(self)

    if resolve:
        for t, row in self.results.unit_commitment.iterrows():
            # Series.iteritems() was removed in pandas 2.0; items() is the
            # equivalent that exists in old and new versions alike.
            for g, v in row.items():
                if not pd.isnull(v):
                    self._model.UnitOn[g, t].fixed = True
                    self._model.UnitOn[g, t] = int(float(v))

        solve_model(self._model, solver=solver, verbose=verbose, keepfiles=keepfiles, is_mip=False, **kwargs)
        self._results = PSSTResults(self)

    self._status = 'solved'
Example #13
Source File: test_validator_database_random_entries.py From cellphonedb with MIT License | 6 votes |
def test_gene(self):
    """Every entry of ``gene_entries`` must match at least one stored gene.

    For each expected entry the expanded gene table is filtered column by
    column: a ``None`` expectation requires a null cell, anything else
    requires equality. Entries with no remaining row are logged and make
    the test fail.
    """
    dataframe = cellphonedb_app.cellphonedb.database_manager.get_repository(
        'gene').get_all_expanded()

    data_not_match = False

    for gene in gene_entries:
        db_gene = dataframe

        for column_name in gene:
            # identity check: None is a singleton, "== None" is unidiomatic
            if gene[column_name] is None:
                db_gene = db_gene[pd.isnull(db_gene[column_name])]
            else:
                db_gene = db_gene[db_gene[column_name] == gene[column_name]]

        if len(db_gene) < 1:
            app_logger.warning('Failed cheking Gene:')
            app_logger.warning('Expected data:')
            app_logger.warning(gene)
            data_not_match = True

    self.assertFalse(data_not_match, 'Some Gene doesnt match')
Example #14
Source File: recmat.py From quail with MIT License | 6 votes |
def _recmat_exact(presented, recalled, features): lists = presented.index.get_values() cols = max(presented.shape[1], recalled.shape[1]) result = np.empty((presented.shape[0], cols))*np.nan for li, l in enumerate(lists): p_list = presented.loc[l] r_list = recalled.loc[l] for i, feature in enumerate(features): get_feature = lambda x: np.array(x[feature]) if not np.array(pd.isnull(x['item'])).any() else np.nan p = np.vstack(p_list.apply(get_feature).get_values()) r = r_list.dropna().apply(get_feature).get_values() r = np.vstack(list(filter(lambda x: x is not np.nan, r))) try: m = [np.where((p==x).all(axis=1))[0] for x in r] except AttributeError: m = [] result[li, :len(m)] = [x[0]+1 if len(x)>0 else np.nan for x in m] return result
Example #15
Source File: test_core.py From ffn with MIT License | 6 votes |
def test_calc_stats():
    """Divide-by-zero cases in ``ffn.calc_stats`` must yield null stats."""
    # test twelve_month_win_perc divide by zero
    prices = df.C['2010-10-01':'2011-08-01']
    stats = ffn.calc_stats(prices).stats
    assert pd.isnull(stats['twelve_month_win_perc'])
    prices = df.C['2009-10-01':'2011-08-01']
    stats = ffn.calc_stats(prices).stats
    assert not pd.isnull(stats['twelve_month_win_perc'])

    # test yearly_sharpe divide by zero
    # Copy the slice: it is overwritten below, and a label slice can be a
    # view on the shared module-level ``df``.
    prices = df.C['2009-01-01':'2012-01-01'].copy()
    stats = ffn.calc_stats(prices).stats
    assert 'yearly_sharpe' in stats.index
    prices[prices > 0.0] = 1.0
    # throws warnings
    stats = ffn.calc_stats(prices).stats
    assert pd.isnull(stats['yearly_sharpe'])
Example #16
Source File: test_foreign.py From vnpy_crypto with MIT License | 6 votes |
def test_missing_roundtrip():
    """Missing values survive a StataWriter -> genfromdta round trip."""
    record_dtype = [("double_miss", float),
                    ("float_miss", np.float32),
                    ("string_miss", "a1")]
    source = np.array([(np.nan, np.inf, "")], dtype=record_dtype)

    # write to an in-memory buffer and read it straight back
    buf = BytesIO()
    StataWriter(buf, source).write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == asbytes(""))

    # a reference file read with an explicit missing-value marker
    reference = os.path.join(curdir, "results/data_missing.dta")
    dta = genfromdta(reference, missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
Example #17
Source File: pdutils.py From meterstick with Apache License 2.0 | 6 votes |
def any_null(obj):
    """Checks if there are any null values in obj.

    Args:
      obj: A scalar, Series, or DataFrame.

    Returns:
      A boolean. True if there are any NaN values in obj.

    Raises:
      ValueError: if obj is not a scalar, Series, or DataFrame
    """
    if np.isscalar(obj):
        return pd.isnull(obj)
    if isinstance(obj, pd.DataFrame):
        return obj.isnull().values.any()
    if isinstance(obj, pd.Series):
        return obj.isnull().any()
    raise ValueError("obj is not a scalar, Series, or DataFrame.")
Example #18
Source File: test_resample.py From Computable with MIT License | 6 votes |
def test_ohlc_5min(self):
    """5-minute OHLC resampling matches a hand-computed open/high/low/close."""

    def _ohlc(group):
        # expected [open, high, low, close]; all-NaN for an all-null group
        if not isnull(group).all():
            return [group[0], group.max(), group.min(), group[-1]]
        return np.repeat(np.nan, 4)

    index = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s')
    series = Series(np.random.randn(len(index)), index=index)

    bars = series.resample('5min', how='ohlc', closed='right',
                           label='right')

    # the first bin holds only the very first observation
    self.assert_((bars.ix['1/1/2000 00:00'] == series[0]).all())

    expected = _ohlc(series[1:31])
    self.assert_((bars.ix['1/1/2000 00:05'] == expected).all())

    expected = _ohlc(series['1/1/2000 5:55:01':])
    self.assert_((bars.ix['1/1/2000 6:00:00'] == expected).all())
Example #19
Source File: generic.py From Computable with MIT License | 6 votes |
def clip_lower(self, threshold):
    """
    Return a copy of the input in which values below ``threshold`` are
    replaced by ``threshold``; null entries are left untouched.

    See also
    --------
    clip

    Returns
    -------
    clipped : same type as input

    Raises
    ------
    ValueError
        If ``threshold`` is an NA value.
    """
    if isnull(threshold):
        raise ValueError("Cannot use an NA value as a clip threshold")

    # keep nulls and everything already at or above the threshold
    keep = isnull(self) | (self >= threshold)
    return self.where(keep, threshold)
Example #20
Source File: transform.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _mode(x, def_fill=ImputerMixin._def_fill):
    """Get the most common value in a 1d H2OFrame. Ties will be handled
    in a non-specified manner.

    Parameters
    ----------
    x : ``H2OFrame``, shape=(n_samples, 1)
        The 1d frame from which to derive the mode

    def_fill : object, optional
        Value returned when the column has no non-null value at all.

    Returns
    -------
    The most frequent non-null value of the first column of ``x``, or
    ``def_fill`` if there is none.
    """
    idx = x.as_data_frame(use_pandas=True)[x.columns[0]].value_counts().index

    # The index is ordered from most to least common. Return the first
    # non-null entry. An all-null (or empty) column produces an empty index,
    # which previously raised IndexError on ``idx[0]``; it now falls through
    # to def_fill.
    for value in idx:
        if not pd.isnull(value):
            return value
    return def_fill
Example #21
Source File: word2vec.py From kaggle-HomeDepot with MIT License | 5 votes |
def replace_nan(s):
    """Return an empty string for a missing value, otherwise ``s`` itself."""
    return "" if pd.isnull(s) else s
Example #22
Source File: grams_and_terms_features.py From kaggle-HomeDepot with MIT License | 5 votes |
def replace_nan(s):
    """Return an empty string for a missing value, otherwise ``s`` itself."""
    if not pd.isnull(s):
        return s
    return ""

#code for attributes creation
Example #23
Source File: json.py From Computable with MIT License | 5 votes |
def _try_convert_to_date(self, data): """ try to parse a ndarray like into a date column try to coerce object in epoch/iso formats and integer/float in epcoh formats, return a boolean if parsing was successful """ # no conversion on empty if not len(data): return data, False new_data = data if new_data.dtype == 'object': try: new_data = data.astype('int64') except: pass # ignore numbers that are out of range if issubclass(new_data.dtype.type, np.number): in_range = (isnull(new_data.values) | (new_data > self.min_stamp) | (new_data.values == iNaT)) if not in_range.all(): return data, False date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: new_data = to_datetime(new_data, errors='raise', unit=date_unit) except OverflowError: continue except: break return new_data, True return data, False
Example #24
Source File: foreign.py From vnpy_crypto with MIT License | 5 votes |
def _write_data_dates(self): convert_dates = self._convert_dates data = self.datarows byteorder = self._byteorder TYPE_MAP = self.TYPE_MAP MISSING_VALUES = self.MISSING_VALUES typlist = self.typlist for row in data: #row = row.squeeze().tolist() # needed for structured arrays for i,var in enumerate(row): typ = ord(typlist[i]) #NOTE: If anyone finds this terribly slow, there is # a vectorized way to convert dates, see genfromdta for going # from int to datetime and reverse it. will copy data though if i in convert_dates: var = _datetime_to_stata_elapsed(var, self.fmtlist[i]) if typ <= 244: # we've got a string if isnull(var): var = "" # missing string if len(var) < typ: var = _pad_bytes(var, len(var) + 1) self._write(var) else: if isnull(var): # this only matters for floats var = MISSING_VALUES[typ] self._write(pack(byteorder+TYPE_MAP[typ], var))
Example #25
Source File: Fetch_Data_Stock_US_Daily.py From StockRecommendSystem with MIT License | 5 votes |
def judgeNeedPreDownload(root_path, symbol, first_date, from_date, to_date):
    """Decide whether earlier data for ``symbol`` still has to be downloaded.

    Returns False when the stored publish day is known and equals
    ``first_date``. Otherwise returns True when the range has open days and
    the publish day is unknown or earlier than the last open day; False in
    every other case.
    """
    publishDay = pd.Timestamp(
        queryStockPublishDay(root_path, "DB_STOCK", "SHEET_US", symbol))

    publish_known = not pd.isnull(publishDay)
    if publish_known and publishDay == first_date:
        return False

    open_days = judgeOpenDaysInRange(from_date, to_date)
    if len(open_days) == 0:
        return False

    last_open_day = pd.Timestamp(open_days['date'].index[-1])
    if pd.isnull(publishDay) or last_open_day > publishDay:
        return True
    return False
Example #26
Source File: grams_and_terms_features.py From kaggle-HomeDepot with MIT License | 5 votes |
def replace_nan(s):
    """Return an empty string for a missing value, otherwise ``s`` itself."""
    missing = pd.isnull(s)
    return "" if missing else s
Example #27
Source File: grams_and_terms_features.py From kaggle-HomeDepot with MIT License | 5 votes |
def replace_nan(s):
    """Return ``s`` as a string, with missing values mapped to ``""``.

    The null check has to run before the string conversion: ``str(nan)`` is
    the non-null text ``"nan"``, so checking afterwards never replaced
    anything.
    """
    if pd.isnull(s):
        return ""
    return str(s)
Example #28
Source File: indicator.py From pinkfish with MIT License | 5 votes |
def apply(self, row):
    """Update and return the running regime counter ``self._r`` for one row.

    The counter becomes NaN while ``row['__sma_slow__']`` is null. When the
    fast average is above the slow one it counts up from 1 (continuing an
    existing positive run); otherwise it counts down from -1 (continuing an
    existing negative run).
    """
    slow = row['__sma_slow__']
    if pd.isnull(slow):
        self._r = np.nan
        return self._r

    if row['__sma_fast__'] > slow:
        # extend a positive run, or start one
        self._r = self._r + 1 if self._r > 0 else 1
    else:
        # extend a negative run, or start one
        self._r = self._r - 1 if self._r < 0 else -1
    return self._r
Example #29
Source File: Fetch_Data_Stock_HK_Daily.py From StockRecommendSystem with MIT License | 5 votes |
def judgeNeedPreDownload(dir, symbol, from_date, to_date):
    """Decide whether earlier data for ``symbol`` still has to be downloaded.

    Returns True when the range has open days and the stored publish day is
    unknown or earlier than the last open day; False otherwise. The publish
    day is only looked up when the range has open days.
    """
    open_days = judgeOpenDaysInRange(from_date, to_date)
    if len(open_days) == 0:
        return False

    publishDay = pd.Timestamp(getStockPublishDay(dir, symbol))
    last_open_day = pd.Timestamp(open_days['Date'].index[-1])
    if pd.isnull(publishDay) or last_open_day > publishDay:
        return True
    return False
Example #30
Source File: generic.py From Computable with MIT License | 5 votes |
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
               **kwds):
    """
    Percent change over given number of periods

    Parameters
    ----------
    periods : int, default 1
        Periods to shift for forming percent change
    fill_method : str, default 'pad'
        How to handle NAs before computing percent changes
    limit : int, default None
        The number of consecutive NAs to fill before stopping
    freq : DateOffset, timedelta, or offset alias string, optional
        Increment to use from time series API (e.g. 'M' or BDay())

    Returns
    -------
    chg : same type as caller
    """
    # TODO: Not sure if above is correct - need someone to confirm.
    filled = (self if fill_method is None
              else self.fillna(method=fill_method, limit=limit))

    shifted = filled.shift(periods=periods, freq=freq, **kwds)
    rs = filled / shifted - 1
    if freq is None:
        # positions that were null in the caller stay null in the result
        np.putmask(rs.values, com.isnull(_values_from_object(self)), np.nan)
    return rs