Python numpy.nans() Examples
The following are 13 code examples of numpy.nans().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: test_nanguardmode.py From D-VAE with MIT License | 5 votes |
def test_NanGuardMode():
    """
    Check that NanGuardMode catches Inf, NaN and very large values.

    A small ``dot`` graph is compiled under NanGuardMode with both
    ``nan_is_error`` and ``inf_is_error`` enabled.  An ordinary random input
    is evaluated first and must not raise; inputs filled with Inf, NaN and
    1e20 must each make the compiled function raise ``AssertionError``.
    """
    float_x = theano.config.floatX

    def filled(value):
        # Broadcast one scalar to the (3, 5) input shape, cast to floatX.
        return numpy.tile(numpy.asarray(value).astype(float_x), (3, 5))

    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(float_x))
    y = T.dot(x, w)
    fun = theano.function(
        [x], y,
        mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )

    a = numpy.random.randn(3, 5).astype(float_x)
    bad_inputs = (
        filled(numpy.asarray(100.) ** 1000000),  # INFs (the power overflows)
        filled(numpy.nan),                       # NANs
        filled(1e20),                            # big values
    )

    fun(a)  # normal values

    # Temporarily silence logger
    guard_logger = logging.getLogger("theano.compile.nanguardmode")
    try:
        guard_logger.propagate = False
        for bad in bad_inputs:
            assert_raises(AssertionError, fun, bad)
    finally:
        # Restore propagation even when one of the checks fails.
        guard_logger.propagate = True
Example #2
Source File: test_nanguardmode.py From attention-lvcsr with MIT License | 5 votes |
def test_NanGuardMode():
    """
    Feed Inf, NaN and very large inputs to a function compiled in NanGuardMode.

    The mode is created with ``nan_is_error=True`` and ``inf_is_error=True``.
    A regular random matrix is evaluated first as a sanity check; after that
    every abnormal input is expected to raise ``AssertionError``.
    """
    dtype = theano.config.floatX
    in_shape = (3, 5)

    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(dtype))
    y = T.dot(x, w)
    guard_mode = NanGuardMode(nan_is_error=True, inf_is_error=True)
    fun = theano.function([x], y, mode=guard_mode)

    a = numpy.random.randn(3, 5).astype(dtype)

    # 100 ** 1000000 overflows to inf before the cast to floatX.
    inf_scalar = (numpy.asarray(100.) ** 1000000).astype(dtype)
    nan_scalar = numpy.asarray(numpy.nan).astype(dtype)
    big_scalar = numpy.asarray(1e20).astype(dtype)

    infa = numpy.tile(inf_scalar, in_shape)
    nana = numpy.tile(nan_scalar, in_shape)
    biga = numpy.tile(big_scalar, in_shape)

    fun(a)  # normal values

    # Temporarily silence logger
    nanguard_log = logging.getLogger("theano.compile.nanguardmode")
    try:
        nanguard_log.propagate = False
        assert_raises(AssertionError, fun, infa)  # INFs
        assert_raises(AssertionError, fun, nana)  # NANs
        assert_raises(AssertionError, fun, biga)  # big values
    finally:
        nanguard_log.propagate = True
Example #3
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _rm_all_Sc(self):
    """Remove people who didn't enter a Rx score in Rx division.

    A score counts as scaled when it is a string ending in ``'- s'``.  Every
    scaled entry is replaced by ``np.nan``; rows whose score columns
    (``self.scorel``) are all null afterwards are removed and the index is
    renumbered from 0.  ``self.df`` is rebound to the result.

    ``self.ci[i]`` is used as the positional column index written for
    workout ``i`` (presumably the position of ``self.scorel[i]`` -- verify
    against the caller).

    Returns
    -------
    cfopendata : pd.Dataframe
        Crossfit open data with less rows.
    """
    # Detect scaled entries for every workout before mutating anything, so
    # the detection always sees the original scores.
    scaled_rows = []
    for i in range(self.wodscompleted):
        scores = self.df.loc[:, self.scorel[i]].values.tolist()
        # Non-string entries (e.g. NaN) cannot be scaled scores.
        flags = [isinstance(s, str) and s.endswith('- s') for s in scores]
        scaled_rows.append(np.flatnonzero(np.array(flags, dtype=bool)))

    # Find scaled inputs and set to nan
    for i, rows in enumerate(scaled_rows):
        if rows.size:
            self.df.iloc[rows, self.ci[i]] = np.nan

    # Drop rows without any remaining score.  A boolean mask is used instead
    # of feeding index labels to ``iloc``, which only worked while the index
    # happened to be 0..n-1.
    no_score = pd.isnull(self.df.loc[:, self.scorel]).all(axis=1).values
    self.df = self.df.loc[~no_score].reset_index(drop=True)
    return self.df
Example #4
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _rm_all_Sc_and_0s(self):
    """Remove people who did not enter any usable score.

    A score entry equal to ``'0'`` or ``''`` is treated as empty and replaced
    by ``np.nan``.  Rows whose score columns (``self.scorel``) are all null
    afterwards are removed and the index is renumbered from 0.  ``self.df``
    is rebound to the result.

    ``self.ci[i]`` is used as the positional column index written for
    workout ``i`` (presumably the position of ``self.scorel[i]`` -- verify
    against the caller).

    Returns
    -------
    cfopendata : pd.Dataframe
        Crossfit open data with less rows.
    """
    # Locate empty entries for every workout before mutating anything.
    empty_rows = []
    for i in range(self.wodscompleted):
        scores = self.df.loc[:, self.scorel[i]].values.tolist()
        flags = [s == '0' or s == '' for s in scores]
        empty_rows.append(np.flatnonzero(np.array(flags, dtype=bool)))

    # Find empty scores and set to nan
    for i, rows in enumerate(empty_rows):
        if rows.size:
            self.df.iloc[rows, self.ci[i]] = np.nan

    # Drop rows without any remaining score.  A boolean mask is used instead
    # of feeding index labels to ``iloc``, which only worked while the index
    # happened to be 0..n-1.
    no_score = pd.isnull(self.df.loc[:, self.scorel]).all(axis=1).values
    self.df = self.df.loc[~no_score].reset_index(drop=True)
    return self.df
Example #5
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _rm_Sc_str(self):
    """Strip the scaled marker from scores and blank out everything else.

    For each of the first ``self.wodscompleted`` score columns, an entry
    ending in ``'- s'`` loses its last four characters; every entry that
    does not end in ``'- s'`` is replaced by ``np.nan``.  Columns are written
    back by position through ``self.ci``.

    Returns
    -------
    cfopendata : pd.Dataframe
        Crossfit open data without ' - s' in scores and np.nans.
    """
    n_wods = self.wodscompleted
    n_rows = len(self.df)

    raw = [self.df.loc[:, self.scorel[w]].values.tolist() for w in range(n_wods)]
    cleaned = [list(column) for column in raw]

    # Row numbers to blank per workout; -1 marks "leave this row alone".
    to_blank = np.full((n_wods, n_rows), -1, dtype=int)

    for w in range(n_wods):
        for r in range(n_rows):
            score = raw[w][r]
            if score.endswith('- s'):
                cleaned[w][r] = score[0:-4]
            else:
                to_blank[w, r] = r

    # Write the stripped scores back, then overwrite the flagged rows with nan
    for w in range(n_wods):
        column_pos = self.ci[w]
        self.df.iloc[:, column_pos] = cleaned[w][:]
        flagged = to_blank[w, :]
        self.df.iloc[flagged[flagged >= 0], column_pos] = np.nan
    return self.df
Example #6
Source File: write_gctx.py From cmapPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def write_metadata(hdf5_out, dim, metadata_df, convert_back_to_neg_666, gzip_compression):
    """
    Writes either column or row metadata to proper node of gctx out (hdf5) file.

    Input:
        - hdf5_out (h5py): open hdf5 file to write to
        - dim (str; must be "row" or "col"): dimension of metadata to write to
        - metadata_df (pandas DataFrame): metadata DataFrame to write to file
        - convert_back_to_neg_666 (bool): Whether to convert numpy.nans back to "-666",
            as per CMap metadata null convention
        - gzip_compression: forwarded as the ``compression`` argument of every
            ``create_dataset`` call

    Raises:
        ValueError: if `dim` is neither "row" nor "col" (the error is also logged)

    Note: when convert_back_to_neg_666 is true, the replacement is written
    back into `metadata_df` itself.
    """
    if dim == "col":
        metadata_node_name = col_meta_group_node
    elif dim == "row":
        metadata_node_name = row_meta_group_node
    else:
        # Previously this only logged and then failed later on an unbound
        # local; fail here with a meaningful exception instead.
        msg = "'dim' argument must be either 'row' or 'col'!"
        logger.error(msg)
        raise ValueError(msg)

    hdf5_out.create_group(metadata_node_name)

    # write id field to expected node
    # (numpy.bytes_ is the name that survives NumPy 2.0; numpy.string_ was its alias)
    hdf5_out.create_dataset(metadata_node_name + "/id",
                            data=[numpy.bytes_(str(x)) for x in metadata_df.index],
                            compression=gzip_compression)

    metadata_fields = list(metadata_df.columns)

    # if specified, convert numpy.nans in metadata back to -666
    if convert_back_to_neg_666:
        for c in metadata_fields:
            metadata_df[[c]] = metadata_df[[c]].replace([numpy.nan], ["-666"])

    # write metadata columns to their own arrays; the "ind" column is skipped
    for field in metadata_fields:
        if field == "ind":
            continue
        array_write = numpy.array(metadata_df.loc[:, field])
        # string/object columns are stored as fixed-width byte strings
        if array_write.dtype.type in (numpy.str_, numpy.object_):
            array_write = array_write.astype('S')
        hdf5_out.create_dataset(metadata_node_name + "/" + field,
                                data=array_write,
                                compression=gzip_compression)
Example #7
Source File: read_path.py From control with GNU General Public License v3.0 | 5 votes |
def get_single(**kwargs):
    """Wrap the number with np.nans on either end

    All keyword arguments are forwarded to ``get_raw_data``.  The returned
    array has two more rows than the raw data: the first and last rows are
    filled with ``np.nan`` and the raw data sits in between.  The NaN rows
    take their width from the raw data instead of assuming two columns.
    """
    num = get_raw_data(**kwargs)
    # Start from an all-NaN array so the border rows need no extra assignment.
    new_array = np.full((num.shape[0] + 2, num.shape[1]), np.nan)
    new_array[1:-1] = num
    return new_array
Example #8
Source File: read_path.py From control with GNU General Public License v3.0 | 5 votes |
def get_sequence(sequence, writebox, spaces=False):
    """Returns a sequence

    The horizontal extent of `writebox` is divided into equal slots, one per
    item when `spaces` is False, or ``2 * len(sequence) - 1`` slots with every
    other slot used when it is not.  Each item is rendered with
    ``get_raw_data`` inside its slot and the pieces are stacked with a
    ``[nan, nan]`` row before, between and after them.

    sequence list: the sequence of integers
    writebox list: [min x, max x, min y, max y]
    """
    separator = np.array([np.nan, np.nan])
    packed = spaces is False  # identity test, exactly as callers expect

    x_min = writebox[0]
    if packed:
        each_num_width = (writebox[1] - x_min) / float(len(sequence))
    else:
        each_num_width = (writebox[1] - x_min) / float(len(sequence)*2 - 1)

    nums = separator.copy()
    for ii, nn in enumerate(sequence):
        if packed:
            left = x_min + each_num_width * ii
            right = x_min + each_num_width * (ii+1)
        else:
            left = x_min + each_num_width * 2 * ii
            right = x_min + each_num_width * 2 * (ii+.5)
        num_writebox = [left, right, writebox[2], writebox[3]]

        # integers are passed on as their string form
        label = str(nn) if isinstance(nn, int) else nn
        num = get_raw_data(label, num_writebox)
        nums = np.vstack([nums, num, separator])

    return nums
### Testing code ###
Example #9
Source File: checks.py From bulwark with GNU Lesser General Public License v3.0 | 5 votes |
def has_no_nans(df, columns=None):
    """Asserts that there are no np.nans in `df`.

    Convenience wrapper that delegates to `has_no_x` with ``np.nan`` as the
    only forbidden value.

    Args:
        df (pd.DataFrame): Any pd.DataFrame.
        columns (list): A subset of columns to check for np.nans.

    Returns:
        Original `df`.

    """
    forbidden = [np.nan]
    checked = has_no_x(df, values=forbidden, columns=columns)
    return checked
Example #10
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _extract_score(self, wod):
    """Convert workout score to a pd.Timedelta or integer.

    Reads column ``wod + '_score'`` of ``self.df``, converts each entry and
    writes the converted column into ``self.cleandata``.  Entries containing
    ``':'`` become ``pd.Timedelta`` (``M:SS`` is read as ``0:M:SS``); other
    strings are converted with ``int`` on the text before the first space.
    Float entries (NaN) and scores of 0 end up as ``np.nan``.

    The row positions of each category are stored as 1-D integer arrays in
    ``self.tdi`` (time deltas), ``self.ii`` (integers) and ``self.ni``
    (NaNs).  The arrays are 1-D even when a category is empty.

    Parameters
    ----------
    wod : string
        Name of the wod.

    Returns
    -------
    cfopendata : pd.Dataframe
        Score are either a pd.Timedelta, integer.
        (The method returns ``self``; the converted data lives in
        ``self.cleandata``.)
    """
    df_c_name = wod + '_score'
    raw_scores = self.df.loc[:, df_c_name].values.tolist()

    # Converted scores, addressed by position 0..n-1
    converted = self.df.loc[:, df_c_name].reset_index(drop=True)

    # Keep track of the indices in plain lists; turned into arrays once at
    # the end instead of re-allocating with np.append on every row.
    time_rows, int_rows, nan_rows = [], [], []

    for i, raw in enumerate(raw_scores):
        if isinstance(raw, float):
            # nans
            nan_rows.append(i)
        elif ':' in raw:
            # Convert time to time delta; some team scores are H:MM:SS
            if raw.count(':') > 1:
                converted[i] = pd.to_timedelta(raw)
            else:
                converted[i] = pd.to_timedelta('0:' + raw)
            time_rows.append(i)
        else:
            # Convert reps/weight to integers
            reps = int(raw.split(" ")[0])
            if reps > 0:
                converted[i] = reps
                int_rows.append(i)
            else:
                # Drop scores of 0 reps/weight
                converted[i] = np.nan
                nan_rows.append(i)

    self.cleandata.loc[:, df_c_name] = converted.values
    self.tdi = np.array(time_rows, dtype=int)
    self.ii = np.array(int_rows, dtype=int)
    self.ni = np.array(nan_rows, dtype=int)
    return self
Example #11
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _wod_percentile(self, wod):
    """Calculate wod percentile for reps/weight.

    The scores at ``self.ii`` are sorted from highest to lowest and paired
    with percentiles running evenly from 100 down to 0 (rounded to four
    decimals).  That percentile array is then passed through
    ``self._rm_dups_wod`` together with the sorted scores.  Rows listed in
    ``self.ni`` receive ``np.nan``.  The result is written to column
    ``wod + '_percentile'`` of ``self.cleandata`` in index order.

    NOTE(review): the final assignment expects ``self.ii`` and ``self.ni``
    together to cover every row of ``self.cleandata`` -- confirm with caller.

    Parameters
    ----------
    wod : string
        Name of wod.

    Returns
    -------
    cfopendata : pd.Dataframe
        Added percentile column.
        (The method returns ``self``; the column is added to
        ``self.cleandata``.)
    """
    score_col = wod + '_score'
    pct_col = wod + '_percentile'

    scores = self.cleandata.loc[:, score_col]
    ranked = scores[self.ii].sort_values(ascending=False)

    pct = np.flip(np.round(np.linspace(0, 100, num=len(ranked)),
                           decimals=4), 0)
    # Remove duplicates
    pct = self._rm_dups_wod(ranked, pct)

    # Check if any nans rows
    if len(self.ni) > 0:
        # pd.concat replaces Series.append, which pandas 2.0 removed.
        ranked = pd.concat([ranked, scores[self.ni]])
        # Pad pct with one NaN per appended row
        pct = np.append(pct, np.full(len(ranked) - len(pct), np.nan))

    # Single-column frame that carries the percentiles on the score index
    pct_frame = ranked.to_frame(name=pct_col)
    pct_frame.loc[:] = np.asarray(pct).reshape(-1, 1)

    # Put back into dataframe index
    pct_frame.index = pct_frame.index.map(int)
    pct_vals = pct_frame.sort_index()[pct_col].values

    # Add to self.cleandata
    self.cleandata.loc[:, pct_col] = pct_vals
    return self
Example #12
Source File: variabilityops.py From anvio with GNU General Public License v3.0 | 4 votes |
def get_histogram(self, column, fix_offset=False, **kwargs):
    """ Return a histogram (counts and bins) for a specified column of self.data

    Infinite and NaN entries of the column are dropped before the histogram
    is computed.

    Parameters
    ==========
    column : str
        The name of the column you want to get a histogram for. Must be numeric type

    fix_offset : bool, False
        If True, bins is set as the centre point for each bin, rather than the bin edges.
        This decreases the length of bins by 1, since there is one less bin that there are
        bin edges. Requires `range` and an integer `bins` (greater than 1) in kwargs,
        because the offset is computed from them.

    **kwargs : dict, optional
        Any arguments of np.histogram
        (https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.histogram.html)

    Returns
    =======
    (values, bins) : tuple
        values are the counts in each bin, bins are either bin edges (fix_offset=False) or
        centre-points of the bins (fix_offset=True)
    """
    if not pd.api.types.is_numeric_dtype(self.data[column]):
        raise ConfigError("get_histogram :: %s is not of numeric type" % (column))

    if fix_offset:
        # Widen the requested range by half a bin on each side so that the
        # original range end points become bin centres.
        low, high = kwargs["range"][0], kwargs["range"][1]
        range_offset = (high - low) / (kwargs["bins"] - 1) / 2
        kwargs["range"] = (low - range_offset, high + range_offset)

    # define numpy array; filter infinities and nans
    column_data = self.data[column].values
    column_data = column_data[np.isfinite(column_data)]

    # histogram of the filtered data (the unfiltered column was used before,
    # which made the filtering above a no-op)
    values, bins = np.histogram(column_data, **kwargs)

    if fix_offset:
        # now bins have the same length as values and represent the midpoint of each bin
        # (e.g. the first bin value is the original minimum value passed to this function)
        bins = bins[:-1] + range_offset

    return values, bins
Example #13
Source File: anndata.py From anndata with BSD 3-Clause "New" or "Revised" License | 4 votes |
def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
    """\
    Transform string annotations to categoricals.

    Only affects string annotations that lead to less categories than the
    total number of observations.

    Params
    ------
    df
        If `df` is `None`, modifies both :attr:`obs` and :attr:`var`,
        otherwise modifies `df` inplace.

    Raises
    ------
    RuntimeError
        If `df` is `None`, `self` is a backed view, and a column would have
        to be converted.

    Notes
    -----
    Turns the view of an :class:`~anndata.AnnData` into an actual
    :class:`~anndata.AnnData`.
    """
    dont_modify = False  # only necessary for backed views
    if df is None:
        dfs = [self.obs, self.var]
        if self.is_view and self.isbacked:
            dont_modify = True
    else:
        dfs = [df]
    # A separate loop name keeps the `df` parameter from being rebound.
    for frame in dfs:
        string_cols = [
            key for key in frame.columns
            if is_string_dtype(frame[key]) and not is_categorical(frame[key])
        ]
        for key in string_cols:
            # make sure we only have strings
            # (could be that there are np.nans (float), -666, "-666", for instance)
            c = frame[key].astype("U")
            # make a categorical
            c = pd.Categorical(c, categories=natsorted(np.unique(c)))
            # not worth converting when every value is its own category
            if len(c.categories) >= len(c):
                continue
            if dont_modify:
                raise RuntimeError(
                    "Please call `.strings_to_categoricals()` on full "
                    "AnnData, not on this view. You might encounter this "
                    "error message while copying or writing to disk."
                )
            if self.is_view:
                warnings.warn(
                    "Initializing view as actual.", ImplicitModificationWarning
                )
            # If `self` is a view, it will be actualized in the next line,
            # therefore the previous warning
            frame[key] = c
            logger.info(f"... storing {key!r} as categorical")