Python numpy.nans() Examples

The following are 13 code examples that reference numpy.nans. Note that NumPy itself does not provide a nans() function: in these examples, "nans" appears in docstrings and comments as informal shorthand for NaN values created with numpy.nan. The source file and originating project are listed above each example.
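Since there is no numpy.nans() constructor, here is a minimal sketch of the two usual idioms for building a NaN-filled array, which the examples below rely on:

import numpy as np

# the usual one-liner for a NaN-filled array
a = np.full((3, 5), np.nan)

# an equivalent two-step spelling, common in older code
b = np.empty((3, 5))
b[:] = np.nan

assert np.isnan(a).all() and np.isnan(b).all()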
Example #1
Source File: test_nanguardmode.py    From D-VAE with MIT License
def test_NanGuardMode():
    """
    Tests that NanGuardMode works by intentionally feeding in numpy.inf and
    numpy.nan values. A working implementation should be able to capture all
    the abnormalities.
    """
    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
    y = T.dot(x, w)

    fun = theano.function(
        [x], y,
        mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )
    a = numpy.random.randn(3, 5).astype(theano.config.floatX)
    infa = numpy.tile(
        (numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
    nana = numpy.tile(
        numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5))
    biga = numpy.tile(
        numpy.asarray(1e20).astype(theano.config.floatX), (3, 5))

    fun(a)  # normal values

    # Temporarily silence logger
    _logger = logging.getLogger("theano.compile.nanguardmode")
    try:
        _logger.propagate = False
        assert_raises(AssertionError, fun, infa)  # INFs
        assert_raises(AssertionError, fun, nana)  # NANs
        assert_raises(AssertionError, fun, biga)  # big values
    finally:
        _logger.propagate = True 
Example #2
Source File: test_nanguardmode.py    From attention-lvcsr with MIT License
def test_NanGuardMode():
    """
    Tests that NanGuardMode works by intentionally feeding in numpy.inf and
    numpy.nan values. A working implementation should be able to capture all
    the abnormalities.
    """
    x = T.matrix()
    w = theano.shared(numpy.random.randn(5, 7).astype(theano.config.floatX))
    y = T.dot(x, w)

    fun = theano.function(
        [x], y,
        mode=NanGuardMode(nan_is_error=True, inf_is_error=True)
    )
    a = numpy.random.randn(3, 5).astype(theano.config.floatX)
    infa = numpy.tile(
        (numpy.asarray(100.) ** 1000000).astype(theano.config.floatX), (3, 5))
    nana = numpy.tile(
        numpy.asarray(numpy.nan).astype(theano.config.floatX), (3, 5))
    biga = numpy.tile(
        numpy.asarray(1e20).astype(theano.config.floatX), (3, 5))

    fun(a)  # normal values

    # Temporarily silence logger
    _logger = logging.getLogger("theano.compile.nanguardmode")
    try:
        _logger.propagate = False
        assert_raises(AssertionError, fun, infa)  # INFs
        assert_raises(AssertionError, fun, nana)  # NANs
        assert_raises(AssertionError, fun, biga)  # big values
    finally:
        _logger.propagate = True 
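Both tests above exercise the same idea: a guarded function should refuse inputs containing NaNs, Infs, or implausibly large values. A minimal pure-NumPy sketch of such a check (the 1e10 threshold is an illustrative assumption, not NanGuardMode's actual cutoff):

import numpy as np

def nan_guard(arr, big=1e10):
    """Raise AssertionError if arr contains NaNs, Infs, or huge values."""
    assert not np.isnan(arr).any(), "NaN detected"
    assert not np.isinf(arr).any(), "Inf detected"
    assert np.abs(arr).max() < big, "suspiciously large value detected"
    return arr

nan_guard(np.random.randn(3, 5))          # passes
# nan_guard(np.full((3, 5), np.nan))      # would raise AssertionError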
Example #3
Source File: clean.py    From cfanalytics with BSD 3-Clause "New" or "Revised" License
def _rm_all_Sc(self):
        """Remove people who didn't enter a Rx score in Rx division.
        And set other Scale values as np.nans.
        
        Returns
        -------
        cfopendata : pd.Dataframe
            Crossfit open data with less rows.
        """
        l = []
        for i in range(self.wodscompleted):
            l.append(self.df.loc[:, self.scorel[i]].values.tolist())
        
        # Create an index for NaNs, updated for each column
        ii = np.empty(shape=(self.wodscompleted, len(self.df)), dtype=int)
        ii[:] = -1
        
        # Find scaled inputs and set to nan
        for i in range(self.wodscompleted):
            for j in range(len(self.df)):
                if l[i][j].endswith('- s'):
                    ii[i,j] = j
        
        for i in range(self.wodscompleted):
            tmp = ii[i,:]
            _tmp = tmp[tmp >= 0]
            self.df.iloc[_tmp,self.ci[i]] = np.nan
        
        # If all scores are NaN, set the row to NaN and remove it
        _ind = pd.isnull(self.df.loc[:, self.scorel]).all(axis=1)
        _in2 = _ind[_ind].index.values
        self.df.iloc[_in2, :] = np.nan
        self.df = self.df.dropna(axis=0, how='all').reset_index(drop=True)
        return self.df 
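The nested loops above can usually be replaced with vectorized pandas string operations. A hedged sketch against a hypothetical single score column (na=False guards against non-string entries):

import numpy as np
import pandas as pd

df = pd.DataFrame({'17.1_score': ['100 - s', '95', '120 - s', '80']})
# mark every scaled entry (suffix '- s') as NaN in one pass
mask = df['17.1_score'].str.endswith('- s', na=False)
df.loc[mask, '17.1_score'] = np.nan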
Example #4
Source File: clean.py    From cfanalytics with BSD 3-Clause "New" or "Revised" License
def _rm_all_Sc_and_0s(self):
        """Remove people who didn't enter a Rx score in Rx division or did not 
        enter a score.
        
        Returns
        -------
        cfopendata : pd.Dataframe
            Crossfit open data with less rows.
        """
        l = []
        for i in range(self.wodscompleted):
            l.append(self.df.loc[:, self.scorel[i]].values.tolist())
            
        # Create an index for NaNs, updated for each column
        ii = np.empty(shape=(self.wodscompleted, len(self.df)), dtype=int)
        ii[:] = -1

        # Find empty scores
        for i in range(self.wodscompleted):
            for j in range(len(self.df)):
                if l[i][j] == '0' or l[i][j] == '':
                    ii[i,j] = j
                     
        for i in range(self.wodscompleted):
            tmp = ii[i,:]
            _tmp = tmp[tmp >= 0]
            self.df.iloc[_tmp,self.ci[i]] = np.nan
        
        # If all scores are NaN, set the row to NaN and remove it
        _ind = pd.isnull(self.df.loc[:, self.scorel]).all(axis=1)
        _in2 = _ind[_ind].index.values
        self.df.iloc[_in2, :] = np.nan
        self.df = self.df.dropna(axis=0, how='all').reset_index(drop=True)
        return self.df 
Example #5
Source File: clean.py    From cfanalytics with BSD 3-Clause "New" or "Revised" License
def _rm_Sc_str(self):
        """Remove the ' - s' from all the scores. and make '0' or '' a np.nan
        
        Returns
        -------
        cfopendata : pd.Dataframe
            Crossfit open data without ' - s' in scores and np.nans.
        """
        l = []
        nl = []
        for i in range(self.wodscompleted):
            l.append(self.df.loc[:, self.scorel[i]].values.tolist())
            nl.append(self.df.loc[:, self.scorel[i]].values.tolist())
        
        # Create an index for NaNs, updated for each column
        ii = np.empty(shape=(self.wodscompleted, len(self.df)), dtype=int)
        ii[:] = -1
        
        # Strip the suffix from scaled inputs; mark everything else for NaN
        for i in range(self.wodscompleted):
            for j in range(len(self.df)):
                if l[i][j].endswith('- s'):
                    nl[i][j] = l[i][j][0:-4]
                else:
                    ii[i,j] = j
        
        # Write the cleaned scores back in, then set the NaNs
        for i in range(self.wodscompleted):
            self.df.iloc[:,self.ci[i]] = nl[i][:]
            tmp = ii[i,:]
            _tmp = tmp[tmp >= 0]
            self.df.iloc[_tmp,self.ci[i]] = np.nan         
        return self.df 
Example #6
Source File: write_gctx.py    From cmapPy with BSD 3-Clause "New" or "Revised" License
def write_metadata(hdf5_out, dim, metadata_df, convert_back_to_neg_666, gzip_compression):
    """
	Writes either column or row metadata to proper node of gctx out (hdf5) file.

	Input:
		- hdf5_out (h5py): open hdf5 file to write to
		- dim (str; must be "row" or "col"): dimension of metadata to write to 
		- metadata_df (pandas DataFrame): metadata DataFrame to write to file 
		- convert_back_to_neg_666 (bool): Whether to convert numpy.nans back to "-666",
				as per CMap metadata null convention 
	"""
    if dim == "col":
        hdf5_out.create_group(col_meta_group_node)
        metadata_node_name = col_meta_group_node
    elif dim == "row":
        hdf5_out.create_group(row_meta_group_node)
        metadata_node_name = row_meta_group_node
    else:
        logger.error("'dim' argument must be either 'row' or 'col'!")
        raise ValueError("'dim' argument must be either 'row' or 'col'!")

    # write id field to expected node
    hdf5_out.create_dataset(metadata_node_name + "/id", data=[numpy.string_(str(x)) for x in metadata_df.index],
        compression=gzip_compression)

    metadata_fields = list(metadata_df.columns.copy())

    # if specified, convert numpy.nans in metadata back to -666
    if convert_back_to_neg_666:
        for c in metadata_fields:
            metadata_df[[c]] = metadata_df[[c]].replace([numpy.nan], ["-666"])

    # write metadata columns to their own arrays
    for field in [entry for entry in metadata_fields if entry != "ind"]:
        if numpy.array(metadata_df.loc[:, field]).dtype.type in (numpy.str_, numpy.object_):
            array_write = numpy.array(metadata_df.loc[:, field]).astype('S')
        else:
            array_write = numpy.array(metadata_df.loc[:, field])
        hdf5_out.create_dataset(metadata_node_name + "/" + field,
                                data=array_write,
                                compression=gzip_compression) 
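The "-666" null convention only matters at the file boundary; a short round-trip sketch of the same substitution with plain pandas (the column name is hypothetical):

import numpy as np
import pandas as pd

meta = pd.DataFrame({'pert_time': [24.0, np.nan, 6.0]})
out = meta.replace([np.nan], ["-666"])     # NaN -> "-666" on write
back = out.replace(["-666"], [np.nan])     # "-666" -> NaN on read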
Example #7
Source File: read_path.py    From control with GNU General Public License v3.0
def get_single(**kwargs):
    """Wrap the number with np.nans on either end
    """

    num = get_raw_data(**kwargs)
    # pad the trajectory with a NaN row at each end so consecutive
    # strokes stay disconnected when plotted
    new_array = np.zeros((num.shape[0] + 2, num.shape[1]))
    new_array[0] = [np.nan, np.nan]
    new_array[-1] = [np.nan, np.nan]
    new_array[1:-1] = num

    return new_array 
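The NaN padding pays off at plotting time: matplotlib, like most plotting backends, lifts the pen at NaN points, so consecutive trajectories stay visually disconnected. A small sketch with two synthetic (x, y) paths:

import numpy as np
import matplotlib.pyplot as plt

path1 = np.column_stack([np.linspace(0, 1, 50), np.sin(np.linspace(0, 6, 50))])
path2 = path1 + 2.0
nan_row = np.full((1, 2), np.nan)
combined = np.vstack([nan_row, path1, nan_row, path2, nan_row])

# a single plot call draws two separate strokes; the NaN rows break the line
plt.plot(combined[:, 0], combined[:, 1])
plt.show()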
Example #8
Source File: read_path.py    From control with GNU General Public License v3.0
def get_sequence(sequence, writebox, spaces=False):
    """Returns a sequence 

    sequence list: the sequence of integers
    writebox list: [min x, max x, min y, max y]
    """

    nans = np.array([np.nan, np.nan])
    nums = nans.copy()

    if spaces is False:
        each_num_width = (writebox[1] - writebox[0]) / float(len(sequence))
    else:
        each_num_width = (writebox[1] - writebox[0]) / float(len(sequence) * 2 - 1)

    for ii, nn in enumerate(sequence):

        if spaces is False:
            num_writebox = [writebox[0] + each_num_width * ii,
                            writebox[0] + each_num_width * (ii + 1),
                            writebox[2], writebox[3]]
        else:
            num_writebox = [writebox[0] + each_num_width * 2 * ii,
                            writebox[0] + each_num_width * 2 * (ii + .5),
                            writebox[2], writebox[3]]
        if isinstance(nn, int):
            nn = str(nn)
        num = get_raw_data(nn, num_writebox)
        nums = np.vstack([nums, num, nans])

    return nums 


Example #9
Source File: checks.py    From bulwark with GNU Lesser General Public License v3.0
def has_no_nans(df, columns=None):
    """Asserts that there are no np.nans in `df`.

    This is a convenience wrapper for `has_no_x`.

    Args:
        df (pd.DataFrame): Any pd.DataFrame.
        columns (list): A subset of columns to check for np.nans.

    Returns:
        Original `df`.

    """
    return has_no_x(df, values=[np.nan], columns=columns) 
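Because the check returns the original frame, it can be dropped into a pipeline. A hedged usage sketch, assuming has_no_x raises AssertionError on failure:

import numpy as np
import pandas as pd
from bulwark.checks import has_no_nans

clean = pd.DataFrame({'a': [1.0, 2.0, 3.0]})
dirty = pd.DataFrame({'a': [1.0, np.nan, 3.0]})

clean = has_no_nans(clean)   # passes and returns the frame unchanged
try:
    has_no_nans(dirty)       # assumed to raise AssertionError
except AssertionError as err:
    print("found NaNs:", err)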
Example #10
Source File: clean.py    From cfanalytics with BSD 3-Clause "New" or "Revised" License
def _extract_score(self, wod):
        """Convert workout score to a pd.Timedelta or integer.
        
        Parameters
        ----------
        wod : string
            Name of the wod.
        
        Returns
        -------
        cfopendata : pd.Dataframe
            Scores are either a pd.Timedelta or an integer.
        """
        df_c_name = wod+'_score'
        s = self.df.loc[:,df_c_name].values.tolist()
       
        # Keep track of the indices
        tdi = np.empty(shape=(0, 0), dtype=int) # time delta
        ii = np.empty(shape=(0, 0), dtype=int) # integers
        ni = np.empty(shape=(0, 0), dtype=int) # np.nans
        
        # initialize new_score array
        _s = self.df.loc[:,df_c_name].reset_index(drop=True)
        for i, _str in enumerate(s):
            # nans
            if isinstance(_str, float):
                ni = np.append(ni, i)
            else:
                # Convert time to time delta
                if ':' in _str:
                    # Some team scores are H:MM:SS
                    if _str.count(':') > 1:                
                        _s[i] = pd.to_timedelta(_str)
                    else:
                        _s[i] = pd.to_timedelta('0:'+_str)
                    tdi = np.append(tdi, i)
                # Convert reps/weight to integers
                else:
                    _s[i] = int(_str.split(" ")[0])
                    # Drop scores of 0 reps/weight
                    if _s[i] > 0:
                        ii = np.append(ii, i)
                    else:
                        _s[i] = np.nan
                        ni = np.append(ni, i)
        self.cleandata.loc[:,df_c_name] = _s.values
        self.tdi = tdi
        self.ii = ii
        self.ni = ni
        return self 
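The '0:' prefix exists because pd.to_timedelta expects an hours field: 'H:MM:SS' parses directly, while a bare 'MM:SS' score does not. A short sketch of that normalization:

import pandas as pd

raw = '12:34'                        # a MM:SS score as reported
td = pd.to_timedelta('0:' + raw)     # Timedelta('0 days 00:12:34')
team = pd.to_timedelta('1:02:03')    # H:MM:SS team scores parse as-is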
Example #11
Source File: clean.py    From cfanalytics with BSD 3-Clause "New" or "Revised" License
def _wod_percentile(self, wod):
        """Calculate wod percentile for reps/weight.
        
        Parameters
        ----------
        wod : string
            Name of wod.
        
        Returns
        -------
        cfopendata : pd.Dataframe
            Added percentile column.
        """  
        df_c_name = wod+'_score'
        _s = self.cleandata.loc[:,df_c_name]
        _s_i = _s[self.ii]
        _s_i_sorted = _s_i.sort_values(ascending=False)

        pct = np.flip(np.round(np.linspace(0, 100, num=len(_s_i_sorted)),
                               decimals=4), 0)
        # Remove duplicates        
        pct = self._rm_dups_wod(_s_i_sorted, pct)    
        
        # Check if any nans rows
        if len(self.ni) > 0:
            # Get nan rows
            _s_n = _s[self.ni]
            # Append to s_td_sorted
            _s_i_sorted = _s_i_sorted.append(_s_n)
            # Add nans into pct
            _nan_arr = np.arange(len(_s_i_sorted) - len(pct), dtype=np.double)
            _nan_arr[:] = np.nan
            # Append NaN to pct
            pct = np.append(pct, _nan_arr)

        # Append pct to _s_i_sorted
        _df_i_sorted = _s_i_sorted.to_frame(name = df_c_name)
        _df_i_sorted2 = _df_i_sorted.copy()
        _df_i_sorted2 = _df_i_sorted2.rename(
                columns={wod+'_score': wod+'_percentile'})
        pct = np.transpose(np.expand_dims(pct, axis=0))
        _df_i_sorted2.loc[:] = pct
        # Put back into dataframe index
        _df_i_sorted2.index = _df_i_sorted2.index.map(int)
        _df = _df_i_sorted2.sort_index()
        pct_vals = _df.values
 
        # Add to self.cleandata
        self.cleandata.loc[:, wod+'_percentile'] = pct_vals
        return self 
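For reference, pandas can produce a rank-based percentile in one call. A rough sketch that is not byte-identical to the duplicate handling above, but captures the idea (higher score, higher percentile; NaNs propagate automatically):

import pandas as pd

scores = pd.Series([300.0, 250.0, 250.0, float('nan'), 100.0])
pct = scores.rank(method='min', ascending=True, pct=True) * 100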
Example #12
Source File: variabilityops.py    From anvio with GNU General Public License v3.0
def get_histogram(self, column, fix_offset=False, **kwargs):
        """ Return a histogram (counts and bins) for a specified column of self.data

        Parameters
        ==========
        column : str
            The name of the column you want to get a histogram for. Must be numeric type
        fix_offset : bool, False
            If True, bins are set to the centre points of each bin rather than the
            bin edges. This decreases the length of bins by 1, since there is one
            fewer bin than there are bin edges.
        **kwargs : dict, optional
            Any arguments of np.histogram
            (https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.histogram.html)

        Returns
        =======
        (values, bins) : tuple
            values are the counts in each bin, bins are either bin edges (fix_offset=False) or
            centre-points of the bins (fix_offset=True)
        """

        if not pd.api.types.is_numeric_dtype(self.data[column]):
            raise ConfigError("get_histogram :: %s is not of numeric type" % (column))

        if fix_offset:
            range_offset = (kwargs["range"][1] - kwargs["range"][0]) / (kwargs["bins"] - 1) / 2
            kwargs["range"] = (kwargs["range"][0] - range_offset, kwargs["range"][1] + range_offset)

        # define numpy array; filter infinities and nans
        column_data = self.data[column].values
        column_data = column_data[np.isfinite(column_data)]

        # histogram
        values, bins = np.histogram(column_data, **kwargs)

        if fix_offset:
            bins = bins[:-1] + range_offset
            # now bins has the same length as values and represents the midpoint of
            # each bin (e.g. the first bin value is the original minimum value
            # passed to this function)

        return values, bins 
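The fix_offset bookkeeping is one way to get bin centres; the more common recipe averages adjacent bin edges after the fact. A minimal sketch:

import numpy as np

data = np.random.randn(1000)
values, edges = np.histogram(data, bins=20)
centres = (edges[:-1] + edges[1:]) / 2   # same length as values
assert len(centres) == len(values)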
Example #13
Source File: anndata.py    From anndata with BSD 3-Clause "New" or "Revised" License
def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
        """\
        Transform string annotations to categoricals.

        Only affects string annotations that lead to fewer categories than the
        total number of observations.

        Params
        ------
        df
            If `df` is `None`, modifies both :attr:`obs` and :attr:`var`,
            otherwise modifies `df` inplace.

        Notes
        -----
        Turns the view of an :class:`~anndata.AnnData` into an actual
        :class:`~anndata.AnnData`.
        """
        dont_modify = False  # only necessary for backed views
        if df is None:
            dfs = [self.obs, self.var]
            if self.is_view and self.isbacked:
                dont_modify = True
        else:
            dfs = [df]
        for df in dfs:
            string_cols = [
                key
                for key in df.columns
                if is_string_dtype(df[key]) and not is_categorical(df[key])
            ]
            for key in string_cols:
                # make sure we only have strings
                # (could be that there are np.nans (float), -666, "-666", for instance)
                c = df[key].astype("U")
                # make a categorical
                c = pd.Categorical(c, categories=natsorted(np.unique(c)))
                if len(c.categories) >= len(c):
                    continue
                if dont_modify:
                    raise RuntimeError(
                        "Please call `.strings_to_categoricals()` on full "
                        "AnnData, not on this view. You might encounter this"
                        "error message while copying or writing to disk."
                    )
                if self.is_view:
                    warnings.warn(
                        "Initializing view as actual.", ImplicitModificationWarning
                    )
                # If `self` is a view, it will be actualized in the next line,
                # therefore the previous warning
                df[key] = c
                logger.info(f"... storing {key!r} as categorical")
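A minimal usage sketch (the AnnData construction here is illustrative): with three observations and only two distinct strings, the column qualifies and is stored as a categorical in place.

import anndata as ad
import numpy as np
import pandas as pd

adata = ad.AnnData(
    X=np.zeros((3, 2)),
    obs=pd.DataFrame({'group': ['a', 'b', 'a']}, index=['c1', 'c2', 'c3']),
)
adata.strings_to_categoricals()
print(adata.obs['group'].dtype)   # category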