Python numpy.issubdtype() Examples

The following are 30 code examples of numpy.issubdtype(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: column.py    From CHAID with Apache License 2.0 6 votes vote down vote up
def __init__(self, arr=None, metadata=None, missing_id='<missing>',
                 groupings=None, substitute=True, weights=None, name=None):
        """Initialise the column and build its grouping table.

        When `substitute` is true and no `metadata` is given, the values are
        replaced through `self.substitute_values`. When `substitute` is true and
        non-empty `metadata` comes with a non-integer array, both the array and
        the metadata keys are cast to int.

        `groupings`, when given, is indexed by each unique value of the
        (possibly converted) array; otherwise each unique value `x` gets the
        entry `[x, x + 1, False]`.
        """
        super(self.__class__, self).__init__(arr, metadata, missing_id=missing_id, weights=weights, name=name)
        # Integer obtained by casting NaN to int; used below as the metadata key
        # under which `missing_id` is stored.
        self._nan = np.array([np.nan]).astype(int)[0]

        if substitute:
            if metadata is None:
                self.arr, self.orig_type = self.substitute_values(self.arr)
            elif metadata and not np.issubdtype(self.arr.dtype, np.integer):
                # custom metadata has been passed in from external source, and must be converted to int
                self.arr = self.arr.astype(int)
                self.metadata = dict((int(key), label) for key, label in metadata.items())
                self.metadata[self._nan] = missing_id

        self._groupings = {}
        distinct = np.unique(self.arr)
        if groupings is None:
            self._groupings.update((value, [value, value + 1, False]) for value in distinct)
        else:
            self._groupings.update((value, list(groupings[value])) for value in distinct)
        self._possible_groups = None
Example #2
Source File: parameters.py    From buzzard with Apache License 2.0 6 votes vote down vote up
def normalize_channels_parameter(channels, channel_count):
    """Turn a `channels` argument into a list of channel indices.

    Returns a pair `(indices, is_flat)`. `indices` is a list of ints obtained
    by indexing `range(channel_count)` with `channels`. `is_flat` is True only
    when a single channel was requested as a scalar, or when `channels` is
    None and there is exactly one channel.

    Raises TypeError when `channels` is neither None, a slice, nor numeric.
    """
    if channels is None:
        if channel_count == 1:
            return [0], True
        return list(range(channel_count)), False

    # Let numpy indexing resolve negative indices, slices and index lists.
    selected = np.atleast_1d(np.arange(channel_count)[channels]).tolist()

    if isinstance(channels, slice):
        return selected, False

    requested = np.asarray(channels)
    if not np.issubdtype(requested.dtype, np.number):
        raise TypeError('`channels` should be None or int or slice or list of int')

    is_scalar = requested.ndim == 0
    if is_scalar:
        assert len(selected) == 1
    return selected, is_scalar
Example #3
Source File: describe.py    From mars with Apache License 2.0 6 votes vote down vote up
def __call__(self, df_or_series):
        """Build the output object for `describe` on a series or a dataframe.

        The result's shape, dtypes and index are found by running pandas'
        `describe` on an empty object with the same dtype(s).

        Raises NotImplementedError when a non-numeric dtype is involved.
        """
        def _describe(sample):
            # Run pandas' describe with the options stored on this operand.
            return sample.describe(
                percentiles=self._percentiles, include=self._include, exclude=self._exclude)

        if not isinstance(df_or_series, SERIES_TYPE):
            described_df = _describe(build_empty_df(df_or_series.dtypes))
            if any(not np.issubdtype(col_dtype, np.number) for col_dtype in described_df.dtypes):
                raise NotImplementedError('non-numeric type is not supported for now')
            return self.new_dataframe([df_or_series], shape=described_df.shape,
                                      dtypes=described_df.dtypes,
                                      index_value=parse_index(described_df.index, store_data=True),
                                      columns_value=parse_index(described_df.columns, store_data=True))

        if not np.issubdtype(df_or_series.dtype, np.number):
            raise NotImplementedError('non-numeric type is not supported for now')
        described = _describe(pd.Series([], dtype=df_or_series.dtype))
        return self.new_series([df_or_series], shape=(len(described),),
                               dtype=described.dtype,
                               index_value=parse_index(described.index, store_data=True))
Example #4
Source File: writer.py    From buzzard with Apache License 2.0 6 votes vote down vote up
def _checksum(fname, buffer_size=512 * 1024, dtype='uint64'):
    # https://github.com/airware/buzzard/pull/39/#discussion_r239071556
    dtype = np.dtype(dtype)
    dtypesize = dtype.itemsize
    assert buffer_size % dtypesize == 0
    assert np.issubdtype(dtype, np.unsignedinteger)

    acc = dtype.type(0)
    with open(fname, "rb") as f:
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'overflow encountered')

            for chunk in iter(lambda: f.read(buffer_size), b""):
                head = np.frombuffer(chunk, dtype, count=len(chunk) // dtypesize)
                head = np.add.reduce(head, dtype=dtype, initial=acc)
                acc += head

                tailsize = len(chunk) % dtypesize
                if tailsize > 0:
                    # This should only be needed for file's tail
                    tail = chunk[-tailsize:] + b'\0' * (dtypesize - tailsize)
                    tail = np.frombuffer(tail, dtype)
                    acc += tail
        return '{:016x}'.format(acc.item()) 
Example #5
Source File: file_checker.py    From buzzard with Apache License 2.0 6 votes vote down vote up
def _checksum(fname, buffer_size=512 * 1024, dtype='uint64'):
    # https://github.com/airware/buzzard/pull/39/#discussion_r239071556
    dtype = np.dtype(dtype)
    dtypesize = dtype.itemsize
    assert buffer_size % dtypesize == 0
    assert np.issubdtype(dtype, np.unsignedinteger)

    acc = dtype.type(0)
    with open(fname, "rb") as f:
        with np.warnings.catch_warnings():
            np.warnings.filterwarnings('ignore', r'overflow encountered')

            for chunk in iter(lambda: f.read(buffer_size), b""):
                head = np.frombuffer(chunk, dtype, count=len(chunk) // dtypesize)
                head = np.add.reduce(head, dtype=dtype, initial=acc)
                acc += head

                tailsize = len(chunk) % dtypesize
                if tailsize > 0:
                    # This should only be needed for file's tail
                    tail = chunk[-tailsize:] + b'\0' * (dtypesize - tailsize)
                    tail = np.frombuffer(tail, dtype)
                    acc += tail
        return '{:016x}'.format(acc.item()) 
Example #6
Source File: common.py    From naru with Apache License 2.0 6 votes vote down vote up
def SetDistribution(self, distinct_values):
        """This is all the values this column will ever see.

        Stores the sorted unique non-null values on `self.all_distinct_values`,
        with a single null placeholder first if any null was present, and
        records their count in `self.distribution_size`. Returns `self`.
        """
        assert self.all_distinct_values is None
        # pd.isnull returns true for both np.nan and np.datetime64('NaT').
        null_mask = pd.isnull(distinct_values)
        has_null = np.any(null_mask)
        # NOTE: np.sort puts NaT values at beginning, and NaN values at end.
        # For our purposes we always add any null value to the beginning.
        ordered = np.sort(np.unique(distinct_values[~null_mask]))
        if has_null:
            if np.issubdtype(distinct_values.dtype, np.datetime64):
                placeholder = np.datetime64('NaT')
            else:
                placeholder = np.nan
            ordered = np.insert(ordered, 0, placeholder)
        if self.distribution_size is not None:
            # A previously declared size must agree with the data.
            assert len(ordered) == self.distribution_size
        self.all_distinct_values = ordered
        self.distribution_size = len(ordered)
        return self
Example #7
Source File: test_base.py    From mars with Apache License 2.0 6 votes vote down vote up
def testAstype(self):
        """Check shape, dtype, casting mode and memory order after `astype`."""
        source = ones((10, 20, 30), chunk_size=3)

        as_int = source.astype(np.int32).tiles()

        self.assertEqual(as_int.shape, (10, 20, 30))
        self.assertTrue(np.issubdtype(as_int.dtype, np.int32))
        self.assertEqual(as_int.op.casting, 'unsafe')

        # The float -> int32 conversion must be rejected under 'safe' casting.
        with self.assertRaises(TypeError):
            source.astype(np.int32, casting='safe')

        fortran = source.astype(source.dtype, order='F')
        self.assertTrue(fortran.flags['F_CONTIGUOUS'])
        self.assertFalse(fortran.flags['C_CONTIGUOUS'])

        fortran_tiled = fortran.tiles()

        first_chunk = fortran_tiled.chunks[0]
        self.assertEqual(first_chunk.order.value, 'F')
Example #8
Source File: test_linalg.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_basic_property(self):
        # Check A = L L^H
        shapes = [(1, 1), (2, 2), (3, 3), (50, 50), (3, 10, 10)]
        dtypes = (np.float32, np.float64, np.complex64, np.complex128)

        for shape, dtype in itertools.product(shapes, dtypes):
            np.random.seed(1)
            base = np.random.randn(*shape)
            if np.issubdtype(dtype, np.complexfloating):
                base = base + 1j*np.random.randn(*shape)

            # Axis order that swaps the last two axes (matrix transpose of a stack).
            swap_last = list(range(len(shape) - 2)) + [-1, -2]

            # The product A^H A is Hermitian, as Cholesky requires.
            a = np.asarray(np.matmul(base.transpose(swap_last).conj(), base), dtype=dtype)

            c = np.linalg.cholesky(a)

            rebuilt = np.matmul(c, c.transpose(swap_last).conj())
            tolerance = 500 * a.shape[0] * np.finfo(dtype).eps
            assert_allclose(rebuilt, a,
                            err_msg="{} {}\n{}\n{}".format(shape, dtype, a, c),
                            atol=tolerance)
Example #9
Source File: breakdown_metric.py    From lingvo with Apache License 2.0 6 votes vote down vote up
def _AccumulateHistogram(self, statistics=None, labels=None):
    """Accumulate histogram of binned statistic by label.

    Args:
      statistics: int32 np.array of shape [K, 1] of binned statistic
      labels: int32 np.array of shape [K, 1] of labels

    Returns:
      nothing
    """
    assert np.issubdtype(statistics.dtype, int)
    if not statistics.size:
      return
    p = self.params
    assert np.max(statistics) < self._histogram.shape[0], (
        'Histogram shape too small %d vs %d' %
        (np.max(statistics), self._histogram.shape[0]))
    for l in range(p.metadata.NumClasses()):
      indices = np.where(labels == l)[0]
      for s in statistics[indices]:
        self._histogram[s, l] += 1 
Example #10
Source File: _dtype.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _name_get(dtype):
    # provides dtype.name.__get__

    if dtype.isbuiltin == 2:
        # user dtypes don't promise to do anything special
        return dtype.type.__name__

    # Builtin classes are documented as returning a "bit name"
    name = dtype.type.__name__

    # handle bool_, str_, etc
    if name[-1] == '_':
        name = name[:-1]

    # append bit counts to str, unicode, and void
    if np.issubdtype(dtype, np.flexible) and not _isunsized(dtype):
        name += "{}".format(dtype.itemsize * 8)

    # append metadata to datetimes
    elif dtype.type in (np.datetime64, np.timedelta64):
        name += _datetime_metadata_str(dtype)

    return name 
Example #11
Source File: map.py    From mars with Apache License 2.0 5 votes vote down vote up
def __call__(self, series, dtype):
        """Build the output series for `map`, inferring `dtype` when possible.

        When `dtype` is None it is guessed from the mapper stored in
        `self._arg`: the return annotation of a callable, the dtype of a
        `pd.Series` built from a mapping, or the mapper's own `dtype`.
        A guessed integer dtype is not trusted.

        Raises ValueError when no dtype could be determined.
        """
        mapper = self._arg
        if dtype is None:
            guessed = None
            if callable(mapper):
                # arg is a function, try to inspect the signature
                annotation = inspect.signature(mapper).return_annotation
                if annotation is not inspect._empty:
                    guessed = np.dtype(annotation)
            elif isinstance(mapper, MutableMapping):
                guessed = pd.Series(mapper).dtype
            else:
                guessed = mapper.dtype

            is_numeric = guessed is not None and np.issubdtype(guessed, np.number)
            # for the inexact e.g. float
            # we can make the decision,
            # but for int, due to the nan which may occur,
            # we cannot infer the dtype
            if not is_numeric or np.issubdtype(guessed, np.inexact):
                dtype = guessed

        if dtype is None:
            raise ValueError('cannot infer dtype, '
                             'it needs to be specified manually for `map`')
        dtype = np.dtype(np.int64 if dtype is int else dtype)

        # A series mapper is itself an input of the operation.
        inputs = [series, mapper] if isinstance(mapper, SERIES_TYPE) else [series]
        return self.new_series(inputs, shape=series.shape, dtype=dtype,
                               index_value=series.index_value, name=series.name)
Example #12
Source File: _norm.py    From lambda-packs with MIT License 5 votes vote down vote up
def _sparse_frobenius_norm(x):
    if np.issubdtype(x.dtype, np.complexfloating):
        sqnorm = abs(x).power(2).sum()
    else:
        sqnorm = x.power(2).sum()
    return sqrt(sqnorm) 
Example #13
Source File: column.py    From CHAID with Apache License 2.0 5 votes vote down vote up
def __init__(self, arr=None, metadata=None, missing_id='<missing>',
                 weights=None):
        if not np.issubdtype(arr.dtype, np.number):
            raise ValueError('Must only pass numerical values to create continuous column')

        super(self.__class__, self).__init__(np.nan_to_num(arr), metadata, missing_id=missing_id, weights=weights) 
Example #14
Source File: compressed.py    From lambda-packs with MIT License 5 votes vote down vote up
def _divide_sparse(self, other):
        """
        Divide this matrix by a second sparse matrix.

        Raises ValueError when the two shapes differ. An inexact result is
        returned as a dense `np.matrix`; otherwise the sparse result of the
        element-wise division is returned as is.
        """
        if other.shape != self.shape:
            raise ValueError('inconsistent shapes')

        quotient = self._binopt(other, '_eldiv_')

        if not np.issubdtype(quotient.dtype, np.inexact):
            # integers types go with nan <-> 0
            return quotient

        # Eldiv leaves entries outside the combined sparsity
        # pattern empty, so they must be filled manually.
        # Everything outside of other's sparsity is NaN, and everything
        # inside it is either zero or defined by eldiv.
        dense = np.empty(self.shape, dtype=self.dtype)
        dense.fill(np.nan)
        other_rows, other_cols = other.nonzero()
        dense[other_rows, other_cols] = 0
        coo = quotient.tocoo()
        dense[coo.row, coo.col] = coo.data
        return np.matrix(dense)
Example #15
Source File: defchararray.py    From lambda-packs with MIT License 5 votes vote down vote up
def ljust(a, width, fillchar=' '):
    """
    Left-justify each element of `a` in a string of length `width`.

    Applies `str.ljust` to every element.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to pad.
    width : int
        The length of the resulting strings. The largest requested width
        determines the item size of the output.
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.ljust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    max_width = long(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a byte-string fill character.
        fillchar = asbytes(fillchar)
    out_type = (strings.dtype.type, max_width)
    return _vec_string(strings, out_type, 'ljust', (widths, fillchar))
Example #16
Source File: defchararray.py    From lambda-packs with MIT License 5 votes vote down vote up
def center(a, width, fillchar=' '):
    """
    Center each element of `a` in a string of length `width`.

    Applies `str.center` to every element and returns the result as a
    new array.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to pad.
    width : int
        The length of the resulting strings. The largest requested width
        determines the item size of the output.
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See also
    --------
    str.center

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    max_width = long(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a byte-string fill character.
        fillchar = asbytes(fillchar)
    out_type = (strings.dtype.type, max_width)
    return _vec_string(strings, out_type, 'center', (widths, fillchar))
Example #17
Source File: arraypad.py    From lambda-packs with MIT License 5 votes vote down vote up
def _round_ifneeded(arr, dtype):
    """
    Rounds arr inplace if destination dtype is integer.

    Parameters
    ----------
    arr : ndarray
        Input array.
    dtype : dtype
        The dtype of the destination array.

    """
    if np.issubdtype(dtype, np.integer):
        arr.round(out=arr) 
Example #18
Source File: __init__.py    From dgl with Apache License 2.0 5 votes vote down vote up
def tensor(data, dtype=None):
    """Create a backend tensor from `data` on the default context.

    When `dtype` is None, `data` is first converted to a numpy array and the
    dtype is chosen from it: `int64` for integer data, `float32` otherwise.
    """
    if dtype is not None:
        return copy_to(_tensor(data, dtype), _default_context)

    data = zerocopy_to_numpy(data) if is_tensor(data) else np.array(data)
    if np.issubdtype(data.dtype, np.integer):
        dtype = int64
    else:
        dtype = float32
    return copy_to(_tensor(data, dtype), _default_context)
Example #19
Source File: test_ordinal_column.py    From CHAID with Apache License 2.0 5 votes vote down vote up
def test_correctly_subs_floats_for_ints(self):
        """The column built from data containing NaN must hold an integer array."""
        stored_dtype = self.col_with_nan.arr.dtype
        assert np.issubdtype(stored_dtype, np.integer)
Example #20
Source File: defchararray.py    From recruit with Apache License 2.0 5 votes vote down vote up
def ljust(a, width, fillchar=' '):
    """
    Pad each element of `a` on the right so that it is left-justified in
    a string of length `width`.

    `str.ljust` is called element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Input strings.
    width : int
        The length of the resulting strings. The output item size is the
        maximum of the requested widths.
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.ljust

    """
    source = numpy.asarray(a)
    requested = numpy.asarray(width)
    itemsize = long(numpy.max(requested.flat))
    needs_bytes = numpy.issubdtype(source.dtype, numpy.string_)
    if needs_bytes:
        # Byte-string input needs a byte-string fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        source, (source.dtype.type, itemsize), 'ljust', (requested, fillchar))
Example #21
Source File: Interp.py    From pylops with GNU Lesser General Public License v3.0 5 votes vote down vote up
def _linearinterp(M, iava, dims=None, dir=0, dtype='float64'):
    """Linear interpolation.

    Builds an operator that linearly interpolates between the two integer
    samples surrounding each (fractional) position in ``iava``.

    Parameters
    ----------
    M : int
        Size passed to the ``Restriction`` operators; also the number of
        samples along the interpolated axis when ``dims`` is None.
    iava : np.ndarray
        Positions at which to interpolate. Integer arrays are converted to
        float. A float array is modified in place when positions are clipped.
    dims : sequence of int, optional
        Dimensions of the model; ``dims[dir]`` gives the number of samples
        along the interpolated axis.
    dir : int, optional
        Axis along which the interpolation is applied.
    dtype : str, optional
        Type forwarded to the operators.

    Returns
    -------
    Op
        Sum of the two weighted restriction operators.
    iava : np.ndarray
        The positions actually used (after conversion and clipping).
    """
    if np.issubdtype(iava.dtype, np.integer):
        # `np.float` was only an alias of the builtin and has been removed
        # from NumPy; the builtin gives the same float64 conversion.
        iava = iava.astype(float)
    if dims is None:
        lastsample = M
        dimsd = None
    else:
        lastsample = dims[dir]
        dimsd = list(dims)
        dimsd[dir] = len(iava)
        dimsd = tuple(dimsd)

    # ensure that samples are not beyond the last sample, in that case set to
    # penultimate sample and raise a warning
    outside = (iava >= lastsample - 1)
    if np.any(outside):
        logging.warning('at least one value is beyond penultimate sample, '
                        'forced to be at penultimate sample')
    iava[outside] = lastsample - 1 - 1e-10
    _checkunique(iava)

    # find indices and weights
    # (`np.int` was likewise an alias of the builtin `int`)
    iva_l = np.floor(iava).astype(int)
    iva_r = iva_l + 1
    weights = iava - iva_l

    # create operators
    Op = Diagonal(1 - weights, dims=dimsd, dir=dir, dtype=dtype) * \
         Restriction(M, iva_l, dims=dims, dir=dir, dtype=dtype) + \
         Diagonal(weights, dims=dimsd, dir=dir, dtype=dtype) * \
         Restriction(M, iva_r, dims=dims, dir=dir, dtype=dtype)
    return Op, iava
Example #22
Source File: _footprint.py    From buzzard with Apache License 2.0 5 votes vote down vote up
def meshgrid_raster_in(self, other, dtype=None, op=np.floor):
        """Compute raster coordinate matrices of `self` in `other` referential

        Parameters
        ----------
        other: Footprint
            ..
        dtype: None or convertible to np.dtype
            Output dtype
            If None: Use buzz.env.default_index_dtype
        op: None or function operating on a vector
            Function to apply before casting output to dtype
            If None: Do not transform data before casting
            Ignored (treated as None) when the output dtype is not an integer type

        Returns
        -------
        (x, y): (np.ndarray, np.ndarray)
            Raster coordinate matrices
            with shape = self.shape
            with dtype = dtype
        """
        # Check other parameter
        if not isinstance(other, self.__class__):
            raise TypeError('other should be a Footprint') # pragma: no cover

        # Check dtype parameter
        dtype = env.default_index_dtype if dtype is None else conv.dtype_of_any_downcast(dtype)

        # Check op parameter: rounding is only applied for integer outputs
        rounding = op if np.issubdtype(dtype, np.integer) else None

        spatial = np.dstack(self.meshgrid_spatial)
        raster = other.spatial_to_raster(spatial, dtype=dtype, op=rounding)
        return raster[..., 0], raster[..., 1]
Example #23
Source File: DataSet.py    From pyvtk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _check_int_seq(self,obj,mx_int):
        if (hasattr(obj,'dtype')):
            if issubdtype(obj.dtype,integer):
                return 1 if obj.max() >= mx_int else 0
        if common.is_sequence(obj):
            for o in obj:
                if self._check_int_seq(o,mx_int):
                    return 1
        elif not common.is_int(obj) or obj>=mx_int:
            return 1
        return 0 
Example #24
Source File: utils.py    From typhon with MIT License 5 votes vote down vote up
def get_arts_typename(var):
    """Returns the ARTS type name for this variable.

    Containers mapped to ``'Array'`` are resolved to ``'ArrayOf<element type>'``
    when all their elements share one type.

    Args:
        var: Variable to get the ARTS type name for.

    Returns:
        str: ARTS type name, or None if it cannot be determined (empty
        container, elements of mixed types, or elements whose own type is
        undetermined).

    """
    type_name = type(var).__name__
    if type_name in basic_types:
        ret = basic_types[type_name]
        if ret == 'Array':
            if len(var) == 0:
                return None
            element_type = get_arts_typename(var[0])
            # The first element may itself be undetermined (e.g. an empty
            # nested container); concatenating None below would raise TypeError.
            if element_type is None:
                return None
            for element in var[1:]:
                if element_type != get_arts_typename(element):
                    return None
            ret = 'ArrayOf' + element_type
    elif isinstance(var, np.ndarray):
        # The name tables are indexed by number of dimensions minus one.
        if np.issubdtype(var.dtype, np.complex128):
            ret = complex_tensor_names[var.ndim - 1]
        else:
            ret = tensor_names[var.ndim - 1]
    else:
        ret = type_name

    return ret
Example #25
Source File: test_timeseries.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_frame_append_datetime64_column(self):
        """A date range assigned as a new column is stored with dtype M8[ns]."""
        stamps = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
        frame = DataFrame(index=np.arange(len(stamps)))

        frame['A'] = stamps
        column_dtype = frame['A'].dtype
        assert np.issubdtype(column_dtype, np.dtype('M8[ns]'))
Example #26
Source File: test_timeseries.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_frame_ctor_datetime64_column(self):
        """A datetime64 array passed to the constructor keeps dtype M8[ns]."""
        stamps = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
        as_array = np.asarray(stamps)

        columns = {'A': np.random.randn(len(stamps)), 'B': as_array}
        frame = DataFrame(columns)
        assert np.issubdtype(frame['B'].dtype, np.dtype('M8[ns]'))
Example #27
Source File: test_alter_index.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_reindex_series_add_nat():
    """Reindexing past the end keeps dtype M8[ns] and fills the new rows with NaT."""
    stamps = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
    original = Series(stamps)

    extended = original.reindex(lrange(15))
    assert np.issubdtype(extended.dtype, np.dtype('M8[ns]'))

    # Only the five appended positions may be missing.
    missing = extended.isna()
    assert missing[-5:].all()
    assert not missing[:-5].any()
Example #28
Source File: test_category.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_engine_type(self, dtype, engine_type):
        """The index engine class must match the dtype of the category codes."""
        if dtype == np.int64:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = pd.CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            ci.values._codes = ci.values._codes.astype('int64')
        else:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            uniques_by_dtype = {np.int8: 1, np.int16: 128, np.int32: 32768}
            ci = pd.CategoricalIndex(range(uniques_by_dtype[dtype]))
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)
Example #29
Source File: defchararray.py    From recruit with Apache License 2.0 5 votes vote down vote up
def rjust(a, width, fillchar=' '):
    """
    Right-justify each element of `a` in a string of length `width`.

    Applies `str.rjust` to every element.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to pad.
    width : int
        The length of the resulting strings. The largest requested width
        determines the item size of the output.
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.rjust

    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    max_width = long(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a byte-string fill character.
        fillchar = asbytes(fillchar)
    out_type = (strings.dtype.type, max_width)
    return _vec_string(strings, out_type, 'rjust', (widths, fillchar))
Example #30
Source File: test_indexing.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_reindex_frame_add_nat(self):
        """Reindexing a frame past its end keeps M8[ns] and fills new rows with NaT."""
        stamps = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
        frame = DataFrame({'A': np.random.randn(len(stamps)), 'B': stamps})

        extended = frame.reindex(lrange(15))
        assert np.issubdtype(extended['B'].dtype, np.dtype('M8[ns]'))

        # Only the five appended rows may be missing in column 'B'.
        missing = com.isna(extended)['B']
        assert missing[-5:].all()
        assert not missing[:-5].any()