Python pandas._libs.lib.infer_dtype() Examples

The following are 30 code examples of pandas._libs.lib.infer_dtype(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas._libs.lib , or try the search function .
Example #1
Source File: test_inference.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical(self):

        # GH 8974
        from pandas import Categorical, Series
        arr = Categorical(list('abc'))
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == 'categorical'

        arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == 'categorical' 
Example #2
Source File: test_inference.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_floats(self):
        arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating'

        arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                       dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed-integer'

        arr = np.array([1, 2, 3, 4, 5], dtype='f4')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating'

        arr = np.array([1, 2, 3, 4, 5], dtype='f8')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating' 
Example #3
Source File: test_inference.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_decimals(self):
        # GH15690
        arr = np.array([Decimal(1), Decimal(2), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal'

        arr = np.array([1.0, 2.0, Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed'

        arr = np.array([Decimal(1), Decimal('NaN'), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal'

        arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal' 
Example #4
Source File: algorithms.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _get_data_algo(values, func_map):

    if is_categorical_dtype(values):
        values = values._values_for_rank()

    values, dtype, ndtype = _ensure_data(values)
    if ndtype == 'object':

        # it's cheaper to use a String Hash Table than Object; we infer
        # including nulls because that is the only difference between
        # StringHashTable and ObjectHashtable
        if lib.infer_dtype(values, skipna=False) in ['string']:
            ndtype = 'string'

    f = func_map.get(ndtype, func_map['object'])

    return f, values


# --------------- #
# top-level algos #
# --------------- # 
Example #5
Source File: missing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _infer_fill_value(val):
    """
    infer the fill value for the nan/NaT from the provided
    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
    element to provide proper block construction
    """

    if not is_list_like(val):
        val = [val]
    val = np.array(val, copy=False)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(ensure_object(val), skipna=False)
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan 
Example #6
Source File: test_inference.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_bools(self):
        arr = np.array([True, False, True, True, True], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([True, False, True, 'foo'], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed'

        arr = np.array([True, False, True], dtype=bool)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([True, np.nan, False], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        result = lib.infer_dtype(arr, skipna=False)
        assert result == 'mixed' 
Example #7
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_categorical(self):

        # GH 8974
        from pandas import Categorical, Series
        arr = Categorical(list('abc'))
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == 'categorical'

        arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr), skipna=True)
        assert result == 'categorical' 
Example #8
Source File: _converter.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _convert_1d(values, units, axis):
        if not hasattr(axis, 'freq'):
            raise TypeError('Axis must have `freq` set to convert to Periods')
        valid_types = (compat.string_types, datetime,
                       Period, pydt.date, pydt.time, np.datetime64)
        if (isinstance(values, valid_types) or is_integer(values) or
                is_float(values)):
            return get_datevalue(values, axis.freq)
        elif isinstance(values, PeriodIndex):
            return values.asfreq(axis.freq)._ndarray_values
        elif isinstance(values, Index):
            return values.map(lambda x: get_datevalue(x, axis.freq))
        elif lib.infer_dtype(values, skipna=False) == 'period':
            # https://github.com/pandas-dev/pandas/issues/24304
            # convert ndarray[period] -> PeriodIndex
            return PeriodIndex(values, freq=axis.freq)._ndarray_values
        elif isinstance(values, (list, tuple, np.ndarray, Index)):
            return [get_datevalue(x, axis.freq) for x in values]
        return values 
Example #9
Source File: algorithms.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _get_data_algo(values, func_map):

    if is_categorical_dtype(values):
        values = values._values_for_rank()

    values, dtype, ndtype = _ensure_data(values)
    if ndtype == 'object':

        # it's cheaper to use a String Hash Table than Object; we infer
        # including nulls because that is the only difference between
        # StringHashTable and ObjectHashtable
        if lib.infer_dtype(values, skipna=False) in ['string']:
            ndtype = 'string'

    f = func_map.get(ndtype, func_map['object'])

    return f, values


# --------------- #
# top-level algos #
# --------------- # 
Example #10
Source File: missing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _infer_fill_value(val):
    """
    infer the fill value for the nan/NaT from the provided
    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
    element to provide proper block construction
    """

    if not is_list_like(val):
        val = [val]
    val = np.array(val, copy=False)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(ensure_object(val), skipna=False)
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan 
Example #11
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_decimals(self):
        # GH15690
        arr = np.array([Decimal(1), Decimal(2), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal'

        arr = np.array([1.0, 2.0, Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed'

        arr = np.array([Decimal(1), Decimal('NaN'), Decimal(3)])
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal'

        arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'decimal' 
Example #12
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_floats(self):
        arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating'

        arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                       dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed-integer'

        arr = np.array([1, 2, 3, 4, 5], dtype='f4')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating'

        arr = np.array([1, 2, 3, 4, 5], dtype='f8')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'floating' 
Example #13
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_bools(self):
        arr = np.array([True, False, True, True, True], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([True, False, True, 'foo'], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'mixed'

        arr = np.array([True, False, True], dtype=bool)
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        arr = np.array([True, np.nan, False], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean'

        result = lib.infer_dtype(arr, skipna=False)
        assert result == 'mixed' 
Example #14
Source File: _converter.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _convert_1d(values, units, axis):
        if not hasattr(axis, 'freq'):
            raise TypeError('Axis must have `freq` set to convert to Periods')
        valid_types = (compat.string_types, datetime,
                       Period, pydt.date, pydt.time, np.datetime64)
        if (isinstance(values, valid_types) or is_integer(values) or
                is_float(values)):
            return get_datevalue(values, axis.freq)
        elif isinstance(values, PeriodIndex):
            return values.asfreq(axis.freq)._ndarray_values
        elif isinstance(values, Index):
            return values.map(lambda x: get_datevalue(x, axis.freq))
        elif lib.infer_dtype(values, skipna=False) == 'period':
            # https://github.com/pandas-dev/pandas/issues/24304
            # convert ndarray[period] -> PeriodIndex
            return PeriodIndex(values, freq=axis.freq)._ndarray_values
        elif isinstance(values, (list, tuple, np.ndarray, Index)):
            return [get_datevalue(x, axis.freq) for x in values]
        return values 
Example #15
Source File: test_inference.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_bools(self):
        arr = np.array([True, False, True, True, True], dtype='O')
        result = lib.infer_dtype(arr)
        assert result == 'boolean'

        arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
        result = lib.infer_dtype(arr)
        assert result == 'boolean'

        arr = np.array([True, False, True, 'foo'], dtype='O')
        result = lib.infer_dtype(arr)
        assert result == 'mixed'

        arr = np.array([True, False, True], dtype=bool)
        result = lib.infer_dtype(arr)
        assert result == 'boolean'

        arr = np.array([True, np.nan, False], dtype='O')
        result = lib.infer_dtype(arr, skipna=True)
        assert result == 'boolean' 
Example #16
Source File: test_inference.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_floats(self):
        arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
        result = lib.infer_dtype(arr)
        assert result == 'floating'

        arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
                       dtype='O')
        result = lib.infer_dtype(arr)
        assert result == 'mixed-integer'

        arr = np.array([1, 2, 3, 4, 5], dtype='f4')
        result = lib.infer_dtype(arr)
        assert result == 'floating'

        arr = np.array([1, 2, 3, 4, 5], dtype='f8')
        result = lib.infer_dtype(arr)
        assert result == 'floating' 
Example #17
Source File: missing.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _infer_fill_value(val):
    """
    infer the fill value for the nan/NaT from the provided
    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
    element to provide proper block construction
    """

    if not is_list_like(val):
        val = [val]
    val = np.array(val, copy=False)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(_ensure_object(val))
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan 
Example #18
Source File: test_inference.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_categorical(self):

        # GH 8974
        from pandas import Categorical, Series
        arr = Categorical(list('abc'))
        result = lib.infer_dtype(arr)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr))
        assert result == 'categorical'

        arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
        result = lib.infer_dtype(arr)
        assert result == 'categorical'

        result = lib.infer_dtype(Series(arr))
        assert result == 'categorical' 
Example #19
Source File: algorithms.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _get_hashtable_algo(values):
    """
    Parameters
    ----------
    values : arraylike

    Returns
    -------
    tuples(hashtable class,
           vector class,
           values,
           dtype,
           ndtype)
    """
    values, dtype, ndtype = _ensure_data(values)

    if ndtype == 'object':

        # it's cheaper to use a String Hash Table than Object; we infer
        # including nulls because that is the only difference between
        # StringHashTable and ObjectHashtable
        if lib.infer_dtype(values, skipna=False) in ['string']:
            ndtype = 'string'
        else:
            ndtype = 'object'

    htable, table = _hashtables[ndtype]
    return (htable, table, values, dtype, ndtype) 
Example #20
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_object_empty(self, box, missing, dtype, skipna, expected):
        # GH 23421
        arr = box([missing, missing], dtype=dtype)

        result = lib.infer_dtype(arr, skipna=skipna)
        assert result == expected 
Example #21
Source File: sql.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _get_notna_col_dtype(self, col):
        """
        Infer datatype of the Series col.  In case the dtype of col is 'object'
        and it contains NA values, this infers the datatype of the not-NA
        values.  Needed for inserting typed data containing NULLs, GH8778.
        """
        col_for_inference = col
        if col.dtype == 'object':
            notnadata = col[~isna(col)]
            if len(notnadata):
                col_for_inference = notnadata

        return lib.infer_dtype(col_for_inference) 
Example #22
Source File: common.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def is_datetime_arraylike(arr):
    """
    Check whether an array-like is a datetime array-like or DatetimeIndex.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean : Whether or not the array-like is a datetime
              array-like or DatetimeIndex.

    Examples
    --------
    >>> is_datetime_arraylike([1, 2, 3])
    False
    >>> is_datetime_arraylike(pd.Index([1, 2, 3]))
    False
    >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3]))
    True
    """

    if isinstance(arr, ABCDatetimeIndex):
        return True
    elif isinstance(arr, (np.ndarray, ABCSeries)):
        return arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
    return getattr(arr, 'inferred_type', None) == 'datetime' 
Example #23
Source File: series.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _set_with(self, key, value):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind='getitem')
            return self._set_values(indexer, value)
        else:
            if isinstance(key, tuple):
                try:
                    self._set_values(key, value)
                except Exception:
                    pass

            if is_scalar(key):
                key = [key]
            elif not isinstance(key, (list, Series, np.ndarray)):
                try:
                    key = list(key)
                except Exception:
                    key = [key]

            if isinstance(key, Index):
                key_type = key.inferred_type
            else:
                key_type = lib.infer_dtype(key, skipna=False)

            if key_type == 'integer':
                if self.index.inferred_type == 'integer':
                    self._set_labels(key, value)
                else:
                    return self._set_values(key, value)
            elif key_type == 'boolean':
                self._set_values(key.astype(np.bool_), value)
            else:
                self._set_labels(key, value) 
Example #24
Source File: common.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def is_period_arraylike(arr):
    """
    Check whether an array-like is a periodical array-like or PeriodIndex.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean : Whether or not the array-like is a periodical
              array-like or PeriodIndex instance.

    Examples
    --------
    >>> is_period_arraylike([1, 2, 3])
    False
    >>> is_period_arraylike(pd.Index([1, 2, 3]))
    False
    >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D"))
    True
    """

    if isinstance(arr, ABCPeriodIndex):
        return True
    elif isinstance(arr, (np.ndarray, ABCSeries)):
        return arr.dtype == object and lib.infer_dtype(arr) == 'period'
    return getattr(arr, 'inferred_type', None) == 'period' 
Example #25
Source File: sql.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _sql_type_name(self, col):
        dtype = self.dtype or {}
        if col.name in dtype:
            return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        if col_type == 'timedelta64':
            warnings.warn("the 'timedelta' type is not supported, and will be "
                          "written as integer values (ns frequency) to the "
                          "database.", UserWarning, stacklevel=8)
            col_type = "integer"

        elif col_type == "datetime64":
            col_type = "datetime"

        elif col_type == "empty":
            col_type = "string"

        elif col_type == "complex":
            raise ValueError('Complex datatypes not supported')

        if col_type not in _SQL_TYPES:
            col_type = "string"

        return _SQL_TYPES[col_type] 
Example #26
Source File: algorithms.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _get_hashtable_algo(values):
    """
    Parameters
    ----------
    values : arraylike

    Returns
    -------
    tuples(hashtable class,
           vector class,
           values,
           dtype,
           ndtype)
    """
    values, dtype, ndtype = _ensure_data(values)

    if ndtype == 'object':

        # its cheaper to use a String Hash Table than Object
        if lib.infer_dtype(values) in ['string']:
            ndtype = 'string'
        else:
            ndtype = 'object'

    htable, table = _hashtables[ndtype]
    return (htable, table, values, dtype, ndtype) 
Example #27
Source File: algorithms.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _ensure_arraylike(values):
    """
    ensure that we are arraylike if not already
    """
    if not is_array_like(values):
        inferred = lib.infer_dtype(values)
        if inferred in ['mixed', 'string', 'unicode']:
            if isinstance(values, tuple):
                values = list(values)
            values = construct_1d_object_array_from_listlike(values)
        else:
            values = np.asarray(values)
    return values 
Example #28
Source File: tile.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _convert_bin_to_numeric_type(bins, dtype):
    """
    if the passed bin is of datetime/timedelta type,
    this method converts it to integer

    Parameters
    ----------
    bins : list-like of bins
    dtype : dtype of data

    Raises
    ------
    ValueError if bins are not of a compat dtype to dtype
    """
    bins_dtype = infer_dtype(bins)
    if is_timedelta64_dtype(dtype):
        if bins_dtype in ['timedelta', 'timedelta64']:
            bins = to_timedelta(bins).view(np.int64)
        else:
            raise ValueError("bins must be of timedelta64 dtype")
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        if bins_dtype in ['datetime', 'datetime64']:
            bins = to_datetime(bins).view(np.int64)
        else:
            raise ValueError("bins must be of datetime64 dtype")

    return bins 
Example #29
Source File: test_inference.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_infer_dtype_bytes(self):
        compare = 'string' if PY2 else 'bytes'

        # string array of bytes
        arr = np.array(list('abc'), dtype='S1')
        assert lib.infer_dtype(arr, skipna=True) == compare

        # object array of bytes
        arr = arr.astype(object)
        assert lib.infer_dtype(arr, skipna=True) == compare

        # object array of bytes with missing values
        assert lib.infer_dtype([b'a', np.nan, b'c'], skipna=True) == compare 
Example #30
Source File: test_inference.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype):
        # see pandas/conftest.py
        inferred_dtype, values = any_skipna_inferred_dtype

        # make sure the inferred dtype of the fixture is as requested
        assert inferred_dtype == lib.infer_dtype(values, skipna=True)