# ***************************************************************************** # Copyright (c) 2019-2020, Intel Corporation All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; # OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** """ | :class:`pandas.Series` functions and operators implementations in SDC | Also, it contains Numba internal operators which are required for Series type handling """ import numba import numpy import operator import pandas import math import sys from numba.core.errors import TypingError from numba.core.typing import signature from numba.extending import intrinsic from numba import types from numba.core import cgutils from numba.np import numpy_support from numba.typed import List, Dict from numba import prange from numba.np.arraymath import get_isnan from pandas.core.indexing import IndexingError import sdc import sdc.datatypes.common_functions as common_functions from sdc.utilities.sdc_typing_utils import (TypeChecker, check_index_is_numeric, check_types_comparable, find_common_dtype_from_numpy_dtypes, has_literal_value, has_python_value) from sdc.datatypes.range_index_type import RangeIndexType from sdc.datatypes.common_functions import (sdc_join_series_indexes, sdc_arrays_argsort, sdc_reindex_series) from sdc.datatypes.hpat_pandas_rolling_types import ( gen_sdc_pandas_rolling_overload_body, sdc_pandas_rolling_docstring_tmpl) from sdc.datatypes.hpat_pandas_series_rolling_types import _hpat_pandas_series_rolling_init from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType from sdc.datatypes.hpat_pandas_getitem_types import SeriesGetitemAccessorType from sdc.hiframes.pd_series_type import SeriesType from sdc.str_arr_type import (StringArrayType, string_array_type) from sdc.str_arr_ext import (str_arr_is_na, str_arr_set_na, num_total_chars, pre_alloc_string_array, cp_str_list_to_array, create_str_arr_from_list, str_arr_set_na_by_mask, str_list_to_array) from sdc.utilities.utils import to_array, sdc_overload, sdc_overload_method, sdc_overload_attribute from sdc import sdc_autogenerated from sdc.functions import numpy_like from sdc.hiframes.api import isna from sdc.datatypes.hpat_pandas_groupby_functions import init_series_groupby from sdc.utilities.prange_utils import parallel_chunks from .pandas_series_functions import apply from .pandas_series_functions import map as _map @sdc_overload(operator.getitem) def hpat_pandas_series_accessor_getitem(self, idx): """ Pandas Series operator :attr:`pandas.Series.__getitem__` implementation for methods `pandas.Series.iloc`, `pandas.Series.loc`, `pandas.Series.iat`, `pandas.Series.at` Parameters ---------- self: :class:`pandas.Series` input Series other: :obj:`pandas.Series`, :obj:`int` or :obj:`float`, :obj:`slice`, :obj:`list` input arg Returns ------- :class:`pandas.Series` or a value of :obj:`pandas.Series.dtype` returns object """ _func_name = 'Operator getitem().' if not isinstance(self, SeriesGetitemAccessorType): return None accessor = self.accessor.literal_value if accessor == 'iloc': if isinstance(idx, (types.List, types.Array, types.SliceType)): def hpat_pandas_series_iloc_list_slice_impl(self, idx): result_data = self._series._data[idx] result_index = self._series.index[idx] return pandas.Series(data=result_data, index=result_index, name=self._series._name) return hpat_pandas_series_iloc_list_slice_impl if isinstance(idx, (int, types.Integer)): def hpat_pandas_series_iloc_impl(self, idx): return self._series._data[idx] return hpat_pandas_series_iloc_impl def hpat_pandas_series_iloc_callable_impl(self, idx): index = numpy.asarray(list(map(idx, self._series._data))) return pandas.Series( data=self._series._data[index], index=self._series.index[index], name=self._series._name ) return hpat_pandas_series_iloc_callable_impl raise TypingError('{} The index must be an Integer, Slice or List of Integer or a callable.\ Given: {}'.format(_func_name, idx)) if accessor == 'iat': if isinstance(idx, (int, types.Integer)): def hpat_pandas_series_iat_impl(self, idx): return self._series._data[idx] return hpat_pandas_series_iat_impl raise TypingError('{} The index must be a Integer. Given: {}'.format(_func_name, idx)) if accessor == 'loc': # Note: Loc return Series # Note: Loc slice and callable with String is not implemented # Note: Loc slice without start is not supported min_int64 = numpy.iinfo('int64').min max_int64 = numpy.iinfo('int64').max index_is_none = (self.series.index is None or isinstance(self.series.index, numba.types.misc.NoneType)) if isinstance(idx, types.SliceType) and not index_is_none: def hpat_pandas_series_loc_slice_impl(self, idx): series = self._series index = series.index start_position = len(index) stop_position = 0 max_diff = 0 min_diff = 0 start_position_inc = len(index) start_position_dec = len(index) stop_position_inc = 0 stop_position_dec = 0 idx_start = idx.start idx_stop = idx.stop for i in numba.prange(len(index)): if index[i] >= idx_start: start_position_inc = min(start_position_inc, i) if index[i] <= idx_start: start_position_dec = min(start_position_dec, i) if index[i] <= idx_stop: stop_position_inc = max(stop_position_inc, i) if index[i] >= idx_stop: stop_position_dec = max(stop_position_dec, i) if i > 0: max_diff = max(max_diff, index[i] - index[i - 1]) min_diff = min(min_diff, index[i] - index[i - 1]) if max_diff*min_diff < 0: raise ValueError("Index must be monotonic increasing or decreasing") if max_diff > 0: start_position = start_position_inc stop_position = stop_position_inc if idx_stop < index[0]: return pandas.Series(data=series._data[:0], index=series._index[:0], name=series._name) else: start_position = start_position_dec stop_position = stop_position_dec if idx.stop != max_int64 else len(index) if idx_stop > index[0] and idx_stop != max_int64: return pandas.Series(data=series._data[:0], index=series._index[:0], name=series._name) stop_position = min(stop_position + 1, len(index)) if ( start_position >= len(index) or stop_position <= 0 or stop_position <= start_position ): return pandas.Series(data=series._data[:0], index=series._index[:0], name=series._name) return pandas.Series(data=series._data[start_position:stop_position], index=index[start_position:stop_position], name=series._name) return hpat_pandas_series_loc_slice_impl if isinstance(idx, types.SliceType) and index_is_none: def hpat_pandas_series_loc_slice_noidx_impl(self, idx): max_slice = sys.maxsize start = idx.start stop = idx.stop if idx.stop == max_slice: stop = max_slice - 1 result_data = self._series._data[start:stop+1] result_index = numpy.arange(start, stop + 1) return pandas.Series(data=result_data, index=result_index, name=self._series._name) return hpat_pandas_series_loc_slice_noidx_impl if isinstance(idx, (types.Array, types.List)): def hpat_pandas_series_loc_array_impl(self, idx): index = self._series.index data = self._series._data size = len(index) data_res = [] index_res = [] for value in idx: mask = numpy.zeros(shape=size, dtype=numpy.bool_) for i in numba.prange(size): mask[i] = index[i] == value data_res.extend(data[mask]) index_res.extend(index[mask]) return pandas.Series(data=data_res, index=index_res, name=self._series._name) return hpat_pandas_series_loc_array_impl if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)): def hpat_pandas_series_loc_impl(self, idx): index = self._series.index mask = numpy.empty(len(self._series._data), numpy.bool_) for i in numba.prange(len(index)): mask[i] = index[i] == idx return pandas.Series(data=self._series._data[mask], index=index[mask], name=self._series._name) return hpat_pandas_series_loc_impl raise TypingError('{} The index must be an Number, Slice, String, List, Array or a callable.\ Given: {}'.format(_func_name, idx)) if accessor == 'at': if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)): def hpat_pandas_series_at_impl(self, idx): index = self._series.index count = 0 mask = numpy.empty(len(self._series._data), numpy.bool_) for i in numba.prange(len(index)): mask[i] = index[i] == idx if mask[i] == True: # noqa count += 1 if count == 0: # noqa raise ValueError("Index is not in the Series") return self._series._data[mask] return hpat_pandas_series_at_impl raise TypingError('{} The index must be a Number or String. Given: {}'.format(_func_name, idx)) raise TypingError('{} Unknown accessor. Only "loc", "iloc", "at", "iat" are supported.\ Given: {}'.format(_func_name, accessor)) @sdc_overload(operator.getitem) def hpat_pandas_series_getitem(self, idx): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.getitem Get value(s) of Series by key. Limitations ----------- Supported ``key`` can be one of the following: - Integer scalar, e.g. :obj:`series[0]` - An array or a list, e.g. :obj:`series[0,2,5]` - A list of booleans, e.g. :obj:`series[True,False]` - A slice, e.g. :obj:`series[2:5]` - Another series Examples -------- .. literalinclude:: ../../../examples/series/series_getitem/series_getitem_scalar_single_result.py :language: python :lines: 32- :caption: Getting Pandas Series elements. Returns single value. :name: ex_series_getitem .. command-output:: python ./series/series_getitem/series_getitem_scalar_single_result.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_getitem/series_getitem_scalar_multiple_result.py :language: python :lines: 34- :caption: Getting Pandas Series elements. Returns multiple value. :name: ex_series_getitem .. command-output:: python ./series/series_getitem/series_getitem_scalar_multiple_result.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_getitem/series_getitem_slice.py :language: python :lines: 35- :caption: Getting Pandas Series elements by slice. :name: ex_series_getitem .. command-output:: python ./series/series_getitem/series_getitem_slice.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_getitem/series_getitem_bool_array.py :language: python :lines: 37- :caption: Getting Pandas Series elements by array of booleans. :name: ex_series_getitem .. command-output:: python ./series/series_getitem/series_getitem_bool_array.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_getitem/series_getitem_series.py :language: python :lines: 36- :caption: Getting Pandas Series elements by another Series. :name: ex_series_getitem .. command-output:: python ./series/series_getitem/series_getitem_series.py :cwd: ../../../examples .. seealso:: :ref:`Series.setitem <pandas.Series.setitem>` Set value to Series by index :ref:`Series.loc <pandas.Series.loc>` Access a group of rows and columns by label(s) or a boolean array. :ref:`Series.iloc <pandas.Series.iloc>` Purely integer-location based indexing for selection by position. :ref:`Series.at <pandas.Series.at>` Access a single value for a row/column label pair. :ref:`Series.iat <pandas.Series.iat>` Access a single value for a row/column pair by integer position. :ref:`DataFrame.getitem <pandas.DataFrame.getitem>` Get data from a DataFrame by indexer. :ref:`DataFrame.setitem <pandas.DataFrame.setitem>` Set value to DataFrame by index :ref:`DataFrame.loc <pandas.DataFrame.loc>` Access a group of rows and columns by label(s) or a boolean array. :ref:`DataFrame.iloc <pandas.DataFrame.iloc>` Purely integer-location based indexing for selection by position. :ref:`DataFrame.at <pandas.DataFrame.at>` Access a single value for a row/column label pair. :ref:`DataFrame.iat <pandas.DataFrame.iat>` Access a single value for a row/column pair by integer position. .. todo:: Fix SDC behavior and add the expected output of the > python ./series_getitem.py to the docstring Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series operator :attr:`pandas.Series.__getitem__` implementation .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_getitem* """ _func_name = 'Operator getitem().' if not isinstance(self, SeriesType): return None # Note: Getitem return Series index_is_none = isinstance(self.index, numba.types.misc.NoneType) index_is_none_or_numeric = index_is_none or (self.index and isinstance(self.index.dtype, types.Number)) index_is_string = not index_is_none and isinstance(self.index.dtype, (types.UnicodeType, types.StringLiteral)) if ( isinstance(idx, types.Number) and index_is_none_or_numeric or (isinstance(idx, (types.UnicodeType, types.StringLiteral)) and index_is_string) ): def hpat_pandas_series_getitem_index_impl(self, idx): index = self.index mask = numpy.empty(len(self._data), numpy.bool_) for i in numba.prange(len(index)): mask[i] = index[i] == idx return pandas.Series(data=self._data[mask], index=index[mask], name=self._name) return hpat_pandas_series_getitem_index_impl if (isinstance(idx, types.Integer) and index_is_string): def hpat_pandas_series_idx_impl(self, idx): return self._data[idx] return hpat_pandas_series_idx_impl if isinstance(idx, types.SliceType): # Return slice for str values not implement def hpat_pandas_series_getitem_idx_slice_impl(self, idx): return pandas.Series(data=self._data[idx], index=self.index[idx], name=self._name) return hpat_pandas_series_getitem_idx_slice_impl if (isinstance(idx, (types.List, types.Array)) and isinstance(idx.dtype, (types.Boolean, bool))): def hpat_pandas_series_getitem_idx_list_impl(self, idx): if len(self) != len(idx): raise IndexError("Item wrong length") return pandas.Series( data=numpy_like.getitem_by_mask(self._data, idx), index=numpy_like.getitem_by_mask(self.index, idx), name=self._name ) return hpat_pandas_series_getitem_idx_list_impl # idx is Series and it's index is any, idx.dtype is Boolean if (isinstance(idx, SeriesType) and isinstance(idx.dtype, types.Boolean)): none_indexes = isinstance(self.index, types.NoneType) and isinstance(idx.index, types.NoneType) none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self)) and (isinstance(idx.index, types.NoneType) or check_index_is_numeric(idx))) if not (none_or_numeric_indexes or check_types_comparable(self.index, idx.index)): msg = '{} The index of boolean indexer is not comparable to Series index.' + \ ' Given: self.index={}, idx.index={}' raise TypingError(msg.format(_func_name, self.index, idx.index)) def _series_getitem_idx_bool_indexer_impl(self, idx): # TO-DO: replace sdc_reindex_series with reindex methods and move this logic to impl # for specific index types (needs proper index type instead of types.none as index) if none_indexes == True: # noqa if len(self) > len(idx): msg = "Unalignable boolean Series provided as indexer " + \ "(index of the boolean Series and of the indexed object do not match)." raise IndexingError(msg) self_index = self.index reindexed_idx = idx else: self_index = self.index reindexed_idx = sdc_reindex_series(idx._data, idx.index, idx._name, self_index) return pandas.Series( data=numpy_like.getitem_by_mask(self._data, reindexed_idx._data), index=numpy_like.getitem_by_mask(self_index, reindexed_idx._data), name=self._name ) return _series_getitem_idx_bool_indexer_impl # idx is Series and it's index is None, idx.dtype is not Boolean if (isinstance(idx, SeriesType) and index_is_none and not isinstance(idx.data.dtype, (types.Boolean, bool))): def hpat_pandas_series_getitem_idx_list_impl(self, idx): res = numpy.copy(self._data[:len(idx._data)]) index = numpy.arange(len(self._data)) for i in numba.prange(len(res)): for j in numba.prange(len(index)): if j == idx._data[i]: res[i] = self._data[j] return pandas.Series(data=res, index=index[idx._data], name=self._name) return hpat_pandas_series_getitem_idx_list_impl # idx is Series and it's index is not None, idx.dtype is not Boolean if (isinstance(idx, SeriesType) and not isinstance(self.index, types.NoneType) and not isinstance(idx.data.dtype, (types.Boolean, bool))): def hpat_pandas_series_getitem_idx_series_impl(self, idx): index = self.index data = self._data size = len(index) data_res = [] index_res = [] for value in idx._data: mask = numpy.zeros(shape=size, dtype=numpy.bool_) for i in numba.prange(size): mask[i] = index[i] == value data_res.extend(data[mask]) index_res.extend(index[mask]) return pandas.Series(data=data_res, index=index_res, name=self._name) return hpat_pandas_series_getitem_idx_series_impl raise TypingError('{} The index must be an Number, Slice, String, Boolean Array or a Series.\ Given: {}'.format(_func_name, idx)) @sdc_overload(operator.setitem) def sdc_pandas_series_setitem(self, idx, value): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.setitem Set value to Series by index Limitations ----------- - Not supported for idx as a string slice, e.g. S['a':'f'] = value - Not supported for string series - Not supported for a case of setting value for non existing index - Not supported for cases when setting causes change of the Series dtype Examples -------- .. literalinclude:: ../../../examples/series/series_setitem_int.py :language: python :lines: 27- :caption: Setting Pandas Series elements :name: ex_series_setitem .. command-output:: python ./series/series_setitem_int.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_setitem_slice.py :language: python :lines: 27- :caption: Setting Pandas Series elements by slice :name: ex_series_setitem .. command-output:: python ./series/series_setitem_slice.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_setitem_series.py :language: python :lines: 27- :caption: Setting Pandas Series elements by series :name: ex_series_setitem .. command-output:: python ./series/series_setitem_series.py :cwd: ../../../examples .. seealso:: :ref:`Series.getitem <pandas.Series.getitem>` Get value(s) of Series by key. :ref:`Series.loc <pandas.Series.loc>` Access a group of rows and columns by label(s) or a boolean array. :ref:`Series.iloc <pandas.Series.iloc>` Purely integer-location based indexing for selection by position. :ref:`Series.at <pandas.Series.at>` Access a single value for a row/column label pair. :ref:`Series.iat <pandas.Series.iat>` Access a single value for a row/column pair by integer position. :ref:`DataFrame.getitem <pandas.DataFrame.getitem>` Get data from a DataFrame by indexer. :ref:`DataFrame.setitem <pandas.DataFrame.setitem>` Set value to DataFrame by index :ref:`DataFrame.loc <pandas.DataFrame.loc>` Access a group of rows and columns by label(s) or a boolean array. :ref:`DataFrame.iloc <pandas.DataFrame.iloc>` Purely integer-location based indexing for selection by position. :ref:`DataFrame.at <pandas.DataFrame.at>` Access a single value for a row/column label pair. :ref:`DataFrame.iat <pandas.DataFrame.iat>` Access a single value for a row/column pair by integer position. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series operator :attr:`pandas.Series.set` implementation .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_setitem* """ _func_name = 'Operator setitem().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(idx, (types.Number, types.UnicodeType, types.SliceType, types.Array, SeriesType))): ty_checker.raise_exc(idx, 'scalar, Slice, Array or Series', 'idx') all_supported_scalar_types = (types.Number, types.UnicodeType, types.Boolean) if not (isinstance(value, all_supported_scalar_types) or isinstance(value, (SeriesType, types.Array))): ty_checker.raise_exc(value, 'scalar, Array or Series', 'value') if not check_types_comparable(self, value): msg = '{} The value and Series data must be comparable. Given: self.dtype={}, value={}' raise TypingError(msg.format(_func_name, self.dtype, value)) # idx is not necessarily of the same dtype as self.index, e.g. it might be a Boolean indexer or a Slice if not (check_types_comparable(idx, self.index) or isinstance(idx, (types.Integer, types.SliceType)) or (isinstance(idx, (SeriesType, types.Array)) and isinstance(idx.dtype, (types.Integer, types.Boolean)))): msg = '{} The idx is not comparable to Series index, not a Boolean or integer indexer or a Slice. ' + \ 'Given: self.index={}, idx={}' raise TypingError(msg.format(_func_name, self.index, idx)) value_is_series = isinstance(value, SeriesType) value_is_array = isinstance(value, types.Array) # for many cases pandas setitem assigns values along positions in self._data # not considering Series index, so a common implementation exists idx_is_boolean_array = isinstance(idx, types.Array) and isinstance(idx.dtype, types.Boolean) idx_is_boolean_series = isinstance(idx, SeriesType) and isinstance(idx.dtype, types.Boolean) idx_and_self_index_comparable = check_types_comparable(self.index, idx) self_index_is_none = isinstance(self.index, types.NoneType) assign_along_positions = ((self_index_is_none or isinstance(idx, types.SliceType) or not idx_and_self_index_comparable) and not idx_is_boolean_series and not idx_is_boolean_array) idx_is_scalar = isinstance(idx, (types.Number, types.UnicodeType)) if assign_along_positions or idx_is_scalar: idx_is_numeric_or_boolean_series = (isinstance(idx, SeriesType) and isinstance(idx.dtype, (types.Number, types.Boolean))) assign_via_idx_mask = idx_is_scalar and idx_and_self_index_comparable assign_via_idx_data = idx_is_numeric_or_boolean_series and not idx_and_self_index_comparable def sdc_pandas_series_setitem_no_reindexing_impl(self, idx, value): if assign_via_idx_mask == True: # noqa # FIXME_Numba#5157: using asarray since eq impl for RangeIndexType returns list _idx = numpy.asarray(self._index == idx) elif assign_via_idx_data == True: # noqa _idx = idx._data else: _idx = idx _value = value._data if value_is_series == True else value # noqa self._data[_idx] = _value return self return sdc_pandas_series_setitem_no_reindexing_impl if (idx_is_boolean_array or idx_is_boolean_series) and value_is_series: self_index_dtype = types.int64 if isinstance(self.index, types.NoneType) else self.index.dtype value_index_dtype = types.int64 if isinstance(value.index, types.NoneType) else value.index.dtype if (isinstance(self_index_dtype, types.Number) and isinstance(value_index_dtype, types.Number)): indexes_common_dtype = find_common_dtype_from_numpy_dtypes([self_index_dtype, value_index_dtype], []) elif (isinstance(self_index_dtype, types.UnicodeType) and isinstance(value_index_dtype, types.UnicodeType)): indexes_common_dtype = types.unicode_type else: msg = '{} The self and value indexes must be comparable. Given: self.dtype={}, value.dtype={}' raise TypingError(msg.format(_func_name, self_index_dtype, value_index_dtype)) if idx_is_boolean_array: def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value): # if idx is a Boolean array (and value is a series) it's used as a mask for self.index # and filtered indexes are looked in value.index, and if found corresponding value is set if value_is_series == True: # noqa value_index, self_index = value.index, self.index unique_value_indices, unique_self_indices = set(value_index), set(self_index) # pandas behaves differently if value.index has duplicates and if it has no # in case of duplicates in value.index assignment is made via positions # in case there are no duplicates, value.index is used as reindexer self_index_has_duplicates = len(unique_self_indices) != len(self_index) value_index_has_duplicates = len(unique_value_indices) != len(value_index) if (self_index_has_duplicates or value_index_has_duplicates): self._data[idx] = value._data else: map_index_to_position = Dict.empty( key_type=indexes_common_dtype, value_type=types.int32 ) for i, index_value in enumerate(value_index): map_index_to_position[index_value] = types.int32(i) # such iterative setitem on a StringArray will be inefficient # TODO: refactor this when str_arr setitem is fully supported for i in numba.prange(len(self_index)): if idx[i]: self_index_value = self_index[i] if self_index_value in map_index_to_position: self._data[i] = value._data[map_index_to_position[self_index_value]] else: sdc.hiframes.join.setitem_arr_nan(self._data, i) else: # if value has no index - nothing to reindex and assignment is made along positions set by idx mask self._data[idx] = value return self return sdc_pandas_series_setitem_idx_bool_array_align_impl elif idx_is_boolean_series: def sdc_pandas_series_setitem_idx_bool_series_align_impl(self, idx, value): self_index, idx_index = self.index, idx.index # FIXME: for now just use sorted, as == is not implemented for sets of unicode strings if (sorted(self_index) != sorted(idx_index)): msg = "Unalignable boolean Series provided as indexer " + \ "(index of the boolean Series and of the indexed object do not match)" raise ValueError(msg) # if idx is a Boolean Series it's data is used as a mask for it's index # and filtered indexes are either looked in value.index (if value is a Series) # or in self.index (if value is scalar or array) filtered_idx_indices = idx_index[idx._data] filtered_idx_indices_set = set(filtered_idx_indices) if value_is_series == True: # noqa if len(filtered_idx_indices_set) != len(filtered_idx_indices): raise ValueError("cannot reindex from a duplicate axis") map_self_index_to_position = Dict.empty( key_type=indexes_common_dtype, value_type=types.int32 ) for i, index_value in enumerate(self_index): map_self_index_to_position[index_value] = types.int32(i) value_index = value.index map_value_index_to_position = Dict.empty( key_type=indexes_common_dtype, value_type=types.int32 ) for i, index_value in enumerate(value_index): map_value_index_to_position[index_value] = types.int32(i) # for all index values in filtered index assign element of value with this index # to element of self with this index for i in numba.prange(len(filtered_idx_indices)): idx_index_value = filtered_idx_indices[i] if idx_index_value in map_value_index_to_position: self_index_pos = map_self_index_to_position[idx_index_value] value_index_pos = map_value_index_to_position[idx_index_value] self._data[self_index_pos] = value._data[value_index_pos] else: sdc.hiframes.join.setitem_arr_nan(self._data, map_self_index_to_position[idx_index_value]) else: # use filtered index values to create a set mask, then make assignment to self # using this mask (i.e. the order of filtered indices in self.index does not matter) self_index_size = len(self_index) set_mask = numpy.zeros(self_index_size, dtype=numpy.bool_) for i in numba.prange(self_index_size): if self_index[i] in filtered_idx_indices_set: set_mask[i] = True self._data[set_mask] = value return self return sdc_pandas_series_setitem_idx_bool_series_align_impl elif isinstance(idx, (SeriesType, types.Array)) and idx_and_self_index_comparable: # idx is numeric Series or array comparable with self.index, hence reindexing is possible if isinstance(self.index.dtype, types.Number): idx_is_series = isinstance(idx, SeriesType) value_is_scalar = not (value_is_series or value_is_array) def sdc_pandas_series_setitem_idx_int_series_align_impl(self, idx, value): _idx = idx._data if idx_is_series == True else idx # noqa _value = value._data if value_is_series == True else value # noqa self_index_size = len(self._index) idx_size = len(_idx) valid_indices = numpy.repeat(-1, self_index_size) for i in numba.prange(self_index_size): for j in numpy.arange(idx_size): if self._index[i] == _idx[j]: valid_indices[i] = j valid_indices_positions = numpy.arange(self_index_size)[valid_indices != -1] valid_indices_masked = valid_indices[valid_indices != -1] indexes_found = self._index[valid_indices_positions] if len(numpy.unique(indexes_found)) != len(indexes_found): raise ValueError("Reindexing only valid with uniquely valued Index objects") if len(valid_indices_masked) != idx_size: raise ValueError("Reindexing not possible: idx has index not found in Series") if value_is_scalar == True: # noqa self._data[valid_indices_positions] = _value else: self._data[valid_indices_positions] = numpy.take(_value, valid_indices_masked) return self return sdc_pandas_series_setitem_idx_int_series_align_impl elif isinstance(self.index.dtype, types.UnicodeType): def sdc_pandas_series_setitem_idx_str_series_align_impl(self, idx, value): map_index_to_position = Dict.empty( key_type=types.unicode_type, value_type=types.int32 ) for i, index_value in enumerate(self._index): if index_value in map_index_to_position: raise ValueError("Reindexing only valid with uniquely valued Index objects") map_index_to_position[index_value] = types.int32(i) idx_data_size = len(idx._data) number_of_found = 0 set_positions = numpy.empty(idx_data_size, dtype=types.int32) for i in numba.prange(len(idx._data)): index_value = idx._data[i] if index_value in map_index_to_position: number_of_found += 1 set_positions[i] = map_index_to_position[index_value] if number_of_found != idx_data_size: raise ValueError("Reindexing not possible: idx has index not found in Series") if value_is_series == True: # noqa self._data[set_positions] = value._data else: self._data[set_positions] = value return self return sdc_pandas_series_setitem_idx_str_series_align_impl else: # self.index.dtype other than types.Number or types.Unicode return None return None @sdc_overload_attribute(SeriesType, 'iloc') def hpat_pandas_series_iloc(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.iloc Limitations ----------- Iloc always returns Series. Examples -------- .. literalinclude:: ../../../examples/series/series_iloc/series_iloc_value.py :language: python :lines: 27- :caption: With a scalar integer. :name: ex_series_iloc .. command-output:: python ./series/series_iloc/series_iloc_value.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_iloc/series_iloc_slice.py :language: python :lines: 33- :caption: With a slice object. :name: ex_series_iloc .. command-output:: python ./series/series_iloc/series_iloc_slice.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.iat <pandas.DataFrame.iat>` Fast integer location scalar accessor. :ref:`DataFrame.loc <pandas.DataFrame.loc>` Purely label-location based indexer for selection by label. :ref:`Series.iloc <pandas.Series.iloc>` Purely integer-location based indexing for selection by position. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.iloc` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_iloc* """ _func_name = 'Attribute iloc().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) def hpat_pandas_series_iloc_impl(self): return sdc.datatypes.hpat_pandas_getitem_types.series_getitem_accessor_init(self, 'iloc') return hpat_pandas_series_iloc_impl @sdc_overload_attribute(SeriesType, 'loc') def hpat_pandas_series_loc(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.loc Limitations ----------- - Loc always returns Series. - Loc slice is supported only with numeric values and specified ``start``. - Loc callable is not supported yet. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_loc/series_loc_single_result.py :language: python :lines: 32- :caption: With a scalar integer. Returns single value. :name: ex_series_loc .. command-output:: python ./series/series_loc/series_loc_single_result.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_loc/series_loc_multiple_result.py :language: python :lines: 34- :caption: With a scalar integer. Returns multiple value. :name: ex_series_loc .. command-output:: python ./series/series_loc/series_loc_multiple_result.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_loc/series_loc_slice.py :language: python :lines: 34- :caption: With a slice object. Returns multiple value. :name: ex_series_loc .. command-output:: python ./series/series_loc/series_loc_slice.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.at <pandas.DataFrame.at>` Access a single value for a row/column label pair. :ref:`DataFrame.iloc <pandas.DataFrame.iloc>` Access group of rows and columns by integer position(s). :ref:`DataFrame.xs <pandas.DataFrame.xs>` Returns a cross-section (row(s) or column(s)) from the Series/DataFrame. :ref:`Series.loc <pandas.Series.loc>` Access group of values using labels. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.loc` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_loc* """ _func_name = 'Attribute loc().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) def hpat_pandas_series_loc_impl(self): return sdc.datatypes.hpat_pandas_getitem_types.series_getitem_accessor_init(self, 'loc') return hpat_pandas_series_loc_impl @sdc_overload_attribute(SeriesType, 'iat') def hpat_pandas_series_iat(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.iat Examples -------- .. literalinclude:: ../../../examples/series/series_iat.py :language: python :lines: 27- :caption: Get value at specified index position. :name: ex_series_iat .. command-output:: python ./series/series_iat.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.at <pandas.DataFrame.at>` Access a single value for a row/column label pair. :ref:`DataFrame.loc <pandas.DataFrame.loc>` Purely label-location based indexer for selection by label. :ref:`DataFrame.iloc <pandas.DataFrame.iloc>` Access group of rows and columns by integer position(s). Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.iat` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_iat* """ _func_name = 'Attribute iat().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) def hpat_pandas_series_iat_impl(self): return sdc.datatypes.hpat_pandas_getitem_types.series_getitem_accessor_init(self, 'iat') return hpat_pandas_series_iat_impl @sdc_overload_attribute(SeriesType, 'at') def hpat_pandas_series_at(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.at Limitations ----------- - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_at/series_at_single_result.py :language: python :lines: 27- :caption: With a scalar integer. Returns single value. :name: ex_series_at .. command-output:: python ./series/series_at/series_at_single_result.py :cwd: ../../../examples .. literalinclude:: ../../../examples/series/series_at/series_at_multiple_result.py :language: python :lines: 27- :caption: With a scalar integer. Returns multiple value. :name: ex_series_at .. command-output:: python ./series/series_at/series_at_multiple_result.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.iat <pandas.DataFrame.iat>` Access a single value for a row/column pair by integer position. :ref:`DataFrame.loc <pandas.DataFrame.loc>` Access a group of rows and columns by label(s). :ref:`Series.at <pandas.Series.at>` Access a single value using a label. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.at` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_at* """ _func_name = 'Attribute at().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) def hpat_pandas_series_at_impl(self): return sdc.datatypes.hpat_pandas_getitem_types.series_getitem_accessor_init(self, 'at') return hpat_pandas_series_at_impl @sdc_overload_method(SeriesType, 'nsmallest') def hpat_pandas_series_nsmallest(self, n=5, keep='first'): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.nsmallest Limitations ----------- - Parameter ``keep`` is supported only with default value ``'first'``. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_nsmallest.py :language: python :lines: 27- :caption: Returns the smallest n elements. :name: ex_series_nsmallest .. command-output:: python ./series/series_nsmallest.py :cwd: ../../../examples .. seealso:: :ref:`Series.nlargest <pandas.Series.nlargest>` Get the n largest elements. :ref:`Series.sort_values <pandas.Series.sort_values>` Sort Series by values. :ref:`Series.head <pandas.Series.head>` Return the first n rows. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.nsmallest` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_nsmallest* """ _func_name = 'Method nsmallest().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(n, (types.Omitted, int, types.Integer)): ty_checker.raise_exc(n, 'int', 'n') if not isinstance(keep, (types.Omitted, str, types.UnicodeType, types.StringLiteral)): ty_checker.raise_exc(keep, 'str', 'keep') def hpat_pandas_series_nsmallest_impl(self, n=5, keep='first'): if keep != 'first': raise ValueError("Method nsmallest(). Unsupported parameter. Given 'keep' != 'first'") # mergesort is used for stable sorting of repeated values indices = self._data.argsort(kind='mergesort')[:max(n, 0)] return self.take(indices) return hpat_pandas_series_nsmallest_impl @sdc_overload_method(SeriesType, 'nlargest') def hpat_pandas_series_nlargest(self, n=5, keep='first'): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.nlargest Limitations ----------- - Parameter ``keep`` is supported only with default value ``'first'``. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_nlargest.py :language: python :lines: 27- :caption: Returns the largest n elements. :name: ex_series_nlargest .. command-output:: python ./series/series_nlargest.py :cwd: ../../../examples .. seealso:: :ref:`Series.nsmallest <pandas.Series.nsmallest>` Get the n smallest elements. :ref:`Series.sort_values <pandas.Series.sort_values>` Sort Series by values. :ref:`Series.head <pandas.Series.head>` Return the first n rows. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.nlargest` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_nlargest* """ _func_name = 'Method nlargest().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(n, (types.Omitted, int, types.Integer)): ty_checker.raise_exc(n, 'int', 'n') if not isinstance(keep, (types.Omitted, str, types.UnicodeType, types.StringLiteral)): ty_checker.raise_exc(keep, 'str', 'keep') def hpat_pandas_series_nlargest_impl(self, n=5, keep='first'): if keep != 'first': raise ValueError("Method nlargest(). Unsupported parameter. Given 'keep' != 'first'") # data: [0, 1, -1, 1, 0] -> [1, 1, 0, 0, -1] # index: [0, 1, 2, 3, 4] -> [1, 3, 0, 4, 2] (not [3, 1, 4, 0, 2]) # subtract 1 to ensure reverse ordering at boundaries indices = (-self._data - 1).argsort(kind='mergesort')[:max(n, 0)] return self.take(indices) return hpat_pandas_series_nlargest_impl @sdc_overload_attribute(SeriesType, 'shape') def hpat_pandas_series_shape(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.shape Examples -------- .. literalinclude:: ../../../examples/series/series_shape.py :language: python :lines: 27- :caption: Return a tuple of the shape of the underlying data. :name: ex_series_shape .. command-output:: python ./series/series_shape.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute :attr:`pandas.Series.shape` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shape1 """ _func_name = 'Attribute shape.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) def hpat_pandas_series_shape_impl(self): return self._data.shape return hpat_pandas_series_shape_impl @sdc_overload_method(SeriesType, 'std') def hpat_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.std Limitations ----------- Parameters ``axis``, ``level`` and ``numeric_only`` are supported only with default value ``None``. Examples -------- .. literalinclude:: ../../../examples/series/series_std.py :language: python :lines: 27- :caption: Returns sample standard deviation over Series. :name: ex_series_std .. command-output:: python ./series/series_std.py :cwd: ../../../examples .. seealso:: :ref:`Series.var <pandas.Series.var>` Returns unbiased variance over Series. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.std` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_std """ _func_name = 'Method std().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data, 'number', 'self.data') if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None: ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(ddof, (types.Omitted, int, types.Integer)): ty_checker.raise_exc(ddof, 'int', 'ddof') if not isinstance(axis, (types.Omitted, types.NoneType)) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, (types.Omitted, types.NoneType)) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_std_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): var = self.var(axis=axis, skipna=skipna, level=level, ddof=ddof, numeric_only=numeric_only) return var ** 0.5 return hpat_pandas_series_std_impl @sdc_overload_attribute(SeriesType, 'values') def hpat_pandas_series_values(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.values Examples -------- .. literalinclude:: ../../../examples/series/series_values.py :language: python :lines: 27- :caption: Return Series as ndarray or ndarray-like depending on the dtype. :name: ex_series_values .. command-output:: python ./series/series_values.py :cwd: ../../../examples .. seealso:: :ref:`Series.array <pandas.Series.array>` Reference to the underlying data. :ref:`Series.to_numpy <pandas.Series.to_numpy>` A NumPy array representing the underlying data. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute 'values' implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_values* """ _func_name = 'Attribute values.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) def hpat_pandas_series_values_impl(self): return self._data return hpat_pandas_series_values_impl @sdc_overload_method(SeriesType, 'value_counts') def hpat_pandas_series_value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.value_counts Limitations ----------- - Parameters ``normalize`` and ``bins`` are currently unsupported. - Parameter ``dropna`` is unsupported for String Series. - Elements with the same count might appear in result in a different order than in Pandas. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_value_counts.py :language: python :lines: 35- :caption: Getting the number of values excluding NaNs :name: ex_series_value_counts .. command-output:: python ./series/series_value_counts.py :cwd: ../../../examples .. seealso:: :ref:`Series.count <pandas.Series.count>` Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.value_counts` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_value_counts* """ _func_name = 'Method value_counts().' ty_checker = TypeChecker('Method value_counts().') ty_checker.check(self, SeriesType) if not isinstance(normalize, (types.Omitted, types.Boolean, bool)) and normalize is True: ty_checker.raise_exc(normalize, 'boolean', 'normalize') if not isinstance(sort, (types.Omitted, types.Boolean, bool)): ty_checker.raise_exc(sort, 'boolean', 'sort') if not isinstance(ascending, (types.Omitted, types.Boolean, bool)): ty_checker.raise_exc(ascending, 'boolean', 'ascending') if not isinstance(bins, (types.Omitted, types.NoneType)) and bins is not None: ty_checker.raise_exc(bins, 'boolean', 'bins') if not isinstance(dropna, (types.Omitted, types.Boolean, bool)): ty_checker.raise_exc(dropna, 'boolean', 'dropna') if isinstance(self.data, StringArrayType): def hpat_pandas_series_value_counts_str_impl( self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): value_counts_dict = Dict.empty( key_type=types.unicode_type, value_type=types.intp ) nan_counts = 0 for i, value in enumerate(self._data): if str_arr_is_na(self._data, i): if not dropna: nan_counts += 1 continue value_counts_dict[value] = value_counts_dict.get(value, 0) + 1 need_add_nan_count = not dropna and nan_counts values = [key for key in value_counts_dict] counts_as_list = [value_counts_dict[key] for key in value_counts_dict.keys()] values_len = len(values) if need_add_nan_count: # append a separate empty string for NaN elements values_len += 1 values.append('') counts_as_list.append(nan_counts) counts = numpy.asarray(counts_as_list, dtype=numpy.intp) indexes_order = numpy.arange(values_len) if sort: indexes_order = counts.argsort() if not ascending: indexes_order = indexes_order[::-1] counts_sorted = numpy.take(counts, indexes_order) values_sorted_by_count = [values[i] for i in indexes_order] # allocate the result index as a StringArray and copy values to it result_index = create_str_arr_from_list(values_sorted_by_count) if need_add_nan_count: # set null bit for StringArray element corresponding to NaN element (was added as last in values) index_previous_nan_pos = values_len - 1 for i in numpy.arange(values_len): if indexes_order[i] == index_previous_nan_pos: str_arr_set_na(result_index, i) break return pandas.Series(counts_sorted, index=result_index, name=self._name) return hpat_pandas_series_value_counts_str_impl elif isinstance(self.dtype, (types.Number, types.Boolean)): series_dtype = self.dtype def hpat_pandas_series_value_counts_number_impl( self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): value_counts_dict = Dict.empty( key_type=series_dtype, value_type=types.intp ) zero_counts = 0 is_zero_found = False for value in self._data: if (dropna and numpy.isnan(value)): continue # Pandas hash-based value_count_float64 function doesn't distinguish between # positive and negative zeros, hence we count zero values separately and store # as a key the first zero value found in the Series if not value: zero_counts += 1 if not is_zero_found: zero_value = value is_zero_found = True continue value_counts_dict[value] = value_counts_dict.get(value, 0) + 1 if zero_counts: value_counts_dict[zero_value] = zero_counts unique_values = numpy.asarray( list(value_counts_dict), dtype=self._data.dtype ) value_counts = numpy.asarray( [value_counts_dict[key] for key in value_counts_dict], dtype=numpy.intp ) indexes_order = numpy.arange(len(value_counts)) if sort: indexes_order = value_counts.argsort() if not ascending: indexes_order = indexes_order[::-1] sorted_unique_values = numpy.take(unique_values, indexes_order) sorted_value_counts = numpy.take(value_counts, indexes_order) return pandas.Series(sorted_value_counts, index=sorted_unique_values, name=self._name) return hpat_pandas_series_value_counts_number_impl return None @sdc_overload_method(SeriesType, 'var') def hpat_pandas_series_var(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.var Limitations ----------- Parameters ``axis``, ``level`` and ``numeric_only`` are supported only with default value ``None``. Examples -------- .. literalinclude:: ../../../examples/series/series_var.py :language: python :lines: 27- :caption: Returns unbiased variance over Series. :name: ex_series_var .. command-output:: python ./series/series_var.py :cwd: ../../../examples .. seealso:: :ref:`Series.std <pandas.Series.std>` Returns sample standard deviation over Series. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.var` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_var """ _func_name = 'Method var().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data, 'number', 'self.data') if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None: ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(ddof, (types.Omitted, int, types.Integer)): ty_checker.raise_exc(ddof, 'int', 'ddof') if not isinstance(axis, (types.Omitted, types.NoneType)) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, (types.Omitted, types.NoneType)) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): if skipna is None: skipna = True if skipna: valuable_length = len(self._data) - numpy.sum(numpy.isnan(self._data)) if valuable_length <= ddof: return numpy.nan return numpy_like.nanvar(self._data) * valuable_length / (valuable_length - ddof) if len(self._data) <= ddof: return numpy.nan return self._data.var() * len(self._data) / (len(self._data) - ddof) return hpat_pandas_series_var_impl @sdc_overload_attribute(SeriesType, 'index') def hpat_pandas_series_index(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.index Limitations ----------- - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_index.py :language: python :lines: 27- :caption: The index (axis labels) of the Series. :name: ex_series_index .. command-output:: python ./series/series_index.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute :attr:`pandas.Series.index` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_index1 Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_index2 """ _func_name = 'Attribute index.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if isinstance(self.index, types.NoneType): def hpat_pandas_series_index_none_impl(self): return pandas.RangeIndex(len(self._data)) return hpat_pandas_series_index_none_impl else: def hpat_pandas_series_index_impl(self): return self._index return hpat_pandas_series_index_impl hpat_pandas_series_rolling = sdc_overload_method(SeriesType, 'rolling')( gen_sdc_pandas_rolling_overload_body(_hpat_pandas_series_rolling_init, SeriesType)) hpat_pandas_series_rolling.__doc__ = sdc_pandas_rolling_docstring_tmpl.format( ty='Series', ty_lower='series') @sdc_overload_attribute(SeriesType, 'size') def hpat_pandas_series_size(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.size Examples -------- .. literalinclude:: ../../../examples/series/series_size.py :language: python :lines: 27- :caption: Return the number of elements in the underlying data. :name: ex_series_size .. command-output:: python ./series/series_size.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute :attr:`pandas.Series.size` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_size """ _func_name = 'Attribute size.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) def hpat_pandas_series_size_impl(self): return len(self._data) return hpat_pandas_series_size_impl @sdc_overload_attribute(SeriesType, 'str') def hpat_pandas_series_str(self): """ Pandas Series attribute :attr:`pandas.Series.str` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_get """ _func_name = 'Attribute str.' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) if not isinstance(self.data.dtype, (types.List, types.UnicodeType)): msg = '{} Can only use .str accessor with string values. Given: {}' raise TypingError(msg.format(_func_name, self.data.dtype)) def hpat_pandas_series_str_impl(self): return pandas.core.strings.StringMethods(self) return hpat_pandas_series_str_impl @sdc_overload_attribute(SeriesType, 'ndim') def hpat_pandas_series_ndim(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.ndim Examples -------- .. literalinclude:: ../../../examples/series/series_ndim.py :language: python :lines: 27- :caption: Number of dimensions of the underlying data, by definition 1. :name: ex_series_ndim .. command-output:: python ./series/series_ndim.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute :attr:`pandas.Series.ndim` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_getattr_ndim """ _func_name = 'Attribute ndim.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) def hpat_pandas_series_ndim_impl(self): return 1 return hpat_pandas_series_ndim_impl @sdc_overload_attribute(SeriesType, 'T') def hpat_pandas_series_T(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.T Examples -------- .. literalinclude:: ../../../examples/series/series_T.py :language: python :lines: 27- :caption: Return the transpose, which is by definition self. :name: ex_series_T .. command-output:: python ./series/series_T.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series attribute :attr:`pandas.Series.T` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_getattr_T """ _func_name = 'Attribute T.' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) def hpat_pandas_series_T_impl(self): return self._data return hpat_pandas_series_T_impl @sdc_overload(len) def hpat_pandas_series_len(self): """ Pandas Series operator :func:`len` implementation .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_len """ _func_name = 'Operator len().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) def hpat_pandas_series_len_impl(self): return len(self._data) return hpat_pandas_series_len_impl @sdc_overload_method(SeriesType, 'astype', parallel=False) def hpat_pandas_series_astype(self, dtype, copy=True, errors='raise'): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.astype Limitations ----------- - Parameter ``copy`` is supported only with default value ``True``. Examples -------- .. literalinclude:: ../../../examples/series/series_astype.py :language: python :lines: 36- :caption: Cast a pandas object to a specified dtype. :name: ex_series_astype .. command-output:: python ./series/series_astype.py :cwd: ../../../examples .. seealso:: `pandas.to_datetime <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime>`_ Convert argument to datetime. `pandas.to_timedelta <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_timedelta.html#pandas.to_timedelta>`_ Convert argument to timedelta. `pandas.to_numeric <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_numeric.html#pandas.to_numeric>`_ Convert argument to a numeric type. `numpy.ndarray.astype <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.astype.html#numpy.ndarray.astype>`_ Copy of the array, cast to a specified type. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.astype` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_astype* """ _func_name = 'Method astype().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(copy, (types.Omitted, bool, types.Boolean)): ty_checker.raise_exc(copy, 'bool', 'copy') if (not isinstance(errors, (types.Omitted, str, types.UnicodeType, types.StringLiteral)) and errors in ('raise', 'ignore')): ty_checker.raise_exc(errors, 'str', 'errors') # Return npytypes.Array from npytypes.Array for astype(types.functions.NumberClass), example - astype(np.int64) # Return npytypes.Array from npytypes.Array for astype(types.StringLiteral), example - astype('int64') def hpat_pandas_series_astype_numba_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=numpy_like.astype_no_inline(self._data, dtype), index=self._index, name=self._name) # Return self def hpat_pandas_series_astype_no_modify_impl(self, dtype, copy=True, errors='raise'): return pandas.Series(data=self._data, index=self._index, name=self._name) str_check = ((isinstance(dtype, types.Function) and dtype.typing_key == str) or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')) # Needs Numba astype impl support converting unicode_type to NumberClass and other types if (isinstance(self.data, StringArrayType) and not str_check): if isinstance(dtype, types.functions.NumberClass) and errors == 'raise': raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype}') if isinstance(dtype, types.StringLiteral) and errors == 'raise': try: literal_value = numpy.dtype(dtype.literal_value) except: pass # Will raise the exception later else: raise TypingError(f'Needs Numba astype impl support converting unicode_type to {dtype.literal_value}') data_narr = isinstance(self.data, types.npytypes.Array) dtype_num_liter = isinstance(dtype, (types.functions.NumberClass, types.StringLiteral)) if data_narr and dtype_num_liter or str_check: return hpat_pandas_series_astype_numba_impl if errors == 'raise': raise TypingError(f'{_func_name} The object must be a supported type. Given dtype: {dtype}') else: return hpat_pandas_series_astype_no_modify_impl @sdc_overload_method(SeriesType, 'shift') def hpat_pandas_series_shift(self, periods=1, freq=None, axis=0, fill_value=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.shift Limitations ----------- Parameters ``freq`` and ``axis`` are supported only with default values ``None`` and ``0`` respectively. Examples -------- .. literalinclude:: ../../../examples/series/series_shift.py :language: python :lines: 36- :caption: Shift index by desired number of periods with an optional time freq. :name: ex_series_shift .. command-output:: python ./series/series_shift.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.shift` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_shift* """ _func_name = 'Method shift().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'number', 'self.data.dtype') if not isinstance(fill_value, (types.Omitted, types.Number, types.NoneType)) and fill_value is not None: ty_checker.raise_exc(fill_value, 'number', 'fill_value') if not isinstance(freq, (types.Omitted, types.NoneType)) and freq is not None: ty_checker.raise_exc(freq, 'None', 'freq') if not isinstance(axis, (types.Omitted, int, types.Integer)) and not axis: ty_checker.raise_exc(axis, 'int', 'axis') fill_is_default = isinstance(fill_value, (types.Omitted, types.NoneType)) or fill_value is None series_np_dtype = [numpy_support.as_dtype(self.data.dtype)] fill_np_dtype = [numpy.float64 if fill_is_default else numpy_support.as_dtype(fill_value)] fill_dtype = types.float64 if fill_is_default else fill_value common_dtype = find_common_dtype_from_numpy_dtypes([], [self.data.dtype, fill_dtype]) if fill_is_default: def hpat_pandas_series_shift_impl(self, periods=1, freq=None, axis=0, fill_value=None): if axis != 0: raise TypingError('Method shift(). Unsupported parameters. Given axis != 0') arr = numpy.empty(shape=len(self._data), dtype=common_dtype) if periods > 0: arr[:periods] = numpy.nan arr[periods:] = self._data[:-periods] elif periods < 0: arr[periods:] = numpy.nan arr[:periods] = self._data[-periods:] else: arr[:] = self._data return pandas.Series(data=arr, index=self._index, name=self._name) return hpat_pandas_series_shift_impl def hpat_pandas_series_shift_impl(self, periods=1, freq=None, axis=0, fill_value=None): if axis != 0: raise TypingError('Method shift(). Unsupported parameters. Given axis != 0') arr = numpy.empty(len(self._data), dtype=common_dtype) if periods > 0: arr[:periods] = fill_value arr[periods:] = self._data[:-periods] elif periods < 0: arr[periods:] = fill_value arr[:periods] = self._data[-periods:] else: arr[:] = self._data return pandas.Series(data=arr, index=self._index, name=self._name) return hpat_pandas_series_shift_impl @sdc_overload_method(SeriesType, 'isin') def hpat_pandas_series_isin(self, values): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.isin Limitations ----------- - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_isin.py :language: python :lines: 27- :caption: Check whether values are contained in Series. :name: ex_series_isin .. command-output:: python ./series/series_isin.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.isin <pandas.DataFrame.isin>` Equivalent method on DataFrame. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.isin` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_isin* """ _func_name = 'Method isin().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(values, (types.Set, types.List)): ty_checker.raise_exc(values, 'set or list', 'values') if isinstance(values.dtype, (types.UnicodeType, types.StringLiteral)): def hpat_pandas_series_isin_impl(self, values): # TODO: replace with below line when Numba supports np.isin in nopython mode # return pandas.Series (np.isin (self._data, values)) values = str_list_to_array(list(values)) values = set(values) data_len = len(self._data) result = numpy.empty(data_len, dtype=numpy.bool_) for i in prange(data_len): result[i] = self._data[i] in values return pandas.Series(data=result, index=self._index, name=self._name) else: def hpat_pandas_series_isin_impl(self, values): # TODO: replace with below line when Numba supports np.isin in nopython mode # return pandas.Series (np.isin (self._data, values)) values = set(values) data_len = len(self._data) result = numpy.empty(data_len, dtype=numpy.bool_) for i in prange(data_len): result[i] = self._data[i] in values return pandas.Series(data=result, index=self._index, name=self._name) return hpat_pandas_series_isin_impl @sdc_overload_method(SeriesType, 'append') def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integrity=False): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.append Limitations ----------- - Parameter ``verify_integrity`` is currently unsupported by Intel Scalable Dataframe Compiler - Parameter ``ignore_index`` is supported as literal value only - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_append.py :language: python :lines: 37- :caption: Concatenate two or more Series. :name: ex_series_append .. command-output:: python ./series/series_append.py :cwd: ../../../examples .. seealso:: `pandas.absolute <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html#pandas.concat>`_ General function to concatenate DataFrame or Series objects. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.append` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_append* """ _func_name = 'Method append().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(to_append, SeriesType) or (isinstance(to_append, (types.UniTuple, types.List)) and isinstance(to_append.dtype, SeriesType))): ty_checker.raise_exc(to_append, 'series or list/tuple of series', 'to_append') # currently we will always raise this in the end, i.e. if no impl was found # TODO: find a way to stop compilation early and not proceed with unliteral step if not (isinstance(ignore_index, types.Literal) and isinstance(ignore_index, types.Boolean) or isinstance(ignore_index, types.Omitted) or ignore_index is False): ty_checker.raise_exc(ignore_index, 'literal Boolean constant', 'ignore_index') if not (verify_integrity is False or isinstance(verify_integrity, types.Omitted)): ty_checker.raise_exc(verify_integrity, 'bool', 'verify_integrity') # ignore_index value has to be known at compile time to select between implementations with different signatures ignore_index_is_false = (has_literal_value(ignore_index, False) or has_python_value(ignore_index, False) or isinstance(ignore_index, types.Omitted)) to_append_is_series = isinstance(to_append, SeriesType) if ignore_index_is_false: def hpat_pandas_series_append_impl(self, to_append, ignore_index=False, verify_integrity=False): if to_append_is_series == True: # noqa new_data = common_functions.hpat_arrays_append(self._data, to_append._data) new_index = common_functions.hpat_arrays_append(self.index, to_append.index) else: data_arrays_to_append = [series._data for series in to_append] index_arrays_to_append = [series.index for series in to_append] new_data = common_functions.hpat_arrays_append(self._data, data_arrays_to_append) new_index = common_functions.hpat_arrays_append(self.index, index_arrays_to_append) return pandas.Series(new_data, new_index) return hpat_pandas_series_append_impl else: def hpat_pandas_series_append_ignore_index_impl(self, to_append, ignore_index=False, verify_integrity=False): if to_append_is_series == True: # noqa new_data = common_functions.hpat_arrays_append(self._data, to_append._data) else: arrays_to_append = [series._data for series in to_append] new_data = common_functions.hpat_arrays_append(self._data, arrays_to_append) return pandas.Series(new_data, None) return hpat_pandas_series_append_ignore_index_impl @sdc_overload_method(SeriesType, 'copy') def hpat_pandas_series_copy(self, deep=True): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.copy Limitations ----------- - When ``deep=False``, a new object will be created without copying the calling object’s data and with a copy of the calling object’s indices. Examples -------- .. literalinclude:: ../../../examples/series/series_copy.py :language: python :lines: 27- :caption: Make a copy of this object’s indices and data. :name: ex_series_copy .. command-output:: python ./series/series_copy.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.copy` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series -k series_copy """ ty_checker = TypeChecker('Method Series.copy().') ty_checker.check(self, SeriesType) if not isinstance(deep, (types.Omitted, types.Boolean)) and not deep: ty_checker.raise_exc(deep, 'boolean', 'deep') if isinstance(self.index, types.NoneType): def hpat_pandas_series_copy_impl(self, deep=True): if deep: return pandas.Series(data=numpy_like.copy(self._data), name=self._name) else: return pandas.Series(data=self._data, name=self._name) return hpat_pandas_series_copy_impl else: def hpat_pandas_series_copy_impl(self, deep=True): if deep: return pandas.Series(data=numpy_like.copy(self._data), index=numpy_like.copy(self._index), name=self._name) else: # Shallow copy of index is not supported yet return pandas.Series(data=self._data, index=numpy_like.copy(self._index), name=self._name) return hpat_pandas_series_copy_impl @sdc_overload_method(SeriesType, 'corr') def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.corr Limitations ----------- - Parameter ``method`` is supported only with default value 'pearson' Examples -------- .. literalinclude:: ../../../examples/series/series_corr.py :language: python :lines: 27- :caption: Compute correlation with other Series, excluding missing values. :name: ex_series_corr .. command-output:: python ./series/series_corr.py :cwd: ../../../examples .. seealso:: :ref:`Series.cov <pandas.Series.cov>` Compute covariance with Series, excluding missing values. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.corr` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_corr* """ _func_name = 'Method corr().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) ty_checker.check(other, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data, 'number', 'self.data') if not isinstance(other.data.dtype, types.Number): ty_checker.raise_exc(other.data, 'number', 'other.data') if not isinstance(min_periods, (int, types.Integer, types.Omitted, types.NoneType)) and min_periods is not None: ty_checker.raise_exc(min_periods, 'int64', 'min_periods') def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None): return numpy_like.corr(self, other, method, min_periods) return hpat_pandas_series_corr_impl @sdc_overload_method(SeriesType, 'head') def hpat_pandas_series_head(self, n=5): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.head Examples -------- .. literalinclude:: ../../../examples/series/series_head.py :language: python :lines: 34- :caption: Getting the first n rows. :name: ex_series_head .. command-output:: python ./series/series_head.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.tail <pandas.DataFrame.tail>` Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.head` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_head* """ _func_name = 'Method head().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(n, (types.Integer, types.Omitted, types.NoneType)) and n != 5: ty_checker.raise_exc(n, 'int', 'n') if isinstance(self.index, types.NoneType): def hpat_pandas_series_head_impl(self, n=5): return pandas.Series(data=self._data[:n], name=self._name) return hpat_pandas_series_head_impl else: def hpat_pandas_series_head_index_impl(self, n=5): return pandas.Series(data=self._data[:n], index=self._index[:n], name=self._name) return hpat_pandas_series_head_index_impl @sdc_overload_method(SeriesType, 'isnull') def hpat_pandas_series_isnull(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.isnull Examples -------- .. literalinclude:: ../../../examples/series/series_isnull.py :language: python :lines: 27- :caption: Detect missing values. :name: ex_series_isnull .. command-output:: python ./series/series_isnull.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.isnull` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_isnull* """ _func_name = 'Method isnull().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if isinstance(self.data.dtype, (types.Number, types.Boolean, bool)): def hpat_pandas_series_isnull_impl(self): return pandas.Series(data=numpy_like.isnan(self._data), index=self._index, name=self._name) return hpat_pandas_series_isnull_impl if isinstance(self.data.dtype, types.UnicodeType): def hpat_pandas_series_isnull_impl(self): result = numpy.empty(len(self._data), numpy.bool_) byte_size = 8 # iterate over bits in StringArrayType null_bitmap and fill array indicating if array's element are NaN for i in range(len(self._data)): bmap_idx = i // byte_size bit_idx = i % byte_size bmap = self._data.null_bitmap[bmap_idx] bit_value = (bmap >> bit_idx) & 1 result[i] = bit_value == 0 return pandas.Series(result, index=self._index, name=self._name) return hpat_pandas_series_isnull_impl @sdc_overload_method(SeriesType, 'isna') def hpat_pandas_series_isna(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.isna Examples -------- .. literalinclude:: ../../../examples/series/series_isna.py :language: python :lines: 27- :caption: Detect missing values. :name: ex_series_isna .. command-output:: python ./series/series_isna.py :cwd: ../../../examples .. seealso:: :ref:`Series.isnull <pandas.Series.isnull>` Alias of isna. :ref:`Series.notna <pandas.Series.notna>` Boolean inverse of isna. :ref:`Series.dropna <pandas.Series.dropna>` Omit axes labels with missing values. `pandas.absolute <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.isna.html#pandas.isna>`_ Top-level isna. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.isna` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_isna* """ _func_name = 'Method isna().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if isinstance(self.data.dtype, (types.Number, types.Boolean, bool)): def hpat_pandas_series_isna_impl(self): return pandas.Series(data=numpy_like.isnan(self._data), index=self._index, name=self._name) return hpat_pandas_series_isna_impl if isinstance(self.data.dtype, types.UnicodeType): def hpat_pandas_series_isna_impl(self): result = numpy.empty(len(self._data), numpy.bool_) byte_size = 8 # iterate over bits in StringArrayType null_bitmap and fill array indicating if array's element are NaN for i in range(len(self._data)): bmap_idx = i // byte_size bit_idx = i % byte_size bmap = self._data.null_bitmap[bmap_idx] bit_value = (bmap >> bit_idx) & 1 result[i] = bit_value == 0 return pandas.Series(result, index=self._index, name=self._name) return hpat_pandas_series_isna_impl @sdc_overload_method(SeriesType, 'notna') def hpat_pandas_series_notna(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.notna Examples -------- .. literalinclude:: ../../../examples/series/series_notna.py :language: python :lines: 27- :caption: Detect existing (non-missing) values. :name: ex_series_notna .. command-output:: python ./series/series_notna.py :cwd: ../../../examples .. seealso:: :ref:`Series.notnull <pandas.Series.notnull>` Alias of notna. :ref:`Series.isna <pandas.Series.isna>` Boolean inverse of notna. :ref:`Series.dropna <pandas.Series.dropna>` Omit axes labels with missing values. `pandas.absolute <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.notna.html#pandas.notna>`_ Top-level notna. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.notna` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_notna* """ _func_name = 'Method notna().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if isinstance(self.data.dtype, (types.Number, types.Boolean, bool)): def hpat_pandas_series_notna_impl(self): return pandas.Series(numpy_like.notnan(self._data), index=self._index, name=self._name) return hpat_pandas_series_notna_impl if isinstance(self.data.dtype, types.UnicodeType): def hpat_pandas_series_notna_impl(self): result = self.isna() return pandas.Series(numpy.invert(result._data), index=self._index, name=self._name) return hpat_pandas_series_notna_impl @sdc_overload_method(SeriesType, 'sum') def hpat_pandas_series_sum( self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, ): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.sum Limitations ----------- - Parameters ``axis``, ``level``, ``numeric_only`` and ``min_count`` \ are currently unsupported by Intel Scalable Dataframe Compiler Examples -------- .. literalinclude:: ../../../examples/series/series_sum.py :language: python :lines: 27- :caption: Return the sum of the values for the requested axis. :name: ex_series_sum .. command-output:: python ./series/series_sum.py :cwd: ../../../examples .. seealso:: :ref:`Series.sum <pandas.Series.sum>` Return the sum. :ref:`Series.min <pandas.Series.min>` Return the minimum. :ref:`Series.max <pandas.Series.max>` Return the maximum. :ref:`Series.idxmin <pandas.Series.idxmin>` Return the index of the minimum. :ref:`Series.idxmax <pandas.Series.idxmax>` Return the index of the maximum. :ref:`DataFrame.sum <pandas.DataFrame.sum>` Return the sum over the requested axis. :ref:`DataFrame.min <pandas.DataFrame.min>` Return the minimum over the requested axis. :ref:`DataFrame.max <pandas.DataFrame.max>` Return the maximum over the requested axis. :ref:`DataFrame.idxmin <pandas.DataFrame.idxmin>` Return the index of the minimum over the requested axis. :ref:`DataFrame.idxmax <pandas.DataFrame.idxmax>` Return index of first occurrence of maximum over requested axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.sum` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series -k series_sum """ _func_name = 'Method sum().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(axis, (types.Integer, types.Omitted)) or axis is None): ty_checker.raise_exc(axis, 'int', 'axis') if not (isinstance(skipna, (types.Boolean, types.Omitted, types.NoneType)) or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not (isinstance(level, (types.Integer, types.StringLiteral, types.Omitted, types.NoneType)) or level is None): ty_checker.raise_exc(level, 'int or str', 'level') if not (isinstance(numeric_only, (types.Boolean, types.Omitted)) or numeric_only is None): ty_checker.raise_exc(numeric_only, 'bool', 'numeric_only') if not (isinstance(min_count, (types.Integer, types.Omitted)) or min_count == 0): ty_checker.raise_exc(min_count, 'int', 'min_count') def hpat_pandas_series_sum_impl( self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, ): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nansum(self._data) return numpy_like.sum(self._data) return hpat_pandas_series_sum_impl @sdc_overload_method(SeriesType, 'take') def hpat_pandas_series_take(self, indices, axis=0, is_copy=False): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.take Limitations ----------- Parameter ``axis`` is supported only with default values ``0`` and ``'index'``. Parameter ``is_copy`` is supported only with default value ``False``. Examples -------- .. literalinclude:: ../../../examples/series/series_take.py :language: python :lines: 27- :caption: Return the elements in the given positional indices along an axis. :name: ex_series_take .. command-output:: python ./series/series_take.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.loc <pandas.DataFrame.loc>` Select a subset of a DataFrame by labels. :ref:`DataFrame.iloc <pandas.DataFrame.iloc>` Select a subset of a DataFrame by positions. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.take` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_take_index_* """ _func_name = 'Method take().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if (not isinstance(axis, (int, types.Integer, str, types.UnicodeType, types.StringLiteral, types.Omitted)) and axis not in (0, 'index')): ty_checker.raise_exc(axis, 'integer or string', 'axis') if not isinstance(is_copy, (bool, types.Boolean, types.Omitted)) and is_copy is not False: ty_checker.raise_exc(is_copy, 'boolean', 'is_copy') if not isinstance(indices, (types.List, types.Array)): ty_checker.raise_exc(indices, 'array-like', 'indices') if isinstance(self.index, types.NoneType) or self.index is None: def hpat_pandas_series_take_noindex_impl(self, indices, axis=0, is_copy=False): local_data = [self._data[i] for i in indices] return pandas.Series(local_data, indices) return hpat_pandas_series_take_noindex_impl def hpat_pandas_series_take_impl(self, indices, axis=0, is_copy=False): local_data = [self._data[i] for i in indices] local_index = [self._index[i] for i in indices] return pandas.Series(local_data, local_index) return hpat_pandas_series_take_impl @sdc_overload_method(SeriesType, 'idxmax') def hpat_pandas_series_idxmax(self, axis=None, skipna=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.idxmax Limitations ----------- Parameter ``axis`` is supported only with default value ``None``. Parameter ``skipna`` cannot be ``False`` with data of string type. Examples -------- .. literalinclude:: ../../../examples/series/series_idxmax.py :language: python :lines: 27- :caption: Getting the row label of the maximum value. :name: ex_series_idxmax .. command-output:: python ./series/series_idxmax.py :cwd: ../../../examples .. seealso:: :ref:`Series.idxmin <pandas.Series.idxmin>` Return index label of the first occurrence of minimum of values. `numpy.absolute <https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html#numpy.argmax>`_ Return indices of the maximum values along the given axis. :ref:`DataFrame.idxmax <pandas.DataFrame.idxmax>` Return index of first occurrence of maximum over requested axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.idxmax` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_idxmax* """ _func_name = 'Method idxmax().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype') if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not (isinstance(axis, types.Omitted) or axis is None): ty_checker.raise_exc(axis, 'None', 'axis') none_index = isinstance(self.index, types.NoneType) or self.index is None if isinstance(self.data, StringArrayType): def hpat_pandas_series_idxmax_str_impl(self, axis=None, skipna=None): if skipna is None: _skipna = True else: raise ValueError("Method idxmax(). Unsupported parameter 'skipna'=False with str data") result = numpy.argmax(self._data) if none_index == True: # noqa return result else: return self._index[int(result)] return hpat_pandas_series_idxmax_str_impl def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=None): # return numpy.argmax(self._data) if skipna is None: _skipna = True else: _skipna = skipna if _skipna: result = numpy_like.nanargmax(self._data) else: result = numpy_like.argmax(self._data) if none_index == True: # noqa return result else: return self._index[int(result)] return numpy_like.argmax(self._data) return hpat_pandas_series_idxmax_impl @sdc_overload_method(SeriesType, 'prod') def hpat_pandas_series_prod(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.prod Limitations ----------- - Parameters ``axis``, ``level``, ``numeric_only`` and ``min_count`` \ are currently unsupported by Intel Scalable Dataframe Compiler Examples -------- .. literalinclude:: ../../../examples/series/series_prod.py :language: python :lines: 27- :caption: Return the product of the values. :name: ex_series_prod .. command-output:: python ./series/series_prod.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.prod` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series -k series_prod """ _func_name = 'Method prod().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, (types.Integer, types.Float)): ty_checker.raise_exc(self.data.dtype, 'numeric', 'self.data.dtype') if not (isinstance(axis, (types.Integer, types.Omitted)) or axis is None): ty_checker.raise_exc(axis, 'int', 'axis') if not (isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) or skipna is None or skipna is True): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not (isinstance(level, (types.Integer, types.StringLiteral, types.Omitted, types.NoneType)) or level is None): ty_checker.raise_exc(level, 'int or str', 'level') if not (isinstance(numeric_only, (types.Boolean, types.Omitted)) or numeric_only is None): ty_checker.raise_exc(numeric_only, 'bool', 'numeric_only') if not (isinstance(min_count, (types.Integer, types.Omitted)) or min_count == 0): ty_checker.raise_exc(min_count, 'int', 'min_count') def hpat_pandas_series_prod_impl(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nanprod(self._data) else: return numpy.prod(self._data) return hpat_pandas_series_prod_impl @sdc_overload_method(SeriesType, 'quantile') def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.quantile Limitations ----------- Parameter ``interpolation`` is currently unsupported. Examples -------- .. literalinclude:: ../../../examples/series/series_quantile.py :language: python :lines: 27- :caption: Computing quantile for the Series :name: ex_series_quantile .. command-output:: python ./series/series_quantile.py :cwd: ../../../examples .. seealso:: :ref:`core.window.Rolling.quantile <pandas.core.window.Rolling.quantile>` Calculate the rolling quantile. `numpy.percentile <https://docs.scipy.org/doc/numpy/reference/generated/numpy.percentile.html#numpy.percentile>`_ Compute the q-th percentile of the data along the specified axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.quantile` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_quantile* """ _func_name = 'Method quantile().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(interpolation, types.Omitted) and interpolation != 'linear': ty_checker.raise_exc(interpolation, 'str', 'interpolation') if not isinstance(q, (int, float, list, types.Number, types.Omitted, types.List)): ty_checker.raise_exc(q, 'int, float, list', 'q') def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'): return numpy.quantile(self._data, q) return hpat_pandas_series_quantile_impl @sdc_overload_method(SeriesType, 'rename') def hpat_pandas_series_rename(self, index=None, copy=True, inplace=False, level=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.rename Limitations ----------- - Parameter ``level`` is currently unsupported by Intel Scalable Dataframe Compiler. Examples -------- .. literalinclude:: ../../../examples/series/series_rename.py :language: python :lines: 36- :caption: Alter Series index labels or name. :name: ex_series_rename .. command-output:: python ./series/series_rename.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.rename` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_rename """ ty_checker = TypeChecker('Method rename().') ty_checker.check(self, SeriesType) if not isinstance(index, (types.Omitted, types.UnicodeType, types.StringLiteral, str, types.Integer, types.Boolean, types.Hashable, types.Float, types.NPDatetime, types.NPTimedelta, types.Number)) and index is not None: ty_checker.raise_exc(index, 'string', 'index') if not isinstance(copy, (types.Omitted, types.Boolean, bool)): ty_checker.raise_exc(copy, 'boolean', 'copy') if not isinstance(inplace, (types.Omitted, types.Boolean, bool)): ty_checker.raise_exc(inplace, 'boolean', 'inplace') if not isinstance(level, (types.Omitted, types.UnicodeType, types.StringLiteral, types.Integer)) and level is not None: ty_checker.raise_exc(level, 'Integer or string', 'level') def hpat_pandas_series_rename_idx_impl(self, index=None, copy=True, inplace=False, level=None): if copy is True: series_data = self._data.copy() series_index = self._index.copy() else: series_data = self._data series_index = self._index return pandas.Series(data=series_data, index=series_index, name=index) def hpat_pandas_series_rename_noidx_impl(self, index=None, copy=True, inplace=False, level=None): if copy is True: series_data = self._data.copy() else: series_data = self._data return pandas.Series(data=series_data, index=self._index, name=index) if isinstance(self.index, types.NoneType): return hpat_pandas_series_rename_noidx_impl return hpat_pandas_series_rename_idx_impl @sdc_overload_method(SeriesType, 'min') def hpat_pandas_series_min(self, axis=None, skipna=None, level=None, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.min Limitations ----------- Parameters ``level``, ``numeric_only`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler. Examples -------- .. literalinclude:: ../../../examples/series/series_min.py :language: python :lines: 27- :caption: Getting the minimum value of Series elements :name: ex_series_min .. command-output:: python ./series/series_min.py :cwd: ../../../examples .. seealso:: :ref:`Series.sum <pandas.Series.sum>` Return the sum. :ref:`Series.min <pandas.Series.min>` Return the minimum. :ref:`Series.max <pandas.Series.max>` Return the maximum. :ref:`Series.idxmin <pandas.Series.idxmin>` Return the index of the minimum. :ref:`Series.idxmax <pandas.Series.idxmax>` Return the index of the maximum. :ref:`DataFrame.sum <pandas.DataFrame.sum>` Return the sum over the requested axis. :ref:`DataFrame.min <pandas.DataFrame.min>` Return the minimum over the requested axis. :ref:`DataFrame.max <pandas.DataFrame.max>` Return the maximum over the requested axis. :ref:`DataFrame.idxmin <pandas.DataFrame.idxmin>` Return the index of the minimum over the requested axis. :ref:`DataFrame.idxmax <pandas.DataFrame.idxmax>` Return the index of the maximum over the requested axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.min` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_min* """ _func_name = 'Method min().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, (types.Integer, types.Float)): ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype') if not (isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) or skipna is True or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(axis, types.Omitted) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, types.Omitted) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_min_impl(self, axis=None, skipna=None, level=None, numeric_only=None): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nanmin(self._data) return self._data.min() return hpat_pandas_series_min_impl @sdc_overload_method(SeriesType, 'max') def hpat_pandas_series_max(self, axis=None, skipna=None, level=None, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.max Limitations ----------- Parameters ``axis``, ``level`` and ``numeric_only`` are currently unsupported. Examples -------- .. literalinclude:: ../../../examples/series/series_max.py :language: python :lines: 27- :caption: Getting the maximum value of Series elements :name: ex_series_max .. command-output:: python ./series/series_max.py :cwd: ../../../examples .. seealso:: :ref:`Series.sum <pandas.Series.sum>` Return the sum. :ref:`Series.min <pandas.Series.min>` Return the minimum. :ref:`Series.max <pandas.Series.max>` Return the maximum. :ref:`Series.idxmin <pandas.Series.idxmin>` Return the index of the minimum. :ref:`Series.idxmax <pandas.Series.idxmax>` Return the index of the maximum. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.max` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_max* """ _func_name = 'Method max().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, (types.Integer, types.Float)): ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype') if not (isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) or skipna is True or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(axis, types.Omitted) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, types.Omitted) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_max_impl(self, axis=None, skipna=None, level=None, numeric_only=None): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nanmax(self._data) return self._data.max() return hpat_pandas_series_max_impl @sdc_overload_method(SeriesType, 'mean') def hpat_pandas_series_mean(self, axis=None, skipna=None, level=None, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.mean Limitations ----------- - Parameters ``axis``, ``level`` and ``numeric_only`` \ are currently unsupported by Intel Scalable Dataframe Compiler. Examples -------- .. literalinclude:: ../../../examples/series/series_mean.py :language: python :lines: 27- :caption: Return the mean of the values. :name: ex_series_mean .. command-output:: python ./series/series_mean.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.mean` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series -k series_mean """ _func_name = 'Method mean().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'numeric', 'self.data.dtype') if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None: ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(axis, types.Omitted) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, types.Omitted) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_mean_impl(self, axis=None, skipna=None, level=None, numeric_only=None): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nanmean(self._data) return self._data.mean() return hpat_pandas_series_mean_impl @sdc_overload_method(SeriesType, 'idxmin') def hpat_pandas_series_idxmin(self, axis=None, skipna=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.idxmin Limitations ----------- - Parameter ``axis`` is supported only with default value ``None``. - Parameter ``skipna`` cannot be ``False`` with data of string type. Examples -------- .. literalinclude:: ../../../examples/series/series_idxmin.py :language: python :lines: 27- :caption: Getting the row label of the minimum value. :name: ex_series_idxmin .. command-output:: python ./series/series_idxmin.py :cwd: ../../../examples .. seealso:: :ref:`Series.idxmax <pandas.Series.idxmax>` Return index label of the first occurrence of maximum of values. `numpy.argmin <https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html#numpy.argmin>`_ Return indices of the minimum values along the given axis. :ref:`DataFrame.idxmin <pandas.DataFrame.idxmin>` Return index of first occurrence of minimum over requested axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.idxmin` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_idxmin* """ _func_name = 'Method idxmin().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype') if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not (isinstance(axis, types.Omitted) or axis is None): ty_checker.raise_exc(axis, 'None', 'axis') none_index = isinstance(self.index, types.NoneType) or self.index is None if isinstance(self.data, StringArrayType): def hpat_pandas_series_idxmin_str_impl(self, axis=None, skipna=None): if skipna is None: _skipna = True else: raise ValueError("Method idxmin(). Unsupported parameter 'skipna'=False with str data") result = numpy.argmin(self._data) if none_index == True: # noqa return result else: return self._index[int(result)] return hpat_pandas_series_idxmin_str_impl def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=None): # return numpy.argmin(self._data) if skipna is None: _skipna = True else: _skipna = skipna if _skipna: result = numpy_like.nanargmin(self._data) else: result = numpy_like.argmin(self._data) if none_index == True: # noqa return result else: return self._index[int(result)] return numpy_like.argmin(self._data) return hpat_pandas_series_idxmin_impl @sdc_overload_method(SeriesType, 'abs') def hpat_pandas_series_abs(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.abs Examples -------- .. literalinclude:: ../../../examples/series/series_abs.py :language: python :lines: 27- :caption: Getting the absolute value of each element in Series :name: ex_series_abs .. command-output:: python ./series/series_abs.py :cwd: ../../../examples .. seealso:: `numpy.absolute <https://docs.scipy.org/doc/numpy/reference/generated/numpy.absolute.html>`_ Calculate the absolute value element-wise. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.abs` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_abs1 """ _func_name = 'Method abs().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.dtype, (types.Integer, types.Float)): raise TypingError( '{} The function only applies to elements that are all numeric. Given data type: {}'.format(_func_name, self.dtype)) def hpat_pandas_series_abs_impl(self): return pandas.Series(numpy.abs(self._data)) return hpat_pandas_series_abs_impl @sdc_overload_method(SeriesType, 'unique') def hpat_pandas_series_unique(self): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.unique Limitations ----------- - Return values order is unspecified - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_unique.py :language: python :lines: 27- :caption: Getting unique values in Series :name: ex_series_unique .. command-output:: python ./series/series_unique.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.unique` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_sorted """ ty_checker = TypeChecker('Method unique().') ty_checker.check(self, SeriesType) if isinstance(self.data, StringArrayType): def hpat_pandas_series_unique_str_impl(self): ''' Returns sorted unique elements of an array Note: Can't use Numpy due to StringArrayType has no ravel() for noPython mode. Also, NotImplementedError: unicode_type cannot be represented as a Numpy dtype Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_str ''' str_set = set(self._data) return to_array(str_set) return hpat_pandas_series_unique_str_impl def hpat_pandas_series_unique_impl(self): ''' Returns sorted unique elements of an array Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique ''' return numpy.unique(self._data) return hpat_pandas_series_unique_impl @sdc_overload_method(SeriesType, 'cumsum') def hpat_pandas_series_cumsum(self, axis=None, skipna=True): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.cumsum Limitations ----------- Parameter ``axis`` is supported only with default value ``None``. Examples -------- .. literalinclude:: ../../../examples/series/series_cumsum.py :language: python :lines: 27- :caption: Returns cumulative sum over Series. :name: ex_series_cumsum .. command-output:: python ./series/series_cumsum.py :cwd: ../../../examples .. seealso:: :ref:`Series.sum <pandas.Series.sum>` Return the sum over Series. :ref:`Series.cummax <pandas.Series.cummax>` Return cumulative maximum over Series. :ref:`Series.cummin <pandas.Series.cummin>` Return cumulative minimum over Series. :ref:`Series.cumprod <pandas.Series.cumprod>` Return cumulative product over Series. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.cumsum` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_cumsum* """ _func_name = 'Method cumsum().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'numeric', 'self.data.dtype') if not isinstance(axis, (types.Omitted, types.NoneType)) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') def hpat_pandas_series_cumsum_impl(self, axis=None, skipna=True): if skipna: return pandas.Series(numpy_like.nancumsum(self._data, like_pandas=True)) return pandas.Series(numpy_like.cumsum(self._data)) return hpat_pandas_series_cumsum_impl @sdc_overload_method(SeriesType, 'nunique') def hpat_pandas_series_nunique(self, dropna=True): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.nunique Limitations ----------- - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_nunique.py :language: python :lines: 27- :caption: Return number of unique elements in the object. :name: ex_series_nunique .. command-output:: python ./series/series_nunique.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.nunique <pandas.DataFrame.nunique>` Method nunique for DataFrame. :ref:`Series.count <pandas.Series.count>` Count non-NA/null observations in the Series. :ref:`DatatFrame.count <pandas.DataFrame.count>` Count non-NA cells for each column Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.nunique` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_nunique """ _func_name = 'Method nunique().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if isinstance(self.data, StringArrayType): def hpat_pandas_series_nunique_str_impl(self, dropna=True): """ It is better to merge with Numeric branch """ data = self._data if dropna: nan_mask = self.isna() data = self._data[~nan_mask._data] unique_values = set(data) return len(unique_values) return hpat_pandas_series_nunique_str_impl def hpat_pandas_series_nunique_impl(self, dropna=True): """ This function for Numeric data because NumPy dosn't support StringArrayType Algo looks a bit ambigous because, currently, set() can not be used with NumPy with Numba JIT """ data_mask_for_nan = numpy.isnan(self._data) nan_exists = numpy.any(data_mask_for_nan) data_no_nan = self._data[~data_mask_for_nan] data_set = set(data_no_nan) if dropna or not nan_exists: return len(data_set) else: return len(data_set) + 1 return hpat_pandas_series_nunique_impl @sdc_overload_method(SeriesType, 'count') def hpat_pandas_series_count(self, level=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.count Limitations ----------- Parameter ``level`` is currently unsupported. Examples -------- .. literalinclude:: ../../../examples/series/series_count.py :language: python :lines: 27- :caption: Counting non-NaN values in Series :name: ex_series_count .. command-output:: python ./series/series_count.py :cwd: ../../../examples .. seealso:: :ref:`Series.value_counts <pandas.Series.value_counts>` Return a Series containing counts of unique values. :ref:`Series.str.len <pandas.Series.str.len>` Count the length of each element in the Series. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.count` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_count """ _func_name = 'Method count().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if isinstance(self.data, StringArrayType): def hpat_pandas_series_count_str_impl(self, level=None): nan_mask = self.isna() return numpy.sum(nan_mask._data == 0) return hpat_pandas_series_count_str_impl if isinstance(self.data, types.Array) and isinstance(self.data.dtype, types.Integer): def hpat_pandas_series_count_int_impl(self, level=None): return len(self._data) return hpat_pandas_series_count_int_impl def hpat_pandas_series_count_impl(self, level=None): """ Return number of non-NA/null observations in the object Returns number of unique elements in the object Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_count """ result = 0 for i in prange(len(self._data)): if not numpy.isnan(self._data[i]): result = result + 1 return result return hpat_pandas_series_count_impl @sdc_overload_method(SeriesType, 'median') def hpat_pandas_series_median(self, axis=None, skipna=None, level=None, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.median Limitations ----------- - Parameters ``axis``, ``level`` and ``numeric_only`` are supported only with default value ``None``. Examples -------- .. literalinclude:: ../../../examples/series/series_median.py :language: python :lines: 27- :caption: Return the median of the values for the requested axis. :name: ex_series_median .. command-output:: python ./series/series_median.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.median <pandas.DataFrame.median>` Return the median of the values for the columns. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.median` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_median1* """ _func_name = 'Method median().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.dtype, types.Number): ty_checker.raise_exc(self.dtype, 'numeric', 'self.dtype') if not (isinstance(axis, (types.Integer, types.UnicodeType, types.Omitted)) or axis is None): ty_checker.raise_exc(axis, 'int or str', 'axis') if not (isinstance(skipna, (types.Boolean, types.Omitted, types.NoneType)) or skipna or skipna is None): ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(axis, types.Omitted) and axis is not None: ty_checker.raise_exc(axis, 'None', 'axis') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, types.Omitted) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def hpat_pandas_series_median_impl(self, axis=None, skipna=None, level=None, numeric_only=None): if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy.nanmedian(self._data) return numpy.median(self._data) return hpat_pandas_series_median_impl @sdc_overload_method(SeriesType, 'argsort') def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.argsort Limitations ----------- - Parameter ``axis`` is supported only with default value ``0``. - Parameter ``order`` is supported only with default value ``None``. - Parameter ``kind`` is supported only with values ``'mergesort'`` and ``'quicksort'``. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_argsort.py :language: python :lines: 27- :caption: Override ndarray.argsort. :name: ex_series_argsort .. command-output:: python ./series/series_argsort.py :cwd: ../../../examples .. seealso:: `numpy.ndarray.argsort <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.argsort.html#numpy.ndarray.argsort>`_ Return indices of the minimum values along the given axis. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.argsort` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_argsort* """ _func_name = 'Method argsort().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype') if not (isinstance(axis, types.Omitted) or isinstance(axis, types.Integer) or axis == 0): ty_checker.raise_exc(axis, 'int64', 'axis') if not isinstance(kind, (types.Omitted, str, types.UnicodeType, types.StringLiteral)): ty_checker.raise_exc(kind, 'quicksort', 'kind') if not isinstance(order, (str, types.UnicodeType, types.StringLiteral, types.Omitted, types.NoneType, types.List))\ and order is not None: ty_checker.raise_exc(order, 'None', 'order') if not isinstance(self.index, types.NoneType): def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=None): if kind != 'quicksort' and kind != 'mergesort': raise ValueError("Method argsort(). Unsupported parameter. Given 'kind' != 'quicksort' or 'mergesort'") if kind == 'mergesort': #It is impossible to use numpy.argsort(self._data, kind=kind) since numba gives typing error sort = numpy.argsort(self._data, kind='mergesort') else: sort = numpy.argsort(self._data) na = self.isna().sum() result = numpy.empty(len(self._data), dtype=numpy.int64) na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) if kind == 'mergesort': sort_nona = numpy.argsort(self._data[~na_data_arr], kind='mergesort') else: sort_nona = numpy.argsort(self._data[~na_data_arr]) q = 0 for id, i in enumerate(sort): if id in set(sort[len(self._data) - na:]): q += 1 else: result[id] = sort_nona[id - q] for i in sort[len(self._data) - na:]: result[i] = -1 return pandas.Series(result, self._index) return hpat_pandas_series_argsort_idx_impl def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=None): if kind != 'quicksort' and kind != 'mergesort': raise ValueError("Method argsort(). Unsupported parameter. Given 'kind' != 'quicksort' or 'mergesort'") if kind == 'mergesort': sort = numpy.argsort(self._data, kind='mergesort') else: sort = numpy.argsort(self._data) na = self.isna().sum() result = numpy.empty(len(self._data), dtype=numpy.int64) na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) if kind == 'mergesort': sort_nona = numpy.argsort(self._data[~na_data_arr], kind='mergesort') else: sort_nona = numpy.argsort(self._data[~na_data_arr]) q = 0 for id, i in enumerate(sort): if id in set(sort[len(self._data) - na:]): q += 1 else: result[id] = sort_nona[id - q] for i in sort[len(self._data) - na:]: result[i] = -1 return pandas.Series(result) return hpat_pandas_series_argsort_noidx_impl @sdc_overload_method(SeriesType, 'sort_values', parallel=False) def hpat_pandas_series_sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.sort_values Limitations ----------- - Parameter ``inplace`` is supported only with default value ``False``. - Parameter ``axis`` is currently unsupported by Intel Scalable Dataframe Compiler. - Parameter ``kind`` is supported only with values ``'mergesort'`` and ``'quicksort'``. - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_sort_values.py :language: python :lines: 36- :caption: Sort by the values. :name: ex_series_sort_values .. command-output:: python ./series/series_sort_values.py :cwd: ../../../examples .. seealso:: :ref:`Series.sort_index <pandas.Series.sort_index>` Sort by the Series indices. :ref:`DataFrame.sort_values <pandas.DataFrame.sort_values>` Sort DataFrame by the values along either axis. :ref:`DataFrame.sort_index <pandas.DataFrame.sort_index>` Sort DataFrame by indices. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.sort_values` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values* """ _func_name = 'Method sort_values().' ty_checker = TypeChecker('Method sort_values().') ty_checker.check(self, SeriesType) axis_supported_types = (int, types.Omitted, types.Integer, types.StringLiteral, types.UnicodeType) if not isinstance(axis, axis_supported_types): ty_checker.raise_exc(axis, 'integer or string', 'axis') ascending_supported_types = (bool, types.Omitted, types.Boolean) if not isinstance(ascending, ascending_supported_types): ty_checker.raise_exc(ascending, 'boolean', 'ascending') kind_supported_types = (str, types.Omitted, types.NoneType, types.StringLiteral, types.UnicodeType) if not isinstance(kind, kind_supported_types): ty_checker.raise_exc(kind, 'string', 'kind') kind_is_none_or_default = isinstance(kind, (str, types.Omitted, types.NoneType)) na_position_supported_types = (str, types.Omitted, types.StringLiteral, types.UnicodeType) if not isinstance(na_position, na_position_supported_types): ty_checker.raise_exc(na_position, 'string', 'na_position') if not (inplace is False or isinstance(inplace, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, inplace)) def _sdc_pandas_series_sort_values_impl( self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): common_functions._sdc_pandas_series_check_axis(axis) if not (kind_is_none_or_default or kind in ('quicksort', 'mergesort')): raise ValueError("Method sort_values(). Unsupported parameter. Given kind != 'quicksort', 'mergesort'") if na_position not in ('last', 'first'): raise ValueError("Method sort_values(). Unsupported parameter. Given na_position != 'last', 'first'") data_nan_mask = sdc.hiframes.api.get_nan_mask(self._data) good = ~data_nan_mask if kind_is_none_or_default == True: # noqa argsort_res = sdc_arrays_argsort(self._data[good], kind='quicksort') else: argsort_res = sdc_arrays_argsort(self._data[good], kind=kind) if not ascending: argsort_res = argsort_res[::-1] idx = numpy.arange(len(self), dtype=numpy.int32) sorted_index = numpy.empty(len(self), dtype=numpy.int32) if na_position == "last": nans_start, nans_stop = good.sum(), len(self) sorted_index[:nans_start] = idx[good][argsort_res] sorted_index[nans_start: nans_stop] = idx[data_nan_mask] elif na_position == "first": nans_start, nans_stop = 0, data_nan_mask.sum() sorted_index[nans_stop:] = idx[good][argsort_res] sorted_index[:nans_stop] = idx[data_nan_mask] result_data = self._data[sorted_index] result_index = self.index[sorted_index] return pandas.Series(data=result_data, index=result_index, name=self._name) return _sdc_pandas_series_sort_values_impl @sdc_overload_method(SeriesType, 'dropna') def hpat_pandas_series_dropna(self, axis=0, inplace=False): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.dropna Limitations ----------- - Parameters ``inplace`` and ``axis`` are currently unsupported by Intel Scalable Dataframe Compiler. Examples -------- .. literalinclude:: ../../../examples/series/series_dropna.py :language: python :lines: 34- :caption: Return a new Series with missing values removed. :name: ex_series_dropna .. command-output:: python ./series/series_dropna.py :cwd: ../../../examples .. seealso:: :ref:`Series.isna <pandas.Series.isna>` Indicate missing values. :ref:`Series.notna <pandas.Series.notna>` Indicate existing (non-missing) values. :ref:`Series.fillna <pandas.Series.fillna>` Replace missing values. :ref:`DataFrame.dropna <pandas.DataFrame.dropna>` Drop rows or columns which contain NA values. `pandas.Index.dropna <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Index.dropna.html#pandas.Index.dropna>`_ Return Index without NA/NaN values Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.dropna` implementation. .. only:: developer Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_dropna* """ _func_name = 'Method dropna().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(axis, (types.Integer, types.StringLiteral, types.UnicodeType, types.Omitted)) or axis == 0): ty_checker.raise_exc(axis, 'int or str', 'axis') if not (inplace is False or isinstance(inplace, types.Omitted)): ty_checker.raise_exc(inplace, 'bool', 'inplace') if (isinstance(self.data.dtype, types.Number) and isinstance(self.index, (types.Number, types.NoneType, RangeIndexType))): def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False): index = self.index return numpy_like.dropna(self._data, index, self._name) return hpat_pandas_series_dropna_impl else: def hpat_pandas_series_dropna_str_impl(self, axis=0, inplace=False): # generate Series index if needed by using SeriesType.index (i.e. not self._index) na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) data = self._data[~na_data_arr] index = self.index[~na_data_arr] return pandas.Series(data, index, self._name) return hpat_pandas_series_dropna_str_impl @sdc_overload_method(SeriesType, 'fillna') def hpat_pandas_series_fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.fillna Limitations ----------- - Parameters ``method``, ``limit`` and ``downcast`` are currently unsupported by Intel Scalable Dataframe Compiler. - Parameter ``inplace`` is supported with literal value only. Examples -------- .. literalinclude:: ../../../examples/series/series_fillna.py :language: python :lines: 35- :caption: Fill NA/NaN values using the specified method. :name: ex_series_fillna .. command-output:: python ./series/series_fillna.py :cwd: ../../../examples .. seealso:: `pandas.interpolate <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.interpolate.html#pandas.Series.interpolate>`_ Fill NaN values using interpolation. `pandas.reindex <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.reindex.html#pandas.Series.reindex>`_ Conform object to new index. `pandas.asfreq <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.asfreq.html#pandas.Series.asfreq>`_ Convert TimeSeries to specified frequency. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.fillna` implementation. .. only:: developer Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_fillna* """ _func_name = 'Method fillna().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(axis, (types.Integer, types.StringLiteral, types.UnicodeType, types.Omitted)) or axis is None): ty_checker.raise_exc(axis, 'int or str', 'axis') if not (isinstance(inplace, types.Literal) and isinstance(inplace, types.Boolean) or isinstance(inplace, types.Omitted) or inplace is False): ty_checker.raise_exc(inplace, 'bool', 'inplace') if not isinstance(method, (types.Omitted, types.NoneType)) and method is not None: ty_checker.raise_exc(method, 'None', 'method') if not isinstance(limit, (types.Omitted, types.NoneType)) and limit is not None: ty_checker.raise_exc(limit, 'None', 'limit') if not isinstance(downcast, (types.Omitted, types.NoneType)) and downcast is not None: ty_checker.raise_exc(downcast, 'None', 'downcast') # inplace value has to be known at compile time to select between implementations with different signatures if ((isinstance(inplace, types.Literal) and inplace.literal_value == True) or (isinstance(inplace, bool) and inplace == True)): # do operation inplace, fill the NA/NaNs in the same array and return None if isinstance(self.dtype, types.UnicodeType): # TODO: StringArrayType cannot resize inplace, and assigning a copy back to self._data is not possible now raise TypingError('{} Not implemented when Series dtype is {} and\ inplace={}'.format(_func_name, self.dtype, inplace)) else: def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): return numpy_like.fillna(self._data, inplace=inplace, value=value) return hpat_pandas_series_fillna_impl else: # non inplace implementations, copy array, fill the NA/NaN and return a new Series if isinstance(self.dtype, types.UnicodeType): # For StringArrayType implementation is taken from _series_fillna_str_alloc_impl # (can be called directly when it's index handling is fixed) def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value), index=self._index, name=self._name) return hpat_pandas_series_str_fillna_impl elif isinstance(self.dtype, (types.Integer, types.Boolean)): def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value), index=self._index, name=self._name) return hpat_pandas_series_no_nan_fillna_impl else: def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): filled_data = numpy_like.fillna(self._data, inplace=inplace, value=value) return pandas.Series(data=filled_data, index=self._index, name=self._name) return hpat_pandas_series_fillna_impl @sdc_overload_method(SeriesType, 'cov') def hpat_pandas_series_cov(self, other, min_periods=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.cov Examples -------- .. literalinclude:: ../../../examples/series/series_cov.py :language: python :lines: 27- :caption: Compute covariance with Series, excluding missing values. :name: ex_series_cov .. command-output:: python ./series/series_cov.py :cwd: ../../../examples .. seealso:: :ref:`Series.corr <pandas.Series.corr>` Compute correlation with other Series, excluding missing values. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.cov` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_cov* """ ty_checker = TypeChecker('Method cov().') ty_checker.check(self, SeriesType) ty_checker.check(other, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'number', 'self.data') if not isinstance(other.data.dtype, types.Number): ty_checker.raise_exc(other.data.dtype, 'number', 'other.data') if not isinstance(min_periods, (types.Integer, types.Omitted, types.NoneType)) and min_periods is not None: ty_checker.raise_exc(min_periods, 'int64', 'min_periods') def hpat_pandas_series_cov_impl(self, other, min_periods=None): if min_periods is None: min_periods = 2 if min_periods < 2: min_periods = 2 min_len = min(len(self._data), len(other._data)) if min_len == 0: return numpy.nan other_sum = 0. self_sum = 0. self_other_sum = 0. total_count = 0 for i in prange(min_len): s = self._data[i] o = other._data[i] if not (numpy.isnan(s) or numpy.isnan(o)): self_sum += s other_sum += o self_other_sum += s*o total_count += 1 if total_count < min_periods: return numpy.nan return (self_other_sum - self_sum*other_sum/total_count)/(total_count - 1) return hpat_pandas_series_cov_impl @sdc_overload_method(SeriesType, 'pct_change', parallel=False) def hpat_pandas_series_pct_change(self, periods=1, fill_method='pad', limit=None, freq=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.pct_change Limitations ----------- - Parameters limit, freq are currently unsupported by Intel Scalable Dataframe Compiler - This function may reveal slower performance than Pandas* on user system. Users should exercise a tradeoff between staying in JIT-region with that function or going back to interpreter mode. Examples -------- .. literalinclude:: ../../../examples/series/series_pct_change.py :language: python :lines: 36- :caption: Percentage change between the current and a prior element. :name: ex_series_pct_change .. command-output:: python ./series/series_pct_change.py :cwd: ../../../examples .. seealso:: :ref:`Series.diff <pandas.Series.diff>` Compute the difference of two elements in a Series. :ref:`DataFrame.diff <pandas.DataFrame.diff>` Compute the difference of two elements in a DataFrame. :ref:`Series.shift <pandas.Series.shift>` Shift the index by some number of periods. :ref:`DataFrame.shift <pandas.DataFrame.shift>` Shift the index by some number of periods. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.pct_change` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_pct_change """ ty_checker = TypeChecker('Method pct_change().') ty_checker.check(self, SeriesType) if not isinstance(self.data.dtype, types.Number): ty_checker.raise_exc(self.data.dtype, 'number', 'self.data') if not isinstance(periods, (types.Integer, types.Omitted)): ty_checker.raise_exc(periods, 'int64', 'periods') if not isinstance(fill_method, (str, types.UnicodeType, types.StringLiteral, types.NoneType, types.Omitted)): ty_checker.raise_exc(fill_method, 'string', 'fill_method') if not isinstance(limit, (types.Omitted, types.NoneType)): ty_checker.raise_exc(limit, 'None', 'limit') if not isinstance(freq, (types.Omitted, types.NoneType)): ty_checker.raise_exc(freq, 'None', 'freq') def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit=None, freq=None): if not (fill_method is None or fill_method in ['pad', 'ffill', 'backfill', 'bfill']): raise ValueError( "Method pct_change(). Unsupported parameter. The function uses fill_method pad (ffill) or backfill (bfill) or None.") local_series = self.copy() if fill_method is not None: # replacement method fillna for given method # ========================================= # Example: # s = [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2] # result = [1.1, 0.3, 0.3, 1, inf, 0, 1.1, 1.1, 2.2, inf, 2, 2] # ========================================== for i in range(len(local_series._data)): # check each element on numpy.nan if numpy.isnan(local_series._data[i]): if fill_method in ['pad', 'ffill']: # if it first element is nan, element will be is nan # if it not first element, element will be is nearest is not nan element # take a step back while will not find is not nan element # if before the first element you did not find one, the element will be equal nan if i == 0: local_series._data[i] = numpy.nan else: k = 1 while numpy.isnan(local_series._data[i - k]): if i - k == 0: local_series._data[i] = numpy.nan break k += 1 local_series._data[i] = local_series._data[i - k] elif fill_method in ['backfill', 'bfill']: # if it last element is nan, element will be is nan # if it not last element, element will be is nearest is not nan element # take a step front while will not find is not nan element # if before the last element you did not find one, the element will be equal nan if i == len(local_series._data)-1: local_series._data[i] = numpy.nan else: k = 1 while numpy.isnan(local_series._data[i + k]): if i + k == len(local_series._data) - 1: local_series._data[i] = numpy.nan break k += 1 local_series._data[i] = local_series._data[i + k] rshift = local_series.shift(periods=periods, freq=freq) rdiv = local_series.div(rshift) result = rdiv._data - 1 return pandas.Series(result) return hpat_pandas_series_pct_change_impl @sdc_overload_method(SeriesType, 'describe') def hpat_pandas_series_describe(self, percentiles=None, include=None, exclude=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.describe Limitations ----------- - Parameters ``include`` and ``exclude`` are currently unsupported by Intel Scalable Dataframe Compiler. - For string Series resulting values are returned as strings. Examples -------- .. literalinclude:: ../../../examples/series/series_describe.py :language: python :lines: 39- :caption: Generate descriptive statistics. :name: ex_series_describe .. command-output:: python ./series/series_describe.py :cwd: ../../../examples .. seealso:: :ref:`DataFrame.count <pandas.DataFrame.count>` Count number of non-NA/null observations. :ref:`DataFrame.max <pandas.DataFrame.max>` Maximum of the values in the object. :ref:`DataFrame.min <pandas.DataFrame.min>` Minimum of the values in the object. :ref:`DataFrame.mean <pandas.DataFrame.mean>` Mean of the values. :ref:`DataFrame.std <pandas.DataFrame.std>` Standard deviation of the observations. :ref:`DataFrame.select_dtypes <pandas.DataFrame.select_dtypes>` Subset of a DataFrame including/excluding columns based on their dtype. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.describe` implementation. .. only:: developer Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_describe* """ _func_name = 'Method describe().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not (isinstance(percentiles, (types.List, types.Array, types.UniTuple)) and isinstance(percentiles.dtype, types.Number) or isinstance(percentiles, (types.Omitted, types.NoneType)) or percentiles is None): ty_checker.raise_exc(percentiles, 'list-like', 'percentiles') if not (isinstance(include, (types.Omitted, types.NoneType)) or include is None): raise TypingError('{} Unsupported parameters. Given include: {}'.format(_func_name, include)) if not (isinstance(exclude, (types.Omitted, types.NoneType)) or exclude is None): raise TypingError('{} Unsupported parameters. Given exclude: {}'.format(_func_name, exclude)) is_percentiles_none = percentiles is None or isinstance(percentiles, (types.Omitted, types.NoneType)) if isinstance(self.dtype, types.Number): def hpat_pandas_series_describe_numeric_impl(self, percentiles=None, include=None, exclude=None): if is_percentiles_none == False: # noqa percentiles_list = list(percentiles) median_in_percentiles = 0.5 in percentiles_list if not median_in_percentiles: percentiles_list.append(0.5) sorted_percentiles = sorted(percentiles_list) # check percentiles have correct values: arr = numpy.asarray(sorted_percentiles) if len(numpy.unique(arr)) != len(arr): raise ValueError("percentiles cannot contain duplicates") if numpy.any(arr[(arr < 0) * (arr > 1)]): raise ValueError("percentiles should all be in the interval [0, 1].") # TODO: support proper rounding of percentiles like in pandas.io.formats.format.format_percentiles # requires numpy.round(precision), numpy.isclose to be supported by Numba percentiles_indexes = common_functions._sdc_pandas_format_percentiles(arr) else: sorted_percentiles = [0.25, 0.5, 0.75] percentiles_indexes = ['25%', '50%', '75%'] index_strings = ['count', 'mean', 'std', 'min'] index_strings.extend(percentiles_indexes) index_strings.append('max') values = [] values.append(numpy.float64(self.count())) values.append(self.mean()) values.append(self.std()) values.append(self.min()) for p in sorted_percentiles: values.append(self.quantile(p)) values.append(self.max()) return pandas.Series(values, index_strings) return hpat_pandas_series_describe_numeric_impl elif isinstance(self.dtype, types.UnicodeType): def hpat_pandas_series_describe_string_impl(self, percentiles=None, include=None, exclude=None): objcounts = self.value_counts() index_strings = ['count', 'unique', 'top', 'freq'] # use list of strings for the output series, since Numba doesn't support np.arrays with object dtype values = [] values.append(str(self.count())) values.append(str(len(self.unique()))) values.append(str(objcounts.index[0])) values.append(str(objcounts.iloc[0])) return pandas.Series(values, index_strings) return hpat_pandas_series_describe_string_impl elif isinstance(self.dtype, (types.NPDatetime, types.NPTimedelta)): # TODO: provide specialization for (types.NPDatetime, types.NPTimedelta) # needs dropna for date-time series, conversion to int and tz_convert to be implemented return None return None @sdc_overload(operator.add, parallel=False) def sdc_pandas_str_series_operator_add(self, other): """ Additional overload of operator.add for Series of strings. For generic overload see - autogenerated one in sdc_autogenerated.py """ _func_name = 'Operator add().' ty_checker = TypeChecker('Operator add().') self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType) if not (self_is_series or other_is_series): return None # this overload is for string series only (so check that at least one is series of strings) self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType) other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType) if not (self_is_string_series or other_is_string_series): return None if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)): ty_checker.raise_exc(self, 'pandas.series or string', 'self') if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)): ty_checker.raise_exc(other, 'pandas.series or string', 'other') operands_are_series = self_is_series and other_is_series if operands_are_series: none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self)) and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other))) series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes if not series_indexes_comparable: raise TypingError('{} Not implemented for series with not-comparable indexes. \ Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index)) series_data_comparable = check_types_comparable(self, other) if not series_data_comparable: raise TypingError('{} Not supported for not-comparable operands. \ Given: self={}, other={}'.format(_func_name, self, other)) if not operands_are_series: def _series_operator_add_scalar_impl(self, other): if self_is_series == True: # noqa result_data = self._data + other return pandas.Series(result_data, index=self._index, name=self._name) else: result_data = self + other._data return pandas.Series(result_data, index=other._index, name=other._name) return _series_operator_add_scalar_impl else: # both operands are string series # TO-DO: None indexes branch is dead code, remove? if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)): def _series_operator_add_none_indexes_impl(self, other): if (len(self._data) == len(other._data)): result_data = self._data + other._data return pandas.Series(result_data) else: left_size, right_size = len(self._data), len(other._data) min_data_size = min(left_size, right_size) max_data_size = max(left_size, right_size) result_data_as_list = [] result_nan_mask = numpy.zeros(max_data_size, dtype=numpy.bool_) result_nan_mask[min_data_size:] = True for i in numpy.arange(min_data_size): if str_arr_is_na(self._data, i) or str_arr_is_na(other._data, i): result_nan_mask[i] = True result_data_as_list.append('') else: result_data_as_list.append(self._data[i] + other._data[i]) result_data_as_list.extend([''] * (max_data_size - min_data_size)) result_data = create_str_arr_from_list(result_data_as_list) str_arr_set_na_by_mask(result_data, result_nan_mask) return pandas.Series(result_data, self._index) return _series_operator_add_none_indexes_impl else: left_index_is_range = isinstance(self.index, RangeIndexType) numba_index_common_dtype = find_common_dtype_from_numpy_dtypes( [self.index.dtype, other.index.dtype], []) common_dtype_different = (numba_index_common_dtype != self.index.dtype or numba_index_common_dtype != other.index.dtype) def _series_operator_add_common_impl(self, other): left_index, right_index = self.index, other.index # TO-DO: coversion of RangeIndexType to np.array may happen several times here: # in array_equal, in astype or left_index.values - need caching of array allocated once # check if indexes are equal and series don't have to be aligned if (left_index is right_index or numpy_like.array_equal(left_index, right_index)): result_data = self._data + other._data if common_dtype_different == True: # noqa result_index = numpy_like.astype(left_index, numba_index_common_dtype) else: result_index = left_index.values if left_index_is_range == True else left_index # noqa return pandas.Series(result_data, index=result_index) # TODO: replace below with core join(how='outer', return_indexers=True) when implemented joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index) result_size = len(joined_index) result_nan_mask = numpy.zeros(result_size, dtype=numpy.bool_) result_data_as_list = [] for i in numpy.arange(result_size): if (left_indexer[i] == -1 or right_indexer[i] == -1 or str_arr_is_na(self._data, left_indexer[i]) or str_arr_is_na(other._data, right_indexer[i])): result_nan_mask[i] = True result_data_as_list.append('') else: result_data_as_list.append(self._data[left_indexer[i]] + other._data[right_indexer[i]]) result_data = create_str_arr_from_list(result_data_as_list) str_arr_set_na_by_mask(result_data, result_nan_mask) return pandas.Series(result_data, joined_index) return _series_operator_add_common_impl return None @sdc_overload(operator.mul, parallel=False) def sdc_pandas_str_series_operator_mul(self, other): """ Additional overload of operator.add for Series of strings. For generic overload see - autogenerated one in sdc_autogenerated.py """ _func_name = 'Operator mul().' ty_checker = TypeChecker(_func_name) self_is_series, other_is_series = isinstance(self, SeriesType), isinstance(other, SeriesType) if not (self_is_series or other_is_series): return None # this overload is for string series only (but another might well be series of integers) self_is_string_series = self_is_series and isinstance(self.dtype, types.UnicodeType) other_is_string_series = other_is_series and isinstance(other.dtype, types.UnicodeType) if not (self_is_string_series or other_is_string_series): return None if not isinstance(self, (SeriesType, types.Number, types.UnicodeType)): ty_checker.raise_exc(self, 'pandas.series or string', 'self') if not isinstance(other, (SeriesType, types.Number, types.UnicodeType)): ty_checker.raise_exc(other, 'pandas.series or string', 'other') operands_are_series = self_is_series and other_is_series if operands_are_series: none_or_numeric_indexes = ((isinstance(self.index, types.NoneType) or check_index_is_numeric(self)) and (isinstance(other.index, types.NoneType) or check_index_is_numeric(other))) series_indexes_comparable = check_types_comparable(self.index, other.index) or none_or_numeric_indexes if not series_indexes_comparable: raise TypingError('{} Not implemented for series with not-comparable indexes. \ Given: self.index={}, other.index={}'.format(_func_name, self.index, other.index)) # check operation is allowed self_is_int_series = self_is_series and isinstance(self.dtype, types.Integer) other_is_int_series = other_is_series and isinstance(other.dtype, types.Integer) if not (self_is_string_series and (other_is_int_series or isinstance(other, types.Integer)) or (other_is_string_series and (self_is_int_series or isinstance(self, types.Integer)))): raise TypingError('{} Not supported between operands of types: self={}, \ other={}'.format(_func_name, self, other)) if not operands_are_series: def _series_operator_mul_scalar_impl(self, other): if self_is_series == True: # noqa result_data = self._data * other return pandas.Series(result_data, index=self._index, name=self._name) else: result_data = self * other._data return pandas.Series(result_data, index=other._index, name=other._name) return _series_operator_mul_scalar_impl else: # both operands are series (one is integer and other is string) self_is_series = isinstance(self, SeriesType) # optimization for series with default indexes, that can be aligned differently if (isinstance(self.index, types.NoneType) and isinstance(other.index, types.NoneType)): def _series_operator_mul_none_indexes_impl(self, other): series_operand = self if self_is_series == True else other # noqa if (len(self._data) == len(other._data)): result_data = self._data * other._data return pandas.Series(result_data) else: left_size, right_size = len(self._data), len(other._data) min_data_size = min(left_size, right_size) max_data_size = max(left_size, right_size) result_data_as_list = [] result_nan_mask = numpy.zeros(max_data_size, dtype=numpy.bool_) result_nan_mask[min_data_size:] = True for i in numpy.arange(min_data_size): if str_arr_is_na(series_operand._data, i): result_nan_mask[i] = True result_data_as_list.append('') result_data_as_list.append(self._data[i] * other._data[i]) result_data_as_list.extend([''] * (max_data_size - min_data_size)) result_data = create_str_arr_from_list(result_data_as_list) str_arr_set_na_by_mask(result_data, result_nan_mask) return pandas.Series(result_data, self._index) return _series_operator_mul_none_indexes_impl else: left_index_is_range = isinstance(self.index, RangeIndexType) numba_index_common_dtype = find_common_dtype_from_numpy_dtypes( [self.index.dtype, other.index.dtype], []) common_dtype_different = (numba_index_common_dtype != self.index.dtype or numba_index_common_dtype != other.index.dtype) def _series_operator_mul_common_impl(self, other): left_index, right_index = self.index, other.index # TO-DO: coversion of RangeIndexType to np.array may happen several times here: # in array_equal, in astype or left_index.values - need caching of array allocated once # check if indexes are equal and series don't have to be aligned if (left_index is right_index or numpy_like.array_equal(left_index, right_index)): result_data = self._data * other._data if common_dtype_different == True: # noqa result_index = numpy_like.astype(left_index, numba_index_common_dtype) else: result_index = left_index.values if left_index_is_range == True else left_index # noqa return pandas.Series(result_data, index=result_index) # TODO: replace below with core join(how='outer', return_indexers=True) when implemented joined_index, left_indexer, right_indexer = sdc_join_series_indexes(left_index, right_index) str_series_operand = self if self_is_string_series == True else other # noqa str_series_indexer = left_indexer if self_is_string_series == True else right_indexer # noqa result_size = len(joined_index) result_nan_mask = numpy.zeros(result_size, dtype=numpy.bool_) result_data_as_list = [] for i in numpy.arange(result_size): if (left_indexer[i] == -1 or right_indexer[i] == -1 or str_arr_is_na(str_series_operand._data, str_series_indexer[i])): result_nan_mask[i] = True result_data_as_list.append('') else: result_data_as_list.append(self._data[left_indexer[i]] * other._data[right_indexer[i]]) result_data = create_str_arr_from_list(result_data_as_list) str_arr_set_na_by_mask(result_data, result_nan_mask) return pandas.Series(result_data, joined_index) return _series_operator_mul_common_impl @sdc_overload_method(SeriesType, 'groupby') def sdc_pandas_series_groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.groupby Limitations ----------- - Parameters ``axis``, ``level``, ``as_index``, ``group_keys``, ``squeeze`` and ``observed`` \ are currently unsupported by Intel Scalable Dataframe Compiler - Parameter ``by`` is supported as single literal column name only - Mutating the contents of a DataFrame between creating a groupby object and calling it's methods is unsupported Examples -------- .. literalinclude:: ../../../examples/series/series_groupby.py :language: python :lines: 33- :caption: Return the mean of the values grouped by numpy array. :name: ex_series_groupby .. command-output:: python ./series/series_groupby.py :cwd: ../../../examples .. seealso:: :ref:`Series.resample <pandas.Series.resample>` Resample time-series data. Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.groupby` implementation. .. only:: developer Test: python -m sdc.runtests sdc.tests.test_groupby.TestGroupBy.test_series_groupby* """ _func_name = 'Method Series.groupby().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) # we support only simpliest case of by being 1D array (a column of a DataFrame) # TODO: extend and support fully functional SeriesGroupBy if not ((isinstance(by, types.Array) and by.ndim == 1) or by == string_array_type): return None if not (isinstance(axis, (types.Integer, types.UnicodeType, types.Omitted)) or axis == 0): ty_checker.raise_exc(axis, 'int or str', 'axis') if not (level is None or isinstance(level, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, level)) if not (as_index is True or isinstance(as_index, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, as_index)) if not (isinstance(sort, (types.Omitted, types.Boolean)) or sort is True): ty_checker.raise_exc(sort, 'bool', 'sort') if not (group_keys is True or isinstance(group_keys, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, group_keys)) if not (squeeze is False or isinstance(squeeze, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, squeeze)) if not (observed is False or isinstance(observed, types.Omitted)): raise TypingError('{} Unsupported parameters. Given inplace: {}'.format(_func_name, observed)) by_type = by.dtype list_type = types.ListType(types.int64) def sdc_pandas_series_groupby_impl(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False): if len(self) != len(by): raise ValueError("Series.groupby(). Grouper and axis must be same length") grouped = Dict.empty(by_type, list_type) for i in numpy.arange(len(by)): if isna(by, i): continue value = by[i] group_list = grouped.get(value, List.empty_list(types.int64)) group_list.append(i) grouped[value] = group_list return init_series_groupby(self, by, grouped, sort) return sdc_pandas_series_groupby_impl @sdc_overload_method(SeriesType, 'skew') def sdc_pandas_series_skew(self, axis=None, skipna=None, level=None, numeric_only=None): """ Intel Scalable Dataframe Compiler User Guide ******************************************** Pandas API: pandas.Series.skew Limitations ----------- - Parameters ``level`` and ``numeric_only`` are supported only with default value ``None``. Examples -------- .. literalinclude:: ../../../examples/series/series_skew.py :language: python :lines: 27- :caption: Unbiased rolling skewness. :name: ex_series_skew .. command-output:: python ./series/series_skew.py :cwd: ../../../examples Intel Scalable Dataframe Compiler Developer Guide ************************************************* Pandas Series method :meth:`pandas.Series.skew` implementation. .. only:: developer Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_skew* """ _func_name = 'Method Series.skew().' ty_checker = TypeChecker(_func_name) ty_checker.check(self, SeriesType) if not isinstance(axis, (types.Integer, types.NoneType, types.Omitted)) and axis is not None: ty_checker.raise_exc(axis, 'int64', 'axis') if not isinstance(skipna, (types.Boolean, types.NoneType, types.Omitted)) and skipna is not None: ty_checker.raise_exc(skipna, 'bool', 'skipna') if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None: ty_checker.raise_exc(level, 'None', 'level') if not isinstance(numeric_only, (types.Omitted, types.NoneType)) and numeric_only is not None: ty_checker.raise_exc(numeric_only, 'None', 'numeric_only') def sdc_pandas_series_skew_impl(self, axis=None, skipna=None, level=None, numeric_only=None): if axis != 0 and axis is not None: raise ValueError('Parameter axis must be only 0 or None.') if skipna is None: _skipna = True else: _skipna = skipna if _skipna: return numpy_like.nanskew(self._data) return numpy_like.skew(self._data) return sdc_pandas_series_skew_impl