Python pandas.RangeIndex() Examples

The following are 30 code examples of pandas.RangeIndex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the pandas module, or try the search function.
Example #1
Source File: test_range.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_explicit_conversions(self):
        """Check float arithmetic on a RangeIndex and interop with numpy arrays.

        GH 8608: add/sub are overridden explicitly for Float/Int Index.
        """
        base = RangeIndex(5)
        scaled = np.arange(5, dtype='int64') * 3.2

        # float conversions: multiplying from either side gives the same index
        expected = Float64Index(scaled)
        left_product = base * 3.2
        tm.assert_index_equal(left_product, expected)
        right_product = 3.2 * base
        tm.assert_index_equal(right_product, expected)

        # interops with numpy arrays: subtracting zeros leaves the values alone
        expected = Float64Index(scaled)
        zeros = np.zeros(5, dtype='float64')
        tm.assert_index_equal(right_product - zeros, expected)

        # ... and subtracting the index from zeros negates them
        expected = Float64Index(-scaled)
        zeros = np.zeros(5, dtype='float64')
        tm.assert_index_equal(zeros - right_product, expected)
Example #2
Source File: utils.py    From mars with Apache License 2.0 6 votes vote down vote up
def convert_labels_into_positions(pandas_index, labels):
    """
    Translate index labels into integer positions.

    :param pandas_index: pandas Index in which the labels are looked up
    :param labels: iterable of labels
    :return: numpy array holding the positions of every label, in label order
    """
    positions = []
    for label in labels:
        located = pandas_index.get_loc(label)
        if not isinstance(located, (int, np.integer)):
            # ``get_loc`` returned a slice or a boolean array; apply it to
            # 0..len-1 to expand it into the concrete positions it selects
            positions += pd.RangeIndex(len(pandas_index))[located].tolist()
            continue
        positions.append(located)
    return np.asarray(positions)
Example #3
Source File: transform.py    From mars with Apache License 2.0 6 votes vote down vote up
def _infer_df_func_returns(self, in_dtypes, dtypes):
        """Infer the output kind and dtypes by running the function on a mock.

        A two-row placeholder (built with ``pd.RangeIndex(2)`` as index) is
        created from ``in_dtypes`` and ``self._func`` is applied to it through
        ``agg`` or ``transform`` depending on ``self.call_agg``.  As a side
        effect ``self.output_types`` is reset to dataframe or series according
        to what the mock call returned.

        :param in_dtypes: dtypes of the input; for series input this is
            indexed as ``(name, dtype)``
        :param dtypes: dtypes supplied by the caller, or None to use the
            inferred ones
        :return: ``dtypes`` when it is not None, otherwise the dtypes of the
            mock result (``infer_df.dtypes`` for a DataFrame result,
            ``(name, dtype)`` for anything else)
        """
        if self.output_types[0] == OutputType.dataframe:
            empty_df = build_empty_df(in_dtypes, index=pd.RangeIndex(2))
            # numeric warnings raised on the placeholder data are irrelevant
            with np.errstate(all='ignore'):
                if self.call_agg:
                    infer_df = empty_df.agg(self._func, axis=self._axis, *self.args, **self.kwds)
                else:
                    infer_df = empty_df.transform(self._func, axis=self._axis, *self.args, **self.kwds)
        else:
            empty_df = build_empty_series(in_dtypes[1], index=pd.RangeIndex(2), name=in_dtypes[0])
            with np.errstate(all='ignore'):
                if self.call_agg:
                    infer_df = empty_df.agg(self._func, args=self.args, **self.kwds)
                else:
                    infer_df = empty_df.transform(self._func, convert_dtype=self.convert_dtype,
                                                  args=self.args, **self.kwds)

        # NOTE: test ``dtypes is None`` explicitly instead of ``dtypes or ...``;
        # evaluating the truth value of a pandas Series raises ValueError.
        if isinstance(infer_df, pd.DataFrame):
            new_dtypes = infer_df.dtypes if dtypes is None else dtypes
            self.output_types = [OutputType.dataframe]
        else:
            new_dtypes = (infer_df.name, infer_df.dtype) if dtypes is None else dtypes
            self.output_types = [OutputType.series]

        return new_dtypes
Example #4
Source File: drop_duplicates.py    From mars with Apache License 2.0 6 votes vote down vote up
def __call__(self, inp, inplace=False):
        """Create the output tileable for ``inp``.

        The output reuses ``inp.params`` with a replaced ``index_value`` and
        with the first dimension of ``shape`` set to NaN.  When ``inplace`` is
        true, ``inp.data`` is pointed at the new tileable's data.

        :param inp: input tileable
        :param inplace: whether to rebind ``inp.data`` to the result
        :return: the newly created tileable
        """
        self._output_types = inp.op.output_types
        params = inp.params

        if not self._ignore_index:
            new_index_value = gen_unknown_index_value(
                params['index_value'], self._keep, self._subset, type(self).__name__)
        else:
            new_index_value = parse_index(pd.RangeIndex(-1))
        params['index_value'] = new_index_value

        # first dimension becomes NaN, the remaining dimensions are kept
        dims = list(params['shape'])
        dims[0] = np.nan
        params['shape'] = tuple(dims)

        result = self.new_tileable([inp], kws=[params])
        if inplace:
            inp.data = result.data
        return result
Example #5
Source File: melt.py    From mars with Apache License 2.0 6 votes vote down vote up
def tile(cls, op: 'DataFrameMelt'):
        """Tile the melt operand into one output chunk per input chunk.

        The input is first rechunked so that axis 1 is a single chunk.  Every
        output chunk has NaN rows, ``out.shape[1]`` columns and a
        ``RangeIndex(-1)`` based index value; the chunks are then passed
        through ``standardize_range_index``.
        """
        source = op.inputs[0]
        out = op.outputs[0]

        # collapse axis 1 into a single chunk
        source = source.rechunk({1: (source.shape[1],)})._inplace_tile()

        def _build_chunk(in_chunk):
            chunk_op = op.copy().reset_key()
            return chunk_op.new_chunk(
                [in_chunk], index=in_chunk.index, shape=(np.nan, out.shape[1]), dtypes=out.dtypes,
                index_value=parse_index(pd.RangeIndex(-1), in_chunk.key, in_chunk.index_value.key),
                columns_value=out.columns_value)

        out_chunks = standardize_range_index([_build_chunk(c) for c in source.chunks])

        tileable_op = op.copy().reset_key()
        row_splits = (np.nan,) * source.chunk_shape[0]
        return tileable_op.new_tileables(
            [source], chunks=out_chunks, nsplits=(row_splits, (out.shape[1],)), **out.params)
Example #6
Source File: test_range.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_min_fitting_element(self):
        """Check ``RangeIndex._min_fitting_element`` on a table of cases."""
        big_num = 500000000000000000000000

        # (RangeIndex arguments, lower limit, expected element)
        cases = [
            ((0, 20, 2), 1, 2),
            ((1, 6), 1, 1),
            ((18, -2, -2), 1, 2),
            ((5, 0, -1), 1, 1),
            ((5, big_num * 2, 1), big_num, big_num),
        ]
        for range_args, lower_limit, wanted in cases:
            result = RangeIndex(*range_args)._min_fitting_element(lower_limit)
            assert wanted == result
Example #7
Source File: test_range.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_max_fitting_element(self):
        """Check ``RangeIndex._max_fitting_element`` on a table of cases."""
        big_num = 500000000000000000000000

        # (RangeIndex arguments, upper limit, expected element)
        cases = [
            ((0, 20, 2), 17, 16),
            ((1, 6), 4, 4),
            ((18, -2, -2), 17, 16),
            ((5, 0, -1), 4, 4),
            ((5, big_num * 2, 1), big_num, big_num),
        ]
        for range_args, upper_limit, wanted in cases:
            result = RangeIndex(*range_args)._max_fitting_element(upper_limit)
            assert wanted == result
Example #8
Source File: reset_index.py    From mars with Apache License 2.0 6 votes vote down vote up
def _call_dataframe(self, a):
        """Build the output dataframe description for input ``a``.

        With ``self.drop`` set, shape, columns and dtypes are taken from ``a``
        unchanged and the index becomes a RangeIndex (``RangeIndex(-1)`` when
        the row count is NaN).  Otherwise ``reset_index`` is run on an empty
        mock frame to obtain the resulting columns and dtypes.
        """
        if not self.drop:
            mock = build_empty_df(a.dtypes)
            # zero-length slice of the real index, so the mock has the same index type
            mock.index = a.index_value.to_pandas()[:0]
            mock = mock.reset_index(level=self.level, col_level=self.col_level, col_fill=self.col_fill)
            shape = (a.shape[0], len(mock.columns))
            columns_value = parse_index(mock.columns, store_data=True)
            dtypes = mock.dtypes
            index_value = self._get_out_index(mock, shape)
        else:
            shape = a.shape
            columns_value = a.columns_value
            dtypes = a.dtypes
            if np.isnan(a.shape[0]):
                range_value = -1
            else:
                range_value = a.shape[0]
            index_value = parse_index(pd.RangeIndex(range_value))
        return self.new_dataframe([a], shape=shape, columns_value=columns_value,
                                  index_value=index_value, dtypes=dtypes)
Example #9
Source File: string_.py    From mars with Apache License 2.0 6 votes vote down vote up
def call(cls, op, inp):
        """Build the output for a string method that may expand into columns.

        Without ``expand`` the parent implementation is used.  With ``expand``
        an explicit ``n`` is required, and the result is a dataframe with
        ``n + 1`` columns that all carry the dtype of ``inp``.

        :raises NotImplementedError: if ``expand`` is set and ``n`` is -1
        """
        kwargs = op.method_kwargs
        expand = kwargs.get('expand', False)
        if expand is False:
            return super().call(op, inp)

        n = kwargs.get('n', -1)
        # does not support if expand and n == -1
        if n == -1:  # pragma: no cover
            raise NotImplementedError('`n` needs to be specified when expand=True')

        op.output_types = [OutputType.dataframe]
        columns = pd.RangeIndex(n + 1)
        n_columns = len(columns)
        return op.new_dataframe(
            [inp], shape=(inp.shape[0], n_columns),
            dtypes=pd.Series([inp.dtype] * n_columns, index=columns),
            columns_value=parse_index(columns, store_data=True),
            index_value=inp.index_value)
Example #10
Source File: descriptors.py    From psst with MIT License 6 votes vote down vote up
def setattributeindex(self, instance, value):
        """Relabel the buses of ``instance`` with the labels in ``value``.

        Bus references in the branch and gen tables are remapped by position
        in the current bus index, the load columns are relabelled, and the bus
        index itself is replaced.  If the resulting bus names are a
        ``RangeIndex`` or ``Int64Index`` they are turned into ``'Bus<n>'``
        strings.
        """
        current_index = instance.bus.index

        def _relabel(old_label):
            # the position of the old label in the bus index selects the new label
            return value[current_index.get_loc(old_label)]

        instance.branch['F_BUS'] = instance.branch['F_BUS'].apply(_relabel)
        instance.branch['T_BUS'] = instance.branch['T_BUS'].apply(_relabel)
        instance.gen['GEN_BUS'] = instance.gen['GEN_BUS'].apply(_relabel)

        try:
            # keep only the new labels whose bus appears among the load columns
            has_load = instance.bus_name.isin(instance.load.columns)
            instance.load.columns = [label for flag, label in zip(has_load, value) if flag == True]
        except ValueError:
            instance.load.columns = value
        except AttributeError:
            instance.load = pd.DataFrame(0, index=range(0, 1), columns=value, dtype='float')

        instance.bus.index = value

        if isinstance(instance.bus_name, pd.RangeIndex) or isinstance(instance.bus_name, pd.Int64Index):
            logger.debug('Forcing string types for all bus names')
            instance.bus_name = ['Bus{}'.format(number) for number in instance.bus_name]
Example #11
Source File: test_common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_duplicated(self, indices, keep):
        """Cross-check ``Index.duplicated`` against ``Series.duplicated``."""
        unsupported = isinstance(indices, (MultiIndex, RangeIndex))
        if unsupported or not len(indices):
            # MultiIndex tested separately in:
            # tests/indexes/multi/test_unique_and_duplicates
            pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

        index_cls = type(indices)
        unique_idx = index_cls(indices)
        if unique_idx.has_duplicates:
            # We are testing the duplicated-method here, so we need to know
            # exactly which indices are duplicate and how (for the result).
            # This is not possible if the index has duplicates already, which
            # we therefore remove. This is seemingly circular, as
            # drop_duplicates invokes duplicated, but in the end, it all works
            # out because we cross-check with Series.duplicated, which is
            # tested separately.
            unique_idx = unique_idx.drop_duplicates()

        # sample positions with replacement so that duplicates are guaranteed
        size, repeats = len(unique_idx), 10
        picks = np.random.choice(size, repeats * size)
        expected = pd.Series(picks).duplicated(keep=keep).values
        sampled_idx = index_cls(unique_idx.values[picks])

        tm.assert_numpy_array_equal(sampled_idx.duplicated(keep=keep), expected)
Example #12
Source File: test_range.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_delete(self):
        """Deleting the first/last element keeps the name; out of bounds raises."""
        idx = RangeIndex(5, name='Foo')

        # (position to delete, slice describing what must remain)
        cases = (
            (0, slice(1, None)),
            (-1, slice(None, -1)),
        )
        for position, remaining in cases:
            expected = idx[remaining].astype(int)
            result = idx.delete(position)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name

        # either depending on numpy version
        with pytest.raises((IndexError, ValueError)):
            idx.delete(len(idx))
Example #13
Source File: test_datasource_execution.py    From mars with Apache License 2.0 6 votes vote down vote up
def testFromRecordsExecution(self):
        """``from_records`` must match pandas for mars and numpy structured arrays."""
        record_dtype = np.dtype([('x', 'int'), ('y', 'double'), ('z', '<U16')])

        records = np.ones((10,), dtype=record_dtype)
        pdf_expected = pd.DataFrame.from_records(records, index=pd.RangeIndex(10))

        def _assert_same_frame(source):
            frame = from_records(source)
            executed = self.executor.execute_dataframe(frame, concat=True)[0]
            pd.testing.assert_frame_equal(executed, pdf_expected)

        # from structured array of mars
        _assert_same_frame(mt.ones((10,), dtype=record_dtype, chunk_size=3))

        # from structured array of numpy
        _assert_same_frame(records)
Example #14
Source File: test_sorting.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_numpy_argsort(idx):
    """Check that ``np.argsort`` on an index agrees with ``Index.argsort``.

    For ``CategoricalIndex`` and ``RangeIndex`` the test additionally expects
    a ``ValueError`` for the numpy-only ``axis``, ``kind`` and ``order``
    arguments.

    :param idx: the index under test
    """
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # NOTE: the check must be made on the instance.  The previous
    # ``isinstance(type(idx), ...)`` tested the class object itself, which is
    # never an instance of these index classes, so the branch could not run.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind='mergesort')

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=('a', 'b'))
Example #15
Source File: utils.py    From mars with Apache License 2.0 6 votes vote down vote up
def indexing_index_value(index_value, indexes, store_data=False):
    """Derive the index value that results from indexing with ``indexes``.

    :param index_value: index value of the object being indexed
    :param indexes: an integer, an Entity, a tuple, or anything else the
        pandas index accepts in ``[]``
    :param store_data: forwarded to ``parse_index`` (except for Entity
        indexes, which always pass ``store_data=False``)
    :return: the new index value
    """
    pd_index = index_value.to_pandas()

    if not index_value.has_value():
        new_index_value = parse_index(pd_index, indexes, store_data=store_data)
        # carry the known bounds of the source over to the new index value
        inner = new_index_value._index_value
        inner._min_val = index_value.min_val
        inner._min_val_close = index_value.min_val_close
        inner._max_val = index_value.max_val
        inner._max_val_close = index_value.max_val_close
        return new_index_value

    if isinstance(indexes, Integral):
        return parse_index(pd_index[[indexes]], store_data=store_data)

    if isinstance(indexes, Entity):
        # only an empty placeholder of the matching index type is stored
        if isinstance(pd_index, pd.RangeIndex):
            placeholder = pd.RangeIndex(-1)
        else:
            placeholder = type(pd_index)([])
        return parse_index(placeholder, indexes, index_value, store_data=False)

    selector = list(indexes) if isinstance(indexes, tuple) else indexes
    return parse_index(pd_index[selector], store_data=store_data)
Example #16
Source File: utils.py    From mars with Apache License 2.0 6 votes vote down vote up
def filter_index_value(index_value, min_max, store_data=False):
    """Restrict an index value to the labels lying between two bounds.

    :param index_value: index value to filter
    :param min_max: tuple ``(min_val, min_val_close, max_val, max_val_close)``;
        the ``*_close`` flags select inclusive comparison for that bound
    :param store_data: forwarded to ``parse_index``
    :return: index value of the filtered index
    """
    from .core import IndexValue

    lower, lower_closed, upper, upper_closed = min_max
    pd_index = index_value.to_pandas()

    # range indexes are delegated to the dedicated helper
    if isinstance(index_value.value, IndexValue.RangeIndex):
        filtered = _filter_range_index(pd_index, lower, lower_closed,
                                       upper, upper_closed)
        return parse_index(filtered, store_data=store_data)

    above_lower = pd_index >= lower if lower_closed else pd_index > lower
    below_upper = pd_index <= upper if upper_closed else pd_index < upper
    mask = above_lower & below_upper

    return parse_index(pd_index[mask], store_data=store_data)
Example #17
Source File: utils.py    From mars with Apache License 2.0 6 votes vote down vote up
def infer_index_value(left_index_value, right_index_value):
    """Infer the index value produced by combining two index values.

    :param left_index_value: index value of the left operand
    :param right_index_value: index value of the right operand
    :return: the left index value when both sides are known to agree,
        otherwise an index value built from an empty index
    """
    from .core import IndexValue

    left_is_range = isinstance(left_index_value.value, IndexValue.RangeIndex)
    if left_is_range and isinstance(right_index_value.value, IndexValue.RangeIndex):
        same_slice = left_index_value.value.slice == right_index_value.value.slice
        if not same_slice:
            return parse_index(pd.Int64Index([]), left_index_value, right_index_value)
        return left_index_value

    # when the left and right indexes are identical and both have unique
    # elements, the output index is identical as well
    if left_index_value.is_unique and right_index_value.is_unique and \
            left_index_value.key == right_index_value.key:
        return left_index_value

    # otherwise only the common dtype of both sides is carried over
    common_dtype = find_common_type(
        [left_index_value.to_pandas().dtype, right_index_value.to_pandas().dtype])
    return parse_index(pd.Index([], dtype=common_dtype), left_index_value, right_index_value)
Example #18
Source File: hsgtcg.py    From wanggeService with MIT License 6 votes vote down vote up
def getStockHdStatistics(cls, code, browser, retryCount=3):
        """Scrape the shareholding statistics of one stock.

        The page is scraped up to ``retryCount`` times; the first non-empty
        result is cleaned up and returned.

        :param code: stock code
        :param browser: webdriver browser
        :param retryCount: maximum number of scrape attempts
        :return: the cleaned DataFrame, or an empty DataFrame when no attempt
            returned any rows (including ``retryCount <= 0``)
        """
        url = 'http://data.eastmoney.com/hsgtcg/StockHdStatistics.aspx?stock={}'.format(code)
        # Bind ``df`` up front so the function still returns a frame when the
        # loop body never runs (previously an UnboundLocalError).
        df = pd.DataFrame()
        for _ in range(retryCount):
            df = cls.scrap(url, browser)
            if len(df) > 0:
                # fix up the shareholding quantity and amount columns
                df['hvol'] = df['hvol'].apply(HSGTCG.hz2Num).astype(float)
                df['hamount'] = df['hamount'].apply(HSGTCG.hz2Num).astype(float)
                df['close'] = df['close'].astype(float)
                df['tradedate'] = df['tradedate'].apply(convertToDate).astype(datetime.date)
                # drop rows that are not trading days; this works around a bug
                # in the web version of East Money
                df = df[df['tradedate'].apply(Stocktradedate.if_tradeday)]
                df.index = pd.RangeIndex(len(df.index))
                break

        return df
Example #19
Source File: from_tensor.py    From mars with Apache License 2.0 6 votes vote down vote up
def __call__(self, input_tensor, index, name):
        """Create a series from ``input_tensor`` with an optional index.

        :param input_tensor: tensor providing the data
        :param index: None, a pandas Index, an ``INDEX_TYPE`` object, a
            ``Base``/``Entity`` object, or anything accepted by ``pd.Index``
        :param name: name of the resulting series
        :raises ValueError: if a ``Base``/``Entity`` index is not 1-d
        :return: the new series
        """
        inputs = [input_tensor]

        if index is None:
            # no index given: default to 0..n-1
            index_value = parse_index(pd.RangeIndex(start=0, stop=input_tensor.shape[0]))
        elif isinstance(index, pd.Index):
            index_value = parse_index(index, store_data=True)
        elif isinstance(index, INDEX_TYPE):
            self._index = index
            index_value = index.index_value
            inputs.append(index)
        elif isinstance(index, (Base, Entity)):
            self._index = index
            index_tensor = astensor(index)
            if index_tensor.ndim != 1:
                raise ValueError('index should be 1-d, got {}-d'.format(index_tensor.ndim))
            index_value = parse_index(
                pd.Index([], dtype=index_tensor.dtype), index_tensor, type(self).__name__)
            inputs.append(index_tensor)
        else:
            index_value = parse_index(pd.Index(index), store_data=True)

        return self.new_series(inputs, shape=input_tensor.shape, dtype=self.dtype,
                               index_value=index_value, name=name)
Example #20
Source File: test_range.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_name(self):
        """Names of indexes built from other indexes are independent (GH12288)."""
        source = RangeIndex(10)
        source.name = 'original'

        duplicate = RangeIndex(source)
        duplicate.name = 'copy'

        # renaming the copy must not leak back into the source
        assert source.name == 'original'
        assert duplicate.name == 'copy'

        # a plain Index built from the copy inherits the copy's name ...
        derived = Index(duplicate)
        assert derived.name == 'copy'

        # ... and renaming it leaves both ancestors untouched
        derived.name = 'new'
        assert source.name == 'original'
        assert duplicate.name == 'copy'
        assert derived.name == 'new'
Example #21
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_binops_pow(self):
        """Run ``check_binop`` for ``pow`` with positive scalars only."""
        # later versions of numpy don't allow powers of negative integers
        # so test separately
        # https://github.com/numpy/numpy/pull/8127
        range_indexes = [
            pd.RangeIndex(0, 10, 1),
            pd.RangeIndex(0, 20, 2),
        ]
        self.check_binop([pow], [1, 2], range_indexes)

    # TODO: mod, divmod? 
Example #22
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_binops(self):
        """Run ``check_binop`` for the basic arithmetic operators."""
        arithmetic_ops = [
            operator.add,
            operator.sub,
            operator.mul,
            operator.floordiv,
            operator.truediv,
        ]
        range_indexes = [
            pd.RangeIndex(0, 10, 1),
            pd.RangeIndex(0, 20, 2),
            pd.RangeIndex(-10, 10, 2),
            pd.RangeIndex(5, -5, -1),
        ]
        self.check_binop(arithmetic_ops, [-1, 1, 2], range_indexes)
Example #23
Source File: descriptors.py    From psst with MIT License 5 votes vote down vote up
def setattributeindex(self, instance, value):
        """Replace the generator labels of ``instance`` with ``value``.

        Both the ``gen`` and ``gencost`` tables get ``value`` as their index.
        If the resulting generator names are a ``RangeIndex`` or
        ``Int64Index`` they are turned into ``'GenCo<n>'`` strings.

        :param instance: object holding ``gen``, ``gencost`` and ``gen_name``
        :param value: the new generator labels
        """
        instance.gen.index = value
        instance.gencost.index = value

        # NOTE: both checks look at gen_name; the second one used to test
        # bus_name, so integer generator names were only renamed when the
        # *bus* names happened to be an Int64Index.
        if isinstance(instance.gen_name, pd.RangeIndex) or isinstance(instance.gen_name, pd.Int64Index):
            instance.gen_name = ['GenCo{}'.format(g) for g in instance.gen_name]
Example #24
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_arithmetic_with_frame_or_series(self, op):
        """An index operated with a Series/DataFrame defers to that object."""
        # check that we return NotImplemented when operating with Series
        # or DataFrame
        index = pd.RangeIndex(5)

        series_operand = pd.Series(np.random.randn(5))
        series_expected = op(pd.Series(index), series_operand)
        series_result = op(index, series_operand)
        tm.assert_series_equal(series_result, series_expected)

        frame_operand = pd.DataFrame(np.random.randn(2, 5))
        frame_expected = op(pd.DataFrame([index, index]), frame_operand)
        frame_result = op(index, frame_operand)
        tm.assert_frame_equal(frame_result, frame_expected)
Example #25
Source File: test_alter_axes.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_reset_index_range(self):
        """``Series.reset_index`` yields a frame with a RangeIndex (GH 12071)."""
        series = Series(range(2), name='A', dtype='int64')
        actual = series.reset_index()
        assert isinstance(actual.index, RangeIndex)

        wanted = DataFrame(
            [[0, 0], [1, 1]],
            columns=['index', 'A'],
            index=RangeIndex(stop=2),
        )
        tm.assert_frame_equal(actual, wanted)
Example #26
Source File: drop_duplicates.py    From mars with Apache License 2.0 5 votes vote down vote up
def _execute_subset_tree_post(cls, ctx, op):
        """Pick this chunk's selected rows from the input and store them in ``ctx``.

        Rows of the subset frame whose ``_chunk_index_`` equals the output
        chunk's first index component give, via ``_i_``, the positions to take
        from the input.  With ``op.ignore_index`` the result is re-indexed
        with a RangeIndex that starts after the rows of all earlier chunks.
        """
        source = ctx[op.input.key]
        out_chunk = op.outputs[0]
        chunk_no = out_chunk.index[0]
        subset = ctx[op.subset_chunk.key]

        owned_rows = subset[subset['_chunk_index_'] == chunk_no]
        picked = source.iloc[owned_rows['_i_']]
        if op.ignore_index:
            # number of rows belonging to chunks that come before this one
            offset = (subset['_chunk_index_'] < chunk_no).sum()
            picked.index = pd.RangeIndex(offset, offset + len(picked))
        ctx[out_chunk.key] = picked
Example #27
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_ufunc_compat(self, holder):
        """Applying ``np.sin`` to a holder returns the matching boxed result."""
        # a Series stays a Series; every index type comes back as a plain Index
        if holder is pd.Series:
            box = pd.Series
        else:
            box = pd.Index

        if holder is not pd.RangeIndex:
            idx = holder(np.arange(5, dtype='int64'))
        else:
            idx = pd.RangeIndex(0, 5)

        actual = np.sin(idx)
        wanted = box(np.sin(np.arange(5, dtype='int64')))
        tm.assert_equal(actual, wanted)
Example #28
Source File: read_csv.py    From mars with Apache License 2.0 5 votes vote down vote up
def tile(cls, op):
        """Split the CSV file(s) into byte-range chunks.

        Compressed input is delegated to ``cls._tile_compressed``.  Otherwise
        every path is cut into pieces of at most ``chunk_bytes`` bytes and one
        chunk with an unknown (NaN) row count is created per piece.
        """
        if op.compression:
            return cls._tile_compressed(op)

        df = op.outputs[0]
        chunk_bytes = int(parse_readable_size(df.extra_params.chunk_bytes)[0])

        if isinstance(op.path, (tuple, list)):
            paths = op.path
        else:
            paths = glob(op.path, storage_options=op.storage_options)

        out_chunks = []
        for path in paths:
            total_bytes = file_size(path)
            n_pieces = int(np.ceil(total_bytes * 1.0 / chunk_bytes))
            for piece in range(n_pieces):
                # chunks are numbered consecutively across all paths
                index_num = len(out_chunks)
                offset = piece * chunk_bytes

                chunk_op = op.copy().reset_key()
                chunk_op._path = path
                chunk_op._offset = offset
                # the last piece of a file may be shorter than chunk_bytes
                chunk_op._size = min(chunk_bytes, total_bytes - offset)
                shape = (np.nan, len(df.dtypes))
                index_value = parse_index(df.index_value.to_pandas(), path, index_num)
                out_chunks.append(chunk_op.new_chunk(
                    None, shape=shape, index=(index_num, 0), index_value=index_value,
                    columns_value=df.columns_value, dtypes=df.dtypes))

        needs_standardize = op.incremental_index and len(out_chunks) > 1 and \
            isinstance(df.index_value._index_value, IndexValue.RangeIndex)
        if needs_standardize:
            out_chunks = standardize_range_index(out_chunks)

        new_op = op.copy()
        nsplits = ((np.nan,) * len(out_chunks), (df.shape[1],))
        return new_op.new_dataframes(None, df.shape, dtypes=df.dtypes,
                                     index_value=df.index_value,
                                     columns_value=df.columns_value,
                                     chunks=out_chunks, nsplits=nsplits)
Example #29
Source File: from_tensor.py    From mars with Apache License 2.0 5 votes vote down vote up
def _call_input_1d_tileables(self, input_1d_tileables, index, columns):
        """Create a dataframe from a mapping of 1-d inputs.

        :param input_1d_tileables: mapping whose values are the 1-d inputs;
            all inputs with a non-empty shape must share the same shape
        :param index: optional index; its length must match the first input
            tileable
        :param columns: optional column labels; their number must match the
            number of inputs
        :raises ValueError: on mismatching input shapes, index length or
            number of columns
        :raises NotImplementedError: if ``columns`` is a ``Base`` object
        :return: the new dataframe
        """
        tileables = []
        shape = None
        for tileable in input_1d_tileables.values():
            tileable_shape = astensor(tileable).shape
            if len(tileable_shape) > 0:
                if shape is None:
                    shape = tileable_shape
                elif shape != tileable_shape:
                    raise ValueError('input 1-d tensors should have same shape')

            # only Base/Entity values become inputs of the operand
            if isinstance(tileable, (Base, Entity)):
                tileables.append(tileable)

        if index is not None:
            if tileables[0].shape[0] != len(index):
                # Report the length that was actually compared.  The message
                # used to index the input mapping with ``[0]``, which is a key
                # lookup and not "the first input".
                raise ValueError(
                    'index {} should have the same shape with tensor: {}'.format(
                        index, tileables[0].shape[0]))
            index_value = self._process_index(index, tileables)
        else:
            index_value = parse_index(pd.RangeIndex(0, tileables[0].shape[0]))

        if columns is not None:
            if len(input_1d_tileables) != len(columns):
                raise ValueError(
                    'columns {0} should have size {1}'.format(columns, len(input_1d_tileables)))
            if not isinstance(columns, pd.Index):
                if isinstance(columns, Base):
                    raise NotImplementedError('The columns value cannot be a tileable')
                columns = pd.Index(columns)
            columns_value = parse_index(columns, store_data=True)
        else:
            columns_value = parse_index(pd.RangeIndex(0, len(input_1d_tileables)), store_data=True)

        shape = (shape[0], len(input_1d_tileables))
        return self.new_dataframe(tileables, shape, dtypes=self.dtypes,
                                  index_value=index_value, columns_value=columns_value)
Example #30
Source File: test_datastore_execute.py    From mars with Apache License 2.0 5 votes vote down vote up
def testToSQL(self):
        """Write a dataframe and a series to SQLite and read them back."""
        descending_index = pd.RangeIndex(100, 0, -1, name='index')
        raw = pd.DataFrame({
            'col1': np.random.rand(100),
            'col2': np.random.choice(['a', 'b', 'c'], (100,)),
            'col3': np.arange(100).astype('int64'),
        }, index=descending_index)

        with tempfile.TemporaryDirectory() as tmp_dir:
            frame_table = 'test_table'
            series_table = 'test_table2'
            uri = 'sqlite:///' + os.path.join(tmp_dir, 'test.db')

            engine = sqlalchemy.create_engine(uri)

            def _read_back(table_name):
                # restore the descending index order used by ``raw``
                stored = pd.read_sql(table_name, con=engine, index_col='index')
                return stored.sort_index(ascending=False)

            # test write dataframe
            df = DataFrame(raw, chunk_size=33)
            write_result = df.to_sql(frame_table, con=engine)
            self.executor.execute_dataframe(write_result)

            pd.testing.assert_frame_equal(raw, _read_back(frame_table))

            # test write with existing table
            with self.assertRaises(ValueError):
                df.to_sql(frame_table, con=uri).execute()

            # test write series
            series = md.Series(raw.col1, chunk_size=33)
            with engine.connect() as conn:
                write_result = series.to_sql(series_table, con=conn)
                self.executor.execute_dataframe(write_result)

            pd.testing.assert_frame_equal(raw.col1.to_frame(), _read_back(series_table))