Python pandas.core.frame.DataFrame() Examples

The following are 30 code examples of pandas.core.frame.DataFrame(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.frame, or try the search function.
Example #1
Source File: plm.py    From Computable with MIT License
def add_intercept(panel, name='intercept'):
    """
    Add column of ones to input panel

    Parameters
    ----------
    panel: Panel / DataFrame
    name: string, default 'intercept'

    Returns
    -------
    New object (same type as input)
    """
    panel = panel.copy()
    panel[name] = 1.

    return panel.consolidate() 
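A minimal usage sketch, assuming the old pandas API this code targets (DataFrame.consolidate() has since been removed); the frame df below is made up:

# Hypothetical frame; add_intercept appends a constant column of ones,
# which is what an OLS design matrix needs.
from pandas import DataFrame

df = DataFrame({'x': [1.0, 2.0, 3.0]})
df = add_intercept(df)
print(df.columns.tolist())  # ['x', 'intercept']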
Example #2
Source File: parsers.py    From recruit with Apache License 2.0
def _validate_names(names):
    """
    Check if the `names` parameter contains duplicates.

    If duplicates are found, we issue a warning before returning.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Returns
    -------
    names : array-like or None
        The original `names` parameter.
    """

    if names is not None:
        if len(names) != len(set(names)):
            msg = ("Duplicate names specified. This "
                   "will raise an error in the future.")
            warnings.warn(msg, UserWarning, stacklevel=3)

    return names 
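A hedged sketch of how this behaves, assuming _validate_names is in scope:

import warnings

# Duplicate names trigger the UserWarning; unique names pass through unchanged.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    _validate_names(['a', 'b', 'a'])
    assert any('Duplicate names' in str(w.message) for w in caught)

assert _validate_names(['a', 'b']) == ['a', 'b']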
Example #3
Source File: groupby.py    From Computable with MIT License
def aggregate(self, arg, *args, **kwargs):
        """
        Aggregate using input function or dict of {column -> function}

        Parameters
        ----------
        arg : function or dict
            Function to use for aggregating groups. If a function, it must
            either work when passed a Panel or when passed to Panel.apply.
            If a dict is passed, the keys must be DataFrame column names

        Returns
        -------
        aggregated : Panel
        """
        if isinstance(arg, compat.string_types):
            return getattr(self, arg)(*args, **kwargs)

        return self._aggregate_generic(arg, *args, **kwargs) 
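Panel was removed in pandas 0.25, but the same dispatch pattern (a string argument resolves via getattr, a dict maps column names to functions) survives in DataFrame groupby; a minimal sketch with a made-up frame:

import pandas as pd

df = pd.DataFrame({'k': ['a', 'a', 'b'], 'v': [1, 2, 3]})
g = df.groupby('k')
print(g.aggregate('mean'))        # string arg dispatches to g.mean()
print(g.aggregate({'v': 'sum'}))  # dict arg maps column name -> function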
Example #4
Source File: parsers.py    From recruit with Apache License 2.0
def read(self, nrows=None):
        nrows = _validate_integer('nrows', nrows)
        ret = self._engine.read(nrows)

        # May alter columns / col_dict
        index, columns, col_dict = self._create_index(ret)

        if index is None:
            if col_dict:
                # Any column is actually fine:
                new_rows = len(compat.next(compat.itervalues(col_dict)))
                index = RangeIndex(self._currow, self._currow + new_rows)
            else:
                new_rows = 0
        else:
            new_rows = len(index)

        df = DataFrame(col_dict, columns=columns, index=index)

        self._currow += new_rows

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]].copy()
        return df 
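A hedged sketch of the public entry point: pd.read_csv(..., iterator=True) returns the TextFileReader whose read(nrows) method is shown above, and the RangeIndex continues across calls:

import io
import pandas as pd

buf = io.StringIO('a,b\n1,2\n3,4\n5,6\n')
reader = pd.read_csv(buf, iterator=True)
print(reader.read(2))  # rows 0-1
print(reader.read())   # remaining row; index continues at 2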
Example #5
Source File: reshape.py    From recruit with Apache License 2.0
def unstack(obj, level, fill_value=None):
    if isinstance(level, (tuple, list)):
        if len(level) != 1:
            # _unstack_multiple only handles MultiIndexes,
            # and isn't needed for a single level
            return _unstack_multiple(obj, level, fill_value=fill_value)
        else:
            level = level[0]

    if isinstance(obj, DataFrame):
        if isinstance(obj.index, MultiIndex):
            return _unstack_frame(obj, level, fill_value=fill_value)
        else:
            return obj.T.stack(dropna=False)
    else:
        if is_extension_array_dtype(obj.dtype):
            return _unstack_extension_series(obj, level, fill_value)
        unstacker = _Unstacker(obj.values, obj.index, level=level,
                               fill_value=fill_value,
                               constructor=obj._constructor_expanddim)
        return unstacker.get_result() 
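A minimal sketch of the public API that routes through this function, using a made-up MultiIndex Series:

import pandas as pd

idx = pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
                                 names=['outer', 'inner'])
s = pd.Series([10, 20, 30, 40], index=idx)
print(s.unstack('inner'))  # the 'inner' level is pivoted into columns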
Example #6
Source File: test_gbq.py    From Computable with MIT License
def test_upload_new_table(self):
        # Attempting to upload to a new table with valid data and a valid schema should succeed
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        schema = ['STRING', 'INTEGER', 'STRING', 'INTEGER', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER',
                  'STRING', 'INTEGER', 'BOOLEAN', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER']

        array = [['TESTING_GBQ', 999999999, 'hi', 0, True, 9999999999, '00.000.00.000', 1, 'hola',
                 99999999, False, False, 1, 'Jedi', 11210]]
        df = DataFrame(array, columns=['title', 'id', 'language', 'wp_namespace',
                                       'is_redirect', 'revision_id', 'contributor_ip',
                                       'contributor_id', 'contributor_username',
                                       'timestamp', 'is_minor', 'is_bot',
                                       'reversion_id', 'comment', 'num_characters'])
        gbq.to_gbq(df, 'pandas_testing_dataset.test_data2', schema=schema, col_order=None, if_exists='append')
        a = gbq.read_gbq("SELECT * FROM pandas_testing_dataset.test_data2")
        self.assertTrue((a == df).all().all()) 
Example #7
Source File: test_gbq.py    From Computable with MIT License
def test_upload_bad_data_table(self):
        # Attempting to upload data that does not match schema should fail
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        schema = ['STRING', 'INTEGER', 'STRING', 'INTEGER', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER',
                  'STRING', 'INTEGER', 'BOOLEAN', 'BOOLEAN',
                  'INTEGER', 'STRING', 'INTEGER']

        array = [['TESTING_GBQ\',', False, 'hi', 0, True, 'STRING IN INTEGER', '00.000.00.000', 1, 'hola',
                 99999999, -100, 1000, 1, 'Jedi', 11210]]
        df = DataFrame(array, columns=['title', 'id', 'language', 'wp_namespace',
                                       'is_redirect', 'revision_id', 'contributor_ip',
                                       'contributor_id', 'contributor_username',
                                       'timestamp', 'is_minor', 'is_bot',
                                       'reversion_id', 'comment', 'num_characters'])
        with self.assertRaises(bigquery_client.BigqueryServiceError):
            gbq.to_gbq(df, 'pandas_testing_dataset.test_data1', schema=schema, col_order=None, if_exists='append') 
Example #8
Source File: groupby.py    From Computable with MIT License
def _aggregate_item_by_item(self, func, *args, **kwargs):
        # only for axis==0

        obj = self._obj_with_exclusions
        result = {}
        cannot_agg = []
        for item in obj:
            try:
                data = obj[item]
                colg = SeriesGroupBy(data, selection=item,
                                     grouper=self.grouper)
                result[item] = self._try_cast(
                    colg.aggregate(func, *args, **kwargs), data)
            except ValueError:
                cannot_agg.append(item)
                continue

        result_columns = obj.columns
        if cannot_agg:
            result_columns = result_columns.drop(cannot_agg)

        return DataFrame(result, columns=result_columns) 
Example #9
Source File: groupby.py    From Computable with MIT License
def _wrap_aggregated_output(self, output, names=None):
        agg_axis = 0 if self.axis == 1 else 1
        agg_labels = self._obj_with_exclusions._get_axis(agg_axis)

        output_keys = self._decide_output_index(output, agg_labels)

        if not self.as_index:
            result = DataFrame(output, columns=output_keys)
            group_levels = self.grouper.get_group_levels()
            zipped = zip(self.grouper.names, group_levels)

            for i, (name, labels) in enumerate(zipped):
                result.insert(i, name, labels)
            result = result.consolidate()
        else:
            index = self.grouper.result_index
            result = DataFrame(output, index=index, columns=output_keys)

        if self.axis == 1:
            result = result.T

        return result.convert_objects() 
Example #10
Source File: concat.py    From vnpy_crypto with MIT License
def _get_series_result_type(result, objs=None):
    """
    Return the appropriate class for the result of a Series concat.
    The input is either a dict or array-like.
    """
    # concat Series with axis=1: the result arrives as a dict
    if isinstance(result, dict):
        if all(is_sparse(c) for c in compat.itervalues(result)):
            from pandas.core.sparse.api import SparseDataFrame
            return SparseDataFrame
        else:
            from pandas.core.frame import DataFrame
            return DataFrame

    # otherwise it is a SingleBlockManager (axis = 0)
    if result._block.is_sparse:
        from pandas.core.sparse.api import SparseSeries
        return SparseSeries
    else:
        return objs[0]._constructor 
Example #11
Source File: groupby.py    From Computable with MIT License
def _transform_item_by_item(self, obj, wrapper):
        # iterate through columns
        output = {}
        inds = []
        for i, col in enumerate(obj):
            try:
                output[col] = self[col].transform(wrapper)
                inds.append(i)
            except Exception:
                pass

        if len(output) == 0:  # pragma: no cover
            raise TypeError('Transform function invalid for data types')

        columns = obj.columns
        if len(output) < len(obj.columns):
            columns = columns.take(inds)

        return DataFrame(output, index=obj.index, columns=columns) 
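A minimal sketch of the public transform API that this column-by-column fallback supports, with a made-up frame:

import pandas as pd

df = pd.DataFrame({'k': ['a', 'a', 'b'], 'v': [1, 2, 3]})
print(df.groupby('k')['v'].transform('mean'))  # group mean broadcast back to rows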
Example #12
Source File: concat.py    From vnpy_crypto with MIT License
def _get_sliced_frame_result_type(data, obj):
    """
    Return the appropriate Series class. When data is sparse
    it will return SparseSeries; otherwise it will return the
    frame's sliced constructor (normally Series).

    Parameters
    ----------
    data : array-like
    obj : DataFrame

    Returns
    -------
    Series or SparseSeries
    """
    if is_sparse(data):
        from pandas.core.sparse.api import SparseSeries
        return SparseSeries
    return obj._constructor_sliced 
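A short sketch of what _constructor_sliced resolves to for a plain frame (it is a private attribute, so this is illustrative only):

import pandas as pd

df = pd.DataFrame({'a': [1, 2]})
assert df._constructor_sliced is pd.Series  # slicing a DataFrame yields Series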
Example #13
Source File: parsers.py    From vnpy_crypto with MIT License
def _validate_names(names):
    """
    Check if the `names` parameter contains duplicates.

    If duplicates are found, we issue a warning before returning.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Returns
    -------
    names : array-like or None
        The original `names` parameter.
    """

    if names is not None:
        if len(names) != len(set(names)):
            msg = ("Duplicate names specified. This "
                   "will raise an error in the future.")
            warnings.warn(msg, UserWarning, stacklevel=3)

    return names 
Example #14
Source File: panel.py    From Computable with MIT License
def major_xs(self, key):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        slices = dict((k, v.xs(key)) for k, v in compat.iteritems(self))
        return DataFrame(slices, index=self.minor_axis, columns=self.items) 
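Panel was removed in pandas 0.25; a hedged sketch of the equivalent cross-section on a MultiIndex frame (the frame below is made up):

import pandas as pd

idx = pd.MultiIndex.from_product([['d1', 'd2'], ['x', 'y']],
                                 names=['major', 'minor'])
df = pd.DataFrame({'item1': [1, 2, 3, 4]}, index=idx)
print(df.xs('d1', level='major'))  # slice along the major axis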
Example #15
Source File: groupby.py    From Computable with MIT License
def get_group(self, name, obj=None):
        """
        Constructs NDFrame from group with provided name

        Parameters
        ----------
        name : object
            the name of the group to get as a DataFrame
        obj : NDFrame, default None
            the NDFrame to take the group from.  If it is None,
            the object the groupby was called on will be used

        Returns
        -------
        group : type of obj
        """
        if obj is None:
            obj = self.obj

        inds = self._get_index(name)
        return obj.take(inds, axis=self.axis, convert=False) 
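A minimal sketch of the public API backed by this method, with a made-up frame:

import pandas as pd

df = pd.DataFrame({'k': ['a', 'a', 'b'], 'v': [1, 2, 3]})
print(df.groupby('k').get_group('a'))  # the rows where k == 'a'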
Example #16
Source File: var.py    From Computable with MIT License
def forecast(self, h):
        """
        Returns a DataFrame containing the forecasts for 1, 2, ..., h time
        steps ahead.  Each output column contains the forecasts for the
        corresponding input column.

        Parameters
        ----------
        h : int
            Number of time steps ahead to forecast.

        Returns
        -------
        DataFrame
        """
        forecast = self._forecast_raw(h)[:, 0, :]
        return DataFrame(forecast, index=lrange(1, 1 + h),
                         columns=self._columns) 
Example #17
Source File: var.py    From Computable with MIT License
def lag_select(data, max_lags=5, ic=None):
    """
    Select number of lags based on a variety of information criteria

    Parameters
    ----------
    data : DataFrame-like
    max_lags : int
        Maximum number of lags to evaluate
    ic : {None, 'aic', 'bic', ...}
        Choosing None will just display the results

    Returns
    -------
    None
    """
    # left unimplemented (stub) in the original source
    pass
Example #18
Source File: merge.py    From Computable with MIT License
def get_result(self):
        join_index, left_indexer, right_indexer = self._get_join_info()

        # this is a bit kludgy
        ldata, rdata = self._get_merge_data()

        if self.fill_method == 'ffill':
            left_join_indexer = algos.ffill_indexer(left_indexer)
            right_join_indexer = algos.ffill_indexer(right_indexer)
        else:
            left_join_indexer = left_indexer
            right_join_indexer = right_indexer

        join_op = _BlockJoinOperation([ldata, rdata], join_index,
                                      [left_join_indexer, right_join_indexer],
                                      axis=1, copy=self.copy)

        result_data = join_op.get_result()
        result = DataFrame(result_data)

        self._maybe_add_join_keys(result, left_indexer, right_indexer)

        return result 
Example #19
Source File: merge.py    From Computable with MIT License
def get_result(self):
        join_index, left_indexer, right_indexer = self._get_join_info()

        # this is a bit kludgy
        ldata, rdata = self._get_merge_data()

        # TODO: more efficiently handle group keys to avoid extra
        #       consolidation!
        join_op = _BlockJoinOperation([ldata, rdata], join_index,
                                      [left_indexer, right_indexer], axis=1,
                                      copy=self.copy)

        result_data = join_op.get_result()
        result = DataFrame(result_data)

        self._maybe_add_join_keys(result, left_indexer, right_indexer)

        return result 
Example #20
Source File: test_concurrent_append.py    From arctic with GNU Lesser General Public License v2.1
def run(self):
        self.sem.acquire()
        while datetime.now() < self.timeout:
            try:
                # Randomly sized dataframe to keep appending to
                df = DataFrame({'v': [self.last]}, [datetime.now()])
                for i in range(random.randint(1, 10)):
                    df = df.append(DataFrame({'v': [self.last + i]}, [datetime.now()]))
                df.index.name = 'index'
                self.lib.append('symbol', df)
                assert self.last in self.lib.read('symbol').data['v'].tolist()
                self.last += 2
            except OptimisticLockException:
                # Concurrent write, not successful
                pass
#             time.sleep(self.begin) 
Example #21
Source File: plm.py    From Computable with MIT License
def _convertDummies(dummies, mapping):
    # cleans up the names of the generated dummies
    new_items = []
    for item in dummies.columns:
        if not mapping:
            var = str(item)
            if isinstance(item, float):
                var = '%g' % item

            new_items.append(var)
        else:
            # renames the dummies if a conversion dict is provided
            new_items.append(mapping[int(item)])

    dummies = DataFrame(dummies.values, index=dummies.index,
                        columns=new_items)

    return dummies 
Example #22
Source File: testing.py    From Computable with MIT License
def makeMixedDataFrame():
    return DataFrame(getMixedTypeDict()[1]) 
Example #23
Source File: test_gbq.py    From Computable with MIT License
def test_upload_new_table_schema_error(self):
        # Attempting to upload to a non-existent table without a schema should fail
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        df = DataFrame(self.correct_data_small)
        with self.assertRaises(gbq.SchemaMissing):
            gbq.to_gbq(df, 'pandas_testing_dataset.test_database', schema=None, col_order=None, if_exists='fail') 
Example #24
Source File: testing.py    From Computable with MIT License
def makeTimeDataFrame(nper=None):
    data = getTimeSeriesData(nper)
    return DataFrame(data) 
Example #25
Source File: test_gbq.py    From Computable with MIT License
def test_upload_replace_schema_error(self):
        # Attempting to replace an existing table without specifying a schema should fail
        if not os.path.exists(self.bq_token):
            raise nose.SkipTest('Skipped because authentication information is not available.')

        df = DataFrame(self.correct_data_small)
        with self.assertRaises(gbq.SchemaMissing):
            gbq.to_gbq(df, 'pandas_testing_dataset.test_database', schema=None, col_order=None, if_exists='replace') 
Example #26
Source File: merge.py    From Computable with MIT License
def _get_block_dtype(blocks):
    if len(blocks) == 0:
        return object
    blk1 = blocks[0]
    dtype = blk1.dtype

    if issubclass(dtype.type, np.floating):
        for blk in blocks:
            if blk.dtype.type == np.float64:
                return blk.dtype

    return dtype

#----------------------------------------------------------------------
# Concatenate DataFrame objects 
Example #27
Source File: test_gbq.py    From Computable with MIT License
def test_column_order_plus_index(self):
        # A user should be able to specify an index and the order of the
        # remaining columns, and should be notified if they get it wrong
        col_order = ['corpus_date', 'word', 'corpus']
        result_frame = gbq._parse_data(FakeClient(), self.fake_job, index_col='word_count', col_order=col_order)
        correct_frame_small = DataFrame(self.correct_data_small)
        correct_frame_small.set_index('word_count', inplace=True)
        correct_frame_small = DataFrame(correct_frame_small)[col_order]
        tm.assert_index_equal(result_frame.columns, correct_frame_small.columns) 
Example #28
Source File: testing.py    From Computable with MIT License
def makePeriodFrame(nper=None):
    data = getPeriodData(nper)
    return DataFrame(data) 
Example #29
Source File: groupby.py    From Computable with MIT License
def __init__(self, obj, keys=None, axis=0, level=None,
                 grouper=None, exclusions=None, selection=None, as_index=True,
                 sort=True, group_keys=True, squeeze=False):
        self._selection = selection

        if isinstance(obj, NDFrame):
            obj._consolidate_inplace()

        self.obj = obj
        self.axis = obj._get_axis_number(axis)
        self.level = level

        if not as_index:
            if not isinstance(obj, DataFrame):
                raise TypeError('as_index=False only valid with DataFrame')
            if axis != 0:
                raise ValueError('as_index=False only valid for axis=0')

        self.as_index = as_index
        self.keys = keys
        self.sort = sort
        self.group_keys = group_keys
        self.squeeze = squeeze

        if grouper is None:
            grouper, exclusions = _get_grouper(obj, keys, axis=axis,
                                               level=level, sort=sort)

        self.grouper = grouper
        self.exclusions = set(exclusions) if exclusions else set() 
Example #30
Source File: groupby.py    From Computable with MIT License
def _last_compat(x, axis=0):
    def _last(x):
        x = np.asarray(x)
        x = x[notnull(x)]
        if len(x) == 0:
            return np.nan
        return x[-1]

    if isinstance(x, DataFrame):
        return x.apply(_last, axis=axis)
    else:
        return _last(x)
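A hedged sketch of the behaviour _last_compat implements (the last non-null value per column, NaN when a column is entirely null), using modern pandas equivalents:

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, np.nan, 3.0], 'b': [np.nan, 5.0, np.nan]})
last = df.apply(lambda col: col.dropna().iloc[-1] if col.notna().any() else np.nan)
print(last)  # a -> 3.0, b -> 5.0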