Python pandas.core.frame.DataFrame() Examples
The following are 30 code examples of pandas.core.frame.DataFrame().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.frame, or try the search function.
Example #1
Source File: plm.py From Computable with MIT License | 6 votes |
def add_intercept(panel, name='intercept'):
    """
    Return a copy of the input with an extra column of ones.

    Parameters
    ----------
    panel : Panel / DataFrame
        Object to extend. It is copied first, so the caller's object
        is left untouched.
    name : string, default 'intercept'
        Label under which the column of ones is stored.

    Returns
    -------
    New object (same type as input)
    """
    with_intercept = panel.copy()
    with_intercept[name] = 1.
    return with_intercept.consolidate()
Example #2
Source File: parsers.py From recruit with Apache License 2.0 | 6 votes |
def _validate_names(names):
    """
    Warn when the `names` parameter contains duplicate entries.

    The input is never modified; a ``UserWarning`` is emitted if two or
    more entries compare equal.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Returns
    -------
    names : array-like or None
        The original `names` parameter.
    """
    if names is None:
        return names

    has_duplicates = len(set(names)) != len(names)
    if has_duplicates:
        warnings.warn("Duplicate names specified. This "
                      "will raise an error in the future.",
                      UserWarning, stacklevel=3)

    return names
Example #3
Source File: groupby.py From Computable with MIT License | 6 votes |
def aggregate(self, arg, *args, **kwargs):
    """
    Aggregate using input function or dict of {column -> function}

    Parameters
    ----------
    arg : function or dict
        Function to use for aggregating groups. If a function, must either
        work when passed a Panel or when passed to Panel.apply. If pass a
        dict, the keys must be DataFrame column names.
        A string is looked up as an attribute of this object and called
        with the remaining arguments.

    Returns
    -------
    aggregated : Panel
    """
    if not isinstance(arg, compat.string_types):
        return self._aggregate_generic(arg, *args, **kwargs)

    # A string names one of this object's own methods; dispatch to it.
    named_method = getattr(self, arg)
    return named_method(*args, **kwargs)
Example #4
Source File: parsers.py From recruit with Apache License 2.0 | 6 votes |
def read(self, nrows=None):
    """
    Read up to `nrows` rows from the engine and return them as a DataFrame.

    ``self._currow`` is advanced by the number of rows read. When
    ``self.squeeze`` is set and the result has exactly one column, a copy
    of that single column is returned instead of the frame.
    """
    nrows = _validate_integer('nrows', nrows)
    raw = self._engine.read(nrows)

    # May alter columns / col_dict
    index, columns, col_dict = self._create_index(raw)

    if index is not None:
        new_rows = len(index)
    elif col_dict:
        # No index came back: number the rows from the current position.
        # Any column is actually fine for measuring the length:
        new_rows = len(compat.next(compat.itervalues(col_dict)))
        index = RangeIndex(self._currow, self._currow + new_rows)
    else:
        new_rows = 0

    frame = DataFrame(col_dict, columns=columns, index=index)
    self._currow += new_rows

    single_column = len(frame.columns) == 1
    if self.squeeze and single_column:
        return frame[frame.columns[0]].copy()
    return frame
Example #5
Source File: reshape.py From recruit with Apache License 2.0 | 6 votes |
def unstack(obj, level, fill_value=None):
    """
    Dispatch an unstack request to the helper that matches the input.

    A tuple/list `level` with more than one entry goes to
    ``_unstack_multiple``; a one-element tuple/list is unwrapped first.
    DataFrames, extension-dtype objects and everything else each take
    their own path below.
    """
    if isinstance(level, (tuple, list)):
        if len(level) != 1:
            # _unstack_multiple only handles MultiIndexes,
            # and isn't needed for a single level
            return _unstack_multiple(obj, level, fill_value=fill_value)
        level = level[0]

    if isinstance(obj, DataFrame):
        if not isinstance(obj.index, MultiIndex):
            return obj.T.stack(dropna=False)
        return _unstack_frame(obj, level, fill_value=fill_value)

    if is_extension_array_dtype(obj.dtype):
        return _unstack_extension_series(obj, level, fill_value)

    return _Unstacker(
        obj.values,
        obj.index,
        level=level,
        fill_value=fill_value,
        constructor=obj._constructor_expanddim,
    ).get_result()
Example #6
Source File: test_gbq.py From Computable with MIT License | 6 votes |
def test_upload_new_table(self):
    """Uploading valid data with a valid schema to a new table should succeed."""
    if not os.path.exists(self.bq_token):
        raise nose.SkipTest('Skipped because authentication information is not available.')

    schema = [
        'STRING', 'INTEGER', 'STRING', 'INTEGER', 'BOOLEAN',
        'INTEGER', 'STRING', 'INTEGER', 'STRING', 'INTEGER',
        'BOOLEAN', 'BOOLEAN', 'INTEGER', 'STRING', 'INTEGER',
    ]
    row = [
        'TESTING_GBQ', 999999999, 'hi', 0, True,
        9999999999, '00.000.00.000', 1, 'hola', 99999999,
        False, False, 1, 'Jedi', 11210,
    ]
    column_names = [
        'title', 'id', 'language', 'wp_namespace', 'is_redirect',
        'revision_id', 'contributor_ip', 'contributor_id',
        'contributor_username', 'timestamp', 'is_minor', 'is_bot',
        'reversion_id', 'comment', 'num_characters',
    ]
    df = DataFrame([row], columns=column_names)

    gbq.to_gbq(df, 'pandas_testing_dataset.test_data2', schema=schema,
               col_order=None, if_exists='append')

    # Read everything back and compare it cell by cell with what was sent.
    uploaded = gbq.read_gbq("SELECT * FROM pandas_testing_dataset.test_data2")
    self.assertTrue((uploaded == df).all().all())
Example #7
Source File: test_gbq.py From Computable with MIT License | 6 votes |
def test_upload_bad_data_table(self):
    """Uploading data that does not match the schema should fail."""
    if not os.path.exists(self.bq_token):
        raise nose.SkipTest('Skipped because authentication information is not available.')

    schema = [
        'STRING', 'INTEGER', 'STRING', 'INTEGER', 'BOOLEAN',
        'INTEGER', 'STRING', 'INTEGER', 'STRING', 'INTEGER',
        'BOOLEAN', 'BOOLEAN', 'INTEGER', 'STRING', 'INTEGER',
    ]
    # Several cells deliberately hold values of the wrong type for their column.
    bad_row = [
        'TESTING_GBQ\',', False, 'hi', 0, True,
        'STRING IN INTEGER', '00.000.00.000', 1, 'hola', 99999999,
        -100, 1000, 1, 'Jedi', 11210,
    ]
    column_names = [
        'title', 'id', 'language', 'wp_namespace', 'is_redirect',
        'revision_id', 'contributor_ip', 'contributor_id',
        'contributor_username', 'timestamp', 'is_minor', 'is_bot',
        'reversion_id', 'comment', 'num_characters',
    ]
    df = DataFrame([bad_row], columns=column_names)

    with self.assertRaises(bigquery_client.BigqueryServiceError):
        gbq.to_gbq(df, 'pandas_testing_dataset.test_data1', schema=schema,
                   col_order=None, if_exists='append')
Example #8
Source File: groupby.py From Computable with MIT License | 6 votes |
def _aggregate_item_by_item(self, func, *args, **kwargs):
    """
    Aggregate each item of the selected object separately with `func`.

    Items whose aggregation raises ``ValueError`` are left out of the
    resulting DataFrame rather than aborting the whole operation.
    """
    # only for axis==0
    frame = self._obj_with_exclusions
    aggregated = {}
    failed = []

    for label in frame:
        try:
            column = frame[label]
            grouped = SeriesGroupBy(column, selection=label,
                                    grouper=self.grouper)
            raw = grouped.aggregate(func, *args, **kwargs)
            aggregated[label] = self._try_cast(raw, column)
        except ValueError:
            failed.append(label)

    kept = frame.columns.drop(failed) if failed else frame.columns
    return DataFrame(aggregated, columns=kept)
Example #9
Source File: groupby.py From Computable with MIT License | 6 votes |
def _wrap_aggregated_output(self, output, names=None):
    """
    Build the result DataFrame for an aggregation.

    With ``as_index`` set, the grouper's result index becomes the frame's
    index; otherwise the group levels are inserted as leading columns.
    The frame is transposed when grouping ran along axis 1.
    `names` is accepted but not used here.
    """
    other_axis = 0 if self.axis == 1 else 1
    labels = self._obj_with_exclusions._get_axis(other_axis)
    output_keys = self._decide_output_index(output, labels)

    if self.as_index:
        result = DataFrame(output, index=self.grouper.result_index,
                           columns=output_keys)
    else:
        result = DataFrame(output, columns=output_keys)
        group_levels = self.grouper.get_group_levels()
        # Put one column per grouping key at the front, in key order.
        for position, (key_name, level_values) in enumerate(
                zip(self.grouper.names, group_levels)):
            result.insert(position, key_name, level_values)
        result = result.consolidate()

    if self.axis == 1:
        result = result.T

    return result.convert_objects()
Example #10
Source File: concat.py From vnpy_crypto with MIT License | 6 votes |
def _get_series_result_type(result, objs=None):
    """
    return appropriate class of Series concat
    input is either dict or array-like

    A dict input yields a frame class (sparse when every value is
    sparse); any other input yields a series class.
    """
    if not isinstance(result, dict):
        # not a dict, so it is a SingleBlockManager (axis = 0)
        if not result._block.is_sparse:
            return objs[0]._constructor
        from pandas.core.sparse.api import SparseSeries
        return SparseSeries

    # concat Series with axis 1
    every_value_sparse = all(is_sparse(value)
                             for value in compat.itervalues(result))
    if every_value_sparse:
        from pandas.core.sparse.api import SparseDataFrame
        return SparseDataFrame

    from pandas.core.frame import DataFrame
    return DataFrame
Example #11
Source File: groupby.py From Computable with MIT License | 6 votes |
def _transform_item_by_item(self, obj, wrapper):
    """
    Apply `wrapper` via ``transform`` to each column, skipping failures.

    Columns whose transform raises are silently left out of the result.

    Raises
    ------
    TypeError
        If no column at all could be transformed.
    """
    # iterate through columns
    transformed = {}
    kept_positions = []
    for position, col in enumerate(obj):
        try:
            values = self[col].transform(wrapper)
        except Exception:
            # best effort: a column that cannot be transformed is dropped
            pass
        else:
            transformed[col] = values
            kept_positions.append(position)

    if not len(transformed):  # pragma: no cover
        raise TypeError('Transform function invalid for data types')

    columns = obj.columns
    if len(transformed) < len(obj.columns):
        columns = columns.take(kept_positions)

    return DataFrame(transformed, index=obj.index, columns=columns)
Example #12
Source File: concat.py From vnpy_crypto with MIT License | 6 votes |
def _get_sliced_frame_result_type(data, obj):
    """
    Pick the class used for a slice taken from a frame.

    Sparse `data` maps to SparseSeries; anything else uses the sliced
    constructor of `obj`.

    Parameters
    ----------
    data : array-like
    obj : DataFrame

    Returns
    -------
    Series or SparseSeries
    """
    if not is_sparse(data):
        return obj._constructor_sliced

    from pandas.core.sparse.api import SparseSeries
    return SparseSeries
Example #13
Source File: parsers.py From vnpy_crypto with MIT License | 6 votes |
def _validate_names(names):
    """
    Warn when the `names` parameter contains duplicate entries.

    The input is never modified; a ``UserWarning`` is emitted if two or
    more entries compare equal.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Returns
    -------
    names : array-like or None
        The original `names` parameter.
    """
    if names is None:
        return names

    distinct = set(names)
    if len(distinct) != len(names):
        message = ("Duplicate names specified. This "
                   "will raise an error in the future.")
        warnings.warn(message, UserWarning, stacklevel=3)

    return names
Example #14
Source File: panel.py From Computable with MIT License | 6 votes |
def major_xs(self, key):
    """
    Return slice of panel along major axis

    Parameters
    ----------
    key : object
        Major axis label

    Returns
    -------
    y : DataFrame
        index -> minor axis, columns -> items
    """
    # Take the cross-section at `key` from every item in turn.
    slices = {}
    for item, frame in compat.iteritems(self):
        slices[item] = frame.xs(key)

    return DataFrame(slices, index=self.minor_axis, columns=self.items)
Example #15
Source File: groupby.py From Computable with MIT License | 6 votes |
def get_group(self, name, obj=None):
    """
    Constructs NDFrame from group with provided name

    Parameters
    ----------
    name : object
        the name of the group to get as a DataFrame
    obj : NDFrame, default None
        the NDFrame to take the DataFrame out of. If
        it is None, the object groupby was called on will
        be used

    Returns
    -------
    group : type of obj
    """
    source = self.obj if obj is None else obj
    positions = self._get_index(name)
    return source.take(positions, axis=self.axis, convert=False)
Example #16
Source File: var.py From Computable with MIT License | 6 votes |
def forecast(self, h):
    """
    Returns a DataFrame containing the forecasts for 1, 2, ..., h time steps.
    Each column x1 contains the forecasts of the x1 column.

    Parameters
    ----------
    h : int
        Number of time steps ahead to forecast.

    Returns
    -------
    DataFrame
        Indexed by the step numbers 1..h, with ``self._columns`` as
        column labels.
    """
    # The raw forecast is indexed on three axes; keep position 0 of the
    # middle one so that one row per step remains.
    steps = self._forecast_raw(h)[:, 0, :]
    # Built-in range replaces the lrange compatibility shim; wrapping it
    # in list() yields the same list on Python 2 and 3.
    step_numbers = list(range(1, 1 + h))
    return DataFrame(steps, index=step_numbers, columns=self._columns)
Example #17
Source File: var.py From Computable with MIT License | 6 votes |
def lag_select(data, max_lags=5, ic=None):
    """
    Select number of lags based on a variety of information criteria

    NOTE: not implemented -- the function has no body, so every call
    returns None regardless of the arguments.

    Parameters
    ----------
    data : DataFrame-like
    max_lags : int
        Maximum number of lags to evaluate
    ic : {None, 'aic', 'bic', ...}
        Choosing None will just display the results

    Returns
    -------
    None
    """
Example #18
Source File: merge.py From Computable with MIT License | 6 votes |
def get_result(self):
    """
    Run the join and return the merged DataFrame.

    When ``self.fill_method`` is 'ffill' both indexers are passed through
    ``algos.ffill_indexer`` before the block join; the join keys are
    always added using the original indexers.
    """
    join_index, left_indexer, right_indexer = self._get_join_info()

    # this is a bit kludgy
    ldata, rdata = self._get_merge_data()

    join_indexers = [left_indexer, right_indexer]
    if self.fill_method == 'ffill':
        join_indexers = [algos.ffill_indexer(indexer)
                         for indexer in join_indexers]

    joined_blocks = _BlockJoinOperation(
        [ldata, rdata], join_index, join_indexers,
        axis=1, copy=self.copy,
    ).get_result()

    result = DataFrame(joined_blocks)
    self._maybe_add_join_keys(result, left_indexer, right_indexer)
    return result
Example #19
Source File: merge.py From Computable with MIT License | 6 votes |
def get_result(self):
    """
    Run the join and return the merged DataFrame.

    The left and right data are joined along axis 1 on the computed join
    index, then the join keys are added to the resulting frame.
    """
    join_index, left_indexer, right_indexer = self._get_join_info()

    # this is a bit kludgy
    ldata, rdata = self._get_merge_data()

    # TODO: more efficiently handle group keys to avoid extra
    # consolidation!
    joined_blocks = _BlockJoinOperation(
        [ldata, rdata], join_index, [left_indexer, right_indexer],
        axis=1, copy=self.copy,
    ).get_result()

    result = DataFrame(joined_blocks)
    self._maybe_add_join_keys(result, left_indexer, right_indexer)
    return result
Example #20
Source File: test_concurrent_append.py From arctic with GNU Lesser General Public License v2.1 | 6 votes |
def run(self):
    """
    Repeatedly append small random-length frames to 'symbol' until the
    deadline in ``self.timeout`` passes, checking after each append that
    the value just written can be read back.

    An OptimisticLockException from the library is treated as a lost race
    with another writer and the iteration is simply retried.
    """
    # Wait on the semaphore before starting; nothing in this method
    # releases it.
    self.sem.acquire()
    while datetime.now() < self.timeout:
        try:
            # Random-length dataframe to keep appending to
            df = DataFrame({'v': [self.last]}, [datetime.now()])
            for i in range(random.randint(1, 10)):
                df = df.append(DataFrame({'v': [self.last + i]}, [datetime.now()]))
            # NOTE(review): the next line computes a value and discards it,
            # so it has no effect -- possibly `self.last += i` was meant;
            # confirm. Its position relative to the loop could not be
            # recovered from the flattened source.
            self.last + i
            df.index.name = 'index'
            self.lib.append('symbol', df)
            # The first value of the frame just appended must be readable.
            assert self.last in self.lib.read('symbol').data['v'].tolist()
            self.last += 2
        except OptimisticLockException:
            # Concurrent write, not successful
            pass
            # time.sleep(self.begin)
Example #21
Source File: plm.py From Computable with MIT License | 6 votes |
def _convertDummies(dummies, mapping):
    """
    Return `dummies` with cleaned-up column names.

    With a non-empty `mapping`, each column label is converted to int and
    looked up in it. Otherwise labels are turned into strings, floats
    being formatted with '%g'. Values and index are carried over as-is.
    """
    if mapping:
        # renames the dummies if a conversion dict is provided
        new_items = [mapping[int(label)] for label in dummies.columns]
    else:
        # cleans up the names of the generated dummies
        new_items = ['%g' % label if isinstance(label, float) else str(label)
                     for label in dummies.columns]

    return DataFrame(dummies.values, index=dummies.index, columns=new_items)
Example #22
Source File: testing.py From Computable with MIT License | 5 votes |
def makeMixedDataFrame():
    """Build a DataFrame from element 1 of what getMixedTypeDict() returns."""
    mixed = getMixedTypeDict()
    return DataFrame(mixed[1])
Example #23
Source File: test_gbq.py From Computable with MIT License | 5 votes |
def test_upload_new_table_schema_error(self):
    """Uploading to a non-existent table without a schema should fail."""
    credentials_present = os.path.exists(self.bq_token)
    if not credentials_present:
        raise nose.SkipTest('Skipped because authentication information is not available.')

    frame = DataFrame(self.correct_data_small)
    self.assertRaises(
        gbq.SchemaMissing,
        gbq.to_gbq,
        frame,
        'pandas_testing_dataset.test_database',
        schema=None,
        col_order=None,
        if_exists='fail',
    )
Example #24
Source File: testing.py From Computable with MIT License | 5 votes |
def makeTimeDataFrame(nper=None):
    """Wrap the output of getTimeSeriesData(nper) in a DataFrame."""
    return DataFrame(getTimeSeriesData(nper))
Example #25
Source File: test_gbq.py From Computable with MIT License | 5 votes |
def test_upload_replace_schema_error(self):
    """Replacing an existing table without specifying a schema should fail."""
    credentials_present = os.path.exists(self.bq_token)
    if not credentials_present:
        raise nose.SkipTest('Skipped because authentication information is not available.')

    frame = DataFrame(self.correct_data_small)
    self.assertRaises(
        gbq.SchemaMissing,
        gbq.to_gbq,
        frame,
        'pandas_testing_dataset.test_database',
        schema=None,
        col_order=None,
        if_exists='replace',
    )
Example #26
Source File: merge.py From Computable with MIT License | 5 votes |
def _get_block_dtype(blocks):
    """
    Choose a dtype for a sequence of blocks.

    An empty sequence gives ``object``. If the first block is not a
    floating type its dtype is returned unchanged; if it is, the dtype of
    the first float64 block wins, falling back to the first block's dtype
    when there is none.
    """
    if not len(blocks):
        return object

    first_dtype = blocks[0].dtype
    if not issubclass(first_dtype.type, np.floating):
        return first_dtype

    float64_dtypes = (blk.dtype for blk in blocks
                      if blk.dtype.type == np.float64)
    return next(float64_dtypes, first_dtype)

#----------------------------------------------------------------------
# Concatenate DataFrame objects
Example #27
Source File: test_gbq.py From Computable with MIT License | 5 votes |
def test_column_order_plus_index(self):
    """
    A user should be able to specify an index and the order of THE
    REMAINING columns; they should be notified if they get it wrong.
    """
    col_order = ['corpus_date', 'word', 'corpus']
    parsed = gbq._parse_data(FakeClient(), self.fake_job,
                             index_col='word_count', col_order=col_order)

    # Build the expected frame: same data, indexed on 'word_count', with
    # the remaining columns in the requested order.
    expected = DataFrame(self.correct_data_small)
    expected.set_index('word_count',inplace=True)
    expected = DataFrame(expected)[col_order]

    tm.assert_index_equal(parsed.columns, expected.columns)
Example #28
Source File: testing.py From Computable with MIT License | 5 votes |
def makePeriodFrame(nper=None):
    """Wrap the output of getPeriodData(nper) in a DataFrame."""
    return DataFrame(getPeriodData(nper))
Example #29
Source File: groupby.py From Computable with MIT License | 5 votes |
def __init__(self, obj, keys=None, axis=0, level=None,
             grouper=None, exclusions=None, selection=None, as_index=True,
             sort=True, group_keys=True, squeeze=False):
    """
    Store the grouping configuration and resolve the grouper for `obj`.

    When no `grouper` is supplied, one is built (together with its
    exclusions) by ``_get_grouper`` from `obj`, `keys`, `axis`, `level`
    and `sort`.

    Raises
    ------
    TypeError
        If ``as_index`` is false and `obj` is not a DataFrame.
    ValueError
        If ``as_index`` is false and `axis` is not 0.
    """
    self._selection = selection

    if isinstance(obj, NDFrame):
        obj._consolidate_inplace()

    self.obj = obj
    self.axis = obj._get_axis_number(axis)
    self.level = level

    # as_index=False is restricted to DataFrame input grouped on axis 0.
    if not as_index and not isinstance(obj, DataFrame):
        raise TypeError('as_index=False only valid with DataFrame')
    if not as_index and axis != 0:
        raise ValueError('as_index=False only valid for axis=0')

    self.as_index = as_index
    self.keys = keys
    self.sort = sort
    self.group_keys = group_keys
    self.squeeze = squeeze

    if grouper is None:
        grouper, exclusions = _get_grouper(obj, keys, axis=axis,
                                           level=level, sort=sort)

    self.grouper = grouper
    # Missing or empty exclusions collapse to an empty set.
    self.exclusions = set(exclusions or ())
Example #30
Source File: groupby.py From Computable with MIT License | 5 votes |
def _last_compat(x, axis=0):
    """
    Return the last non-null value of `x`.

    For a DataFrame the computation is applied along `axis`; any other
    input is treated as a single array. NaN is returned when no non-null
    value exists.
    """
    def _last_valid(values):
        arr = np.asarray(values)
        valid = arr[notnull(arr)]
        return valid[-1] if len(valid) else np.nan

    if not isinstance(x, DataFrame):
        return _last_valid(x)
    return x.apply(_last_valid, axis=axis)