Python pandas.Index() Examples
The following code examples show how to use pandas.Index(). They are extracted from open source Python projects.
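For orientation before the project examples, here is a minimal sketch of what pd.Index itself provides; the values and names are illustrative and not drawn from any of the projects below.

import pandas as pd

# An Index is an immutable array of labels; it backs every Series and
# DataFrame axis.
idx = pd.Index(['a', 'b', 'c'], name='letters')

# Indexes support set-like operations that return new Index objects.
other = pd.Index(['b', 'c', 'd'])
print(list(idx.intersection(other)))  # ['b', 'c']
print(list(idx.difference(other)))    # ['a']

# get_indexer maps lookup values to integer positions, with -1 for
# values that are absent.
print(idx.get_indexer(['c', 'z']))    # [ 2 -1]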
Example 1
Project: qiime2 Author: qiime2 File: test_metadata.py
def test_filter_to_numeric(self):
    index = pd.Index(['a', 'b', 'c'], dtype=object)
    df = pd.DataFrame({'col1': ['2', '1', '3'],
                       'col2': ['two', 'one', 'three']},
                      index=index, dtype=object)
    metadata = qiime2.Metadata(df)

    obs_df = metadata.filter(column_type='numeric').to_dataframe()

    exp_df = pd.DataFrame({'col1': [2, 1, 3]}, dtype=np.int, index=index)
    pdt.assert_frame_equal(obs_df, exp_df)

    df = pd.DataFrame({'col1': ['2', '1', '3'],
                       'col2': ['2', '1', 'three'],
                       'col3': ['4.0', '5.2', '6.9']},
                      index=index, dtype=object)
    metadata = qiime2.Metadata(df)

    obs_df = metadata.filter(column_type='numeric').to_dataframe()

    exp_df = pd.DataFrame({'col1': [2, 1, 3], 'col3': [4.0, 5.2, 6.9]},
                          index=index)
    pdt.assert_frame_equal(obs_df, exp_df)
    self.assertEqual(dict(obs_df.dtypes),
                     {'col1': np.int, 'col3': np.float})
Example 2
Project: zipline-chinese Author: zhanghan1990 File: history_container.py (Apache License 2.0)
def _add_field(self, field):
    """
    Adds a new field to the container.
    """
    # self.fields is already sorted, so we just need to insert the new
    # field in the correct index.
    ls = list(self.fields)
    insort_left(ls, field)
    self.fields = pd.Index(ls)
    # unset fillable fields cache
    self._ffillable_fields = None

    self._realign_fields()
    self.last_known_prior_values = self.last_known_prior_values.reindex(
        index=self.prior_values_index,
    )
    return field
Example 3
Project: q2-diversity Author: qiime2 File: test_alpha_rarefaction.py
def test_some_duplicates_in_category(self):
    columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
                                         (200, 2), ('pet', '')],
                                        names=['depth', 'iter'])
    data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'],
                              [5, 6, 7, 8, 'milo'],
                              [9, 10, 11, 12, 'russ']],
                        columns=columns, index=['S1', 'S2', 'S3'])

    obs = _reindex_with_metadata('pet', ['pet'], data)

    exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                            names=['depth', 'iter'])
    exp_ind = pd.Index(['milo', 'russ'], name='pet')
    exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                       columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(exp, obs[0])

    exp = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
                       columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(exp, obs[1])
Example 4
Project: q2-diversity Author: qiime2 File: test_alpha_rarefaction.py
def test_all_identical(self):
    columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
                                         (200, 2), ('pet', '')],
                                        names=['depth', 'iter'])
    data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'],
                              [5, 6, 7, 8, 'russ'],
                              [9, 10, 11, 12, 'russ']],
                        columns=columns, index=['S1', 'S2', 'S3'])

    obs = _reindex_with_metadata('pet', ['pet'], data)

    exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                            names=['depth', 'iter'])
    exp_ind = pd.Index(['russ'], name='pet')
    exp = pd.DataFrame(data=[[5, 6, 7, 8]], columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(exp, obs[0])

    exp = pd.DataFrame(data=[[3, 3, 3, 3]], columns=exp_col, index=exp_ind)
    pdt.assert_frame_equal(exp, obs[1])
Example 5
Project: plotnine Author: has2k1 File: utils.py
def cross_join(df1, df2):
    """
    Return a dataframe that is a cross between dataframes
    df1 and df2

    ref: https://github.com/pydata/pandas/issues/5401
    """
    if len(df1) == 0:
        return df2

    if len(df2) == 0:
        return df1

    # Add as lists so that the new index keeps the items in
    # the order that they are added together
    all_columns = pd.Index(list(df1.columns) + list(df2.columns))
    df1['key'] = 1
    df2['key'] = 1

    return pd.merge(df1, df2, on='key').loc[:, all_columns]
Example 6
Project: extra-trees Author: allrod5 File: tree.py
def _split_sample(
        split: Callable[[object], bool], X: np.ndarray, y: np.ndarray
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    """
    Split X, y sample set in two with a split function

    :return: ((X_left, y_left), (X_right, y_right))
    """
    # String comparison, not identity comparison with `is`.
    if split.type == 'numerical':
        left_indexes = X[:, split.attribute] < split.criteria
        right_indexes = ~left_indexes
    else:
        Z = (
            pd.Index(pd.unique(split.criteria))
            .get_indexer(X[:, split.attribute]))
        left_indexes = np.where(Z >= 0)[0]
        right_indexes = np.where(Z < 0)[0]

    left = X[left_indexes], y[left_indexes]
    right = X[right_indexes], y[right_indexes]
    return left, right
Example 7
Project: InplusTrader_Linux Author: zhengwsh File: inplus_data_source.py
def get_dividend(self, order_book_id, adjusted=True):
    """
    Get dividend records for an instrument.

    :param str order_book_id: instrument code
    :param bool adjusted: whether to return the adjusted dividends
    :return:
    """
    def fetchData(adjusted):
        if adjusted:
            mongo_data = self._adjusted_dividends[order_book_id].find({}, {"_id": 0})
        else:
            mongo_data = self._original_dividends[order_book_id].find({}, {"_id": 0})
        return mongo_data

    result = pd.DataFrame({
        'book_closure_date': pd.Index(pd.Timestamp(d['book_closure_date']) for d in fetchData(adjusted)),
        'ex_dividend_date': pd.Index(pd.Timestamp(d['ex_dividend_date']) for d in fetchData(adjusted)),
        'payable_date': pd.Index(pd.Timestamp(d['payable_date']) for d in fetchData(adjusted)),
        'dividend_cash_before_tax': [d['dividend_cash_before_tax'] for d in fetchData(adjusted)],
        'round_lot': [d['round_lot'] for d in fetchData(adjusted)]
    }, index=pd.Index(pd.Timestamp(d['announcement_date']) for d in fetchData(adjusted)))
    return result
Example 8
Project: InplusTrader_Linux Author: zhengwsh File: yield_curve_store.py
def get_yield_curve(self, start_date, end_date, tenor):
    d1 = start_date.year * 10000 + start_date.month * 100 + start_date.day
    d2 = end_date.year * 10000 + end_date.month * 100 + end_date.day

    s = self._dates.searchsorted(d1)
    e = self._dates.searchsorted(d2, side='right')

    if e == len(self._dates):
        e -= 1
    if self._dates[e] == d2:
        # include end_date
        e += 1

    if e < s:
        return None

    df = pd.DataFrame(self._table[s:e])
    df.index = pd.Index(pd.Timestamp(str(d)) for d in df['date'])
    del df['date']
    df.rename(columns=lambda n: n[1:] + n[0], inplace=True)

    if tenor is not None:
        return df[tenor]
    return df
Example 9
Project: FHDMM Author: aweinstein File: ml.py
def fit_behavioral_data():
    """Fit a model for all subjects. """
    df = pd.read_pickle('data.pkl')
    subjects = df.index.get_level_values('subject').unique()
    data = np.empty((subjects.size, 10))
    cues = (0, 1)
    for i, subject in enumerate(subjects):
        print('Fitting model for subject {}'.format(subject))
        df_s = df.loc[subject]
        for cue in cues:
            ml = ML(df_s[df_s['cue'] == cue])
            r = ml.ml_estimation()
            data[i, 2*cue:(2*cue + 2)] = r.x
            data[i, 2*cue + 4:2*cue + 6] = np.sqrt(np.diag(r.hess_inv.todense()))
            data[i, cue + 8] = r.fun
    model = pd.DataFrame(data,
                         pd.Index(subjects, name='subject'),
                         ['alpha_0', 'beta_0', 'alpha_1', 'beta_1',
                          'se_alpha_0', 'se_beta_0', 'se_alpha_1',
                          'se_beta_1', 'NLL_0', 'NLL_1'])
    return model
Example 10
Project: coquery Author: gkunter File: app.py
def update_table_models(self, visible=None, hidden=None):
    if visible is None and hidden is None:
        manager = self.Session.get_manager()
        for x in list(manager.hidden_columns):
            if x not in self.Session.output_object.columns:
                manager.hidden_columns.remove(x)
        hidden_cols = pd.Index(manager.hidden_columns)
        vis_cols = [x for x in self.Session.output_object.columns
                    if x not in hidden_cols]
        to_show = self.Session.output_object[vis_cols]
        to_hide = self.Session.output_object[hidden_cols]
    else:
        to_show = visible
        to_hide = hidden

    self.table_model = classes.CoqTableModel(
        to_show, session=self.Session)
    self.hidden_model = classes.CoqHiddenTableModel(
        to_hide, session=self.Session)
    self.set_columns_widget()

    self.table_model.dataChanged.connect(self.change_userdata)
Example 11
Project: bowtie Author: jwkvam File: _component.py
def json_conversion(obj):
    """Encode additional objects to JSON."""
    try:
        # numpy isn't an explicit dependency of bowtie
        # so we can't assume it's available
        import numpy as np
        if isinstance(obj, (np.ndarray, np.generic)):
            return obj.tolist()
    except ImportError:
        pass

    try:
        # pandas isn't an explicit dependency of bowtie
        # so we can't assume it's available
        import pandas as pd
        if isinstance(obj, pd.Index):
            return obj.tolist()
    except ImportError:
        pass

    if isinstance(obj, (datetime, time, date)):
        return obj.isoformat()

    raise TypeError('Not sure how to serialize {} of type {}'.format(obj, type(obj)))
Example 12
Project: bowtie Author: jwkvam File: _component.py
def encoders(obj):
    """Convert Python object to msgpack encodable ones."""
    try:
        # numpy isn't an explicit dependency of bowtie
        # so we can't assume it's available
        import numpy as np
        if isinstance(obj, (np.ndarray, np.generic)):
            # https://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
            return obj.tolist()
    except ImportError:
        pass

    try:
        # pandas isn't an explicit dependency of bowtie
        # so we can't assume it's available
        import pandas as pd
        if isinstance(obj, pd.Index):
            return obj.tolist()
    except ImportError:
        pass

    if isinstance(obj, (datetime, time, date)):
        return obj.isoformat()

    return obj
Example 13
Project: catalyst Author: enigmampc File: algorithm.py
def batch_market_order(self, share_counts):
    """Place a batch market order for multiple assets.

    Parameters
    ----------
    share_counts : pd.Series[Asset -> int]
        Map from asset to number of shares to order for that asset.

    Returns
    -------
    order_ids : pd.Index[str]
        Index of ids for newly-created orders.
    """
    style = MarketOrder()
    order_args = [
        (asset, amount, style)
        for (asset, amount) in iteritems(share_counts)
        if amount
    ]
    return self.blotter.batch_order(order_args)
Example 14
Project: qiime2 Author: qiime2 File: test_metadata.py
def test_filter_to_categorical(self):
    index = pd.Index(['a', 'b', 'c'], dtype=object)
    df = pd.DataFrame({'col1': ['2', '1', '3'],
                       'col2': ['a', 'b', 'c']},
                      index=index, dtype=object)
    metadata = qiime2.Metadata(df)

    obs_df = metadata.filter(column_type='categorical').to_dataframe()

    exp_df = pd.DataFrame({'col2': ['a', 'b', 'c']}, index=index)
    pdt.assert_frame_equal(obs_df, exp_df)

    df = pd.DataFrame({'col1': ['2', '1', '3'],
                       'col2': ['a', 'b', 'c'],
                       'col3': ['peanut', 'hotdog', 'gwar']},
                      index=index, dtype=object)
    metadata = qiime2.Metadata(df)

    obs_df = metadata.filter(column_type='categorical').to_dataframe()

    exp_df = pd.DataFrame({'col2': ['a', 'b', 'c'],
                           'col3': ['peanut', 'hotdog', 'gwar']},
                          index=index)
    pdt.assert_frame_equal(obs_df, exp_df)
Example 15
Project: qiime2 Author: qiime2 File: test_metadata.py
def test_no_columns(self):
    fp = pkg_resources.resource_filename(
        'qiime2.tests', 'data/metadata/no-columns.tsv')

    metadata = qiime2.Metadata.load(fp)
    obs_df = metadata.to_dataframe()

    exp_index = pd.Index(['a', 'b', 'id'], name='my-index', dtype=object)
    exp_df = pd.DataFrame({}, index=exp_index, dtype=object)

    self.assertFalse(obs_df.index.empty)
    self.assertTrue(obs_df.columns.empty)
    pdt.assert_frame_equal(
        obs_df, exp_df, check_dtype=True, check_index_type=True,
        check_column_type=True, check_frame_type=True, check_names=True,
        check_exact=True)
Example 16
Project: qiime2 Author: qiime2 File: test_metadata.py
def test_index_and_column_names(self):
    md1 = qiime2.Metadata(pd.DataFrame(
        {'a': [1, 2]},
        index=pd.Index(['id1', 'id2'], name='foo'),
        columns=pd.Index(['a'], name='abc')))
    md2 = qiime2.Metadata(pd.DataFrame(
        {'b': [3, 4]},
        index=pd.Index(['id1', 'id2'], name='bar'),
        columns=pd.Index(['b'], name='def')))

    obs = md1.merge(md2)

    exp = qiime2.Metadata(pd.DataFrame(
        {'a': [1, 2], 'b': [3, 4]}, index=['id1', 'id2']))
    self.assertEqual(obs, exp)
    self.assertIsNone(obs._dataframe.index.name)
    self.assertIsNone(obs._dataframe.columns.name)
Example 17
Project: qiime2 Author: qiime2 File: test_metadata.py
def test_more_complex_expressions(self):
    df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'],
                       'SampleType': ['gut', 'tongue', 'gut']},
                      index=pd.Index(['S1', 'S2', 'S3'], name='id'))
    metadata = qiime2.Metadata(df)

    where = "Subject='subject-1' OR Subject='subject-2'"
    actual = metadata.ids(where)
    expected = {'S1', 'S2', 'S3'}
    self.assertEqual(actual, expected)

    where = "Subject='subject-1' AND Subject='subject-2'"
    actual = metadata.ids(where)
    expected = set()
    self.assertEqual(actual, expected)

    where = "Subject='subject-1' AND SampleType='gut'"
    actual = metadata.ids(where)
    expected = {'S1'}
    self.assertEqual(actual, expected)
Example 18
Project: meterstick Author: google File: core_test.py
def testMultipleCalculationsRelativeTo(self):
    data = pd.DataFrame({"X": (1, 2, 3, 10, 20, 30, 100, 200, 300),
                         "Y": (0, 1, 2, 3, 4, 5, 6, 7, 8),
                         "Experiment": ("Control", "Control", "Control",
                                        "Exp1", "Exp1", "Exp1",
                                        "Exp2", "Exp2", "Exp2")})
    comparison = comparisons.AbsoluteDifference("Experiment", "Control")
    output = core.Analyze(data).relative_to(comparison).calculate(
        (metrics.Sum("X"), metrics.Sum("Y"))).run()

    correct = pd.DataFrame(
        {"sum(X) Absolute Difference": (60 - 6, 600 - 6),
         "sum(Y) Absolute Difference": (12 - 3, 21 - 3)},
        index=pd.Index(("Exp1", "Exp2"), name="Experiment"))

    self.assertTrue(output.equals(correct))
Example 19
Project: meterstick Author: google File: core_test.py
def testRelativeToJackknife(self):
    data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6, 7, 8, 9],
                         "Y": [0, 0, 0, 1, 1, 1, 2, 2, 2]})
    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Y", 0)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).relative_to(comparison).with_standard_errors(
        se_method).calculate(metric).run()

    rowindex = pd.Index([1, 2], name="Y")
    correct = pd.DataFrame(
        np.array([[9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))],
                  [18.0, np.sqrt(5 * np.var([21, 20, 19, 11, 10, 9]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 20
Project: meterstick Author: google File: core_test.py
def testRelativeToJackknifeIncludeBaseline(self):
    data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6, 7, 8, 9],
                         "Y": [0, 0, 0, 1, 1, 1, 2, 2, 2]})
    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Y", 0, include_base=True)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).relative_to(comparison).with_standard_errors(
        se_method).calculate(metric).run()

    rowindex = pd.Index([0, 1, 2], name="Y")
    correct = pd.DataFrame(
        np.array([[0.0, 0.0],
                  [9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))],
                  [18.0, np.sqrt(5 * np.var([21, 20, 19, 11, 10, 9]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 21
Project: meterstick Author: google File: core_test.py
def testRelativeToJackknifeSingleComparisonBaselineFirst(self):
    data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6],
                         "Y": [0, 0, 0, 1, 1, 1]})
    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Y", 0)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).relative_to(comparison).with_standard_errors(
        se_method).calculate(metric).run()

    rowindex = pd.Index([1], name="Y")
    correct = pd.DataFrame(
        np.array([[9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 22
Project: meterstick Author: google File: core_test.py
def testRelativeToJackknifeSingleComparisonBaselineSecond(self):
    data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6],
                         "Y": [0, 0, 0, 1, 1, 1]})
    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Y", 1)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).relative_to(comparison).with_standard_errors(
        se_method).calculate(metric).run()

    rowindex = pd.Index([0], name="Y")
    correct = pd.DataFrame(
        np.array([[-9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 23
Project: meterstick Author: google File: core_test.py
def testSplitJackknife(self):
    data = pd.DataFrame({"X": np.array([range(11) + [5] * 10]).flatten(),
                         "Y": np.array([[0] * 11 + [1] * 10]).flatten()})
    metric = metrics.Sum("X")
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).split_by("Y").with_standard_errors(
        se_method).calculate(metric).run()

    rowindex = pd.Index([0, 1], name="Y")
    correct = pd.DataFrame(
        np.array([[55.0, 10.0], [50.0, 0.0]]),
        columns=("sum(X)", "sum(X) Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
Example 24
Project: tableschema-pandas-py Author: frictionlessdata File: test_storage.py
def test_storage_restore_schema_with_primary_key():
    data = [
        ('a',),
        ('b',),
    ]
    index = pd.Index([1, 2], name='key')
    df = pd.DataFrame(data, columns=('value',), index=index)
    storage = Storage(dataframes={'data': df})
    assert list(storage.read('data')) == [[1, 'a'], [2, 'b']]
    assert storage.describe('data') == {
        'primaryKey': 'key',
        'fields': [
            {'name': 'key', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'value', 'type': 'string'},
        ]
    }
Example 25
Project: q2-types Author: qiime2 File: test_transformer.py
def test_dataframe_to_tsv_taxonomy_format(self):
    index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
    columns = ['Taxon', 'Foo', 'Bar']
    df = pd.DataFrame([['taxon1', '42', 'foo'],
                       ['taxon2', '43', 'bar']],
                      index=index, columns=columns, dtype=object)
    exp = (
        'Feature ID\tTaxon\tFoo\tBar\n'
        'seq1\ttaxon1\t42\tfoo\n'
        'seq2\ttaxon2\t43\tbar\n'
    )

    transformer = self.get_transformer(pd.DataFrame, TSVTaxonomyFormat)
    obs = transformer(df)

    with obs.open() as fh:
        self.assertEqual(fh.read(), exp)
Example 26
Project: q2-types Author: qiime2 File: test_transformer.py
def test_series_to_tsv_taxonomy_format(self):
    index = pd.Index(['emrakul', 'peanut'], name='Feature ID',
                     dtype=object)
    series = pd.Series(['taxon1', 'taxon2'], index=index, name='Taxon',
                       dtype=object)
    exp = (
        'Feature ID\tTaxon\n'
        'emrakul\ttaxon1\n'
        'peanut\ttaxon2\n'
    )

    transformer = self.get_transformer(pd.Series, TSVTaxonomyFormat)
    obs = transformer(series)

    with obs.open() as fh:
        self.assertEqual(fh.read(), exp)
Example 27
Project: q2-types Author: qiime2 File: test_transformer.py
def test_tsv_taxonomy_format_to_metadata(self):
    _, obs = self.transform_format(TSVTaxonomyFormat, qiime2.Metadata,
                                   os.path.join('taxonomy', '3-column.tsv'))

    index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
    exp_df = pd.DataFrame([['k__Foo; p__Bar', '-1.0'],
                           ['k__Foo; p__Baz', '-42.0']],
                          index=index, columns=['Taxon', 'Confidence'],
                          dtype=object)
    exp = qiime2.Metadata(exp_df)

    self.assertEqual(exp, obs)


# In-depth testing of the `_taxonomy_formats_to_dataframe` helper function,
# which does the heavy lifting for the transformers.
Example 28
Project: q2-types Author: qiime2 File: test_transformer.py
def test_3_columns(self):
    index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
    exp = pd.DataFrame([['k__Foo; p__Bar', '-1.0'],
                        ['k__Foo; p__Baz', '-42.0']],
                       index=index, columns=['Taxon', 'Confidence'],
                       dtype=object)

    # has_header=None (default)
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy', '3-column.tsv')))

    assert_frame_equal(obs, exp)

    # has_header=True
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy', '3-column.tsv')),
        has_header=True)

    assert_frame_equal(obs, exp)
Example 29
Project: q2-types Author: qiime2 File: test_transformer.py
def test_valid_but_messy_file(self):
    index = pd.Index(
        ['SEQUENCE1', 'seq2'], name='Feature ID', dtype=object)
    exp = pd.DataFrame([['k__Bar; p__Baz', 'foo'],
                        ['some; taxonomy; for; ya', 'bar baz']],
                       index=index, columns=['Taxon', 'Extra Column'],
                       dtype=object)

    # has_header=None (default)
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy',
                                        'valid-but-messy.tsv')))

    assert_frame_equal(obs, exp)

    # has_header=True
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy',
                                        'valid-but-messy.tsv')),
        has_header=True)

    assert_frame_equal(obs, exp)
Example 30
Project: q2-types Author: qiime2 File: test_transformer.py
def test_headerless(self):
    index = pd.Index(['seq1', 'seq2'], name='Feature ID', dtype=object)
    columns = ['Taxon', 'Unnamed Column 1', 'Unnamed Column 2']
    exp = pd.DataFrame([['k__Foo; p__Bar', 'some', 'another'],
                        ['k__Foo; p__Baz', 'column', 'column!']],
                       index=index, columns=columns, dtype=object)

    # has_header=None (default)
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy', 'headerless.tsv')))

    assert_frame_equal(obs, exp)

    # has_header=False
    obs = _taxonomy_formats_to_dataframe(
        self.get_data_path(os.path.join('taxonomy', 'headerless.tsv')),
        has_header=False)

    assert_frame_equal(obs, exp)


# In-depth testing of the `_dataframe_to_tsv_taxonomy_format` helper function,
# which does the heavy lifting for the transformers.
Example 31
Project: mlprojects-py Author: srinathperera File: InventoryDemandPre.py
def find_missing_products():
    train = pd.read_csv('/Users/srinath/playground/data-science/BimboInventoryDemand/train.csv')
    train_ids = train['Producto_ID'].unique()
    test = pd.read_csv('/Users/srinath/playground/data-science/BimboInventoryDemand/test.csv')
    test_ids = test['Producto_ID'].unique()

    missing_ids = pd.Index(test_ids).difference(pd.Index(train_ids))
    print "missing ID count ", len(missing_ids)

    missing_ids_df = pd.DataFrame(missing_ids, columns=["Producto_ID"])
    missing_ids_df.to_csv('missing_ids.csv', index=False)

    entries_with_missing = pd.merge(test, missing_ids_df, on='Producto_ID')
    print "Missing entries=", entries_with_missing.shape[0], "percentage=", \
        entries_with_missing.shape[0] * 100 / test.shape[0]
    print "full entries count", test.shape[0]
Example 32
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: generic.py
def at_time(self, time, asof=False):
    """
    Select values at particular time of day (e.g. 9:30AM).

    Parameters
    ----------
    time : datetime.time or string

    Returns
    -------
    values_at_time : type of caller
    """
    try:
        indexer = self.index.indexer_at_time(time, asof=asof)
        return self.take(indexer, convert=False)
    except AttributeError:
        raise TypeError('Index must be DatetimeIndex')
Example 33
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: generic.py
def between_time(self, start_time, end_time, include_start=True,
                 include_end=True):
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    Parameters
    ----------
    start_time : datetime.time or string
    end_time : datetime.time or string
    include_start : boolean, default True
    include_end : boolean, default True

    Returns
    -------
    values_between_time : type of caller
    """
    try:
        indexer = self.index.indexer_between_time(
            start_time, end_time, include_start=include_start,
            include_end=include_end)
        return self.take(indexer, convert=False)
    except AttributeError:
        raise TypeError('Index must be DatetimeIndex')
Example 34
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: common.py
def _isnull_old(obj):
    """Detect missing values. Treat None, NaN, INF, -INF as null.

    Parameters
    ----------
    arr: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean
    """
    if lib.isscalar(obj):
        return lib.checknull_old(obj)
    # hack (for now) because MI registers as ndarray
    elif isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")
    elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike_old(obj)
    elif isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isnull(func=_isnull_old))
    elif isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike_old(np.asarray(obj))
    else:
        return obj is None
Example 35
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: test_timeseries.py
def test_period_resample_with_local_timezone_pytz(self):
    # GH5430
    tm._skip_if_no_pytz()
    import pytz

    local_timezone = pytz.timezone('America/Los_Angeles')

    start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                     tzinfo=pytz.utc)
    # 1 day later
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                   tzinfo=pytz.utc)

    index = pd.date_range(start, end, freq='H')

    series = pd.Series(1, index=index)
    series = series.tz_convert(local_timezone)
    result = series.resample('D', kind='period').mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    expected_index = (pd.period_range(start=start, end=end, freq='D') - 1)
    expected = pd.Series(1, index=expected_index)
    assert_series_equal(result, expected)
Example 36
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: test_timeseries.py
def test_period_resample_with_local_timezone_dateutil(self):
    # GH5430
    tm._skip_if_no_dateutil()
    import dateutil

    local_timezone = 'dateutil/America/Los_Angeles'

    start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                     tzinfo=dateutil.tz.tzutc())
    # 1 day later
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                   tzinfo=dateutil.tz.tzutc())

    index = pd.date_range(start, end, freq='H')

    series = pd.Series(1, index=index)
    series = series.tz_convert(local_timezone)
    result = series.resample('D', kind='period').mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    expected_index = (pd.period_range(start=start, end=end, freq='D') - 1)
    expected = pd.Series(1, index=expected_index)
    assert_series_equal(result, expected)
Example 37
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: test_timeseries.py
def test_dayfirst(self):
    # GH 5917
    arr = ['10/02/2014', '11/02/2014', '12/02/2014']
    expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
                              datetime(2014, 2, 12)])
    idx1 = DatetimeIndex(arr, dayfirst=True)
    idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
    idx3 = to_datetime(arr, dayfirst=True)
    idx4 = to_datetime(np.array(arr), dayfirst=True)
    idx5 = DatetimeIndex(Index(arr), dayfirst=True)
    idx6 = DatetimeIndex(Series(arr), dayfirst=True)
    self.assertTrue(expected.equals(idx1))
    self.assertTrue(expected.equals(idx2))
    self.assertTrue(expected.equals(idx3))
    self.assertTrue(expected.equals(idx4))
    self.assertTrue(expected.equals(idx5))
    self.assertTrue(expected.equals(idx6))
Example 38
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: test_timeseries.py
def test_to_datetime_format(self):
    values = ['1/1/2000', '1/2/2000', '1/3/2000']

    results1 = [Timestamp('20000101'), Timestamp('20000201'),
                Timestamp('20000301')]
    results2 = [Timestamp('20000101'), Timestamp('20000102'),
                Timestamp('20000103')]
    for vals, expecteds in [(values, (Index(results1), Index(results2))),
                            (Series(values),
                             (Series(results1), Series(results2))),
                            (values[0], (results1[0], results2[0])),
                            (values[1], (results1[1], results2[1])),
                            (values[2], (results1[2], results2[2]))]:
        for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']):
            result = to_datetime(vals, format=fmt)
            expected = expecteds[i]

            if isinstance(expected, Series):
                assert_series_equal(result, Series(expected))
            elif isinstance(expected, Timestamp):
                self.assertEqual(result, expected)
            else:
                self.assertTrue(result.equals(expected))
Example 39
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda Author: SignalMedia File: test_base.py
def test_asobject_tolist(self):
    idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx')
    expected_list = [Timedelta('1 days'), Timedelta('2 days'),
                     Timedelta('3 days'), Timedelta('4 days')]
    expected = pd.Index(expected_list, dtype=object, name='idx')
    result = idx.asobject
    self.assertTrue(isinstance(result, Index))
    self.assertEqual(result.dtype, object)
    self.assertTrue(result.equals(expected))
    self.assertEqual(result.name, expected.name)
    self.assertEqual(idx.tolist(), expected_list)

    idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT,
                          timedelta(days=4)], name='idx')
    expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT,
                     Timedelta('4 days')]
    expected = pd.Index(expected_list, dtype=object, name='idx')
    result = idx.asobject
    self.assertTrue(isinstance(result, Index))
    self.assertEqual(result.dtype, object)
    self.assertTrue(result.equals(expected))
    self.assertEqual(result.name, expected.name)
    self.assertEqual(idx.tolist(), expected_list)
Example 40
Project: zipline-chinese Author: zhanghan1990 File: history_container.py (Apache License 2.0)
def prior_values_index(self):
    index_values = list(
        product(
            (freq.freq_str for freq in self.unique_frequencies),
            # Only store prior values for forward-fillable fields.
            self.ffillable_fields,
        )
    )
    if index_values:
        return pd.MultiIndex.from_tuples(index_values)
    else:
        # MultiIndex doesn't gracefully support empty input, so we return
        # an empty regular Index if we have no values.
        return pd.Index(index_values)
Example 41
Project: zipline-chinese Author: zhanghan1990 File: history_container.py (Apache License 2.0)
def add_sids(self, to_add):
    """
    Add new sids to the container.
    """
    self.sids = pd.Index(
        sorted(self.sids.union(_ensure_index(to_add))),
    )
    self._realign_sids()
Example 42
Project: zipline-chinese Author: zhanghan1990 File: history_container.py (Apache License 2.0)
def drop_sids(self, to_drop):
    """
    Remove sids from the container.
    """
    self.sids = pd.Index(
        sorted(self.sids.difference(_ensure_index(to_drop))),
    )
    self._realign_sids()
Example 43
Project: zipline-chinese Author: zhanghan1990 File: data.py (Apache License 2.0)
def _ensure_index(x):
    if not isinstance(x, pd.Index):
        x = pd.Index(sorted(x))
    return x
Example 44
Project: zipline-chinese Author: zhanghan1990 File: test_algorithm.py (Apache License 2.0)
def test_df_of_assets_as_input(self):
    algo = TestRegisterTransformAlgorithm(
        sim_params=self.sim_params,
        env=TradingEnvironment(),  # new env without assets
    )

    df = self.df.copy()
    df.columns = pd.Index(map(Equity, df.columns))
    algo.run(df)
    assert isinstance(algo.sources[0], DataFrameSource)
Example 45
Project: dask_gdf Author: gpuopenanalytics File: core.py (Apache License 2.0)
def index(self):
    """Return dask Index instance"""
    name = self._name + '-index'
    dsk = {(name, i): (getattr, key, 'index')
           for i, key in enumerate(self._keys())}
    return Index(merge(dsk, self.dask), name,
                 self._meta.index, self.divisions)
Example 46
Project: dask_gdf Author: gpuopenanalytics File: core.py (Apache License 2.0)
def _daskify(obj, npartitions=None, chunksize=None):
    """Convert input to a dask-gdf object.
    """
    npartitions = npartitions or 1
    if isinstance(obj, _Frame):
        return obj
    elif isinstance(obj, (pd.DataFrame, pd.Series, pd.Index)):
        return _daskify(dd.from_pandas(obj, npartitions=npartitions))
    elif isinstance(obj, (gd.DataFrame, gd.Series, gd.index.Index)):
        return from_pygdf(obj, npartitions=npartitions)
    elif isinstance(obj, (dd.DataFrame, dd.Series, dd.Index)):
        return from_dask_dataframe(obj)
    else:
        raise TypeError("type {} is not supported".format(type(obj)))
Example 47
Project: dask_gdf Author: gpuopenanalytics File: core.py (Apache License 2.0)
def concat(objs):
    """Concatenate dask gdf objects

    Parameters
    ----------
    objs : sequence of DataFrame, Series, Index
        A sequence of objects to be concatenated.
    """
    objs = [_daskify(x) for x in objs]
    meta = gd.concat(_extract_meta(objs))

    name = "concat-" + uuid4().hex
    dsk = {}
    divisions = [0]
    base = 0
    lastdiv = 0
    for obj in objs:
        for k, i in obj._keys():
            dsk[name, base + i] = k, i
        base += obj.npartitions
        divisions.extend([d + lastdiv for d in obj.divisions[1:]])
        lastdiv = obj.divisions[-1]

    dasks = [o.dask for o in objs]
    dsk = merge(dsk, *dasks)
    return new_dd_object(dsk, name, meta, divisions)
Example 48
Project: dask_gdf Author: gpuopenanalytics File: core.py (Apache License 2.0)
def _get_return_type(meta):
    if isinstance(meta, gd.Series):
        return Series
    elif isinstance(meta, gd.DataFrame):
        return DataFrame
    elif isinstance(meta, gd.index.Index):
        return Index
    return Scalar