Python pandas.api.types.is_categorical() Examples
The following are 21 code examples of pandas.api.types.is_categorical().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.api.types, or try the search function.
Example #1
Source File: test_readwrite.py From anndata with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_readwrite_zarr(typ, tmp_path):
    """Write an AnnData object to a zarr store, read it back and compare."""
    store = tmp_path / "test_zarr_dir"
    source = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    source.raw = source
    assert not is_categorical(source.obs["oanno1"])

    source.write_zarr(store, chunks=True)
    loaded = ad.read_zarr(store)

    # Annotation dtypes, index and categories after the round trip.
    assert is_categorical(loaded.obs["oanno1"])
    assert not is_categorical(loaded.obs["oanno2"])
    assert loaded.obs.index.tolist() == ["name1", "name2", "name3"]
    assert loaded.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert is_categorical(loaded.raw.var["vanno2"])

    # obs / var must agree with the source object.
    assert np.all(loaded.obs == source.obs)
    assert np.all(loaded.var == source.var)
    assert np.all(loaded.var.index == source.var.index)
    assert loaded.var.index.dtype == source.var.index.dtype

    # The raw layer keeps its container type, values and annotations.
    assert type(loaded.raw.X) is type(source.raw.X)
    assert np.allclose(asarray(loaded.raw.X), asarray(source.raw.X))
    assert np.all(loaded.raw.var == source.raw.var)

    # Unstructured annotations keep their element types.
    assert isinstance(loaded.uns["uns4"]["a"], (int, np.integer))
    assert isinstance(source.uns["uns4"]["a"], (int, np.integer))
    assert type(loaded.uns["uns4"]["c"]) is type(source.uns["uns4"]["c"])

    assert_equal(loaded, source)
Example #2
Source File: anndata.py From anndata with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _remove_unused_categories(self, df_full, df_sub, uns): from pandas.api.types import is_categorical for k in df_full: if not is_categorical(df_full[k]): continue all_categories = df_full[k].cat.categories df_sub[k].cat.remove_unused_categories(inplace=True) # also correct the colors... color_key = f"{k}_colors" if color_key not in uns: continue color_vec = uns[color_key] if np.array(color_vec).ndim == 0: # Make 0D arrays into 1D ones uns[color_key] = np.array(color_vec)[(None,)] elif len(color_vec) != len(all_categories): # Reset colors del uns[color_key] else: idx = np.where(np.in1d(all_categories, df_sub[k].cat.categories))[0] uns[color_key] = np.array(color_vec)[(idx,)]
Example #3
Source File: utils.py From plotnine with GNU General Public License v2.0 | 6 votes |
def ordinal(arr):
    """
    Return True if array is an ordered categorical

    Parameters
    ----------
    arr : numpy.array
        Must have a dtype

    Returns
    -------
    out : bool
        Whether array `arr` is an ordered categorical
    """
    # `pdtypes.is_categorical` was removed in pandas 2.0. Reading `ordered`
    # from the dtype also works for a bare `pd.Categorical`, which has no
    # `.cat` accessor.
    dtype = getattr(arr, 'dtype', None)
    if isinstance(dtype, pdtypes.CategoricalDtype):
        return bool(dtype.ordered)
    return False
Example #4
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_colors(adata, c):
    """Return `c` if it is color-like, else one color per observation.

    When `adata.uns` has no ``f"{c}_colors"`` entry, one is created from the
    default palette, sized to the number of categories of `adata.obs[c]`.
    Observations whose lookup index equals -1 are colored "lightgrey".
    """
    if is_color_like(c):
        return c

    color_key = f"{c}_colors"
    if color_key not in adata.uns.keys():
        n_cats = len(adata.obs[c].cat.categories)
        palette = adjust_palette(default_palette(None), length=n_cats)
        adata.uns[color_key] = palette[:n_cats].by_key()["color"]

    # A dict of colors is looked up by value, any other container by code.
    if isinstance(adata.uns[color_key], dict):
        cluster_ix = adata.obs[c].values
    else:
        cluster_ix = adata.obs[c].cat.codes.values

    def _color_at(i):
        ix = cluster_ix[i]
        return adata.uns[color_key][ix] if ix != -1 else "lightgrey"

    return np.array([_color_at(i) for i in range(adata.n_obs)])
Example #5
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_groups(adata, groups, groupby=None):
    """Normalize a `groups` / `groupby` selection.

    Returns a ``(groups, groupby)`` tuple. `groupby` falls back to
    "clusters", then "louvain", then None when it is not a key of
    `adata.obs`. A string `groups` may be given as "groupby:groups" and/or
    as a comma-separated list, unless a category name itself contains the
    separator.
    """
    groupby_is_key = isinstance(groupby, str) and groupby in adata.obs.keys()
    if not groupby_is_key:
        if "clusters" in adata.obs.keys():
            groupby = "clusters"
        elif "louvain" in adata.obs.keys():
            groupby = "louvain"
        else:
            groupby = None

    if groups is True:
        return None, groupby

    # Unwrap a single-element sequence.
    if groups is not None and not isinstance(groups, str) and len(groups) == 1:
        groups = groups[0]

    if isinstance(groups, str):
        if is_categorical(adata, groupby):
            cats = adata.obs[groupby].cat.categories
        else:
            cats = [""]
        if ":" in groups and not np.any([":" in name for name in cats]):
            groupby, groups = groups.split(":")
            groups = groups.strip()
        if "," in groups and not np.any(["," in name for name in cats]):
            groups = [part.strip() for part in groups.split(",")]

    if isinstance(groups, str):
        groups = [groups]
    return groups, groupby
Example #6
Source File: _create.py From cooler with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _rename_chroms(grp, rename_dict, h5opts):
    """Rewrite ``chroms/name`` with renamed labels and re-encode ``bins/chrom``.

    `rename_dict` is applied to the index of the chroms table; `h5opts` is
    forwarded to every ``create_dataset`` call.
    """
    chrom_table = get(grp["chroms"]).set_index("name")
    n_chroms = len(chrom_table)
    renamed = chrom_table.rename(rename_dict).index.values
    new_names = np.array(renamed, dtype=CHROM_DTYPE)  # auto-adjusts char length

    del grp["chroms/name"]
    grp["chroms"].create_dataset(
        "name", shape=(n_chroms,), dtype=new_names.dtype, data=new_names, **h5opts
    )

    bins = get(grp["bins"])
    n_bins = len(bins)
    idmap = dict(zip(new_names, range(n_chroms)))

    # NOTE(review): the `is_integer` alternative also goes on to use `.cat`,
    # which only exists on categorical data -- confirm this is intended.
    if not (is_categorical(bins["chrom"]) or is_integer(bins["chrom"])):
        return

    chrom_ids = bins["chrom"].cat.codes
    enum_dtype = h5py.special_dtype(enum=(CHROMID_DTYPE, idmap))
    del grp["bins/chrom"]

    def _write_chrom_ids(dtype):
        grp["bins"].create_dataset(
            "chrom", shape=(n_bins,), dtype=dtype, data=chrom_ids, **h5opts
        )

    try:
        _write_chrom_ids(enum_dtype)
    except ValueError:
        # If HDF5 enum header would be too large,
        # try storing chrom IDs as raw int instead
        _write_chrom_ids(CHROMID_DTYPE)
Example #7
Source File: test_readwrite.py From anndata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_readwrite_backed(typ, backing_h5ad):
    """Write an AnnData object in backed mode and compare it after reading."""
    source = ad.AnnData(typ(X_list), obs=obs_dict, var=var_dict, uns=uns_dict)
    source.filename = backing_h5ad  # change to backed mode
    source.write()

    loaded = ad.read(backing_h5ad)

    assert is_categorical(loaded.obs["oanno1"])
    assert not is_categorical(loaded.obs["oanno2"])
    assert loaded.obs.index.tolist() == ["name1", "name2", "name3"]
    assert loaded.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
    assert_equal(loaded, source)
Example #8
Source File: test_readwrite.py From anndata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_readwrite_h5ad(typ, dataset_kwargs, backing_h5ad):
    """Round-trip an AnnData object through two h5ad files and compare.

    The object is written to `backing_h5ad`, read back, written again to an
    intermediate file in a temporary directory, and read a final time.
    """
    # Use the context manager so the temporary directory is removed
    # deterministically instead of being left to garbage collection.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdirpth = Path(tmpdir)
        mid_pth = tmpdirpth / "mid.h5ad"

        X = typ(X_list)
        adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict)
        assert not is_categorical(adata_src.obs["oanno1"])
        adata_src.raw = adata_src
        adata_src.write(backing_h5ad, **dataset_kwargs)

        adata_mid = ad.read(backing_h5ad)
        adata_mid.write(mid_pth, **dataset_kwargs)

        adata = ad.read_h5ad(mid_pth)
        assert is_categorical(adata.obs["oanno1"])
        assert not is_categorical(adata.obs["oanno2"])
        assert adata.obs.index.tolist() == ["name1", "name2", "name3"]
        assert adata.obs["oanno1"].cat.categories.tolist() == ["cat1", "cat2"]
        assert is_categorical(adata.raw.var["vanno2"])
        assert np.all(adata.obs == adata_src.obs)
        assert np.all(adata.var == adata_src.var)
        assert np.all(adata.var.index == adata_src.var.index)
        assert adata.var.index.dtype == adata_src.var.index.dtype
        assert type(adata.raw.X) is type(adata_src.raw.X)
        assert type(adata.raw.varm) is type(adata_src.raw.varm)
        assert np.allclose(asarray(adata.raw.X), asarray(adata_src.raw.X))
        assert np.all(adata.raw.var == adata_src.raw.var)
        assert isinstance(adata.uns["uns4"]["a"], (int, np.integer))
        assert isinstance(adata_src.uns["uns4"]["a"], (int, np.integer))
        assert type(adata.uns["uns4"]["c"]) is type(adata_src.uns["uns4"]["c"])
        assert_equal(adata, adata_src)
Example #9
Source File: stat_boxplot.py From plotnine with GNU General Public License v2.0 | 5 votes |
def compute_group(cls, data, scales, **params):
    """Compute the boxplot summary for one group.

    Reads the ``x`` and ``y`` columns of `data` (and ``weight`` when
    present) plus ``params['coef']`` and ``params['width']``. Returns a
    DataFrame holding the whisker, quartile, median, outlier, notch, x and
    width values. `scales` is not used here.
    """
    labels = ['x', 'y']
    X = np.array(data[labels])
    # Element [1] is the statistics entry for the second label, 'y'.
    res = boxplot_stats(X, whis=params['coef'], labels=labels)[1]
    try:
        n = data['weight'].sum()
    except KeyError:
        n = len(data['y'])

    if len(np.unique(data['x'])) > 1:
        width = np.ptp(data['x']) * 0.9
    else:
        width = params['width']

    # `pdtypes.is_categorical` was removed in pandas 2.0; test the dtype.
    if isinstance(data['x'].dtype, pdtypes.CategoricalDtype):
        x = data['x'].iloc[0]
    else:
        x = np.mean([data['x'].min(), data['x'].max()])

    d = {'ymin': res['whislo'],
         'lower': res['q1'],
         'middle': [res['med']],
         'upper': res['q3'],
         'ymax': res['whishi'],
         'outliers': [res['fliers']],
         'notchupper': res['med']+1.58*res['iqr']/np.sqrt(n),
         'notchlower': res['med']-1.58*res['iqr']/np.sqrt(n),
         'x': x,
         'width': width,
         'relvarwidth': np.sqrt(n)}
    return pd.DataFrame(d)
Example #10
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def default_legend_loc(adata, color, legend_loc):
    """Resolve the legend location.

    ``False`` becomes "none"; ``None`` becomes "upper right" when
    `adata.obs[color]` has at most four categories (or is not categorical)
    and "on data" otherwise; any other value is returned unchanged.
    """
    # The categorical check runs first, whatever `legend_loc` is.
    if is_categorical(adata, color):
        n_categories = len(adata.obs[color].cat.categories)
    else:
        n_categories = 0

    if legend_loc is False:
        return "none"
    if legend_loc is None:
        return "upper right" if n_categories <= 4 else "on data"
    return legend_loc
Example #11
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_value_counts(adata, color):
    """Return, per observation, how often its `adata.obs[color]` value occurs.

    The result is a float32 array with one entry per observation. Entries
    whose value is not counted by ``value_counts()`` (missing values) are
    NaN.
    """
    column = adata.obs[color]
    values = np.array(column)
    counts = np.full(values.shape, np.nan, dtype=np.float32)
    # Write into a separate array: replacing values in place breaks when a
    # category equals a count already written (e.g. [1, 1, 2] -> [1, 1, 1]).
    for category, count in column.value_counts().items():
        counts[values == category] = count
    return counts
Example #12
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def is_categorical(data, c=None):
    """Check whether `data`, or the column `data.obs[c]`, is categorical.

    With ``c=None`` the check is applied to `data` itself. Otherwise `data`
    is treated as an object with an ``obs`` table: unless it is a view, its
    string annotations are first converted to categoricals, then
    ``data.obs[c]`` is checked. Returns False when `c` is not a string or
    not a key of ``data.obs``.
    """
    # `pandas.api.types.is_categorical` was removed in pandas 2.0, so test
    # for the categorical dtype directly.
    from pandas.api.types import CategoricalDtype

    def _has_categorical_dtype(obj):
        # Accept both array-likes (via `.dtype`) and dtype objects.
        return isinstance(getattr(obj, "dtype", obj), CategoricalDtype)

    if c is None:
        return _has_categorical_dtype(data)  # data is an array-like

    if not is_view(data):  # views are left untouched
        strings_to_categoricals(data)
    return (
        isinstance(c, str)
        and c in data.obs.keys()
        and _has_categorical_dtype(data.obs[c])
    )
Example #13
Source File: auto_prep.py From nyaggle with MIT License | 5 votes |
def _fill_na_by_unique_value(strain: pd.Series, stest: Optional[pd.Series]) -> Tuple[pd.Series, pd.Series]: if is_categorical(strain): return strain.cat.codes, stest.cat.codes elif is_integer_dtype(strain.dtype): fillval = min(strain.min(), stest.min()) - 1 return strain.fillna(fillval), stest.fillna(fillval) else: return strain.astype(str), stest.astype(str)
Example #14
Source File: auto_prep.py From nyaggle with MIT License | 5 votes |
def autoprep_gbdt(algorithm_type: str, X_train: pd.DataFrame, X_test: Optional[pd.DataFrame],
                  categorical_feature_to_treat: Optional[List[str]] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Prepare categorical columns for a GBDT library.

    `algorithm_type` selects the treatment: ``'lgbm'``, ``'cat'`` or
    ``'xgb'``. When `categorical_feature_to_treat` is None, every column of
    `X_train` with dtype ``object`` or ``category`` is treated. Returns the
    (possibly copied and encoded) ``(X_train, X_test)`` pair; the inputs are
    not modified.
    """
    if categorical_feature_to_treat is None:
        categorical_feature_to_treat = [c for c in X_train.columns if X_train[c].dtype.name in ['object', 'category']]

    # LightGBM:
    # Can handle categorical dtype. Otherwise, int, float or bool is acceptable for categorical columns.
    # https://lightgbm.readthedocs.io/en/latest/Advanced-Topics.html#categorical-feature-support
    #
    # CatBoost:
    # int, float, bool or str is acceptable for categorical columns. NaN should be filled.
    # https://catboost.ai/docs/concepts/faq.html#why-float-and-nan-values-are-forbidden-for-cat-features
    #
    # XGBoost:
    # All categorical column should be encoded beforehand.

    if algorithm_type == 'lgbm':
        # LightGBM can handle categorical dtype natively.
        # (`is_categorical` was removed in pandas 2.0; check the dtype.)
        categorical_feature_to_treat = [c for c in categorical_feature_to_treat
                                        if not isinstance(X_train[c].dtype, pd.CategoricalDtype)]

    if algorithm_type == 'cat' and len(categorical_feature_to_treat) > 0:
        X_train = X_train.copy()
        X_test = X_test.copy() if X_test is not None else X_train.iloc[:1, :].copy()  # dummy
        for c in categorical_feature_to_treat:
            X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])

    if algorithm_type in ('lgbm', 'xgb') and len(categorical_feature_to_treat) > 0:
        assert X_test is not None, "X_test is required for XGBoost with categorical variables"
        X_train = X_train.copy()
        X_test = X_test.copy()

        for c in categorical_feature_to_treat:
            X_train[c], X_test[c] = _fill_na_by_unique_value(X_train[c], X_test[c])
            # Fit one encoder on train and test together so both share labels.
            le = LabelEncoder()
            concat = np.concatenate([X_train[c].values, X_test[c].values])
            concat = le.fit_transform(concat)
            X_train[c] = concat[:len(X_train)]
            X_test[c] = concat[len(X_train):]

    return X_train, X_test
Example #15
Source File: builder.py From dgl with Apache License 2.0 | 5 votes |
def _series_to_tensor(series): if is_categorical(series): return torch.LongTensor(series.cat.codes.values.astype('int64')) else: # numeric return torch.FloatTensor(series.values)
Example #16
Source File: cat_tools.py From plydata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def as_categorical(c, copy=True):
    """
    Convert input to a categorical

    Parameters
    ----------
    c : categorical_like
        Sequence of objects
    copy : bool
        If `True` and c is already a categorical, return
        a copy of `c` otherwise return `c`.

    Returns
    -------
    out : categorical
        Categorical made out of `c` or copy of `c`
        if it was a categorical
    """
    # `pdtypes.is_categorical` was removed in pandas 2.0; test the dtype.
    if not isinstance(getattr(c, 'dtype', None), pdtypes.CategoricalDtype):
        c = pd.Categorical(c)
    elif copy:
        c = c.copy()
    return c


# Temporary functions
Example #17
Source File: cat_tools.py From plydata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def cat_concat(*args):
    """
    Concatenate categoricals and combine the categories

    Parameters
    ----------
    *args : tuple
        Categoricals to be concatenated

    Examples
    --------
    >>> c1 = pd.Categorical(['a', 'b'], categories=['b', 'a'])
    >>> c2 = pd.Categorical(['d', 'a', 'c'])
    >>> cat_concat(c1, c2)
    [a, b, d, a, c]
    Categories (4, object): [b, a, c, d]

    Notes
    -----
    The resulting category is not ordered.
    """
    def _category_source(c):
        # `pdtypes.is_categorical` was removed in pandas 2.0. Reading the
        # categories from the dtype also covers a categorical Series.
        dtype = getattr(c, 'dtype', None)
        if isinstance(dtype, pdtypes.CategoricalDtype):
            return dtype.categories
        return c

    # Passing a plain list to `pd.unique` is deprecated, so de-duplicate
    # through an Index; `unique` keeps first-seen order.
    categories = pd.Index(
        list(chain.from_iterable(_category_source(c) for c in args)),
        tupleize_cols=False
    ).unique()
    cs = pd.Categorical(
        list(chain.from_iterable(args)),
        categories=categories
    )
    return cs
Example #18
Source File: anndata.py From anndata with BSD 3-Clause "New" or "Revised" License | 4 votes |
def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
    """\
    Transform string annotations to categoricals.

    Only affects string annotations that lead to less categories than the
    total number of observations.

    Params
    ------
    df
        If `df` is `None`, modifies both :attr:`obs` and :attr:`var`,
        otherwise modifies `df` inplace.

    Notes
    -----
    Turns the view of an :class:`~anndata.AnnData` into an actual
    :class:`~anndata.AnnData`.

    Raises
    ------
    RuntimeError
        If called without `df` on a backed view and a column would have to
        be converted.
    """
    dont_modify = False  # only necessary for backed views
    if df is None:
        dfs = [self.obs, self.var]
        if self.is_view and self.isbacked:
            dont_modify = True
    else:
        dfs = [df]
    for df in dfs:
        # `is_categorical` was removed in pandas 2.0; check the dtype.
        string_cols = [
            key
            for key in df.columns
            if is_string_dtype(df[key])
            and not isinstance(df[key].dtype, pd.CategoricalDtype)
        ]
        for key in string_cols:
            # make sure we only have strings
            # (could be that there are np.nans (float), -666, "-666", for instance)
            c = df[key].astype("U")
            # make a categorical
            c = pd.Categorical(c, categories=natsorted(np.unique(c)))
            if len(c.categories) >= len(c):
                continue
            if dont_modify:
                raise RuntimeError(
                    "Please call `.strings_to_categoricals()` on full "
                    "AnnData, not on this view. You might encounter this "
                    "error message while copying or writing to disk."
                )
            if self.is_view:
                warnings.warn(
                    "Initializing view as actual.", ImplicitModificationWarning
                )
            # If `self` is a view, it will be actualized in the next line,
            # therefore the previous warning
            df[key] = c
            logger.info(f"... storing {key!r} as categorical")
Example #19
Source File: cat_tools.py From plydata with BSD 3-Clause "New" or "Revised" License | 4 votes |
def cat_zip(*args, sep=':', keep_empty=False):
    """
    Create a new categorical (zip style) combined from two or more

    Parameters
    ----------
    *args : tuple
        Categoricals to be concatenated.
    sep : str (default: ':')
        Separator for the combined categories.
    keep_empty : bool (default: False)
        If ``True``, include all combinations of categories
        even those without observations.

    Examples
    --------
    >>> c1 = pd.Categorical(list('aba'))
    >>> c2 = pd.Categorical(list('122'))
    >>> cat_zip(c1, c2)
    [a:1, b:2, a:2]
    Categories (3, object): [a:1, a:2, b:2]
    >>> cat_zip(c1, c2, keep_empty=True)
    [a:1, b:2, a:2]
    Categories (4, object): [a:1, a:2, b:1, b:2]
    """
    values = [sep.join(items) for items in zip(*args)]
    # Anything without a `.categories` attribute of its own (lists, arrays,
    # Series) is converted; `pd.Categorical` keeps the categories of input
    # that is already categorical.
    cs = [
        c if isinstance(c, (pd.Categorical, pd.CategoricalIndex))
        else pd.Categorical(c)
        for c in args
    ]
    categories = [
        sep.join(items)
        for items in product(*(c.categories for c in cs))
    ]

    c = pd.Categorical(values, categories=categories)

    if not keep_empty:
        # The `inplace=True` form was removed in pandas 2.0.
        c = c.remove_unused_categories()

    return c


# helpers
Example #20
Source File: cat_tools.py From plydata with BSD 3-Clause "New" or "Revised" License | 4 votes |
def cat_remove_unused(c, only=None): """ Remove unused categories Parameters ---------- c : list-like Values that will make up the categorical. only : list-like (optional) The categories to remove *if* they are empty. If not given, all unused categories are dropped. Examples -------- >>> c = pd.Categorical(list('abcdd'), categories=list('bacdefg')) >>> c [a, b, c, d, d] Categories (7, object): [b, a, c, d, e, f, g] >>> cat_remove_unused(c) [a, b, c, d, d] Categories (4, object): [b, a, c, d] >>> cat_remove_unused(c, only=['a', 'e', 'g']) [a, b, c, d, d] Categories (5, object): [b, a, c, d, f] """ if not pdtypes.is_categorical(c): # All categories are used c = pd.Categorical(c) return c else: c = c.copy() if only is None: only = c.categories used_idx = pd.unique(c.codes) used_categories = c.categories[used_idx] c = c.remove_categories( c.categories .difference(used_categories) .intersection(only) ) return c
Example #21
Source File: cat_tools.py From plydata with BSD 3-Clause "New" or "Revised" License | 4 votes |
def cat_infreq(c, ordered=None):
    """
    Reorder categorical by frequency of the values

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    ordered : bool
        If ``True``, the categorical is ordered.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> x = ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    >>> cat_infreq(x)
    [d, a, b, b, c, c, c]
    Categories (4, object): [c, b, d, a]
    >>> cat_infreq(x, ordered=True)
    [d, a, b, b, c, c, c]
    Categories (4, object): [c < b < d < a]

    When two or more values occur the same number of times, if the
    categorical is ordered, the order is preserved. If it is not
    ordered, the order depends on that of the values. Above 'd'
    comes before 'a', and below 'a' comes before 'd'.

    >>> c = pd.Categorical(
    ...     x, categories=['a', 'c', 'b', 'd']
    ... )
    >>> cat_infreq(c)
    [d, a, b, b, c, c, c]
    Categories (4, object): [c, b, a, d]
    >>> cat_infreq(c.set_ordered(True))
    [d, a, b, b, c, c, c]
    Categories (4, object): [c < b < a < d]
    """
    kwargs = {} if ordered is None else {'ordered': ordered}
    counts = value_counts(c)
    # `pdtypes.is_categorical` was removed in pandas 2.0; test the dtype.
    dtype = getattr(c, 'dtype', None)
    if isinstance(dtype, pdtypes.CategoricalDtype):
        original_cat_order = dtype.categories
    else:
        # Passing a plain list to `pd.unique` is deprecated; an Index gives
        # the same first-seen order.
        original_cat_order = pd.Index(c, tupleize_cols=False).unique()
    # Put the counts in the original order before the stable sort.
    counts = counts.reindex(index=original_cat_order)
    cats = (_stable_series_sort(counts, ascending=False)
            .index
            .to_list())
    return pd.Categorical(c, categories=cats, **kwargs)