Python loompy.create() Examples
The following are 9 code examples of loompy.create().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module loompy, or try the search function.

Example #1
Source File: export.py From pySCENIC with GNU General Public License v3.0 | 6 votes |
def export_regulons(regulons: Sequence[Regulon], fname: str) -> None:
    """
    Write a collection of regulons to disk as a directed GraphML network.

    Each transcription factor becomes a source node and each of its target
    genes a destination node. Edges carry the gene weight and the interaction
    type; the entries of the regulon context are passed along as extra
    node/edge attributes.

    :param regulons: The sequence of regulons to export.
    :param fname: The name of the file to create.
    """
    network = nx.DiGraph()
    for reg in regulons:
        tf_name = reg.transcription_factor
        network.add_node(tf_name, group='transcription_factor')

        # The presence of 'activating' in the context decides both the edge
        # label and the group assigned to the target nodes.
        if 'activating' in reg.context:
            interaction, target_group = 'activating', 'activated_target'
        else:
            interaction, target_group = 'inhibiting', 'inhibited_target'

        for target, strength in reg.gene2weight.items():
            network.add_node(target, group=target_group, **reg.context)
            network.add_edge(tf_name, target, weight=strength, interaction=interaction, **reg.context)
    nx.readwrite.write_graphml(network, fname)
Example #2
Source File: utils.py From pySCENIC with GNU General Public License v3.0 | 6 votes |
def save_df_as_loom(df: pd.DataFrame, fname: str) -> None:
    """
    Write a pandas dataframe to a single-layer loom file.

    Suitable for storing an expression matrix or an AUC value matrix in the
    binary loom format.

    :param df: The 2-dimensional dataframe (rows = cells x columns = genes).
    :param fname: The name of the loom file to create.
    """
    assert df.ndim == 2

    # Loom files are always oriented genes-by-cells: rows represent genes and
    # columns represent cells (or aggregates of cells). The dataframe is
    # cells-by-genes, so its values are transposed before being written.
    cell_ids = df.index.values.astype('str')
    gene_names = df.columns.values.astype('str')
    lp.create(
        filename=fname,
        layers=df.T.values,
        row_attrs={ATTRIBUTE_NAME_GENE: gene_names},
        col_attrs={ATTRIBUTE_NAME_CELL_IDENTIFIER: cell_ids},
    )
Example #3
Source File: expression_matrix.py From starfish with MIT License | 5 votes |
def save_loom(self, filename: str) -> None:
    """Write this ExpressionMatrix to disk as a loom file

    Every coordinate of the cells dimension is stored as a row attribute and
    every coordinate of the genes dimension as a column attribute.

    Parameters
    ----------
    filename : str
        Name of loom file

    """
    import loompy

    cells = self[Features.CELLS]
    row_attrs = {name: cells[name].values for name in cells.coords}

    genes = self[Features.GENES]
    col_attrs = {name: genes[name].values for name in genes.coords}

    loompy.create(filename, self.data, row_attrs, col_attrs)
Example #4
Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License | 5 votes |
def create_append(
    filename: str,
    layers: Union[np.ndarray, Dict[str, np.ndarray], loompy.LayerManager],
    row_attrs: Dict[str, np.ndarray],
    col_attrs: Dict[str, np.ndarray],
    *,
    file_attrs: Dict[str, str] = None,
    fill_values: Dict[str, np.ndarray] = None
) -> None:
    """
    **DEPRECATED** - Use `new` instead; see https://github.com/linnarsson-lab/loompy/issues/42

    If ``filename`` does not exist yet, a new file is created from the given
    layers and attributes. Otherwise the layers are appended to the existing
    file as additional columns (``row_attrs`` and ``file_attrs`` are not used
    in that case).
    """
    deprecated("'create_append' is deprecated. See https://github.com/linnarsson-lab/loompy/issues/42")

    # Nothing on disk yet: create the file and stop.
    if not os.path.exists(filename):
        create(filename, layers, row_attrs, col_attrs, file_attrs=file_attrs)
        return

    with connect(filename) as ds:
        ds.add_columns(layers, col_attrs, fill_values=fill_values)
Example #5
Source File: test_validator.py From loompy with BSD 2-Clause "Simplified" License | 5 votes |
def test_file_with_empty_col_attrs_is_valid(self) -> None:
    """A loom file created with empty row and column attributes must validate."""
    # The temporary file only serves to obtain a unique ".loom" path; it is
    # closed right away and loompy.create() then writes the actual file there.
    tmp = NamedTemporaryFile(suffix=".loom")
    tmp.close()
    path = tmp.name

    loompy.create(path, np.zeros((5, 5)), {}, {})
    try:
        is_valid = LoomValidator().validate(path)
        self.assertTrue(is_valid, "File with empty col_attrs or row_attrs should be valid")
    finally:
        # Always clean up the file written by loompy.create().
        os.remove(path)
Example #6
Source File: test_connection.py From loompy with BSD 2-Clause "Simplified" License | 5 votes |
def setUp(self) -> None:
    """Create a 5x5 loom file of random values for the test to use.

    The file gets an integer attribute named "key" (values 0..4) on both the
    rows and the columns. Its path is available as ``self.file.name``.
    """
    # The temporary file only serves to obtain a unique ".loom" path; it is
    # closed right away and loompy.create() then writes the actual file there.
    self.file = NamedTemporaryFile(suffix=".loom")
    self.file.close()

    # Use the builtin ``int`` as dtype. ``np.int`` was merely an alias for it,
    # deprecated in NumPy 1.20 and removed in NumPy 1.24, where accessing it
    # raises AttributeError.
    loompy.create(
        self.file.name,
        np.random.random((5, 5)),
        row_attrs={"key": np.fromiter(range(5), dtype=int)},
        col_attrs={"key": np.fromiter(range(5), dtype=int)},
    )
Example #7
Source File: GServer.py From SCope with GNU General Public License v3.0 | 4 votes |
def doGeneSetEnrichment(self, request, context):
    """Run an AUCell gene set enrichment and stream progress updates.

    This is a generator. Each yielded item is the value returned by
    ``gse.update_state(...)`` for one stage of the computation (steps -1, 0,
    then 1/2.1/2.2 when rankings have to be built or 2 when they are loaded
    from disk, then 3 and 4). The last item (step 4) carries the computed AUC
    values in ``values``.

    Rankings are stored as a loom file at ``gse.get_AUCell_ranking_filepath()``
    the first time they are computed and re-read from there afterwards.

    :param request: Object exposing ``geneSetFilePath`` (relative to the gene
        sets directory) and ``loomFilePath``.
    :param context: Not used by this method.
    """
    gene_set_file_path = os.path.join(self.dfh.get_gene_sets_dir(), request.geneSetFilePath)
    loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
    gse = _gse.GeneSetEnrichment(
        scope=self, method="AUCell", loom=loom, gene_set_file_path=gene_set_file_path, annotation=""
    )

    # Running AUCell...
    yield gse.update_state(step=-1, status_code=200, status_message="Running AUCell...", values=None)
    # NOTE(review): presumably the pauses give the client time to show each status - confirm.
    time.sleep(1)

    # Reading gene set...
    yield gse.update_state(step=0, status_code=200, status_message="Reading the gene set...", values=None)
    with open(gse.gene_set_file_path, "r") as f:
        # Skip first line because it contains the name of the signature
        gs = GeneSignature(
            name="Gene Signature #1", gene2weight=[line.strip() for idx, line in enumerate(f) if idx > 0]
        )
    time.sleep(1)

    if not gse.has_AUCell_rankings():
        # Creating the matrix as DataFrame...
        yield gse.update_state(step=1, status_code=200, status_message="Creating the matrix...", values=None)
        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        # The loom matrix is transposed so that rows are cells and columns are genes.
        dgem = np.transpose(loom.get_connection()[:, :])
        ex_mtx = pd.DataFrame(data=dgem, index=loom.get_ca_attr_by_name("CellID"), columns=loom.get_genes())

        # Creating the rankings...
        start_time = time.time()
        yield gse.update_state(step=2.1, status_code=200, status_message="Creating the rankings...", values=None)
        rnk_mtx = create_rankings(ex_mtx=ex_mtx)

        # Saving the rankings...
        yield gse.update_state(step=2.2, status_code=200, status_message="Saving the rankings...", values=None)
        # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
        # pandas 1.0; ``.values`` is available in old and new pandas alike.
        lp.create(
            gse.get_AUCell_ranking_filepath(),
            rnk_mtx.values,
            {"CellID": loom.get_cell_ids()},
            {"Gene": loom.get_genes()},
        )
        logger.debug("{0:.5f} seconds elapsed generating rankings ---".format(time.time() - start_time))
    else:
        # Load the rankings...
        yield gse.update_state(step=2, status_code=200, status_message="Rankings exists: loading...", values=None)
        rnk_loom = self.lfh.get_loom_connection(gse.get_AUCell_ranking_filepath())
        # The rankings file stores cells as rows and genes as columns (see the
        # lp.create() call in the other branch).
        rnk_mtx = pd.DataFrame(data=rnk_loom[:, :], index=rnk_loom.ra.CellID, columns=rnk_loom.ca.Gene)

    # Calculating AUCell enrichment...
    start_time = time.time()
    yield gse.update_state(step=3, status_code=200, status_message="Calculating AUCell enrichment...", values=None)
    aucs = enrichment(rnk_mtx, gs).loc[:, "AUC"].values
    logger.debug("{0:.5f} seconds elapsed calculating AUC ---".format(time.time() - start_time))
    yield gse.update_state(
        step=4, status_code=200, status_message=gse.get_method() + " enrichment done!", values=aucs
    )
Example #8
Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License | 4 votes |
def create_from_cellranger(indir: str, outdir: str = None, genome: str = None) -> str:
    """
    Create a .loom file from 10X Genomics cellranger output

    Args:
        indir (str):    path to the cellranger output folder (the one that contains 'outs')
        outdir (str):   output folder where the new loom file should be saved (default to indir)
        genome (str):   genome build to load (e.g. 'mm10'; if None, determine species from outs folder)

    Returns:
        path (str):     Full path to the created loom file.

    Remarks:
        The resulting file will be named ``{sampleID}.loom``, where the sampleID is the one given by cellranger.
        If t-SNE projections or graph-based cluster labels are found in the 'analysis' folder they are stored
        as the column attributes ``X``/``Y`` and ``ClusterID`` respectively.
    """
    if outdir is None:
        outdir = indir
    # The sample ID is the name of the cellranger output folder itself.
    sampleid = os.path.split(os.path.abspath(indir))[-1]
    matrix_folder = os.path.join(indir, 'outs', 'filtered_gene_bc_matrices')
    if os.path.exists(matrix_folder):
        if genome is None:
            # Pick the first non-hidden entry of the matrices folder.
            genome = [f for f in os.listdir(matrix_folder) if not f.startswith(".")][0]
        matrix_folder = os.path.join(matrix_folder, genome)
        matrix = mmread(os.path.join(matrix_folder, "matrix.mtx")).todense()
        # Read inside ``with`` blocks so the file handles are closed deterministically.
        with open(os.path.join(matrix_folder, "genes.tsv"), "r") as f:
            genelines = f.readlines()
        with open(os.path.join(matrix_folder, "barcodes.tsv"), "r") as f:
            bclines = f.readlines()
    else:  # cellranger V3 file locations
        if genome is None:
            genome = ""  # Genome is not visible from V3 folder
        matrix_folder = os.path.join(indir, 'outs', 'filtered_feature_bc_matrix')
        matrix = mmread(os.path.join(matrix_folder, "matrix.mtx.gz")).todense()
        with gzip.open(os.path.join(matrix_folder, "features.tsv.gz"), "r") as f:
            genelines = [l.decode() for l in f.readlines()]
        with gzip.open(os.path.join(matrix_folder, "barcodes.tsv.gz"), "r") as f:
            bclines = [l.decode() for l in f.readlines()]

    # First tab-separated column is the accession, second the gene name.
    accession = np.array([x.split("\t")[0] for x in genelines]).astype("str")
    gene = np.array([x.split("\t")[1].strip() for x in genelines]).astype("str")
    # Cell IDs are prefixed with the sample ID.
    cellids = np.array([sampleid + ":" + x.strip() for x in bclines]).astype("str")

    col_attrs = {"CellID": cellids}
    row_attrs = {"Accession": accession, "Gene": gene}

    tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "projection.csv")
    # In cellranger V2 the file moved one level deeper
    if not os.path.exists(tsne_file):
        tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "2_components", "projection.csv")
    if os.path.exists(tsne_file):
        # Column 0 is skipped; columns 1 and 2 hold the two projection coordinates.
        tsne = np.loadtxt(tsne_file, usecols=(1, 2), delimiter=',', skiprows=1)
        col_attrs["X"] = tsne[:, 0].astype('float32')
        col_attrs["Y"] = tsne[:, 1].astype('float32')

    clusters_file = os.path.join(indir, "outs", "analysis", "clustering", "graphclust", "clusters.csv")
    if os.path.exists(clusters_file):
        labels = np.loadtxt(clusters_file, usecols=(1, ), delimiter=',', skiprows=1)
        # Shift the labels down by one before storing them as ClusterID.
        col_attrs["ClusterID"] = labels.astype('int') - 1

    path = os.path.join(outdir, sampleid + ".loom")
    create(path, matrix, row_attrs, col_attrs, file_attrs={"Genome": genome})
    return path
Example #9
Source File: loompy.py From loompy with BSD 2-Clause "Simplified" License | 4 votes |
def create_from_matrix_market(
    out_file: str,
    sample_id: str,
    layer_paths: Dict[str, str],
    row_metadata_path: str,
    column_metadata_path: str,
    delim: str = "\t",
    skip_row_headers: bool = False,
    skip_colums_headers: bool = False,
    file_attrs: Dict[str, str] = None,
    matrix_transposed: bool = False
) -> None:
    """
    Create a .loom file from .mtx matrix market format

    Args:
        out_file:               path to the newly created .loom file (will be overwritten if it exists)
        sample_id:              string to use as prefix for cell IDs
        layer_paths:            dict mapping layer names to paths to the corresponding matrix file (usually with .mtx extension)
        row_metadata_path:      path to the row (usually genes) metadata file
        column_metadata_path:   path to the column (usually cells) metadata file
        delim:                  delimiter used for metadata (default: "\\t")
        skip_row_headers:       if true, skip first line in rows metadata file
        skip_colums_headers:    if true, skip first line in columns metadata file
                                (the parameter name keeps its historical spelling for backward compatibility)
        file_attrs:             dict of global file attributes, or None
        matrix_transposed:      if true, every matrix read from layer_paths is transposed before use

    Raises:
        ValueError: if layer_paths is empty, since no main layer can be built.

    Remarks:
        layer_paths should typically map the empty string to a matrix market file: {"": "path/to/filename.mtx"}.
        To create a multilayer loom file, map multiple named layers {"": "path/to/layer1.mtx", "layer2": "path/to/layer2.mtx"}
        Note: the created file MUST have a main layer named "". If no such layer is given, BUT all given layers are the same
        datatype, then a main layer will be created as the sum of the other layers. For example,
        {"spliced": "spliced.mtx", "unspliced": "unspliced.mtx"} will create three layers, "", "spliced", and "unspliced",
        where "" is the sum of the other two.

        The first column of the row metadata file is stored as the row attribute "Accession"; if the first row has a
        second column, that column is stored as "Gene". Each line of the column metadata file, prefixed with
        "{sample_id}:", is stored as the column attribute "CellID".
    """
    if not layer_paths:
        # Fail early with a clear message instead of handing a None matrix to create().
        raise ValueError("layer_paths must contain at least one layer")

    layers: Dict[str, Union[np.ndarray, scipy.sparse.coo_matrix]] = {}

    for name, path in layer_paths.items():
        matrix = mmread(path)
        if matrix_transposed:
            matrix = matrix.T
        layers[name] = matrix

    if "" not in layers:
        # No explicit main layer: build it as the sum of all given layers.
        main_matrix = None
        for matrix in layers.values():
            if main_matrix is None:
                main_matrix = matrix.copy()
            else:
                main_matrix = main_matrix + matrix
        layers[""] = main_matrix

    genelines = _read_metadata_lines(row_metadata_path, skip_row_headers)
    bclines = _read_metadata_lines(column_metadata_path, skip_colums_headers)

    accession = np.array([x.split(delim)[0] for x in genelines]).astype("str")
    # The first row decides whether a second (gene name) column is present.
    if len(genelines[0].split(delim)) > 1:
        gene = np.array([x.split(delim)[1].strip() for x in genelines]).astype("str")
        row_attrs = {"Accession": accession, "Gene": gene}
    else:
        row_attrs = {"Accession": accession}

    cellids = np.array([sample_id + ":" + x.strip() for x in bclines]).astype("str")
    col_attrs = {"CellID": cellids}

    # Create the file with the main layer first, then attach the remaining layers.
    create(out_file, layers[""], row_attrs, col_attrs, file_attrs=file_attrs)
    if len(layers) > 1:
        with loompy.connect(out_file) as ds:
            for name, layer in layers.items():
                if name == "":
                    continue
                ds[name] = layer


def _read_metadata_lines(path: str, skip_header: bool) -> list:
    """Return the lines of a metadata file without line terminators, dropping the first line if requested."""
    with open(path, "r") as f:
        # Strip only the line terminator so a single-column accession does not keep its newline.
        lines = [line.rstrip("\r\n") for line in f]
    return lines[1:] if skip_header else lines