Python loompy.create() Examples

The following are 9 code examples of loompy.create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module loompy, or try the search function.
Example #1
Source File: export.py    From pySCENIC with GNU General Public License v3.0 6 votes vote down vote up
def export_regulons(regulons: Sequence[Regulon], fname: str) -> None:
    """
    Write a collection of regulons to disk as a directed GraphML network.

    Every regulon contributes one node for its transcription factor and one
    node per target gene, connected by an edge that carries the gene weight.
    Edges and target nodes are labelled as activating/activated when
    'activating' is found in the regulon's context, otherwise as
    inhibiting/inhibited.

    :param regulons: The sequence of regulons to export.
    :param fname: The name of the file to create.
    """
    network = nx.DiGraph()
    for reg in regulons:
        tf_name = reg.transcription_factor
        network.add_node(tf_name, group='transcription_factor')

        # One decision drives both the edge label and the target node label.
        if 'activating' in reg.context:
            interaction, target_group = 'activating', 'activated_target'
        else:
            interaction, target_group = 'inhibiting', 'inhibited_target'

        for target_name, weight in reg.gene2weight.items():
            # The regulon context is expanded into extra node/edge attributes.
            network.add_node(target_name, group=target_group, **reg.context)
            network.add_edge(
                tf_name,
                target_name,
                weight=weight,
                interaction=interaction,
                **reg.context
            )
    nx.readwrite.write_graphml(network, fname)
Example #2
Source File: utils.py    From pySCENIC with GNU General Public License v3.0 6 votes vote down vote up
def save_df_as_loom(df: pd.DataFrame, fname: str) -> None:
    """
    Write a pandas dataframe to a single-layer loom file.

    Suitable for storing an expression matrix or an AUC value matrix in the
    binary loom format.

    :param df: The 2-dimensional dataframe (rows = cells x columns = genes).
    :param fname: The name of the loom file to create.
    """
    assert df.ndim == 2

    # Loom files are gene-by-cell: rows are genes, columns are cells (or
    # aggregates of cells). The dataframe is cell-by-gene, so its index
    # becomes the column attribute and its columns become the row attribute.
    cell_ids = df.index.values.astype('str')
    gene_names = df.columns.values.astype('str')

    lp.create(
        filename=fname,
        layers=df.T.values,  # transpose to genes x cells
        row_attrs={ATTRIBUTE_NAME_GENE: gene_names},
        col_attrs={ATTRIBUTE_NAME_CELL_IDENTIFIER: cell_ids},
    )
Example #3
Source File: expression_matrix.py    From starfish with MIT License 5 votes vote down vote up
def save_loom(self, filename: str) -> None:
        """Write this ExpressionMatrix to disk in loom format.

        The coordinates found on the cells axis are passed as row attributes
        and the coordinates found on the genes axis as column attributes.

        Parameters
        ----------
        filename : str
            Name of loom file
        """
        import loompy

        def _coord_values(axis):
            # Map every coordinate name on the given axis to its raw values.
            return {name: self[axis][name].values for name in self[axis].coords}

        cell_attrs = _coord_values(Features.CELLS)
        gene_attrs = _coord_values(Features.GENES)

        loompy.create(filename, self.data, cell_attrs, gene_attrs)
Example #4
Source File: loompy.py    From loompy with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def create_append(filename: str, layers: Union[np.ndarray, Dict[str, np.ndarray], loompy.LayerManager], row_attrs: Dict[str, np.ndarray], col_attrs: Dict[str, np.ndarray], *, file_attrs: Dict[str, str] = None, fill_values: Dict[str, np.ndarray] = None) -> None:
	"""
	**DEPRECATED** - Use `new` instead; see https://github.com/linnarsson-lab/loompy/issues/42

	Creates the file when it does not exist yet; otherwise connects to it and
	appends the given layers and column attributes as new columns.
	"""
	deprecated("'create_append' is deprecated. See https://github.com/linnarsson-lab/loompy/issues/42")
	if not os.path.exists(filename):
		# Nothing to append to: build a fresh file (fill_values is not used here).
		create(filename, layers, row_attrs, col_attrs, file_attrs=file_attrs)
		return
	# File already present: row_attrs and file_attrs are not used on this path.
	with connect(filename) as ds:
		ds.add_columns(layers, col_attrs, fill_values=fill_values)
Example #5
Source File: test_validator.py    From loompy with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_file_with_empty_col_attrs_is_valid(self) -> None:
        # Opening and immediately closing the temporary file only reserves a
        # unique ".loom" path; loompy.create() then writes the real file there.
        with NamedTemporaryFile(suffix=".loom") as tmp:
            loom_path = tmp.name
        loompy.create(loom_path, np.zeros((5, 5)), {}, {})
        try:
            is_valid = LoomValidator().validate(loom_path)
            self.assertTrue(
                is_valid,
                "File with empty col_attrs or row_attrs should be valid"
            )
        finally:
            # Always clean up the file written by loompy.create().
            os.remove(loom_path)
Example #6
Source File: test_connection.py    From loompy with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def setUp(self) -> None:
        """Create a small 5x5 loom file with one integer attribute per axis.

        The path of the created file is available as ``self.file.name``.
        """
        # Creating and closing the temporary file only reserves a unique
        # ".loom" path; loompy.create() writes the actual file afterwards.
        self.file = NamedTemporaryFile(suffix=".loom")
        self.file.close()
        # `np.int` was a plain alias of the builtin `int`; it was deprecated in
        # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
        loompy.create(
            self.file.name,
            np.random.random((5, 5)),
            row_attrs={
                "key": np.fromiter(range(5), dtype=int)
            },
            col_attrs={
                "key": np.fromiter(range(5), dtype=int)
            })
Example #7
Source File: GServer.py    From SCope with GNU General Public License v3.0 4 votes vote down vote up
def doGeneSetEnrichment(self, request, context):
        """Run an AUCell gene set enrichment and report progress as it goes.

        This is a generator: after each stage it yields the value returned by
        ``gse.update_state(...)``. The final yielded state (step 4) carries the
        computed AUC values.

        If no AUCell rankings exist yet for the loom file, they are computed
        from its expression matrix and saved as a loom file; otherwise the
        previously saved rankings are loaded.

        :param request: object providing ``geneSetFilePath`` and ``loomFilePath``.
        :param context: not used by this method.
        """
        gene_set_file_path = os.path.join(self.dfh.get_gene_sets_dir(), request.geneSetFilePath)
        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        gse = _gse.GeneSetEnrichment(
            scope=self, method="AUCell", loom=loom, gene_set_file_path=gene_set_file_path, annotation=""
        )

        # Running AUCell...
        yield gse.update_state(step=-1, status_code=200, status_message="Running AUCell...", values=None)
        # NOTE(review): fixed 1 s pause; presumably gives the client time to show the state - confirm.
        time.sleep(1)

        # Reading gene set...
        yield gse.update_state(step=0, status_code=200, status_message="Reading the gene set...", values=None)
        with open(gse.gene_set_file_path, "r") as f:
            # Skip first line because it contains the name of the signature
            gs = GeneSignature(
                name="Gene Signature #1", gene2weight=[line.strip() for idx, line in enumerate(f) if idx > 0]
            )
        time.sleep(1)

        if not gse.has_AUCell_rankings():
            # Creating the matrix as DataFrame...
            yield gse.update_state(step=1, status_code=200, status_message="Creating the matrix...", values=None)
            loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
            # Transpose the loom matrix so that rows are cells and columns are genes.
            dgem = np.transpose(loom.get_connection()[:, :])
            ex_mtx = pd.DataFrame(data=dgem, index=loom.get_ca_attr_by_name("CellID"), columns=loom.get_genes())
            # Creating the rankings...
            start_time = time.time()
            yield gse.update_state(step=2.1, status_code=200, status_message="Creating the rankings...", values=None)
            rnk_mtx = create_rankings(ex_mtx=ex_mtx)
            # Saving the rankings...
            yield gse.update_state(step=2.2, status_code=200, status_message="Saving the rankings...", values=None)
            # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
            # the same ndarray and works on old and new pandas alike.
            lp.create(
                gse.get_AUCell_ranking_filepath(),
                rnk_mtx.values,
                {"CellID": loom.get_cell_ids()},
                {"Gene": loom.get_genes()},
            )
            logger.debug("{0:.5f} seconds elapsed generating rankings ---".format(time.time() - start_time))
        else:
            # Load the rankings...
            yield gse.update_state(step=2, status_code=200, status_message="Rankings exists: loading...", values=None)
            rnk_loom = self.lfh.get_loom_connection(gse.get_AUCell_ranking_filepath())
            # The ranking file stores cells as rows and genes as columns (see the save branch above).
            rnk_mtx = pd.DataFrame(data=rnk_loom[:, :], index=rnk_loom.ra.CellID, columns=rnk_loom.ca.Gene)

        # Calculating AUCell enrichment...
        start_time = time.time()
        yield gse.update_state(step=3, status_code=200, status_message="Calculating AUCell enrichment...", values=None)
        aucs = enrichment(rnk_mtx, gs).loc[:, "AUC"].values

        logger.debug("{0:.5f} seconds elapsed calculating AUC ---".format(time.time() - start_time))
        yield gse.update_state(
            step=4, status_code=200, status_message=gse.get_method() + " enrichment done!", values=aucs
        )
Example #8
Source File: loompy.py    From loompy with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def create_from_cellranger(indir: str, outdir: str = None, genome: str = None) -> str:
	"""
	Create a .loom file from 10X Genomics cellranger output

	Args:
		indir (str):	path to the cellranger output folder (the one that contains 'outs')
		outdir (str):	output folder where the new loom file should be saved (default to indir)
		genome (str):	genome build to load (e.g. 'mm10'; if None, determine species from outs folder)

	Returns:
		path (str):		Full path to the created loom file.

	Remarks:
		The resulting file will be named ``{sampleID}.loom``, where the sampleID is the one given by cellranger.
		If t-SNE projections or graph-based cluster labels are found in the analysis folder, they are
		stored as the column attributes ``X``/``Y`` and ``ClusterID`` respectively.
	"""
	if outdir is None:
		outdir = indir
	# The sample ID is the name of the cellranger output folder itself.
	sampleid = os.path.split(os.path.abspath(indir))[-1]
	matrix_folder = os.path.join(indir, 'outs', 'filtered_gene_bc_matrices')
	if os.path.exists(matrix_folder):
		if genome is None:
			# Use the first non-hidden entry of the matrices folder as the genome.
			genome = [f for f in os.listdir(matrix_folder) if not f.startswith(".")][0]
		matrix_folder = os.path.join(matrix_folder, genome)
		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx")).todense()
		# Context managers make sure the metadata files are closed again.
		with open(os.path.join(matrix_folder, "genes.tsv"), "r") as fin:
			genelines = fin.readlines()
		with open(os.path.join(matrix_folder, "barcodes.tsv"), "r") as fin:
			bclines = fin.readlines()
	else:  # cellranger V3 file locations
		if genome is None:
			genome = ""  # Genome is not visible from V3 folder
		matrix_folder = os.path.join(indir, 'outs', 'filtered_feature_bc_matrix')
		matrix = mmread(os.path.join(matrix_folder, "matrix.mtx.gz")).todense()
		# The gzip files are read as bytes and decoded line by line.
		with gzip.open(os.path.join(matrix_folder, "features.tsv.gz"), "r") as fin:
			genelines = [l.decode() for l in fin.readlines()]
		with gzip.open(os.path.join(matrix_folder, "barcodes.tsv.gz"), "r") as fin:
			bclines = [l.decode() for l in fin.readlines()]

	accession = np.array([x.split("\t")[0] for x in genelines]).astype("str")
	gene = np.array([x.split("\t")[1].strip() for x in genelines]).astype("str")
	# Cell IDs are prefixed with the sample ID.
	cellids = np.array([sampleid + ":" + x.strip() for x in bclines]).astype("str")

	col_attrs = {"CellID": cellids}
	row_attrs = {"Accession": accession, "Gene": gene}

	tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "projection.csv")
	# In cellranger V2 the file moved one level deeper
	if not os.path.exists(tsne_file):
		tsne_file = os.path.join(indir, "outs", "analysis", "tsne", "2_components", "projection.csv")
	if os.path.exists(tsne_file):
		# Column 0 is skipped; columns 1 and 2 hold the two projection coordinates.
		tsne = np.loadtxt(tsne_file, usecols=(1, 2), delimiter=',', skiprows=1)
		col_attrs["X"] = tsne[:, 0].astype('float32')
		col_attrs["Y"] = tsne[:, 1].astype('float32')

	clusters_file = os.path.join(indir, "outs", "analysis", "clustering", "graphclust", "clusters.csv")
	if os.path.exists(clusters_file):
		labels = np.loadtxt(clusters_file, usecols=(1, ), delimiter=',', skiprows=1)
		# Stored cluster IDs are the file's labels shifted down by one.
		col_attrs["ClusterID"] = labels.astype('int') - 1

	path = os.path.join(outdir, sampleid + ".loom")
	create(path, matrix, row_attrs, col_attrs, file_attrs={"Genome": genome})
	return path
Example #9
Source File: loompy.py    From loompy with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def create_from_matrix_market(out_file: str, sample_id: str, layer_paths: Dict[str, str], row_metadata_path: str, column_metadata_path: str, delim: str = "\t", skip_row_headers: bool = False, skip_colums_headers: bool = False, file_attrs: Dict[str, str] = None, matrix_transposed: bool = False) -> None:
	"""
	Create a .loom file from .mtx matrix market format

	Args:
		out_file:				path to the newly created .loom file (will be overwritten if it exists)
		sample_id:				string to use as prefix for cell IDs
		layer_paths:			dict mapping layer names to paths to the corresponding matrix file (usually with .mtx extension)
		row_metadata_path:		path to the row (usually genes) metadata file
		column_metadata_path:	path to the column (usually cells) metadata file
		delim:					delimiter used for the row metadata file (default: tab)
		skip_row_headers:		if true, skip first line in rows metadata file
		skip_colums_headers: 	if true, skip first line in columns metadata file
		file_attrs:				dict of global file attributes, or None
		matrix_transposed:		if true, each matrix is transposed after loading

	Remarks:
		layer_paths should typically map the empty string to a matrix market file: {"": "path/to/filename.mtx"}.
		To create a multilayer loom file, map multiple named layers {"": "path/to/layer1.mtx", "layer2": "path/to/layer2.mtx"}
		Note: the created file MUST have a main layer named "". If no such layer is given, a main layer will be
		created as the sum of the other layers (which should therefore all be the same datatype).
		For example, {"spliced": "spliced.mtx", "unspliced": "unspliced.mtx"}
		will create three layers, "", "spliced", and "unspliced", where "" is the sum of the other two.

		The first column of the row metadata is stored as "Accession"; when the first row has a second
		column, that column is stored as "Gene".
	"""
	layers: Dict[str, Union[np.ndarray, scipy.sparse.coo_matrix]] = {}

	for name, path in layer_paths.items():
		matrix = mmread(path)
		if matrix_transposed:
			matrix = matrix.T
		layers[name] = matrix
	if "" not in layers:
		# No explicit main layer: build it as the sum of all given layers.
		main_matrix = None
		for name, matrix in layers.items():
			if main_matrix is None:
				main_matrix = matrix.copy()
			else:
				main_matrix = main_matrix + matrix
		layers[""] = main_matrix

	# Context managers make sure the metadata files are closed again.
	with open(row_metadata_path, "r") as fin:
		genelines = fin.readlines()
	with open(column_metadata_path, "r") as fin:
		bclines = fin.readlines()
	# Honor the header flags, which were previously accepted but ignored.
	if skip_row_headers:
		genelines = genelines[1:]
	if skip_colums_headers:
		bclines = bclines[1:]

	# Drop the line terminator before splitting so that a single-column file
	# does not leave a trailing newline in the accession values, and split on
	# the caller's delimiter instead of a hard-coded tab.
	gene_fields = [x.rstrip("\r\n").split(delim) for x in genelines]
	accession = np.array([fields[0] for fields in gene_fields]).astype("str")
	if len(gene_fields[0]) > 1:
		gene = np.array([fields[1].strip() for fields in gene_fields]).astype("str")
		row_attrs = {"Accession": accession, "Gene": gene}
	else:
		row_attrs = {"Accession": accession}

	# Cell IDs are prefixed with the sample ID.
	cellids = np.array([sample_id + ":" + x.strip() for x in bclines]).astype("str")
	col_attrs = {"CellID": cellids}

	create(out_file, layers[""], row_attrs, col_attrs, file_attrs=file_attrs)

	if len(layers) > 1:
		# Add every remaining (named) layer to the file that was just created.
		with loompy.connect(out_file) as ds:
			for name, layer in layers.items():
				if name == "":
					continue
				ds[name] = layer