Java Code Examples for htsjdk.samtools.util.IOUtil#openFileForBufferedWriting()

The following examples show how to use htsjdk.samtools.util.IOUtil#openFileForBufferedWriting() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GatherReadQualityMetrics.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	Map<String, ReadQualityMetrics> metricsMap = gatherMetrics(INPUT);
	MetricsFile<ReadQualityMetrics, Integer> outFile = new MetricsFile<>();
	outFile.addHistogram(metricsMap.get(this.GLOBAL).getHistogram());
       // Make sure the GLOBAL metrics is added first
       outFile.addMetric(metricsMap.remove(this.GLOBAL));
	for (ReadQualityMetrics metrics: metricsMap.values())
		outFile.addMetric(metrics);
	BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
	outFile.write(w);
	// close properly.
	try {
		w.close();
	} catch (IOException io) {
		throw new TranscriptomeException("Problem writing file", io);
	}
	return 0;
}
 
Example 2
Source File: BaitDesigner.java    From picard with MIT License 6 votes vote down vote up
/** Method that writes out all the parameter values that were used in the design using reflection. */
void writeParametersFile(final File file) {
    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(file);
        for (final Field field : getClass().getDeclaredFields()) {
            if (Modifier.isPrivate(field.getModifiers())) continue;

            final String name = field.getName();

            if (name.toUpperCase().equals(name) && !name.equals("USAGE")) {
                final Object value = field.get(this);

                if (value != null) {
                    out.append(name);
                    out.append("=");
                    out.append(value.toString());
                    out.newLine();
                }
            }
        }
        out.close();
    } catch (Exception e) {
        throw new PicardException("Error writing out parameters file.", e);
    }
}
 
Example 3
Source File: IntervalListToBed.java    From picard with MIT License 6 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) intervals = intervals.sorted();

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }

        out.close();
    }
    catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}
 
Example 4
Source File: TargetMetricsCollector.java    From picard with MIT License 6 votes vote down vote up
/** Emits a file with per base coverage if an output file has been set. */
private void emitPerBaseCoverageIfRequested() {
    if (this.perBaseOutput == null) return;

    final PrintWriter out = new PrintWriter(IOUtil.openFileForBufferedWriting(this.perBaseOutput));
    out.println("chrom\tpos\ttarget\tcoverage");
    for (final Map.Entry<Interval,Coverage> entry : this.highQualityCoverageByTarget.entrySet()) {
        final Interval interval = entry.getKey();
        final String chrom = interval.getContig();
        final int firstBase = interval.getStart();

        final int[] cov = entry.getValue().getDepths();
        for (int i = 0; i < cov.length; ++i) {
            out.print(chrom);
            out.print('\t');
            out.print(firstBase + i);
            out.print('\t');
            out.print(interval.getName());
            out.print('\t');
            out.print(cov[i]);
            out.println();
        }
    }

    out.close();
}
 
Example 5
Source File: MergeDgeSparse.java    From Drop-seq with MIT License 6 votes vote down vote up
private void writeDiscardedCellsFile(final List<SparseDge> dges) {
    if (DISCARDED_CELLS_FILE != null) {
        try {
            LOG.info("Writing " + DISCARDED_CELLS_FILE.getAbsolutePath());
            final BufferedWriter writer = IOUtil.openFileForBufferedWriting(DISCARDED_CELLS_FILE);
            for (final SparseDge dge : dges) {
                for (final String cell_barcode: dge.getDiscardedCells()) {
                    writer.write(cell_barcode);
                    writer.newLine();
                }
            }
            writer.close();
        } catch (IOException e) {
            throw new RuntimeIOException("Exception writing " + DISCARDED_CELLS_FILE.getAbsolutePath(), e);
        }
    }
}
 
Example 6
Source File: GatherMolecularBarcodeDistributionByGene.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {

	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

	writePerTranscriptHeader(out);


	Set<String> cellBarcodes=new HashSet<>(new BarcodeListRetrieval().getCellBarcodes(this.INPUT, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
               this.GENE_NAME_TAG, this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST,
               this.CELL_BC_FILE, this.READ_MQ, this.MIN_NUM_TRANSCRIPTS_PER_CELL,
               this.MIN_NUM_GENES_PER_CELL, this.MIN_NUM_READS_PER_CELL, this.NUM_CORE_BARCODES, this.EDIT_DISTANCE, this.MIN_BC_READ_THRESHOLD));

	UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false),
			GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
       		this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
       		this.READ_MQ, false, cellBarcodes, true, false);

	UMICollection batch;

	while ((batch=umiIterator.next())!=null)
		if (batch.isEmpty()==false) {
			String cellTag = batch.getCellBarcode();
			if (cellBarcodes.contains(cellTag) || cellBarcodes.isEmpty())
				writePerTranscriptStats (batch.getGeneName(), batch.getCellBarcode(), batch.getMolecularBarcodeCountsCollapsed(this.EDIT_DISTANCE), out);
		}

	CloserUtil.close(umiIterator);

	try {
		out.close();
	} catch (IOException io) {
		throw new TranscriptomeException("Problem writing file", io);
	}

	return 0;
}
 
Example 7
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	if (RIBOSOMAL_INTERVALS!=null) IOUtil.assertFileIsReadable(RIBOSOMAL_INTERVALS);

	for (final String mtSequence : MT_SEQUENCE) {
		final SAMSequenceRecord samSequenceRecord =
				SamReaderFactory.makeDefault().open(INPUT).getFileHeader().getSequence(mtSequence);
		if (samSequenceRecord == null)
			throw new RuntimeException("MT_SEQUENCE '" + mtSequence + "' is not found in sequence dictionary in " + INPUT.getAbsolutePath());
	}

	List<String> cellBarcodes = getCellBarcodes(this.CELL_BC_FILE, this.INPUT, this.CELL_BARCODE_TAG, this.READ_MQ, this.NUM_CORE_BARCODES);
	RnaSeqMetricsCollector collector = getRNASeqMetricsCollector(this.CELL_BARCODE_TAG, cellBarcodes, this.INPUT, this.STRAND_SPECIFICITY, this.RRNA_FRAGMENT_PERCENTAGE, this.READ_MQ, this.ANNOTATIONS_FILE, this.RIBOSOMAL_INTERVALS);
	final MetricsFile<RnaSeqMetrics, Integer> file = getMetricsFile();
	log.info("Adding metrics to file.  This may take a while, with no progress messages.");
   	collector.addAllLevelsToFile(file);

   	BufferedWriter b = IOUtil.openFileForBufferedWriting(OUTPUT);
   	file.write(b);
   	try {
		b.close();
	} catch (IOException io) {
		throw new TranscriptomeException("Problem writing file", io);
	}
   	return 0;
   }
 
Example 8
Source File: CellSizeWriter.java    From Drop-seq with MIT License 5 votes vote down vote up
public CellSizeWriter(final File cellSizeFile) {
    try {
        this.cellSizeFile = cellSizeFile;
        cellSizeWriter = IOUtil.openFileForBufferedWriting(cellSizeFile);
        cellSizeWriter.write(CELL_SIZE_HEADER);
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + cellSizeFile.getAbsolutePath(), e);
    }
}
 
Example 9
Source File: SelectCellsByNumTranscripts.java    From Drop-seq with MIT License 5 votes vote down vote up
public static void writeBarcodes(final File file, final List<String> barcodes) {
    final BufferedWriter writer = IOUtil.openFileForBufferedWriting(file);
    try {
        for (final String barcode : barcodes) {
            writer.write(barcode);
            writer.newLine();
        }
        writer.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + file.getAbsolutePath(), e);
    }
}
 
Example 10
Source File: MatrixMarketWriter.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 *
 * @param outputFile If has extension .gz, output file will be gzipped.
 * @param numNonZeroElements the number of triplets to be written
 * @param rowNames If non-null, a list of row names to write into the header.  Length must == numRows
 * @param colNames if non-null, a list of column names to write into the header.  Length must == numCols
 * @param rowNamesLabel If non-null, a label for the rowNames in the header.  If null, "ROWS" is used.
 * @param colNamesLabel If non-null, a label for the rowNames in the header.  If null, "COLS" is used.
 */
public MatrixMarketWriter(final File outputFile,
                          final MatrixMarketConstants.ElementType elementType,
                          final int numRows,
                          final int numCols,
                          final int numNonZeroElements,
                          final List<String> rowNames,
                          final List<String> colNames,
                          final String rowNamesLabel,
                          final String colNamesLabel) {
    this(IOUtil.openFileForBufferedWriting(outputFile), outputFile.getAbsolutePath(), elementType,
            numRows, numCols, numNonZeroElements, rowNames, colNames, rowNamesLabel, colNamesLabel);
}
 
Example 11
Source File: ExtractSequences.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }

    CloserUtil.close(out);

    return 0;
}
 
Example 12
Source File: PedFile.java    From picard with MIT License 5 votes vote down vote up
/**
 * Writes a set of pedigrees out to disk.
 */
public void write(final File file) {
    IOUtil.assertFileIsWritable(file);
    final BufferedWriter out = IOUtil.openFileForBufferedWriting(file);

    try {
        for (final PedTrio trio : values()) {
            out.write(trio.getFamilyId());
            out.write("\t");
            out.write(trio.getIndividualId());
            out.write("\t");
            out.write(trio.getPaternalId());
            out.write("\t");
            out.write(trio.getMaternalId());
            out.write("\t");
            out.write(String.valueOf(trio.getSex().toCode()));
            out.write("\t");
            out.write(trio.getPhenotype().toString());
            out.newLine();
        }

        out.close();
    }
    catch (final IOException ioe) {
        throw new RuntimeIOException("IOException while writing to file " + file.getAbsolutePath(), ioe);
    }
}
 
Example 13
Source File: DgeHeaderCodec.java    From Drop-seq with MIT License 5 votes vote down vote up
public void encode(final File file, final DgeHeader header) {
    final Writer writer = IOUtil.openFileForBufferedWriting(file);
    encode(writer, header);
    try {
        writer.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Exception writing " + file.getAbsolutePath(), e);
    }
}
 
Example 14
Source File: FilterGtf.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(this.GTF);
    IOUtil.assertFileIsWritable(this.OUTPUT);

    geneBiotypesToFilter.addAll(UNDESIRED_GENE_BIOTYPE);
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = DropSeqSamUtil.loadSequenceDictionary(SEQUENCE_DICTIONARY);
    }

    // Don't use TabbedInputParser because we want to preserve comments
    // Don't use GTFReader because 1) we want to preserve comments; and 2) what is needed at this point is not
    // complicated and custom parsing will be more straightforward.
    final BufferedReader reader = IOUtil.openFileForBufferedReading(GTF);
    final BufferedWriter writer = IOUtil.openFileForBufferedWriting(OUTPUT);
    String inputLine;
    try {
        while ((inputLine = reader.readLine()) != null ) {
            if (inputLine.startsWith("#") || goodLine(inputLine)) {
                writer.write(inputLine);
                writer.newLine();
            }
        }
        CloserUtil.close(reader);
        writer.close();
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return 0;
}
 
Example 15
Source File: BaitDesigner.java    From picard with MIT License 5 votes vote down vote up
void writeDesignFastaFile(final File file, final IntervalList baits) {
    final BufferedWriter out = IOUtil.openFileForBufferedWriting(file);
    for (final Interval i : baits) {
        writeBaitFasta(out, i, false);
    }
    CloserUtil.close(out);
}
 
Example 16
Source File: BaitDesigner.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes out fasta files for each pool and also agilent format files if requested.
 *
 * @param dir      the directory to output files into
 * @param basename the basename of each file
 * @param baits    the set of baits to write out
 */
void writePoolFiles(final File dir, final String basename, final IntervalList baits) {
    final int copies;
    if (FILL_POOLS && baits.size() < POOL_SIZE) copies = (int) Math.floor(POOL_SIZE / (double) baits.size());
    else copies = 1;

    int written = 0;
    int nextPool = 0;
    BufferedWriter out = null;
    BufferedWriter agilentOut = null;
    final String prefix = DESIGN_NAME.substring(0, Math.min(DESIGN_NAME.length(), 8)) + "_"; // prefix for 15 digit bait id
    final NumberFormat fmt = new DecimalFormat("000000");

    try {
        for (int i = 0; i < copies; ++i) {
            final boolean rc = i % 2 == 1;

            int baitId = 1;
            for (final Interval interval : baits) {
                final Bait bait = (Bait) interval;

                if (written++ % POOL_SIZE == 0) {
                    if (out != null) out.close();
                    if (agilentOut != null) agilentOut.close();

                    final String filename = basename + ".pool" + nextPool++ + ".design.";
                    out = IOUtil.openFileForBufferedWriting(new File(dir, filename + "fasta"));
                    if (OUTPUT_AGILENT_FILES) {
                        agilentOut = IOUtil.openFileForBufferedWriting(new File(dir, filename + "txt"));
                    }
                }

                writeBaitFasta(out, interval, rc);
                if (OUTPUT_AGILENT_FILES) {
                    agilentOut.append(prefix).append(fmt.format(baitId++));
                    agilentOut.append("\t");
                    agilentOut.append(getBaitSequence(bait, rc).toUpperCase());
                    agilentOut.newLine();
                }
            }
        }

        CloserUtil.close(out);
        CloserUtil.close(agilentOut);
    } catch (Exception e) {
        throw new PicardException("Error while writing pool files.", e);
    }
}
 
Example 17
Source File: NormalizeFasta.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile())) {
        throw new IllegalArgumentException("Input and output cannot be the same file.");
    }

    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    ReferenceSequence seq = null;
    while ((seq = ref.nextSequence()) != null) {
        final String name  = seq.getName();
        final byte[] bases = seq.getBases();

        try {
            out.write(">");
            out.write(name);
            out.newLine();

            if (bases.length == 0) {
                log.warn("Sequence " + name + " contains 0 bases.");
            }
            else {
                for (int i=0; i<bases.length; ++i) {
                    if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                    out.write(bases[i]);
                }

                out.write("\n");
            }
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }
    try {
        out.close();
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return 0;
}
 
Example 18
Source File: NonNFastaSize.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}
 
Example 19
Source File: MergeDgeSparseTest.java    From Drop-seq with MIT License 4 votes vote down vote up
@SuppressWarnings("unchecked")
@Test(dataProvider = "testBasicDataProvider")
public void testBasic(final String testName,
                      final int expectedNumGenes,
                      final int expectedNumCells,
                      final int[] cell_count,
                      final String[] prefix,
                      final String[] filteredGeneRe,
                      final Integer min_cells,
                      final Integer min_genes,
                      final Integer min_transcripts,
                      final DgeHeaderMerger.Stringency headerStringency,
                      final List<File> selectedCellsFiles) throws IOException {
    final File tempDir = Files.createTempDirectory("MergeDgeSparseTest.").toFile();
    tempDir.deleteOnExit();

    final Yaml yamlConverter = new Yaml();
    final Map yamlMap = (Map)yamlConverter.load(IOUtil.openFileForReading(YAML));
    final List datasets = (List)yamlMap.get(MergeDgeSparse.YamlKeys.DATASETS_KEY);

    if (cell_count != null) {
        for (int i = 0; i < cell_count.length; ++i) {
            ((Map)datasets.get(i)).put(MergeDgeSparse.YamlKeys.DatasetsKeys.CELL_COUNT_KEY, cell_count[i]);
        }
    }
    if (prefix != null) {
        for (int i = 0; i < prefix.length; ++i) {
            ((Map)datasets.get(i)).put(MergeDgeSparse.YamlKeys.DatasetsKeys.NAME_KEY, prefix[i]);
        }
    }
    final File outputYaml = new File(tempDir, "test.yaml");
    outputYaml.deleteOnExit();
    final BufferedWriter outputYamlWriter = IOUtil.openFileForBufferedWriting(outputYaml);
    yamlConverter.dump(yamlMap, outputYamlWriter);
    outputYamlWriter.close();

    final File cellSizeOutputFile = new File(tempDir, "test.cell_size.txt");    cellSizeOutputFile.deleteOnExit();
    final File dgeHeaderOutputFile = new File(tempDir, "test.dge_header.txt");  dgeHeaderOutputFile.deleteOnExit();
    final File rawDgeOutputFile = new File(tempDir, "test.raw.dge.txt");        rawDgeOutputFile.deleteOnExit();
    final File scaledDgeOutputFile = new File(tempDir, "test.scaled.dge.txt");  scaledDgeOutputFile.deleteOnExit();
    final File discardedCellsOutputFile = new File(tempDir, "test.discarded_cells.txt"); discardedCellsOutputFile.deleteOnExit();

    final MergeDgeSparse merger = new MergeDgeSparse();
    merger.YAML = outputYaml;
    merger.CELL_SIZE_OUTPUT_FILE = cellSizeOutputFile;
    if (prefix != null) {
        // If not setting prefixes, can't create merged header
        merger.DGE_HEADER_OUTPUT_FILE = dgeHeaderOutputFile;
    }
    merger.RAW_DGE_OUTPUT_FILE = rawDgeOutputFile;
    merger.SCALED_DGE_OUTPUT_FILE = scaledDgeOutputFile;
    merger.HEADER_STRINGENCY = headerStringency;

    if (min_cells != null) {
        merger.MIN_CELLS = min_cells;
    }
    if (min_genes != null) {
        merger.MIN_GENES = min_genes;
    }
    if (min_transcripts != null) {
        merger.MIN_TRANSCRIPTS = min_transcripts;
    }
    if (filteredGeneRe != null) {
        merger.FILTERED_GENE_RE = Arrays.asList(filteredGeneRe);
    } else {
        merger.FILTERED_GENE_RE = Collections.emptyList();
    }
    merger.CELL_BC_FILE = selectedCellsFiles;
    merger.DISCARDED_CELLS_FILE = discardedCellsOutputFile;

    Assert.assertEquals(merger.doWork(), 0);

    final DGEMatrix rawMatrix = DGEMatrix.parseFile(rawDgeOutputFile);
    final DGEMatrix scaledMatrix = DGEMatrix.parseFile(scaledDgeOutputFile);
    Assert.assertEquals(rawMatrix.getGenes().size(), scaledMatrix.getGenes().size(), "nrow(raw matrix) != nrow(scaled matrix)");
    Assert.assertEquals(rawMatrix.getCellBarcodes().size(), scaledMatrix.getCellBarcodes().size(), "ncol(raw matrix) != ncol(scaled matrix)");
    Assert.assertEquals(rawMatrix.getGenes().size(), expectedNumGenes);
    Assert.assertEquals(rawMatrix.getCellBarcodes().size(), expectedNumCells);
}
 
Example 20
Source File: ComputeUMISharing.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
        !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}