Java Code Examples for htsjdk.samtools.util.IOUtil#assertFileIsWritable()

The following examples show how to use htsjdk.samtools.util.IOUtil#assertFileIsWritable(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
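Before diving into the project examples, here is a minimal, self-contained sketch of the common pattern these tools follow: assert up front that inputs are readable and outputs are writable, so a run fails fast before any expensive work begins. This sketch assumes the htsjdk assert helpers report failure by throwing htsjdk.samtools.SAMException (in the tools below the exception is simply allowed to propagate); the file names are placeholders.

import java.io.File;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.IOUtil;

public class ValidateFilesExample {
    public static void main(final String[] args) {
        // Placeholder paths; substitute real files when trying this out.
        final File input = new File("reads.bam");
        final File output = new File("metrics.txt");
        try {
            IOUtil.assertFileIsReadable(input);
            // Throws if the file exists but cannot be overwritten, or cannot be created.
            IOUtil.assertFileIsWritable(output);
        } catch (final SAMException e) {
            // Fail fast before any readers or writers are opened.
            System.err.println("File check failed: " + e.getMessage());
            System.exit(1);
        }
        // ... safe to open readers/writers and do the real work here ...
    }
}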
Example 1
Source File: AddCommentsToBam.java    From picard with MIT License
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new PicardException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new PicardException("Comments can not contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);

    return 0;
}
 
Example 2
Source File: SamToFastq.java    From picard with MIT License
private File makeReadGroupFile(final SAMReadGroupRecord readGroup, final String preExtSuffix) {
    String fileName = null;
    if (RG_TAG.equalsIgnoreCase("PU")) {
        fileName = readGroup.getPlatformUnit();
    } else if (RG_TAG.equalsIgnoreCase("ID")) {
        fileName = readGroup.getReadGroupId();
    }
    if (fileName == null) {
        throw new PicardException("The selected RG_TAG: " + RG_TAG + " is not present in the bam header.");
    }
    fileName = IOUtil.makeFileNameSafe(fileName);
    if (preExtSuffix != null) {
        fileName += preExtSuffix;
    }
    fileName += COMPRESS_OUTPUTS_PER_RG ? ".fastq.gz" : ".fastq";

    final File result = (OUTPUT_DIR != null)
            ? new File(OUTPUT_DIR, fileName)
            : new File(fileName);
    IOUtil.assertFileIsWritable(result);
    return result;
}
 
Example 3
Source File: InsertSizeMetricsCollector.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Calls R script to plot histogram(s) in PDF.
 */
private void writeHistogramPDF(final String inputName) throws RScriptExecutorException {
    // path to Picard R script for producing histograms in PDF files.
    final String R_SCRIPT = "insertSizeHistogram.R";

    File histFile = new File(inputArgs.histogramPlotFile);
    IOUtil.assertFileIsWritable(histFile);

    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new Resource(R_SCRIPT, InsertSizeMetricsCollector.class));
    executor.addArgs(
            inputArgs.output,           // text-based metrics file
            histFile.getAbsolutePath(), // PDF graphics file
            inputName                   // input bam file
    );
    if (inputArgs.histogramWidth != null) {
        executor.addArgs(String.valueOf(inputArgs.histogramWidth));
    }
    executor.exec();
}
 
Example 4
Source File: BamTagOfTagCounts.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));

    writeHeader(out);

    TagOfTagResults<String, String> results = getResults(this.INPUT, this.PRIMARY_TAG, this.SECONDARY_TAG, this.FILTER_PCR_DUPLICATES, this.MINIMUM_MAPPING_QUALITY);

    for (String k : results.getKeys()) {
        Set<String> values = results.getValues(k);
        writeStats(k, values, out);
    }
    out.close();

    return 0;
}
 
Example 5
Source File: SamFormatConverter.java    From picard with MIT License
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
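Note that convert closes its reader and writer only on the success path; if an exception is thrown mid-copy, both are leaked. A defensive variant using try-with-resources, shown here as a sketch rather than the actual Picard implementation, keeps the same logic but guarantees cleanup. Using a primitive boolean also avoids a possible NullPointerException from unboxing the Boolean parameter.

public static void convertSafely(final File input, final File output, final File referenceSequence, final boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    // SamReader and SAMFileWriter are both Closeable, so try-with-resources
    // closes them even when an exception is thrown mid-conversion.
    try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
         final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence)) {
        if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
        }
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
        }
    } catch (final IOException e) {
        // SamReader.close() declares IOException; surface it as a runtime error.
        throw new RuntimeIOException("Error closing " + input.getAbsolutePath(), e);
    }
}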
 
Example 6
Source File: ExtractSequences.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
        }
    }

    CloserUtil.close(out);

    return 0;
}
 
Example 7
Source File: VcfFormatConverter.java    From picard with MIT License
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example 8
Source File: CreateVerifyIDIntensityContaminationMetricsFile.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");
    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);
            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));

            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }
    return 0;
}
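The lineMatch helper is not shown in this snippet. A plausible reconstruction, assuming it validates a line against a pattern and throws on a mismatch (the actual Picard helper may differ), is:

// Hypothetical reconstruction of the lineMatch helper used above; not the verbatim Picard code.
private static Matcher lineMatch(final String line, final Pattern pattern) {
    if (line == null) {
        throw new PicardException("Unexpected end of file while parsing VerifyIDIntensity output");
    }
    final Matcher matcher = pattern.matcher(line);
    if (!matcher.matches()) {
        throw new PicardException("Line '" + line + "' did not match the expected pattern: " + pattern.pattern());
    }
    return matcher;
}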
 
Example 9
Source File: CollectGcBiasMetrics.java    From picard with MIT License
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    IOUtil.assertFileIsWritable(SUMMARY_OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

    //Calculate windowsByGc for the reference sequence
    final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE);

    //Delegate actual collection to GcBiasMetricCollector
    multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED, ALSO_IGNORE_DUPLICATES);
}
 
Example 10
Source File: MeanQualityByCycle.java    From picard with MIT License
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    // If we're working with a single library, assign that library's name
    // as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
    }
}
 
Example 11
Source File: DGEMatrix.java    From Drop-seq with MIT License
public void writeDropSeqMatrixMarket(final File output, final boolean formatAsInteger, final boolean transpose) {
    try {
        IOUtil.assertFileIsWritable(output);
        final int cardinality;
        if (expressionMatrix instanceof SparseMatrix)
            cardinality = ((SparseMatrix) expressionMatrix).cardinality();
        else
            // dense case: one triplet per matrix entry (rows x columns)
            cardinality = expressionMatrix.rows() * expressionMatrix.columns();
        final MatrixMarketWriter writer = new MatrixMarketWriter(output, MatrixMarketConstants.ElementType.real,
                transpose ? expressionMatrix.columns() : expressionMatrix.rows(),
                transpose ? expressionMatrix.rows() : expressionMatrix.columns(),
                cardinality, this.getGenes(), this.getCellBarcodes(),
                MatrixMarketConstants.GENES, MatrixMarketConstants.CELL_BARCODES);
        if (expressionMatrix instanceof SparseMatrix) {
            final SparseMatrix mat = (SparseMatrix) expressionMatrix;
            final MatrixIterator it = mat.nonZeroIterator();
            while (it.hasNext()) {
                final double val = it.next();
                final int row = it.rowIndex();
                final int col = it.columnIndex();
                writeMatrixMarketTriplet(writer, row, col, val, formatAsInteger, transpose);
            }
        } else {
            for (int i = 0; i < expressionMatrix.rows(); ++i)
                for (int j = 0; j < expressionMatrix.columns(); ++j) {
                    final double exp = expressionMatrix.get(i, j);
                    if (exp != SPARSE_VALUE)
                        writeMatrixMarketTriplet(writer, i, j, exp, formatAsInteger, transpose);
                }
        }
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException("Trouble writing " + output.getAbsolutePath(), e);
    }
}
 
Example 12
Source File: TagReadWithGeneExonFunction.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(this.INPUT);
    IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE);
    if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY);
    IOUtil.assertFileIsWritable(this.OUTPUT);

    SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);

    SAMFileHeader header = inputSam.getFileHeader();
    SamHeaderUtil.addPgRecord(header, this);
    SAMSequenceDictionary bamDict = header.getSequenceDictionary();

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict);
    SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

    for (SAMRecord r: inputSam) {
        pl.record(r);

        if (!r.getReadUnmappedFlag())
            r = setAnnotations(r, geneOverlapDetector);
        writer.addAlignment(r);
    }

    CloserUtil.close(inputSam);
    writer.close();
    if (this.USE_STRAND_INFO) log.info(this.metrics.toString());
    if (SUMMARY==null) return 0;

    //process summary
    MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<>();
    outFile.addMetric(this.metrics);
    outFile.write(this.SUMMARY);
    return 0;
}
 
Example 13
Source File: CollectInsertSizeMetrics.java    From picard with MIT License
@Override protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(Histogram_FILE);

    //Delegate actual collection to InsertSizeMetricCollector
    multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT,
            HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
}
 
Example 14
Source File: GatherVcfsCloud.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Override
protected Object doWork() {
    log.info("Checking inputs.");
    final List<Path> inputPaths = inputs.stream().map(IOUtils::getPath).collect(Collectors.toList());

    if (!ignoreSafetyChecks) {
        for (final Path f : inputPaths) {
            IOUtil.assertFileIsReadable(f);
        }
    }

    IOUtil.assertFileIsWritable(output);

    final SAMSequenceDictionary sequenceDictionary = getHeader(inputPaths.get(0)).getSequenceDictionary();

    if (createIndex && sequenceDictionary == null) {
        throw new UserException("In order to index the resulting VCF, the input VCFs must contain ##contig lines.");
    }

    if (!ignoreSafetyChecks) {
        log.info("Checking file headers and first records to ensure compatibility.");
        assertSameSamplesAndValidOrdering(inputPaths, disableContigOrderingCheck);
    }

    if (gatherType == GatherType.AUTOMATIC) {
        if (canBlockCopy(inputPaths, output)) {
            gatherType = GatherType.BLOCK;
        } else {
            gatherType = GatherType.CONVENTIONAL;
        }
    }

    if (gatherType == GatherType.BLOCK && !canBlockCopy(inputPaths, output)) {
        throw new UserException.BadInput(
                "Requested block copy but some files are not bgzipped, all inputs and the output must be bgzipped to block copy");
    }

    switch (gatherType) {
        case BLOCK:
            log.info("Gathering by copying gzip blocks. Will not be able to validate position non-overlap of files.");
            if (createIndex) {
                log.warn("Index creation not currently supported when gathering block compressed VCFs.");
            }
            gatherWithBlockCopying(inputPaths, output, cloudPrefetchBuffer);
            break;
        case CONVENTIONAL:
            log.info("Gathering by conventional means.");
            gatherConventionally(sequenceDictionary, createIndex, inputPaths, output, cloudPrefetchBuffer, disableContigOrderingCheck);
            break;
        default:
            throw new GATKException.ShouldNeverReachHereException("Invalid gather type: " + gatherType + ".  Please report this bug to the developers.");
    }
    return null;
}
 
Example 15
Source File: NonNFastaSize.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}
 
Example 16
Source File: CollectWgsMetrics.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    INTERVALS = intervalArugmentCollection.getIntervalFile();
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
    }
    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        IOUtil.assertFileIsWritable(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    // it doesn't make sense for the locus accumulation cap to be lower than the coverage cap
    if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) {
        log.warn("Setting the LOCUS_ACCUMULATION_CAP to be equal to the COVERAGE_CAP (" + COVERAGE_CAP + ") because it should not be lower");
        LOCUS_ACCUMULATION_CAP = COVERAGE_CAP;
    }

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = getSamReader();
    final AbstractLocusIterator iterator = getLocusIterator(in);

    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter adapterFilter = new CountingAdapterFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    // The order in which filters are added matters!
    filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
    filters.add(adapterFilter);
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
        filters.add(pairFilter);
    }
    iterator.setSamFilters(filters);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final AbstractWgsMetricsCollector<?> collector = getCollector(COVERAGE_CAP, getIntervalsToExamine());
    final WgsMetricsProcessor processor = getWgsMetricsProcessor(progress, refWalker, iterator, collector);
    processor.processFile();

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    processor.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, adapterFilter, mapqFilter, pairFilter);
    out.write(OUTPUT);

    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        // Write out theoretical sensitivity results.
        final MetricsFile<TheoreticalSensitivityMetrics, ?> theoreticalSensitivityMetrics = getMetricsFile();
        log.info("Calculating theoretical sentitivity at " + ALLELE_FRACTION.size() + " allele fractions.");
        List<TheoreticalSensitivityMetrics> tsm = TheoreticalSensitivity.calculateSensitivities(SAMPLE_SIZE, collector.getUnfilteredDepthHistogram(), collector.getUnfilteredBaseQHistogram(), ALLELE_FRACTION);
        theoreticalSensitivityMetrics.addAllMetrics(tsm);
        theoreticalSensitivityMetrics.write(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    return 0;
}
 
Example 17
Source File: ComputeUMISharing.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
                !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}
 
Example 18
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    int vc = validateCommands();
    if (vc > 0) return vc;

    if (this.COUNT_TAGS_EDIT_DISTANCE > 0) this.medUMI = new MapBarcodesByEditDistance(false);

    med = new MapBarcodesByEditDistance(false, this.NUM_THREADS, 0);

    PrintStream outMetrics = null;
    if (this.ADAPTIVE_ED_METRICS_FILE != null) {
        outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.ADAPTIVE_ED_METRICS_FILE));
        writeAdaptiveEditDistanceMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
    }

    if (this.MUTATIONAL_COLLAPSE_METRICS_FILE != null) {
        med = new MapBarcodesByEditDistance(true, this.NUM_THREADS, 1000);
        outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.MUTATIONAL_COLLAPSE_METRICS_FILE));
        writeMutationalCollapseMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
    }

    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    SAMFileHeader header = reader.getFileHeader();
    SortOrder sortOrder = header.getSortOrder();

    SAMFileWriter writer = getWriter(reader);
    final ObjectSink<SAMRecord> recSink = new SamWriterSink(writer);

    PeekableGroupingIterator<SAMRecord> groupingIter = orderReadsByTagsPeekable(reader, this.COLLAPSE_TAG, this.CONTEXT_TAGS, this.READ_MQ, this.OUT_TAG, recSink);

    log.info("Collapsing tag and writing results");

    if (!LOW_MEMORY_MODE)
        fasterIteration(groupingIter, writer, outMetrics);
    else
        lowMemoryIteration(groupingIter, writer, outMetrics, header);

    log.info("Re-sorting output BAM in " + sortOrder.toString() + " if necessary");
    CloserUtil.close(groupingIter);
    CloserUtil.close(reader);
    writer.close();
    if (outMetrics != null) CloserUtil.close(outMetrics);
    log.info("DONE");
    return 0;
}
 
Example 19
Source File: MarkIlluminaAdapters.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}
 
Example 20
Source File: FilterBam.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    buildPatterns();

    SamReader in = SamReaderFactory.makeDefault().open(INPUT);

    SAMFileHeader fileHeader = editSequenceDictionary(in.getFileHeader().clone());
    SamHeaderUtil.addPgRecord(fileHeader, this);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, true, OUTPUT);
    ProgressLogger progLog = new ProgressLogger(log);

    final boolean sequencesRemoved = fileHeader.getSequenceDictionary().getSequences().size() != in.getFileHeader().getSequenceDictionary().getSequences().size();

    FilteredReadsMetric m = new FilteredReadsMetric();

    for (final SAMRecord r : in) {
        progLog.record(r);
        if (!filterRead(r)) {
            String sequenceName = stripReferencePrefix(r.getReferenceName());
            String mateSequenceName = null;
            if (r.getMateReferenceIndex() != -1)
                mateSequenceName = stripReferencePrefix(r.getMateReferenceName());
            if (sequencesRemoved || sequenceName != null) {
                if (sequenceName == null)
                    sequenceName = r.getReferenceName();
                // Even if sequence name has not been edited, if sequences have been removed, need to set
                // reference name again to invalidate reference index cache.
                r.setReferenceName(sequenceName);
            }
            if (r.getMateReferenceIndex() != -1 && (sequencesRemoved || mateSequenceName != null)) {
                if (mateSequenceName == null)
                    mateSequenceName = r.getMateReferenceName();
                // It's possible that the mate was mapped to a reference sequence that has been dropped from
                // the sequence dictionary.  If so, set the mate to be unmapped.
                if (fileHeader.getSequenceDictionary().getSequence(mateSequenceName) != null)
                    r.setMateReferenceName(mateSequenceName);
                else {
                    r.setMateUnmappedFlag(true);
                    r.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
                    r.setMateAlignmentStart(0);
                }
            }
            out.addAlignment(r);
            m.READS_ACCEPTED++;
        } else {
            m.READS_REJECTED++;
        }
    }
    // write the summary if the summary file is not null.
    writeSummary(this.SUMMARY, m);
    CloserUtil.close(in);
    out.close();
    FilterProgramUtils.reportAndCheckFilterResults("reads", m.READS_ACCEPTED, m.READS_REJECTED,
            PASSING_READ_THRESHOLD, log);
    return 0;
}
}