Java Code Examples for htsjdk.samtools.SamReader

The following examples show how to use htsjdk.samtools.SamReader. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
private SAMRecord readOneSamRecord(String samstr) throws IOException {
  final File input = FileUtils.createTempDir("testcheck", "sv_in");
  try {
    FileUtils.stringToFile(samstr, new File(input, OUT_SAM));

    final String inn = input.getPath();

    final SAMRecord rec;
    final File samfile = new File(inn + FS + OUT_SAM);
    try (SamReader reader = SamUtils.makeSamReader(FileUtils.createInputStream(samfile, false))) {
      final CloseableIterator<SAMRecord> iterator = reader.iterator();
      if (iterator.hasNext()) {
        rec = iterator.next();
      } else {
        rec = null;
      }

    }
    return rec;
  } finally {
    FileHelper.deleteAll(input);
  }
}
 
Example 2
Source Project: Drop-seq   Source File: FilterBamByTag.java    License: MIT License 6 votes vote down vote up
/**
 * Just work through the reads one at a time.
 * @param in
 * @param out
 */
void processUnpairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality ,boolean allowPartial) {
	FilteredReadsMetric m = new FilteredReadsMetric();
	ProgressLogger progLog = new ProgressLogger(log);
	for (final SAMRecord r : in) {
		progLog.record(r);
		boolean filterFlag = filterRead(r, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		if (!filterFlag) { 
			out.addAlignment(r);
			m.READS_ACCEPTED++;
		}
		else {
			m.READS_REJECTED++;
		}
	}
	writeSummary(summaryFile, m);
	CloserUtil.close(in);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example 3
@Test()
public void testAddingOATag() {
    final File oaAddedBam = createTempFile("oaAdded", ".bam");

    final List<String> args = Arrays.asList("-I", localTestData.getPath(),
            "-O", oaAddedBam.getPath());
    runCommandLine(args);

    SamReader out = SamReaderFactory.makeDefault().open(oaAddedBam);
    Iterator<String> expectedOAIter = expectedOATags.iterator();
    Iterator<String> expectedXMIter = expectedXMTags.iterator();
    int readsInBam = 0;

    for(SAMRecord r : out) {
        assertEquals(r.getAttribute(AddOriginalAlignmentTags.OA_TAG_NAME), expectedOAIter.next());
        assertEquals(r.getAttribute(AddOriginalAlignmentTags.MATE_CONTIG_TAG_NAME), expectedXMIter.next());
        readsInBam++;
    }
    assertEquals(readsInBam, 8);
}
 
Example 4
Source Project: picard   Source File: CompareSAMs.java    License: MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);

    try (final SamReader samReader1 = samReaderFactory.open(SAM_FILES.get(0));
         final SamReader samReader2 = samReaderFactory.open(SAM_FILES.get(1)))
    {
        final SamComparison comparison = new SamComparison(samReader1, samReader2,
                SAM_FILES.get(0).getAbsolutePath(), SAM_FILES.get(1).getAbsolutePath(), samComparisonArgumentCollection);
        if (OUTPUT != null) {
            comparison.writeReport(OUTPUT, getDefaultHeaders());
        }
        if (comparison.areEqual()) {
            System.out.println("SAM files match.");
        } else {
            System.out.println("SAM files differ.");
        }
        return comparison.areEqual() ? 0 : 1;
    } catch (IOException e) {
        throw new RuntimeIOException("Error opening file", e);
    }
}
 
Example 5
Source Project: Drop-seq   Source File: AnnotationUtilsTest.java    License: MIT License 6 votes vote down vote up
/**
 * Only suitable for small BAMs.
 * @param name
 * @param bamFile
 * @return
 */
private SAMRecord getReadByName (final String name, final File bamFile) {
	SamReader inputSam = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
	Iterator<SAMRecord> iter = inputSam.iterator();
	boolean found = false;
	SAMRecord result=null;
	while (iter.hasNext() && !found) {
		SAMRecord r = iter.next();
		if (r.getReadName().equals(name)) {
			found=true;
			result=r;
		}
	}
	CloserUtil.close(inputSam);
	return result;
}
 
Example 6
Source Project: Drop-seq   Source File: TagReadWithGeneExonFunctionTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneName = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example 7
private void check(final File bam) throws IOException {
  final SamReader normalReader = SamUtils.makeSamReader(bam);
  final Iterator<SAMRecord> norm = normalReader.iterator();
  try {
    try (final RecordIterator<SAMRecord> closed = new SamClosedFileReader(bam, null, null, normalReader.getFileHeader())) {
      while (norm.hasNext() && closed.hasNext()) {
        final SAMRecord a = norm.next();
        final SAMRecord b = closed.next();
        assertEquals(a.getSAMString().trim(), b.getSAMString().trim());
      }
      assertEquals(norm.hasNext(), closed.hasNext());
    }
  } finally {
    normalReader.close();
  }
}
 
Example 8
Source Project: picard   Source File: CrosscheckFingerprintsTest.java    License: MIT License 6 votes vote down vote up
@Test(dataProvider = "bamFilesRGs")
public void testCrossCheckRGs(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException {

    File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics");
    metrics.deleteOnExit();

    final List<String> args = new ArrayList<>(Arrays.asList("INPUT=" + file1.getAbsolutePath(),
            "INPUT=" + file2.getAbsolutePath(),
            "OUTPUT=" + metrics.getAbsolutePath(),
            "LOD_THRESHOLD=" + -2.0,
            "EXPECT_ALL_GROUPS_TO_MATCH=" + expectAllMatch)
    );

    if (file1.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
        args.add("R=" + referenceForCrams);
        args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS);
    } else {
        args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP);
    }

    doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch);
}
 
Example 9
Source Project: Drop-seq   Source File: TestUtils.java    License: MIT License 6 votes vote down vote up
public static void assertSamFilesSame(final File actual, final File expected) {
	final SamReader expectedReader = SamReaderFactory.makeDefault().open(expected);
	final SamReader actualReader = SamReaderFactory.makeDefault().open(actual);
	Assert.assertEquals(expectedReader.getFileHeader(), actualReader.getFileHeader());
	final SAMRecordIterator expectedIterator = expectedReader.iterator();
	final SAMRecordIterator actualIterator = actualReader.iterator();
	while (expectedIterator.hasNext()) {
		if (!actualIterator.hasNext()) {
			Assert.fail(String.format("%s has more records than %s", expected, actual));
		} else {
			Assert.assertEquals(actualIterator.next(), expectedIterator.next());
		}
	}
	if (actualIterator.hasNext()) {
		Assert.fail(String.format("%s has fewer records than %s", expected, actual));
	}
}
 
Example 10
Source Project: Drop-seq   Source File: MapBarcodesByEditDistanceTest.java    License: MIT License 6 votes vote down vote up
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) {
	Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>();
	ObjectCounter<String> barcodes = new ObjectCounter<>();
	
	SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData);
	SAMRecordIterator iter = reader.iterator();
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String barcode = r.getStringAttribute(cellBarcode);
		TagValues v = matcher.getValues(r);
		Set<TagValues> s = umisPerBarcode.get(barcode);
		if (s==null) {
			s = new HashSet<TagValues>();
			umisPerBarcode.put(barcode, s);
		}			
		if (!s.contains(v))
			barcodes.increment(barcode);
		s.add(v);
	}
	UMISharingData result = new UMISharingData();
	result.barcodes=barcodes;
	result.umisPerBarcode=umisPerBarcode;
	return result;
}
 
Example 11
Source Project: Drop-seq   Source File: DetectBeadSubstitutionErrorsTest.java    License: MIT License 6 votes vote down vote up
/**
  * Very lame test -- just confirms that program doesn't crash and return 0 exit status.
  */
 @Test
 public void testBasic() throws IOException {
     final DetectBeadSubstitutionErrors clp = new DetectBeadSubstitutionErrors();
     clp.INPUT = Arrays.asList(TEST_FILE);
     clp.OUTPUT_REPORT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".substitution_report.txt");
     clp.OUTPUT_REPORT.deleteOnExit();
     clp.OUTPUT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".sam");
     clp.OUTPUT.deleteOnExit();
     clp.OUTPUT_SUMMARY = File.createTempFile("DetectBeadSubstitutionErrorsTest.", "substitution_summary.txt");
     clp.OUTPUT_SUMMARY.deleteOnExit();
     Assert.assertEquals(clp.doWork(), 0);
     final TabbedTextFileWithHeaderParser parser = new TabbedTextFileWithHeaderParser(clp.OUTPUT_REPORT);
     int numRows = 0;
     for (final TabbedTextFileWithHeaderParser.Row row : parser) {
         ++numRows;
         Assert.assertEquals(row.getField("intended_barcode"), BIG_BARCODE);
         final String neighbor_barcode = row.getField("neighbor_barcode");
         Assert.assertTrue(SMALL_BARCODES.contains(neighbor_barcode));
         Assert.assertTrue(Boolean.parseBoolean(row.getField("repaired")));
     }
     final SamReader reader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
     for (final SAMRecord rec : reader)
Assert.assertEquals(rec.getStringAttribute("XC"), BIG_BARCODE);
     CloserUtil.close(reader);
 }
 
Example 12
Source Project: hmftools   Source File: BamSlicer.java    License: GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final List<GenomeRegion> regions, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example 13
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException {
    int duplicateCount = 0;
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    if(referenceFile != null) {
        factory.referenceSequence(referenceFile);
    }
    try ( final SamReader reader = factory.open(bam) ) {
        for ( SAMRecord read : reader ) {
            if ( read.getDuplicateReadFlag() ) {
                ++duplicateCount;
            }
        }
    }

    return duplicateCount;
}
 
Example 14
Source Project: picard   Source File: SamFormatConverterTest.java    License: MIT License 6 votes vote down vote up
private void convertFile(final File inputFile, final File fileToCompare, final String extension) throws IOException {
    final List<File> samFiles = new ArrayList<>();
    final ValidateSamFile validateSamFile = new ValidateSamFile();

    final File output = File.createTempFile("SamFormatConverterTest." + inputFile.getName(), extension);
    output.deleteOnExit();
    SamFormatConverter.convert(inputFile, output, ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM, Defaults.CREATE_INDEX);

    validateSamFile.INPUT = output;
    assertEquals(validateSamFile.doWork(), 0);
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM);
    try (final SamReader samReader1 = samReaderFactory.open(output);
         final SamReader samReader2 = samReaderFactory.open(fileToCompare)) {
        final SamComparison samComparison = new SamComparison(samReader1, samReader2);
        Assert.assertTrue(samComparison.areEqual());
    }

}
 
Example 15
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();

        normalHetPulldownExpected = new Pulldown(normalHeader);
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));

        tumorHetPulldownExpected = new Pulldown(tumorHeader);
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    }
}
 
Example 16
Source Project: picard   Source File: CleanSamTest.java    License: MIT License 6 votes vote down vote up
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
    SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final SAMRecord rec = samReader.iterator().next();
    samReader.close();
    Assert.assertEquals(rec.getCigarString(), expectedCigar);
    samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final boolean validated = validator.validateSamFileVerbose(samReader, null);
    samReader.close();
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example 17
Source Project: hmftools   Source File: CandidateEvidence.java    License: GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private List<AltContext> get(@NotNull final String bamFile, @NotNull final GenomeRegion bounds,
        @NotNull final Consumer<SAMRecord> recordConsumer, @NotNull final RefContextFactory candidates) {
    final List<AltContext> altContexts = Lists.newArrayList();

    final SamSlicer slicer = samSlicerFactory.create(bounds);
    try (final SamReader tumorReader = SamReaderFactory.makeDefault()
            .referenceSource(new ReferenceSource(refGenome))
            .open(new File(bamFile))) {

        // First parse
        slicer.slice(tumorReader, recordConsumer);

        // Add all valid alt contexts
        altContexts.addAll(candidates.altContexts());
    } catch (Exception e) {
        throw new CompletionException(e);
    }

    return altContexts;
}
 
Example 18
Source Project: picard   Source File: FastqToSamTest.java    License: MIT License 6 votes vote down vote up
private void convertFileAndVerifyRecordCount(final int expectedCount,
                   final String fastqFilename1,
                   final String fastqFilename2,
                   final FastqQualityFormat version,
                   final boolean permissiveFormat,
                   final boolean useSequentialFastqs) throws IOException {
    final File samFile = convertFile(fastqFilename1, fastqFilename2, version, permissiveFormat, useSequentialFastqs);
    final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
    final SAMRecordIterator iterator = samReader.iterator();
    int actualCount = 0;
    while (iterator.hasNext()) {
        iterator.next();
        actualCount++;
    }
    samReader.close();
    Assert.assertEquals(actualCount, expectedCount);
}
 
Example 19
Source Project: hmftools   Source File: CountSupplier.java    License: GNU General Public License v3.0 6 votes vote down vote up
@NotNull
public Multimap<Chromosome, CobaltCount> fromBam(@NotNull final String referenceBam, @NotNull final String tumorBam)
        throws IOException, ExecutionException, InterruptedException {
    final File tumorFile = new File(tumorBam);
    final File referenceFile = new File(referenceBam);

    final String chromosomeLengthFileName = ChromosomeLengthFile.generateFilename(outputDirectory, tumor);
    final List<ChromosomeLength> lengths;
    try (SamReader reader = readerFactory.open(tumorFile)) {
        lengths = ChromosomeLengthFactory.create(reader.getFileHeader());
    }
    ChromosomeLengthFile.write(chromosomeLengthFileName, lengths);

    LOGGER.info("Calculating Read Count from {}", tumorFile.toString());
    final List<Future<ChromosomeReadCount>> tumorFutures = createFutures(readerFactory, tumorFile, lengths);

    LOGGER.info("Calculating Read Count from {}", referenceFile.toString());
    final List<Future<ChromosomeReadCount>> referenceFutures = createFutures(readerFactory, referenceFile, lengths);

    final Multimap<Chromosome, ReadCount> tumorCounts = fromFutures(tumorFutures);
    final Multimap<Chromosome, ReadCount> referenceCounts = fromFutures(referenceFutures);

    LOGGER.info("Read Count Complete");
    return CobaltCountFactory.merge(referenceCounts, tumorCounts);
}
 
Example 20
Source Project: Drop-seq   Source File: GenotypeSperm.java    License: MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	// validation
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IOUtil.assertFileIsReadable(this.CELL_BC_FILE);
	
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
	log.info("Using " + cellBarcodes.size() + " cells in analysis");
	IntervalList snpIntervals = IntervalList.fromFile(INTERVALS);
	log.info("Using " + snpIntervals.getIntervals().size() + " SNP intervals in analysis");
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
	writeHeader(out);

	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(this.INPUT);			
	
	SNPUMIBasePileupIterator sbpi = getIter(reader, snpIntervals, cellBarcodes);
	
	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, BASE_QUALITY));

	// sort cell barcodes alphabetically for output.
	Collections.sort(cellBarcodes);
	@SuppressWarnings("unused")
	int counter=0;
	while (multiIter.hasNext()) {
		MultiCellDigitalAlleleCounts mcdac = multiIter.next();
		processMCDAC(cellBarcodes, mcdac, out, AUTO_FLUSH_OUTPUTS);
		counter++;
		if (counter%PROGRESS_RATE==0) log.info("Processed " + counter + " SNPs");
	}
	log.info("Processed " + counter +" total SNPs");
	out.close();
	multiIter.close();
	return 0;
}
 
Example 21
Source Project: picard   Source File: CrosscheckFingerprints.java    License: MIT License 5 votes vote down vote up
@Override
protected String[] customCommandLineValidation() {
    if (GENOTYPING_ERROR_RATE <= 0) {
        return new String[]{"GENOTYPING_ERROR_RATE must be greater than zero. Found " + GENOTYPING_ERROR_RATE};
    }
    if (GENOTYPING_ERROR_RATE >= 1) {
        return new String[]{"GENOTYPING_ERROR_RATE must be strictly less than 1, found " + GENOTYPING_ERROR_RATE};
    }
    if (SECOND_INPUT == null && INPUT_SAMPLE_MAP != null) {
        return new String[]{"INPUT_SAMPLE_MAP can only be used when also using SECOND_INPUT"};
    }
    if (SECOND_INPUT == null && SECOND_INPUT_SAMPLE_MAP != null) {
        return new String[]{"SECOND_INPUT_SAMPLE_MAP can only be used when also using SECOND_INPUT"};
    }


    //check that reference is provided if using crams as input
    if (REFERENCE_SEQUENCE == null) {
        final List<String> allInputs = new ArrayList<>(INPUT);
        allInputs.addAll(SECOND_INPUT);
        for (final String input : allInputs) {
            if (input.endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
                return new String[]{"REFERENCE must be provided when using CRAM as input."};
            }
        }
    }

    return super.customCommandLineValidation();
}
 
Example 22
Source Project: rtg-tools   Source File: SamUtils.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Entry point for creating SamReaders using our preferences
 * @param file the file to open
 * @param reference the SequencesReader to be used as the reference (required for CRAM files).
 * @return the SamReader
 * @throws IOException if an I/O problem occurs opening the file
 */
public static SamReader makeSamReader(File file, SequencesReader reference) throws IOException {
  try {
    return getSamReaderFactory(reference).open(file);
  } catch (final RuntimeIOException e) {
    throw (IOException) e.getCause();
  }
}
 
Example 23
Source Project: Drop-seq   Source File: TagReadWithInterval.java    License: MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);
	SAMFileHeader header = inputSam.getFileHeader();
       header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
	SamHeaderUtil.addPgRecord(header, this);

	SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

	IntervalList loci = IntervalList.fromFile(this.INTERVALS);
	OverlapDetector<Interval> od =getOverlapDetector (loci);
	ProgressLogger processLogger = new ProgressLogger(log);

	for (SAMRecord record: inputSam) {
		processLogger.record(record);

		if (record.getReadUnmappedFlag()==false)
			// use alignment blocks instead of start/end to properly deal with split reads mapped over exon/exon boundaries.
			record=tagRead(record, od);
		else
			record.setAttribute(this.TAG, null);
		writer.addAlignment(record);

	}

	CloserUtil.close(inputSam);
	writer.close();

	return(0);


}
 
Example 24
public void testIterator() throws IOException {
  final ByteArrayInputStream baos = new ByteArrayInputStream(SAM.getBytes());
  final SamReader reader = SamUtils.makeSamReader(baos);
  final ReferenceRanges<String> ranges = SamRangeUtils.createExplicitReferenceRange(reader.getFileHeader(), new SamRegionRestriction("g1", 22, 23));
  final SamRestrictingIterator it = new SamRestrictingIterator(reader.iterator(), ranges); //these positions are 0-based
  final int[] expectedLocs = {15, 15, 17, 18, 18, 23};
  int i = 0;
  while (it.hasNext()) {
    final SAMRecord r = it.next();
    assertEquals(r.getSAMString().trim(), expectedLocs[i++], r.getAlignmentStart());
  }
  assertFalse(it.hasNext());
  assertEquals(6, i);
}
 
Example 25
@Override
protected Object doWork(){
    SAMFileHeader header = null;
    try ( final SamReader headerReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamWithHeader) ) {
        header = headerReader.getFileHeader();
    }
    catch ( IOException e ) {
        throw new UserException("Error reading header from " + bamWithHeader.getAbsolutePath(), e);
    }

    SparkUtils.convertHeaderlessHadoopBamShardToBam(bamShard, header, outputBam);
    return null;
}
 
Example 26
/**
 * Prepare iterators over all readers in response to a request for a complete iteration or query
 *
 * @param queryIntervals Intervals to bound the iteration (reads must overlap one of these intervals). If null, iteration is unbounded.
 * @return Iterator over all reads in this data source, limited to overlap with the supplied intervals
 */
private Iterator<GATKRead> prepareIteratorsForTraversal( final List<SimpleInterval> queryIntervals, final boolean queryUnmapped ) {
    // htsjdk requires that only one iterator be open at a time per reader, so close out
    // any previous iterations
    closePreviousIterationsIfNecessary();

    final boolean traversalIsBounded = (queryIntervals != null && ! queryIntervals.isEmpty()) || queryUnmapped;

    // Set up an iterator for each reader, bounded to overlap with the supplied intervals if there are any
    for ( Map.Entry<SamReader, CloseableIterator<SAMRecord>> readerEntry : readers.entrySet() ) {
        if (traversalIsBounded) {
            readerEntry.setValue(
                    new SamReaderQueryingIterator(
                            readerEntry.getKey(),
                            readers.size() > 1 ?
                                    getIntervalsOverlappingReader(readerEntry.getKey(), queryIntervals) :
                                    queryIntervals,
                            queryUnmapped
                    )
            );
        } else {
            readerEntry.setValue(readerEntry.getKey().iterator());
        }
    }

    // Create a merging iterator over all readers if necessary. In the case where there's only a single reader,
    // return its iterator directly to avoid the overhead of the merging iterator.
    Iterator<SAMRecord> startingIterator = null;
    if ( readers.size() == 1 ) {
        startingIterator = readers.entrySet().iterator().next().getValue();
    }
    else {
        startingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
    }

    return new SAMRecordToReadIterator(startingIterator);
}
 
Example 27
Source Project: dataflow-java   Source File: BAMIOITCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void openBAMTest() throws IOException {
 GCSOptions popts = PipelineOptionsFactory.create().as(GCSOptions.class);
 final Storage.Objects storageClient = Transport.newStorageClient(popts).build().objects();

 SamReader samReader = BAMIO.openBAM(storageClient, TEST_BAM_FNAME, ValidationStringency.DEFAULT_STRINGENCY);
 SAMRecordIterator iterator =  samReader.query("1", 550000, 560000, false);
 int readCount = 0;
 while (iterator.hasNext()) {
     iterator.next();
     readCount++;
 }
 Assert.assertEquals("Unexpected count of unmapped reads",
	  EXPECTED_UNMAPPED_READS_COUNT, readCount);
}
 
Example 28
Source Project: Drop-seq   Source File: SpermSeqMarkDuplicatesTest.java    License: MIT License 5 votes vote down vote up
@Test
// tests which reads are marked as duplicates by the read position strategy.
public void testDetectDuplicatesByReadPositionStrategy() throws IOException {
	String [] duplicateReadNames={"READ1:2", "READ2:3"};
	Set<String> dupes = new HashSet<String>(Arrays.asList(duplicateReadNames));

	SpermSeqMarkDuplicates d = new SpermSeqMarkDuplicates();
	d.INPUT=Arrays.asList(INPUT);
	d.OUTPUT=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".bam");
	d.OUTPUT.deleteOnExit();
	d.OUTPUT_STATS=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".pcr_duplicate_metrics");
	d.OUTPUT_STATS.deleteOnExit();
	Assert.assertEquals(0, d.doWork());

	SamReader inputSam = SamReaderFactory.makeDefault().open(d.OUTPUT);
	for (SAMRecord r: inputSam) {
		boolean duplicateReadFlag = r.getDuplicateReadFlag();
		String readName = r.getReadName();
		if (dupes.contains(readName))
			Assert.assertTrue(duplicateReadFlag);
		else
			Assert.assertFalse(duplicateReadFlag);
	}
	final List<SpermSeqMarkDuplicates.PCRDuplicateMetrics> beans = MetricsFile.readBeans(d.OUTPUT_STATS);
	Assert.assertEquals(1, beans.size());
	Assert.assertEquals(dupes.size(), beans.get(0).NUM_DUPLICATES);
}
 
Example 29
/**
 * Shut down this data source permanently, closing all iterations and readers.
 */
@Override
public void close() {
    closePreviousIterationsIfNecessary();

    try {
        for ( Map.Entry<SamReader, CloseableIterator<SAMRecord>> readerEntry : readers.entrySet() ) {
            readerEntry.getKey().close();
        }
    }
    catch ( IOException e ) {
        throw new GATKException("Error closing SAMReader");
    }
}
 
Example 30
Source Project: Drop-seq   Source File: TagReadWithGeneFunctionTest.java    License: MIT License 5 votes vote down vote up
private SAMRecord getFakeRecord (final File testBAMFile, final int start, final int end, final boolean negativeStrand) {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMRecord r = inputSam.iterator().next();
	r.setAlignmentStart(start);
	int length = (end -start) +1;
	r.setCigarString(length+"M");
	r.setMappingQuality(255);
	r.setReadNegativeStrandFlag(negativeStrand);
	r.setReadBases(Arrays.copyOf(r.getReadBases(), length));
	r.setBaseQualities(Arrays.copyOf(r.getBaseQualities(), length));
	return (r);
}