htsjdk.samtools.SamReader Java Examples

The following examples show how to use htsjdk.samtools.SamReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneName = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example #2
Source File: CandidateEvidence.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
private List<AltContext> get(@NotNull final String bamFile, @NotNull final GenomeRegion bounds,
        @NotNull final Consumer<SAMRecord> recordConsumer, @NotNull final RefContextFactory candidates) {
    final List<AltContext> altContexts = Lists.newArrayList();

    final SamSlicer slicer = samSlicerFactory.create(bounds);
    try (final SamReader tumorReader = SamReaderFactory.makeDefault()
            .referenceSource(new ReferenceSource(refGenome))
            .open(new File(bamFile))) {

        // First parse
        slicer.slice(tumorReader, recordConsumer);

        // Add all valid alt contexts
        altContexts.addAll(candidates.altContexts());
    } catch (Exception e) {
        throw new CompletionException(e);
    }

    return altContexts;
}
 
Example #3
Source File: CleanSamTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
    SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final SAMRecord rec = samReader.iterator().next();
    samReader.close();
    Assert.assertEquals(rec.getCigarString(), expectedCigar);
    samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final boolean validated = validator.validateSamFileVerbose(samReader, null);
    samReader.close();
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example #4
Source File: FastqToSamTest.java    From picard with MIT License 6 votes vote down vote up
private void convertFileAndVerifyRecordCount(final int expectedCount,
                   final String fastqFilename1,
                   final String fastqFilename2,
                   final FastqQualityFormat version,
                   final boolean permissiveFormat,
                   final boolean useSequentialFastqs) throws IOException {
    final File samFile = convertFile(fastqFilename1, fastqFilename2, version, permissiveFormat, useSequentialFastqs);
    final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
    final SAMRecordIterator iterator = samReader.iterator();
    int actualCount = 0;
    while (iterator.hasNext()) {
        iterator.next();
        actualCount++;
    }
    samReader.close();
    Assert.assertEquals(actualCount, expectedCount);
}
 
Example #5
Source File: MachineOrientationTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
private SAMRecord readOneSamRecord(String samstr) throws IOException {
  final File input = FileUtils.createTempDir("testcheck", "sv_in");
  try {
    FileUtils.stringToFile(samstr, new File(input, OUT_SAM));

    final String inn = input.getPath();

    final SAMRecord rec;
    final File samfile = new File(inn + FS + OUT_SAM);
    try (SamReader reader = SamUtils.makeSamReader(FileUtils.createInputStream(samfile, false))) {
      final CloseableIterator<SAMRecord> iterator = reader.iterator();
      if (iterator.hasNext()) {
        rec = iterator.next();
      } else {
        rec = null;
      }

    }
    return rec;
  } finally {
    FileHelper.deleteAll(input);
  }
}
 
Example #6
Source File: FilterBamByTag.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Just work through the reads one at a time.
 * @param in
 * @param out
 */
void processUnpairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality ,boolean allowPartial) {
	FilteredReadsMetric m = new FilteredReadsMetric();
	ProgressLogger progLog = new ProgressLogger(log);
	for (final SAMRecord r : in) {
		progLog.record(r);
		boolean filterFlag = filterRead(r, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		if (!filterFlag) { 
			out.addAlignment(r);
			m.READS_ACCEPTED++;
		}
		else {
			m.READS_REJECTED++;
		}
	}
	writeSummary(summaryFile, m);
	CloserUtil.close(in);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example #7
Source File: GetHetCoverageIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();

        normalHetPulldownExpected = new Pulldown(normalHeader);
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));

        tumorHetPulldownExpected = new Pulldown(tumorHeader);
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    }
}
 
Example #8
Source File: SamFormatConverterTest.java    From picard with MIT License 6 votes vote down vote up
private void convertFile(final File inputFile, final File fileToCompare, final String extension) throws IOException {
    final List<File> samFiles = new ArrayList<>();
    final ValidateSamFile validateSamFile = new ValidateSamFile();

    final File output = File.createTempFile("SamFormatConverterTest." + inputFile.getName(), extension);
    output.deleteOnExit();
    SamFormatConverter.convert(inputFile, output, ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM, Defaults.CREATE_INDEX);

    validateSamFile.INPUT = output;
    assertEquals(validateSamFile.doWork(), 0);
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM);
    try (final SamReader samReader1 = samReaderFactory.open(output);
         final SamReader samReader2 = samReaderFactory.open(fileToCompare)) {
        final SamComparison samComparison = new SamComparison(samReader1, samReader2);
        Assert.assertTrue(samComparison.areEqual());
    }

}
 
Example #9
Source File: AddOriginalAlignmentTagsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test()
public void testAddingOATag() {
    final File oaAddedBam = createTempFile("oaAdded", ".bam");

    final List<String> args = Arrays.asList("-I", localTestData.getPath(),
            "-O", oaAddedBam.getPath());
    runCommandLine(args);

    SamReader out = SamReaderFactory.makeDefault().open(oaAddedBam);
    Iterator<String> expectedOAIter = expectedOATags.iterator();
    Iterator<String> expectedXMIter = expectedXMTags.iterator();
    int readsInBam = 0;

    for(SAMRecord r : out) {
        assertEquals(r.getAttribute(AddOriginalAlignmentTags.OA_TAG_NAME), expectedOAIter.next());
        assertEquals(r.getAttribute(AddOriginalAlignmentTags.MATE_CONTIG_TAG_NAME), expectedXMIter.next());
        readsInBam++;
    }
    assertEquals(readsInBam, 8);
}
 
Example #10
Source File: UnmarkDuplicatesIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException {
    int duplicateCount = 0;
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    if(referenceFile != null) {
        factory.referenceSequence(referenceFile);
    }
    try ( final SamReader reader = factory.open(bam) ) {
        for ( SAMRecord read : reader ) {
            if ( read.getDuplicateReadFlag() ) {
                ++duplicateCount;
            }
        }
    }

    return duplicateCount;
}
 
Example #11
Source File: DetectBeadSubstitutionErrorsTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
  * Very lame test -- just confirms that program doesn't crash and return 0 exit status.
  */
 @Test
 public void testBasic() throws IOException {
     final DetectBeadSubstitutionErrors clp = new DetectBeadSubstitutionErrors();
     clp.INPUT = Arrays.asList(TEST_FILE);
     clp.OUTPUT_REPORT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".substitution_report.txt");
     clp.OUTPUT_REPORT.deleteOnExit();
     clp.OUTPUT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".sam");
     clp.OUTPUT.deleteOnExit();
     clp.OUTPUT_SUMMARY = File.createTempFile("DetectBeadSubstitutionErrorsTest.", "substitution_summary.txt");
     clp.OUTPUT_SUMMARY.deleteOnExit();
     Assert.assertEquals(clp.doWork(), 0);
     final TabbedTextFileWithHeaderParser parser = new TabbedTextFileWithHeaderParser(clp.OUTPUT_REPORT);
     int numRows = 0;
     for (final TabbedTextFileWithHeaderParser.Row row : parser) {
         ++numRows;
         Assert.assertEquals(row.getField("intended_barcode"), BIG_BARCODE);
         final String neighbor_barcode = row.getField("neighbor_barcode");
         Assert.assertTrue(SMALL_BARCODES.contains(neighbor_barcode));
         Assert.assertTrue(Boolean.parseBoolean(row.getField("repaired")));
     }
     final SamReader reader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
     for (final SAMRecord rec : reader)
Assert.assertEquals(rec.getStringAttribute("XC"), BIG_BARCODE);
     CloserUtil.close(reader);
 }
 
Example #12
Source File: CountSupplier.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@NotNull
public Multimap<Chromosome, CobaltCount> fromBam(@NotNull final String referenceBam, @NotNull final String tumorBam)
        throws IOException, ExecutionException, InterruptedException {
    final File tumorFile = new File(tumorBam);
    final File referenceFile = new File(referenceBam);

    final String chromosomeLengthFileName = ChromosomeLengthFile.generateFilename(outputDirectory, tumor);
    final List<ChromosomeLength> lengths;
    try (SamReader reader = readerFactory.open(tumorFile)) {
        lengths = ChromosomeLengthFactory.create(reader.getFileHeader());
    }
    ChromosomeLengthFile.write(chromosomeLengthFileName, lengths);

    LOGGER.info("Calculating Read Count from {}", tumorFile.toString());
    final List<Future<ChromosomeReadCount>> tumorFutures = createFutures(readerFactory, tumorFile, lengths);

    LOGGER.info("Calculating Read Count from {}", referenceFile.toString());
    final List<Future<ChromosomeReadCount>> referenceFutures = createFutures(readerFactory, referenceFile, lengths);

    final Multimap<Chromosome, ReadCount> tumorCounts = fromFutures(tumorFutures);
    final Multimap<Chromosome, ReadCount> referenceCounts = fromFutures(referenceFutures);

    LOGGER.info("Read Count Complete");
    return CobaltCountFactory.merge(referenceCounts, tumorCounts);
}
 
Example #13
Source File: SamClosedFileReaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
private void check(final File bam) throws IOException {
  final SamReader normalReader = SamUtils.makeSamReader(bam);
  final Iterator<SAMRecord> norm = normalReader.iterator();
  try {
    try (final RecordIterator<SAMRecord> closed = new SamClosedFileReader(bam, null, null, normalReader.getFileHeader())) {
      while (norm.hasNext() && closed.hasNext()) {
        final SAMRecord a = norm.next();
        final SAMRecord b = closed.next();
        assertEquals(a.getSAMString().trim(), b.getSAMString().trim());
      }
      assertEquals(norm.hasNext(), closed.hasNext());
    }
  } finally {
    normalReader.close();
  }
}
 
Example #14
Source File: CrosscheckFingerprintsTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "bamFilesRGs")
public void testCrossCheckRGs(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException {

    File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics");
    metrics.deleteOnExit();

    final List<String> args = new ArrayList<>(Arrays.asList("INPUT=" + file1.getAbsolutePath(),
            "INPUT=" + file2.getAbsolutePath(),
            "OUTPUT=" + metrics.getAbsolutePath(),
            "LOD_THRESHOLD=" + -2.0,
            "EXPECT_ALL_GROUPS_TO_MATCH=" + expectAllMatch)
    );

    if (file1.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
        args.add("R=" + referenceForCrams);
        args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS);
    } else {
        args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP);
    }

    doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch);
}
 
Example #15
Source File: AnnotationUtilsTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Only suitable for small BAMs.
 * @param name
 * @param bamFile
 * @return
 */
private SAMRecord getReadByName (final String name, final File bamFile) {
	SamReader inputSam = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
	Iterator<SAMRecord> iter = inputSam.iterator();
	boolean found = false;
	SAMRecord result=null;
	while (iter.hasNext() && !found) {
		SAMRecord r = iter.next();
		if (r.getReadName().equals(name)) {
			found=true;
			result=r;
		}
	}
	CloserUtil.close(inputSam);
	return result;
}
 
Example #16
Source File: TestUtils.java    From Drop-seq with MIT License 6 votes vote down vote up
public static void assertSamFilesSame(final File actual, final File expected) {
	final SamReader expectedReader = SamReaderFactory.makeDefault().open(expected);
	final SamReader actualReader = SamReaderFactory.makeDefault().open(actual);
	Assert.assertEquals(expectedReader.getFileHeader(), actualReader.getFileHeader());
	final SAMRecordIterator expectedIterator = expectedReader.iterator();
	final SAMRecordIterator actualIterator = actualReader.iterator();
	while (expectedIterator.hasNext()) {
		if (!actualIterator.hasNext()) {
			Assert.fail(String.format("%s has more records than %s", expected, actual));
		} else {
			Assert.assertEquals(actualIterator.next(), expectedIterator.next());
		}
	}
	if (actualIterator.hasNext()) {
		Assert.fail(String.format("%s has fewer records than %s", expected, actual));
	}
}
 
Example #17
Source File: CompareSAMs.java    From picard with MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);

    try (final SamReader samReader1 = samReaderFactory.open(SAM_FILES.get(0));
         final SamReader samReader2 = samReaderFactory.open(SAM_FILES.get(1)))
    {
        final SamComparison comparison = new SamComparison(samReader1, samReader2,
                SAM_FILES.get(0).getAbsolutePath(), SAM_FILES.get(1).getAbsolutePath(), samComparisonArgumentCollection);
        if (OUTPUT != null) {
            comparison.writeReport(OUTPUT, getDefaultHeaders());
        }
        if (comparison.areEqual()) {
            System.out.println("SAM files match.");
        } else {
            System.out.println("SAM files differ.");
        }
        return comparison.areEqual() ? 0 : 1;
    } catch (IOException e) {
        throw new RuntimeIOException("Error opening file", e);
    }
}
 
Example #18
Source File: MapBarcodesByEditDistanceTest.java    From Drop-seq with MIT License 6 votes vote down vote up
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) {
	Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>();
	ObjectCounter<String> barcodes = new ObjectCounter<>();
	
	SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData);
	SAMRecordIterator iter = reader.iterator();
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String barcode = r.getStringAttribute(cellBarcode);
		TagValues v = matcher.getValues(r);
		Set<TagValues> s = umisPerBarcode.get(barcode);
		if (s==null) {
			s = new HashSet<TagValues>();
			umisPerBarcode.put(barcode, s);
		}			
		if (!s.contains(v))
			barcodes.increment(barcode);
		s.add(v);
	}
	UMISharingData result = new UMISharingData();
	result.barcodes=barcodes;
	result.umisPerBarcode=umisPerBarcode;
	return result;
}
 
Example #19
Source File: BamSlicer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final List<GenomeRegion> regions, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example #20
Source File: TestSAMInputFormat.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
@Test
public void testMapReduceJob() throws Exception {
  Configuration conf = new Configuration();

  FileSystem fileSystem = FileSystem.get(conf);
  Path inputPath = new Path(input);
  Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SAMInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue(success);

  List<String> samStrings = new ArrayList<String>();
  SamReader samReader = SamReaderFactory.makeDefault().open(new File(input));
  for (SAMRecord r : samReader) {
    samStrings.add(r.getSAMString().trim());
  }
  samReader.close();

  File outputFile = new File(new File(outputPath.toUri()), "part-m-00000");
  BufferedReader br = new BufferedReader(new FileReader(outputFile));
  String line;
  int index = 0;
  while ((line = br.readLine()) != null) {
    String value = line.substring(line.indexOf("\t") + 1); // ignore key
    assertEquals(samStrings.get(index++), value);
  }
  br.close();
}
 
Example #21
Source File: BamOverlapChecker.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
public BamOverlapChecker(SamReader bam_file){
    
    SAMFileHeader  header = bam_file.getFileHeader();
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    List<SAMSequenceRecord> sequences = dict.getSequences();
   
    
    booleanMap = new HashMap<String, boolean[]>();
    
    for(SAMSequenceRecord sequence : sequences){
        int sequenceEnd = sequence.getSequenceLength();
        int arrayLength = (int) Math.ceil( (float) sequenceEnd / (float)stepSize );
        boolean[] tempArray;
        tempArray = new boolean[arrayLength];
        
        for(int i=0;i<arrayLength;i++){
            SAMRecordIterator bamQuery = bam_file.queryOverlapping(sequence.getSequenceName(), i*stepSize, (i+1)*stepSize);
            if(bamQuery.hasNext()){
                tempArray[i] = true;
            }else{
                tempArray[i] = false;
            }
            bamQuery.close();
        }
        booleanMap.put(sequence.getSequenceName(),tempArray );
        //System.out.println("Finished checking the bam for chromosome " + sequence.getSequenceName());
    }
    
}
 
Example #22
Source File: GenotypeSperm.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	// validation
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IOUtil.assertFileIsReadable(this.CELL_BC_FILE);
	
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
	log.info("Using " + cellBarcodes.size() + " cells in analysis");
	IntervalList snpIntervals = IntervalList.fromFile(INTERVALS);
	log.info("Using " + snpIntervals.getIntervals().size() + " SNP intervals in analysis");
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
	writeHeader(out);

	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(this.INPUT);			
	
	SNPUMIBasePileupIterator sbpi = getIter(reader, snpIntervals, cellBarcodes);
	
	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, BASE_QUALITY));

	// sort cell barcodes alphabetically for output.
	Collections.sort(cellBarcodes);
	@SuppressWarnings("unused")
	int counter=0;
	while (multiIter.hasNext()) {
		MultiCellDigitalAlleleCounts mcdac = multiIter.next();
		processMCDAC(cellBarcodes, mcdac, out, AUTO_FLUSH_OUTPUTS);
		counter++;
		if (counter%PROGRESS_RATE==0) log.info("Processed " + counter + " SNPs");
	}
	log.info("Processed " + counter +" total SNPs");
	out.close();
	multiIter.close();
	return 0;
}
 
Example #23
Source File: HeaderInfo.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException {
  HeaderInfo result = null;

  // Open and read start of BAM
  LOG.info("Reading header from " + BAMPath);
  final SamReader samReader = BAMIO
      .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY);
  final SAMFileHeader header = samReader.getFileHeader();
  Contig firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs);
  if (firstContig == null) {
    final SAMSequenceRecord seqRecord = header.getSequence(0);
    firstContig = new Contig(seqRecord.getSequenceName(), -1, -1);
  }

  LOG.info("Reading first chunk of reads from " + BAMPath);
  final SAMRecordIterator recordIterator = samReader.query(
      firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false);

  Contig firstShard = null;
  while (recordIterator.hasNext() && result == null) {
    SAMRecord record = recordIterator.next();
    final int alignmentStart = record.getAlignmentStart();
    if (firstShard == null && alignmentStart > firstContig.start &&
        (alignmentStart < firstContig.end || firstContig.end == -1)) {
      firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart);
      LOG.info("Determined first shard to be " + firstShard);
      result = new HeaderInfo(header, firstShard);
    }
  }
  recordIterator.close();
  samReader.close();

  if (result == null) {
    throw new IOException("Did not find reads for the first contig " + firstContig.toString());
  }
  LOG.info("Finished header reading from " + BAMPath);
  return result;
}
 
Example #24
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Entry point for specifically creating a SamReader given a pre-positioned stream, header, and known type
 * @param stream the stream to read from. Must already be performing decompression if required.
 * @param reference the SequencesReader to be used as the reference (required for CRAM files).
 * @param headerOverride the pre-determined SAM header
 * @param assumeType the type of input to assume.
 * @return the SamReader
 * @throws IOException if an I/O problem occurs opening the file
 */
public static SamReader makeSamReader(InputStream stream, SequencesReader reference, SAMFileHeader headerOverride, SamReader.Type assumeType) throws IOException {
  if (assumeType == null) {
    throw new NullPointerException();
  }
  try {
    return getSamReaderFactory(reference)
      .open(SamInputResource.of(stream).header(headerOverride).assumeType(assumeType));
  } catch (final RuntimeIOException e) {
    throw (IOException) e.getCause();
  }
}
 
Example #25
Source File: IntervalTagComparatorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test(enabled=false)
/**
 * For this test, we generate a lot of data, and sort it.
 * Since the SAMRecord doesn't really need to change, generate one static record and a LOT of intervals to tag the record with.
 * Size of the SAMRecord doesn't change, but the speed of the comparator should.
 */
public void testSpeed () {
	int numRecords = 30000000;
	int readLength=60;
	SamReader inputSam = SamReaderFactory.makeDefault().open(dictFile);
	SAMSequenceDictionary dict= inputSam.getFileHeader().getSequenceDictionary();
	dict = filterSD(dict);

	long start = System.currentTimeMillis();

	GenerateIntervalTaggedSAMRecords gen = new GenerateIntervalTaggedSAMRecords(dict, intervalTag, readLength, numRecords);
	IntervalTagComparator comparator = new IntervalTagComparator(this.intervalTag);

	final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(inputSam.getFileHeader(), gen, comparator, null);

	while(sortingIterator.hasNext()) {
		SAMRecord r = sortingIterator.next();
		Assert.assertNotNull(r);
	}
	long elapsed = System.currentTimeMillis() - start;
	System.out.println("elapsed time = " + elapsed + " ms");
	System.out.println("elapsed time = " + Math.round(elapsed/1000) + " seconds");
}
 
Example #26
Source File: MapQualityProcessorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
private SAMRecord getRecordFromBAM (final String readName) {
	File inFile = new File("testdata/org/broadinstitute/transcriptome/barnyard/5cell3gene.bam");
	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(inFile);
	for (SAMRecord r: reader)
		if (r.getReadName().equals(readName)) return (r);
	throw new IllegalArgumentException("Asked for a read not in the BAM!");
}
 
Example #27
Source File: AsIsMarkDuplicatesTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "queryGroupedInput")
public void testQueryGroupedInput(final File input) throws IOException {

    try (final SamReader reader = SamReaderFactory.makeDefault().open(input)) {
        final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester(reader.getFileHeader().getSortOrder());
        tester.setHeader(reader.getFileHeader());
        reader.iterator().stream().forEach(tester::addRecord);
        tester.setExpectedOpticalDuplicate(0);
        tester.addArg("ASSUME_SORT_ORDER=queryname");
        tester.runTest();
    }
}
 
Example #28
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
private Iterator<List<SAMRecord>> filterSortAndGroupByTags(final File bamFile, final String...tags) {
    final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
    final Iterator<SAMRecord> filteringIterator = new MissingTagFilteringIterator(reader.iterator(), tags);
    final List<Comparator<SAMRecord>> comparators = new ArrayList<>(tags.length);
    for (final String tag: tags) {
        comparators.add(new StringTagComparator(tag));
    }
    final MultiComparator<SAMRecord> comparator = new MultiComparator<>(comparators);
    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), filteringIterator, comparator, null);
    return new GroupingIterator<>(sortingIterator, comparator);
}
 
Example #29
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static CloseableIterator<SAMRecord> getIterator(@NotNull SamReader reader, @NotNull QueryInterval[] intervals,
        @NotNull final long[] filePointers) {
    if (reader instanceof SamReader.PrimitiveSamReaderToSamReaderAdapter) {
        SamReader.PrimitiveSamReaderToSamReaderAdapter adapter = (SamReader.PrimitiveSamReaderToSamReaderAdapter) reader;
        if (adapter.underlyingReader() instanceof BAMFileReader) {
            BAMFileReader bamReader = (BAMFileReader) adapter.underlyingReader();
            return bamReader.createIndexIterator(intervals, false, filePointers);
        }
    }
    return reader.queryOverlapping(intervals);
}
 
Example #30
Source File: AbstractPrintReadsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "readFilterTestData")
public void testReadFilters(
        final String input,
        final String reference,
        final String extOut,
        final List<String> inputArgs,
        final int expectedCount) throws IOException
{
    final File outFile = createTempFile("testReadFilter", extOut);

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addRaw("-I"); args.addRaw(new File(TEST_DATA_DIR, input).getAbsolutePath());
    args.addRaw("-O"); args.addRaw(outFile.getAbsolutePath());
    if ( reference != null ) {
        args.addRaw("-R"); args.addRaw(new File(TEST_DATA_DIR, reference).getAbsolutePath());
    }
    for (final String filter : inputArgs) {
        args.addRaw(filter);
    }

    runCommandLine(args);


    SamReaderFactory factory = SamReaderFactory.makeDefault();
    if (reference != null) {
        factory = factory.referenceSequence(new File(TEST_DATA_DIR, reference));
    }
    int count = 0;
    try (final SamReader reader = factory.open(outFile)) {
        Iterator<SAMRecord> it = reader.iterator();
        while (it.hasNext()) {
            SAMRecord rec = it.next();
            count++;
        }
    }
    Assert.assertEquals(count, expectedCount);
}