htsjdk.samtools.SamReaderFactory Java Examples

The following examples show how to use htsjdk.samtools.SamReaderFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CleanSamTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
    SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final SAMRecord rec = samReader.iterator().next();
    samReader.close();
    Assert.assertEquals(rec.getCigarString(), expectedCigar);
    samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final boolean validated = validator.validateSamFileVerbose(samReader, null);
    samReader.close();
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example #2
Source File: GetHetCoverageIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();

        normalHetPulldownExpected = new Pulldown(normalHeader);
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));

        tumorHetPulldownExpected = new Pulldown(tumorHeader);
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    }
}
 
Example #3
Source File: ApplyBQSRIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testAddingPG() throws IOException {
    final File inFile = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
    final File outFile = GATKBaseTest.createTempFile("testAddingPG", ".bam");
    final String[] args = new String[] {
            "--input", inFile.getAbsolutePath(),
            "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
            "--use-original-qualities",
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
            "--output", outFile.getAbsolutePath()
    };
    runCommandLine(args);

    //The expected output is actually the same as inputs for this read (this ignores the PGs header)
    SamAssertionUtils.assertSamsEqual(outFile, inFile);

    //input has no GATK ApplyBQSR in headers
    Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));

    //output has a GATK ApplyBQSR in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));
}
 
Example #4
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
public void indexBam(File bamFile, File baiFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
       final SamReader bam;

           // input from a normal file
           IOUtil.assertFileIsReadable(bamFile);
           bam = SamReaderFactory.makeDefault().referenceSequence(null)
                   .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
                   .open(bamFile);

       if (bam.type() != SamReader.Type.BAM_TYPE) {
           throw new SAMException("Input file must be bam file, not sam file.");
       }

       if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
           throw new SAMException("Input bam file must be sorted by coordinate");
       }

       BAMIndexer.createIndex(bam, baiFile);

       CloserUtil.close(bam);
}
 
Example #5
Source File: ReadsSparkSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Loads the header using Hadoop-BAM.
 * @param filePathSpecifier path to the bam.
 * @param referencePathSpecifier Reference path or null if not available. Reference is required for CRAM files.
 * @return the header for the bam.
 */
public SAMFileHeader getHeader(final GATKPath filePathSpecifier, final GATKPath referencePathSpecifier) {
    final GATKPath cramReferencePathSpec = checkCramReference(ctx, filePathSpecifier, referencePathSpecifier);

    // GCS case
    if (BucketUtils.isGcsUrl(filePathSpecifier)) {
        final SamReaderFactory factory = SamReaderFactory.makeDefault()
                .validationStringency(validationStringency)
                .referenceSequence(cramReferencePathSpec == null ? null : referencePathSpecifier.toPath());
        try (final ReadsDataSource readsDataSource =
                     new ReadsPathDataSource(Collections.singletonList(filePathSpecifier.toPath()), factory)) {
             return readsDataSource.getHeader();
        }
    }

    // local file or HDFs case
    try {
        return HtsjdkReadsRddStorage.makeDefault(ctx)
                .validationStringency(validationStringency)
                .referenceSourcePath(cramReferencePathSpec == null ? null : cramReferencePathSpec.getRawInputString())
                .read(filePathSpecifier.getRawInputString())
                .getHeader();
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePathSpecifier.getRawInputString() + "\n Caused by:" + e.getMessage(), e);
    }
}
 
Example #6
Source File: TestCRAMInputFormatOnHDFS.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
@Test
public void testReader() throws Exception {
  int expectedCount = 0;
  SamReader samReader = SamReaderFactory.makeDefault()
      .referenceSequence(new File(URI.create(reference))).open(new File(input));
  for (SAMRecord r : samReader) {
    expectedCount++;
  }

  CRAMInputFormat inputFormat = new CRAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(1, splits.size());
  RecordReader<LongWritable, SAMRecordWritable> reader = inputFormat
      .createRecordReader(splits.get(0), taskAttemptContext);
  reader.initialize(splits.get(0), taskAttemptContext);

  int actualCount = 0;
  while (reader.nextKeyValue()) {
    actualCount++;
  }

  assertEquals(expectedCount, actualCount);
}
 
Example #7
Source File: SamFormatConverter.java    From picard with MIT License 6 votes vote down vote up
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
 
Example #8
Source File: PrintReadsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testNoConflictPG() throws IOException {
    final File inFile = new File(TEST_DATA_DIR, "print_reads_withPG.sam");
    final File outFile = GATKBaseTest.createTempFile("testNoConflictRG", ".sam");
    final String[] args = new String[] {
            "--input" , inFile.getAbsolutePath(),
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
            "--output", outFile.getAbsolutePath()
    };
    runCommandLine(args);

    //Make sure contents are the same
    SamAssertionUtils.assertSamsEqual(outFile, inFile);

    //input has GATK PrintReads not NOT GATK PrintReads.1 in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads"));
    Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads.1"));

    //output has both GATK PrintReads and GATK PrintReads.1 in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads"));
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads.1"));
}
 
Example #9
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	String geneName = r.getStringAttribute("gn");
	String geneStrand = r.getStringAttribute("gs");
	String function = r.getStringAttribute("gf");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(function, "UTR");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example #10
Source File: SAMHeaderReader.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/** Does not close the stream. */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	final ValidationStringency
		stringency = getValidationStringency(conf);
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}

	final ReferenceSource refSource = getReferenceSource(conf);
	if (null != refSource) {
		readerFactory.referenceSource(refSource);
	}
	return readerFactory.open(SamInputResource.of(in)).getFileHeader();
}
 
Example #11
Source File: AddCommentsToBamTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testAddCommentsToBam() throws Exception {
    final File outputFile = File.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    outputFile.deleteOnExit();
    runIt(INPUT_FILE, outputFile, commentList);

    final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile);

    // The original comments are massaged when they're added to the header. Perform the same massaging here,
    // and then compare the lists
    final List<String> massagedComments = new LinkedList<String>();
    for (final String comment : commentList) {
        massagedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment);
    }

    Assert.assertEquals(newHeader.getComments(), massagedComments);
    outputFile.delete();
}
 
Example #12
Source File: RevertBaseQualityScoresIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static boolean hasOriginalQualScores( final File bam ) throws IOException {
    try ( final SamReader reader = SamReaderFactory.makeDefault().open(bam) ) {
        for ( SAMRecord read : reader ) {
            final byte[] originalBaseQualities = read.getOriginalBaseQualities();
            Assert.assertNotNull(originalBaseQualities);
            final byte[] baseQualities = read.getBaseQualities();
            Assert.assertEquals(originalBaseQualities.length, baseQualities.length);
            for (int i = 0; i < originalBaseQualities.length; ++i) {
                if (originalBaseQualities[i] != baseQualities[i]) {
                    return false;
                }
            }
        }
    }
    return true;
}
 
Example #13
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneName = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example #14
Source File: MergeSamFilesTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Confirm that unsorted input can result in coordinate sorted output, with index created.
 */
@Test
public void unsortedInputSortedOutputTest() throws Exception {
    final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input");
    final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    mergedOutput.deleteOnExit();
    final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX);
    mergedOutputIndex.deleteOnExit();

    final String[] args = {
            "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(),
            "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(),
            "O=" + mergedOutput.getAbsolutePath(),
            "CREATE_INDEX=true",
            "SO=coordinate"
    };
    final int mergeExitStatus = runPicardCommandLine(args);
    Assert.assertEquals(mergeExitStatus, 0);
    final SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput);
    Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);
    Assert.assertTrue(reader.hasIndex());
    new ValidateSamTester().assertSamValid(mergedOutput);
    Assert.assertTrue(mergedOutputIndex.delete());
    CloserUtil.close(reader);
}
 
Example #15
Source File: CountBamLinesApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private void run() throws IOException, ExecutionException, InterruptedException {
    LOGGER.info("Reading GC Profile");
    final Multimap<Chromosome, GCProfile> gcProfiles = GCProfileFactory.loadGCContent(config.windowSize(), config.gcProfilePath());

    final SamReaderFactory readerFactory = readerFactory(config);
    final CountSupplier countSupplier = new CountSupplier(config.tumor(),
            config.outputDirectory(),
            config.windowSize(),
            config.minMappingQuality(),
            executorService,
            readerFactory);
    final Multimap<Chromosome, CobaltCount> readCounts = countSupplier.fromBam(config.referenceBamPath(), config.tumorBamPath());

    final RatioSupplier ratioSupplier = new RatioSupplier(config.reference(), config.tumor(), config.outputDirectory());
    final Multimap<Chromosome, CobaltRatio> ratios = ratioSupplier.generateRatios(gcProfiles, readCounts);

    final String outputFilename = CobaltRatioFile.generateFilenameForWriting(config.outputDirectory(), config.tumor());
    LOGGER.info("Persisting cobalt ratios to {}", outputFilename);
    versionInfo.write(config.outputDirectory());
    CobaltRatioFile.write(outputFilename, ratios);

    new RatioSegmentation(executorService, config.outputDirectory()).applySegmentation(config.reference(), config.tumor());
}
 
Example #16
Source File: AmberApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private void runTumorOnly() throws InterruptedException, ExecutionException, IOException {
    final SamReaderFactory readerFactory = readerFactory(config);

    final ListMultimap<Chromosome, BaseDepth> allNormal = emptyNormalHetSites(sites);
    final ListMultimap<Chromosome, TumorBAF> tumorBAFMap = tumorBAF(readerFactory, allNormal);

    final List<TumorBAF> tumorBAFList = tumorBAFMap.values()
            .stream()
            .filter(x -> x.tumorRefSupport() >= config.tumorOnlyMinSupport())
            .filter(x -> x.tumorAltSupport() >= config.tumorOnlyMinSupport())
            .filter(x -> isFinite(x.refFrequency()) && Doubles.greaterOrEqual(x.refFrequency(), config.tumorOnlyMinVaf()))
            .filter(x -> isFinite(x.altFrequency()) && Doubles.greaterOrEqual(x.altFrequency(), config.tumorOnlyMinVaf()))
            .sorted()
            .collect(toList());
    final List<AmberBAF> amberBAFList =
            tumorBAFList.stream().map(AmberBAF::create).filter(x -> Double.isFinite(x.tumorBAF())).collect(toList());

    persistence.persisQC(amberBAFList, Lists.newArrayList());
    persistence.persistVersionInfo(versionInfo);
    persistence.persistBafVcf(tumorBAFList, new AmberHetNormalEvidence());
    persistence.persistBAF(amberBAFList);
}
 
Example #17
Source File: TumorContaminationEvidence.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public TumorContaminationEvidence(int typicalReadDepth, int minMappingQuality, int minBaseQuality, final String contig,
        final String bamFile, final SamReaderFactory samReaderFactory, final List<BaseDepth> baseDepths) {
    this.bafFactory = new BaseDepthFactory(minBaseQuality);
    this.contig = contig;
    this.bamFile = bamFile;
    this.samReaderFactory = samReaderFactory;
    this.evidenceMap = Maps.newHashMap();

    final List<ModifiableBaseDepth> tumorRecords = Lists.newArrayList();
    for (BaseDepth baseDepth : baseDepths) {
        ModifiableBaseDepth modifiableBaseDepth = BaseDepthFactory.create(baseDepth);
        evidenceMap.put(baseDepth, modifiableBaseDepth);
        tumorRecords.add(modifiableBaseDepth);
    }
    this.selector = GenomePositionSelectorFactory.create(tumorRecords);

    final GenomeRegions builder = new GenomeRegions(contig, typicalReadDepth);
    baseDepths.forEach(x -> builder.addPosition(x.position()));
    this.supplier = new SAMSlicer(minMappingQuality, builder.build());
}
 
Example #18
Source File: ReplaceSamHeader.java    From picard with MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(HEADER);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMFileHeader replacementHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(HEADER);

    if (BamFileIoUtils.isBamFile(INPUT)) {
        blockCopyReheader(replacementHeader);
    } else {
        standardReheader(replacementHeader);
    }

    return 0;
}
 
Example #19
Source File: ReplaceSamHeader.java    From picard with MIT License 6 votes vote down vote up
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
        throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
    }
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
    for (final SAMRecord rec : recordReader) {
        rec.setHeader(replacementHeader);
        writer.addAlignment(rec);
        progress.record(rec);
    }
    writer.close();
    CloserUtil.close(recordReader);
}
 
Example #20
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException {
    String inputPath = cmd.getOptionValue(INPUT);
    String vcfPath = cmd.getOptionValue(VCF);
    int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));

    SamReaderFactory readerFactory = createFromCommandLine(cmd);
    SamReader reader = readerFactory.open(new File(inputPath));

    QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    writeToSlice(writer, iterator);

    writer.close();
    reader.close();
}
 
Example #21
Source File: BAMIO.java    From dataflow-java with Apache License 2.0 6 votes vote down vote up
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  SamReaderFactory samReaderFactory = SamReaderFactory
      .makeDefault()
      .validationStringency(stringency)
      .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }
  if (offset == 0) {
    return samReaderFactory.open(resource);
  }
  LOG.info("Initializing seeking reader with the offset of " + offset);
  SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource,
      false,
      stringency,
      DefaultSAMRecordFactory.getInstance(),
      offset);
  final SeekingReaderAdapter reader =
      new SeekingReaderAdapter(primitiveReader, resource);
  samReaderFactory.reapplyOptions(reader);
  return reader;
}
 
Example #22
Source File: MapBarcodesByEditDistanceTest.java    From Drop-seq with MIT License 6 votes vote down vote up
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) {
	Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>();
	ObjectCounter<String> barcodes = new ObjectCounter<>();
	
	SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData);
	SAMRecordIterator iter = reader.iterator();
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String barcode = r.getStringAttribute(cellBarcode);
		TagValues v = matcher.getValues(r);
		Set<TagValues> s = umisPerBarcode.get(barcode);
		if (s==null) {
			s = new HashSet<TagValues>();
			umisPerBarcode.put(barcode, s);
		}			
		if (!s.contains(v))
			barcodes.increment(barcode);
		s.add(v);
	}
	UMISharingData result = new UMISharingData();
	result.barcodes=barcodes;
	result.umisPerBarcode=umisPerBarcode;
	return result;
}
 
Example #23
Source File: GatherBamFilesTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void sanityCheckTheGathering() throws Exception {
    final File outputFile = File.createTempFile("gatherBamFilesTest.samFile.", BamFileIoUtils.BAM_FILE_EXTENSION);
    outputFile.deleteOnExit();
    final List<String> args = new ArrayList<String>();
    for (final File splitBam : SPLIT_BAMS) {
        args.add("INPUT=" + splitBam.getAbsolutePath());
    }
    args.add("OUTPUT=" + outputFile);
    runPicardCommandLine(args);

    try (final SamReader samReader1 = SamReaderFactory.makeDefault().open(ORIG_BAM);
         final SamReader samReader2 = SamReaderFactory.makeDefault().open(SPLIT_BAMS.get(0))) {
        final SamComparison samComparison = new SamComparison(samReader1, samReader2);
        Assert.assertFalse(samComparison.areEqual());
    }
}
 
Example #24
Source File: IntervalTagComparatorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Tests ordering of a few SAMRecords without use of a dictionary file.
 * This means that chromosomes are sorted in natural string ordering.
 * The difference is illustrated below, assuming that the sequence dictionary file has chromosomes
 * in the order 1,2,3,4,5,6,7,8,9,10,11....so chromosome 2 has index 2, chromosome 10 has index 10.
 * > sort (c("2", "10"), decreasing=F)
 * [1] "10" "2"
 * > sort (c(2, 10), decreasing=F)
 * [1]  2 10
 */
@Test
public void orderRecordsTestWithDictFile () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(this.dictFile);

	List<SAMRecord> recs = getRecords();

	Integer a = 1;  // these start at 0
	Integer b = 9;
	int exp = a.compareTo(b);

	// when we sort without sequence dictionary, we expect the chr10 tagged read to show up first.
	IntervalTagComparator c = new IntervalTagComparator(this.intervalTag, inputSam.getFileHeader().getSequenceDictionary());
	int comp = c.compare(recs.get(0), recs.get(1));
	// assert sorting same order.
	Assert.assertEquals(exp>0,  comp>0);
}
 
Example #25
Source File: SequenceDictionaryIntersectionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testNoMatch() {
    final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam");
    final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile);
    try {
        final SAMSequenceDictionary nonMatchingDict = new SAMSequenceDictionary(
                leftDict.getFileHeader().getSequenceDictionary().getSequences().stream().
                        map((s) -> new SAMSequenceRecord("xyz_" + s.getSequenceName(), s.getSequenceLength())).
                        collect(Collectors.toList()));
        SequenceDictionaryIntersection sdi = new SequenceDictionaryIntersection(
                leftDict, leftDictFile.getName(), nonMatchingDict, "with prefixes");
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse with descriptions: " + sdi.message(false));
        System.out.println("Verbose with descriptions: " + sdi.message(true));

        sdi = new SequenceDictionaryIntersection(leftDict, nonMatchingDict);
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse without descriptions: " + sdi.message(false) + "\n");
        System.out.println("Verbose without descriptions: " + sdi.message(true) + "\n");
    } finally {
        CloserUtil.close(leftDict);
    }
}
 
Example #26
Source File: UnmarkDuplicatesIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException {
    int duplicateCount = 0;
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    if(referenceFile != null) {
        factory.referenceSequence(referenceFile);
    }
    try ( final SamReader reader = factory.open(bam) ) {
        for ( SAMRecord read : reader ) {
            if ( read.getDuplicateReadFlag() ) {
                ++duplicateCount;
            }
        }
    }

    return duplicateCount;
}
 
Example #27
Source File: SAMRecordReader.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
private SamReader createSamReader(InputStream in, ValidationStringency stringency) {
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}
	return readerFactory.open(SamInputResource.of(in));
}
 
Example #28
Source File: GeneFunctionIteratorWrapperTest.java    From Drop-seq with MIT License 5 votes vote down vote up
private SAMRecord getRecord (final String recName) {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	for (SAMRecord r: inputSam)
		if (r.getReadName().equals(recName))
			return (r);
	Assert.fail("Should never happen");
	throw new IllegalArgumentException("Never found " + recName);
}
 
Example #29
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
private Iterator<List<SAMRecord>> filterSortAndGroupByTags(final File bamFile, final String...tags) {
    final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
    final Iterator<SAMRecord> filteringIterator = new MissingTagFilteringIterator(reader.iterator(), tags);
    final List<Comparator<SAMRecord>> comparators = new ArrayList<>(tags.length);
    for (final String tag: tags) {
        comparators.add(new StringTagComparator(tag));
    }
    final MultiComparator<SAMRecord> comparator = new MultiComparator<>(comparators);
    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), filteringIterator, comparator, null);
    return new GroupingIterator<>(sortingIterator, comparator);
}
 
Example #30
Source File: MergeBamAlignmentTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Various scenarios for EarliestFragmentStrategy.  Confirms that one of the expected ones is selected.
 * Note that there may be an arbitrary selection due to a tie.
 */
@Test(dataProvider = "testEarliestFragmentStrategyDataProvider")
public void testEarliestFragmentStrategy(final String testName, final MultipleAlignmentSpec[] specs) throws IOException {

    final File output = File.createTempFile(testName, ".sam");
    output.deleteOnExit();
    final File[] sams = createSamFilesToBeMerged(specs);

    doMergeAlignment(sams[0], Collections.singletonList(sams[1]),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true, fasta, output,
            SamPairUtil.PairOrientation.FR, MergeBamAlignment.PrimaryAlignmentStrategy.EarliestFragment,
            ONE_OF_THE_BEST_TAG,
            null, false, null);


    final SamReader mergedReader = SamReaderFactory.makeDefault().open(output);
    boolean seenPrimary = false;
    for (final SAMRecord rec : mergedReader) {
        if (!rec.getNotPrimaryAlignmentFlag()) {
            seenPrimary = true;
            final Integer oneOfTheBest = rec.getIntegerAttribute(ONE_OF_THE_BEST_TAG);
            Assert.assertEquals(oneOfTheBest, new Integer(1), "Read not marked as one of the best is primary: " + rec);
        }
    }
    CloserUtil.close(mergedReader);
    Assert.assertTrue(seenPrimary, "Never saw primary alignment");
}