Java Code Examples for htsjdk.samtools.SamReaderFactory

The following examples show how to use htsjdk.samtools.SamReaderFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();

        normalHetPulldownExpected = new Pulldown(normalHeader);
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));

        tumorHetPulldownExpected = new Pulldown(tumorHeader);
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    }
}
 
Example 2
private void run() throws IOException, ExecutionException, InterruptedException {
    LOGGER.info("Reading GC Profile");
    final Multimap<Chromosome, GCProfile> gcProfiles = GCProfileFactory.loadGCContent(config.windowSize(), config.gcProfilePath());

    final SamReaderFactory readerFactory = readerFactory(config);
    final CountSupplier countSupplier = new CountSupplier(config.tumor(),
            config.outputDirectory(),
            config.windowSize(),
            config.minMappingQuality(),
            executorService,
            readerFactory);
    final Multimap<Chromosome, CobaltCount> readCounts = countSupplier.fromBam(config.referenceBamPath(), config.tumorBamPath());

    final RatioSupplier ratioSupplier = new RatioSupplier(config.reference(), config.tumor(), config.outputDirectory());
    final Multimap<Chromosome, CobaltRatio> ratios = ratioSupplier.generateRatios(gcProfiles, readCounts);

    final String outputFilename = CobaltRatioFile.generateFilenameForWriting(config.outputDirectory(), config.tumor());
    LOGGER.info("Persisting cobalt ratios to {}", outputFilename);
    versionInfo.write(config.outputDirectory());
    CobaltRatioFile.write(outputFilename, ratios);

    new RatioSegmentation(executorService, config.outputDirectory()).applySegmentation(config.reference(), config.tumor());
}
 
Example 3
Source Project: picard   Source File: SamFormatConverter.java    License: MIT License 6 votes vote down vote up
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
 
Example 4
/**
 * Loads the header using Hadoop-BAM.
 * @param filePathSpecifier path to the bam.
 * @param referencePathSpecifier Reference path or null if not available. Reference is required for CRAM files.
 * @return the header for the bam.
 */
public SAMFileHeader getHeader(final GATKPath filePathSpecifier, final GATKPath referencePathSpecifier) {
    final GATKPath cramReferencePathSpec = checkCramReference(ctx, filePathSpecifier, referencePathSpecifier);

    // GCS case
    if (BucketUtils.isGcsUrl(filePathSpecifier)) {
        final SamReaderFactory factory = SamReaderFactory.makeDefault()
                .validationStringency(validationStringency)
                .referenceSequence(cramReferencePathSpec == null ? null : referencePathSpecifier.toPath());
        try (final ReadsDataSource readsDataSource =
                     new ReadsPathDataSource(Collections.singletonList(filePathSpecifier.toPath()), factory)) {
             return readsDataSource.getHeader();
        }
    }

    // local file or HDFs case
    try {
        return HtsjdkReadsRddStorage.makeDefault(ctx)
                .validationStringency(validationStringency)
                .referenceSourcePath(cramReferencePathSpec == null ? null : cramReferencePathSpec.getRawInputString())
                .read(filePathSpecifier.getRawInputString())
                .getHeader();
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePathSpecifier.getRawInputString() + "\n Caused by:" + e.getMessage(), e);
    }
}
 
Example 5
Source Project: Hadoop-BAM   Source File: TestCRAMInputFormatOnHDFS.java    License: MIT License 6 votes vote down vote up
@Test
public void testReader() throws Exception {
  int expectedCount = 0;
  SamReader samReader = SamReaderFactory.makeDefault()
      .referenceSequence(new File(URI.create(reference))).open(new File(input));
  for (SAMRecord r : samReader) {
    expectedCount++;
  }

  CRAMInputFormat inputFormat = new CRAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(1, splits.size());
  RecordReader<LongWritable, SAMRecordWritable> reader = inputFormat
      .createRecordReader(splits.get(0), taskAttemptContext);
  reader.initialize(splits.get(0), taskAttemptContext);

  int actualCount = 0;
  while (reader.nextKeyValue()) {
    actualCount++;
  }

  assertEquals(expectedCount, actualCount);
}
 
Example 6
Source Project: chipster   Source File: SamBamUtils.java    License: MIT License 6 votes vote down vote up
public void indexBam(File bamFile, File baiFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
       final SamReader bam;

           // input from a normal file
           IOUtil.assertFileIsReadable(bamFile);
           bam = SamReaderFactory.makeDefault().referenceSequence(null)
                   .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS)
                   .open(bamFile);

       if (bam.type() != SamReader.Type.BAM_TYPE) {
           throw new SAMException("Input file must be bam file, not sam file.");
       }

       if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
           throw new SAMException("Input bam file must be sorted by coordinate");
       }

       BAMIndexer.createIndex(bam, baiFile);

       CloserUtil.close(bam);
}
 
Example 7
@Test
public void testAddingPG() throws IOException {
    final File inFile = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
    final File outFile = GATKBaseTest.createTempFile("testAddingPG", ".bam");
    final String[] args = new String[] {
            "--input", inFile.getAbsolutePath(),
            "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
            "--use-original-qualities",
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
            "--output", outFile.getAbsolutePath()
    };
    runCommandLine(args);

    //The expected output is actually the same as inputs for this read (this ignores the PGs header)
    SamAssertionUtils.assertSamsEqual(outFile, inFile);

    //input has no GATK ApplyBQSR in headers
    Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));

    //output has a GATK ApplyBQSR in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));
}
 
Example 8
@Test
public void testNoConflictPG() throws IOException {
    final File inFile = new File(TEST_DATA_DIR, "print_reads_withPG.sam");
    final File outFile = GATKBaseTest.createTempFile("testNoConflictRG", ".sam");
    final String[] args = new String[] {
            "--input" , inFile.getAbsolutePath(),
            "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
            "--output", outFile.getAbsolutePath()
    };
    runCommandLine(args);

    //Make sure contents are the same
    SamAssertionUtils.assertSamsEqual(outFile, inFile);

    //input has GATK PrintReads not NOT GATK PrintReads.1 in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads"));
    Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK PrintReads.1"));

    //output has both GATK PrintReads and GATK PrintReads.1 in headers
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads"));
    Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK PrintReads.1"));
}
 
Example 9
Source Project: Drop-seq   Source File: TagReadWithGeneFunctionTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	String geneName = r.getStringAttribute("gn");
	String geneStrand = r.getStringAttribute("gs");
	String function = r.getStringAttribute("gf");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(function, "UTR");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example 10
Source Project: Hadoop-BAM   Source File: SAMHeaderReader.java    License: MIT License 6 votes vote down vote up
/** Does not close the stream. */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	final ValidationStringency
		stringency = getValidationStringency(conf);
	SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		readerFactory.validationStringency(stringency);
	}

	final ReferenceSource refSource = getReferenceSource(conf);
	if (null != refSource) {
		readerFactory.referenceSource(refSource);
	}
	return readerFactory.open(SamInputResource.of(in)).getFileHeader();
}
 
Example 11
Source Project: picard   Source File: AddCommentsToBamTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testAddCommentsToBam() throws Exception {
    final File outputFile = File.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    outputFile.deleteOnExit();
    runIt(INPUT_FILE, outputFile, commentList);

    final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile);

    // The original comments are massaged when they're added to the header. Perform the same massaging here,
    // and then compare the lists
    final List<String> massagedComments = new LinkedList<String>();
    for (final String comment : commentList) {
        massagedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment);
    }

    Assert.assertEquals(newHeader.getComments(), massagedComments);
    outputFile.delete();
}
 
Example 12
private static boolean hasOriginalQualScores( final File bam ) throws IOException {
    try ( final SamReader reader = SamReaderFactory.makeDefault().open(bam) ) {
        for ( SAMRecord read : reader ) {
            final byte[] originalBaseQualities = read.getOriginalBaseQualities();
            Assert.assertNotNull(originalBaseQualities);
            final byte[] baseQualities = read.getBaseQualities();
            Assert.assertEquals(originalBaseQualities.length, baseQualities.length);
            for (int i = 0; i < originalBaseQualities.length; ++i) {
                if (originalBaseQualities[i] != baseQualities[i]) {
                    return false;
                }
            }
        }
    }
    return true;
}
 
Example 13
Source Project: Drop-seq   Source File: TagReadWithGeneExonFunctionTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneName = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example 14
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException {
    int duplicateCount = 0;
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    if(referenceFile != null) {
        factory.referenceSequence(referenceFile);
    }
    try ( final SamReader reader = factory.open(bam) ) {
        for ( SAMRecord read : reader ) {
            if ( read.getDuplicateReadFlag() ) {
                ++duplicateCount;
            }
        }
    }

    return duplicateCount;
}
 
Example 15
Source Project: picard   Source File: MergeSamFilesTest.java    License: MIT License 6 votes vote down vote up
/**
 * Confirm that unsorted input can result in coordinate sorted output, with index created.
 */
@Test
public void unsortedInputSortedOutputTest() throws Exception {
    final File unsortedInputTestDataDir = new File(TEST_DATA_DIR, "unsorted_input");
    final File mergedOutput = File.createTempFile("unsortedInputSortedOutputTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    mergedOutput.deleteOnExit();
    final File mergedOutputIndex = new File(mergedOutput.getParent(), IOUtil.basename(mergedOutput)+ BAMIndex.BAI_INDEX_SUFFIX);
    mergedOutputIndex.deleteOnExit();

    final String[] args = {
            "I=" + new File(unsortedInputTestDataDir, "1.sam").getAbsolutePath(),
            "I=" + new File(unsortedInputTestDataDir, "2.sam").getAbsolutePath(),
            "O=" + mergedOutput.getAbsolutePath(),
            "CREATE_INDEX=true",
            "SO=coordinate"
    };
    final int mergeExitStatus = runPicardCommandLine(args);
    Assert.assertEquals(mergeExitStatus, 0);
    final SamReader reader = SamReaderFactory.makeDefault().open(mergedOutput);
    Assert.assertEquals(reader.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);
    Assert.assertTrue(reader.hasIndex());
    new ValidateSamTester().assertSamValid(mergedOutput);
    Assert.assertTrue(mergedOutputIndex.delete());
    CloserUtil.close(reader);
}
 
Example 16
Source Project: Drop-seq   Source File: SequenceDictionaryIntersectionTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testNoMatch() {
    final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam");
    final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile);
    try {
        final SAMSequenceDictionary nonMatchingDict = new SAMSequenceDictionary(
                leftDict.getFileHeader().getSequenceDictionary().getSequences().stream().
                        map((s) -> new SAMSequenceRecord("xyz_" + s.getSequenceName(), s.getSequenceLength())).
                        collect(Collectors.toList()));
        SequenceDictionaryIntersection sdi = new SequenceDictionaryIntersection(
                leftDict, leftDictFile.getName(), nonMatchingDict, "with prefixes");
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse with descriptions: " + sdi.message(false));
        System.out.println("Verbose with descriptions: " + sdi.message(true));

        sdi = new SequenceDictionaryIntersection(leftDict, nonMatchingDict);
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse without descriptions: " + sdi.message(false) + "\n");
        System.out.println("Verbose without descriptions: " + sdi.message(true) + "\n");
    } finally {
        CloserUtil.close(leftDict);
    }
}
 
Example 17
Source Project: Drop-seq   Source File: IntervalTagComparatorTest.java    License: MIT License 6 votes vote down vote up
/**
 * Tests ordering of a few SAMRecords without use of a dictionary file.
 * This means that chromosomes are sorted in natural string ordering.
 * The difference is illustrated below, assuming that the sequence dictionary file has chromosomes
 * in the order 1,2,3,4,5,6,7,8,9,10,11....so chromosome 2 has index 2, chromosome 10 has index 10.
 * > sort (c("2", "10"), decreasing=F)
 * [1] "10" "2"
 * > sort (c(2, 10), decreasing=F)
 * [1]  2 10
 */
@Test
public void orderRecordsTestWithDictFile () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(this.dictFile);

	List<SAMRecord> recs = getRecords();

	Integer a = 1;  // these start at 0
	Integer b = 9;
	int exp = a.compareTo(b);

	// when we sort without sequence dictionary, we expect the chr10 tagged read to show up first.
	IntervalTagComparator c = new IntervalTagComparator(this.intervalTag, inputSam.getFileHeader().getSequenceDictionary());
	int comp = c.compare(recs.get(0), recs.get(1));
	// assert sorting same order.
	Assert.assertEquals(exp>0,  comp>0);
}
 
Example 18
Source Project: picard   Source File: GatherBamFilesTest.java    License: MIT License 6 votes vote down vote up
@Test
public void sanityCheckTheGathering() throws Exception {
    final File outputFile = File.createTempFile("gatherBamFilesTest.samFile.", BamFileIoUtils.BAM_FILE_EXTENSION);
    outputFile.deleteOnExit();
    final List<String> args = new ArrayList<String>();
    for (final File splitBam : SPLIT_BAMS) {
        args.add("INPUT=" + splitBam.getAbsolutePath());
    }
    args.add("OUTPUT=" + outputFile);
    runPicardCommandLine(args);

    try (final SamReader samReader1 = SamReaderFactory.makeDefault().open(ORIG_BAM);
         final SamReader samReader2 = SamReaderFactory.makeDefault().open(SPLIT_BAMS.get(0))) {
        final SamComparison samComparison = new SamComparison(samReader1, samReader2);
        Assert.assertFalse(samComparison.areEqual());
    }
}
 
Example 19
Source Project: Drop-seq   Source File: MapBarcodesByEditDistanceTest.java    License: MIT License 6 votes vote down vote up
private UMISharingData readUMISharingData (File f, String cellBarcode, ParentEditDistanceMatcher matcher) {
	Map<String, Set<TagValues>> umisPerBarcode = new HashMap<String, Set<TagValues>>();
	ObjectCounter<String> barcodes = new ObjectCounter<>();
	
	SamReader reader = SamReaderFactory.makeDefault().open(UmiSharingData);
	SAMRecordIterator iter = reader.iterator();
	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		String barcode = r.getStringAttribute(cellBarcode);
		TagValues v = matcher.getValues(r);
		Set<TagValues> s = umisPerBarcode.get(barcode);
		if (s==null) {
			s = new HashSet<TagValues>();
			umisPerBarcode.put(barcode, s);
		}			
		if (!s.contains(v))
			barcodes.increment(barcode);
		s.add(v);
	}
	UMISharingData result = new UMISharingData();
	result.barcodes=barcodes;
	result.umisPerBarcode=umisPerBarcode;
	return result;
}
 
Example 20
Source Project: dataflow-java   Source File: BAMIO.java    License: Apache License 2.0 6 votes vote down vote up
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  SamReaderFactory samReaderFactory = SamReaderFactory
      .makeDefault()
      .validationStringency(stringency)
      .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }
  if (offset == 0) {
    return samReaderFactory.open(resource);
  }
  LOG.info("Initializing seeking reader with the offset of " + offset);
  SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource,
      false,
      stringency,
      DefaultSAMRecordFactory.getInstance(),
      offset);
  final SeekingReaderAdapter reader =
      new SeekingReaderAdapter(primitiveReader, resource);
  samReaderFactory.reapplyOptions(reader);
  return reader;
}
 
Example 21
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException {
    String inputPath = cmd.getOptionValue(INPUT);
    String vcfPath = cmd.getOptionValue(VCF);
    int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));

    SamReaderFactory readerFactory = createFromCommandLine(cmd);
    SamReader reader = readerFactory.open(new File(inputPath));

    QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    writeToSlice(writer, iterator);

    writer.close();
    reader.close();
}
 
Example 22
Source Project: picard   Source File: ReplaceSamHeader.java    License: MIT License 6 votes vote down vote up
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
        throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
    }
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
    for (final SAMRecord rec : recordReader) {
        rec.setHeader(replacementHeader);
        writer.addAlignment(rec);
        progress.record(rec);
    }
    writer.close();
    CloserUtil.close(recordReader);
}
 
Example 23
Source Project: picard   Source File: ReplaceSamHeader.java    License: MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(HEADER);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SAMFileHeader replacementHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(HEADER);

    if (BamFileIoUtils.isBamFile(INPUT)) {
        blockCopyReheader(replacementHeader);
    } else {
        standardReheader(replacementHeader);
    }

    return 0;
}
 
Example 24
public TumorContaminationEvidence(int typicalReadDepth, int minMappingQuality, int minBaseQuality, final String contig,
        final String bamFile, final SamReaderFactory samReaderFactory, final List<BaseDepth> baseDepths) {
    this.bafFactory = new BaseDepthFactory(minBaseQuality);
    this.contig = contig;
    this.bamFile = bamFile;
    this.samReaderFactory = samReaderFactory;
    this.evidenceMap = Maps.newHashMap();

    final List<ModifiableBaseDepth> tumorRecords = Lists.newArrayList();
    for (BaseDepth baseDepth : baseDepths) {
        ModifiableBaseDepth modifiableBaseDepth = BaseDepthFactory.create(baseDepth);
        evidenceMap.put(baseDepth, modifiableBaseDepth);
        tumorRecords.add(modifiableBaseDepth);
    }
    this.selector = GenomePositionSelectorFactory.create(tumorRecords);

    final GenomeRegions builder = new GenomeRegions(contig, typicalReadDepth);
    baseDepths.forEach(x -> builder.addPosition(x.position()));
    this.supplier = new SAMSlicer(minMappingQuality, builder.build());
}
 
Example 25
Source Project: hmftools   Source File: AmberApplication.java    License: GNU General Public License v3.0 6 votes vote down vote up
private void runTumorOnly() throws InterruptedException, ExecutionException, IOException {
    final SamReaderFactory readerFactory = readerFactory(config);

    final ListMultimap<Chromosome, BaseDepth> allNormal = emptyNormalHetSites(sites);
    final ListMultimap<Chromosome, TumorBAF> tumorBAFMap = tumorBAF(readerFactory, allNormal);

    final List<TumorBAF> tumorBAFList = tumorBAFMap.values()
            .stream()
            .filter(x -> x.tumorRefSupport() >= config.tumorOnlyMinSupport())
            .filter(x -> x.tumorAltSupport() >= config.tumorOnlyMinSupport())
            .filter(x -> isFinite(x.refFrequency()) && Doubles.greaterOrEqual(x.refFrequency(), config.tumorOnlyMinVaf()))
            .filter(x -> isFinite(x.altFrequency()) && Doubles.greaterOrEqual(x.altFrequency(), config.tumorOnlyMinVaf()))
            .sorted()
            .collect(toList());
    final List<AmberBAF> amberBAFList =
            tumorBAFList.stream().map(AmberBAF::create).filter(x -> Double.isFinite(x.tumorBAF())).collect(toList());

    persistence.persisQC(amberBAFList, Lists.newArrayList());
    persistence.persistVersionInfo(versionInfo);
    persistence.persistBafVcf(tumorBAFList, new AmberHetNormalEvidence());
    persistence.persistBAF(amberBAFList);
}
 
Example 26
Source Project: picard   Source File: CleanSamTest.java    License: MIT License 6 votes vote down vote up
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
    SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final SAMRecord rec = samReader.iterator().next();
    samReader.close();
    Assert.assertEquals(rec.getCigarString(), expectedCigar);
    samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile);
    final boolean validated = validator.validateSamFileVerbose(samReader, null);
    samReader.close();
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example 27
Source Project: Drop-seq   Source File: BamTagHistogram.java    License: MIT License 5 votes vote down vote up
public ObjectCounter<String> getBamTagCounts (final File bamFile, final String tag, final int readQuality, final boolean filterPCRDuplicates) {
SamReader inputSam = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
      try {
          return getBamTagCounts(inputSam.iterator(), tag, readQuality, filterPCRDuplicates);
      } finally {
          CloserUtil.close(inputSam);
      }
  }
 
Example 28
@Test
public void testPGOnByDefault() throws IOException {
    final File inFile = new File(TEST_DATA_DIR, "testPGOnByDefault.bam");
    final File outFile = GATKBaseTest.createTempFile("testNoConflictRG", ".bam");
    final String[] args = new String[] {
            //NOTE: no addOutputSAMProgramRecord argument
            "--input" , inFile.getAbsolutePath(),
            "--output", outFile.getAbsolutePath()
    };
    runCommandLine(args);

    //input has GATK PrintReads not NOT GATK PrintReads.1 in headers

    try(final SamReader reader1 = SamReaderFactory.makeDefault().open(inFile)){
        Assert.assertNotNull(reader1.getFileHeader().getProgramRecord("GATK PrintReads"));
    }

    try(final SamReader reader2 = SamReaderFactory.makeDefault().open(inFile)) {
        Assert.assertNull(reader2.getFileHeader().getProgramRecord("GATK PrintReads.1"));
    }
    //output has both GATK PrintReads and GATK PrintReads.1 in headers
    try(final SamReader reader3 = SamReaderFactory.makeDefault().open(outFile)) {
        Assert.assertNotNull(reader3.getFileHeader().getProgramRecord("GATK PrintReads"));
    }

    try(final SamReader reader4 = SamReaderFactory.makeDefault().open(outFile)) {
        Assert.assertNotNull(reader4.getFileHeader().getProgramRecord("GATK PrintReads.1"));
    }
}
 
Example 29
Source Project: Hadoop-BAM   Source File: TestSAMInputFormat.java    License: MIT License 5 votes vote down vote up
@Test
public void testMapReduceJob() throws Exception {
  Configuration conf = new Configuration();

  FileSystem fileSystem = FileSystem.get(conf);
  Path inputPath = new Path(input);
  Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SAMInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue(success);

  List<String> samStrings = new ArrayList<String>();
  SamReader samReader = SamReaderFactory.makeDefault().open(new File(input));
  for (SAMRecord r : samReader) {
    samStrings.add(r.getSAMString().trim());
  }
  samReader.close();

  File outputFile = new File(new File(outputPath.toUri()), "part-m-00000");
  BufferedReader br = new BufferedReader(new FileReader(outputFile));
  String line;
  int index = 0;
  while ((line = br.readLine()) != null) {
    String value = line.substring(line.indexOf("\t") + 1); // ignore key
    assertEquals(samStrings.get(index++), value);
  }
  br.close();
}
 
Example 30
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();
    }
}