htsjdk.samtools.ValidationStringency Java Examples

The following examples show how to use htsjdk.samtools.ValidationStringency. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Renders the first {@code maxRecords} alignment records of a SAM/BAM stream as SAM text.
 * <p>
 * Calls the static {@code SAMFileReader.setDefaultValidationStringency} with
 * {@code SILENT} before opening the reader. The reader wrapped around
 * {@code samBamStream} is not closed here; only the writer is.
 *
 * @param samBamStream stream to read SAM/BAM content from
 * @param maxRecords   maximum number of records to include in the output
 * @return the header and records as text, followed by a truncation notice when the
 *         input contained more than {@code maxRecords} records
 * @throws IOException if writing the truncation notice to the buffer fails
 */
public String printSamBam(InputStream samBamStream, int maxRecords) throws IOException {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader in = new SAMFileReader(samBamStream);
	SAMFileHeader header = in.getFileHeader();
	ByteArrayOutputStream buffer = new ByteArrayOutputStream();
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, true, buffer);
	int written = 0;
	// Set only when a record beyond the limit was actually seen, so an input with
	// exactly maxRecords records is not reported as truncated.
	boolean truncated = false;
	try {
		for (final SAMRecord rec : in) {
			if (written >= maxRecords) {
				truncated = true;
				break;
			}
			out.addAlignment(rec);
			written++;
		}
	} finally {
		// Close the writer before reading the buffer so pending output is flushed.
		closeIfPossible(out);
	}

	if (truncated) {
		buffer.write("SAM/BAM too long for viewing, truncated here!\n".getBytes());
	}

	return buffer.toString();
}
 
Example #2
Source File: BAMRecordReader.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Opens a {@link SamReader} over {@code in}, optionally paired with an index stream.
 *
 * @param in               stream holding the alignment data
 * @param inIndex          index stream to attach to the resource, or {@code null} for none
 * @param stringency       validation stringency to apply, or {@code null} to keep the factory default
 * @param useIntelInflater when {@code true}, the inflater factory from {@code IntelGKLAccessor} is installed
 * @return the opened reader
 */
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex,
		ValidationStringency stringency, boolean useIntelInflater) {
	// Describe the input first: data stream plus the optional index.
	final SamInputResource input = SamInputResource.of(in);
	if (inIndex != null) {
		input.index(inIndex);
	}

	// Then configure the factory that will open it.
	final SamReaderFactory factory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true)
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (stringency != null) {
		factory.validationStringency(stringency);
	}
	if (useIntelInflater) {
		factory.inflaterFactory(IntelGKLAccessor.newInflatorFactor());
	}

	return factory.open(input);
}
 
Example #3
Source File: SAMHeaderReader.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Reads the SAM file header from {@code in}. Does not close the stream.
 * <p>
 * The validation stringency and reference source are looked up from {@code conf};
 * each is applied to the reader factory only when the lookup returns non-null.
 *
 * @param in   stream to read the header from
 * @param conf configuration consulted for stringency and reference source
 * @return the header reported by the opened reader
 */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	final ValidationStringency configuredStringency = getValidationStringency(conf);

	final SamReaderFactory factory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (configuredStringency != null) {
		factory.validationStringency(configuredStringency);
	}

	final ReferenceSource configuredReference = getReferenceSource(conf);
	if (configuredReference != null) {
		factory.referenceSource(configuredReference);
	}

	final SamInputResource input = SamInputResource.of(in);
	return factory.open(input).getFileHeader();
}
 
Example #4
Source File: GetSortedBAMHeader.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Command-line entry point: reads the header of the file named by {@code args[0]},
 * sets its sort order to {@code coordinate}, and hands it to
 * {@code SAMOutputPreparer.prepareForRecords} together with an output stream on
 * {@code args[1]}.
 * <p>
 * Prints usage to {@code System.err} and exits with status 1 when fewer than two
 * arguments are supplied.
 *
 * @param args input path followed by output path
 * @throws IOException if the output file cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
	final boolean missingArguments = args.length < 2;
	if (missingArguments) {
		System.err.println(
			"Usage: GetSortedBAMHeader input output\n\n"+

			"Reads the BAM header from input (a standard BGZF-compressed BAM "+
			"file), and\nwrites it (BGZF-compressed, no terminator block) to "+
			"output. Sets the sort order\nindicated in the SAM header to "+
			"'coordinate'.");
		System.exit(1);
	}

	// NOTE(review): the reader returned by open(...) is never closed; presumably
	// harmless because the process ends right after — confirm.
	final File input = new File(args[0]);
	final SAMFileHeader header = SamReaderFactory.makeDefault()
			.validationStringency(ValidationStringency.SILENT)
			.setUseAsyncIo(false)
			.open(input)
			.getFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.coordinate);

	try (FileOutputStream out = new FileOutputStream(args[1])) {
		new SAMOutputPreparer().prepareForRecords(out, SAMFormat.BAM, header);
	}
}
 
Example #5
Source File: GetHetCoverageIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Runs the command line on {@code NON_STRICT_BAM_FILE} with
 * {@code --VALIDATION_STRINGENCY STRICT} and expects a {@code UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testNonStrictBAM() {
    final File normalCountsOutput = createTempFile("normal-test",".txt");
    final File tumorCountsOutput = createTempFile("tumor-test",".txt");

    // Flag names are the short argument names prefixed with a single dash.
    final String normalBamFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME;
    final String tumorBamFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME;
    final String snpFlag = "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME;
    final String referenceFlag = "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME;
    final String normalCountsFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME;
    final String tumorCountsFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME;

    final String[] commandLine = new String[] {
            normalBamFlag, NON_STRICT_BAM_FILE.getAbsolutePath(),
            tumorBamFlag, TUMOR_BAM_FILE.getAbsolutePath(),
            snpFlag, SNP_FILE.getAbsolutePath(),
            referenceFlag, REF_FILE.getAbsolutePath(),
            normalCountsFlag, normalCountsOutput.getAbsolutePath(),
            tumorCountsFlag, tumorCountsOutput.getAbsolutePath(),
            "--VALIDATION_STRINGENCY", ValidationStringency.STRICT.toString()
    };

    // should catch SAMFormatException and throw new UserException with --VALIDATION_STRINGENCY STRICT
    runCommandLine(commandLine);
}
 
Example #6
Source File: CleanSamTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Runs the Picard command line on {@code samFile}, then checks that the first record
 * of the output has {@code expectedCigar} and that the output passes
 * {@code SamFileValidator} (warnings and MISSING_READ_GROUP errors ignored).
 *
 * @param samFile       name of the input file under {@code TEST_DATA_DIR}
 * @param expectedCigar CIGAR string expected on the first output record
 * @throws IOException if the temp file cannot be created or a reader fails to close
 */
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

    // try-with-resources closes each reader even when reading or validation throws.
    final SAMRecord rec;
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        rec = samReader.iterator().next();
    }
    Assert.assertEquals(rec.getCigarString(), expectedCigar);

    // A fresh reader is opened so validation starts from the beginning of the file.
    final boolean validated;
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        validated = validator.validateSamFileVerbose(samReader, null);
    }
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example #7
Source File: QualityScoreStats.java    From cramtools with Apache License 2.0 6 votes vote down vote up
/**
 * Counts how often each quality-score byte value occurs across all records of a
 * CRAM file and passes the counts to {@code print}.
 *
 * @param file                CRAM file to read
 * @param defaultQualityScore score handed to {@code CramNormalizer.restoreQualityScores}
 *                            and to {@code print}
 * @throws IOException if the file cannot be opened, read or closed
 */
private static void dist(File file, byte defaultQualityScore) throws IllegalArgumentException, IOException,
		IllegalAccessException {
	// Indexed by (b & 0xFF), which spans 0..255, so 256 slots are required.
	long[] freq = new long[256];

	// try-with-resources so the file handle is released even if parsing fails.
	try (InputStream is = new FileInputStream(file)) {
		CramHeader header = CramIO.readCramHeader(is);
		Container c = null;
		ContainerParser parser = new ContainerParser(header.getSamFileHeader());
		ArrayList<CramCompressionRecord> records = new ArrayList<CramCompressionRecord>(10000);

		while ((c = ContainerIO.readContainer(header.getVersion(), is)) != null && !c.isEOF()) {
			parser.getRecords(c, records, ValidationStringency.SILENT);

			CramNormalizer.restoreQualityScores(defaultQualityScore, records);
			for (CramCompressionRecord record : records) {
				for (byte b : record.qualityScores)
					freq[b & 0xFF]++;
			}
			// The list is reused for the next container.
			records.clear();
		}
	}
	print(freq, defaultQualityScore, System.out);
}
 
Example #8
Source File: ReplaceSamHeader.java    From picard with MIT License 6 votes vote down vote up
/**
 * Copies every record from {@code INPUT} to {@code OUTPUT}, attaching
 * {@code replacementHeader} to each record and writing it under that header.
 *
 * @param replacementHeader header to write and to set on each record
 * @throws PicardException if the sort order of {@code INPUT} differs from that of
 *                         {@code replacementHeader}
 */
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    try {
        if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
            throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                    ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);
        try {
            final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
            for (final SAMRecord rec : recordReader) {
                rec.setHeader(replacementHeader);
                writer.addAlignment(rec);
                progress.record(rec);
            }
        } finally {
            // Closed directly (not via CloserUtil) so a failure while closing the output still propagates.
            writer.close();
        }
    } finally {
        // Runs on the sort-order mismatch path too, which previously leaked the reader.
        CloserUtil.close(recordReader);
    }
}
 
Example #9
Source File: SortVcf.java    From picard with MIT License 6 votes vote down vote up
/**
 * Adds the contents of every input reader to one SortingCollection, which sorts the
 * merged records.
 * <p/>
 * NB: A merging iterator, as in MergeSamFiles, would perform better for pre-sorted inputs.
 * Inputs are assumed to be unsorted here, so adding their VariantContexts one by one is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF; each is closed after it is consumed
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the collection holding every record read from {@code readers}
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // Second codec argument is true for every stringency other than STRICT.
    final boolean notStrict = VALIDATION_STRINGENCY != ValidationStringency.STRICT;

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class,
            new VCFRecordCodec(outputHeader, notStrict),
            outputHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    int inputNumber = 0;
    for (final VCFFileReader input : readers) {
        inputNumber++;
        log.info("Reading entries from input file " + inputNumber);
        for (final VariantContext record : input) {
            sorter.add(record);
            readProgress.record(record.getContig(), record.getStart());
        }
        input.close();
    }
    return sorter;
}
 
Example #10
Source File: IlluminaLaneMetricsCollectorTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Silently continue if we encounter a tile without phasing/pre-phasing metrics.
 * <p>
 * Runs {@code CollectIlluminaLaneMetrics} with SILENT stringency, once with and once
 * without a read structure, and compares both output files against the canonical
 * files in the run directory.
 */
@Test
public void testMissingPhasingValuesSilent() throws IOException {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    for (final boolean useReadStructure : Arrays.asList(true, false)) {
        final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;
        final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics();
        clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        try {
            clp.RUN_DIRECTORY = runDirectory;
            clp.OUTPUT_PREFIX = "test";
            clp.VALIDATION_STRINGENCY = ValidationStringency.SILENT;
            if (useReadStructure) clp.READ_STRUCTURE = readStructure;
            clp.doWork();

            final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
            final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile);

            final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
            final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile);
        } finally {
            // Remove the temp directory even when doWork or a comparison fails.
            IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY);
        }
    }
}
 
Example #11
Source File: IlluminaLaneMetricsCollectorTest.java    From picard with MIT License 6 votes vote down vote up
/**
 * Ensures that an exception is thrown when we encounter a tile without phasing/pre-phasing metrics.
 * <p>
 * NOTE(review): if {@code doWork()} throws on the first iteration as the annotation
 * expects, the method exits there, so the {@code useReadStructure == false} case and
 * the file comparisons below never run — confirm whether that is intended.
 */
@Test(expectedExceptions = PicardException.class)
public void testMissingPhasingValuesStrict() {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    for (final boolean useReadStructure : Arrays.asList(true, false)) {
        final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;
        final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics();
        clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        try {
            clp.RUN_DIRECTORY = runDirectory;
            clp.OUTPUT_PREFIX = "test";
            clp.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
            if (useReadStructure) clp.READ_STRUCTURE = readStructure;
            clp.doWork();

            final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
            final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile);

            final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
            final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
            IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile);
        } finally {
            // The expected exception skips everything after doWork(), so cleanup has to live here.
            IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY);
        }
    }
}
 
Example #12
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
/**
 * Rewrites {@code samBamFile} as a BAM file at {@code sortedBamFile}, with the header's
 * sort order set to {@code coordinate} and the writer created with {@code presorted = false}.
 * <p>
 * Calls the static {@code SAMFileReader.setDefaultValidationStringency} with {@code SILENT}.
 *
 * @param samBamFile    input SAM/BAM file
 * @param sortedBamFile destination BAM file
 */
public static void sortSamBam(File samBamFile, File sortedBamFile) {

	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader source = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter sink = null;
	try {
		// The header object is mutated in place and then handed to the writer.
		SAMFileHeader header = source.getFileHeader();
		header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
		sink = new SAMFileWriterFactory().makeBAMWriter(header, false, sortedBamFile);

		for (SAMRecord record : source) {
			sink.addAlignment(record);
		}
	} finally {
		closeIfPossible(source);
		closeIfPossible(sink);
	}
}
 
Example #13
Source File: BAMIO.java    From dataflow-java with Apache License 2.0 6 votes vote down vote up
/**
 * Opens a reader on {@code resource}. With {@code offset == 0} the reader comes
 * straight from the factory; otherwise a {@code SeekingBAMFileReader} is created at
 * {@code offset}, wrapped in a {@code SeekingReaderAdapter}, and the factory options
 * are re-applied to the adapter.
 *
 * @param resource          BAM input to open
 * @param stringency        validation stringency for the reader
 * @param includeFileSource when {@code true}, enables {@code INCLUDE_SOURCE_IN_RECORDS}
 * @param offset            offset passed to the seeking reader; 0 selects the plain factory reader
 * @return the opened reader
 */
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  SamReaderFactory factory = SamReaderFactory.makeDefault()
      .validationStringency(stringency)
      .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    factory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }

  if (offset != 0) {
    LOG.info("Initializing seeking reader with the offset of " + offset);
    SeekingBAMFileReader seekingReader = new SeekingBAMFileReader(
        resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset);
    final SeekingReaderAdapter adapter = new SeekingReaderAdapter(seekingReader, resource);
    factory.reapplyOptions(adapter);
    return adapter;
  }

  return factory.open(resource);
}
 
Example #14
Source File: GTFParser.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Parses the next row into a {@code GTFRecord}.
 * <p>
 * Unless the stringency is SILENT the record is validated; validation problems throw
 * under STRICT and are logged as a warning otherwise.
 *
 * @return the parsed record
 * @throws AnnotationException if the row has the wrong number of fields, or if the
 *                             record is invalid and the stringency is STRICT
 */
@Override
public GTFRecord next() {
    final TabbedTextFileWithHeaderParser.Row currentRow = it.next();
    final boolean fieldCountMatches = currentRow.getFields().length == GTFColumnLabels.length;
    if (!fieldCountMatches) {
        throw new AnnotationException("Wrong number of fields in GTF file " + gtfFile + " at line " +
                currentRow.getCurrentLine());
    }

    final GTFRecord record = parseLine(currentRow);

    // Validation is skipped entirely under SILENT.
    final List<String> problems =
            validationStringency == ValidationStringency.SILENT ? null : record.validate();
    if (problems != null && !problems.isEmpty()) {
        final String message = String.format(
                "Invalid GTF line: \n%s\nProblems:\n%s",
                currentRow.getCurrentLine(),
                CollectionUtil.join(problems, "\n"));
        if (validationStringency == ValidationStringency.STRICT) {
            throw new AnnotationException(message);
        }
        LOG.warn(message);
    }

    progressLogger.record(record.getChromosome(), record.getStart());
    return record;
}
 
Example #15
Source File: FingerprintChecker.java    From picard with MIT License 6 votes vote down vote up
/**
 * Handles a read that has no read group.
 * <p>
 * Under STRICT stringency the problem is logged as an error and thrown. Otherwise a
 * placeholder read group is built for the file; unless the stringency is SILENT, a
 * warning is logged the first time {@code samFile} is added to {@code missingRGFiles}.
 *
 * @param samFile file the read came from
 * @param rec     the read lacking a read group
 * @return details built from the placeholder read group and the file's URI
 * @throws PicardException under STRICT stringency
 */
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) {
    final PicardException e = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile);

    if (validationStringency == ValidationStringency.STRICT) {
        log.error(e.getMessage());
        throw e;
    }

    final String fileUri = samFile.toUri().toString();
    final SAMReadGroupRecord placeholderGroup = new SAMReadGroupRecord("<UNKNOWN>:::" + fileUri);
    placeholderGroup.setLibrary("<UNKNOWN>");
    placeholderGroup.setSample(defaultSampleID);
    placeholderGroup.setPlatformUnit("<UNKNOWN>.0.ZZZ");

    // missingRGFiles.add is only evaluated when the stringency is not SILENT.
    final boolean shouldWarn = validationStringency != ValidationStringency.SILENT
            && missingRGFiles.add(samFile);
    if (shouldWarn) {
        log.warn(e.getMessage());
        log.warn("further messages from this file will be suppressed");
    }

    return new FingerprintIdDetails(placeholderGroup, fileUri);
}
 
Example #16
Source File: HtsgetReaderIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Runs the tool against {@code ENDPOINT} with the supplied parameters and compares
 * the produced BAM to the expected file using LENIENT stringency.
 *
 * @param params           argument name/value pairs added to the command line
 * @param expectedFileName name of the expected BAM in the tool's test data directory
 */
@Test(dataProvider = "successfulParameters")
public void testSuccessfulParameters(final Map<String, String> params, final String expectedFileName) throws IOException {
    final File actualBam = createTempFile("output", ".bam");
    final File expectedBam = new File(getToolTestDataDir(), expectedFileName);

    final ArgumentsBuilder commandLine = new ArgumentsBuilder()
        .add(HtsgetReader.URL_LONG_NAME, ENDPOINT)
        .addOutput(actualBam);
    for (final Map.Entry<String, String> param : params.entrySet()) {
        commandLine.add(param.getKey(), param.getValue());
    }

    runCommandLine(commandLine);
    SamAssertionUtils.assertEqualBamFiles(actualBam, expectedBam, false, ValidationStringency.LENIENT);
}
 
Example #17
Source File: CleanSamTester.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that every record in the output (and its mate CIGAR, when present) has
 * {@code expectedCigar}, then runs {@code SamFileValidator} over the output.
 * The output directory is deleted afterwards regardless of the outcome.
 */
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);

        // Validate it has the expected cigar
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
        SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        SamReader samReader = factory.open(getOutput());
        try {
            final SAMRecordIterator iterator = samReader.iterator();
            while (iterator.hasNext()) {
                final SAMRecord rec = iterator.next();
                Assert.assertEquals(rec.getCigarString(), expectedCigar);
                if (SAMUtils.hasMateCigar(rec)) {
                    Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar);
                }
            }
        } finally {
            // Close even when an assertion fails, so the outer cleanup is not holding an open reader.
            CloserUtil.close(samReader);
        }

        // Run validation on the output file
        samReader = factory.open(getOutput());
        final boolean validated;
        try {
            validated = validator.validateSamFileVerbose(samReader, null);
        } finally {
            CloserUtil.close(samReader);
        }

        Assert.assertTrue(validated, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example #18
Source File: BaseRecalibratorSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Disabled workflow test: runs BaseRecalibratorSpark, then ApplyBQSRSpark, then
 * BaseRecalibratorSpark again on the recalibrated BAM, and compares the recalibrated
 * BAM and the second table against expected files.
 */
@Test(description = "This is to test https://github.com/broadinstitute/hellbender/issues/322", groups = {"cloud", "spark"}, enabled = false)
public void testPlottingWorkflow() throws IOException {
    final String bqsrDir = getTestDataDir() + "/" + "BQSR" + "/";
    final String reference = GCS_b37_CHR20_21_REFERENCE_2BIT;
    final String knownSites = bqsrDir + DBSNP_138_B37_CH20_1M_1M1K_VCF;
    final String inputBam = getResourceDir() + WGS_B37_CH20_1M_1M1K_BAM;

    final File recalibratedBam = createTempFile("actual.recalibrated", ".bam");

    // Step 1: build the pre-recalibration table.
    // NOTE(review): there is no space between the reference path and "-indels", so the two
    // are concatenated into one token — looks unintended, confirm before enabling this test.
    final String preTable = createTempFile("gatk4.pre.cols", ".table").getAbsolutePath();
    final String preArgs = " -R " + reference + "-indels --enable-baq " +" --known-sites " + knownSites + " -I " + inputBam
            + " -O " + preTable;
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(preArgs));

    // Step 2: apply the table to produce the recalibrated BAM.
    final String applyArgs = "-I " + inputBam + " --bqsr-recal-file " + preTable + " -O " + recalibratedBam.getAbsolutePath();
    new ApplyBQSRSpark().instanceMain(Utils.escapeExpressions(applyArgs));

    // Step 3: build the post-recalibration table from the recalibrated BAM.
    final File postTable = createTempFile("gatk4.post.cols", ".table");
    final String postArgs = " -R " + reference + "-indels --enable-baq " +" --known-sites " + knownSites + " -I " + recalibratedBam.getAbsolutePath()
            + " -O " + postTable.getAbsolutePath();
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(postArgs));

    // Compare both outputs against the checked-in expectations.
    final File expectedBam = new File(bqsrDir + "expected.NA12878.chr17_69k_70k.dictFix.recalibrated.DIQ.bam");
    SamAssertionUtils.assertSamsEqual(recalibratedBam, expectedBam, ValidationStringency.LENIENT);

    final File expectedPostTable = new File(getResourceDir() + "expected.NA12878.chr17_69k_70k.postRecalibrated.txt");
    IntegrationTestSpec.assertEqualTextFiles(postTable, expectedPostTable);
}
 
Example #19
Source File: BamDataSource.java    From chipster with MIT License 5 votes vote down vote up
/**
  * Generally we would like to have both data and index files,
  * because otherwise we cannot access random locations.
  * <p>
  * While the reader is created and the header is scanned, {@code System.err} is
  * replaced by a discarding stream; the original stream is restored before the
  * constructor returns or throws.
  *
  * @param data  data location; passed to the superclass and used to open the reader
  * @param index index location used to open the reader
  * @throws URISyntaxException
  * @throws IOException
  */
 public BamDataSource(DataUrl data, DataUrl index) throws URISyntaxException, IOException {
     super(data);

     // BAMFileReader emits useless warning to System.err that can't be turned off,
     // so we direct it to other stream and discard.
     PrintStream originalErr = System.err;
     System.setErr(new PrintStream(new ByteArrayOutputStream()));
     try {
         SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
         this.reader = SamBamUtils.getSAMReader(data.getUrl(), index.getUrl());

         // Collect every sequence name from the header's sequence dictionary.
         LinkedList<String> chrList = new LinkedList<>();
         for (SAMSequenceRecord sequenceRecord : this.reader.getFileHeader().getSequenceDictionary().getSequences()) {
             chrList.add(sequenceRecord.getSequenceName());
         }

         // Create unnormaliser for this naming convention from the collected names.
         this.chromosomeNameUnnormaliser = new ChromosomeNameUnnormaliser(chrList);
     } finally {
         // Restore System.err even when opening the reader fails; otherwise all later
         // error output of the process would be silently discarded.
         System.setErr(originalErr);
     }
 }
 
Example #20
Source File: HaplotypeBAMWriterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Writes a BAM via {@code testWriteToFile} and compares it with {@code testBAM.bam}
 * from {@code expectedFilePath} using the default validation stringency.
 */
@Test(dataProvider = "ReadsLikelikhoodData")
public void testWriteToBAMFileWithMD5(
        @SuppressWarnings("unused") final String haplotypeBaseSignature,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods) throws IOException {
    final File expectedBam = new File(expectedFilePath, "testBAM.bam");

    // create output BAM file
    final Path actualBam = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, false, true);

    SamAssertionUtils.assertEqualBamFiles(actualBam.toFile(), expectedBam, false, ValidationStringency.DEFAULT_STRINGENCY);
}
 
Example #21
Source File: ConvertHeaderlessHadoopBamShardToBam.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Reads the header from {@code bamWithHeader} (SILENT stringency) and passes it,
 * with {@code bamShard} and {@code outputBam}, to
 * {@code SparkUtils.convertHeaderlessHadoopBamShardToBam}.
 *
 * @return always {@code null}
 * @throws UserException if reading the header fails with an {@code IOException}
 */
@Override
protected Object doWork(){
    final SAMFileHeader shardHeader;
    try ( final SamReader headerSource = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .open(bamWithHeader) ) {
        shardHeader = headerSource.getFileHeader();
    }
    catch ( IOException ioFailure ) {
        throw new UserException("Error reading header from " + bamWithHeader.getAbsolutePath(), ioFailure);
    }

    SparkUtils.convertHeaderlessHadoopBamShardToBam(bamShard, shardHeader, outputBam);
    return null;
}
 
Example #22
Source File: CramToBam_OBA_Function.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Converts one serialized CRAM container into encoded BAM records written through
 * a {@code BlockCompressedOutputStream}.
 * <p>
 * The compressed stream is flushed but not closed before its bytes are collected.
 *
 * @param object container bytes plus ordering key; must not be {@code null}
 * @return a new array with the same {@code order} and the converted bytes, or
 *         {@code null} when the container is the EOF container
 * @throws NullPointerException if {@code object} is {@code null}
 * @throws RuntimeException     wrapping any IOException, IllegalArgumentException or
 *                              IllegalAccessException raised during conversion
 */
@Override
public OrderedByteArray apply(OrderedByteArray object) {
	if (object == null)
		throw new NullPointerException();

	log.debug("processing container " + object.order);
	try {
		final Container cramContainer = ContainerIO.readContainer(
				header.getVersion(), new ByteArrayInputStream(object.bytes));
		if (cramContainer.isEOF())
			return null;

		// Decode and normalize the CRAM records held by this container.
		final ArrayList<CramCompressionRecord> cramRecords =
				new ArrayList<CramCompressionRecord>(cramContainer.nofRecords);
		parser.getRecords(cramContainer, cramRecords, ValidationStringency.SILENT);
		n.normalize(cramRecords, null, 0, cramContainer.header.substitutionMatrix);

		// Re-encode each record through the shared codec into an in-memory buffer.
		final ByteArrayOutputStream bamBytes = new ByteArrayOutputStream();
		final BlockCompressedOutputStream compressed = new BlockCompressedOutputStream(bamBytes, null);
		codec.setOutputStream(compressed);
		for (final CramCompressionRecord cramRecord : cramRecords) {
			codec.encode(f.create(cramRecord));
		}
		compressed.flush();

		final OrderedByteArray converted = new OrderedByteArray();
		converted.bytes = bamBytes.toByteArray();
		converted.order = object.order;
		log.debug(String.format("Converted OBA %d, records %d", object.order, cramRecords.size()));
		return converted;
	} catch (IOException | IllegalArgumentException | IllegalAccessException e) {
		throw new RuntimeException(e);
	}
}
 
Example #23
Source File: SAMRecordReader.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Opens a {@link SamReader} over {@code in} with eager decoding and async I/O disabled.
 *
 * @param in         stream holding the alignment data
 * @param stringency validation stringency to apply, or {@code null} to keep the factory default
 * @return the opened reader
 */
private SamReader createSamReader(InputStream in, ValidationStringency stringency) {
	final SamReaderFactory factory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);

	final boolean stringencyGiven = stringency != null;
	if (stringencyGiven) {
		factory.validationStringency(stringency);
	}

	final SamInputResource input = SamInputResource.of(in);
	return factory.open(input);
}
 
Example #24
Source File: TestVCFInputFormatStringency.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Reads {@code invalid_info_field.vcf} through {@code VCFInputFormat} and asserts that
 * exactly one split is produced and that four non-null records are returned.
 *
 * @param validationStringency stringency to store in the configuration, or
 *                             {@code null} to leave the configuration untouched
 * @throws Exception if reading fails
 */
public void checkReading(ValidationStringency validationStringency) throws Exception {
    String filename = "invalid_info_field.vcf";
    Configuration conf = new Configuration();
    String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
    conf.set("mapred.input.dir", "file://" + input_file);

    if (validationStringency != null) {
        VCFRecordReader.setValidationStringency(conf, validationStringency);
    }

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

    VCFInputFormat inputFormat = new VCFInputFormat(conf);
    List<InputSplit> splits = inputFormat.getSplits(ctx);
    assertEquals(1, splits.size());

    int counter = 0;
    // try-with-resources so the record reader is closed even when reading throws.
    try (RecordReader<LongWritable, VariantContextWritable> reader =
            inputFormat.createRecordReader(splits.get(0), taskAttemptContext)) {
        while (reader.nextKeyValue()) {
            VariantContextWritable writable = reader.getCurrentValue();
            assertNotNull(writable);
            VariantContext vc = writable.get();
            assertNotNull(vc);
            String value = vc.toString();
            assertNotNull(value);
            counter++;
        }
    }
    assertEquals(4, counter);
}
 
Example #25
Source File: ShardedBAMWriting.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the collection of reads for the configured BAM file using sharded reading
 * with the {@code BYTE_SIZE_POLICY_10MB} sharding policy.
 */
private static PCollection<Read> getReadsFromBAMFile() throws IOException, URISyntaxException {
  LOG.info("Sharded reading of " + pipelineOptions.getBAMFilePath());

  /*
   * Policy used to shard Reads.
   * By default we are using the default sharding supplied by the policy class.
   * If you want custom sharding, use the following pattern:
   *
   *    shardingPolicy = new ShardingPolicy() {
   *     @Override
   *     public boolean shardBigEnough(BAMShard shard) {
   *       return shard.sizeInLoci() > 50000000;
   *     }
   *   };
   */
  final ShardingPolicy shardingPolicy = ShardingPolicy.BYTE_SIZE_POLICY_10MB;

  final ReaderOptions options =
      new ReaderOptions(ValidationStringency.DEFAULT_STRINGENCY, true);

  // TODO: change this to ReadBAMTransform.getReadsFromBAMFilesSharded when
  // https://github.com/googlegenomics/dataflow-java/issues/214 is fixed.
  return ReadBAMTransform.getReadsFromBAMFileSharded(
      pipeline,
      pipelineOptions,
      auth,
      contigs,
      options,
      pipelineOptions.getBAMFilePath(),
      shardingPolicy);
}
 
Example #26
Source File: PathSeqBwaSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Runs PathSeqBwaSpark on a paired input BAM and compares the paired output with the
 * expected BAM using LENIENT stringency.
 * <p>
 * NOTE(review): this {@code @Test} method is private; test frameworks commonly run
 * only public test methods — confirm it is actually executed.
 *
 * @param inputBamFilename    test-file name of the input BAM
 * @param expectedBamFilename test-file name of the expected output BAM
 */
@Test(dataProvider = "pathseqBwaTestData")
private void testBwaTool(final String inputBamFilename, final String expectedBamFilename) throws Exception {
    final File pairedInput = getTestFile(inputBamFilename);
    final File pairedOutput = createTempFile("paired_output", ".bam");

    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.add(PathSeqBwaSpark.PAIRED_INPUT_LONG_NAME, pairedInput);
    commandLine.add(PathSeqBwaSpark.PAIRED_OUTPUT_LONG_NAME, pairedOutput);
    commandLine.add(PSBwaArgumentCollection.MICROBE_BWA_IMAGE_LONG_NAME, IMAGE_PATH);
    commandLine.add(PSBwaArgumentCollection.MICROBE_REF_DICT_LONG_NAME, REF_DICT_PATH);
    this.runCommandLine(commandLine.getArgsArray());

    final File expectedOutput = getTestFile(expectedBamFilename);
    SamAssertionUtils.assertSamsEqual(pairedOutput, expectedOutput, ValidationStringency.LENIENT);
}
 
Example #27
Source File: CountReads.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the collection of reads for the configured BAM file.
 * <p>
 * When {@code isShardBAMReading()} is set, reads come from the sharded transform with
 * a policy based on {@code getMaxShardSizeBytes()}; otherwise the file is read
 * sequentially for the first requested contig only.
 */
private static PCollection<Read> getReadsFromBAMFile() throws IOException, URISyntaxException {
  LOG.info("getReadsFromBAMFile");

  final Iterable<Contig> contigs = Contig.parseContigsFromCommandLine(pipelineOptions.getReferences());
  final ReaderOptions options = new ReaderOptions(
      ValidationStringency.LENIENT,
      pipelineOptions.isIncludeUnmapped());

  if (!pipelineOptions.isShardBAMReading()) {
    // For testing and comparing sharded vs. not sharded only
    LOG.info("Unsharded reading of " + pipelineOptions.getBAMFilePath());
    return p.apply(
        Create.of(
            Reader.readSequentiallyForTesting(
                GCSOptions.Methods.createStorageClient(pipelineOptions, auth),
                pipelineOptions.getBAMFilePath(),
                contigs.iterator().next(),
                options)));
  }

  LOG.info("Sharded reading of "+ pipelineOptions.getBAMFilePath());

  // The policy's apply returns true once a shard's approximate size exceeds the configured maximum.
  ShardingPolicy sizeLimitedPolicy = new ShardingPolicy() {
    final int maxBytesPerShard = pipelineOptions.getMaxShardSizeBytes();
    @Override
    public Boolean apply(BAMShard shard) {
      return shard.approximateSizeInBytes() > maxBytesPerShard;
    }
  };

  return ReadBAMTransform.getReadsFromBAMFilesSharded(p,
      pipelineOptions,
      auth,
      Lists.newArrayList(contigs),
      options,
      pipelineOptions.getBAMFilePath(),
      sizeLimitedPolicy);
}
 
Example #28
Source File: BAMIO.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the index for {@code gcsStoragePath}, then opens a BAM reader that uses it,
 * and returns both.
 *
 * @param storageClient  client used to access storage
 * @param gcsStoragePath path of the BAM file
 * @param stringency     validation stringency for the reader
 * @return holder whose {@code index} and {@code reader} fields are both populated
 */
public static ReaderAndIndex openBAMAndExposeIndex(Storage.Objects storageClient, String gcsStoragePath, ValidationStringency stringency) throws IOException {
  final ReaderAndIndex opened = new ReaderAndIndex();

  // The index is opened first because opening the BAM file takes it as an argument.
  opened.index = openIndexForPath(storageClient, gcsStoragePath);
  opened.reader = openBAMReader(
      openBAMFile(storageClient, gcsStoragePath, opened.index),
      stringency,
      false,
      0);

  return opened;
}
 
Example #29
Source File: HeaderInfo.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the header of a BAM file and determines the first shard from the first read
 * whose alignment start lies inside the first contig.
 * <p>
 * The first contig is the one returned by {@code getFirstExplicitContigOrNull}; when
 * that is {@code null}, sequence 0 of the header is used with start and end of -1.
 *
 * @param storage                    client used to access storage
 * @param BAMPath                    path of the BAM file
 * @param explicitlyRequestedContigs contigs passed to {@code getFirstExplicitContigOrNull}
 * @return the header together with the first shard
 * @throws IOException if no suitable read is found for the first contig, or if
 *                     opening or closing the reader fails
 */
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException {
  HeaderInfo result = null;

  // Open and read start of BAM
  LOG.info("Reading header from " + BAMPath);
  final SamReader samReader = BAMIO
      .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY);
  Contig firstContig;
  try {
    final SAMFileHeader header = samReader.getFileHeader();
    firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs);
    if (firstContig == null) {
      final SAMSequenceRecord seqRecord = header.getSequence(0);
      firstContig = new Contig(seqRecord.getSequenceName(), -1, -1);
    }

    LOG.info("Reading first chunk of reads from " + BAMPath);
    final SAMRecordIterator recordIterator = samReader.query(
        firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false);
    try {
      Contig firstShard = null;
      // Stops at the first read that yields a result.
      while (recordIterator.hasNext() && result == null) {
        SAMRecord record = recordIterator.next();
        final int alignmentStart = record.getAlignmentStart();
        if (firstShard == null && alignmentStart > firstContig.start &&
            (alignmentStart < firstContig.end || firstContig.end == -1)) {
          firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart);
          LOG.info("Determined first shard to be " + firstShard);
          result = new HeaderInfo(header, firstShard);
        }
      }
    } finally {
      recordIterator.close();
    }
  } finally {
    // Released on every path, including exceptions raised while querying or iterating.
    samReader.close();
  }

  if (result == null) {
    throw new IOException("Did not find reads for the first contig " + firstContig.toString());
  }
  LOG.info("Finished header reading from " + BAMPath);
  return result;
}
 
Example #30
Source File: Sharder.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the BAM at {@code filePath} with its index and populates the {@code reader},
 * {@code indexStream}, {@code header}, {@code hasIndex} and {@code index} fields.
 * {@code index} is {@code null} when the reader reports no browseable index.
 */
void openFile() throws IOException {
  final BAMIO.ReaderAndIndex opened =
      BAMIO.openBAMAndExposeIndex(storageClient, filePath, ValidationStringency.DEFAULT_STRINGENCY);
  reader = opened.reader;
  indexStream = opened.index;
  header = reader.getFileHeader();

  // indexing() is only consulted when the reader reports an index at all.
  hasIndex = reader.hasIndex() && reader.indexing().hasBrowseableIndex();
  LOG.info("Has index = " + hasIndex);

  index = hasIndex
      ? new BAMFileIndexImpl(
          IOUtil.maybeBufferedSeekableStream(indexStream), header.getSequenceDictionary())
      : null;
}