Java Code Examples for htsjdk.samtools.ValidationStringency

The following examples show how to use htsjdk.samtools.ValidationStringency. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Drop-seq   Source File: GTFParser.java    License: MIT License 6 votes vote down vote up
/**
 * Reads the next row from the underlying parser and converts it into a {@link GTFRecord}.
 * <p>
 * Unless the validation stringency is SILENT the record is validated; problems are thrown as an
 * {@link AnnotationException} under STRICT and logged as a warning otherwise.
 *
 * @return the parsed record
 * @throws AnnotationException if the row has the wrong number of fields, or if the record is
 *                             invalid and the stringency is STRICT
 */
@Override
public GTFRecord next() {
    final TabbedTextFileWithHeaderParser.Row currentRow = it.next();
    final int fieldCount = currentRow.getFields().length;
    if (fieldCount != GTFColumnLabels.length) {
        throw new AnnotationException("Wrong number of fields in GTF file " + gtfFile + " at line " +
                currentRow.getCurrentLine());
    }

    final GTFRecord record = parseLine(currentRow);

    // Validation is skipped entirely when the caller asked for SILENT.
    final List<String> problems =
            validationStringency == ValidationStringency.SILENT ? null : record.validate();
    if (problems != null && !problems.isEmpty()) {
        final String message = String.format(
                "Invalid GTF line: \n%s\nProblems:\n%s",
                currentRow.getCurrentLine(),
                CollectionUtil.join(problems, "\n"));
        if (validationStringency == ValidationStringency.STRICT) {
            throw new AnnotationException(message);
        }
        LOG.warn(message);
    }

    progressLogger.record(record.getChromosome(), record.getStart());
    return record;
}
 
Example 2
Source Project: cramtools   Source File: QualityScoreStats.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes the distribution of quality score byte values over all records of a CRAM file and
 * prints it to standard output.
 *
 * @param file                the CRAM file to read
 * @param defaultQualityScore the score passed to {@code CramNormalizer.restoreQualityScores} and
 *                            to {@code print}
 * @throws IOException if the file cannot be opened or read
 */
private static void dist(File file, byte defaultQualityScore) throws IllegalArgumentException, IOException,
		IllegalAccessException {
	// One bin per possible unsigned byte value (0..255). The index below is (b & 0xFF), so the
	// array needs 256 slots; 255 slots would overflow on a score byte of 0xFF.
	final long[] freq = new long[256];
	final ArrayList<CramCompressionRecord> records = new ArrayList<CramCompressionRecord>(10000);

	// try-with-resources guarantees the file handle is released even if parsing fails.
	try (InputStream is = new FileInputStream(file)) {
		final CramHeader header = CramIO.readCramHeader(is);
		final ContainerParser parser = new ContainerParser(header.getSamFileHeader());

		Container c;
		while ((c = ContainerIO.readContainer(header.getVersion(), is)) != null && !c.isEOF()) {
			parser.getRecords(c, records, ValidationStringency.SILENT);

			CramNormalizer.restoreQualityScores(defaultQualityScore, records);
			for (CramCompressionRecord record : records) {
				for (byte b : record.qualityScores) {
					freq[b & 0xFF]++;
				}
			}
			// The list is reused across containers, so empty it before the next read.
			records.clear();
		}
	}
	print(freq, defaultQualityScore, System.out);
}
 
Example 3
/**
 * Runs the tool with {@code --VALIDATION_STRINGENCY STRICT} on {@code NON_STRICT_BAM_FILE} as the
 * normal input and expects a {@link UserException}.
 */
@Test(expectedExceptions = UserException.class)
public void testNonStrictBAM() {
    final File normalCountsOutput = createTempFile("normal-test",".txt");
    final File tumorCountsOutput = createTempFile("tumor-test",".txt");

    final String normalBamFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME;
    final String tumorBamFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME;
    final String snpFlag = "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME;
    final String referenceFlag = "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME;
    final String normalCountsFlag = "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME;
    final String tumorCountsFlag = "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME;

    final String[] commandLine = new String[] {
            normalBamFlag, NON_STRICT_BAM_FILE.getAbsolutePath(),
            tumorBamFlag, TUMOR_BAM_FILE.getAbsolutePath(),
            snpFlag, SNP_FILE.getAbsolutePath(),
            referenceFlag, REF_FILE.getAbsolutePath(),
            normalCountsFlag, normalCountsOutput.getAbsolutePath(),
            tumorCountsFlag, tumorCountsOutput.getAbsolutePath(),
            "--VALIDATION_STRINGENCY", ValidationStringency.STRICT.toString()
    };

    // Should catch SAMFormatException and throw new UserException with --VALIDATION_STRINGENCY STRICT.
    runCommandLine(commandLine);
}
 
Example 4
Source Project: picard   Source File: ReplaceSamHeader.java    License: MIT License 6 votes vote down vote up
/**
 * Copies every record of INPUT to OUTPUT, attaching the replacement header to each record.
 * <p>
 * The reader and the writer are released on every exit path, including the sort-order mismatch
 * and any failure while copying records.
 *
 * @param replacementHeader the header to write to OUTPUT and set on each record
 * @throws PicardException if the sort order of INPUT differs from that of the replacement header
 */
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    try {
        if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
            throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                    ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);
        try {
            final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
            for (final SAMRecord rec : recordReader) {
                rec.setHeader(replacementHeader);
                writer.addAlignment(rec);
                progress.record(rec);
            }
        } finally {
            // The writer is closed before the reader, as in the normal completion path.
            writer.close();
        }
    } finally {
        CloserUtil.close(recordReader);
    }
}
 
Example 5
Source Project: picard   Source File: SortVcf.java    License: MIT License 6 votes vote down vote up
/**
 * Feeds the records of every input reader into one SortingCollection so that they come out merged
 * and sorted.
 * <p/>
 * NB: A merging iterator (as in MergeSamFiles) would perform better for pre-sorted inputs. Inputs
 * are assumed to be unsorted here, so adding their VariantContexts one by one is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF; each is closed once read
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 * @return the sorting collection holding every record from every reader
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here, since VariantContexts are
    // smaller than SamRecords. Finding a better value would need empirical tuning, which is not
    // being done at this time.
    final boolean notStrict = VALIDATION_STRINGENCY != ValidationStringency.STRICT;
    final VCFRecordCodec codec = new VCFRecordCodec(outputHeader, notStrict);
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(
            VariantContext.class,
            codec,
            outputHeader.getVCFRecordComparator(),
            MAX_RECORDS_IN_RAM,
            TMP_DIR);

    int inputNumber = 0;
    for (final VCFFileReader reader : readers) {
        inputNumber++;
        log.info("Reading entries from input file " + inputNumber);
        for (final VariantContext context : reader) {
            sorter.add(context);
            readProgress.record(context.getContig(), context.getStart());
        }
        reader.close();
    }
    return sorter;
}
 
Example 6
Source Project: picard   Source File: FingerprintChecker.java    License: MIT License 6 votes vote down vote up
/**
 * Handles a read that has no read group.
 * <p>
 * Under STRICT stringency the problem is logged as an error and thrown. Otherwise a placeholder
 * read group is built for the file and returned; a warning is logged the first time a given file
 * is seen, unless the stringency is SILENT.
 *
 * @param samFile the file the read came from
 * @param rec     the read lacking a read group
 * @return details built from a placeholder read group for {@code samFile}
 * @throws PicardException if the validation stringency is STRICT
 */
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) {
    final PicardException e = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile);

    if (validationStringency == ValidationStringency.STRICT) {
        log.error(e.getMessage());
        throw e;
    }

    final SAMReadGroupRecord placeholder = new SAMReadGroupRecord("<UNKNOWN>:::" + samFile.toUri().toString());
    placeholder.setLibrary("<UNKNOWN>");
    placeholder.setSample(defaultSampleID);
    placeholder.setPlatformUnit("<UNKNOWN>.0.ZZZ");

    // missingRGFiles.add() is only reached (and only records the file) when not SILENT.
    final boolean shouldWarn = validationStringency != ValidationStringency.SILENT && missingRGFiles.add(samFile);
    if (shouldWarn) {
        log.warn(e.getMessage());
        log.warn("further messages from this file will be suppressed");
    }

    return new FingerprintIdDetails(placeholder, samFile.toUri().toString());
}
 
Example 7
Source Project: picard   Source File: IlluminaLaneMetricsCollectorTest.java    License: MIT License 6 votes vote down vote up
/**
 * Ensures that an exception is thrown when we encounter a tile without phasing/pre-phasing metrics
 * and the validation stringency is STRICT.
 */
@Test(expectedExceptions = PicardException.class)
public void testMissingPhasingValuesStrict() {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;

    for (final boolean useReadStructure : new boolean[]{true, false}) {
        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDirectory;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
        if (useReadStructure) {
            collector.READ_STRUCTURE = readStructure;
        }
        collector.doWork();

        // Compare the phasing metrics against the canonical file stored in the run directory.
        final String phasingExtension = IlluminaPhasingMetrics.getExtension();
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, phasingExtension);
        final File expectedPhasing = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Same comparison for the lane metrics.
        final String laneExtension = IlluminaLaneMetrics.getExtension();
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, laneExtension);
        final File expectedLane = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
 
Example 8
Source Project: picard   Source File: IlluminaLaneMetricsCollectorTest.java    License: MIT License 6 votes vote down vote up
/**
 * Silently continue if we encounter a tile without phasing/pre-phasing metrics: with SILENT
 * stringency the outputs must match the canonical files, with and without a read structure.
 */
@Test
public void testMissingPhasingValuesSilent() throws IOException {
    final ReadStructure readStructure = new ReadStructure("151T8B8B151T");
    final File runDirectory = TEST_MISSING_PHASING_DIRECTORY;

    for (final boolean useReadStructure : new boolean[]{true, false}) {
        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDirectory;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.SILENT;
        if (useReadStructure) {
            collector.READ_STRUCTURE = readStructure;
        }
        collector.doWork();

        // Compare the phasing metrics against the canonical file stored in the run directory.
        final String phasingExtension = IlluminaPhasingMetrics.getExtension();
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, phasingExtension);
        final File expectedPhasing = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Same comparison for the lane metrics.
        final String laneExtension = IlluminaLaneMetrics.getExtension();
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, laneExtension);
        final File expectedLane = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
 
Example 9
Source Project: picard   Source File: CleanSamTest.java    License: MIT License 6 votes vote down vote up
/**
 * Runs the command line on {@code samFile}, then checks that the first record of the output has
 * the expected CIGAR and that the output passes SAM validation.
 * <p>
 * Both readers are opened in try-with-resources so that they are closed even when reading or
 * validation throws.
 *
 * @param samFile       name of the input file inside {@code TEST_DATA_DIR}
 * @param expectedCigar CIGAR string expected on the first output record
 */
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

    // First pass: only the first record is needed, for the CIGAR check.
    final SAMRecord rec;
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        rec = samReader.iterator().next();
    }
    Assert.assertEquals(rec.getCigarString(), expectedCigar);

    // Second pass: a fresh reader so that validation starts from the beginning of the file.
    final boolean validated;
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        validated = validator.validateSamFileVerbose(samReader, null);
    }
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
 
Example 10
Source Project: Hadoop-BAM   Source File: BAMRecordReader.java    License: MIT License 6 votes vote down vote up
/**
 * Opens a {@link SamReader} over the given stream.
 * <p>
 * The factory is configured with file-based index caching enabled, eager decoding disabled and
 * asynchronous I/O turned off.
 *
 * @param in               stream holding the data to read
 * @param inIndex          stream holding the index, or {@code null} to open without one
 * @param stringency       validation stringency to apply, or {@code null} to keep the factory default
 * @param useIntelInflater whether to install the inflater factory from {@code IntelGKLAccessor}
 * @return the opened reader
 */
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex,
		ValidationStringency stringency, boolean useIntelInflater) {
	final SamReaderFactory factory = SamReaderFactory.makeDefault();
	factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true)
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);
	if (null != stringency) {
		factory.validationStringency(stringency);
	}

	final SamInputResource input = SamInputResource.of(in);
	if (null != inIndex) {
		input.index(inIndex);
	}

	if (useIntelInflater) {
		factory.inflaterFactory(IntelGKLAccessor.newInflatorFactor());
	}
	return factory.open(input);
}
 
Example 11
Source Project: Hadoop-BAM   Source File: SAMHeaderReader.java    License: MIT License 6 votes vote down vote up
/**
 * Reads the SAM file header from the given stream. Does not close the stream.
 * <p>
 * The validation stringency and the reference source are looked up from the configuration and
 * applied to the reader factory only when they are non-null.
 *
 * @param in   stream to read the header from
 * @param conf configuration passed to the stringency and reference source lookups
 * @return the header read from {@code in}
 */
public static SAMFileHeader readSAMHeaderFrom(
	final InputStream in, final Configuration conf)
{
	final SamReaderFactory factory = SamReaderFactory.makeDefault()
			.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
			.setUseAsyncIo(false);

	final ValidationStringency configuredStringency = getValidationStringency(conf);
	if (configuredStringency != null) {
		factory.validationStringency(configuredStringency);
	}

	final ReferenceSource configuredReference = getReferenceSource(conf);
	if (configuredReference != null) {
		factory.referenceSource(configuredReference);
	}

	final SamInputResource input = SamInputResource.of(in);
	return factory.open(input).getFileHeader();
}
 
Example 12
Source Project: Hadoop-BAM   Source File: GetSortedBAMHeader.java    License: MIT License 6 votes vote down vote up
/**
 * Command-line entry point: reads the header of the input file, sets its sort order to
 * 'coordinate' and writes it to the output file in BAM format.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path; with fewer
 *             than two arguments a usage message is printed and the process exits with status 1
 * @throws IOException if the output file cannot be written
 */
public static void main(String[] args) throws IOException {
	final boolean tooFewArguments = args.length < 2;
	if (tooFewArguments) {
		System.err.println(
			"Usage: GetSortedBAMHeader input output\n\n"+

			"Reads the BAM header from input (a standard BGZF-compressed BAM "+
			"file), and\nwrites it (BGZF-compressed, no terminator block) to "+
			"output. Sets the sort order\nindicated in the SAM header to "+
			"'coordinate'.");
		System.exit(1);
	}

	final File input = new File(args[0]);
	final SamReaderFactory factory = SamReaderFactory.makeDefault()
			.validationStringency(ValidationStringency.SILENT)
			.setUseAsyncIo(false);
	final SAMFileHeader header = factory.open(input).getFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.coordinate);

	try (FileOutputStream out = new FileOutputStream(args[1])) {
		new SAMOutputPreparer().prepareForRecords(out, SAMFormat.BAM, header);
	}
}
 
Example 13
Source Project: chipster   Source File: SamBamUtils.java    License: MIT License 6 votes vote down vote up
/**
 * Renders the header and at most {@code maxRecords} records of a SAM/BAM stream as SAM text.
 * <p>
 * If the input holds more than {@code maxRecords} records, a truncation notice is appended to
 * the text. Note that this sets the process-wide default validation stringency of
 * {@code SAMFileReader} to SILENT.
 *
 * @param samBamStream stream to read SAM/BAM data from
 * @param maxRecords   maximum number of records to include in the output
 * @return the header, the records written and, when truncated, the truncation notice
 * @throws IOException if the truncation notice cannot be written to the buffer
 */
public String printSamBam(InputStream samBamStream, int maxRecords) throws IOException {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader in = new SAMFileReader(samBamStream);
	SAMFileHeader header = in.getFileHeader();
	ByteArrayOutputStream buffer = new ByteArrayOutputStream();
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, true, buffer);
	int written = 0;
	// Set only when a record was actually left out, so an input with exactly
	// maxRecords records is not reported as truncated.
	boolean truncated = false;
	try {
		for (final SAMRecord rec : in) {
			// Check the limit before writing, so that no more than maxRecords are emitted.
			if (written >= maxRecords) {
				truncated = true;
				break;
			}
			out.addAlignment(rec);
			written++;
		}
	} finally {
		closeIfPossible(out);
	}

	if (truncated) {
		buffer.write("SAM/BAM too long for viewing, truncated here!\n".getBytes());
	}
	
	return buffer.toString();
}
 
Example 14
Source Project: dataflow-java   Source File: BAMIO.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens a reader on the given resource, optionally positioned at a non-zero offset.
 * <p>
 * With an offset of 0 the reader comes straight from the factory. Otherwise a
 * {@code SeekingBAMFileReader} is created at the offset, wrapped in a {@code SeekingReaderAdapter},
 * and the factory options are reapplied to the adapter.
 *
 * @param resource          the input to read
 * @param stringency        validation stringency for the reader
 * @param includeFileSource whether to enable INCLUDE_SOURCE_IN_RECORDS
 * @param offset            starting offset; 0 selects the plain factory reader
 * @return the opened reader
 */
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  final SamReaderFactory factory = SamReaderFactory
      .makeDefault()
      .validationStringency(stringency)
      .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
  if (includeFileSource) {
    factory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
  }

  if (offset != 0) {
    LOG.info("Initializing seeking reader with the offset of " + offset);
    final SeekingBAMFileReader seekingReader = new SeekingBAMFileReader(
        resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset);
    final SeekingReaderAdapter adapter = new SeekingReaderAdapter(seekingReader, resource);
    factory.reapplyOptions(adapter);
    return adapter;
  }

  return factory.open(resource);
}
 
Example 15
Source Project: chipster   Source File: SamBamUtils.java    License: MIT License 6 votes vote down vote up
/**
 * Copies all records of a SAM/BAM file into a BAM file whose header declares coordinate sort order.
 * <p>
 * The writer is created with {@code presorted = false}. Note that this sets the process-wide
 * default validation stringency of {@code SAMFileReader} to SILENT.
 *
 * @param samBamFile    the file to read
 * @param sortedBamFile the BAM file to write
 */
public static void sortSamBam(File samBamFile, File sortedBamFile) {
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader source = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter sink = null;
	try {
		// The header is modified in place before it is handed to the writer.
		source.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate);
		sink = new SAMFileWriterFactory().makeBAMWriter(source.getFileHeader(), false, sortedBamFile);
		for (SAMRecord record : source) {
			sink.addAlignment(record);
		}
	} finally {
		closeIfPossible(source);
		closeIfPossible(sink);
	}
}
 
Example 16
Source Project: Drop-seq   Source File: EnhanceGTFRecordsTest.java    License: MIT License 5 votes vote down vote up
/**
 * Enhances the records parsed from {@code GTF_FILE1} under STRICT stringency and checks that a
 * non-null list comes back. The parser is closed whether or not enhancement succeeds.
 */
@Test(enabled=true, groups={"dropseq", "transcriptome"})
public void test1Enhanced() {
	final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
	final GTFParser gtfParser = new GTFParser(GTF_FILE1, ValidationStringency.STRICT);
	final List<GTFRecord> enhanced;
	try {
		enhanced = enhancer.enhanceGTFRecords(gtfParser);
	} finally {
		CloserUtil.close(gtfParser);
	}
	Assert.assertNotNull(enhanced);
}
 
Example 17
Source Project: Drop-seq   Source File: EnhanceGTFRecordsTest.java    License: MIT License 5 votes vote down vote up
/**
 * Enhancing the records parsed from {@code GTF_FILE3} under STRICT stringency is expected to
 * fail with an {@link IllegalStateException}. The parser is closed in either case.
 */
@Test(enabled=true, expectedExceptions=java.lang.IllegalStateException.class)
public void testGeneNoExon () {
	final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
	final GTFParser gtfParser = new GTFParser(GTF_FILE3, ValidationStringency.STRICT);
	final List<GTFRecord> enhanced;
	try {
		enhanced = enhancer.enhanceGTFRecords(gtfParser);
	} finally {
		CloserUtil.close(gtfParser);
	}
	Assert.assertNotNull(enhanced);
}
 
Example 18
/**
 * Counts the records in a SAM/BAM file, reading it with SILENT validation stringency.
 *
 * @param result path of the file to count; must be readable
 * @return the number of records iterated
 * @throws IOException if closing the reader fails
 */
private int getReadCounts(final Path result) throws IOException {
    IOUtil.assertFileIsReadable(result);

    final SamReaderFactory silentFactory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    int readCount = 0;
    try (final SamReader reader = silentFactory.open(result)) {
        for (@SuppressWarnings("unused") final SAMRecord ignored : reader) {
            readCount++;
        }
    }
    return readCount;
}
 
Example 19
Source Project: hmftools   Source File: CobaltConfig.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a {@link CobaltConfig} from parsed command-line options.
 * <p>
 * Values read through {@code parameter(...)} share one joiner; if that joiner is non-empty after
 * all of them are read, its contents are reported together in a single {@link ParseException}.
 *
 * @param cmd the parsed command line
 * @return the immutable configuration
 * @throws ParseException if the GC profile path ends with "gz" or the shared joiner is non-empty
 */
@NotNull
static CobaltConfig createConfig(@NotNull final CommandLine cmd) throws ParseException {
    final int threads = defaultIntValue(cmd, THREADS, DEFAULT_THREADS);
    final int minQuality = defaultIntValue(cmd, MIN_MAPPING_QUALITY, DEFAULT_MIN_MAPPING_QUALITY);
    final String refGenome = cmd.getOptionValue(REF_GENOME, "");

    // Every parameter(...) call below is handed this same joiner.
    final StringJoiner missingParameters = new StringJoiner(", ");

    final String gcProfile = parameter(cmd, GC_PROFILE, missingParameters);
    if (gcProfile.endsWith("gz")) {
        throw new ParseException("Please supply un-compressed " + GC_PROFILE + " file");
    }

    final String tumorBam = parameter(cmd, TUMOR_BAM, missingParameters);
    final String referenceBam = parameter(cmd, REFERENCE_BAM, missingParameters);
    final String outputDir = parameter(cmd, OUTPUT_DIR, missingParameters);
    final String referenceSample = parameter(cmd, REFERENCE, missingParameters);
    final String tumorSample = parameter(cmd, TUMOR, missingParameters);
    final String missingSummary = missingParameters.toString();

    final ValidationStringency stringency =
            defaultEnumValue(cmd, VALIDATION_STRINGENCY, ValidationStringency.DEFAULT_STRINGENCY);

    if (missingSummary.length() > 0) {
        throw new ParseException("Missing the following parameters: " + missingSummary);
    }

    return ImmutableCobaltConfig.builder()
            .threadCount(threads)
            .minMappingQuality(minQuality)
            .gcProfilePath(gcProfile)
            .tumorBamPath(tumorBam)
            .referenceBamPath(referenceBam)
            .refGenomePath(refGenome)
            .outputDirectory(outputDir)
            .reference(referenceSample)
            .tumor(tumorSample)
            .validationStringency(stringency)
            .build();
}
 
Example 20
/**
 * Runs the tool against {@code ENDPOINT} with the supplied parameters and compares the BAM it
 * writes with the expected file, using LENIENT validation stringency.
 *
 * @param params           extra argument name/value pairs to add to the command line
 * @param expectedFileName name of the expected BAM inside the tool's test data directory
 */
@Test(dataProvider = "successfulParameters")
public void testSuccessfulParameters(final Map<String, String> params, final String expectedFileName) throws IOException {
    final File expectedBam = new File(getToolTestDataDir(), expectedFileName);
    final File actualBam = createTempFile("output", ".bam");

    final ArgumentsBuilder builder = new ArgumentsBuilder();
    builder.add(HtsgetReader.URL_LONG_NAME, ENDPOINT)
        .addOutput(actualBam);
    for (final Map.Entry<String, String> param : params.entrySet()) {
        builder.add(param.getKey(), param.getValue());
    }

    runCommandLine(builder);
    SamAssertionUtils.assertEqualBamFiles(actualBam, expectedBam, false, ValidationStringency.LENIENT);
}
 
Example 21
/**
 * Runs the tool on {@code NON_STRICT_BAM_FILE} with the read validation stringency set to SILENT;
 * the run should complete successfully.
 */
@Test
public void testNonStrictBAMWithSilentValidationStringency() {
    final File countsOutput = createTempFile("collect-allelic-counts-test-output", ".tsv");

    final String inputFlag = "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME;
    final String sitesFlag = "-" + ExomeStandardArgumentDefinitions.SITES_FILE_SHORT_NAME;
    final String referenceFlag = "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME;
    final String outputFlag = "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME;
    final String stringencyFlag = "-" + StandardArgumentDefinitions.READ_VALIDATION_STRINGENCY_SHORT_NAME;

    final String[] commandLine = new String[] {
            inputFlag, NON_STRICT_BAM_FILE.getAbsolutePath(),
            sitesFlag, SITES_FILE.getAbsolutePath(),
            referenceFlag, REF_FILE.getAbsolutePath(),
            outputFlag, countsOutput.getAbsolutePath(),
            stringencyFlag, ValidationStringency.SILENT.toString()
    };

    // Should complete successfully with --readValidationStringency SILENT.
    runCommandLine(commandLine);
}
 
Example 22
/**
 * Runs the tool once on HTTP inputs and once on the equivalent non-HTTP inputs, then checks that
 * the HTTP run produced the expected number of reads and that both outputs are equal BAM files.
 *
 * @param reads                 reads input for the first run
 * @param index                 read index for the first run
 * @param nonHttpReads          reads input for the second run
 * @param nonHttpIndex          read index for the second run
 * @param intervals             intervals added to both command lines
 * @param expectedNumberOfReads number of reads expected in the first run's output
 */
@Test(groups = {"cloud", "bucket"}, dataProvider = "getHttpPaths")
public void testHttpPaths(String reads, String index, String nonHttpReads, String nonHttpIndex, List<SimpleInterval> intervals, long expectedNumberOfReads) throws IOException {
    final ArgumentsBuilder httpArgs = new ArgumentsBuilder();
    final File httpOutput = createTempFile("out", ".bam");
    // This test reads tiny amounts of data from multiple places. Without lowering the prefetcher
    // buffers it loads large amounts of data, which slows the test down for no good reason.
    httpArgs.addInput(reads)
            .add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1)
            .add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1)
            .add("read-index", index)
            .addReference(GATKBaseTest.b37Reference)
            .addOutput(httpOutput);
    for (final SimpleInterval interval : intervals) {
        httpArgs.addInterval(interval);
    }
    runCommandLine(httpArgs);

    final ArgumentsBuilder nonHttpArgs = new ArgumentsBuilder();
    final File nonHttpOutput = createTempFile("out", ".bam");
    nonHttpArgs.addInput(nonHttpReads)
            .add("read-index", nonHttpIndex)
            .add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1)
            .add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1)
            .addReference(GATKBaseTest.b37Reference)
            .addOutput(nonHttpOutput);
    for (final SimpleInterval interval : intervals) {
        nonHttpArgs.addInterval(interval);
    }
    runCommandLine(nonHttpArgs);

    try (final ReadsDataSource httpReads = new ReadsPathDataSource(httpOutput.toPath())) {
        final long actualNumberOfReads = Utils.stream(httpReads).count();
        Assert.assertEquals(actualNumberOfReads, expectedNumberOfReads);
    }

    SamAssertionUtils.assertEqualBamFiles(httpOutput, nonHttpOutput, false, ValidationStringency.DEFAULT_STRINGENCY);
}
 
Example 23
Source Project: picard   Source File: TileMetricsUtil.java    License: MIT License 5 votes vote down vote up
/**
 * Reads a version-three tile metrics file and builds the tile cluster records from it, combining
 * them with the phasing values taken from the given phasing metrics files.
 *
 * @param tileMetricsOutFile   the tile metrics file to read
 * @param phasingMetricsFiles  phasing metrics files keyed by integer
 * @param readStructure        read structure used when collecting the phasing values
 * @param validationStringency not used by this overload
 * @return the tiles built from the file
 * @deprecated use {@link #parseClusterRecordsFromTileMetricsV3(Collection, Map, ReadStructure)} instead
 */
@Deprecated
public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile,
                                                final Map<Integer, File> phasingMetricsFiles,
                                                final ReadStructure readStructure,
                                                final ValidationStringency validationStringency)
        throws FileNotFoundException {
    final Map<Integer, Map<Integer, Collection<TilePhasingValue>>> phasing =
            getTilePhasingValues(phasingMetricsFiles, readStructure);

    final TileMetricsOutReader metricsReader =
            new TileMetricsOutReader(tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.THREE);
    final Collection<IlluminaTileMetrics> latestMetrics = determineLastValueForLaneTileMetricsCode(metricsReader);
    final Map<String, ? extends Collection<IlluminaTileMetrics>> metricsByLocation =
            partitionTileMetricsByLocation(latestMetrics);

    // The density is read from the reader only after its records have been consumed above.
    return getTileClusterRecordsV3(metricsByLocation, phasing, metricsReader.getDensity());
}
 
Example 24
Source Project: picard   Source File: TileMetricsUtil.java    License: MIT License 5 votes vote down vote up
/**
 * Returns an unmodifiable collection of tile data read from the provided (version-two) file.
 * For each tile we extract:
 * - lane number
 * - tile number
 * - density
 * - cluster ID
 * - Phasing & Prephasing for first template read (if available)
 * - Phasing & Prephasing for second template read (if available)
 *
 * @param tileMetricsOutFile   the tile metrics file to read
 * @param readStructure        read structure used when collecting the phasing values
 * @param validationStringency stringency passed on to the phasing value extraction
 * @throws PicardException if a tile location lacks either the density or the cluster record code
 */
public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure,
                                                final ValidationStringency validationStringency) throws FileNotFoundException {
    // Read the tile metrics lines, keeping only the last value for any Lane/Tile/Code combination.
    final TileMetricsOutReader metricsReader =
            new TileMetricsOutReader(tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.TWO);
    final Collection<IlluminaTileMetrics> latestMetrics = determineLastValueForLaneTileMetricsCode(metricsReader);

    // Group the metrics by tile location (lane:tile).
    final Map<String, ? extends Collection<IlluminaTileMetrics>> metricsByLocation = partitionTileMetricsByLocation(latestMetrics);
    final Collection<Tile> result = new LinkedList<>();

    for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> location : metricsByLocation.entrySet()) {
        // Map each metric code number to the records carrying that code at this location.
        final Map<Integer, ? extends Collection<IlluminaTileMetrics>> metricsByCode = partitionTileMetricsByCode(location.getValue());

        final Set<Integer> observedCodes = metricsByCode.keySet();
        final boolean hasDensity = observedCodes.contains(IlluminaMetricsCode.DENSITY_ID.getMetricsCode());
        if (!(hasDensity && observedCodes.contains(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode()))) {
            throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.",
                    IlluminaMetricsCode.CLUSTER_ID.getMetricsCode(), IlluminaMetricsCode.DENSITY_ID.getMetricsCode(), location.getKey(), observedCodes));
        }

        final IlluminaTileMetrics density = CollectionUtil.getSoleElement(metricsByCode.get(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()));
        final IlluminaTileMetrics cluster = CollectionUtil.getSoleElement(metricsByCode.get(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode()));

        // Collect the phasing data for each read in the read structure.
        final Collection<TilePhasingValue> phasing = getTilePhasingValues(metricsByCode, readStructure, validationStringency);
        final TilePhasingValue[] phasingArray = phasing.toArray(new TilePhasingValue[phasing.size()]);

        result.add(new Tile(density.getLaneNumber(), density.getTileNumber(), density.getMetricValue(),
                cluster.getMetricValue(), phasingArray));
    }

    return Collections.unmodifiableCollection(result);
}
 
Example 25
Source Project: picard   Source File: CollectIlluminaLaneMetrics.java    License: MIT License 5 votes vote down vote up
/**
 * Returns a partitioned collection of lane number to Tile objects from the provided basecall directory.
 * <p>
 * NovaSeq runs are parsed from all tile metrics files found, together with the phasing metrics
 * files; other runs are parsed from the first tile metrics file only. Tiles whose lane number is
 * not positive are dropped.
 *
 * @param illuminaRunDirectory the run directory to search for tile metrics files
 * @param readStructure        read structure of the run
 * @param validationStringency stringency used when parsing non-NovaSeq tile metrics
 * @param isNovaSeq            whether to use the NovaSeq parsing path
 * @throws PicardException if a metrics file cannot be opened
 */
public static Map<Integer, ? extends Collection<Tile>> readLaneTiles(final File illuminaRunDirectory,
                                                                     final ReadStructure readStructure,
                                                                     final ValidationStringency validationStringency,
                                                                     final boolean isNovaSeq) {
    final Collection<Tile> allTiles;
    try {
        final List<File> metricsFiles = TileMetricsUtil.findTileMetricsFiles(illuminaRunDirectory, readStructure.totalCycles, isNovaSeq);
        allTiles = isNovaSeq
                ? TileMetricsUtil.parseClusterRecordsFromTileMetricsV3(
                        metricsFiles,
                        TileMetricsUtil.renderPhasingMetricsFilesFromBasecallingDirectory(illuminaRunDirectory),
                        readStructure)
                : TileMetricsUtil.parseTileMetrics(
                        metricsFiles.get(0),
                        readStructure,
                        validationStringency);
    } catch (final FileNotFoundException e) {
        throw new PicardException("Unable to open laneMetrics file.", e);
    }

    return allTiles.stream()
            .filter(candidate -> candidate.getLaneNumber() > 0)
            .collect(Collectors.groupingBy(Tile::getLaneNumber));
}
 
Example 26
Source Project: picard   Source File: CollectIlluminaLaneMetrics.java    License: MIT License 5 votes vote down vote up
/**
 * Parses the tile data from the basecall directory and writes to both the lane and phasing
 * metrics files.
 *
 * @param runDirectory         run directory the tiles are read from
 * @param outputDirectory      directory the metrics are written to
 * @param outputPrefix         prefix for the output file names
 * @param laneMetricsFile      metrics file used for the lane metrics
 * @param phasingMetricsFile   metrics file used for the phasing metrics
 * @param readStructure        read structure of the run
 * @param fileExtension        extension passed to both writers
 * @param validationStringency stringency used when reading the tiles
 * @param isNovaSeq            whether the run is treated as NovaSeq
 */
public static void collectLaneMetrics(final File runDirectory, final File outputDirectory, final String outputPrefix,
                                      final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
                                      final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
                                      final ReadStructure readStructure, final String fileExtension,
                                      final ValidationStringency validationStringency,
                                      final boolean isNovaSeq) {
    // Tiles are read once and shared by both writers.
    final Map<Integer, ? extends Collection<Tile>> tilesByLane =
            readLaneTiles(runDirectory, readStructure, validationStringency, isNovaSeq);

    writeLaneMetrics(tilesByLane, outputDirectory, outputPrefix, laneMetricsFile, fileExtension);
    writePhasingMetrics(tilesByLane, outputDirectory, outputPrefix, phasingMetricsFile, fileExtension, isNovaSeq);
}
 
Example 27
/**
 * This test just checks that PrintReads runs with a config arg without blowing up.
 * It does not test whether the config settings were actually loaded correctly
 * (tested by {@link ConfigIntegrationTest}).
 */
@Test
public void testPrintReadsWithConfigFile() throws Exception {
    final String inputPath = publicTestDir + "NA12878.chr17_69k_70k.dictFix.bam";
    final File output = createTempFile("TEST_OUT_NA12878.chr17_69k_70k.dictFix", ".bam");

    // Assemble the command line: config file, input, then output.
    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.addRaw("--" + StandardArgumentDefinitions.GATK_CONFIG_FILE_OPTION);
    commandLine.addRaw(configFilePath);
    commandLine.addRaw("-" + StandardArgumentDefinitions.INPUT_SHORT_NAME);
    commandLine.addRaw(inputPath);
    commandLine.addRaw("-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME);
    commandLine.addRaw(output);

    runCommandLine(commandLine.getArgsArray());

    // The output is compared with the input file itself.
    final List<File> actualFiles = Collections.singletonList(new File(inputPath));
    final List<String> expectedPaths = Collections.singletonList(output.getAbsolutePath());
    IntegrationTestSpec.assertMatchingFiles(actualFiles, expectedPaths, true, ValidationStringency.LENIENT);
}
 
Example 28
Source Project: picard   Source File: CleanSamTester.java    License: MIT License 5 votes vote down vote up
/**
 * Verifies the tool output in two passes and always removes the output directory afterwards.
 *
 * <ol>
 *   <li>Every record's CIGAR string (and mate CIGAR, when present) must equal {@code expectedCigar}.</li>
 *   <li>The output must pass {@link SamFileValidator} with warnings and
 *       {@code MISSING_READ_GROUP} errors ignored.</li>
 * </ol>
 *
 * Each reader is closed in its own {@code finally} so that a failing assertion or a validator
 * exception cannot leave the file open while the outer {@code finally} deletes the output directory.
 */
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

        final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);

        // Validate it has the expected cigar
        final SamReader cigarReader = factory.open(getOutput());
        try {
            final SAMRecordIterator iterator = cigarReader.iterator();
            while (iterator.hasNext()) {
                final SAMRecord rec = iterator.next();
                Assert.assertEquals(rec.getCigarString(), expectedCigar);
                if (SAMUtils.hasMateCigar(rec)) {
                    Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar);
                }
            }
        } finally {
            // Close even when an assertion above fails.
            CloserUtil.close(cigarReader);
        }

        // Run validation on the output file
        final boolean validated;
        final SamReader validationReader = factory.open(getOutput());
        try {
            validated = validator.validateSamFileVerbose(validationReader, null);
        } finally {
            // Close even when the validator throws.
            CloserUtil.close(validationReader);
        }

        Assert.assertTrue(validated, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example 29
/**
 * End-to-end BQSR workflow check: recalibrate, apply the recalibration, recalibrate again on the
 * recalibrated BAM, then compare the recalibrated BAM and the post-recalibration table against
 * expected files. The test is currently disabled ({@code enabled = false}).
 *
 * <p>The command-line strings are split into arguments by {@code Utils.escapeExpressions}, so every
 * flag must be separated from the preceding value by whitespace. The reference path is therefore
 * followed by {@code " -indels"} (with a leading space) rather than being fused with the flag.
 *
 * @throws IOException declared by the test signature
 */
@Test(description = "This is to test https://github.com/broadinstitute/hellbender/issues/322", groups = {"cloud", "spark"}, enabled = false)
public void testPlottingWorkflow() throws IOException {
    final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
    final String chr2021Reference2bit = GCS_b37_CHR20_21_REFERENCE_2BIT;
    final String dbSNPb37_chr2021 = resourceDir + DBSNP_138_B37_CH20_1M_1M1K_VCF;
    final String HiSeqBam_chr20 = getResourceDir() + WGS_B37_CH20_1M_1M1K_BAM;

    final File actualHiSeqBam_recalibrated = createTempFile("actual.recalibrated", ".bam");

    // First pass: build the pre-recalibration table.
    final String tablePre = createTempFile("gatk4.pre.cols", ".table").getAbsolutePath();
    final String argPre = " -R " + chr2021Reference2bit + " -indels --enable-baq " +" --known-sites " + dbSNPb37_chr2021 + " -I " + HiSeqBam_chr20
            + " -O " + tablePre;
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argPre));

    // Apply the recalibration table to produce the recalibrated BAM.
    final String argApply = "-I " + HiSeqBam_chr20 + " --bqsr-recal-file " + tablePre + " -O " + actualHiSeqBam_recalibrated.getAbsolutePath();
    new ApplyBQSRSpark().instanceMain(Utils.escapeExpressions(argApply));

    // Second pass: build the post-recalibration table from the recalibrated BAM.
    final File actualTablePost = createTempFile("gatk4.post.cols", ".table");
    final String argsPost = " -R " + chr2021Reference2bit + " -indels --enable-baq " +" --known-sites " + dbSNPb37_chr2021 + " -I " + actualHiSeqBam_recalibrated.getAbsolutePath()
            + " -O " + actualTablePost.getAbsolutePath();
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argsPost));

    final File expectedHiSeqBam_recalibrated = new File(resourceDir + "expected.NA12878.chr17_69k_70k.dictFix.recalibrated.DIQ.bam");

    SamAssertionUtils.assertSamsEqual(actualHiSeqBam_recalibrated, expectedHiSeqBam_recalibrated, ValidationStringency.LENIENT);

    final File expectedTablePost = new File(getResourceDir() + "expected.NA12878.chr17_69k_70k.postRecalibrated.txt");
    IntegrationTestSpec.assertEqualTextFiles(actualTablePost, expectedTablePost);
}
 
Example 30
Source Project: cramtools   Source File: CramToBam_OBA_Function.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads one container from the bytes of the given {@link OrderedByteArray}, normalizes its records
 * and re-encodes them through {@code codec} into a block-compressed in-memory buffer.
 *
 * @param object the input chunk; must not be {@code null}
 * @return a new {@link OrderedByteArray} holding the encoded bytes and the same {@code order} as
 *         the input, or {@code null} when the container read from the input is an EOF container
 * @throws NullPointerException if {@code object} is {@code null}
 * @throws RuntimeException wrapping any {@link IOException}, {@link IllegalArgumentException} or
 *         {@link IllegalAccessException} raised during conversion
 */
@Override
public OrderedByteArray apply(OrderedByteArray object) {
	if (object == null) {
		throw new NullPointerException();
	}

	log.debug("processing container " + object.order);
	try {
		final Container container = ContainerIO.readContainer(header.getVersion(), new ByteArrayInputStream(object.bytes));
		if (container.isEOF()) {
			return null;
		}

		// Decode and normalize the records held by this container.
		final ArrayList<CramCompressionRecord> cramRecords = new ArrayList<>(container.nofRecords);
		parser.getRecords(container, cramRecords, ValidationStringency.SILENT);
		n.normalize(cramRecords, null, 0, container.header.substitutionMatrix);

		// Encode every record into an in-memory block-compressed stream.
		final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		final BlockCompressedOutputStream compressed = new BlockCompressedOutputStream(buffer, null);
		codec.setOutputStream(compressed);
		for (final CramCompressionRecord cramRecord : cramRecords) {
			codec.encode(f.create(cramRecord));
		}
		// The stream is flushed, not closed, before the bytes are captured.
		compressed.flush();

		final OrderedByteArray result = new OrderedByteArray();
		result.order = object.order;
		result.bytes = buffer.toByteArray();
		log.debug(String.format("Converted OBA %d, records %d", object.order, cramRecords.size()));
		return result;
	} catch (IOException | IllegalArgumentException | IllegalAccessException e) {
		throw new RuntimeException(e);
	}
}