Java Code Examples for htsjdk.samtools.ValidationStringency
The following examples show how to use
htsjdk.samtools.ValidationStringency. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Drop-seq Source File: GTFParser.java License: MIT License | 6 votes |
/**
 * Pulls the next row from the underlying iterator, parses it into a {@link GTFRecord},
 * and (unless validation is SILENT) validates it.
 *
 * Validation problems raise an {@link AnnotationException} under STRICT stringency and are
 * logged as a warning otherwise. A row whose field count differs from
 * {@code GTFColumnLabels.length} always raises an {@link AnnotationException}.
 *
 * @return the parsed record; its chromosome and start are reported to the progress logger
 */
@Override
public GTFRecord next() {
    final TabbedTextFileWithHeaderParser.Row currentRow = it.next();
    final int fieldCount = currentRow.getFields().length;
    if (fieldCount != GTFColumnLabels.length) {
        throw new AnnotationException(
                "Wrong number of fields in GTF file " + gtfFile + " at line " + currentRow.getCurrentLine());
    }

    final GTFRecord record = parseLine(currentRow);

    // Validation is skipped entirely when the stringency is SILENT.
    final boolean shouldValidate = validationStringency != ValidationStringency.SILENT;
    final List<String> problems = shouldValidate ? record.validate() : null;
    if (problems != null && !problems.isEmpty()) {
        final String message = String.format(
                "Invalid GTF line: \n%s\nProblems:\n%s",
                currentRow.getCurrentLine(),
                CollectionUtil.join(problems, "\n"));
        if (validationStringency == ValidationStringency.STRICT) {
            throw new AnnotationException(message);
        }
        LOG.warn(message);
    }

    progressLogger.record(record.getChromosome(), record.getStart());
    return record;
}
Example 2
Source Project: cramtools Source File: QualityScoreStats.java License: Apache License 2.0 | 6 votes |
/**
 * Reads every container of a CRAM file, restores quality scores using the supplied default,
 * counts how often each quality-score byte occurs, and prints the counts to {@code System.out}.
 *
 * The input stream is now closed on every exit path (the original never closed it).
 *
 * @param file                the CRAM file to scan
 * @param defaultQualityScore the score handed to {@code CramNormalizer.restoreQualityScores}
 * @throws IOException if the file cannot be opened or read
 */
private static void dist(File file, byte defaultQualityScore) throws IllegalArgumentException, IOException,
        IllegalAccessException {
    // NOTE(review): the index below is (b & 0xFF), which can be 255 — one past the end of a
    // 255-element array. Left unchanged because print(...) is not visible here; confirm whether
    // 256 is the intended size.
    long[] freq = new long[255];

    try (InputStream is = new FileInputStream(file)) {
        CramHeader header = CramIO.readCramHeader(is);
        ContainerParser parser = new ContainerParser(header.getSamFileHeader());
        ArrayList<CramCompressionRecord> records = new ArrayList<CramCompressionRecord>(10000);

        Container c = null;
        while ((c = ContainerIO.readContainer(header.getVersion(), is)) != null && !c.isEOF()) {
            parser.getRecords(c, records, ValidationStringency.SILENT);
            CramNormalizer.restoreQualityScores(defaultQualityScore, records);
            for (CramCompressionRecord record : records) {
                for (byte b : record.qualityScores) {
                    freq[b & 0xFF]++;
                }
            }
            // The list is reused across containers, so empty it before the next read.
            records.clear();
        }
    }

    print(freq, defaultQualityScore, System.out);
}
Example 3
Source Project: gatk-protected Source File: GetHetCoverageIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(expectedExceptions = UserException.class) public void testNonStrictBAM() { final File normalOutputFile = createTempFile("normal-test",".txt"); final File tumorOutputFile = createTempFile("tumor-test",".txt"); final String[] arguments = { "-" + ExomeStandardArgumentDefinitions.NORMAL_BAM_FILE_SHORT_NAME, NON_STRICT_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_BAM_FILE_SHORT_NAME, TUMOR_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SNP_FILE_SHORT_NAME, SNP_FILE.getAbsolutePath(), "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.NORMAL_ALLELIC_COUNTS_FILE_SHORT_NAME, normalOutputFile.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.TUMOR_ALLELIC_COUNTS_FILE_SHORT_NAME, tumorOutputFile.getAbsolutePath(), "--VALIDATION_STRINGENCY", ValidationStringency.STRICT.toString() }; runCommandLine(arguments); //should catch SAMFormatException and throw new UserException with --VALIDATION_STRINGENCY STRICT }
Example 4
Source Project: picard Source File: ReplaceSamHeader.java License: MIT License | 6 votes |
/**
 * Copies every record of INPUT to OUTPUT, attaching {@code replacementHeader} to each record
 * and to the output file.
 *
 * The reader and writer are released in {@code finally} blocks so that neither leaks when the
 * sort orders disagree or when copying fails part-way through.
 *
 * @param replacementHeader header to write to OUTPUT and to set on each record
 * @throws PicardException if the sort order of INPUT differs from that of the replacement header
 */
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault()
            .referenceSequence(REFERENCE_SEQUENCE)
            .validationStringency(ValidationStringency.SILENT)
            .open(INPUT);
    try {
        if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
            throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                    ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
        }

        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);
        try {
            final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
            for (final SAMRecord rec : recordReader) {
                rec.setHeader(replacementHeader);
                writer.addAlignment(rec);
                progress.record(rec);
            }
        } finally {
            // Close the writer before the reader, matching the original shutdown order.
            writer.close();
        }
    } finally {
        CloserUtil.close(recordReader);
    }
}
Example 5
Source Project: picard Source File: SortVcf.java License: MIT License | 6 votes |
/** * Merge the inputs and sort them by adding each input's content to a single SortingCollection. * <p/> * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs. * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now. * MergeVcfs exists for simple merging of presorted inputs. * * @param readers - a list of VCFFileReaders, one for each input VCF * @param outputHeader - The merged header whose information we intend to use in the final output file */ private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) { final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records"); // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time. final SortingCollection<VariantContext> sorter = SortingCollection.newInstance( VariantContext.class, new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR); int readerCount = 1; for (final VCFFileReader reader : readers) { log.info("Reading entries from input file " + readerCount); for (final VariantContext variantContext : reader) { sorter.add(variantContext); readProgress.record(variantContext.getContig(), variantContext.getStart()); } reader.close(); readerCount++; } return sorter; }
Example 6
Source Project: picard Source File: FingerprintChecker.java License: MIT License | 6 votes |
/**
 * Handles a read that carries no read group.
 *
 * Under STRICT stringency the problem is logged as an error and thrown. Otherwise a placeholder
 * read group is built for the file and, unless stringency is SILENT, a warning is emitted the
 * first time the file is added to {@code missingRGFiles}.
 *
 * @param samFile the file the read came from
 * @param rec     the read without a read group
 * @return details built from the placeholder read group and the file's URI
 * @throws PicardException when validation stringency is STRICT
 */
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) {
    final PicardException e = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile);

    if (validationStringency == ValidationStringency.STRICT) {
        log.error(e.getMessage());
        throw e;
    }

    final String fileUri = samFile.toUri().toString();
    final SAMReadGroupRecord placeholder = new SAMReadGroupRecord("<UNKNOWN>:::" + fileUri);
    placeholder.setLibrary("<UNKNOWN>");
    placeholder.setSample(defaultSampleID);
    placeholder.setPlatformUnit("<UNKNOWN>.0.ZZZ");

    // Short-circuit matters: the file is only recorded in missingRGFiles when not SILENT.
    final boolean warn = validationStringency != ValidationStringency.SILENT && missingRGFiles.add(samFile);
    if (warn) {
        log.warn(e.getMessage());
        log.warn("further messages from this file will be suppressed");
    }

    return new FingerprintIdDetails(placeholder, fileUri);
}
Example 7
Source Project: picard Source File: IlluminaLaneMetricsCollectorTest.java License: MIT License | 6 votes |
/**
 * With STRICT validation, running on a tile that has no phasing/pre-phasing metrics is expected
 * to raise a {@link PicardException}.
 */
@Test(expectedExceptions = PicardException.class)
public void testMissingPhasingValuesStrict() {
    final ReadStructure structure = new ReadStructure("151T8B8B151T");

    for (final boolean withReadStructure : new boolean[]{true, false}) {
        final File runDir = TEST_MISSING_PHASING_DIRECTORY;

        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDir;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
        if (withReadStructure) {
            collector.READ_STRUCTURE = structure;
        }
        collector.doWork();

        // Phasing metrics: produced file versus the canonical file stored in the run directory.
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
        final File expectedPhasing = buildOutputFile(runDir, runDir.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Lane metrics: same comparison.
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
        final File expectedLane = buildOutputFile(runDir, runDir.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
Example 8
Source Project: picard Source File: IlluminaLaneMetricsCollectorTest.java License: MIT License | 6 votes |
/**
 * With SILENT validation, a tile without phasing/pre-phasing metrics must not stop the run;
 * the produced phasing and lane metrics files are compared with the canonical ones.
 */
@Test
public void testMissingPhasingValuesSilent() throws IOException {
    final ReadStructure structure = new ReadStructure("151T8B8B151T");

    for (final boolean withReadStructure : new boolean[]{true, false}) {
        final File runDir = TEST_MISSING_PHASING_DIRECTORY;

        final CollectIlluminaLaneMetrics collector = new CollectIlluminaLaneMetrics();
        collector.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null);
        collector.RUN_DIRECTORY = runDir;
        collector.OUTPUT_PREFIX = "test";
        collector.VALIDATION_STRINGENCY = ValidationStringency.SILENT;
        if (withReadStructure) {
            collector.READ_STRUCTURE = structure;
        }
        collector.doWork();

        // Phasing metrics: produced file versus the canonical file stored in the run directory.
        final File actualPhasing = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension());
        final File expectedPhasing = buildOutputFile(runDir, runDir.getName(), IlluminaPhasingMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedPhasing, actualPhasing);

        // Lane metrics: same comparison.
        final File actualLane = buildOutputFile(collector.OUTPUT_DIRECTORY, collector.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension());
        final File expectedLane = buildOutputFile(runDir, runDir.getName(), IlluminaLaneMetrics.getExtension());
        IOUtil.assertFilesEqual(expectedLane, actualLane);

        IOUtil.deleteDirectoryTree(collector.OUTPUT_DIRECTORY);
    }
}
Example 9
Source Project: picard Source File: CleanSamTest.java License: MIT License | 6 votes |
/**
 * Runs the command line tool on {@code samFile}, then checks that the first record of the output
 * has the expected CIGAR and that the output passes {@link SamFileValidator} validation
 * (warnings and MISSING_READ_GROUP errors ignored).
 *
 * Both readers are opened in try-with-resources so they are closed even when reading,
 * validation, or an assertion fails.
 *
 * @param samFile       name of the input file under TEST_DATA_DIR
 * @param expectedCigar CIGAR string expected on the first output record
 */
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();
    final String[] args = new String[]{
            "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath(),
            "OUTPUT=" + cleanedFile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

    // First pass: check the CIGAR of the first record.
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        final SAMRecord rec = samReader.iterator().next();
        Assert.assertEquals(rec.getCigarString(), expectedCigar);
    }

    // Second pass: validate the whole file with a fresh reader.
    final boolean validated;
    try (SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(cleanedFile)) {
        validated = validator.validateSamFileVerbose(samReader, null);
    }
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
Example 10
Source Project: Hadoop-BAM Source File: BAMRecordReader.java License: MIT License | 6 votes |
/**
 * Opens a {@link SamReader} over the given stream.
 *
 * The factory is configured with file-based index caching on, eager decoding off and
 * asynchronous I/O off.
 *
 * @param in               stream holding the data to read
 * @param inIndex          stream holding the index, or {@code null} to open without one
 * @param stringency       validation stringency to apply, or {@code null} to keep the factory's own
 * @param useIntelInflater whether to install the inflater factory from {@code IntelGKLAccessor}
 * @return the opened reader
 */
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex,
        ValidationStringency stringency, boolean useIntelInflater) {
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true);
    factory.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
    factory.setUseAsyncIo(false);

    if (stringency != null) {
        factory.validationStringency(stringency);
    }

    final SamInputResource input = SamInputResource.of(in);
    if (inIndex != null) {
        input.index(inIndex);
    }

    if (useIntelInflater) {
        factory.inflaterFactory(IntelGKLAccessor.newInflatorFactor());
    }

    return factory.open(input);
}
Example 11
Source Project: Hadoop-BAM Source File: SAMHeaderReader.java License: MIT License | 6 votes |
/**
 * Reads the SAM file header from {@code in}. Does not close the stream.
 *
 * The validation stringency and reference source are looked up from {@code conf}; each is
 * applied to the reader factory only when non-null.
 *
 * @param in   stream to read the header from
 * @param conf configuration consulted for stringency and reference source
 * @return the file header of the opened reader
 */
public static SAMFileHeader readSAMHeaderFrom(
        final InputStream in, final Configuration conf) {
    final ValidationStringency stringency = getValidationStringency(conf);

    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    factory.setOption(SamReaderFactory.Option.EAGERLY_DECODE, false);
    factory.setUseAsyncIo(false);
    if (stringency != null) {
        factory.validationStringency(stringency);
    }

    final ReferenceSource referenceSource = getReferenceSource(conf);
    if (referenceSource != null) {
        factory.referenceSource(referenceSource);
    }

    final SamInputResource input = SamInputResource.of(in);
    return factory.open(input).getFileHeader();
}
Example 12
Source Project: Hadoop-BAM Source File: GetSortedBAMHeader.java License: MIT License | 6 votes |
/**
 * Command-line entry point: reads the header of the BAM named by {@code args[0]}, sets its sort
 * order to coordinate, and writes it to the file named by {@code args[1]}.
 *
 * Prints a usage message to stderr and exits with status 1 when fewer than two arguments are given.
 */
public static void main(String[] args) throws IOException {
    final boolean tooFewArguments = args.length < 2;
    if (tooFewArguments) {
        final String usage =
                "Usage: GetSortedBAMHeader input output\n\n"+
                "Reads the BAM header from input (a standard BGZF-compressed BAM "+
                "file), and\nwrites it (BGZF-compressed, no terminator block) to "+
                "output. Sets the sort order\nindicated in the SAM header to "+
                "'coordinate'.";
        System.err.println(usage);
        System.exit(1);
    }

    final File inputBam = new File(args[0]);
    final SAMFileHeader header = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .setUseAsyncIo(false)
            .open(inputBam)
            .getFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    try (FileOutputStream out = new FileOutputStream(args[1])) {
        new SAMOutputPreparer().prepareForRecords(out, SAMFormat.BAM, header);
    }
}
Example 13
Source Project: chipster Source File: SamBamUtils.java License: MIT License | 6 votes |
/**
 * Renders at most {@code maxRecords} records of a SAM/BAM stream, with its header, as SAM text.
 *
 * Fixes an off-by-one in the original, which wrote {@code maxRecords + 1} records and also
 * appended the truncation notice when the input held exactly {@code maxRecords + 1} records.
 * The notice is now appended only when a record was actually left out.
 *
 * Note: this sets the default validation stringency of {@code SAMFileReader} to SILENT through
 * a static setter, so the setting is not local to this call.
 *
 * @param samBamStream stream with SAM or BAM content
 * @param maxRecords   maximum number of records to include in the output
 * @return the SAM text, followed by a truncation notice if records were omitted
 * @throws IOException if writing the truncation notice to the buffer fails
 */
public String printSamBam(InputStream samBamStream, int maxRecords) throws IOException {
    SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
    SAMFileReader in = new SAMFileReader(samBamStream);
    SAMFileHeader header = in.getFileHeader();
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMWriter(header, true, buffer);

    int written = 0;
    boolean truncated = false;
    try {
        for (final SAMRecord rec : in) {
            if (written >= maxRecords) {
                // There is at least one more record than we are allowed to show.
                truncated = true;
                break;
            }
            out.addAlignment(rec);
            written++;
        }
    } finally {
        closeIfPossible(out);
    }

    if (truncated) {
        buffer.write("SAM/BAM too long for viewing, truncated here!\n".getBytes());
    }
    return buffer.toString();
}
Example 14
Source Project: dataflow-java Source File: BAMIO.java License: Apache License 2.0 | 6 votes |
/**
 * Opens a reader over {@code resource}, optionally starting at a non-zero offset.
 *
 * For offset 0 the factory opens the resource directly. Otherwise a
 * {@code SeekingBAMFileReader} is created at the offset, wrapped in a
 * {@code SeekingReaderAdapter}, and the factory's options are re-applied to the wrapper.
 *
 * @param resource          the input to read
 * @param stringency        validation stringency for the reader
 * @param includeFileSource whether to enable INCLUDE_SOURCE_IN_RECORDS
 * @param offset            offset to start at; 0 selects the plain factory path
 * @return the opened reader
 */
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency,
        boolean includeFileSource, long offset) throws IOException {
    final SamReaderFactory factory = SamReaderFactory.makeDefault()
            .validationStringency(stringency)
            .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
    if (includeFileSource) {
        factory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
    }

    if (offset != 0) {
        LOG.info("Initializing seeking reader with the offset of " + offset);
        final SeekingBAMFileReader seekingReader = new SeekingBAMFileReader(
                resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset);
        final SeekingReaderAdapter adapter = new SeekingReaderAdapter(seekingReader, resource);
        factory.reapplyOptions(adapter);
        return adapter;
    }

    return factory.open(resource);
}
Example 15
Source Project: chipster Source File: SamBamUtils.java License: MIT License | 6 votes |
/**
 * Copies every record of {@code samBamFile} into a BAM writer for {@code sortedBamFile}, after
 * setting the header's sort order to coordinate. The writer is created with presorted=false.
 *
 * Note: this sets the default validation stringency of {@code SAMFileReader} to SILENT through
 * a static setter.
 *
 * @param samBamFile    input SAM or BAM file
 * @param sortedBamFile destination BAM file
 */
public static void sortSamBam(File samBamFile, File sortedBamFile) {
    SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);

    SAMFileReader source = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
    SAMFileWriter sink = null;
    try {
        final SAMFileHeader header = source.getFileHeader();
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        sink = new SAMFileWriterFactory().makeBAMWriter(source.getFileHeader(), false, sortedBamFile);

        for (SAMRecord record : source) {
            sink.addAlignment(record);
        }
    } finally {
        closeIfPossible(source);
        closeIfPossible(sink);
    }
}
Example 16
Source Project: Drop-seq Source File: EnhanceGTFRecordsTest.java License: MIT License | 5 votes |
/**
 * Enhances the records parsed from GTF_FILE1 under STRICT validation and checks that a
 * non-null result comes back. The parser is always closed.
 */
@Test(enabled=true, groups={"dropseq", "transcriptome"})
public void test1Enhanced() {
    final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
    final GTFParser gtfParser = new GTFParser(GTF_FILE1, ValidationStringency.STRICT);

    final List<GTFRecord> enhanced;
    try {
        enhanced = enhancer.enhanceGTFRecords(gtfParser);
    } finally {
        CloserUtil.close(gtfParser);
    }

    Assert.assertNotNull(enhanced);
}
Example 17
Source Project: Drop-seq Source File: EnhanceGTFRecordsTest.java License: MIT License | 5 votes |
/**
 * Enhancing the records parsed from GTF_FILE3 under STRICT validation is expected to raise
 * an {@link IllegalStateException}. The parser is always closed.
 */
@Test(enabled=true, expectedExceptions=java.lang.IllegalStateException.class)
public void testGeneNoExon () {
    final EnhanceGTFRecords enhancer = new EnhanceGTFRecords();
    final GTFParser gtfParser = new GTFParser(GTF_FILE3, ValidationStringency.STRICT);

    final List<GTFRecord> enhanced;
    try {
        enhanced = enhancer.enhanceGTFRecords(gtfParser);
    } finally {
        CloserUtil.close(gtfParser);
    }

    Assert.assertNotNull(enhanced);
}
Example 18
Source Project: gatk Source File: HaplotypeBAMWriterUnitTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Counts the records in the file at {@code result}, read with SILENT validation.
 *
 * @param result path of the file to count; asserted readable before opening
 * @return number of records iterated
 * @throws IOException if closing the reader fails
 */
private int getReadCounts(final Path result) throws IOException {
    IOUtil.assertFileIsReadable(result);

    final SamReaderFactory factory = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT);

    int total = 0;
    try (final SamReader reader = factory.open(result)) {
        // The record itself is irrelevant; only the number of iterations matters.
        for (@SuppressWarnings("unused") final SAMRecord ignored : reader) {
            total++;
        }
    }
    return total;
}
Example 19
Source Project: hmftools Source File: CobaltConfig.java License: GNU General Public License v3.0 | 5 votes |
/**
 * Builds a {@link CobaltConfig} from the parsed command line.
 *
 * Values fetched through {@code parameter(...)} share one joiner; if that joiner is non-empty
 * once every value has been read, a {@link ParseException} naming its contents is thrown.
 * A GC profile path ending in "gz" is rejected up front.
 *
 * @param cmd the parsed command line
 * @return the immutable configuration
 * @throws ParseException on a compressed GC profile or missing parameters
 */
@NotNull
static CobaltConfig createConfig(@NotNull final CommandLine cmd) throws ParseException {
    final int threads = defaultIntValue(cmd, THREADS, DEFAULT_THREADS);
    final int minMapQuality = defaultIntValue(cmd, MIN_MAPPING_QUALITY, DEFAULT_MIN_MAPPING_QUALITY);
    final String refGenome = cmd.getOptionValue(REF_GENOME, "");

    // Read order is kept as-is: it fixes the order of names in the joiner.
    final StringJoiner absent = new StringJoiner(", ");
    final String gcProfile = parameter(cmd, GC_PROFILE, absent);
    if (gcProfile.endsWith("gz")) {
        throw new ParseException("Please supply un-compressed " + GC_PROFILE + " file");
    }
    final String tumorBam = parameter(cmd, TUMOR_BAM, absent);
    final String referenceBam = parameter(cmd, REFERENCE_BAM, absent);
    final String outputDir = parameter(cmd, OUTPUT_DIR, absent);
    final String referenceSample = parameter(cmd, REFERENCE, absent);
    final String tumorSample = parameter(cmd, TUMOR, absent);
    final String absentNames = absent.toString();

    final ValidationStringency stringency =
            defaultEnumValue(cmd, VALIDATION_STRINGENCY, ValidationStringency.DEFAULT_STRINGENCY);

    if (absentNames.length() > 0) {
        throw new ParseException("Missing the following parameters: " + absentNames);
    }

    return ImmutableCobaltConfig.builder()
            .threadCount(threads)
            .minMappingQuality(minMapQuality)
            .gcProfilePath(gcProfile)
            .tumorBamPath(tumorBam)
            .referenceBamPath(referenceBam)
            .refGenomePath(refGenome)
            .outputDirectory(outputDir)
            .reference(referenceSample)
            .tumor(tumorSample)
            .validationStringency(stringency)
            .build();
}
Example 20
Source Project: gatk Source File: HtsgetReaderIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Runs the tool against ENDPOINT with the supplied parameters and checks that the BAM it writes
 * equals the expected file from the tool's test data directory (compared with LENIENT validation).
 *
 * @param params           extra name/value arguments to pass to the tool
 * @param expectedFileName name of the expected BAM in the tool test data directory
 */
@Test(dataProvider = "successfulParameters")
public void testSuccessfulParameters(final Map<String, String> params, final String expectedFileName) throws IOException {
    final File expectedBam = new File(getToolTestDataDir(), expectedFileName);
    final File actualBam = createTempFile("output", ".bam");

    final ArgumentsBuilder builder = new ArgumentsBuilder()
            .add(HtsgetReader.URL_LONG_NAME, ENDPOINT)
            .addOutput(actualBam);
    for (final Map.Entry<String, String> param : params.entrySet()) {
        builder.add(param.getKey(), param.getValue());
    }

    runCommandLine(builder);

    SamAssertionUtils.assertEqualBamFiles(actualBam, expectedBam, false, ValidationStringency.LENIENT);
}
Example 21
Source Project: gatk-protected Source File: CollectAllelicCountsIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testNonStrictBAMWithSilentValidationStringency() { final File outputFile = createTempFile("collect-allelic-counts-test-output", ".tsv"); final String[] arguments = { "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, NON_STRICT_BAM_FILE.getAbsolutePath(), "-" + ExomeStandardArgumentDefinitions.SITES_FILE_SHORT_NAME, SITES_FILE.getAbsolutePath(), "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REF_FILE.getAbsolutePath(), "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, outputFile.getAbsolutePath(), "-" + StandardArgumentDefinitions.READ_VALIDATION_STRINGENCY_SHORT_NAME, ValidationStringency.SILENT.toString() }; runCommandLine(arguments); //should complete successfully with --readValidationStringency SILENT }
Example 22
Source Project: gatk Source File: PrintReadsIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(groups = {"cloud", "bucket"}, dataProvider = "getHttpPaths") public void testHttpPaths(String reads, String index, String nonHttpReads, String nonHttpIndex, List<SimpleInterval> intervals, long expectedNumberOfReads) throws IOException { final ArgumentsBuilder args = new ArgumentsBuilder(); final File out = createTempFile("out", ".bam"); // this test reads tiny amounts of data from multiple places, if you don't set the prefetcher to a lower number // it loads large amounts of data that slows the test down significantly for no good reason args.addInput(reads) .add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1) .add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1) .add("read-index", index) .addReference(GATKBaseTest.b37Reference) .addOutput(out); intervals.forEach(args::addInterval); runCommandLine(args); final ArgumentsBuilder args2 = new ArgumentsBuilder(); final File out2 = createTempFile("out", ".bam"); args2.addInput(nonHttpReads) .add("read-index", nonHttpIndex) .add(StandardArgumentDefinitions.CLOUD_PREFETCH_BUFFER_LONG_NAME, 1) .add(StandardArgumentDefinitions.CLOUD_INDEX_PREFETCH_BUFFER_LONG_NAME, 1) .addReference(GATKBaseTest.b37Reference) .addOutput(out2); intervals.forEach(args2::addInterval); runCommandLine(args2); try(final ReadsDataSource reader = new ReadsPathDataSource(out.toPath())){ final long count = Utils.stream(reader).count(); Assert.assertEquals( count, expectedNumberOfReads); } SamAssertionUtils.assertEqualBamFiles(out, out2, false, ValidationStringency.DEFAULT_STRINGENCY); }
Example 23
Source Project: picard Source File: TileMetricsUtil.java License: MIT License | 5 votes |
/**
 * Builds tiles from a version-THREE tile metrics file plus per-lane phasing metrics files.
 * The {@code validationStringency} argument is not used by this overload.
 *
 * @deprecated use {@link #parseClusterRecordsFromTileMetricsV3(Collection, Map, ReadStructure)} instead
 */
@Deprecated
public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile,
                                                final Map<Integer, File> phasingMetricsFiles,
                                                final ReadStructure readStructure,
                                                final ValidationStringency validationStringency) throws FileNotFoundException {
    final Map<Integer, Map<Integer, Collection<TilePhasingValue>>> phasingByKey =
            getTilePhasingValues(phasingMetricsFiles, readStructure);

    final TileMetricsOutReader metricsReader =
            new TileMetricsOutReader(tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.THREE);

    final Map<String, ? extends Collection<IlluminaTileMetrics>> metricsByLocation =
            partitionTileMetricsByLocation(determineLastValueForLaneTileMetricsCode(metricsReader));

    return getTileClusterRecordsV3(metricsByLocation, phasingByKey, metricsReader.getDensity());
}
Example 24
Source Project: picard Source File: TileMetricsUtil.java License: MIT License | 5 votes |
/** * Returns an unmodifiable collection of tile data read from the provided file. For each tile we will extract: * - lane number * - tile number * - density * - cluster ID * - Phasing & Prephasing for first template read (if available) * - Phasing & Prephasing for second template read (if available) */ public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure, final ValidationStringency validationStringency) throws FileNotFoundException { // Get the tile metrics lines from TileMetricsOut, keeping only the last value for any Lane/Tile/Code combination final Collection<IlluminaTileMetrics> tileMetrics = determineLastValueForLaneTileMetricsCode(new TileMetricsOutReader (tileMetricsOutFile, TileMetricsOutReader.TileMetricsVersion.TWO)); // Collect the tiles by lane & tile, and then collect the metrics by lane final Map<String, ? extends Collection<IlluminaTileMetrics>> locationToMetricsMap = partitionTileMetricsByLocation(tileMetrics); final Collection<Tile> tiles = new LinkedList<>(); for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) { final Collection<IlluminaTileMetrics> tileRecords = entry.getValue(); // Get a mapping from metric code number to the corresponding IlluminaTileMetrics final Map<Integer, ? 
extends Collection<IlluminaTileMetrics>> codeMetricsMap = partitionTileMetricsByCode(tileRecords); final Set<Integer> observedCodes = codeMetricsMap.keySet(); if (!(observedCodes.contains(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()) && observedCodes.contains(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode()))) throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.", IlluminaMetricsCode.CLUSTER_ID.getMetricsCode(), IlluminaMetricsCode.DENSITY_ID.getMetricsCode(), entry.getKey(), observedCodes)); final IlluminaTileMetrics densityRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.DENSITY_ID.getMetricsCode())); final IlluminaTileMetrics clusterRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode())); // Snag the phasing data for each read in the read structure. For both types of phasing values, this is the median of all of the individual values seen final Collection<TilePhasingValue> tilePhasingValues = getTilePhasingValues(codeMetricsMap, readStructure, validationStringency); tiles.add(new Tile(densityRecord.getLaneNumber(), densityRecord.getTileNumber(), densityRecord.getMetricValue(), clusterRecord.getMetricValue(), tilePhasingValues.toArray(new TilePhasingValue[tilePhasingValues.size()]))); } return Collections.unmodifiableCollection(tiles); }
Example 25
Source Project: picard Source File: CollectIlluminaLaneMetrics.java License: MIT License | 5 votes |
/**
 * Reads the tiles of an Illumina run directory and groups them by lane number.
 * Tiles whose lane number is not greater than zero are dropped.
 *
 * @param illuminaRunDirectory run directory to search for tile metrics files
 * @param readStructure        read structure; its total cycle count is used to find the files
 * @param validationStringency passed to the tile metrics parser on the non-NovaSeq path
 * @param isNovaSeq            selects the V3 cluster-record parser when true
 * @return lane number mapped to that lane's tiles
 * @throws PicardException if a metrics file cannot be found
 */
public static Map<Integer, ? extends Collection<Tile>> readLaneTiles(final File illuminaRunDirectory,
                                                                     final ReadStructure readStructure,
                                                                     final ValidationStringency validationStringency,
                                                                     final boolean isNovaSeq) {
    final Collection<Tile> allTiles;
    try {
        final List<File> metricsFiles =
                TileMetricsUtil.findTileMetricsFiles(illuminaRunDirectory, readStructure.totalCycles, isNovaSeq);
        allTiles = isNovaSeq
                ? TileMetricsUtil.parseClusterRecordsFromTileMetricsV3(
                        metricsFiles,
                        TileMetricsUtil.renderPhasingMetricsFilesFromBasecallingDirectory(illuminaRunDirectory),
                        readStructure)
                : TileMetricsUtil.parseTileMetrics(
                        metricsFiles.get(0),
                        readStructure,
                        validationStringency);
    } catch (final FileNotFoundException e) {
        throw new PicardException("Unable to open laneMetrics file.", e);
    }

    return allTiles.stream()
            .filter(tile -> tile.getLaneNumber() > 0)
            .collect(Collectors.groupingBy(Tile::getLaneNumber));
}
Example 26
Source Project: picard Source File: CollectIlluminaLaneMetrics.java License: MIT License | 5 votes |
/**
 * Reads the tiles of {@code runDirectory} grouped by lane, then writes the lane metrics
 * followed by the phasing metrics.
 *
 * @param runDirectory         directory the tile data is read from
 * @param outputDirectory      directory handed to both metric writers
 * @param outputPrefix         file name prefix handed to both metric writers
 * @param laneMetricsFile      metrics file receiving the lane metrics
 * @param phasingMetricsFile   metrics file receiving the phasing metrics
 * @param readStructure        read structure used when reading tiles
 * @param fileExtension        extension handed to both metric writers
 * @param validationStringency stringency used when reading tiles
 * @param isNovaSeq            forwarded to tile reading and to the phasing writer
 */
public static void collectLaneMetrics(final File runDirectory,
                                      final File outputDirectory,
                                      final String outputPrefix,
                                      final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
                                      final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
                                      final ReadStructure readStructure,
                                      final String fileExtension,
                                      final ValidationStringency validationStringency,
                                      final boolean isNovaSeq) {
    final Map<Integer, ? extends Collection<Tile>> tilesByLane =
            readLaneTiles(runDirectory, readStructure, validationStringency, isNovaSeq);

    writeLaneMetrics(tilesByLane, outputDirectory, outputPrefix, laneMetricsFile, fileExtension);
    writePhasingMetrics(tilesByLane, outputDirectory, outputPrefix, phasingMetricsFile, fileExtension, isNovaSeq);
}
Example 27
Source Project: gatk Source File: ConfigPrintReadsIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test /** * This test just checks that PrintReads runs with a config arg without blowing up. * It does not test whether the config settings were actually loaded correctly * (tested by {@link ConfigIntegrationTest}). */ public void testPrintReadsWithConfigFile() throws Exception { final String inputFile = publicTestDir + "NA12878.chr17_69k_70k.dictFix.bam"; final File outputFile = createTempFile("TEST_OUT_NA12878.chr17_69k_70k.dictFix", ".bam"); // Create some arguments for our command: final ArgumentsBuilder args = new ArgumentsBuilder(); args.addRaw("--" + StandardArgumentDefinitions.GATK_CONFIG_FILE_OPTION); args.addRaw(configFilePath); args.addRaw("-" + StandardArgumentDefinitions.INPUT_SHORT_NAME); args.addRaw(inputFile); args.addRaw("-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME); args.addRaw(outputFile); // Run our command: runCommandLine(args.getArgsArray()); // Ensure the files are the same: IntegrationTestSpec.assertMatchingFiles( Collections.singletonList(new File(inputFile)), Collections.singletonList(outputFile.getAbsolutePath()), true, ValidationStringency.LENIENT ); }
Example 28
Source Project: picard Source File: CleanSamTester.java License: MIT License | 5 votes |
protected void test() { try { final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000); // Validate it has the expected cigar validator.setIgnoreWarnings(true); validator.setVerbose(true, 1000); validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP)); SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT); SamReader samReader = factory.open(getOutput()); final SAMRecordIterator iterator = samReader.iterator(); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); Assert.assertEquals(rec.getCigarString(), expectedCigar); if (SAMUtils.hasMateCigar(rec)) { Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar); } } CloserUtil.close(samReader); // Run validation on the output file samReader = factory.open(getOutput()); final boolean validated = validator.validateSamFileVerbose(samReader, null); CloserUtil.close(samReader); Assert.assertTrue(validated, "ValidateSamFile failed"); } finally { IOUtil.recursiveDelete(getOutputDir().toPath()); } }
Example 29
Source Project: gatk Source File: BaseRecalibratorSparkIntegrationTest.java License: BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Disabled workflow test: runs BaseRecalibratorSpark, applies the resulting table with
 * ApplyBQSRSpark, runs BaseRecalibratorSpark again on the recalibrated BAM, then compares the
 * recalibrated BAM and the second table with expected files.
 *
 * Fix: both BaseRecalibrator argument strings concatenated the reference path directly with
 * "-indels", so the flag was glued onto the path. A separating space is now inserted.
 */
@Test(description = "This is to test https://github.com/broadinstitute/hellbender/issues/322", groups = {"cloud", "spark"}, enabled = false)
public void testPlottingWorkflow() throws IOException {
    final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
    final String chr2021Reference2bit = GCS_b37_CHR20_21_REFERENCE_2BIT;
    final String dbSNPb37_chr2021 = resourceDir + DBSNP_138_B37_CH20_1M_1M1K_VCF;
    final String HiSeqBam_chr20 = getResourceDir() + WGS_B37_CH20_1M_1M1K_BAM;

    final File actualHiSeqBam_recalibrated = createTempFile("actual.recalibrated", ".bam");

    // Step 1: build the pre-recalibration table.
    final String tablePre = createTempFile("gatk4.pre.cols", ".table").getAbsolutePath();
    final String argPre = " -R " + chr2021Reference2bit + " -indels --enable-baq " + " --known-sites " + dbSNPb37_chr2021
            + " -I " + HiSeqBam_chr20 + " -O " + tablePre;
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argPre));

    // Step 2: apply the table to produce the recalibrated BAM.
    final String argApply = "-I " + HiSeqBam_chr20 + " --bqsr-recal-file " + tablePre + " -O " + actualHiSeqBam_recalibrated.getAbsolutePath();
    new ApplyBQSRSpark().instanceMain(Utils.escapeExpressions(argApply));

    // Step 3: build the post-recalibration table from the recalibrated BAM.
    final File actualTablePost = createTempFile("gatk4.post.cols", ".table");
    final String argsPost = " -R " + chr2021Reference2bit + " -indels --enable-baq " + " --known-sites " + dbSNPb37_chr2021
            + " -I " + actualHiSeqBam_recalibrated.getAbsolutePath() + " -O " + actualTablePost.getAbsolutePath();
    new BaseRecalibratorSpark().instanceMain(Utils.escapeExpressions(argsPost));

    // NOTE(review): the expected file names mention chr17 while the inputs are chr20 resources —
    // confirm these are the intended expectations before re-enabling the test.
    final File expectedHiSeqBam_recalibrated = new File(resourceDir + "expected.NA12878.chr17_69k_70k.dictFix.recalibrated.DIQ.bam");
    SamAssertionUtils.assertSamsEqual(actualHiSeqBam_recalibrated, expectedHiSeqBam_recalibrated, ValidationStringency.LENIENT);

    final File expectedTablePost = new File(getResourceDir() + "expected.NA12878.chr17_69k_70k.postRecalibrated.txt");
    IntegrationTestSpec.assertEqualTextFiles(actualTablePost, expectedTablePost);
}
Example 30
Source Project: cramtools Source File: CramToBam_OBA_Function.java License: Apache License 2.0 | 5 votes |
/**
 * Converts one serialized CRAM container into block-compressed BAM record bytes.
 *
 * The container is read from {@code object.bytes}, its records are parsed with SILENT
 * validation, normalized, turned into SAM records and encoded through {@code codec}. The output
 * stream is flushed but not closed before its bytes are taken.
 *
 * @param object container bytes together with their order number; must not be null
 * @return a new array carrying the encoded bytes and the same order, or {@code null} when the
 *         container is an EOF container
 * @throws NullPointerException if {@code object} is null
 * @throws RuntimeException wrapping any IOException, IllegalArgumentException or
 *         IllegalAccessException raised during conversion
 */
@Override
public OrderedByteArray apply(OrderedByteArray object) {
    if (object == null) {
        throw new NullPointerException();
    }
    log.debug("processing container " + object.order);

    try {
        final Container cramContainer =
                ContainerIO.readContainer(header.getVersion(), new ByteArrayInputStream(object.bytes));
        if (cramContainer.isEOF()) {
            return null;
        }

        final ArrayList<CramCompressionRecord> cramRecords =
                new ArrayList<CramCompressionRecord>(cramContainer.nofRecords);
        parser.getRecords(cramContainer, cramRecords, ValidationStringency.SILENT);
        n.normalize(cramRecords, null, 0, cramContainer.header.substitutionMatrix);

        final ByteArrayOutputStream bamBytes = new ByteArrayOutputStream();
        final BlockCompressedOutputStream compressed = new BlockCompressedOutputStream(bamBytes, null);
        codec.setOutputStream(compressed);
        for (CramCompressionRecord cramRecord : cramRecords) {
            codec.encode(f.create(cramRecord));
        }
        compressed.flush();

        final OrderedByteArray converted = new OrderedByteArray();
        converted.bytes = bamBytes.toByteArray();
        converted.order = object.order;
        log.debug(String.format("Converted OBA %d, records %d", object.order, cramRecords.size()));
        return converted;
    } catch (IOException | IllegalArgumentException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }
}