htsjdk.variant.vcf.VCFHeader Java Examples
The following examples show how to use
htsjdk.variant.vcf.VCFHeader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantsSparkSinkUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void assertSingleShardedWritingWorks(String vcf, String outputPath, boolean writeTabixIndex) throws IOException { JavaSparkContext ctx = SparkContextFactory.getTestSparkContext(); VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx); JavaRDD<VariantContext> variants = variantsSparkSource.getParallelVariantContexts(vcf, null); if (variants.getNumPartitions() == 1) { variants = variants.repartition(3); // repartition to more than 1 partition } VCFHeader header = getHeader(vcf); VariantsSparkSink.writeVariants(ctx, outputPath, variants, header, writeTabixIndex); checkFileExtensionConsistentWithContents(outputPath, writeTabixIndex); JavaRDD<VariantContext> variants2 = variantsSparkSource.getParallelVariantContexts(outputPath, null); final List<VariantContext> writtenVariants = variants2.collect(); VariantContextTestUtils.assertEqualVariants(readVariants(vcf), writtenVariants); }
Example #2
Source File: ReblockGVCFIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testMQHeadersAreUpdated() throws Exception { final File output = createTempFile("reblockedgvcf", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.add("V", getToolTestDataDir() + "justHeader.g.vcf") .addOutput(output); runCommandLine(args); Pair<VCFHeader, List<VariantContext>> actual = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); VCFHeader header = actual.getLeft(); List<VCFInfoHeaderLine> infoLines = new ArrayList<>(header.getInfoHeaderLines()); //check all the headers in case there's one old and one updated for (final VCFInfoHeaderLine line : infoLines) { if (line.getID().equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED)) { Assert.assertTrue(line.getType().equals(VCFHeaderLineType.Float)); Assert.assertTrue(line.getDescription().contains("deprecated")); } else if (line.getID().equals(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) { Assert.assertTrue(line.getDescription().contains("deprecated")); } } }
Example #3
Source File: CombineGVCFsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testAddToCombinedSomaticGvcf() throws Exception { final File output = createTempFile("combinegvcfs", ".vcf"); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(b37Reference)); args.addOutput(output); args.addVCF(getTestFile("twoSamples.MT.g.vcf")); args.addVCF(getTestFile("NA12891.MT.filtered.g.vcf")); args.add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true); runCommandLine(args); final File output2 = createTempFile("expected", ".vcf"); final ArgumentsBuilder args2 = new ArgumentsBuilder(); args2.addReference(new File(b37Reference)); args2.addOutput(output2); args2.addVCF(getTestFile("NA12878.MT.filtered.g.vcf")); args2.addVCF(getTestFile("NA19240.MT.filtered.g.vcf")); args2.addVCF(getTestFile("NA12891.MT.filtered.g.vcf")); args2.add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true); runCommandLine(args2); final List<VariantContext> expectedVC = getVariantContexts(output2); final List<VariantContext> actualVC = getVariantContexts(output); final VCFHeader header = getHeaderFromFile(output); assertForEachElementInLists(actualVC, expectedVC, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, Collections.emptyList(), Collections.emptyList(), header)); }
Example #4
Source File: GatkToolIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testSitesOnlyMode() { File out = createTempFile("GTStrippedOutput", "vcf"); String[] args = new String[] { "-V", TEST_DIRECTORY + "vcf_with_genotypes.vcf", "--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME, "-O", out.getAbsolutePath()}; runCommandLine(Arrays.asList(args), SelectVariants.class.getSimpleName()); // Assert that the genotype field has been stripped from the file Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath()); Assert.assertFalse(results.getLeft().hasGenotypingData()); for (VariantContext v: results.getRight()) { Assert.assertFalse(v.hasGenotypes()); } }
Example #5
Source File: VariantsSparkSink.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static JavaRDD<VariantContext> sortVariants(final JavaRDD<VariantContext> variants, final VCFHeader header, final int numReducers) { // Turn into key-value pairs so we can sort (by key). Values are null so there is no overhead in the amount // of data going through the shuffle. final JavaPairRDD<VariantContext, Void> rddVariantPairs = variants.mapToPair(variant -> new Tuple2<>(variant, (Void) null)); // do a total sort so that all the records in partition i are less than those in partition i+1 final Comparator<VariantContext> comparator = header.getVCFRecordComparator(); final JavaPairRDD<VariantContext, Void> variantVoidPairs; if (comparator == null){ variantVoidPairs = rddVariantPairs; //no sort } else if (numReducers > 0) { variantVoidPairs = rddVariantPairs.sortByKey(comparator, true, numReducers); } else { variantVoidPairs = rddVariantPairs.sortByKey(comparator); } return variantVoidPairs.map(Tuple2::_1); }
Example #6
Source File: GnarlyGenotyper.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); if(onlyOutputCallsStartingInIntervals) { if( !intervalArgumentCollection.intervalsSpecified()) { throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); } } intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : Collections.emptyList(); final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); setupVCFWriter(inputVCFHeader, samples); genotyperEngine = new GnarlyGenotyperEngine(keepAllSites, genotypeArgs.MAX_ALTERNATE_ALLELES, SUMMARIZE_PLs, stripASAnnotations); Reflections reflections = new Reflections("org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific"); //not InfoFieldAnnotation.class because we don't want AS_InbreedingCoeff allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_StrandBiasTest.class)); allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_RankSumTest.class)); allAlleleSpecificAnnotations.add(AS_RMSMappingQuality.class); allAlleleSpecificAnnotations.add(AS_QualByDepth.class); }
Example #7
Source File: AnnotateVcfWithExpectedAlleleFraction.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam")); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); headerLines.addAll(getDefaultToolVCFHeaderLines()); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions); final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream() .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction)); mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream() .mapToDouble(mixingfractionsMap::get).toArray(); }
Example #8
Source File: FilterVariantTranches.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void writeVCFHeader(VariantContextWriter vcfWriter) { // setup the header fields final VCFHeader inputHeader = getHeaderForVariants(); Set<VCFHeaderLine> hInfo = new LinkedHashSet<VCFHeaderLine>(); hInfo.addAll(inputHeader.getMetaDataInSortedOrder()); boolean hasInfoKey = hInfo.stream().anyMatch( x -> x instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) x).getID().equals(infoKey)); if (!hasInfoKey){ throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey)); } if (removeOldFilters){ hInfo.removeIf(x -> x instanceof VCFFilterHeaderLine); } addTrancheHeaderFields(SNPString, snpTranches, hInfo); addTrancheHeaderFields(INDELString, indelTranches, hInfo); final TreeSet<String> samples = new TreeSet<>(); samples.addAll(inputHeader.getGenotypeSamples()); hInfo.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
Example #9
Source File: FilterAlignmentArtifacts.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { realignmentEngine = new RealignmentEngine(realignmentArgumentCollection); vcfWriter = createVCFWriter(new File(outputVcf)); final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.UNITIG_SIZES_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY)); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter.writeHeader(vcfHeader); bamHeader = getHeaderForReads(); samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader))); referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(referenceArguments.getReferenceSpecifier())); assemblyEngine = MTAC.createReadThreadingAssembler(); likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs); haplotypeBAMWriter = bamOutputPath == null ? Optional.empty() : Optional.of(new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, IOUtils.getPath(bamOutputPath), true, false, getHeaderForSAMWriter())); }
Example #10
Source File: GVCFWriterUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static VCFHeader getMinimalVCFHeader() { final Set<VCFHeaderLine> headerlines = new LinkedHashSet<>(); VCFStandardHeaderLines.addStandardFormatLines(headerlines, true, VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS); VCFStandardHeaderLines.addStandardInfoLines(headerlines, true, VCFConstants.DEPTH_KEY, VCFConstants.RMS_MAPPING_QUALITY_KEY, VCFConstants.MAPPING_QUALITY_ZERO_KEY ); Arrays.asList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY, GATKVCFConstants.CLIPPING_RANK_SUM_KEY, GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY, GATKVCFConstants.READ_POS_RANK_SUM_KEY) .forEach( c -> headerlines.add(GATKVCFHeaderLines.getInfoLine(c))); headerlines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)); return new VCFHeader(headerlines, Collections.singleton(SAMPLE_NAME)); }
Example #11
Source File: FastaAlternateReferenceMaker.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { super.onTraversalStart(); if (snpmaskPriority && snpmask == null){ throw new CommandLineException("Cannot specify --" + SNP_MASK_PRIORITY_LONG_NAME + " without " + " --" + SNP_MASK_LONG_NAME); } if ( iupacSample != null ) { final VCFHeader variantHeader = (VCFHeader) getHeaderForFeatures(variants); final ArrayList<String> samples = variantHeader.getSampleNamesInOrder(); if( samples == null || !samples.contains(iupacSample)) { throw new UserException.BadInput("the IUPAC sample specified is not present in the provided VCF file"); } } }
Example #12
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testGetDefaultToolVCFHeaderLines() throws IOException { final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures(); final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf"); final String[] args = {"--mask", vcfFile.getCanonicalPath(), "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"}; tool.instanceMain(args); Set<VCFHeaderLine> stdHeaderLines = tool.getDefaultToolVCFHeaderLines(); VCFHeader hdr = new VCFHeader(stdHeaderLines); VCFHeaderLine sourceLine = hdr.getOtherHeaderLine("source"); Assert.assertEquals(sourceLine.getValue(), tool.getClass().getSimpleName()); VCFIDHeaderLine commandLine = (VCFIDHeaderLine) hdr.getOtherHeaderLine("GATKCommandLine"); Assert.assertEquals(commandLine.getID(), tool.getClass().getSimpleName()); String commandLineString = commandLine.toString(); assertContains(commandLineString,"CommandLine="); assertContains(commandLineString,"Version="); assertContains(commandLineString,"Date="); }
Example #13
Source File: RemoveNearbyIndels.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final VCFHeader vcfHeader = new VCFHeader(inputHeader.getMetaDataInSortedOrder(), inputHeader.getGenotypeSamples()); getDefaultToolVCFHeaderLines().forEach(vcfHeader::addMetaDataLine); vcfWriter = createVCFWriter(new File(outputVcf)); vcfWriter.writeHeader(vcfHeader); }
Example #14
Source File: PostprocessGermlineCNVCallsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "differentValidInput", groups = {"python"}) public void testDifferentValidInput(final int sampleIndex, final List<String> callShards, final List<String> modelShards) throws IOException { final File actualIntervalsOutputVCF = createTempFile("intervals-output-vcf-" + sampleIndex, ".vcf"); final File actualSegmentsOutputVCF = createTempFile("segments-output-vcf-" + sampleIndex, ".vcf"); final File intervalsIndex = new File(actualIntervalsOutputVCF.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX); Assert.assertFalse(intervalsIndex.exists()); final File segmentsIndex = new File(actualIntervalsOutputVCF.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX); Assert.assertFalse(segmentsIndex.exists()); final File actualDenoisedCopyRatiosOutput = createTempFile("denoised-copy-ratios-output-" + sampleIndex, ".tsv"); final File expectedIntervalsOutputVCF = INTERVALS_VCF_CORRECT_OUTPUTS.get(sampleIndex); final File expectedSegmentsOutputVCF = SEGMENTS_VCF_CORRECT_OUTPUTS.get(sampleIndex); final File expectedDenoisedCopyRatiosOutput = DENOISED_COPY_RATIOS_OUTPUTS.get(sampleIndex); runToolForSingleSample(callShards, modelShards, sampleIndex, actualIntervalsOutputVCF, actualSegmentsOutputVCF, actualDenoisedCopyRatiosOutput, ALLOSOMAL_CONTIGS, AUTOSOMAL_REF_COPY_NUMBER); Assert.assertTrue(intervalsIndex.exists()); Assert.assertTrue(segmentsIndex.exists()); IntegrationTestSpec.assertEqualTextFiles(actualIntervalsOutputVCF, expectedIntervalsOutputVCF, "##"); final VCFHeader intervalsHeader = VariantContextTestUtils.getVCFHeader(actualIntervalsOutputVCF.getAbsolutePath()); Assert.assertTrue(intervalsHeader.getContigLines().size() > 0); IntegrationTestSpec.assertEqualTextFiles(actualSegmentsOutputVCF, expectedSegmentsOutputVCF, "##"); final VCFHeader segmentsHeader = VariantContextTestUtils.getVCFHeader(actualIntervalsOutputVCF.getAbsolutePath()); Assert.assertTrue(segmentsHeader.getContigLines().size() > 0); IntegrationTestSpec.assertEqualTextFiles(actualDenoisedCopyRatiosOutput, expectedDenoisedCopyRatiosOutput, "##"); }
Example #15
Source File: HaplotypeCallerEngine.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Writes an appropriate VCF header, given our arguments, to the provided writer * * @param vcfWriter writer to which the header should be written */ public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary, final Set<VCFHeaderLine> defaultToolHeaderLines) { Utils.nonNull(vcfWriter); final Set<VCFHeaderLine> headerInfo = new HashSet<>(); headerInfo.addAll(defaultToolHeaderLines); headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders()); // all annotation fields from VariantAnnotatorEngine headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); // all callers need to add these standard annotation header lines headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY)); headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY)); // all callers need to add these standard FORMAT field header lines VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); if ( ! hcArgs.doNotRunPhysicalPhasing ) { headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY)); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY)); } // FILTER fields are added unconditionally as it's not always 100% certain the circumstances // where the filters are used. For example, in emitting all sites the lowQual field is used headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME)); if ( emitReferenceConfidence() ) { headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines()); } final VCFHeader vcfHeader = new VCFHeader(headerInfo, sampleSet); vcfHeader.setSequenceDictionary(sequenceDictionary); vcfWriter.writeHeader(vcfHeader); }
Example #16
Source File: KeyIgnoringBCFRecordWriter.java From Hadoop-BAM with MIT License | 5 votes |
/** * @deprecated This constructor has no {@link TaskAttemptContext} so it is not * possible to pass configuration properties to the writer. */ @Deprecated public KeyIgnoringBCFRecordWriter( OutputStream output, VCFHeader header, boolean writeHeader) throws IOException { super(output, header, writeHeader); }
Example #17
Source File: CNNScoreVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void writeVCFHeader(VariantContextWriter vcfWriter) { // setup the header fields final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder(); final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders); hInfo.add(GATKVCFHeaderLines.getInfoLine(scoreKey)); final TreeSet<String> samples = new TreeSet<>(); samples.addAll(inputHeader.getGenotypeSamples()); hInfo.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
Example #18
Source File: CreateSomaticPanelOfNormals.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
public Object doWork() { final List<File> inputVcfs = new ArrayList<>(vcfs); final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size()); final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size()); final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader(); final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary(); final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator(); for (final File vcf : inputVcfs) { final VCFFileReader reader = new VCFFileReader(vcf, false); iterators.add(reader.iterator()); final VCFHeader header = reader.getFileHeader(); Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs."); headers.add(header); } final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY); writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false))); final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators); SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1); final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20); while (mergingIterator.hasNext()) { final VariantContext vc = mergingIterator.next(); if (!currentPosition.overlaps(vc)) { processVariantsAtSamePosition(variantsAtThisPosition, writer); variantsAtThisPosition.clear(); currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart()); } variantsAtThisPosition.add(vc); } mergingIterator.close(); writer.close(); return "SUCCESS"; }
Example #19
Source File: SortVcf.java From picard with MIT License | 5 votes |
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) { final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records"); final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class); final VariantContextWriter out = new VariantContextWriterBuilder(). setReferenceDictionary(outputHeader.getSequenceDictionary()). setOptions(options). setOutputFile(OUTPUT).build(); out.writeHeader(outputHeader); for (final VariantContext variantContext : sortedOutput) { out.add(variantContext); writeProgress.record(variantContext.getContig(), variantContext.getStart()); } out.close(); }
Example #20
Source File: GtcToVcf.java From picard with MIT License | 5 votes |
static Sex getFingerprintSex(final File file) { if (file != null) { try (VCFFileReader reader = new VCFFileReader(file, false)) { final VCFHeader header = reader.getFileHeader(); final VCFHeaderLine gender = header.getMetaDataLine("gender"); if (gender != null) { return Sex.valueOf(gender.getValue()); } } } return Sex.Unknown; }
Example #21
Source File: VcfOutputRendererUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Test that the exclusion list overrides the manually specified annotations */ @Test public void testExclusionListOverridesManualDefaultAnnotations() { final Pair<VCFHeader, List<VariantContext>> entireInputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(TEST_VCF); final File outFile = createTempFile("vcf_output_renderer_exclusion", ".vcf"); final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(outFile.toPath(),null, false); final LinkedHashMap<String, String> dummyDefaults = new LinkedHashMap<>(); dummyDefaults.put("FOO", "BAR"); dummyDefaults.put("BAZ", "HUH?"); final VcfOutputRenderer vcfOutputRenderer = new VcfOutputRenderer(vcfWriter, new ArrayList<>(), entireInputVcf.getLeft(), new LinkedHashMap<>(dummyDefaults), new LinkedHashMap<>(), new HashSet<>(), new HashSet<>(Arrays.asList("BAZ", "AC")), "Unknown"); final VariantContext variant = entireInputVcf.getRight().get(0); final FuncotationMap funcotationMap = FuncotationMap.createNoTranscriptInfo(Collections.emptyList()); vcfOutputRenderer.write(variant, funcotationMap); vcfOutputRenderer.close(); // Check the output final Pair<VCFHeader, List<VariantContext>> tempVcf = VariantContextTestUtils.readEntireVCFIntoMemory(outFile.getAbsolutePath()); final VariantContext tempVariant = tempVcf.getRight().get(0); final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine("FUNCOTATION").getDescription()); Assert.assertEquals(funcotatorKeys.length,1); Assert.assertEquals(funcotatorKeys[0],"FOO"); final FuncotationMap tempFuncotationMap = FuncotationMap.createAsAllTableFuncotationsFromVcf(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, funcotatorKeys, tempVariant.getAttributeAsString("FUNCOTATION", ""), tempVariant.getAlternateAllele(0), "TEST"); Assert.assertTrue(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("FOO")); Assert.assertEquals(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).getField("FOO"), "BAR"); Assert.assertFalse(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("BAZ")); // IMPORTANT: If the field is not a proper funcotation in VCFs, it will not be excluded. I.e. if an input VCF has an excluded field, it will not be excluded. Assert.assertEquals(tempVariant.getAttribute("AC"), "1"); }
Example #22
Source File: VcfFuncotationFactoryUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "provideForTestCreateFuncotationsOnVariant") public void testCreateFuncotationMetadata(final String variantFeatureDataFileName, final VariantContext variant, final ReferenceContext referenceContext, final List<Funcotation> expected) { // Don't need the expected gt for this test, but useful to reuse the data provider. // Make our factory: final VcfFuncotationFactory vcfFuncotationFactory = createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(variantFeatureDataFileName)); // Create features from the file: final List<Feature> vcfFeatures; try (final VCFFileReader vcfReader = new VCFFileReader(IOUtils.getPath(variantFeatureDataFileName))) { vcfFeatures = vcfReader.query(variant.getContig(), variant.getStart(), variant.getEnd()).stream().collect(Collectors.toList()); } // test the metadata final List<Funcotation> funcotations = vcfFuncotationFactory.createFuncotationsOnVariant( variant, referenceContext, vcfFeatures, Collections.emptyList() ); Assert.assertEquals(funcotations.stream().map(f -> f.getMetadata().retrieveAllHeaderInfo()).collect(Collectors.toSet()).size(), 1); final Pair<VCFHeader, List<VariantContext>> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(variantFeatureDataFileName); final List<VCFInfoHeaderLine> gtOutputVcfInfoHeaderLines = vcfFuncotationFactory.createFuncotationVcfInfoHeaderLines(vcfInfo.getLeft()); // Get the info headers that are in the VCF and make sure that these are also present in the metadata final Set<String> headerInfoLines = funcotations.get(0).getFieldNames(); final Set<String> metadataFields = funcotations.get(0).getMetadata().retrieveAllHeaderInfo().stream() .map(f -> f.getID()) .collect(Collectors.toSet()); Assert.assertEquals(metadataFields, headerInfoLines); Assert.assertEquals(metadataFields, vcfFuncotationFactory.getSupportedFuncotationFields()); Assert.assertEquals(funcotations.get(0).getMetadata().retrieveAllHeaderInfo(), gtOutputVcfInfoHeaderLines); }
Example #23
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static VCFHeader getHeader(final Path path) { try (FeatureReader<VariantContext> reader = getReaderFromVCFUri(path, 0)) { return ((VCFHeader) reader.getHeader()); } catch (final IOException e) { throw new UserException.CouldNotReadInputFile(path, e.getMessage(), e); } }
Example #24
Source File: FastVCFFileReader.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
public FastVCFFileReader(String vcfFilename) throws IOException { super(vcfFilename); // load header VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false); VCFHeader header = reader.getFileHeader(); samples = header.getGenotypeSamples(); samplesCount = samples.size(); variantContext = new MinimalVariantContext(samplesCount); reader.close(); parser = new VCFLineParser(samplesCount); }
Example #25
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void writeHeaderAndBadVariant(final VariantContextWriter writer) { final VariantContextBuilder vcBuilder = new VariantContextBuilder( "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true))); vcBuilder.attribute("fake", new Object()); final VariantContext vc = vcBuilder.make(); final VCFHeader vcfHeader = new VCFHeader(); writer.writeHeader(vcfHeader); writer.add(vc); }
Example #26
Source File: FixCallSetSampleOrdering.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { assertThatTheyReallyWantToProceed(); if (batchSize == 0) { throw new SampleNameFixingCannotProceedException("your callset is not affected by the bug if you ran with --"+ GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME +" 0"); } if ( readerThreads > 1 ) { if ( gvcfToHeaderSampleMapFile == null ) { throw new SampleNameFixingCannotProceedException("You must provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " > 1"); } } else if ( gvcfToHeaderSampleMapFile != null ) { throw new SampleNameFixingCannotProceedException("You must NOT provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " 1"); } final VCFHeader originalHeader = getHeaderForVariants(); final Set<VCFHeaderLine> originalHeaderLines = originalHeader.getMetaDataInInputOrder(); final Set<VCFHeaderLine> newHeaderLines = new LinkedHashSet<>(originalHeaderLines); newHeaderLines.addAll(getDefaultToolVCFHeaderLines()); loadSampleNameMappings(); final List<String> sampleNamesOriginalOrdering = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet()); if( sampleNamesOriginalOrdering.size() <= batchSize ){ throw new SampleNameFixingCannotProceedException("you are not affected by the sample name ordering bug if your batch size is >= the number of samples in your callset. \n" + "batch size: " + batchSize + "\n" + "number of samples: " + sampleNamesOriginalOrdering.size()); } assertSampleNamesMatchInputVCF(originalHeader.getSampleNamesInOrder(), sampleNamesOriginalOrdering); final List<String> batchSortedSampleNames = getBatchSortedList(); final VCFHeader remappedHeader = new VCFHeader(newHeaderLines, batchSortedSampleNames); logger.info("Writing the new header with corrected sample names"); writer = createVCFWriter(output); writer.writeHeader(remappedHeader); logger.info("Copying the rest of the VCF"); }
Example #27
Source File: FuncotatorIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testExclusionFromDatasourceVcfToVcf() { // Clinvar datasource did go through one round of preprocessing to make contig names "1" --> "chr1" (for example). This is an issue with ClinVar, not GATK. final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF; final File outputFile = getOutputFile(outputFormatType); final List<String> excludedFields = Arrays.asList("dummy_ClinVar_VCF_DBVARID", "dummy_ClinVar_VCF_CLNVI"); final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator( PIK3CA_VCF_HG38, outputFile, hg38Chr3Ref, DS_PIK3CA_DIR, FuncotatorTestConstants.REFERENCE_VERSION_HG38, outputFormatType, false, excludedFields); runCommandLine(arguments); final Pair<VCFHeader, List<VariantContext>> tempVcf = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath()); final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME).getDescription()); // Ensure that the header does not contain the excluded fields Stream.of(funcotatorKeys).forEach(k -> Assert.assertFalse(excludedFields.contains(k))); final List<VariantContext> variantContexts = tempVcf.getRight(); for (final VariantContext vc : variantContexts) { final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( funcotatorKeys, vc, "Gencode_28_annotationTranscript", "FAKE_SOURCE"); for (final String txId: funcs.get(vc.getAlternateAllele(0)).getTranscriptList()) { final List<Funcotation> funcotations = funcs.get(vc.getAlternateAllele(0)).get(txId); for (final Funcotation funcotation : funcotations) { funcotation.getFieldNames().forEach(f -> Assert.assertFalse(excludedFields.contains(f))); } } } }
Example #28
Source File: MTLowHeteroplasmyFilterTool.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { final VCFHeader header = getHeaderForVariants(); header.addMetaDataLine(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_HET_FILTER_NAME)); vcfWriter = createVCFWriter(new File(outputVcf)); vcfWriter.writeHeader(header); }
Example #29
Source File: FeatureDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Returns the sequence dictionary for this source of Features. * Uses the dictionary from the VCF header (if present) for variant inputs, * otherwise attempts to create a sequence dictionary from the index file (if present). * Returns null if no dictionary could be created from either the header or the index. */ public SAMSequenceDictionary getSequenceDictionary() { SAMSequenceDictionary dict = null; final Object header = getHeader(); if (header instanceof VCFHeader) { dict = ((VCFHeader) header).getSequenceDictionary(); } if (dict != null && !dict.isEmpty()) { return dict; } if (hasIndex) { return IndexUtils.createSequenceDictionaryFromFeatureIndex(new File(featureInput.getFeaturePath())); } return null; }
Example #30
Source File: SageVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
public SageVCF(@NotNull final IndexedFastaSequenceFile reference, @NotNull final SageConfig config) { writer = new VariantContextWriterBuilder().setOutputFile(config.outputFile()) .modifyOption(Options.INDEX_ON_THE_FLY, true) .modifyOption(Options.USE_ASYNC_IO, false) .setReferenceDictionary(reference.getSequenceDictionary()) .build(); refContextEnrichment = new SomaticRefContextEnrichment(reference, this::writeToFile); final VCFHeader header = refContextEnrichment.enrichHeader(header(config)); header.setSequenceDictionary(reference.getSequenceDictionary()); writer.writeHeader(header); }