htsjdk.variant.vcf.VCFHeader Java Examples

The following examples show how to use htsjdk.variant.vcf.VCFHeader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantsSparkSinkUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void assertSingleShardedWritingWorks(String vcf, String outputPath, boolean writeTabixIndex) throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);
    JavaRDD<VariantContext> variants = variantsSparkSource.getParallelVariantContexts(vcf, null);
    if (variants.getNumPartitions() == 1) {
        variants = variants.repartition(3); // repartition to more than 1 partition
    }
    VCFHeader header = getHeader(vcf);

    VariantsSparkSink.writeVariants(ctx, outputPath, variants, header, writeTabixIndex);

    checkFileExtensionConsistentWithContents(outputPath, writeTabixIndex);

    JavaRDD<VariantContext> variants2 = variantsSparkSource.getParallelVariantContexts(outputPath, null);
    final List<VariantContext> writtenVariants = variants2.collect();

    VariantContextTestUtils.assertEqualVariants(readVariants(vcf), writtenVariants);
}
 
Example #2
Source File: ReblockGVCFIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testMQHeadersAreUpdated() throws Exception {
    final File output = createTempFile("reblockedgvcf", ".vcf");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.add("V", getToolTestDataDir() + "justHeader.g.vcf")
            .addOutput(output);
    runCommandLine(args);

    Pair<VCFHeader, List<VariantContext>> actual = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
    VCFHeader header = actual.getLeft();
    List<VCFInfoHeaderLine> infoLines = new ArrayList<>(header.getInfoHeaderLines());
    //check all the headers in case there's one old and one updated
    for (final VCFInfoHeaderLine line : infoLines) {
        if (line.getID().equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED)) {
            Assert.assertTrue(line.getType().equals(VCFHeaderLineType.Float));
            Assert.assertTrue(line.getDescription().contains("deprecated"));
        } else if (line.getID().equals(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
            Assert.assertTrue(line.getDescription().contains("deprecated"));
        }
    }
}
 
Example #3
Source File: CombineGVCFsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testAddToCombinedSomaticGvcf() throws Exception {
    final File output = createTempFile("combinegvcfs", ".vcf");
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addReference(new File(b37Reference));
    args.addOutput(output);
    args.addVCF(getTestFile("twoSamples.MT.g.vcf"));
    args.addVCF(getTestFile("NA12891.MT.filtered.g.vcf"));
    args.add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true);
    runCommandLine(args);

    final File output2 = createTempFile("expected", ".vcf");
    final ArgumentsBuilder args2 = new ArgumentsBuilder();
    args2.addReference(new File(b37Reference));
    args2.addOutput(output2);
    args2.addVCF(getTestFile("NA12878.MT.filtered.g.vcf"));
    args2.addVCF(getTestFile("NA19240.MT.filtered.g.vcf"));
    args2.addVCF(getTestFile("NA12891.MT.filtered.g.vcf"));
    args2.add(CombineGVCFs.SOMATIC_INPUT_LONG_NAME, true);
    runCommandLine(args2);

    final List<VariantContext> expectedVC = getVariantContexts(output2);
    final List<VariantContext> actualVC = getVariantContexts(output);
    final VCFHeader header = getHeaderFromFile(output);
    assertForEachElementInLists(actualVC, expectedVC, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, Collections.emptyList(), Collections.emptyList(), header));
}
 
Example #4
Source File: GatkToolIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testSitesOnlyMode() {
    File out = createTempFile("GTStrippedOutput", "vcf");
    String[] args = new String[] {
            "-V",  TEST_DIRECTORY + "vcf_with_genotypes.vcf",
            "--" + StandardArgumentDefinitions.SITES_ONLY_LONG_NAME,
            "-O",
            out.getAbsolutePath()};
    runCommandLine(Arrays.asList(args), SelectVariants.class.getSimpleName());

    // Assert that the genotype field has been stripped from the file
    Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(out.getAbsolutePath());

    Assert.assertFalse(results.getLeft().hasGenotypingData());
    for (VariantContext v: results.getRight()) {
        Assert.assertFalse(v.hasGenotypes());
    }
}
 
Example #5
Source File: VariantsSparkSink.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static JavaRDD<VariantContext> sortVariants(final JavaRDD<VariantContext> variants, final VCFHeader header, final int numReducers) {
    // Turn into key-value pairs so we can sort (by key). Values are null so there is no overhead in the amount
    // of data going through the shuffle.
    final JavaPairRDD<VariantContext, Void> rddVariantPairs = variants.mapToPair(variant -> new Tuple2<>(variant, (Void) null));

    // do a total sort so that all the records in partition i are less than those in partition i+1
    final Comparator<VariantContext> comparator = header.getVCFRecordComparator();
    final JavaPairRDD<VariantContext, Void> variantVoidPairs;
    if (comparator == null){
        variantVoidPairs = rddVariantPairs; //no sort
    } else if (numReducers > 0) {
        variantVoidPairs = rddVariantPairs.sortByKey(comparator, true, numReducers);
    } else {
        variantVoidPairs = rddVariantPairs.sortByKey(comparator);
    }

    return variantVoidPairs.map(Tuple2::_1);
}
 
Example #6
Source File: GnarlyGenotyper.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void onTraversalStart() {
    final VCFHeader inputVCFHeader = getHeaderForVariants();

    if(onlyOutputCallsStartingInIntervals) {
        if( !intervalArgumentCollection.intervalsSpecified()) {
            throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified.");
        }
    }
    intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) :
            Collections.emptyList();

    final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples());

    setupVCFWriter(inputVCFHeader, samples);

    genotyperEngine = new GnarlyGenotyperEngine(keepAllSites, genotypeArgs.MAX_ALTERNATE_ALLELES, SUMMARIZE_PLs, stripASAnnotations);

    Reflections reflections = new Reflections("org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific");
    //not InfoFieldAnnotation.class because we don't want AS_InbreedingCoeff
    allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_StrandBiasTest.class));
    allAlleleSpecificAnnotations.addAll(reflections.getSubTypesOf(AS_RankSumTest.class));
    allAlleleSpecificAnnotations.add(AS_RMSMappingQuality.class);
    allAlleleSpecificAnnotations.add(AS_QualByDepth.class);
}
 
Example #7
Source File: AnnotateVcfWithExpectedAlleleFraction.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingfractionsMap::get).toArray();
}
 
Example #8
Source File: FilterVariantTranches.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    // setup the header fields
    final VCFHeader inputHeader = getHeaderForVariants();
    Set<VCFHeaderLine> hInfo = new LinkedHashSet<VCFHeaderLine>();
    hInfo.addAll(inputHeader.getMetaDataInSortedOrder());

    boolean hasInfoKey = hInfo.stream().anyMatch(
            x -> x instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) x).getID().equals(infoKey));
    if (!hasInfoKey){
        throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey));
    }

    if (removeOldFilters){
        hInfo.removeIf(x -> x instanceof VCFFilterHeaderLine);
    }

    addTrancheHeaderFields(SNPString, snpTranches, hInfo);
    addTrancheHeaderFields(INDELString, indelTranches, hInfo);

    final TreeSet<String> samples = new TreeSet<>();
    samples.addAll(inputHeader.getGenotypeSamples());
    hInfo.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #9
Source File: FilterAlignmentArtifacts.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void onTraversalStart() {
    realignmentEngine = new RealignmentEngine(realignmentArgumentCollection);
    vcfWriter = createVCFWriter(new File(outputVcf));

    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.UNITIG_SIZES_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY));
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter.writeHeader(vcfHeader);
    bamHeader = getHeaderForReads();
    samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader)));
    referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(referenceArguments.getReferenceSpecifier()));
    assemblyEngine = MTAC.createReadThreadingAssembler();
    likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs);
    haplotypeBAMWriter = bamOutputPath == null ? Optional.empty() :
            Optional.of(new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, IOUtils.getPath(bamOutputPath), true, false, getHeaderForSAMWriter()));
}
 
Example #10
Source File: GVCFWriterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static VCFHeader getMinimalVCFHeader() {
    final Set<VCFHeaderLine> headerlines = new LinkedHashSet<>();
    VCFStandardHeaderLines.addStandardFormatLines(headerlines, true,
            VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS);

    VCFStandardHeaderLines.addStandardInfoLines(headerlines, true,
            VCFConstants.DEPTH_KEY,
            VCFConstants.RMS_MAPPING_QUALITY_KEY,
            VCFConstants.MAPPING_QUALITY_ZERO_KEY );

    Arrays.asList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY,
           GATKVCFConstants.CLIPPING_RANK_SUM_KEY,
           GATKVCFConstants.MLE_ALLELE_COUNT_KEY,
           GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY,
           GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY,
           GATKVCFConstants.READ_POS_RANK_SUM_KEY)
           .forEach( c -> headerlines.add(GATKVCFHeaderLines.getInfoLine(c)));

    headerlines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY));
    return new VCFHeader(headerlines, Collections.singleton(SAMPLE_NAME));
}
 
Example #11
Source File: FastaAlternateReferenceMaker.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void onTraversalStart() {
    super.onTraversalStart();

    if (snpmaskPriority && snpmask == null){
        throw new CommandLineException("Cannot specify --" + SNP_MASK_PRIORITY_LONG_NAME + " without " + " --" + SNP_MASK_LONG_NAME);
    }

    if ( iupacSample != null ) {
        final VCFHeader variantHeader = (VCFHeader) getHeaderForFeatures(variants);
        final ArrayList<String> samples = variantHeader.getSampleNamesInOrder();
        if( samples == null || !samples.contains(iupacSample)) {
            throw new UserException.BadInput("the IUPAC sample specified is not present in the provided VCF file");
        }
    }
}
 
Example #12
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testGetDefaultToolVCFHeaderLines() throws IOException {
    final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures();
    final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf");
    final String[] args = {"--mask", vcfFile.getCanonicalPath(), "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"};
    tool.instanceMain(args);

    Set<VCFHeaderLine> stdHeaderLines = tool.getDefaultToolVCFHeaderLines();
    VCFHeader hdr = new VCFHeader(stdHeaderLines);

    VCFHeaderLine sourceLine = hdr.getOtherHeaderLine("source");
    Assert.assertEquals(sourceLine.getValue(), tool.getClass().getSimpleName());

    VCFIDHeaderLine commandLine = (VCFIDHeaderLine) hdr.getOtherHeaderLine("GATKCommandLine");
    Assert.assertEquals(commandLine.getID(), tool.getClass().getSimpleName());

    String commandLineString = commandLine.toString();
    assertContains(commandLineString,"CommandLine=");
    assertContains(commandLineString,"Version=");
    assertContains(commandLineString,"Date=");
}
 
Example #13
Source File: RemoveNearbyIndels.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final VCFHeader vcfHeader = new VCFHeader(inputHeader.getMetaDataInSortedOrder(), inputHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(vcfHeader::addMetaDataLine);
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #14
Source File: PostprocessGermlineCNVCallsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "differentValidInput", groups = {"python"})
public void testDifferentValidInput(final int sampleIndex,
                                    final List<String> callShards,
                                    final List<String> modelShards) throws IOException {
    final File actualIntervalsOutputVCF = createTempFile("intervals-output-vcf-" + sampleIndex, ".vcf");
    final File actualSegmentsOutputVCF = createTempFile("segments-output-vcf-" + sampleIndex, ".vcf");
    final File intervalsIndex = new File(actualIntervalsOutputVCF.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX);
    Assert.assertFalse(intervalsIndex.exists());
    final File segmentsIndex = new File(actualIntervalsOutputVCF.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX);
    Assert.assertFalse(segmentsIndex.exists());
    final File actualDenoisedCopyRatiosOutput = createTempFile("denoised-copy-ratios-output-" + sampleIndex, ".tsv");
    final File expectedIntervalsOutputVCF = INTERVALS_VCF_CORRECT_OUTPUTS.get(sampleIndex);
    final File expectedSegmentsOutputVCF = SEGMENTS_VCF_CORRECT_OUTPUTS.get(sampleIndex);
    final File expectedDenoisedCopyRatiosOutput = DENOISED_COPY_RATIOS_OUTPUTS.get(sampleIndex);
    runToolForSingleSample(callShards, modelShards, sampleIndex,
            actualIntervalsOutputVCF, actualSegmentsOutputVCF, actualDenoisedCopyRatiosOutput,
            ALLOSOMAL_CONTIGS, AUTOSOMAL_REF_COPY_NUMBER);

    Assert.assertTrue(intervalsIndex.exists());
    Assert.assertTrue(segmentsIndex.exists());
    IntegrationTestSpec.assertEqualTextFiles(actualIntervalsOutputVCF, expectedIntervalsOutputVCF, "##");
    final VCFHeader intervalsHeader = VariantContextTestUtils.getVCFHeader(actualIntervalsOutputVCF.getAbsolutePath());
    Assert.assertTrue(intervalsHeader.getContigLines().size() > 0);
    IntegrationTestSpec.assertEqualTextFiles(actualSegmentsOutputVCF, expectedSegmentsOutputVCF, "##");
    final VCFHeader segmentsHeader = VariantContextTestUtils.getVCFHeader(actualIntervalsOutputVCF.getAbsolutePath());
    Assert.assertTrue(segmentsHeader.getContigLines().size() > 0);
    IntegrationTestSpec.assertEqualTextFiles(actualDenoisedCopyRatiosOutput, expectedDenoisedCopyRatiosOutput, "##");
}
 
Example #15
Source File: HaplotypeCallerEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Writes an appropriate VCF header, given our arguments, to the provided writer
 *
 * @param vcfWriter writer to which the header should be written
 */
public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary,
                         final Set<VCFHeaderLine>  defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);

    final Set<VCFHeaderLine> headerInfo = new HashSet<>();
    headerInfo.addAll(defaultToolHeaderLines);

    headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
    // all annotation fields from VariantAnnotatorEngine
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
    // all callers need to add these standard annotation header lines
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    // all callers need to add these standard FORMAT field header lines
    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);

    if ( ! hcArgs.doNotRunPhysicalPhasing ) {
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    }

    // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
    // where the filters are used.  For example, in emitting all sites the lowQual field is used
    headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

    if ( emitReferenceConfidence() ) {
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
    }

    final VCFHeader vcfHeader = new VCFHeader(headerInfo, sampleSet);
    vcfHeader.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #16
Source File: KeyIgnoringBCFRecordWriter.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * @deprecated This constructor has no {@link TaskAttemptContext} so it is not
 * possible to pass configuration properties to the writer.
 */
@Deprecated
public KeyIgnoringBCFRecordWriter(
		OutputStream output, VCFHeader header, boolean writeHeader)
	throws IOException
{
	super(output, header, writeHeader);
}
 
Example #17
Source File: CNNScoreVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    // setup the header fields
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();
    final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders);
    hInfo.add(GATKVCFHeaderLines.getInfoLine(scoreKey));
    final TreeSet<String> samples = new TreeSet<>();
    samples.addAll(inputHeader.getGenotypeSamples());
    hInfo.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #18
Source File: CreateSomaticPanelOfNormals.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();


    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    mergingIterator.close();
    writer.close();

    return "SUCCESS";
}
 
Example #19
Source File: SortVcf.java    From picard with MIT License 5 votes vote down vote up
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
 
Example #20
Source File: GtcToVcf.java    From picard with MIT License 5 votes vote down vote up
static Sex getFingerprintSex(final File file) {
    if (file != null) {
        try (VCFFileReader reader = new VCFFileReader(file, false)) {
            final VCFHeader header = reader.getFileHeader();
            final VCFHeaderLine gender = header.getMetaDataLine("gender");
            if (gender != null) {
                return Sex.valueOf(gender.getValue());
            }
        }
    }
    return Sex.Unknown;
}
 
Example #21
Source File: VcfOutputRendererUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Test that the exclusion list overrides the manually specified annotations */
@Test
public void testExclusionListOverridesManualDefaultAnnotations() {
    final Pair<VCFHeader, List<VariantContext>> entireInputVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(TEST_VCF);
    final File outFile = createTempFile("vcf_output_renderer_exclusion", ".vcf");
    final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter(outFile.toPath(),null, false);

    final LinkedHashMap<String, String> dummyDefaults = new LinkedHashMap<>();
    dummyDefaults.put("FOO", "BAR");
    dummyDefaults.put("BAZ", "HUH?");

    final VcfOutputRenderer vcfOutputRenderer = new VcfOutputRenderer(vcfWriter,
        new ArrayList<>(), entireInputVcf.getLeft(), new LinkedHashMap<>(dummyDefaults),
        new LinkedHashMap<>(),
        new HashSet<>(), new HashSet<>(Arrays.asList("BAZ", "AC")), "Unknown");

    final VariantContext variant = entireInputVcf.getRight().get(0);

    final FuncotationMap funcotationMap = FuncotationMap.createNoTranscriptInfo(Collections.emptyList());
    vcfOutputRenderer.write(variant, funcotationMap);
    vcfOutputRenderer.close();

    // Check the output
    final Pair<VCFHeader, List<VariantContext>> tempVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(outFile.getAbsolutePath());
    final VariantContext tempVariant = tempVcf.getRight().get(0);
    final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine("FUNCOTATION").getDescription());
    Assert.assertEquals(funcotatorKeys.length,1);
    Assert.assertEquals(funcotatorKeys[0],"FOO");
    final FuncotationMap tempFuncotationMap =
            FuncotationMap.createAsAllTableFuncotationsFromVcf(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, funcotatorKeys,
                    tempVariant.getAttributeAsString("FUNCOTATION", ""), tempVariant.getAlternateAllele(0), "TEST");
    Assert.assertTrue(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("FOO"));
    Assert.assertEquals(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).getField("FOO"), "BAR");
    Assert.assertFalse(tempFuncotationMap.get(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY).get(0).hasField("BAZ"));

    // IMPORTANT: If the field is not a proper funcotation in VCFs, it will not be excluded.  I.e. if an input VCF has an excluded field, it will not be excluded.
    Assert.assertEquals(tempVariant.getAttribute("AC"), "1");
}
 
Example #22
Source File: VcfFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "provideForTestCreateFuncotationsOnVariant")
public void testCreateFuncotationMetadata(final String variantFeatureDataFileName,
                                          final VariantContext variant,
                                          final ReferenceContext referenceContext,
                                          final List<Funcotation> expected) {
    // Don't need the expected gt for this test, but useful to reuse the data provider.
    // Make our factory:
    final VcfFuncotationFactory vcfFuncotationFactory =
            createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(variantFeatureDataFileName));

    // Create features from the file:
    final List<Feature> vcfFeatures;
    try (final VCFFileReader vcfReader = new VCFFileReader(IOUtils.getPath(variantFeatureDataFileName))) {
        vcfFeatures = vcfReader.query(variant.getContig(), variant.getStart(), variant.getEnd()).stream().collect(Collectors.toList());
    }

    // test the metadata
    final List<Funcotation> funcotations = vcfFuncotationFactory.createFuncotationsOnVariant(
            variant,
            referenceContext,
            vcfFeatures,
            Collections.emptyList()
    );

    Assert.assertEquals(funcotations.stream().map(f -> f.getMetadata().retrieveAllHeaderInfo()).collect(Collectors.toSet()).size(), 1);
    final Pair<VCFHeader, List<VariantContext>> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(variantFeatureDataFileName);
    final List<VCFInfoHeaderLine> gtOutputVcfInfoHeaderLines = vcfFuncotationFactory.createFuncotationVcfInfoHeaderLines(vcfInfo.getLeft());

    // Get the info headers that are in the VCF and make sure that these are also present in the metadata
    final Set<String> headerInfoLines = funcotations.get(0).getFieldNames();
    final Set<String> metadataFields = funcotations.get(0).getMetadata().retrieveAllHeaderInfo().stream()
            .map(f -> f.getID())
            .collect(Collectors.toSet());
    Assert.assertEquals(metadataFields, headerInfoLines);
    Assert.assertEquals(metadataFields, vcfFuncotationFactory.getSupportedFuncotationFields());
    Assert.assertEquals(funcotations.get(0).getMetadata().retrieveAllHeaderInfo(), gtOutputVcfInfoHeaderLines);
}
 
Example #23
Source File: GatherVcfsCloud.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static VCFHeader getHeader(final Path path) {
    try (FeatureReader<VariantContext> reader =  getReaderFromVCFUri(path, 0)) {
        return ((VCFHeader) reader.getHeader());
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(path, e.getMessage(), e);
    }
}
 
Example #24
Source File: FastVCFFileReader.java    From imputationserver with GNU Affero General Public License v3.0 5 votes vote down vote up
public FastVCFFileReader(String vcfFilename) throws IOException {

		super(vcfFilename);
		// load header
		VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false);
		VCFHeader header = reader.getFileHeader();
		samples = header.getGenotypeSamples();
		samplesCount = samples.size();
		variantContext = new MinimalVariantContext(samplesCount);
		reader.close();

		parser = new VCFLineParser(samplesCount);

	}
 
Example #25
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeHeaderAndBadVariant(final VariantContextWriter writer) {
    final VariantContextBuilder vcBuilder = new VariantContextBuilder(
            "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true)));
    vcBuilder.attribute("fake", new Object());
    final VariantContext vc = vcBuilder.make();
    final VCFHeader vcfHeader = new VCFHeader();
    writer.writeHeader(vcfHeader);
    writer.add(vc);
}
 
Example #26
Source File: FixCallSetSampleOrdering.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void onTraversalStart() {
    assertThatTheyReallyWantToProceed();

    if (batchSize == 0) {
        throw new SampleNameFixingCannotProceedException("your callset is not affected by the bug if you ran with --"+ GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME +" 0");
    }

    if ( readerThreads > 1 ) {
        if ( gvcfToHeaderSampleMapFile == null ) {
            throw new SampleNameFixingCannotProceedException("You must provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " > 1");
        }
    } else if ( gvcfToHeaderSampleMapFile != null ) {
        throw new SampleNameFixingCannotProceedException("You must NOT provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " 1");
    }

    final VCFHeader originalHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> originalHeaderLines = originalHeader.getMetaDataInInputOrder();
    final Set<VCFHeaderLine> newHeaderLines = new LinkedHashSet<>(originalHeaderLines);
    newHeaderLines.addAll(getDefaultToolVCFHeaderLines());

    loadSampleNameMappings();
    final List<String> sampleNamesOriginalOrdering = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet());
    if( sampleNamesOriginalOrdering.size() <= batchSize ){
        throw new SampleNameFixingCannotProceedException("you are not affected by the sample name ordering bug if your batch size is >= the number of samples in your callset. \n"
                                        + "batch size: " + batchSize + "\n"
                                        + "number of samples: " + sampleNamesOriginalOrdering.size());
    }
    assertSampleNamesMatchInputVCF(originalHeader.getSampleNamesInOrder(), sampleNamesOriginalOrdering);

    final List<String> batchSortedSampleNames = getBatchSortedList();

    final VCFHeader remappedHeader = new VCFHeader(newHeaderLines, batchSortedSampleNames);
    logger.info("Writing the new header with corrected sample names");
    writer = createVCFWriter(output);
    writer.writeHeader(remappedHeader);
    logger.info("Copying the rest of the VCF");
}
 
Example #27
Source File: FuncotatorIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testExclusionFromDatasourceVcfToVcf() {
    // Clinvar datasource did  go through one round of preprocessing to make contig names "1" --> "chr1" (for example).  This is an issue with ClinVar, not GATK.
    final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
    final File outputFile = getOutputFile(outputFormatType);

    final List<String> excludedFields = Arrays.asList("dummy_ClinVar_VCF_DBVARID", "dummy_ClinVar_VCF_CLNVI");
    final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator(
            PIK3CA_VCF_HG38,
            outputFile,
            hg38Chr3Ref,
            DS_PIK3CA_DIR,
            FuncotatorTestConstants.REFERENCE_VERSION_HG38,
            outputFormatType,
            false, excludedFields);

    runCommandLine(arguments);

    final Pair<VCFHeader, List<VariantContext>> tempVcf =  VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());

    final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME).getDescription());

    // Ensure that the header does not contain the excluded fields
    Stream.of(funcotatorKeys).forEach(k -> Assert.assertFalse(excludedFields.contains(k)));

    final List<VariantContext> variantContexts = tempVcf.getRight();
    for (final VariantContext vc : variantContexts) {
        final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
                funcotatorKeys, vc, "Gencode_28_annotationTranscript", "FAKE_SOURCE");
        for (final String txId: funcs.get(vc.getAlternateAllele(0)).getTranscriptList()) {
            final List<Funcotation> funcotations = funcs.get(vc.getAlternateAllele(0)).get(txId);
            for (final Funcotation funcotation : funcotations) {
                funcotation.getFieldNames().forEach(f -> Assert.assertFalse(excludedFields.contains(f)));
            }
        }
    }
}
 
Example #28
Source File: MTLowHeteroplasmyFilterTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void onTraversalStart() {
    final VCFHeader header = getHeaderForVariants();
    header.addMetaDataLine(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_HET_FILTER_NAME));
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(header);
}
 
Example #29
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the sequence dictionary for this source of Features.
 * Uses the dictionary from the VCF header (if present) for variant inputs,
 * otherwise attempts to create a sequence dictionary from the index file (if present).
 * Returns null if no dictionary could be created from either the header or the index.
 */
public SAMSequenceDictionary getSequenceDictionary() {
    SAMSequenceDictionary dict = null;
    final Object header = getHeader();
    if (header instanceof VCFHeader) {
        dict = ((VCFHeader) header).getSequenceDictionary();
    }
    if (dict != null && !dict.isEmpty()) {
        return dict;
    }
    if (hasIndex) {
        return IndexUtils.createSequenceDictionaryFromFeatureIndex(new File(featureInput.getFeaturePath()));
    }
    return null;
}
 
Example #30
Source File: SageVCF.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public SageVCF(@NotNull final IndexedFastaSequenceFile reference, @NotNull final SageConfig config) {
    writer = new VariantContextWriterBuilder().setOutputFile(config.outputFile())
            .modifyOption(Options.INDEX_ON_THE_FLY, true)
            .modifyOption(Options.USE_ASYNC_IO, false)
            .setReferenceDictionary(reference.getSequenceDictionary())
            .build();
    refContextEnrichment = new SomaticRefContextEnrichment(reference, this::writeToFile);

    final VCFHeader header = refContextEnrichment.enrichHeader(header(config));
    header.setSequenceDictionary(reference.getSequenceDictionary());
    writer.writeHeader(header);
}