htsjdk.variant.variantcontext.writer.VariantContextWriter Java Examples

The following examples show how to use htsjdk.variant.variantcontext.writer.VariantContextWriter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HMMPostProcessor.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * For each segment in genotypingSegments, compose the variant context and write to outputWriter
 *
 * @param genotypingSegments a list of genotyping segments
 * @param outputWriter a VCF writer
 * @param variantPrefix a prefix for composing variant IDs
 * @param commandLine (optional) command line used to generated the data
 */
private void composeVariantContextAndWrite(@Nonnull final List<GenotypingSegment> genotypingSegments,
                                           @Nonnull final VariantContextWriter outputWriter,
                                           @Nonnull final String variantPrefix,
                                           @Nullable final String commandLine) {
    outputWriter.writeHeader(composeHeader(commandLine));
    int counter = 0;
    int prevReportedDonePercentage = -1;
    for (final GenotypingSegment segment : genotypingSegments) {
        final int donePercentage = (int)(100 * counter / (double)genotypingSegments.size());
        if (donePercentage % 10 == 0 && prevReportedDonePercentage != donePercentage) {
            logger.info(String.format("%d%% done...", donePercentage));
            prevReportedDonePercentage = donePercentage;
        }
        final VariantContext variant = composeVariantContext(segment, variantPrefix);
        counter++;
        outputWriter.add(variant);
    }
    logger.info("100% done.");
}
 
Example #2
Source File: GtcToVcf.java    From picard with MIT License 6 votes vote down vote up
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final SortingCollection<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader) {

    try (final VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {

        writer.writeHeader(vcfHeader);

        for (final VariantContext variant : variants) {
            if (variant.getAlternateAlleles().size() > 1) {
                variant.getCommonInfo().addFilter(InfiniumVcfFields.TRIALLELIC);
            }

            writer.add(variant);
        }
    }
}
 
Example #3
Source File: GenotypeConcordance.java    From picard with MIT License 6 votes vote down vote up
/** Gets the variant context writer if the output VCF is to be written, otherwise empty. */
private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) {
    if (OUTPUT_VCF) {
        final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION);
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(outputVcfFile)
                .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary())
                .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                .setOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

        // create the output header
        final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME);
        final Set<VCFHeaderLine> headerLines = new HashSet<>();
        headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder());
        headerLines.add(CONTINGENCY_STATE_HEADER_LINE);
        writer.writeHeader(new VCFHeader(headerLines, sampleNames));
        return Optional.of(writer);
    }
    else {
        return Optional.empty();
    }
}
 
Example #4
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterLenientData")
public void testCreateVCFWriterLenientTrue(
        final File inputFile,
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException {
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();

    // verify lenient==true is honored by writing a bad attribute
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, true);

    try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) {
        writeHeaderAndBadVariant(writer); // write bad attribute succeed with lenient set
    }

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
 
Example #5
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterWithOptions(
        final File inputFile,
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    // create a writer and make sure the requested index/md5 params are honored
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();

    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);

    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    writer.close();

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
 
Example #6
Source File: Mutect2Engine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public void writeHeader(final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary,
                        final Set<VCFHeaderLine>  defaultToolHeaderLines) {
    final Set<VCFHeaderLine> headerInfo = new HashSet<>();

    // all annotation fields from VariantAnnotatorEngine
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
    headerInfo.addAll(defaultToolHeaderLines);

    // all callers need to add these standard FORMAT field header lines
    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);

    headerInfo.addAll(getM2HeaderLines());
    headerInfo.addAll(getSampleHeaderLines());

    final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples());
    vcfHeader.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #7
Source File: PostprocessGermlineCNVCalls.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void generateIntervalsVCFFileFromAllShards() {
    logger.info("Generating intervals VCF file...");
    final VariantContextWriter intervalsVCFWriter = createVCFWriter(outputIntervalsVCFFile);

    final GermlineCNVIntervalVariantComposer germlineCNVIntervalVariantComposer =
            new GermlineCNVIntervalVariantComposer(intervalsVCFWriter, sampleName,
                    refAutosomalIntegerCopyNumberState, allosomalContigSet);
    germlineCNVIntervalVariantComposer.composeVariantContextHeader(sequenceDictionary, getDefaultToolVCFHeaderLines());

    logger.info(String.format("Writing intervals VCF file to %s...", outputIntervalsVCFFile.getAbsolutePath()));
    for (int shardIndex = 0; shardIndex < numShards; shardIndex++) {
        logger.info(String.format("Analyzing shard %d / %d...", shardIndex + 1, numShards));
        germlineCNVIntervalVariantComposer.writeAll(getShardIntervalCopyNumberPosteriorData(shardIndex));
    }
    intervalsVCFWriter.close();
}
 
Example #8
Source File: HaplotypeCallerEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a VCF or GVCF writer as appropriate, given our arguments
 *
 * @param outputVCF location to which the vcf should be written
 * @param readsDictionary sequence dictionary for the reads
 * @return a VCF or GVCF writer as appropriate, ready to use
 */
public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary ) {
    Utils.nonNull(outputVCF);
    Utils.nonNull(readsDictionary);

    VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(new File(outputVCF), readsDictionary, false);

    if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
        try {
            writer = new GVCFWriter(writer, hcArgs.GVCFGQBands, hcArgs.genotypeArgs.samplePloidy);
        } catch ( IllegalArgumentException e ) {
            throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
        }
    }

    return writer;
}
 
Example #9
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class)
public void testCreateVCFWriterLenientFalse(
        final File inputFile,
        final String outputExtension,
        final String indexExtension, // unused
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    // verify lenient==false is honored by writing a bad attribute
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);

    try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) {
        writeHeaderAndBadVariant(writer); // throws due to bad attribute
    }
}
 
Example #10
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterDefaults(
        final File inputFile,           // unused
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,      // unused
        final boolean createMD5         // unused
) throws IOException {

    // create a writer and make sure the default index/md5 params are honored
    final TestGATKToolWithVariants tool = createTestVariantTool(null);

    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    writer.close();

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertTrue(outFileIndex.exists(), "The index file was not created");
    Assert.assertFalse(outFileMD5.exists(), "An md5 file was created and should not have been");
}
 
Example #11
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterWithNoSequenceDictionary(
        final File inputFile,
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException
{
    // verify that a null sequence dictionary still results in a file, but with no index
    final TestGATKVariantToolWithNoSequenceDictionary tool = new TestGATKVariantToolWithNoSequenceDictionary();
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);

    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    writer.close();

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "No output file was not created");
    Assert.assertEquals(outFileIndex.exists(), false, "An index file should not have been created"); // always false with no seq dictionary
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
 
Example #12
Source File: SVVCFWriter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList,
                                  final SAMSequenceDictionary referenceSequenceDictionary,
                                  final Set<VCFHeaderLine> defaultToolVCFHeaderLines) {
    try (final OutputStream outputStream
                 = new BufferedOutputStream(BucketUtils.createFile(fileName))) {

        final VariantContextWriter vcfWriter = getVariantContextWriter(outputStream, referenceSequenceDictionary);

        final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary);
        defaultToolVCFHeaderLines.forEach(vcfHeader::addMetaDataLine);
        vcfWriter.writeHeader(vcfHeader);
        variantsArrayList.forEach(vcfWriter::add);
        vcfWriter.close();

    } catch (final IOException e) {
        throw new GATKException("Could not create output file", e);
    }
}
 
Example #13
Source File: VariantDataManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) {
    // we need to sort in coordinate order in order to produce a valid VCF
    Collections.sort( data, VariantDatum.getComparator(seqDictionary) );

    // create dummy alleles to be used
    List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));

    for( final VariantDatum datum : data ) {
        if (VRAC.useASannotations)
            alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode
        VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles);
        builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd());
        builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
        builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));

        if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
        if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);

        recalWriter.add(builder.make());
    }
}
 
Example #14
Source File: MNVValidatorApplication.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false);
    final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
    final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
            .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
            .build();
    vcfWriter.writeHeader(outputHeader);
    final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
    final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
    Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
    for (final VariantContext rawVariant : vcfReader) {
        final VariantContext simplifiedVariant =
                strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

        final PotentialMNVRegion potentialMNV = outputPair.getLeft();
        outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
        outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
    }
    validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
    vcfWriter.close();
    vcfReader.close();
    LOGGER.info("Written output variants to " + outputVcf);
}
 
Example #15
Source File: PurpleStructuralVariantSupplier.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
void write(@NotNull final PurityAdjuster purityAdjuster, @NotNull final List<PurpleCopyNumber> copyNumbers) throws IOException {
    if (header.isPresent()) {
        try (final IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(refGenomePath));
                final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVCF)
                        .setReferenceDictionary(header.get().getSequenceDictionary())
                        .setIndexCreator(new TabixIndexCreator(header.get().getSequenceDictionary(), new TabixFormat()))
                        .setOutputFileType(VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF)
                        .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
                        .build()) {

            final StructuralRefContextEnrichment refEnricher =
                    new StructuralRefContextEnrichment(indexedFastaSequenceFile, writer::add);
            writer.writeHeader(refEnricher.enrichHeader(header.get()));

            enriched(purityAdjuster, copyNumbers).forEach(refEnricher);
            refEnricher.flush();
        }
    }
}
 
Example #16
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class)
public void testCreateVCFWriterLenientFalse(
        final String outputExtension,
        final String indexExtension, // unused
        final boolean createIndex,
        final boolean createMD5) throws IOException {

    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    Options options[] = createIndex ?
            new Options[] {Options.INDEX_ON_THE_FLY} :
            new Options[] {};
    try (final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(
            outputFile.toPath(),
            makeSimpleSequenceDictionary(),
            createMD5,
            options)) {
        writeHeader(vcw);
        writeBadVariant(vcw); // write a bad attribute and throw...
    }
}
 
Example #17
Source File: GermlineCNVIntervalVariantComposerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(dataProvider = "alleleDeterminationTestData")
public void testAlleleDetermination(final int refAutosomalCopyNumber,
                                    final Set<String> allosomalContigs,
                                    final int baselineCopyNumber,
                                    final Allele expectedAllele) {
    final File outputFile = createTempFile("test", ".vcf");
    final VariantContextWriter outputWriter = GATKVariantContextUtils.createVCFWriter(outputFile.toPath(), null, false);
    final GermlineCNVIntervalVariantComposer variantComposer = new GermlineCNVIntervalVariantComposer(
            outputWriter, "TEST_SAMPLE_NAME", new IntegerCopyNumberState(refAutosomalCopyNumber), allosomalContigs);
    final VariantContext var = variantComposer.composeVariantContext(
            new IntervalCopyNumberGenotypingData(TEST_INTERVAL, TEST_DISTRIBUTION,
                    new IntegerCopyNumberState(baselineCopyNumber)));
    final List<Allele> allAlleles = var.getAlleles();
    Assert.assertEquals(allAlleles, GermlineCNVIntervalVariantComposer.ALL_ALLELES);
    final Genotype gen = var.getGenotype(TEST_SAMPLE_NAME);
    final Allele genAllele = gen.getAlleles().get(0);
    Assert.assertEquals(genAllele, expectedAllele);
}
 
Example #18
Source File: AmberVCF.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public void writeBAF(@NotNull final String filename, @NotNull final Collection<TumorBAF> tumorEvidence,  @NotNull final AmberHetNormalEvidence hetNormalEvidence) {
    final List<TumorBAF> list = Lists.newArrayList(tumorEvidence);
    Collections.sort(list);

    final VariantContextWriter writer =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader header = header(config.tumorOnly() ? Collections.singletonList(config.tumor()) : config.allSamples());
    writer.setHeader(header);
    writer.writeHeader(header);

    final ListMultimap<AmberSite, Genotype> genotypeMap = ArrayListMultimap.create();
    for (final String sample : hetNormalEvidence.samples()) {
        for (BaseDepth baseDepth : hetNormalEvidence.evidence(sample)) {
            genotypeMap.put(AmberSiteFactory.asSite(baseDepth), createGenotype(sample, baseDepth));
        }
    }

    for (final TumorBAF tumorBAF : list) {
        AmberSite tumorSite = AmberSiteFactory.tumorSite(tumorBAF);
        genotypeMap.put(tumorSite, createGenotype(tumorBAF));
        writer.add(create(tumorBAF, genotypeMap.get(tumorSite)));
    }

    writer.close();
}
 
Example #19
Source File: FingerprintUtils.java    From picard with MIT License 6 votes vote down vote up
private static VariantContextWriter getVariantContextWriter(final File outputFile,
                                                            final File referenceSequenceFileName,
                                                            final String sample,
                                                            final String source,
                                                            final ReferenceSequenceFile ref) {
    final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile).build();

    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();
    lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    lines.add(new VCFHeaderLine("source", source));
    lines.add(new VCFHeaderLine("fileDate", new Date().toString()));

    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample));
    header.setSequenceDictionary(ref.getSequenceDictionary());
    variantContextWriter.writeHeader(header);
    return variantContextWriter;
}
 
Example #20
Source File: HaplotypeCallerEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create a VCF or GVCF writer as appropriate, given our arguments
 *
 * @param outputVCF location to which the vcf should be written
 * @param readsDictionary sequence dictionary for the reads
 * @return a VCF or GVCF writer as appropriate, ready to use
 */
public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary,
                                           final boolean createOutputVariantIndex, final boolean  createOutputVariantMD5,
                                           final boolean sitesOnlyMode ) {
    Utils.nonNull(outputVCF);
    Utils.nonNull(readsDictionary);

    final List<Options> options = new ArrayList<>(2);
    if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);}
    if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);}

    VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(
            IOUtils.getPath(outputVCF),
            readsDictionary,
            createOutputVariantMD5,
            options.toArray(new Options[options.size()])
    );

    if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
        try {
            writer = new GVCFWriter(writer, new ArrayList<Number>(hcArgs.GVCFGQBands), hcArgs.standardArgs.genotypeArgs.samplePloidy, hcArgs.floorBlocks);
        } catch ( IllegalArgumentException e ) {
            throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
        }
    }

    return writer;
}
 
Example #21
Source File: CombineGVCFs.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private VariantContextWriter getVCFWriter() {
    final SortedSet<String> samples = getSamplesForVariants();

    final VCFHeader inputVCFHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples);

    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(getDefaultToolVCFHeaderLines());

    headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // add headers for annotations added by this tool
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    if (somaticInput) {
        //single-sample M2 variant filter status will get moved to genotype filter
        headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));

        if (!dropSomaticFilteringAnnotations) {
            //standard M2 INFO annotations for filtering will get moved to FORMAT field
            for (final String key : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) {
                headerLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(key));
            }
        }
    }

    VariantContextWriter writer = createVCFWriter(outputFile);

    final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples();
    final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    writer.writeHeader(vcfHeader);

    return writer;
}
 
Example #22
Source File: GATKVariantContextUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name.
 * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if
 * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args.
 *
 * @param outPath output Path for this writer. May not be null.
 * @param referenceDictionary required if on the fly indexing is set, otherwise can be null
 * @param createMD5 true if an md5 file should be created
 * @param options variable length list of additional Options to be set for this writer
 * @returns VariantContextWriter must be closed by the caller
 */
public static VariantContextWriter createVCFWriter(
        final Path outPath,
        final SAMSequenceDictionary referenceDictionary,
        final boolean createMD5,
        final Options... options)
{
    Utils.nonNull(outPath);

    VariantContextWriterBuilder vcWriterBuilder =
            new VariantContextWriterBuilder().clearOptions().setOutputPath(outPath);

    if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == VariantContextWriterBuilder.determineOutputTypeFromFile(outPath)) {
        // the only way the user has to specify an output type is by file extension, and htsjdk
        // throws if it can't map the file extension to a known vcf type, so fallback to a default
        // of VCF
        logger.warn(String.format(
                "Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.",
                FilenameUtils.getExtension(outPath.getFileName().toString())));
        vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF);
    }

    if (createMD5) {
        vcWriterBuilder.setCreateMD5();
    }

    if (null != referenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary);
    }

    for (Options opt : options) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }

    return vcWriterBuilder.build();
}
 
Example #23
Source File: Mutect2Engine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void writeHeader(final VariantContextWriter vcfWriter, final Set<VCFHeaderLine> defaultToolHeaderLines) {
    final Set<VCFHeaderLine> headerInfo = new HashSet<>();
    headerInfo.add(new VCFHeaderLine("MutectVersion", MUTECT_VERSION));
    headerInfo.add(new VCFHeaderLine(FilterMutectCalls.FILTERING_STATUS_VCF_KEY, "Warning: unfiltered Mutect 2 calls.  Please run " + FilterMutectCalls.class.getSimpleName() + " to remove false positives."));
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions(false));
    headerInfo.addAll(defaultToolHeaderLines);
    STANDARD_MUTECT_INFO_FIELDS.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerInfo::add);

    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_ALLELE_DEPTHS,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ALLELE_FRACTION_KEY));
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
    headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.PHASE_SET_KEY));

    ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample)
            .forEach(s -> headerInfo.add(new VCFHeaderLine(TUMOR_SAMPLE_KEY_IN_VCF_HEADER, s)));

    normalSamples.forEach(sample -> headerInfo.add(new VCFHeaderLine(NORMAL_SAMPLE_KEY_IN_VCF_HEADER, sample)));
    if (emitReferenceConfidence()) {
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY));
    }

    final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples());
    vcfHeader.setSequenceDictionary(header.getSequenceDictionary());
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #24
Source File: SVVCFWriter.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static VariantContextWriter getVariantContextWriter(final OutputStream outputStream,
                                                            final SAMSequenceDictionary referenceSequenceDictionary) {
    VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder()
            .clearOptions()
            .setOutputStream(outputStream);

    if (null != referenceSequenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceSequenceDictionary);
    }
    for (final Options opt : new Options[]{}) {
        vcWriterBuilder = vcWriterBuilder.setOption(opt);
    }

    return vcWriterBuilder.build();
}
 
Example #25
Source File: FingerprintUtils.java    From picard with MIT License 5 votes vote down vote up
/**
 * A function that takes a Fingerprint and writes it as a VCF to a file
 *
 * @param fingerprint               the fingerprint to write
 * @param outputFile                the file to write to
 * @param referenceSequenceFileName the reference sequence (file)
 * @param sample                    the sample name to use in the vcf
 * @param source                    a "source" comment to use in the VCF
 * @throws IOException
 */
public static void writeFingerPrint(final Fingerprint fingerprint,
                                    final File outputFile,
                                    final File referenceSequenceFileName,
                                    final String sample,
                                    final String source) throws IOException {

    try (final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequenceFileName);
         final VariantContextWriter variantContextWriter = getVariantContextWriter(outputFile, referenceSequenceFileName, sample, source, ref)) {

        createVCSetFromFingerprint(fingerprint, ref, sample).forEach(variantContextWriter::add);
    }
}
 
Example #26
Source File: GenotypeGVCFsEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create a VCF header in the writer
 *
 * @param vcfWriter
 * @return a VCF writer

 */
public VariantContextWriter setupVCFWriter(Set<VCFHeaderLine> defaultToolVCFHeaderLines, boolean keepCombined, DbsnpArgumentCollection dbsnp, VariantContextWriter vcfWriter) {
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder());
    headerLines.addAll(defaultToolVCFHeaderLines);

    // Remove GCVFBlocks
    headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().startsWith(GVCF_BLOCK));

    headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions(false));
    headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());

    // add headers for annotations added by this tool
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));   // needed for gVCFs without DP tags
    if (keepCombined) {
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_QUAL_KEY));
        headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY));
    }
    if ( dbsnp.dbsnp != null  ) {
        VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
    }

    final Set<String> sampleNameSet = samples.asSetOfSamples();
    outputHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
    vcfWriter.writeHeader(outputHeader);

    return vcfWriter;
}
 
Example #27
Source File: CNNScoreVariants.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeVCFHeader(VariantContextWriter vcfWriter) {
    // setup the header fields
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();
    final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders);
    hInfo.add(GATKVCFHeaderLines.getInfoLine(scoreKey));
    final TreeSet<String> samples = new TreeSet<>();
    samples.addAll(inputHeader.getGenotypeSamples());
    hInfo.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
}
 
Example #28
Source File: GATKToolUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeHeaderAndBadVariant(final VariantContextWriter writer) {
    final VariantContextBuilder vcBuilder = new VariantContextBuilder(
            "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true)));
    vcBuilder.attribute("fake", new Object());
    final VariantContext vc = vcBuilder.make();
    final VCFHeader vcfHeader = new VCFHeader();
    writer.writeHeader(vcfHeader);
    writer.add(vc);
}
 
Example #29
Source File: VcfTestUtils.java    From picard with MIT License 5 votes vote down vote up
/**
 * This method makes a copy of the input VCF and creates an index file for it in the same location.
 * This is done so that we don't need to store the index file in the same repo
 * The copy of the input is done so that it and its index are in the same directory which is typically required.
 *
 * @param vcfFile the vcf file to index
 * @return File a vcf file (index file is created in same path).
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix, final String suffix) throws IOException {
    final String extension;

    if (suffix != null) {
        extension = suffix;
    } else if (vcfFile.getAbsolutePath().endsWith(".vcf")) {
        extension = ".vcf";
    } else if (vcfFile.getAbsolutePath().endsWith(".vcf.gz")) {
        extension = ".vcf.gz";
    } else {
        extension = "";
    }

    if (!extension.equals(".vcf") && !extension.equals(".vcf.gz")) {
        throw new IllegalArgumentException("couldn't find a .vcf or .vcf.gz ending for input file " + vcfFile.getAbsolutePath());
    }

    File output = createTemporaryIndexedFile(tempFilePrefix, extension);

    try (final VCFFileReader in = new VCFFileReader(vcfFile, false)) {
        final VCFHeader header = in.getFileHeader();
        try (final VariantContextWriter out = new VariantContextWriterBuilder().
                setReferenceDictionary(header.getSequenceDictionary()).
                setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY)).
                setOutputFile(output).build()) {
            out.writeHeader(header);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }
    return output;
}
 
Example #30
Source File: SortVcf.java    From picard with MIT License 5 votes vote down vote up
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}